Diffstat (limited to 'drivers/net/ethernet')
527 files changed, 42739 insertions, 9673 deletions
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index f86d4557d8d7..aead145dd91d 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -188,6 +188,7 @@ source "drivers/net/ethernet/sis/Kconfig" source "drivers/net/ethernet/sfc/Kconfig" source "drivers/net/ethernet/smsc/Kconfig" source "drivers/net/ethernet/socionext/Kconfig" +source "drivers/net/ethernet/spacemit/Kconfig" source "drivers/net/ethernet/stmicro/Kconfig" source "drivers/net/ethernet/sun/Kconfig" source "drivers/net/ethernet/sunplus/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 67182339469a..998dd628b202 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -91,6 +91,7 @@ obj-$(CONFIG_NET_VENDOR_SOLARFLARE) += sfc/ obj-$(CONFIG_NET_VENDOR_SGI) += sgi/ obj-$(CONFIG_NET_VENDOR_SMSC) += smsc/ obj-$(CONFIG_NET_VENDOR_SOCIONEXT) += socionext/ +obj-$(CONFIG_NET_VENDOR_SPACEMIT) += spacemit/ obj-$(CONFIG_NET_VENDOR_STMICRO) += stmicro/ obj-$(CONFIG_NET_VENDOR_SUN) += sun/ obj-$(CONFIG_NET_VENDOR_SUNPLUS) += sunplus/ diff --git a/drivers/net/ethernet/airoha/airoha_eth.c b/drivers/net/ethernet/airoha/airoha_eth.c index e6b802e3d844..81ea01a652b9 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.c +++ b/drivers/net/ethernet/airoha/airoha_eth.c @@ -698,7 +698,8 @@ static int airoha_qdma_rx_process(struct airoha_queue *q, int budget) reason = FIELD_GET(AIROHA_RXD4_PPE_CPU_REASON, msg1); if (reason == PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED) - airoha_ppe_check_skb(eth->ppe, q->skb, hash); + airoha_ppe_check_skb(&eth->ppe->dev, q->skb, hash, + false); done++; napi_gro_receive(&q->napi, q->skb); @@ -2599,13 +2600,15 @@ static int airoha_dev_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { struct net_device *dev = cb_priv; + struct airoha_gdm_port *port = netdev_priv(dev); + struct airoha_eth *eth = port->qdma->eth; if (!tc_can_offload(dev)) return -EOPNOTSUPP; switch (type) { case TC_SETUP_CLSFLOWER: - return airoha_ppe_setup_tc_block_cb(dev, type_data); + return airoha_ppe_setup_tc_block_cb(&eth->ppe->dev, type_data); case TC_SETUP_CLSMATCHALL: return airoha_dev_tc_matchall(dev, type_data); default: diff --git a/drivers/net/ethernet/airoha/airoha_eth.h b/drivers/net/ethernet/airoha/airoha_eth.h index a970b789cf23..cd13c1c1224f 100644 --- a/drivers/net/ethernet/airoha/airoha_eth.h +++ b/drivers/net/ethernet/airoha/airoha_eth.h @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/reset.h> +#include <linux/soc/airoha/airoha_offload.h> #include <net/dsa.h> #define AIROHA_MAX_NUM_GDM_PORTS 4 @@ -229,10 +230,6 @@ struct airoha_hw_stats { }; enum { - PPE_CPU_REASON_HIT_UNBIND_RATE_REACHED = 0x0f, -}; - -enum { AIROHA_FOE_STATE_INVALID, AIROHA_FOE_STATE_UNBIND, AIROHA_FOE_STATE_BIND, @@ -252,6 +249,10 @@ enum { #define AIROHA_FOE_MAC_SMAC_ID GENMASK(20, 16) #define AIROHA_FOE_MAC_PPPOE_ID GENMASK(15, 0) +#define AIROHA_FOE_MAC_WDMA_QOS GENMASK(15, 12) +#define AIROHA_FOE_MAC_WDMA_BAND BIT(11) +#define AIROHA_FOE_MAC_WDMA_WCID GENMASK(10, 0) + struct airoha_foe_mac_info_common { u16 vlan1; u16 etype; @@ -470,7 +471,6 @@ struct airoha_flow_table_entry { }; }; - struct airoha_foe_entry data; struct hlist_node l2_subflow_node; /* PPE L2 subflow entry */ u32 hash; @@ -479,6 +479,16 @@ struct airoha_flow_table_entry { struct rhash_head node; unsigned long cookie; + + /* Must be last --ends in a flexible-array member.
*/ + struct airoha_foe_entry data; +}; + +struct airoha_wdma_info { + u8 idx; + u8 queue; + u16 wcid; + u8 bss; }; /* RX queue to IRQ mapping: BIT(q) in IRQ(n) */ @@ -535,6 +545,7 @@ struct airoha_gdm_port { #define AIROHA_RXD4_FOE_ENTRY GENMASK(15, 0) struct airoha_ppe { + struct airoha_ppe_dev dev; struct airoha_eth *eth; void *foe; @@ -609,9 +620,9 @@ static inline bool airhoa_is_lan_gdm_port(struct airoha_gdm_port *port) bool airoha_is_valid_gdm_port(struct airoha_eth *eth, struct airoha_gdm_port *port); -void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, - u16 hash); -int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data); +void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, + u16 hash, bool rx_wlan); +int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data); int airoha_ppe_init(struct airoha_eth *eth); void airoha_ppe_deinit(struct airoha_eth *eth); void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port); diff --git a/drivers/net/ethernet/airoha/airoha_npu.c b/drivers/net/ethernet/airoha/airoha_npu.c index a802f95df99d..8c883f2b2d36 100644 --- a/drivers/net/ethernet/airoha/airoha_npu.c +++ b/drivers/net/ethernet/airoha/airoha_npu.c @@ -13,7 +13,6 @@ #include <linux/regmap.h> #include "airoha_eth.h" -#include "airoha_npu.h" #define NPU_EN7581_FIRMWARE_DATA "airoha/en7581_npu_data.bin" #define NPU_EN7581_FIRMWARE_RV32 "airoha/en7581_npu_rv32.bin" @@ -42,6 +41,22 @@ #define REG_CR_MBQ8_CTRL(_n) (NPU_MBOX_BASE_ADDR + 0x0b0 + ((_n) << 2)) #define REG_CR_NPU_MIB(_n) (NPU_MBOX_BASE_ADDR + 0x140 + ((_n) << 2)) +#define NPU_WLAN_BASE_ADDR 0x30d000 + +#define REG_IRQ_STATUS (NPU_WLAN_BASE_ADDR + 0x030) +#define REG_IRQ_RXDONE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 2) + 0x034) +#define NPU_IRQ_RX_MASK(_n) ((_n) == 1 ? 
BIT(17) : BIT(16)) + +#define REG_TX_BASE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x080) +#define REG_TX_DSCP_NUM(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x084) +#define REG_TX_CPU_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x088) +#define REG_TX_DMA_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x08c) + +#define REG_RX_BASE(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x180) +#define REG_RX_DSCP_NUM(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x184) +#define REG_RX_CPU_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x188) +#define REG_RX_DMA_IDX(_n) (NPU_WLAN_BASE_ADDR + ((_n) << 4) + 0x18c) + #define NPU_TIMER_BASE_ADDR 0x310100 #define REG_WDT_TIMER_CTRL(_n) (NPU_TIMER_BASE_ADDR + ((_n) * 0x100)) #define WDT_EN_MASK BIT(25) @@ -124,6 +139,13 @@ struct ppe_mbox_data { }; }; +struct wlan_mbox_data { + u32 ifindex:4; + u32 func_type:4; + u32 func_id; + DECLARE_FLEX_ARRAY(u8, d); +}; + static int airoha_npu_send_msg(struct airoha_npu *npu, int func_id, void *p, int size) { @@ -357,15 +379,13 @@ out: return err; } -static int airoha_npu_stats_setup(struct airoha_npu *npu, - dma_addr_t foe_stats_addr) +static int airoha_npu_ppe_stats_setup(struct airoha_npu *npu, + dma_addr_t foe_stats_addr, + u32 num_stats_entries) { - int err, size = PPE_STATS_NUM_ENTRIES * sizeof(*npu->stats); + int err, size = num_stats_entries * sizeof(*npu->stats); struct ppe_mbox_data *ppe_data; - if (!size) /* flow stats are disabled */ - return 0; - ppe_data = kzalloc(sizeof(*ppe_data), GFP_ATOMIC); if (!ppe_data) return -ENOMEM; @@ -390,7 +410,137 @@ out: return err; } -struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr) +static int airoha_npu_wlan_msg_send(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_set_cmd func_id, + void *data, int data_len, gfp_t gfp) +{ + struct wlan_mbox_data *wlan_data; + int err, len; + + len = sizeof(*wlan_data) + data_len; + wlan_data = kzalloc(len, gfp); + if (!wlan_data) + return -ENOMEM; + + wlan_data->ifindex = ifindex; + wlan_data->func_type = NPU_OP_SET; + wlan_data->func_id = func_id; + memcpy(wlan_data->d, data, data_len); + + err = airoha_npu_send_msg(npu, NPU_FUNC_WIFI, wlan_data, len); + kfree(wlan_data); + + return err; +} + +static int airoha_npu_wlan_msg_get(struct airoha_npu *npu, int ifindex, + enum airoha_npu_wlan_get_cmd func_id, + void *data, int data_len, gfp_t gfp) +{ + struct wlan_mbox_data *wlan_data; + int err, len; + + len = sizeof(*wlan_data) + data_len; + wlan_data = kzalloc(len, gfp); + if (!wlan_data) + return -ENOMEM; + + wlan_data->ifindex = ifindex; + wlan_data->func_type = NPU_OP_GET; + wlan_data->func_id = func_id; + + err = airoha_npu_send_msg(npu, NPU_FUNC_WIFI, wlan_data, len); + if (!err) + memcpy(data, wlan_data->d, data_len); + kfree(wlan_data); + + return err; +} + +static int +airoha_npu_wlan_set_reserved_memory(struct airoha_npu *npu, + int ifindex, const char *name, + enum airoha_npu_wlan_set_cmd func_id) +{ + struct device *dev = npu->dev; + struct resource res; + int err; + u32 val; + + err = of_reserved_mem_region_to_resource_byname(dev->of_node, name, + &res); + if (err) + return err; + + val = res.start; + return airoha_npu_wlan_msg_send(npu, ifindex, func_id, &val, + sizeof(val), GFP_KERNEL); +} + +static int airoha_npu_wlan_init_memory(struct airoha_npu *npu) +{ + enum airoha_npu_wlan_set_cmd cmd = WLAN_FUNC_SET_WAIT_NPU_BAND0_ONCPU; + u32 val = 0; + int err; + + err = airoha_npu_wlan_msg_send(npu, 1, cmd, &val, sizeof(val), + GFP_KERNEL); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_TX_BUF_CHECK_ADDR; 
+ err = airoha_npu_wlan_set_reserved_memory(npu, 0, "tx-bufid", cmd); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_PKT_BUF_ADDR; + err = airoha_npu_wlan_set_reserved_memory(npu, 0, "pkt", cmd); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_TX_PKT_BUF_ADDR; + err = airoha_npu_wlan_set_reserved_memory(npu, 0, "tx-pkt", cmd); + if (err) + return err; + + cmd = WLAN_FUNC_SET_WAIT_IS_FORCE_TO_CPU; + return airoha_npu_wlan_msg_send(npu, 0, cmd, &val, sizeof(val), + GFP_KERNEL); +} + +static u32 airoha_npu_wlan_queue_addr_get(struct airoha_npu *npu, int qid, + bool xmit) +{ + if (xmit) + return REG_TX_BASE(qid + 2); + + return REG_RX_BASE(qid); +} + +static void airoha_npu_wlan_irq_status_set(struct airoha_npu *npu, u32 val) +{ + regmap_write(npu->regmap, REG_IRQ_STATUS, val); +} + +static u32 airoha_npu_wlan_irq_status_get(struct airoha_npu *npu, int q) +{ + u32 val; + + regmap_read(npu->regmap, REG_IRQ_STATUS, &val); + return val; +} + +static void airoha_npu_wlan_irq_enable(struct airoha_npu *npu, int q) +{ + regmap_set_bits(npu->regmap, REG_IRQ_RXDONE(q), NPU_IRQ_RX_MASK(q)); +} + +static void airoha_npu_wlan_irq_disable(struct airoha_npu *npu, int q) +{ + regmap_clear_bits(npu->regmap, REG_IRQ_RXDONE(q), NPU_IRQ_RX_MASK(q)); +} + +struct airoha_npu *airoha_npu_get(struct device *dev) { struct platform_device *pdev; struct device_node *np; @@ -429,17 +579,6 @@ struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr) goto error_module_put; } - if (stats_addr) { - int err; - - err = airoha_npu_stats_setup(npu, *stats_addr); - if (err) { - dev_err(dev, "failed to allocate npu stats buffer\n"); - npu = ERR_PTR(err); - goto error_module_put; - } - } - return npu; error_module_put: @@ -491,8 +630,17 @@ static int airoha_npu_probe(struct platform_device *pdev) npu->dev = dev; npu->ops.ppe_init = airoha_npu_ppe_init; npu->ops.ppe_deinit = airoha_npu_ppe_deinit; + npu->ops.ppe_init_stats = airoha_npu_ppe_stats_setup; npu->ops.ppe_flush_sram_entries = airoha_npu_ppe_flush_sram_entries; npu->ops.ppe_foe_commit_entry = airoha_npu_foe_commit_entry; + npu->ops.wlan_init_reserved_memory = airoha_npu_wlan_init_memory; + npu->ops.wlan_send_msg = airoha_npu_wlan_msg_send; + npu->ops.wlan_get_msg = airoha_npu_wlan_msg_get; + npu->ops.wlan_get_queue_addr = airoha_npu_wlan_queue_addr_get; + npu->ops.wlan_set_irq_status = airoha_npu_wlan_irq_status_set; + npu->ops.wlan_get_irq_status = airoha_npu_wlan_irq_status_get; + npu->ops.wlan_enable_irq = airoha_npu_wlan_irq_enable; + npu->ops.wlan_disable_irq = airoha_npu_wlan_irq_disable; npu->regmap = devm_regmap_init_mmio(dev, base, &regmap_config); if (IS_ERR(npu->regmap)) @@ -529,6 +677,15 @@ static int airoha_npu_probe(struct platform_device *pdev) INIT_WORK(&core->wdt_work, airoha_npu_wdt_work); } + /* wlan IRQ lines */ + for (i = 0; i < ARRAY_SIZE(npu->irqs); i++) { + irq = platform_get_irq(pdev, i + ARRAY_SIZE(npu->cores) + 1); + if (irq < 0) + return irq; + + npu->irqs[i] = irq; + } + err = dma_set_coherent_mask(dev, DMA_BIT_MASK(32)); if (err) return err; @@ -550,8 +707,7 @@ static int airoha_npu_probe(struct platform_device *pdev) usleep_range(1000, 2000); /* enable NPU cores */ - /* do not start core3 since it is used for WiFi offloading */ - regmap_write(npu->regmap, REG_CR_BOOT_CONFIG, 0xf7); + regmap_write(npu->regmap, REG_CR_BOOT_CONFIG, 0xff); regmap_write(npu->regmap, REG_CR_BOOT_TRIGGER, 0x1); msleep(100); diff --git a/drivers/net/ethernet/airoha/airoha_npu.h b/drivers/net/ethernet/airoha/airoha_npu.h deleted file mode
100644 index 98ec3be74ce4..000000000000 --- a/drivers/net/ethernet/airoha/airoha_npu.h +++ /dev/null @@ -1,36 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (c) 2025 AIROHA Inc - * Author: Lorenzo Bianconi <lorenzo@kernel.org> - */ - -#define NPU_NUM_CORES 8 - -struct airoha_npu { - struct device *dev; - struct regmap *regmap; - - struct airoha_npu_core { - struct airoha_npu *npu; - /* protect concurrent npu memory accesses */ - spinlock_t lock; - struct work_struct wdt_work; - } cores[NPU_NUM_CORES]; - - struct airoha_foe_stats __iomem *stats; - - struct { - int (*ppe_init)(struct airoha_npu *npu); - int (*ppe_deinit)(struct airoha_npu *npu); - int (*ppe_flush_sram_entries)(struct airoha_npu *npu, - dma_addr_t foe_addr, - int sram_num_entries); - int (*ppe_foe_commit_entry)(struct airoha_npu *npu, - dma_addr_t foe_addr, - u32 entry_size, u32 hash, - bool ppe2); - } ops; -}; - -struct airoha_npu *airoha_npu_get(struct device *dev, dma_addr_t *stats_addr); -void airoha_npu_put(struct airoha_npu *npu); diff --git a/drivers/net/ethernet/airoha/airoha_ppe.c b/drivers/net/ethernet/airoha/airoha_ppe.c index 88694b08afa1..691361b25407 100644 --- a/drivers/net/ethernet/airoha/airoha_ppe.c +++ b/drivers/net/ethernet/airoha/airoha_ppe.c @@ -6,11 +6,12 @@ #include <linux/ip.h> #include <linux/ipv6.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> #include <linux/rhashtable.h> #include <net/ipv6.h> #include <net/pkt_cls.h> -#include "airoha_npu.h" #include "airoha_regs.h" #include "airoha_eth.h" @@ -190,6 +191,31 @@ static int airoha_ppe_flow_mangle_ipv4(const struct flow_action_entry *act, return 0; } +static int airoha_ppe_get_wdma_info(struct net_device *dev, const u8 *addr, + struct airoha_wdma_info *info) +{ + struct net_device_path_stack stack; + struct net_device_path *path; + int err; + + if (!dev) + return -ENODEV; + + err = dev_fill_forward_path(dev, addr, &stack); + if (err) + return err; + + path = &stack.path[stack.num_paths - 1]; + if (path->type != DEV_PATH_MTK_WDMA) + return -1; + + info->idx = path->mtk_wdma.wdma_idx; + info->bss = path->mtk_wdma.bss; + info->wcid = path->mtk_wdma.wcid; + + return 0; +} + static int airoha_get_dsa_port(struct net_device **dev) { #if IS_ENABLED(CONFIG_NET_DSA) @@ -220,9 +246,9 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, struct airoha_flow_data *data, int l4proto) { - int dsa_port = airoha_get_dsa_port(&dev); + u32 qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f), ports_pad, val; + int wlan_etype = -EINVAL, dsa_port = airoha_get_dsa_port(&dev); struct airoha_foe_mac_info_common *l2; - u32 qdata, ports_pad, val; u8 smac_id = 0xf; memset(hwe, 0, sizeof(*hwe)); @@ -236,31 +262,47 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, AIROHA_FOE_IB1_BIND_TTL; hwe->ib1 = val; - val = FIELD_PREP(AIROHA_FOE_IB2_PORT_AG, 0x1f) | - AIROHA_FOE_IB2_PSE_QOS; - if (dsa_port >= 0) - val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, dsa_port); - + val = FIELD_PREP(AIROHA_FOE_IB2_PORT_AG, 0x1f); if (dev) { - struct airoha_gdm_port *port = netdev_priv(dev); - u8 pse_port; - - if (!airoha_is_valid_gdm_port(eth, port)) - return -EINVAL; - - if (dsa_port >= 0) - pse_port = port->id == 4 ? FE_PSE_PORT_GDM4 : port->id; - else - pse_port = 2; /* uplink relies on GDM2 loopback */ - val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port); - - /* For downlink traffic consume SRAM memory for hw forwarding - * descriptors queue. 
- */ - if (airhoa_is_lan_gdm_port(port)) - val |= AIROHA_FOE_IB2_FAST_PATH; - - smac_id = port->id; + struct airoha_wdma_info info = {}; + + if (!airoha_ppe_get_wdma_info(dev, data->eth.h_dest, &info)) { + val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, info.idx) | + FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, + FE_PSE_PORT_CDM4); + qdata |= FIELD_PREP(AIROHA_FOE_ACTDP, info.bss); + wlan_etype = FIELD_PREP(AIROHA_FOE_MAC_WDMA_BAND, + info.idx) | + FIELD_PREP(AIROHA_FOE_MAC_WDMA_WCID, + info.wcid); + } else { + struct airoha_gdm_port *port = netdev_priv(dev); + u8 pse_port; + + if (!airoha_is_valid_gdm_port(eth, port)) + return -EINVAL; + + if (dsa_port >= 0) + pse_port = port->id == 4 ? FE_PSE_PORT_GDM4 + : port->id; + else + pse_port = 2; /* uplink relies on GDM2 + * loopback + */ + + val |= FIELD_PREP(AIROHA_FOE_IB2_PSE_PORT, pse_port) | + AIROHA_FOE_IB2_PSE_QOS; + /* For downlink traffic consume SRAM memory for hw + * forwarding descriptors queue. + */ + if (airhoa_is_lan_gdm_port(port)) + val |= AIROHA_FOE_IB2_FAST_PATH; + if (dsa_port >= 0) + val |= FIELD_PREP(AIROHA_FOE_IB2_NBQ, + dsa_port); + + smac_id = port->id; + } } if (is_multicast_ether_addr(data->eth.h_dest)) @@ -272,7 +314,6 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, if (type == PPE_PKT_TYPE_IPV6_ROUTE_3T) hwe->ipv6.ports = ports_pad; - qdata = FIELD_PREP(AIROHA_FOE_SHAPER_ID, 0x7f); if (type == PPE_PKT_TYPE_BRIDGE) { airoha_ppe_foe_set_bridge_addrs(&hwe->bridge, &data->eth); hwe->bridge.data = qdata; @@ -313,7 +354,9 @@ static int airoha_ppe_foe_entry_prepare(struct airoha_eth *eth, l2->vlan2 = data->vlan.hdr[1].id; } - if (dsa_port >= 0) { + if (wlan_etype >= 0) { + l2->etype = wlan_etype; + } else if (dsa_port >= 0) { l2->etype = BIT(dsa_port); l2->etype |= !data->vlan.num ? BIT(15) : 0; } else if (data->pppoe.num) { @@ -490,6 +533,10 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe, meter = &hwe->ipv4.l2.meter; } + pse_port = FIELD_GET(AIROHA_FOE_IB2_PSE_PORT, *ib2); + if (pse_port == FE_PSE_PORT_CDM4) + return; + airoha_ppe_foe_flow_stat_entry_reset(ppe, npu, index); val = FIELD_GET(AIROHA_FOE_CHANNEL | AIROHA_FOE_QID, *data); @@ -500,7 +547,6 @@ static void airoha_ppe_foe_flow_stats_update(struct airoha_ppe *ppe, AIROHA_FOE_IB2_PSE_QOS | AIROHA_FOE_IB2_FAST_PATH); *meter |= FIELD_PREP(AIROHA_FOE_TUNNEL_MTU, val); - pse_port = FIELD_GET(AIROHA_FOE_IB2_PSE_PORT, *ib2); nbq = pse_port == 1 ? 
6 : 5; *ib2 &= ~(AIROHA_FOE_IB2_NBQ | AIROHA_FOE_IB2_PSE_PORT | AIROHA_FOE_IB2_PSE_QOS); @@ -570,7 +616,7 @@ static bool airoha_ppe_foe_compare_entry(struct airoha_flow_table_entry *e, static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, struct airoha_foe_entry *e, - u32 hash) + u32 hash, bool rx_wlan) { struct airoha_foe_entry *hwe = ppe->foe + hash * sizeof(*hwe); u32 ts = airoha_ppe_get_timestamp(ppe); @@ -593,7 +639,8 @@ static int airoha_ppe_foe_commit_entry(struct airoha_ppe *ppe, goto unlock; } - airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); + if (!rx_wlan) + airoha_ppe_foe_flow_stats_update(ppe, npu, hwe, hash); if (hash < PPE_SRAM_NUM_ENTRIES) { dma_addr_t addr = ppe->foe_dma + hash * sizeof(*hwe); @@ -619,7 +666,7 @@ static void airoha_ppe_foe_remove_flow(struct airoha_ppe *ppe, e->data.ib1 &= ~AIROHA_FOE_IB1_BIND_STATE; e->data.ib1 |= FIELD_PREP(AIROHA_FOE_IB1_BIND_STATE, AIROHA_FOE_STATE_INVALID); - airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash); + airoha_ppe_foe_commit_entry(ppe, &e->data, e->hash, false); e->hash = 0xffff; } if (e->type == FLOW_TYPE_L2_SUBFLOW) { @@ -658,7 +705,7 @@ static void airoha_ppe_foe_flow_remove_entry(struct airoha_ppe *ppe, static int airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, struct airoha_flow_table_entry *e, - u32 hash) + u32 hash, bool rx_wlan) { u32 mask = AIROHA_FOE_IB1_BIND_PACKET_TYPE | AIROHA_FOE_IB1_BIND_UDP; struct airoha_foe_entry *hwe_p, hwe; @@ -699,14 +746,14 @@ airoha_ppe_foe_commit_subflow_entry(struct airoha_ppe *ppe, } hwe.bridge.data = e->data.bridge.data; - airoha_ppe_foe_commit_entry(ppe, &hwe, hash); + airoha_ppe_foe_commit_entry(ppe, &hwe, hash, rx_wlan); return 0; } static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, struct sk_buff *skb, - u32 hash) + u32 hash, bool rx_wlan) { struct airoha_flow_table_entry *e; struct airoha_foe_bridge br = {}; @@ -739,7 +786,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, if (!airoha_ppe_foe_compare_entry(e, hwe)) continue; - airoha_ppe_foe_commit_entry(ppe, &e->data, hash); + airoha_ppe_foe_commit_entry(ppe, &e->data, hash, rx_wlan); commit_done = true; e->hash = hash; } @@ -751,7 +798,7 @@ static void airoha_ppe_foe_insert_entry(struct airoha_ppe *ppe, e = rhashtable_lookup_fast(&ppe->l2_flows, &br, airoha_l2_flow_table_params); if (e) - airoha_ppe_foe_commit_subflow_entry(ppe, e, hash); + airoha_ppe_foe_commit_subflow_entry(ppe, e, hash, rx_wlan); unlock: spin_unlock_bh(&ppe_lock); } @@ -890,11 +937,10 @@ static int airoha_ppe_entry_idle_time(struct airoha_ppe *ppe, return airoha_ppe_get_entry_idle_time(ppe, e->data.ib1); } -static int airoha_ppe_flow_offload_replace(struct airoha_gdm_port *port, +static int airoha_ppe_flow_offload_replace(struct airoha_eth *eth, struct flow_cls_offload *f) { struct flow_rule *rule = flow_cls_offload_flow_rule(f); - struct airoha_eth *eth = port->qdma->eth; struct airoha_flow_table_entry *e; struct airoha_flow_data data = {}; struct net_device *odev = NULL; @@ -1091,10 +1137,9 @@ free_entry: return err; } -static int airoha_ppe_flow_offload_destroy(struct airoha_gdm_port *port, +static int airoha_ppe_flow_offload_destroy(struct airoha_eth *eth, struct flow_cls_offload *f) { - struct airoha_eth *eth = port->qdma->eth; struct airoha_flow_table_entry *e; e = rhashtable_lookup(&eth->flow_table, &f->cookie, @@ -1137,10 +1182,9 @@ void airoha_ppe_foe_entry_get_stats(struct airoha_ppe *ppe, u32 hash, rcu_read_unlock(); } -static int airoha_ppe_flow_offload_stats(struct airoha_gdm_port *port,
+static int airoha_ppe_flow_offload_stats(struct airoha_eth *eth, struct flow_cls_offload *f) { - struct airoha_eth *eth = port->qdma->eth; struct airoha_flow_table_entry *e; u32 idle; @@ -1164,16 +1208,16 @@ static int airoha_ppe_flow_offload_stats(struct airoha_gdm_port *port, return 0; } -static int airoha_ppe_flow_offload_cmd(struct airoha_gdm_port *port, +static int airoha_ppe_flow_offload_cmd(struct airoha_eth *eth, struct flow_cls_offload *f) { switch (f->command) { case FLOW_CLS_REPLACE: - return airoha_ppe_flow_offload_replace(port, f); + return airoha_ppe_flow_offload_replace(eth, f); case FLOW_CLS_DESTROY: - return airoha_ppe_flow_offload_destroy(port, f); + return airoha_ppe_flow_offload_destroy(eth, f); case FLOW_CLS_STATS: - return airoha_ppe_flow_offload_stats(port, f); + return airoha_ppe_flow_offload_stats(eth, f); default: break; } @@ -1199,12 +1243,11 @@ static int airoha_ppe_flush_sram_entries(struct airoha_ppe *ppe, static struct airoha_npu *airoha_ppe_npu_get(struct airoha_eth *eth) { - struct airoha_npu *npu = airoha_npu_get(eth->dev, - &eth->ppe->foe_stats_dma); if (IS_ERR(npu)) { request_module("airoha-npu"); - npu = airoha_npu_get(eth->dev, &eth->ppe->foe_stats_dma); + npu = airoha_npu_get(eth->dev); } return npu; @@ -1213,6 +1256,7 @@ static struct airoha_npu *airoha_ppe_npu_get(struct airoha_eth *eth) static int airoha_ppe_offload_setup(struct airoha_eth *eth) { struct airoha_npu *npu = airoha_ppe_npu_get(eth); + struct airoha_ppe *ppe = eth->ppe; int err; if (IS_ERR(npu)) @@ -1222,12 +1266,19 @@ static int airoha_ppe_offload_setup(struct airoha_eth *eth) if (err) goto error_npu_put; - airoha_ppe_hw_init(eth->ppe); - err = airoha_ppe_flush_sram_entries(eth->ppe, npu); + if (PPE_STATS_NUM_ENTRIES) { + err = npu->ops.ppe_init_stats(npu, ppe->foe_stats_dma, + PPE_STATS_NUM_ENTRIES); + if (err) + goto error_npu_put; + } + + airoha_ppe_hw_init(ppe); + err = airoha_ppe_flush_sram_entries(ppe, npu); if (err) goto error_npu_put; - airoha_ppe_foe_flow_stats_reset(eth->ppe, npu); + airoha_ppe_foe_flow_stats_reset(ppe, npu); rcu_assign_pointer(eth->npu, npu); synchronize_rcu(); @@ -1240,11 +1291,10 @@ error_npu_put: return err; } -int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data) +int airoha_ppe_setup_tc_block_cb(struct airoha_ppe_dev *dev, void *type_data) { - struct airoha_gdm_port *port = netdev_priv(dev); - struct flow_cls_offload *cls = type_data; - struct airoha_eth *eth = port->qdma->eth; + struct airoha_ppe *ppe = dev->priv; + struct airoha_eth *eth = ppe->eth; int err = 0; mutex_lock(&flow_offload_mutex); @@ -1252,16 +1302,17 @@ int airoha_ppe_setup_tc_block_cb(struct net_device *dev, void *type_data) if (!eth->npu) err = airoha_ppe_offload_setup(eth); if (!err) - err = airoha_ppe_flow_offload_cmd(port, cls); + err = airoha_ppe_flow_offload_cmd(eth, type_data); mutex_unlock(&flow_offload_mutex); return err; } -void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, - u16 hash) +void airoha_ppe_check_skb(struct airoha_ppe_dev *dev, struct sk_buff *skb, + u16 hash, bool rx_wlan) { + struct airoha_ppe *ppe = dev->priv; u16 now, diff; if (hash > PPE_HASH_MASK) @@ -1273,7 +1324,7 @@ void airoha_ppe_check_skb(struct airoha_ppe *ppe, struct sk_buff *skb, return; ppe->foe_check_time[hash] = now; - airoha_ppe_foe_insert_entry(ppe, skb, hash); + airoha_ppe_foe_insert_entry(ppe, skb, hash, rx_wlan); } void airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) @@ -1297,6 +1348,61 @@ void
airoha_ppe_init_upd_mem(struct airoha_gdm_port *port) PPE_UPDMEM_WR_MASK | PPE_UPDMEM_REQ_MASK); } +struct airoha_ppe_dev *airoha_ppe_get_dev(struct device *dev) +{ + struct platform_device *pdev; + struct device_node *np; + struct airoha_eth *eth; + + np = of_parse_phandle(dev->of_node, "airoha,eth", 0); + if (!np) + return ERR_PTR(-ENODEV); + + pdev = of_find_device_by_node(np); + if (!pdev) { + dev_err(dev, "cannot find device node %s\n", np->name); + of_node_put(np); + return ERR_PTR(-ENODEV); + } + of_node_put(np); + + if (!try_module_get(THIS_MODULE)) { + dev_err(dev, "failed to get the device driver module\n"); + goto error_pdev_put; + } + + eth = platform_get_drvdata(pdev); + if (!eth) + goto error_module_put; + + if (!device_link_add(dev, &pdev->dev, DL_FLAG_AUTOREMOVE_SUPPLIER)) { + dev_err(&pdev->dev, + "failed to create device link to consumer %s\n", + dev_name(dev)); + goto error_module_put; + } + + return &eth->ppe->dev; + +error_module_put: + module_put(THIS_MODULE); +error_pdev_put: + platform_device_put(pdev); + + return ERR_PTR(-ENODEV); +} +EXPORT_SYMBOL_GPL(airoha_ppe_get_dev); + +void airoha_ppe_put_dev(struct airoha_ppe_dev *dev) +{ + struct airoha_ppe *ppe = dev->priv; + struct airoha_eth *eth = ppe->eth; + + module_put(THIS_MODULE); + put_device(eth->dev); +} +EXPORT_SYMBOL_GPL(airoha_ppe_put_dev); + int airoha_ppe_init(struct airoha_eth *eth) { struct airoha_ppe *ppe; @@ -1306,6 +1412,10 @@ int airoha_ppe_init(struct airoha_eth *eth) if (!ppe) return -ENOMEM; + ppe->dev.ops.setup_tc_block_cb = airoha_ppe_setup_tc_block_cb; + ppe->dev.ops.check_skb = airoha_ppe_check_skb; + ppe->dev.priv = ppe; + foe_size = PPE_NUM_ENTRIES * sizeof(struct airoha_foe_entry); ppe->foe = dmam_alloc_coherent(eth->dev, foe_size, &ppe->foe_dma, GFP_KERNEL); diff --git a/drivers/net/ethernet/airoha/airoha_regs.h b/drivers/net/ethernet/airoha/airoha_regs.h index 150c85995cc1..e1c15c20be8e 100644 --- a/drivers/net/ethernet/airoha/airoha_regs.h +++ b/drivers/net/ethernet/airoha/airoha_regs.h @@ -237,8 +237,8 @@ #define PPE_FLOW_CFG_IP4_TCP_FRAG_MASK BIT(6) #define REG_PPE_IP_PROTO_CHK(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x208) -#define PPE_IP_PROTO_CHK_IPV4_MASK GENMASK(15, 0) -#define PPE_IP_PROTO_CHK_IPV6_MASK GENMASK(31, 16) +#define PPE_IP_PROTO_CHK_IPV4_MASK GENMASK(31, 16) +#define PPE_IP_PROTO_CHK_IPV6_MASK GENMASK(15, 0) #define REG_PPE_TB_CFG(_n) (((_n) ? PPE2_BASE : PPE1_BASE) + 0x21c) #define PPE_SRAM_TB_NUM_ENTRY_MASK GENMASK(26, 24) diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c index a81d3a7a3bb9..fe3479b84a1f 100644 --- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -865,7 +865,10 @@ static u32 ena_get_rxfh_indir_size(struct net_device *netdev) static u32 ena_get_rxfh_key_size(struct net_device *netdev) { - return ENA_HASH_KEY_SIZE; + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_rss *rss = &adapter->ena_dev->rss; + + return rss->hash_key ?
ENA_HASH_KEY_SIZE : 0; } static int ena_indirection_table_set(struct ena_adapter *adapter, diff --git a/drivers/net/ethernet/amd/pds_core/main.c b/drivers/net/ethernet/amd/pds_core/main.c index 9b81e1c260c2..c7a2eff57632 100644 --- a/drivers/net/ethernet/amd/pds_core/main.c +++ b/drivers/net/ethernet/amd/pds_core/main.c @@ -280,7 +280,7 @@ static int pdsc_init_pf(struct pdsc *pdsc) goto err_out_del_dev; } - hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, 0, pdsc); + hr = devl_health_reporter_create(dl, &pdsc_fw_reporter_ops, pdsc); if (IS_ERR(hr)) { devl_unlock(dl); dev_warn(pdsc->dev, "Failed to create fw reporter: %pe\n", hr); diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile index 5b0ab6240cf2..980e27652237 100644 --- a/drivers/net/ethernet/amd/xgbe/Makefile +++ b/drivers/net/ethernet/amd/xgbe/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \ xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o \ - xgbe-hwtstamp.o xgbe-ptp.o \ + xgbe-hwtstamp.o xgbe-ptp.o xgbe-pps.o \ xgbe-i2c.o xgbe-phy-v1.o xgbe-phy-v2.o \ xgbe-platform.o diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h index 009fbc9b11ce..62b01de93db4 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h @@ -227,7 +227,11 @@ #define MAC_TICSNR 0x0d5C #define MAC_TECNR 0x0d60 #define MAC_TECSNR 0x0d64 - +#define MAC_PPSCR 0x0d70 +#define MAC_PPS0_TTSR 0x0d80 +#define MAC_PPS0_TTNSR 0x0d84 +#define MAC_PPS0_INTERVAL 0x0d88 +#define MAC_PPS0_WIDTH 0x0d8C #define MAC_QTFCR_INC 4 #define MAC_MACA_INC 4 #define MAC_HTR_INC 4 @@ -235,6 +239,18 @@ #define MAC_RQC2_INC 4 #define MAC_RQC2_Q_PER_REG 4 +/* PPS helpers */ +#define PPSEN0 BIT(4) +#define MAC_PPSx_TTSR(x) ((MAC_PPS0_TTSR) + ((x) * 0x10)) +#define MAC_PPSx_TTNSR(x) ((MAC_PPS0_TTNSR) + ((x) * 0x10)) +#define MAC_PPSx_INTERVAL(x) ((MAC_PPS0_INTERVAL) + ((x) * 0x10)) +#define MAC_PPSx_WIDTH(x) ((MAC_PPS0_WIDTH) + ((x) * 0x10)) +#define PPS_MAXIDX(x) ((((x) + 1) * 8) - 1) +#define PPS_MINIDX(x) ((x) * 8) +#define XGBE_PPSCMD_STOP 0x5 +#define XGBE_PPSCMD_START 0x2 +#define XGBE_PPSTARGET_PULSE 0x2 + /* MAC register entry bit positions and sizes */ #define MAC_HWF0R_ADDMACADRSEL_INDEX 18 #define MAC_HWF0R_ADDMACADRSEL_WIDTH 5 @@ -496,8 +512,10 @@ #define MAC_VR_SNPSVER_WIDTH 8 #define MAC_VR_USERVER_INDEX 16 #define MAC_VR_USERVER_WIDTH 8 +#define MAC_PPSx_TTNSR_TRGTBUSY0_INDEX 31 +#define MAC_PPSx_TTNSR_TRGTBUSY0_WIDTH 1 -/* MMC register offsets */ + /* MMC register offsets */ #define MMC_CR 0x0800 #define MMC_RISR 0x0804 #define MMC_TISR 0x0808 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 2e9b95a94f89..f0989aa01855 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -691,6 +691,21 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata) hw_feat->pps_out_num = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, PPSOUTNUM); hw_feat->aux_snap_num = XGMAC_GET_BITS(mac_hfr2, MAC_HWF2R, AUXSNAPNUM); + /* Sanity check and warn if hardware reports more than supported */ + if (hw_feat->pps_out_num > XGBE_MAX_PPS_OUT) { + dev_warn(pdata->dev, + "Hardware reports %u PPS outputs, limiting to %u\n", + hw_feat->pps_out_num, XGBE_MAX_PPS_OUT); + hw_feat->pps_out_num = XGBE_MAX_PPS_OUT; + } + + if (hw_feat->aux_snap_num > XGBE_MAX_AUX_SNAP) { + dev_warn(pdata->dev, + "Hardware reports %u aux 
snapshot inputs, limiting to %u\n", + hw_feat->aux_snap_num, XGBE_MAX_AUX_SNAP); + hw_feat->aux_snap_num = XGBE_MAX_AUX_SNAP; + } + /* Translate the Hash Table size into actual number */ switch (hw_feat->hash_table_size) { case 0: diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index be0d2c7d08dc..b6e1b67a2d0e 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -329,6 +329,7 @@ static int xgbe_get_coalesce(struct net_device *netdev, ec->rx_coalesce_usecs = pdata->rx_usecs; ec->rx_max_coalesced_frames = pdata->rx_frames; + ec->tx_coalesce_usecs = pdata->tx_usecs; ec->tx_max_coalesced_frames = pdata->tx_frames; return 0; @@ -342,7 +343,8 @@ static int xgbe_set_coalesce(struct net_device *netdev, struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; unsigned int rx_frames, rx_riwt, rx_usecs; - unsigned int tx_frames; + unsigned int tx_frames, tx_usecs; + unsigned int jiffy_us = jiffies_to_usecs(1); rx_riwt = hw_if->usec_to_riwt(pdata, ec->rx_coalesce_usecs); rx_usecs = ec->rx_coalesce_usecs; @@ -364,20 +366,42 @@ static int xgbe_set_coalesce(struct net_device *netdev, return -EINVAL; } + tx_usecs = ec->tx_coalesce_usecs; tx_frames = ec->tx_max_coalesced_frames; /* Check the bounds of values for Tx */ + if (!tx_usecs) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "tx-usecs must not be 0"); + return -EINVAL; + } + if (tx_usecs > XGMAC_MAX_COAL_TX_TICK) { + NL_SET_ERR_MSG_FMT_MOD(extack, "tx-usecs is limited to %d usec", + XGMAC_MAX_COAL_TX_TICK); + return -EINVAL; + } if (tx_frames > pdata->tx_desc_count) { netdev_err(netdev, "tx-frames is limited to %d frames\n", pdata->tx_desc_count); return -EINVAL; } + /* Round tx-usecs to nearest multiple of jiffy granularity */ + if (tx_usecs % jiffy_us) { + tx_usecs = rounddown(tx_usecs, jiffy_us); + if (!tx_usecs) + tx_usecs = jiffy_us; + NL_SET_ERR_MSG_FMT_MOD(extack, + "tx-usecs rounded to %u usec due to jiffy granularity (%u usec)", + tx_usecs, jiffy_us); + } + pdata->rx_riwt = rx_riwt; pdata->rx_usecs = rx_usecs; pdata->rx_frames = rx_frames; hw_if->config_rx_coalesce(pdata); + pdata->tx_usecs = tx_usecs; pdata->tx_frames = tx_frames; hw_if->config_tx_coalesce(pdata); @@ -440,7 +464,7 @@ static int xgbe_set_rxfh(struct net_device *netdev, { struct xgbe_prv_data *pdata = netdev_priv(netdev); struct xgbe_hw_if *hw_if = &pdata->hw_if; - unsigned int ret; + int ret; if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && rxfh->hfunc != ETH_RSS_HASH_TOP) { @@ -709,7 +733,7 @@ out: } static const struct ethtool_ops xgbe_ethtool_ops = { - .supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS | + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_MAX_FRAMES, .get_drvinfo = xgbe_get_drvinfo, .get_msglevel = xgbe_get_msglevel, diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c index d40011e8ddf2..65eb7b577b65 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c @@ -70,7 +70,7 @@ static int xgbe_i2c_set_enable(struct xgbe_prv_data *pdata, bool enable) static int xgbe_i2c_disable(struct xgbe_prv_data *pdata) { - unsigned int ret; + int ret; ret = xgbe_i2c_set_enable(pdata, false); if (ret) { diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c index 23c39e92e783..a56efc1bee33 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c +++ 
b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c @@ -2902,7 +2902,7 @@ static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata) static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata) { struct xgbe_phy_data *phy_data = pdata->phy_data; - unsigned int ret; + int ret; ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio); if (ret) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pps.c b/drivers/net/ethernet/amd/xgbe/xgbe-pps.c new file mode 100644 index 000000000000..6d03ae7ab36f --- /dev/null +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pps.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: (GPL-2.0-or-later OR BSD-3-Clause) +/* + * Copyright (c) 2014-2025, Advanced Micro Devices, Inc. + * Copyright (c) 2014, Synopsys, Inc. + * All rights reserved + * + * Author: Raju Rangoju <Raju.Rangoju@amd.com> + */ + +#include "xgbe.h" +#include "xgbe-common.h" + +static u32 get_pps_mask(unsigned int x) +{ + return GENMASK(PPS_MAXIDX(x), PPS_MINIDX(x)); +} + +static u32 get_pps_cmd(unsigned int x, u32 val) +{ + return (val & GENMASK(3, 0)) << PPS_MINIDX(x); +} + +static u32 get_target_mode_sel(unsigned int x, u32 val) +{ + return (val & GENMASK(1, 0)) << (PPS_MAXIDX(x) - 2); +} + +int xgbe_pps_config(struct xgbe_prv_data *pdata, + struct xgbe_pps_config *cfg, int index, bool on) +{ + unsigned int ppscr = 0; + unsigned int tnsec; + u64 period; + + /* Check if target time register is busy */ + tnsec = XGMAC_IOREAD(pdata, MAC_PPSx_TTNSR(index)); + if (XGMAC_GET_BITS(tnsec, MAC_PPSx_TTNSR, TRGTBUSY0)) + return -EBUSY; + + ppscr = XGMAC_IOREAD(pdata, MAC_PPSCR); + ppscr &= ~get_pps_mask(index); + + if (!on) { + /* Disable PPS output */ + ppscr |= get_pps_cmd(index, XGBE_PPSCMD_STOP); + ppscr |= PPSEN0; + XGMAC_IOWRITE(pdata, MAC_PPSCR, ppscr); + + return 0; + } + + /* Configure start time */ + XGMAC_IOWRITE(pdata, MAC_PPSx_TTSR(index), cfg->start.tv_sec); + XGMAC_IOWRITE(pdata, MAC_PPSx_TTNSR(index), cfg->start.tv_nsec); + + period = cfg->period.tv_sec * NSEC_PER_SEC + cfg->period.tv_nsec; + period = div_u64(period, XGBE_V2_TSTAMP_SSINC); + + if (period < 4) + return -EINVAL; + + /* Configure interval and pulse width (50% duty cycle) */ + XGMAC_IOWRITE(pdata, MAC_PPSx_INTERVAL(index), period - 1); + XGMAC_IOWRITE(pdata, MAC_PPSx_WIDTH(index), (period >> 1) - 1); + + /* Enable PPS with pulse train mode */ + ppscr |= get_pps_cmd(index, XGBE_PPSCMD_START); + ppscr |= get_target_mode_sel(index, XGBE_PPSTARGET_PULSE); + ppscr |= PPSEN0; + + XGMAC_IOWRITE(pdata, MAC_PPSCR, ppscr); + + return 0; +} diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c index 3658afc7801d..0e0b8ec3b504 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ptp.c @@ -106,7 +106,29 @@ static int xgbe_settime(struct ptp_clock_info *info, static int xgbe_enable(struct ptp_clock_info *info, struct ptp_clock_request *request, int on) { - return -EOPNOTSUPP; + struct xgbe_prv_data *pdata = container_of(info, struct xgbe_prv_data, + ptp_clock_info); + struct xgbe_pps_config *pps_cfg; + unsigned long flags; + int ret; + + dev_dbg(pdata->dev, "rq->type %d on %d\n", request->type, on); + + if (request->type != PTP_CLK_REQ_PEROUT) + return -EOPNOTSUPP; + + pps_cfg = &pdata->pps[request->perout.index]; + + pps_cfg->start.tv_sec = request->perout.start.sec; + pps_cfg->start.tv_nsec = request->perout.start.nsec; + pps_cfg->period.tv_sec = request->perout.period.sec; + pps_cfg->period.tv_nsec = request->perout.period.nsec; + + 
spin_lock_irqsave(&pdata->tstamp_lock, flags); + ret = xgbe_pps_config(pdata, pps_cfg, request->perout.index, on); + spin_unlock_irqrestore(&pdata->tstamp_lock, flags); + + return ret; } void xgbe_ptp_register(struct xgbe_prv_data *pdata) @@ -122,6 +144,8 @@ void xgbe_ptp_register(struct xgbe_prv_data *pdata) info->adjtime = xgbe_adjtime; info->gettimex64 = xgbe_gettimex; info->settime64 = xgbe_settime; + info->n_per_out = pdata->hw_feat.pps_out_num; + info->n_ext_ts = pdata->hw_feat.aux_snap_num; info->enable = xgbe_enable; clock = ptp_clock_register(info, pdata->dev); diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index d7e03e292ec4..e8bbb6805901 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -142,6 +142,10 @@ #define XGBE_V2_TSTAMP_SNSINC 0 #define XGBE_V2_PTP_ACT_CLK_FREQ 1000000000 +/* Define maximum supported values */ +#define XGBE_MAX_PPS_OUT 4 +#define XGBE_MAX_AUX_SNAP 4 + /* Driver PMT macros */ #define XGMAC_DRIVER_CONTEXT 1 #define XGMAC_IOCTL_CONTEXT 2 @@ -168,6 +172,7 @@ /* Default coalescing parameters */ #define XGMAC_INIT_DMA_TX_USECS 1000 #define XGMAC_INIT_DMA_TX_FRAMES 25 +#define XGMAC_MAX_COAL_TX_TICK 100000 #define XGMAC_MAX_DMA_RIWT 0xff #define XGMAC_INIT_DMA_RX_USECS 30 @@ -672,6 +677,11 @@ struct xgbe_ext_stats { u64 rx_vxlan_csum_errors; }; +struct xgbe_pps_config { + struct timespec64 start; + struct timespec64 period; +}; + struct xgbe_hw_if { int (*tx_complete)(struct xgbe_ring_desc *); @@ -1142,6 +1152,9 @@ struct xgbe_prv_data { struct sk_buff *tx_tstamp_skb; u64 tx_tstamp; + /* Pulse Per Second output */ + struct xgbe_pps_config pps[XGBE_MAX_PPS_OUT]; + /* DCB support */ struct ieee_ets *ets; struct ieee_pfc *pfc; @@ -1304,6 +1317,10 @@ void xgbe_prep_tx_tstamp(struct xgbe_prv_data *pdata, int xgbe_init_ptp(struct xgbe_prv_data *pdata); void xgbe_update_tstamp_time(struct xgbe_prv_data *pdata, unsigned int sec, unsigned int nsec); + +int xgbe_pps_config(struct xgbe_prv_data *pdata, struct xgbe_pps_config *cfg, + int index, bool on); + #ifdef CONFIG_DEBUG_FS void xgbe_debugfs_init(struct xgbe_prv_data *); void xgbe_debugfs_exit(struct xgbe_prv_data *); diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig index 0fc10e6c6902..9fdef874f5ca 100644 --- a/drivers/net/ethernet/broadcom/Kconfig +++ b/drivers/net/ethernet/broadcom/Kconfig @@ -257,6 +257,7 @@ config BNGE tristate "Broadcom Ethernet device support" depends on PCI select NET_DEVLINK + select PAGE_POOL help This driver supports Broadcom 50/100/200/400/800 gigabit Ethernet cards. The module will be called bng_en. To compile this driver as a module, diff --git a/drivers/net/ethernet/broadcom/bnge/bnge.h b/drivers/net/ethernet/broadcom/bnge/bnge.h index 6fb3683b6b04..7aed5f81cd51 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge.h @@ -102,6 +102,10 @@ struct bnge_dev { u16 chip_num; u8 chip_rev; +#if BITS_PER_LONG == 32 + /* ensure atomic 64-bit doorbell writes on 32-bit systems. 
*/ + spinlock_t db_lock; +#endif int db_offset; /* db_offset within db_size */ int db_size; @@ -129,6 +133,7 @@ struct bnge_dev { unsigned long state; #define BNGE_STATE_DRV_REGISTERED 0 +#define BNGE_STATE_OPEN 1 u64 fw_cap; @@ -155,6 +160,7 @@ struct bnge_dev { u16 rss_indir_tbl_entries; u32 rss_cap; + u32 rss_hash_cfg; u16 rx_nr_rings; u16 tx_nr_rings; @@ -213,6 +219,27 @@ static inline bool bnge_is_agg_reqd(struct bnge_dev *bd) return true; } +static inline void bnge_writeq(struct bnge_dev *bd, u64 val, + void __iomem *addr) +{ +#if BITS_PER_LONG == 32 + spin_lock(&bd->db_lock); + lo_hi_writeq(val, addr); + spin_unlock(&bd->db_lock); +#else + writeq(val, addr); +#endif +} + +/* For TX and RX ring doorbells */ +static inline void bnge_db_write(struct bnge_dev *bd, struct bnge_db_info *db, + u32 idx) +{ + bnge_writeq(bd, db->db_key64 | DB_RING_IDX(db, idx), + db->doorbell); +} + bool bnge_aux_registered(struct bnge_dev *bd); +u16 bnge_aux_get_msix(struct bnge_dev *bd); #endif /* _BNGE_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_core.c b/drivers/net/ethernet/broadcom/bnge/bnge_core.c index 68da656f2894..2c72dd34d50d 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_core.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_core.c @@ -96,6 +96,16 @@ static void bnge_fw_unregister_dev(struct bnge_dev *bd) bnge_free_ctx_mem(bd); } +static void bnge_set_dflt_rss_hash_type(struct bnge_dev *bd) +{ + bd->rss_hash_cfg = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6; +} + static int bnge_fw_register_dev(struct bnge_dev *bd) { int rc; @@ -137,6 +147,8 @@ static int bnge_fw_register_dev(struct bnge_dev *bd) goto err_func_unrgtr; } + bnge_set_dflt_rss_hash_type(bd); + return 0; err_func_unrgtr: @@ -296,6 +308,10 @@ static int bnge_probe_one(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_config_uninit; } +#if BITS_PER_LONG == 32 + spin_lock_init(&bd->db_lock); +#endif + rc = bnge_alloc_irqs(bd); if (rc) { dev_err(&pdev->dev, "Error IRQ allocation rc = %d\n", rc); diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_db.h b/drivers/net/ethernet/broadcom/bnge/bnge_db.h new file mode 100644 index 000000000000..950ed582f1d8 --- /dev/null +++ b/drivers/net/ethernet/broadcom/bnge/bnge_db.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2025 Broadcom */ + +#ifndef _BNGE_DB_H_ +#define _BNGE_DB_H_ + +/* 64-bit doorbell */ +#define DBR_EPOCH_SFT 24 +#define DBR_TOGGLE_SFT 25 +#define DBR_XID_SFT 32 +#define DBR_PATH_L2 (0x1ULL << 56) +#define DBR_VALID (0x1ULL << 58) +#define DBR_TYPE_SQ (0x0ULL << 60) +#define DBR_TYPE_SRQ (0x2ULL << 60) +#define DBR_TYPE_CQ (0x4ULL << 60) +#define DBR_TYPE_CQ_ARMALL (0x6ULL << 60) +#define DBR_TYPE_NQ (0xaULL << 60) +#define DBR_TYPE_NQ_ARM (0xbULL << 60) +#define DBR_TYPE_NQ_MASK (0xeULL << 60) + +struct bnge_db_info { + void __iomem *doorbell; + u64 db_key64; + u32 db_ring_mask; + u32 db_epoch_mask; + u8 db_epoch_shift; +}; + +#define DB_EPOCH(db, idx) (((idx) & (db)->db_epoch_mask) << \ + ((db)->db_epoch_shift)) +#define DB_RING_IDX(db, idx) (((idx) & (db)->db_ring_mask) | \ + DB_EPOCH(db, idx)) + +#endif /* _BNGE_DB_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c index 5c178fade065..198f49b40dbf 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c +++ 
b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.c @@ -6,6 +6,8 @@ #include <linux/mm.h> #include <linux/pci.h> #include <linux/bnxt/hsi.h> +#include <linux/if_vlan.h> +#include <net/netdev_queues.h> #include "bnge.h" #include "bnge_hwrm.h" @@ -701,3 +703,483 @@ qportcfg_exit: bnge_hwrm_req_drop(bd, req); return rc; } + +int bnge_hwrm_vnic_set_hds(struct bnge_net *bn, struct bnge_vnic_info *vnic) +{ + u16 hds_thresh = (u16)bn->netdev->cfg_pending->hds_thresh; + struct hwrm_vnic_plcmodes_cfg_input *req; + struct bnge_dev *bd = bn->bd; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_VNIC_PLCMODES_CFG); + if (rc) + return rc; + + req->flags = cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_JUMBO_PLACEMENT); + req->enables = cpu_to_le32(BNGE_PLC_EN_JUMBO_THRES_VALID); + req->jumbo_thresh = cpu_to_le16(bn->rx_buf_use_size); + + if (bnge_is_agg_reqd(bd)) { + req->flags |= cpu_to_le32(VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV4 | + VNIC_PLCMODES_CFG_REQ_FLAGS_HDS_IPV6); + req->enables |= + cpu_to_le32(BNGE_PLC_EN_HDS_THRES_VALID); + req->hds_threshold = cpu_to_le16(hds_thresh); + } + req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); + return bnge_hwrm_req_send(bd, req); +} + +int bnge_hwrm_vnic_ctx_alloc(struct bnge_dev *bd, + struct bnge_vnic_info *vnic, u16 ctx_idx) +{ + struct hwrm_vnic_rss_cos_lb_ctx_alloc_output *resp; + struct hwrm_vnic_rss_cos_lb_ctx_alloc_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_VNIC_RSS_COS_LB_CTX_ALLOC); + if (rc) + return rc; + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (!rc) + vnic->fw_rss_cos_lb_ctx[ctx_idx] = + le16_to_cpu(resp->rss_cos_lb_ctx_id); + bnge_hwrm_req_drop(bd, req); + + return rc; +} + +static void +__bnge_hwrm_vnic_set_rss(struct bnge_net *bn, + struct hwrm_vnic_rss_cfg_input *req, + struct bnge_vnic_info *vnic) +{ + struct bnge_dev *bd = bn->bd; + + bnge_fill_hw_rss_tbl(bn, vnic); + req->flags |= VNIC_RSS_CFG_REQ_FLAGS_IPSEC_HASH_TYPE_CFG_SUPPORT; + + req->hash_type = cpu_to_le32(bd->rss_hash_cfg); + req->hash_mode_flags = VNIC_RSS_CFG_REQ_HASH_MODE_FLAGS_DEFAULT; + req->ring_grp_tbl_addr = cpu_to_le64(vnic->rss_table_dma_addr); + req->hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr); +} + +int bnge_hwrm_vnic_set_rss(struct bnge_net *bn, + struct bnge_vnic_info *vnic, bool set_rss) +{ + struct hwrm_vnic_rss_cfg_input *req; + struct bnge_dev *bd = bn->bd; + dma_addr_t ring_tbl_map; + u32 i, nr_ctxs; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_VNIC_RSS_CFG); + if (rc) + return rc; + + req->vnic_id = cpu_to_le16(vnic->fw_vnic_id); + if (!set_rss) + return bnge_hwrm_req_send(bd, req); + + __bnge_hwrm_vnic_set_rss(bn, req, vnic); + ring_tbl_map = vnic->rss_table_dma_addr; + nr_ctxs = bnge_cal_nr_rss_ctxs(bd->rx_nr_rings); + + bnge_hwrm_req_hold(bd, req); + for (i = 0; i < nr_ctxs; ring_tbl_map += BNGE_RSS_TABLE_SIZE, i++) { + req->ring_grp_tbl_addr = cpu_to_le64(ring_tbl_map); + req->ring_table_pair_index = i; + req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[i]); + rc = bnge_hwrm_req_send(bd, req); + if (rc) + goto exit; + } + +exit: + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_vnic_cfg(struct bnge_net *bn, struct bnge_vnic_info *vnic) +{ + struct bnge_rx_ring_info *rxr = &bn->rx_ring[0]; + struct hwrm_vnic_cfg_input *req; + struct bnge_dev *bd = bn->bd; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_VNIC_CFG); + if (rc) + return rc; + + req->default_rx_ring_id = + cpu_to_le16(rxr->rx_ring_struct.fw_ring_id); + req->default_cmpl_ring_id = + 
cpu_to_le16(bnge_cp_ring_for_rx(rxr)); + req->enables = + cpu_to_le32(VNIC_CFG_REQ_ENABLES_DEFAULT_RX_RING_ID | + VNIC_CFG_REQ_ENABLES_DEFAULT_CMPL_RING_ID); + vnic->mru = bd->netdev->mtu + ETH_HLEN + VLAN_HLEN; + req->mru = cpu_to_le16(vnic->mru); + + req->vnic_id = cpu_to_le16(vnic->fw_vnic_id); + + if (bd->flags & BNGE_EN_STRIP_VLAN) + req->flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE); + if (vnic->vnic_id == BNGE_VNIC_DEFAULT && bnge_aux_registered(bd)) + req->flags |= cpu_to_le32(BNGE_VNIC_CFG_ROCE_DUAL_MODE); + + return bnge_hwrm_req_send(bd, req); +} + +void bnge_hwrm_update_rss_hash_cfg(struct bnge_net *bn) +{ + struct bnge_vnic_info *vnic = &bn->vnic_info[BNGE_VNIC_DEFAULT]; + struct hwrm_vnic_rss_qcfg_output *resp; + struct hwrm_vnic_rss_qcfg_input *req; + struct bnge_dev *bd = bn->bd; + + if (bnge_hwrm_req_init(bd, req, HWRM_VNIC_RSS_QCFG)) + return; + + req->vnic_id = cpu_to_le16(vnic->fw_vnic_id); + /* all contexts configured to same hash_type, zero always exists */ + req->rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]); + resp = bnge_hwrm_req_hold(bd, req); + if (!bnge_hwrm_req_send(bd, req)) + bd->rss_hash_cfg = + le32_to_cpu(resp->hash_type) ?: bd->rss_hash_cfg; + bnge_hwrm_req_drop(bd, req); +} + +int bnge_hwrm_l2_filter_free(struct bnge_dev *bd, struct bnge_l2_filter *fltr) +{ + struct hwrm_cfa_l2_filter_free_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_CFA_L2_FILTER_FREE); + if (rc) + return rc; + + req->l2_filter_id = fltr->base.filter_id; + return bnge_hwrm_req_send(bd, req); +} + +int bnge_hwrm_l2_filter_alloc(struct bnge_dev *bd, struct bnge_l2_filter *fltr) +{ + struct hwrm_cfa_l2_filter_alloc_output *resp; + struct hwrm_cfa_l2_filter_alloc_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_CFA_L2_FILTER_ALLOC); + if (rc) + return rc; + + req->flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX); + + req->flags |= cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST); + req->dst_id = cpu_to_le16(fltr->base.fw_vnic_id); + req->enables = + cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_DST_ID | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR_MASK); + ether_addr_copy(req->l2_addr, fltr->l2_key.dst_mac_addr); + eth_broadcast_addr(req->l2_addr_mask); + + if (fltr->l2_key.vlan) { + req->enables |= + cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_IVLAN_MASK | + CFA_L2_FILTER_ALLOC_REQ_ENABLES_NUM_VLANS); + req->num_vlans = 1; + req->l2_ivlan = cpu_to_le16(fltr->l2_key.vlan); + req->l2_ivlan_mask = cpu_to_le16(0xfff); + } + + resp = bnge_hwrm_req_hold(bd, req); + rc = bnge_hwrm_req_send(bd, req); + if (!rc) + fltr->base.filter_id = resp->l2_filter_id; + + bnge_hwrm_req_drop(bd, req); + return rc; +} + +int bnge_hwrm_cfa_l2_set_rx_mask(struct bnge_dev *bd, + struct bnge_vnic_info *vnic) +{ + struct hwrm_cfa_l2_set_rx_mask_input *req; + int rc; + + rc = bnge_hwrm_req_init(bd, req, HWRM_CFA_L2_SET_RX_MASK); + if (rc) + return rc; + + req->vnic_id = cpu_to_le32(vnic->fw_vnic_id); + if (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST) { + req->num_mc_entries = cpu_to_le32(vnic->mc_list_count); + req->mc_tbl_addr = cpu_to_le64(vnic->mc_list_mapping); + } + req->mask = cpu_to_le32(vnic->rx_mask); + return bnge_hwrm_req_send_silent(bd, req); +} + +int bnge_hwrm_vnic_alloc(struct bnge_dev *bd, struct bnge_vnic_info *vnic, + unsigned int nr_rings) +{ + struct hwrm_vnic_alloc_output *resp; + struct hwrm_vnic_alloc_input *req; + unsigned int i; + int 
rc;
+
+	rc = bnge_hwrm_req_init(bd, req, HWRM_VNIC_ALLOC);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < BNGE_MAX_CTX_PER_VNIC; i++)
+		vnic->fw_rss_cos_lb_ctx[i] = INVALID_HW_RING_ID;
+	if (vnic->vnic_id == BNGE_VNIC_DEFAULT)
+		req->flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT);
+
+	resp = bnge_hwrm_req_hold(bd, req);
+	rc = bnge_hwrm_req_send(bd, req);
+	if (!rc)
+		vnic->fw_vnic_id = le32_to_cpu(resp->vnic_id);
+	bnge_hwrm_req_drop(bd, req);
+	return rc;
+}
+
+void bnge_hwrm_vnic_free_one(struct bnge_dev *bd, struct bnge_vnic_info *vnic)
+{
+	if (vnic->fw_vnic_id != INVALID_HW_RING_ID) {
+		struct hwrm_vnic_free_input *req;
+
+		if (bnge_hwrm_req_init(bd, req, HWRM_VNIC_FREE))
+			return;
+
+		req->vnic_id = cpu_to_le32(vnic->fw_vnic_id);
+
+		bnge_hwrm_req_send(bd, req);
+		vnic->fw_vnic_id = INVALID_HW_RING_ID;
+	}
+}
+
+void bnge_hwrm_vnic_ctx_free_one(struct bnge_dev *bd,
+				 struct bnge_vnic_info *vnic, u16 ctx_idx)
+{
+	struct hwrm_vnic_rss_cos_lb_ctx_free_input *req;
+
+	if (bnge_hwrm_req_init(bd, req, HWRM_VNIC_RSS_COS_LB_CTX_FREE))
+		return;
+
+	req->rss_cos_lb_ctx_id =
+		cpu_to_le16(vnic->fw_rss_cos_lb_ctx[ctx_idx]);
+
+	bnge_hwrm_req_send(bd, req);
+	vnic->fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID;
+}
+
+void bnge_hwrm_stat_ctx_free(struct bnge_net *bn)
+{
+	struct hwrm_stat_ctx_free_input *req;
+	struct bnge_dev *bd = bn->bd;
+	int i;
+
+	if (bnge_hwrm_req_init(bd, req, HWRM_STAT_CTX_FREE))
+		return;
+
+	bnge_hwrm_req_hold(bd, req);
+	for (i = 0; i < bd->nq_nr_rings; i++) {
+		struct bnge_napi *bnapi = bn->bnapi[i];
+		struct bnge_nq_ring_info *nqr = &bnapi->nq_ring;
+
+		if (nqr->hw_stats_ctx_id != INVALID_STATS_CTX_ID) {
+			req->stat_ctx_id = cpu_to_le32(nqr->hw_stats_ctx_id);
+			bnge_hwrm_req_send(bd, req);
+
+			nqr->hw_stats_ctx_id = INVALID_STATS_CTX_ID;
+		}
+	}
+	bnge_hwrm_req_drop(bd, req);
+}
+
+int bnge_hwrm_stat_ctx_alloc(struct bnge_net *bn)
+{
+	struct hwrm_stat_ctx_alloc_output *resp;
+	struct hwrm_stat_ctx_alloc_input *req;
+	struct bnge_dev *bd = bn->bd;
+	int rc, i;
+
+	rc = bnge_hwrm_req_init(bd, req, HWRM_STAT_CTX_ALLOC);
+	if (rc)
+		return rc;
+
+	req->stats_dma_length = cpu_to_le16(bd->hw_ring_stats_size);
+	req->update_period_ms = cpu_to_le32(bn->stats_coal_ticks / 1000);
+
+	resp = bnge_hwrm_req_hold(bd, req);
+	for (i = 0; i < bd->nq_nr_rings; i++) {
+		struct bnge_napi *bnapi = bn->bnapi[i];
+		struct bnge_nq_ring_info *nqr = &bnapi->nq_ring;
+
+		req->stats_dma_addr = cpu_to_le64(nqr->stats.hw_stats_map);
+
+		rc = bnge_hwrm_req_send(bd, req);
+		if (rc)
+			break;
+
+		nqr->hw_stats_ctx_id = le32_to_cpu(resp->stat_ctx_id);
+		bn->grp_info[i].fw_stats_ctx = nqr->hw_stats_ctx_id;
+	}
+	bnge_hwrm_req_drop(bd, req);
+	return rc;
+}
+
+int hwrm_ring_free_send_msg(struct bnge_net *bn,
+			    struct bnge_ring_struct *ring,
+			    u32 ring_type, int cmpl_ring_id)
+{
+	struct hwrm_ring_free_input *req;
+	struct bnge_dev *bd = bn->bd;
+	int rc;
+
+	rc = bnge_hwrm_req_init(bd, req, HWRM_RING_FREE);
+	if (rc)
+		goto exit;
+
+	req->cmpl_ring = cpu_to_le16(cmpl_ring_id);
+	req->ring_type = ring_type;
+	req->ring_id = cpu_to_le16(ring->fw_ring_id);
+
+	bnge_hwrm_req_hold(bd, req);
+	rc = bnge_hwrm_req_send(bd, req);
+	bnge_hwrm_req_drop(bd, req);
+exit:
+	if (rc) {
+		netdev_err(bd->netdev, "hwrm_ring_free type %d failed. rc:%d\n",
+			   ring_type, rc);
+		return -EIO;
+	}
+	return 0;
+}
+
+int hwrm_ring_alloc_send_msg(struct bnge_net *bn,
+			     struct bnge_ring_struct *ring,
+			     u32 ring_type, u32 map_index)
+{
+	struct bnge_ring_mem_info *rmem = &ring->ring_mem;
+	struct bnge_ring_grp_info *grp_info;
+	struct hwrm_ring_alloc_output *resp;
+	struct hwrm_ring_alloc_input *req;
+	struct bnge_dev *bd = bn->bd;
+	u16 ring_id, flags = 0;
+	int rc;
+
+	rc = bnge_hwrm_req_init(bd, req, HWRM_RING_ALLOC);
+	if (rc)
+		goto exit;
+
+	req->enables = 0;
+	if (rmem->nr_pages > 1) {
+		req->page_tbl_addr = cpu_to_le64(rmem->dma_pg_tbl);
+		/* Page size is in log2 units */
+		req->page_size = BNGE_PAGE_SHIFT;
+		req->page_tbl_depth = 1;
+	} else {
+		req->page_tbl_addr = cpu_to_le64(rmem->dma_arr[0]);
+	}
+	req->fbo = 0;
+	/* Association of ring index with doorbell index and MSIX number */
+	req->logical_id = cpu_to_le16(map_index);
+
+	switch (ring_type) {
+	case HWRM_RING_ALLOC_TX: {
+		struct bnge_tx_ring_info *txr;
+
+		txr = container_of(ring, struct bnge_tx_ring_info,
+				   tx_ring_struct);
+		req->ring_type = RING_ALLOC_REQ_RING_TYPE_TX;
+		/* Association of transmit ring with completion ring */
+		grp_info = &bn->grp_info[ring->grp_idx];
+		req->cmpl_ring_id = cpu_to_le16(bnge_cp_ring_for_tx(txr));
+		req->length = cpu_to_le32(bn->tx_ring_mask + 1);
+		req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+		req->queue_id = cpu_to_le16(ring->queue_id);
+		req->flags = cpu_to_le16(flags);
+		break;
+	}
+	case HWRM_RING_ALLOC_RX:
+		req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX;
+		req->length = cpu_to_le32(bn->rx_ring_mask + 1);
+
+		/* Association of rx ring with stats context */
+		grp_info = &bn->grp_info[ring->grp_idx];
+		req->rx_buf_size = cpu_to_le16(bn->rx_buf_use_size);
+		req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+		req->enables |=
+			cpu_to_le32(RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
+		if (NET_IP_ALIGN == 2)
+			flags = RING_ALLOC_REQ_FLAGS_RX_SOP_PAD;
+		req->flags = cpu_to_le16(flags);
+		break;
+	case HWRM_RING_ALLOC_AGG:
+		req->ring_type = RING_ALLOC_REQ_RING_TYPE_RX_AGG;
+		/* Association of agg ring with rx ring */
+		grp_info = &bn->grp_info[ring->grp_idx];
+		req->rx_ring_id = cpu_to_le16(grp_info->rx_fw_ring_id);
+		req->rx_buf_size = cpu_to_le16(BNGE_RX_PAGE_SIZE);
+		req->stat_ctx_id = cpu_to_le32(grp_info->fw_stats_ctx);
+		req->enables |=
+			cpu_to_le32(RING_ALLOC_REQ_ENABLES_RX_RING_ID_VALID |
+				    RING_ALLOC_REQ_ENABLES_RX_BUF_SIZE_VALID);
+		req->length = cpu_to_le32(bn->rx_agg_ring_mask + 1);
+		break;
+	case HWRM_RING_ALLOC_CMPL:
+		req->ring_type = RING_ALLOC_REQ_RING_TYPE_L2_CMPL;
+		req->length = cpu_to_le32(bn->cp_ring_mask + 1);
+		/* Association of cp ring with nq */
+		grp_info = &bn->grp_info[map_index];
+		req->nq_ring_id = cpu_to_le16(grp_info->nq_fw_ring_id);
+		req->cq_handle = cpu_to_le64(ring->handle);
+		req->enables |=
+			cpu_to_le32(RING_ALLOC_REQ_ENABLES_NQ_RING_ID_VALID);
+		break;
+	case HWRM_RING_ALLOC_NQ:
+		req->ring_type = RING_ALLOC_REQ_RING_TYPE_NQ;
+		req->length = cpu_to_le32(bn->cp_ring_mask + 1);
+		req->int_mode = RING_ALLOC_REQ_INT_MODE_MSIX;
+		break;
+	default:
+		netdev_err(bn->netdev, "hwrm alloc invalid ring type %d\n", ring_type);
+		return -EINVAL;
+	}
+
+	resp = bnge_hwrm_req_hold(bd, req);
+	rc = bnge_hwrm_req_send(bd, req);
+	ring_id = le16_to_cpu(resp->ring_id);
+	bnge_hwrm_req_drop(bd, req);
+
+exit:
+	if (rc) {
+		netdev_err(bd->netdev, "hwrm_ring_alloc type %d failed. rc:%d\n",
+			   ring_type, rc);
+		return -EIO;
+	}
+	ring->fw_ring_id = ring_id;
+	return rc;
+}
+
+int bnge_hwrm_set_async_event_cr(struct bnge_dev *bd, int idx)
+{
+	struct hwrm_func_cfg_input *req;
+	int rc;
+
+	rc = bnge_hwrm_req_init(bd, req, HWRM_FUNC_CFG);
+	if (rc)
+		return rc;
+
+	req->fid = cpu_to_le16(0xffff);
+	req->enables = cpu_to_le32(FUNC_CFG_REQ_ENABLES_ASYNC_EVENT_CR);
+	req->async_event_cr = cpu_to_le16(idx);
+	return bnge_hwrm_req_send(bd, req);
+}
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h
index 6c03923eb559..042f28e84a05 100644
--- a/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h
+++ b/drivers/net/ethernet/broadcom/bnge/bnge_hwrm_lib.h
@@ -4,6 +4,13 @@
 #ifndef _BNGE_HWRM_LIB_H_
 #define _BNGE_HWRM_LIB_H_
 
+#define BNGE_PLC_EN_JUMBO_THRES_VALID	\
+	VNIC_PLCMODES_CFG_REQ_ENABLES_JUMBO_THRESH_VALID
+#define BNGE_PLC_EN_HDS_THRES_VALID	\
+	VNIC_PLCMODES_CFG_REQ_ENABLES_HDS_THRESHOLD_VALID
+#define BNGE_VNIC_CFG_ROCE_DUAL_MODE	\
+	VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE
+
 int bnge_hwrm_ver_get(struct bnge_dev *bd);
 int bnge_hwrm_func_reset(struct bnge_dev *bd);
 int bnge_hwrm_fw_set_time(struct bnge_dev *bd);
@@ -24,4 +31,28 @@
 int bnge_hwrm_func_qcfg(struct bnge_dev *bd);
 int bnge_hwrm_func_resc_qcaps(struct bnge_dev *bd);
 int bnge_hwrm_queue_qportcfg(struct bnge_dev *bd);
+int bnge_hwrm_vnic_set_hds(struct bnge_net *bn, struct bnge_vnic_info *vnic);
+int bnge_hwrm_vnic_ctx_alloc(struct bnge_dev *bd,
+			     struct bnge_vnic_info *vnic, u16 ctx_idx);
+int bnge_hwrm_vnic_set_rss(struct bnge_net *bn,
+			   struct bnge_vnic_info *vnic, bool set_rss);
+int bnge_hwrm_vnic_cfg(struct bnge_net *bn, struct bnge_vnic_info *vnic);
+void bnge_hwrm_update_rss_hash_cfg(struct bnge_net *bn);
+int bnge_hwrm_vnic_alloc(struct bnge_dev *bd, struct bnge_vnic_info *vnic,
+			 unsigned int nr_rings);
+void bnge_hwrm_vnic_free_one(struct bnge_dev *bd, struct bnge_vnic_info *vnic);
+void bnge_hwrm_vnic_ctx_free_one(struct bnge_dev *bd,
+				 struct bnge_vnic_info *vnic, u16 ctx_idx);
+int bnge_hwrm_l2_filter_free(struct bnge_dev *bd, struct bnge_l2_filter *fltr);
+int bnge_hwrm_l2_filter_alloc(struct bnge_dev *bd, struct bnge_l2_filter *fltr);
+int bnge_hwrm_cfa_l2_set_rx_mask(struct bnge_dev *bd,
+				 struct bnge_vnic_info *vnic);
+void bnge_hwrm_stat_ctx_free(struct bnge_net *bn);
+int bnge_hwrm_stat_ctx_alloc(struct bnge_net *bn);
+int hwrm_ring_free_send_msg(struct bnge_net *bn, struct bnge_ring_struct *ring,
+			    u32 ring_type, int cmpl_ring_id);
+int hwrm_ring_alloc_send_msg(struct bnge_net *bn,
+			     struct bnge_ring_struct *ring,
+			     u32 ring_type, u32 map_index);
+int bnge_hwrm_set_async_event_cr(struct bnge_dev *bd, int idx);
 #endif /* _BNGE_HWRM_LIB_H_ */
diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
index 02254934f3d0..832eeb960bd2 100644
--- a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
+++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.c
@@ -14,10 +14,2194 @@
 #include <linux/if.h>
 #include <net/ip.h>
 #include <linux/skbuff.h>
+#include <net/page_pool/helpers.h>
 
 #include "bnge.h"
 #include "bnge_hwrm_lib.h"
 #include "bnge_ethtool.h"
+#include "bnge_rmem.h"
+
+#define BNGE_RING_TO_TC_OFF(bd, tx)	\
+	((tx) % (bd)->tx_nr_rings_per_tc)
+
+#define BNGE_RING_TO_TC(bd, tx)		\
+	((tx) / (bd)->tx_nr_rings_per_tc)
+
+#define BNGE_TC_TO_RING_BASE(bd, tc)	\
+	((tc) * (bd)->tx_nr_rings_per_tc)
+
+static void bnge_free_stats_mem(struct bnge_net *bn,
+				struct bnge_stats_mem *stats)
+{
+	
struct bnge_dev *bd = bn->bd; + + if (stats->hw_stats) { + dma_free_coherent(bd->dev, stats->len, stats->hw_stats, + stats->hw_stats_map); + stats->hw_stats = NULL; + } +} + +static int bnge_alloc_stats_mem(struct bnge_net *bn, + struct bnge_stats_mem *stats) +{ + struct bnge_dev *bd = bn->bd; + + stats->hw_stats = dma_alloc_coherent(bd->dev, stats->len, + &stats->hw_stats_map, GFP_KERNEL); + if (!stats->hw_stats) + return -ENOMEM; + + return 0; +} + +static void bnge_free_ring_stats(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + if (!bn->bnapi) + return; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr = &bnapi->nq_ring; + + bnge_free_stats_mem(bn, &nqr->stats); + } +} + +static int bnge_alloc_ring_stats(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + u32 size, i; + int rc; + + size = bd->hw_ring_stats_size; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr = &bnapi->nq_ring; + + nqr->stats.len = size; + rc = bnge_alloc_stats_mem(bn, &nqr->stats); + if (rc) + goto err_free_ring_stats; + + nqr->hw_stats_ctx_id = INVALID_STATS_CTX_ID; + } + return 0; + +err_free_ring_stats: + bnge_free_ring_stats(bn); + return rc; +} + +static void bnge_free_nq_desc_arr(struct bnge_nq_ring_info *nqr) +{ + struct bnge_ring_struct *ring = &nqr->ring_struct; + + kfree(nqr->desc_ring); + nqr->desc_ring = NULL; + ring->ring_mem.pg_arr = NULL; + kfree(nqr->desc_mapping); + nqr->desc_mapping = NULL; + ring->ring_mem.dma_arr = NULL; +} + +static void bnge_free_cp_desc_arr(struct bnge_cp_ring_info *cpr) +{ + struct bnge_ring_struct *ring = &cpr->ring_struct; + + kfree(cpr->desc_ring); + cpr->desc_ring = NULL; + ring->ring_mem.pg_arr = NULL; + kfree(cpr->desc_mapping); + cpr->desc_mapping = NULL; + ring->ring_mem.dma_arr = NULL; +} + +static int bnge_alloc_nq_desc_arr(struct bnge_nq_ring_info *nqr, int n) +{ + nqr->desc_ring = kcalloc(n, sizeof(*nqr->desc_ring), GFP_KERNEL); + if (!nqr->desc_ring) + return -ENOMEM; + + nqr->desc_mapping = kcalloc(n, sizeof(*nqr->desc_mapping), GFP_KERNEL); + if (!nqr->desc_mapping) + goto err_free_desc_ring; + return 0; + +err_free_desc_ring: + kfree(nqr->desc_ring); + nqr->desc_ring = NULL; + return -ENOMEM; +} + +static int bnge_alloc_cp_desc_arr(struct bnge_cp_ring_info *cpr, int n) +{ + cpr->desc_ring = kcalloc(n, sizeof(*cpr->desc_ring), GFP_KERNEL); + if (!cpr->desc_ring) + return -ENOMEM; + + cpr->desc_mapping = kcalloc(n, sizeof(*cpr->desc_mapping), GFP_KERNEL); + if (!cpr->desc_mapping) + goto err_free_desc_ring; + return 0; + +err_free_desc_ring: + kfree(cpr->desc_ring); + cpr->desc_ring = NULL; + return -ENOMEM; +} + +static void bnge_free_nq_arrays(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + + bnge_free_nq_desc_arr(&bnapi->nq_ring); + } +} + +static int bnge_alloc_nq_arrays(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i, rc; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + + rc = bnge_alloc_nq_desc_arr(&bnapi->nq_ring, bn->cp_nr_pages); + if (rc) + goto err_free_nq_arrays; + } + return 0; + +err_free_nq_arrays: + bnge_free_nq_arrays(bn); + return rc; +} + +static void bnge_free_nq_tree(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct 
bnge_nq_ring_info *nqr; + struct bnge_ring_struct *ring; + int j; + + nqr = &bnapi->nq_ring; + ring = &nqr->ring_struct; + + bnge_free_ring(bd, &ring->ring_mem); + + if (!nqr->cp_ring_arr) + continue; + + for (j = 0; j < nqr->cp_ring_count; j++) { + struct bnge_cp_ring_info *cpr = &nqr->cp_ring_arr[j]; + + ring = &cpr->ring_struct; + bnge_free_ring(bd, &ring->ring_mem); + bnge_free_cp_desc_arr(cpr); + } + kfree(nqr->cp_ring_arr); + nqr->cp_ring_arr = NULL; + nqr->cp_ring_count = 0; + } +} + +static int alloc_one_cp_ring(struct bnge_net *bn, + struct bnge_cp_ring_info *cpr) +{ + struct bnge_ring_mem_info *rmem; + struct bnge_ring_struct *ring; + struct bnge_dev *bd = bn->bd; + int rc; + + rc = bnge_alloc_cp_desc_arr(cpr, bn->cp_nr_pages); + if (rc) + return -ENOMEM; + ring = &cpr->ring_struct; + rmem = &ring->ring_mem; + rmem->nr_pages = bn->cp_nr_pages; + rmem->page_size = HW_CMPD_RING_SIZE; + rmem->pg_arr = (void **)cpr->desc_ring; + rmem->dma_arr = cpr->desc_mapping; + rmem->flags = BNGE_RMEM_RING_PTE_FLAG; + rc = bnge_alloc_ring(bd, rmem); + if (rc) + goto err_free_cp_desc_arr; + return rc; + +err_free_cp_desc_arr: + bnge_free_cp_desc_arr(cpr); + return rc; +} + +static int bnge_alloc_nq_tree(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i, j, ulp_msix, rc; + int tcs = 1; + + ulp_msix = bnge_aux_get_msix(bd); + for (i = 0, j = 0; i < bd->nq_nr_rings; i++) { + bool sh = !!(bd->flags & BNGE_EN_SHARED_CHNL); + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr; + struct bnge_cp_ring_info *cpr; + struct bnge_ring_struct *ring; + int cp_count = 0, k; + int rx = 0, tx = 0; + + nqr = &bnapi->nq_ring; + nqr->bnapi = bnapi; + ring = &nqr->ring_struct; + + rc = bnge_alloc_ring(bd, &ring->ring_mem); + if (rc) + goto err_free_nq_tree; + + ring->map_idx = ulp_msix + i; + + if (i < bd->rx_nr_rings) { + cp_count++; + rx = 1; + } + + if ((sh && i < bd->tx_nr_rings) || + (!sh && i >= bd->rx_nr_rings)) { + cp_count += tcs; + tx = 1; + } + + nqr->cp_ring_arr = kcalloc(cp_count, sizeof(*cpr), + GFP_KERNEL); + if (!nqr->cp_ring_arr) { + rc = -ENOMEM; + goto err_free_nq_tree; + } + + nqr->cp_ring_count = cp_count; + + for (k = 0; k < cp_count; k++) { + cpr = &nqr->cp_ring_arr[k]; + rc = alloc_one_cp_ring(bn, cpr); + if (rc) + goto err_free_nq_tree; + + cpr->bnapi = bnapi; + cpr->cp_idx = k; + if (!k && rx) { + bn->rx_ring[i].rx_cpr = cpr; + cpr->cp_ring_type = BNGE_NQ_HDL_TYPE_RX; + } else { + int n, tc = k - rx; + + n = BNGE_TC_TO_RING_BASE(bd, tc) + j; + bn->tx_ring[n].tx_cpr = cpr; + cpr->cp_ring_type = BNGE_NQ_HDL_TYPE_TX; + } + } + if (tx) + j++; + } + return 0; + +err_free_nq_tree: + bnge_free_nq_tree(bn); + return rc; +} + +static bool bnge_separate_head_pool(struct bnge_rx_ring_info *rxr) +{ + return rxr->need_head_pool || PAGE_SIZE > BNGE_RX_PAGE_SIZE; +} + +static void bnge_free_one_rx_ring_bufs(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr) +{ + int i, max_idx; + + if (!rxr->rx_buf_ring) + return; + + max_idx = bn->rx_nr_pages * RX_DESC_CNT; + + for (i = 0; i < max_idx; i++) { + struct bnge_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[i]; + void *data = rx_buf->data; + + if (!data) + continue; + + rx_buf->data = NULL; + page_pool_free_va(rxr->head_pool, data, true); + } +} + +static void bnge_free_one_agg_ring_bufs(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr) +{ + int i, max_idx; + + if (!rxr->rx_agg_buf_ring) + return; + + max_idx = bn->rx_agg_nr_pages * RX_DESC_CNT; + + for (i = 0; i < max_idx; i++) { + struct bnge_sw_rx_agg_bd *rx_agg_buf = 
&rxr->rx_agg_buf_ring[i]; + netmem_ref netmem = rx_agg_buf->netmem; + + if (!netmem) + continue; + + rx_agg_buf->netmem = 0; + __clear_bit(i, rxr->rx_agg_bmap); + + page_pool_recycle_direct_netmem(rxr->page_pool, netmem); + } +} + +static void bnge_free_one_rx_ring_pair_bufs(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr) +{ + bnge_free_one_rx_ring_bufs(bn, rxr); + bnge_free_one_agg_ring_bufs(bn, rxr); +} + +static void bnge_free_rx_ring_pair_bufs(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + if (!bn->rx_ring) + return; + + for (i = 0; i < bd->rx_nr_rings; i++) + bnge_free_one_rx_ring_pair_bufs(bn, &bn->rx_ring[i]); +} + +static void bnge_free_all_rings_bufs(struct bnge_net *bn) +{ + bnge_free_rx_ring_pair_bufs(bn); +} + +static void bnge_free_rx_rings(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + for (i = 0; i < bd->rx_nr_rings; i++) { + struct bnge_rx_ring_info *rxr = &bn->rx_ring[i]; + struct bnge_ring_struct *ring; + + page_pool_destroy(rxr->page_pool); + page_pool_destroy(rxr->head_pool); + rxr->page_pool = rxr->head_pool = NULL; + + kfree(rxr->rx_agg_bmap); + rxr->rx_agg_bmap = NULL; + + ring = &rxr->rx_ring_struct; + bnge_free_ring(bd, &ring->ring_mem); + + ring = &rxr->rx_agg_ring_struct; + bnge_free_ring(bd, &ring->ring_mem); + } +} + +static int bnge_alloc_rx_page_pool(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr, + int numa_node) +{ + const unsigned int agg_size_fac = PAGE_SIZE / BNGE_RX_PAGE_SIZE; + const unsigned int rx_size_fac = PAGE_SIZE / SZ_4K; + struct page_pool_params pp = { 0 }; + struct bnge_dev *bd = bn->bd; + struct page_pool *pool; + + pp.pool_size = bn->rx_agg_ring_size / agg_size_fac; + pp.nid = numa_node; + pp.netdev = bn->netdev; + pp.dev = bd->dev; + pp.dma_dir = bn->rx_dir; + pp.max_len = PAGE_SIZE; + pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV | + PP_FLAG_ALLOW_UNREADABLE_NETMEM; + pp.queue_idx = rxr->bnapi->index; + + pool = page_pool_create(&pp); + if (IS_ERR(pool)) + return PTR_ERR(pool); + rxr->page_pool = pool; + + rxr->need_head_pool = page_pool_is_unreadable(pool); + if (bnge_separate_head_pool(rxr)) { + pp.pool_size = min(bn->rx_ring_size / rx_size_fac, 1024); + pp.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pool = page_pool_create(&pp); + if (IS_ERR(pool)) + goto err_destroy_pp; + } else { + page_pool_get(pool); + } + rxr->head_pool = pool; + return 0; + +err_destroy_pp: + page_pool_destroy(rxr->page_pool); + rxr->page_pool = NULL; + return PTR_ERR(pool); +} + +static void bnge_enable_rx_page_pool(struct bnge_rx_ring_info *rxr) +{ + page_pool_enable_direct_recycling(rxr->head_pool, &rxr->bnapi->napi); + page_pool_enable_direct_recycling(rxr->page_pool, &rxr->bnapi->napi); +} + +static int bnge_alloc_rx_agg_bmap(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr) +{ + u16 mem_size; + + rxr->rx_agg_bmap_size = bn->rx_agg_ring_mask + 1; + mem_size = rxr->rx_agg_bmap_size / 8; + rxr->rx_agg_bmap = kzalloc(mem_size, GFP_KERNEL); + if (!rxr->rx_agg_bmap) + return -ENOMEM; + + return 0; +} + +static int bnge_alloc_rx_rings(struct bnge_net *bn) +{ + int i, rc = 0, agg_rings = 0, cpu; + struct bnge_dev *bd = bn->bd; + + if (bnge_is_agg_reqd(bd)) + agg_rings = 1; + + for (i = 0; i < bd->rx_nr_rings; i++) { + struct bnge_rx_ring_info *rxr = &bn->rx_ring[i]; + struct bnge_ring_struct *ring; + int cpu_node; + + ring = &rxr->rx_ring_struct; + + cpu = cpumask_local_spread(i, dev_to_node(bd->dev)); + cpu_node = cpu_to_node(cpu); + netdev_dbg(bn->netdev, "Allocating page pool for rx_ring[%d] 
on numa_node: %d\n", + i, cpu_node); + rc = bnge_alloc_rx_page_pool(bn, rxr, cpu_node); + if (rc) + goto err_free_rx_rings; + bnge_enable_rx_page_pool(rxr); + + rc = bnge_alloc_ring(bd, &ring->ring_mem); + if (rc) + goto err_free_rx_rings; + + ring->grp_idx = i; + if (agg_rings) { + ring = &rxr->rx_agg_ring_struct; + rc = bnge_alloc_ring(bd, &ring->ring_mem); + if (rc) + goto err_free_rx_rings; + + ring->grp_idx = i; + rc = bnge_alloc_rx_agg_bmap(bn, rxr); + if (rc) + goto err_free_rx_rings; + } + } + return rc; + +err_free_rx_rings: + bnge_free_rx_rings(bn); + return rc; +} + +static void bnge_free_tx_rings(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + for (i = 0; i < bd->tx_nr_rings; i++) { + struct bnge_tx_ring_info *txr = &bn->tx_ring[i]; + struct bnge_ring_struct *ring; + + ring = &txr->tx_ring_struct; + + bnge_free_ring(bd, &ring->ring_mem); + } +} + +static int bnge_alloc_tx_rings(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i, j, rc; + + for (i = 0, j = 0; i < bd->tx_nr_rings; i++) { + struct bnge_tx_ring_info *txr = &bn->tx_ring[i]; + struct bnge_ring_struct *ring; + u8 qidx; + + ring = &txr->tx_ring_struct; + + rc = bnge_alloc_ring(bd, &ring->ring_mem); + if (rc) + goto err_free_tx_rings; + + ring->grp_idx = txr->bnapi->index; + qidx = bd->tc_to_qidx[j]; + ring->queue_id = bd->q_info[qidx].queue_id; + if (BNGE_RING_TO_TC_OFF(bd, i) == (bd->tx_nr_rings_per_tc - 1)) + j++; + } + return 0; + +err_free_tx_rings: + bnge_free_tx_rings(bn); + return rc; +} + +static void bnge_free_vnic_attributes(struct bnge_net *bn) +{ + struct pci_dev *pdev = bn->bd->pdev; + struct bnge_vnic_info *vnic; + int i; + + if (!bn->vnic_info) + return; + + for (i = 0; i < bn->nr_vnics; i++) { + vnic = &bn->vnic_info[i]; + + kfree(vnic->uc_list); + vnic->uc_list = NULL; + + if (vnic->mc_list) { + dma_free_coherent(&pdev->dev, vnic->mc_list_size, + vnic->mc_list, vnic->mc_list_mapping); + vnic->mc_list = NULL; + } + + if (vnic->rss_table) { + dma_free_coherent(&pdev->dev, vnic->rss_table_size, + vnic->rss_table, + vnic->rss_table_dma_addr); + vnic->rss_table = NULL; + } + + vnic->rss_hash_key = NULL; + vnic->flags = 0; + } +} + +static int bnge_alloc_vnic_attributes(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + struct bnge_vnic_info *vnic; + int i, size; + + for (i = 0; i < bn->nr_vnics; i++) { + vnic = &bn->vnic_info[i]; + + if (vnic->flags & BNGE_VNIC_UCAST_FLAG) { + int mem_size = (BNGE_MAX_UC_ADDRS - 1) * ETH_ALEN; + + vnic->uc_list = kmalloc(mem_size, GFP_KERNEL); + if (!vnic->uc_list) + goto err_free_vnic_attributes; + } + + if (vnic->flags & BNGE_VNIC_MCAST_FLAG) { + vnic->mc_list_size = BNGE_MAX_MC_ADDRS * ETH_ALEN; + vnic->mc_list = + dma_alloc_coherent(bd->dev, + vnic->mc_list_size, + &vnic->mc_list_mapping, + GFP_KERNEL); + if (!vnic->mc_list) + goto err_free_vnic_attributes; + } + + /* Allocate rss table and hash key */ + size = L1_CACHE_ALIGN(BNGE_MAX_RSS_TABLE_SIZE); + + vnic->rss_table_size = size + HW_HASH_KEY_SIZE; + vnic->rss_table = dma_alloc_coherent(bd->dev, + vnic->rss_table_size, + &vnic->rss_table_dma_addr, + GFP_KERNEL); + if (!vnic->rss_table) + goto err_free_vnic_attributes; + + vnic->rss_hash_key = ((void *)vnic->rss_table) + size; + vnic->rss_hash_key_dma_addr = vnic->rss_table_dma_addr + size; + } + return 0; + +err_free_vnic_attributes: + bnge_free_vnic_attributes(bn); + return -ENOMEM; +} + +static int bnge_alloc_vnics(struct bnge_net *bn) +{ + int num_vnics; + + /* Allocate only 1 VNIC for now + * Additional VNICs will be 
added based on RFS/NTUPLE in future patches + */ + num_vnics = 1; + + bn->vnic_info = kcalloc(num_vnics, sizeof(struct bnge_vnic_info), + GFP_KERNEL); + if (!bn->vnic_info) + return -ENOMEM; + + bn->nr_vnics = num_vnics; + + return 0; +} + +static void bnge_free_vnics(struct bnge_net *bn) +{ + kfree(bn->vnic_info); + bn->vnic_info = NULL; + bn->nr_vnics = 0; +} + +static void bnge_free_ring_grps(struct bnge_net *bn) +{ + kfree(bn->grp_info); + bn->grp_info = NULL; +} + +static int bnge_init_ring_grps(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + bn->grp_info = kcalloc(bd->nq_nr_rings, + sizeof(struct bnge_ring_grp_info), + GFP_KERNEL); + if (!bn->grp_info) + return -ENOMEM; + for (i = 0; i < bd->nq_nr_rings; i++) { + bn->grp_info[i].fw_stats_ctx = INVALID_HW_RING_ID; + bn->grp_info[i].fw_grp_id = INVALID_HW_RING_ID; + bn->grp_info[i].rx_fw_ring_id = INVALID_HW_RING_ID; + bn->grp_info[i].agg_fw_ring_id = INVALID_HW_RING_ID; + bn->grp_info[i].nq_fw_ring_id = INVALID_HW_RING_ID; + } + + return 0; +} + +static void bnge_free_core(struct bnge_net *bn) +{ + bnge_free_vnic_attributes(bn); + bnge_free_tx_rings(bn); + bnge_free_rx_rings(bn); + bnge_free_nq_tree(bn); + bnge_free_nq_arrays(bn); + bnge_free_ring_stats(bn); + bnge_free_ring_grps(bn); + bnge_free_vnics(bn); + kfree(bn->tx_ring_map); + bn->tx_ring_map = NULL; + kfree(bn->tx_ring); + bn->tx_ring = NULL; + kfree(bn->rx_ring); + bn->rx_ring = NULL; + kfree(bn->bnapi); + bn->bnapi = NULL; +} + +static int bnge_alloc_core(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i, j, size, arr_size; + int rc = -ENOMEM; + void *bnapi; + + arr_size = L1_CACHE_ALIGN(sizeof(struct bnge_napi *) * + bd->nq_nr_rings); + size = L1_CACHE_ALIGN(sizeof(struct bnge_napi)); + bnapi = kzalloc(arr_size + size * bd->nq_nr_rings, GFP_KERNEL); + if (!bnapi) + return rc; + + bn->bnapi = bnapi; + bnapi += arr_size; + for (i = 0; i < bd->nq_nr_rings; i++, bnapi += size) { + struct bnge_nq_ring_info *nqr; + + bn->bnapi[i] = bnapi; + bn->bnapi[i]->index = i; + bn->bnapi[i]->bn = bn; + nqr = &bn->bnapi[i]->nq_ring; + nqr->ring_struct.ring_mem.flags = BNGE_RMEM_RING_PTE_FLAG; + } + + bn->rx_ring = kcalloc(bd->rx_nr_rings, + sizeof(struct bnge_rx_ring_info), + GFP_KERNEL); + if (!bn->rx_ring) + goto err_free_core; + + for (i = 0; i < bd->rx_nr_rings; i++) { + struct bnge_rx_ring_info *rxr = &bn->rx_ring[i]; + + rxr->rx_ring_struct.ring_mem.flags = + BNGE_RMEM_RING_PTE_FLAG; + rxr->rx_agg_ring_struct.ring_mem.flags = + BNGE_RMEM_RING_PTE_FLAG; + rxr->bnapi = bn->bnapi[i]; + bn->bnapi[i]->rx_ring = &bn->rx_ring[i]; + } + + bn->tx_ring = kcalloc(bd->tx_nr_rings, + sizeof(struct bnge_tx_ring_info), + GFP_KERNEL); + if (!bn->tx_ring) + goto err_free_core; + + bn->tx_ring_map = kcalloc(bd->tx_nr_rings, sizeof(u16), + GFP_KERNEL); + if (!bn->tx_ring_map) + goto err_free_core; + + if (bd->flags & BNGE_EN_SHARED_CHNL) + j = 0; + else + j = bd->rx_nr_rings; + + for (i = 0; i < bd->tx_nr_rings; i++) { + struct bnge_tx_ring_info *txr = &bn->tx_ring[i]; + struct bnge_napi *bnapi2; + int k; + + txr->tx_ring_struct.ring_mem.flags = BNGE_RMEM_RING_PTE_FLAG; + bn->tx_ring_map[i] = i; + k = j + BNGE_RING_TO_TC_OFF(bd, i); + + bnapi2 = bn->bnapi[k]; + txr->txq_index = i; + txr->tx_napi_idx = + BNGE_RING_TO_TC(bd, txr->txq_index); + bnapi2->tx_ring[txr->tx_napi_idx] = txr; + txr->bnapi = bnapi2; + } + + rc = bnge_alloc_ring_stats(bn); + if (rc) + goto err_free_core; + + rc = bnge_alloc_vnics(bn); + if (rc) + goto err_free_core; + + rc = 
bnge_alloc_nq_arrays(bn); + if (rc) + goto err_free_core; + + bnge_init_ring_struct(bn); + + rc = bnge_alloc_rx_rings(bn); + if (rc) + goto err_free_core; + + rc = bnge_alloc_tx_rings(bn); + if (rc) + goto err_free_core; + + rc = bnge_alloc_nq_tree(bn); + if (rc) + goto err_free_core; + + bn->vnic_info[BNGE_VNIC_DEFAULT].flags |= BNGE_VNIC_RSS_FLAG | + BNGE_VNIC_MCAST_FLAG | + BNGE_VNIC_UCAST_FLAG; + rc = bnge_alloc_vnic_attributes(bn); + if (rc) + goto err_free_core; + return 0; + +err_free_core: + bnge_free_core(bn); + return rc; +} + +u16 bnge_cp_ring_for_rx(struct bnge_rx_ring_info *rxr) +{ + return rxr->rx_cpr->ring_struct.fw_ring_id; +} + +u16 bnge_cp_ring_for_tx(struct bnge_tx_ring_info *txr) +{ + return txr->tx_cpr->ring_struct.fw_ring_id; +} + +static void bnge_db_nq(struct bnge_net *bn, struct bnge_db_info *db, u32 idx) +{ + bnge_writeq(bn->bd, db->db_key64 | DBR_TYPE_NQ_MASK | + DB_RING_IDX(db, idx), db->doorbell); +} + +static void bnge_db_cq(struct bnge_net *bn, struct bnge_db_info *db, u32 idx) +{ + bnge_writeq(bn->bd, db->db_key64 | DBR_TYPE_CQ_ARMALL | + DB_RING_IDX(db, idx), db->doorbell); +} + +static int bnge_cp_num_to_irq_num(struct bnge_net *bn, int n) +{ + struct bnge_napi *bnapi = bn->bnapi[n]; + struct bnge_nq_ring_info *nqr; + + nqr = &bnapi->nq_ring; + + return nqr->ring_struct.map_idx; +} + +static irqreturn_t bnge_msix(int irq, void *dev_instance) +{ + /* NAPI scheduling to be added in a future patch */ + return IRQ_HANDLED; +} + +static void bnge_init_nq_tree(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i, j; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_nq_ring_info *nqr = &bn->bnapi[i]->nq_ring; + struct bnge_ring_struct *ring = &nqr->ring_struct; + + ring->fw_ring_id = INVALID_HW_RING_ID; + for (j = 0; j < nqr->cp_ring_count; j++) { + struct bnge_cp_ring_info *cpr = &nqr->cp_ring_arr[j]; + + ring = &cpr->ring_struct; + ring->fw_ring_id = INVALID_HW_RING_ID; + } + } +} + +static netmem_ref __bnge_alloc_rx_netmem(struct bnge_net *bn, + dma_addr_t *mapping, + struct bnge_rx_ring_info *rxr, + unsigned int *offset, + gfp_t gfp) +{ + netmem_ref netmem; + + if (PAGE_SIZE > BNGE_RX_PAGE_SIZE) { + netmem = page_pool_alloc_frag_netmem(rxr->page_pool, offset, + BNGE_RX_PAGE_SIZE, gfp); + } else { + netmem = page_pool_alloc_netmems(rxr->page_pool, gfp); + *offset = 0; + } + if (!netmem) + return 0; + + *mapping = page_pool_get_dma_addr_netmem(netmem) + *offset; + return netmem; +} + +static u8 *__bnge_alloc_rx_frag(struct bnge_net *bn, dma_addr_t *mapping, + struct bnge_rx_ring_info *rxr, + gfp_t gfp) +{ + unsigned int offset; + struct page *page; + + page = page_pool_alloc_frag(rxr->head_pool, &offset, + bn->rx_buf_size, gfp); + if (!page) + return NULL; + + *mapping = page_pool_get_dma_addr(page) + bn->rx_dma_offset + offset; + return page_address(page) + offset; +} + +static int bnge_alloc_rx_data(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr, + u16 prod, gfp_t gfp) +{ + struct bnge_sw_rx_bd *rx_buf = &rxr->rx_buf_ring[RING_RX(bn, prod)]; + struct rx_bd *rxbd; + dma_addr_t mapping; + u8 *data; + + rxbd = &rxr->rx_desc_ring[RX_RING(bn, prod)][RX_IDX(prod)]; + data = __bnge_alloc_rx_frag(bn, &mapping, rxr, gfp); + if (!data) + return -ENOMEM; + + rx_buf->data = data; + rx_buf->data_ptr = data + bn->rx_offset; + rx_buf->mapping = mapping; + + rxbd->rx_bd_haddr = cpu_to_le64(mapping); + + return 0; +} + +static int bnge_alloc_one_rx_ring_bufs(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr, + int ring_nr) +{ + u32 prod = 
rxr->rx_prod; + int i, rc = 0; + + for (i = 0; i < bn->rx_ring_size; i++) { + rc = bnge_alloc_rx_data(bn, rxr, prod, GFP_KERNEL); + if (rc) + break; + prod = NEXT_RX(prod); + } + + /* Abort if not a single buffer can be allocated */ + if (rc && !i) { + netdev_err(bn->netdev, + "RX ring %d: allocated %d/%d buffers, abort\n", + ring_nr, i, bn->rx_ring_size); + return rc; + } + + rxr->rx_prod = prod; + + if (i < bn->rx_ring_size) + netdev_warn(bn->netdev, + "RX ring %d: allocated %d/%d buffers, continuing\n", + ring_nr, i, bn->rx_ring_size); + return 0; +} + +static u16 bnge_find_next_agg_idx(struct bnge_rx_ring_info *rxr, u16 idx) +{ + u16 next, max = rxr->rx_agg_bmap_size; + + next = find_next_zero_bit(rxr->rx_agg_bmap, max, idx); + if (next >= max) + next = find_first_zero_bit(rxr->rx_agg_bmap, max); + return next; +} + +static int bnge_alloc_rx_netmem(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr, + u16 prod, gfp_t gfp) +{ + struct bnge_sw_rx_agg_bd *rx_agg_buf; + u16 sw_prod = rxr->rx_sw_agg_prod; + unsigned int offset = 0; + struct rx_bd *rxbd; + dma_addr_t mapping; + netmem_ref netmem; + + rxbd = &rxr->rx_agg_desc_ring[RX_AGG_RING(bn, prod)][RX_IDX(prod)]; + netmem = __bnge_alloc_rx_netmem(bn, &mapping, rxr, &offset, gfp); + if (!netmem) + return -ENOMEM; + + if (unlikely(test_bit(sw_prod, rxr->rx_agg_bmap))) + sw_prod = bnge_find_next_agg_idx(rxr, sw_prod); + + __set_bit(sw_prod, rxr->rx_agg_bmap); + rx_agg_buf = &rxr->rx_agg_buf_ring[sw_prod]; + rxr->rx_sw_agg_prod = RING_RX_AGG(bn, NEXT_RX_AGG(sw_prod)); + + rx_agg_buf->netmem = netmem; + rx_agg_buf->offset = offset; + rx_agg_buf->mapping = mapping; + rxbd->rx_bd_haddr = cpu_to_le64(mapping); + rxbd->rx_bd_opaque = sw_prod; + return 0; +} + +static int bnge_alloc_one_agg_ring_bufs(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr, + int ring_nr) +{ + u32 prod = rxr->rx_agg_prod; + int i, rc = 0; + + for (i = 0; i < bn->rx_agg_ring_size; i++) { + rc = bnge_alloc_rx_netmem(bn, rxr, prod, GFP_KERNEL); + if (rc) + break; + prod = NEXT_RX_AGG(prod); + } + + if (rc && i < MAX_SKB_FRAGS) { + netdev_err(bn->netdev, + "Agg ring %d: allocated %d/%d buffers (min %d), abort\n", + ring_nr, i, bn->rx_agg_ring_size, MAX_SKB_FRAGS); + goto err_free_one_agg_ring_bufs; + } + + rxr->rx_agg_prod = prod; + + if (i < bn->rx_agg_ring_size) + netdev_warn(bn->netdev, + "Agg ring %d: allocated %d/%d buffers, continuing\n", + ring_nr, i, bn->rx_agg_ring_size); + return 0; + +err_free_one_agg_ring_bufs: + bnge_free_one_agg_ring_bufs(bn, rxr); + return -ENOMEM; +} + +static int bnge_alloc_one_rx_ring_pair_bufs(struct bnge_net *bn, int ring_nr) +{ + struct bnge_rx_ring_info *rxr = &bn->rx_ring[ring_nr]; + int rc; + + rc = bnge_alloc_one_rx_ring_bufs(bn, rxr, ring_nr); + if (rc) + return rc; + + if (bnge_is_agg_reqd(bn->bd)) { + rc = bnge_alloc_one_agg_ring_bufs(bn, rxr, ring_nr); + if (rc) + goto err_free_one_rx_ring_bufs; + } + return 0; + +err_free_one_rx_ring_bufs: + bnge_free_one_rx_ring_bufs(bn, rxr); + return rc; +} + +static void bnge_init_rxbd_pages(struct bnge_ring_struct *ring, u32 type) +{ + struct rx_bd **rx_desc_ring; + u32 prod; + int i; + + rx_desc_ring = (struct rx_bd **)ring->ring_mem.pg_arr; + for (i = 0, prod = 0; i < ring->ring_mem.nr_pages; i++) { + struct rx_bd *rxbd = rx_desc_ring[i]; + int j; + + for (j = 0; j < RX_DESC_CNT; j++, rxbd++, prod++) { + rxbd->rx_bd_len_flags_type = cpu_to_le32(type); + rxbd->rx_bd_opaque = prod; + } + } +} + +static void bnge_init_one_rx_ring_rxbd(struct bnge_net *bn, + struct bnge_rx_ring_info 
*rxr) +{ + struct bnge_ring_struct *ring; + u32 type; + + type = (bn->rx_buf_use_size << RX_BD_LEN_SHIFT) | + RX_BD_TYPE_RX_PACKET_BD | RX_BD_FLAGS_EOP; + + if (NET_IP_ALIGN == 2) + type |= RX_BD_FLAGS_SOP; + + ring = &rxr->rx_ring_struct; + bnge_init_rxbd_pages(ring, type); + ring->fw_ring_id = INVALID_HW_RING_ID; +} + +static void bnge_init_one_agg_ring_rxbd(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr) +{ + struct bnge_ring_struct *ring; + u32 type; + + ring = &rxr->rx_agg_ring_struct; + ring->fw_ring_id = INVALID_HW_RING_ID; + if (bnge_is_agg_reqd(bn->bd)) { + type = ((u32)BNGE_RX_PAGE_SIZE << RX_BD_LEN_SHIFT) | + RX_BD_TYPE_RX_AGG_BD | RX_BD_FLAGS_SOP; + + bnge_init_rxbd_pages(ring, type); + } +} + +static void bnge_init_one_rx_ring_pair(struct bnge_net *bn, int ring_nr) +{ + struct bnge_rx_ring_info *rxr; + + rxr = &bn->rx_ring[ring_nr]; + bnge_init_one_rx_ring_rxbd(bn, rxr); + + netif_queue_set_napi(bn->netdev, ring_nr, NETDEV_QUEUE_TYPE_RX, + &rxr->bnapi->napi); + + bnge_init_one_agg_ring_rxbd(bn, rxr); +} + +static int bnge_alloc_rx_ring_pair_bufs(struct bnge_net *bn) +{ + int i, rc; + + for (i = 0; i < bn->bd->rx_nr_rings; i++) { + rc = bnge_alloc_one_rx_ring_pair_bufs(bn, i); + if (rc) + goto err_free_rx_ring_pair_bufs; + } + return 0; + +err_free_rx_ring_pair_bufs: + bnge_free_rx_ring_pair_bufs(bn); + return rc; +} + +static void bnge_init_rx_rings(struct bnge_net *bn) +{ + int i; + +#define BNGE_RX_OFFSET (NET_SKB_PAD + NET_IP_ALIGN) +#define BNGE_RX_DMA_OFFSET NET_SKB_PAD + bn->rx_offset = BNGE_RX_OFFSET; + bn->rx_dma_offset = BNGE_RX_DMA_OFFSET; + + for (i = 0; i < bn->bd->rx_nr_rings; i++) + bnge_init_one_rx_ring_pair(bn, i); +} + +static void bnge_init_tx_rings(struct bnge_net *bn) +{ + int i; + + bn->tx_wake_thresh = max(bn->tx_ring_size / 2, BNGE_MIN_TX_DESC_CNT); + + for (i = 0; i < bn->bd->tx_nr_rings; i++) { + struct bnge_tx_ring_info *txr = &bn->tx_ring[i]; + struct bnge_ring_struct *ring = &txr->tx_ring_struct; + + ring->fw_ring_id = INVALID_HW_RING_ID; + + netif_queue_set_napi(bn->netdev, i, NETDEV_QUEUE_TYPE_TX, + &txr->bnapi->napi); + } +} + +static void bnge_init_vnics(struct bnge_net *bn) +{ + struct bnge_vnic_info *vnic0 = &bn->vnic_info[BNGE_VNIC_DEFAULT]; + int i; + + for (i = 0; i < bn->nr_vnics; i++) { + struct bnge_vnic_info *vnic = &bn->vnic_info[i]; + int j; + + vnic->fw_vnic_id = INVALID_HW_RING_ID; + vnic->vnic_id = i; + for (j = 0; j < BNGE_MAX_CTX_PER_VNIC; j++) + vnic->fw_rss_cos_lb_ctx[j] = INVALID_HW_RING_ID; + + if (bn->vnic_info[i].rss_hash_key) { + if (i == BNGE_VNIC_DEFAULT) { + u8 *key = (void *)vnic->rss_hash_key; + int k; + + if (!bn->rss_hash_key_valid && + !bn->rss_hash_key_updated) { + get_random_bytes(bn->rss_hash_key, + HW_HASH_KEY_SIZE); + bn->rss_hash_key_updated = true; + } + + memcpy(vnic->rss_hash_key, bn->rss_hash_key, + HW_HASH_KEY_SIZE); + + if (!bn->rss_hash_key_updated) + continue; + + bn->rss_hash_key_updated = false; + bn->rss_hash_key_valid = true; + + bn->toeplitz_prefix = 0; + for (k = 0; k < 8; k++) { + bn->toeplitz_prefix <<= 8; + bn->toeplitz_prefix |= key[k]; + } + } else { + memcpy(vnic->rss_hash_key, vnic0->rss_hash_key, + HW_HASH_KEY_SIZE); + } + } + } +} + +static void bnge_set_db_mask(struct bnge_net *bn, struct bnge_db_info *db, + u32 ring_type) +{ + switch (ring_type) { + case HWRM_RING_ALLOC_TX: + db->db_ring_mask = bn->tx_ring_mask; + break; + case HWRM_RING_ALLOC_RX: + db->db_ring_mask = bn->rx_ring_mask; + break; + case HWRM_RING_ALLOC_AGG: + db->db_ring_mask = bn->rx_agg_ring_mask; + break; + 
case HWRM_RING_ALLOC_CMPL: + case HWRM_RING_ALLOC_NQ: + db->db_ring_mask = bn->cp_ring_mask; + break; + } + db->db_epoch_mask = db->db_ring_mask + 1; + db->db_epoch_shift = DBR_EPOCH_SFT - ilog2(db->db_epoch_mask); +} + +static void bnge_set_db(struct bnge_net *bn, struct bnge_db_info *db, + u32 ring_type, u32 map_idx, u32 xid) +{ + struct bnge_dev *bd = bn->bd; + + switch (ring_type) { + case HWRM_RING_ALLOC_TX: + db->db_key64 = DBR_PATH_L2 | DBR_TYPE_SQ; + break; + case HWRM_RING_ALLOC_RX: + case HWRM_RING_ALLOC_AGG: + db->db_key64 = DBR_PATH_L2 | DBR_TYPE_SRQ; + break; + case HWRM_RING_ALLOC_CMPL: + db->db_key64 = DBR_PATH_L2; + break; + case HWRM_RING_ALLOC_NQ: + db->db_key64 = DBR_PATH_L2; + break; + } + db->db_key64 |= ((u64)xid << DBR_XID_SFT) | DBR_VALID; + + db->doorbell = bd->bar1 + bd->db_offset; + bnge_set_db_mask(bn, db, ring_type); +} + +static int bnge_hwrm_cp_ring_alloc(struct bnge_net *bn, + struct bnge_cp_ring_info *cpr) +{ + const u32 type = HWRM_RING_ALLOC_CMPL; + struct bnge_napi *bnapi = cpr->bnapi; + struct bnge_ring_struct *ring; + u32 map_idx = bnapi->index; + int rc; + + ring = &cpr->ring_struct; + ring->handle = BNGE_SET_NQ_HDL(cpr); + rc = hwrm_ring_alloc_send_msg(bn, ring, type, map_idx); + if (rc) + return rc; + + bnge_set_db(bn, &cpr->cp_db, type, map_idx, ring->fw_ring_id); + bnge_db_cq(bn, &cpr->cp_db, cpr->cp_raw_cons); + + return 0; +} + +static int bnge_hwrm_tx_ring_alloc(struct bnge_net *bn, + struct bnge_tx_ring_info *txr, u32 tx_idx) +{ + struct bnge_ring_struct *ring = &txr->tx_ring_struct; + const u32 type = HWRM_RING_ALLOC_TX; + int rc; + + rc = hwrm_ring_alloc_send_msg(bn, ring, type, tx_idx); + if (rc) + return rc; + + bnge_set_db(bn, &txr->tx_db, type, tx_idx, ring->fw_ring_id); + + return 0; +} + +static int bnge_hwrm_rx_agg_ring_alloc(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr) +{ + struct bnge_ring_struct *ring = &rxr->rx_agg_ring_struct; + u32 type = HWRM_RING_ALLOC_AGG; + struct bnge_dev *bd = bn->bd; + u32 grp_idx = ring->grp_idx; + u32 map_idx; + int rc; + + map_idx = grp_idx + bd->rx_nr_rings; + rc = hwrm_ring_alloc_send_msg(bn, ring, type, map_idx); + if (rc) + return rc; + + bnge_set_db(bn, &rxr->rx_agg_db, type, map_idx, + ring->fw_ring_id); + bnge_db_write(bn->bd, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnge_db_write(bn->bd, &rxr->rx_db, rxr->rx_prod); + bn->grp_info[grp_idx].agg_fw_ring_id = ring->fw_ring_id; + + return 0; +} + +static int bnge_hwrm_rx_ring_alloc(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr) +{ + struct bnge_ring_struct *ring = &rxr->rx_ring_struct; + struct bnge_napi *bnapi = rxr->bnapi; + u32 type = HWRM_RING_ALLOC_RX; + u32 map_idx = bnapi->index; + int rc; + + rc = hwrm_ring_alloc_send_msg(bn, ring, type, map_idx); + if (rc) + return rc; + + bnge_set_db(bn, &rxr->rx_db, type, map_idx, ring->fw_ring_id); + bn->grp_info[map_idx].rx_fw_ring_id = ring->fw_ring_id; + + return 0; +} + +static int bnge_hwrm_ring_alloc(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + bool agg_rings; + int i, rc = 0; + + agg_rings = !!(bnge_is_agg_reqd(bd)); + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr = &bnapi->nq_ring; + struct bnge_ring_struct *ring = &nqr->ring_struct; + u32 type = HWRM_RING_ALLOC_NQ; + u32 map_idx = ring->map_idx; + unsigned int vector; + + vector = bd->irq_tbl[map_idx].vector; + disable_irq_nosync(vector); + rc = hwrm_ring_alloc_send_msg(bn, ring, type, map_idx); + if (rc) { + enable_irq(vector); + goto err_out; + } + 
bnge_set_db(bn, &nqr->nq_db, type, map_idx, ring->fw_ring_id); + bnge_db_nq(bn, &nqr->nq_db, nqr->nq_raw_cons); + enable_irq(vector); + bn->grp_info[i].nq_fw_ring_id = ring->fw_ring_id; + + if (!i) { + rc = bnge_hwrm_set_async_event_cr(bd, ring->fw_ring_id); + if (rc) + netdev_warn(bn->netdev, "Failed to set async event completion ring.\n"); + } + } + + for (i = 0; i < bd->tx_nr_rings; i++) { + struct bnge_tx_ring_info *txr = &bn->tx_ring[i]; + + rc = bnge_hwrm_cp_ring_alloc(bn, txr->tx_cpr); + if (rc) + goto err_out; + rc = bnge_hwrm_tx_ring_alloc(bn, txr, i); + if (rc) + goto err_out; + } + + for (i = 0; i < bd->rx_nr_rings; i++) { + struct bnge_rx_ring_info *rxr = &bn->rx_ring[i]; + struct bnge_cp_ring_info *cpr; + struct bnge_ring_struct *ring; + struct bnge_napi *bnapi; + u32 map_idx, type; + + rc = bnge_hwrm_rx_ring_alloc(bn, rxr); + if (rc) + goto err_out; + /* If we have agg rings, post agg buffers first. */ + if (!agg_rings) + bnge_db_write(bn->bd, &rxr->rx_db, rxr->rx_prod); + + cpr = rxr->rx_cpr; + bnapi = rxr->bnapi; + type = HWRM_RING_ALLOC_CMPL; + map_idx = bnapi->index; + + ring = &cpr->ring_struct; + ring->handle = BNGE_SET_NQ_HDL(cpr); + rc = hwrm_ring_alloc_send_msg(bn, ring, type, map_idx); + if (rc) + goto err_out; + bnge_set_db(bn, &cpr->cp_db, type, map_idx, + ring->fw_ring_id); + bnge_db_cq(bn, &cpr->cp_db, cpr->cp_raw_cons); + } + + if (agg_rings) { + for (i = 0; i < bd->rx_nr_rings; i++) { + rc = bnge_hwrm_rx_agg_ring_alloc(bn, &bn->rx_ring[i]); + if (rc) + goto err_out; + } + } +err_out: + return rc; +} + +void bnge_fill_hw_rss_tbl(struct bnge_net *bn, struct bnge_vnic_info *vnic) +{ + __le16 *ring_tbl = vnic->rss_table; + struct bnge_rx_ring_info *rxr; + struct bnge_dev *bd = bn->bd; + u16 tbl_size, i; + + tbl_size = bnge_get_rxfh_indir_size(bd); + + for (i = 0; i < tbl_size; i++) { + u16 ring_id, j; + + j = bd->rss_indir_tbl[i]; + rxr = &bn->rx_ring[j]; + + ring_id = rxr->rx_ring_struct.fw_ring_id; + *ring_tbl++ = cpu_to_le16(ring_id); + ring_id = bnge_cp_ring_for_rx(rxr); + *ring_tbl++ = cpu_to_le16(ring_id); + } +} + +static int bnge_hwrm_vnic_rss_cfg(struct bnge_net *bn, + struct bnge_vnic_info *vnic) +{ + int rc; + + rc = bnge_hwrm_vnic_set_rss(bn, vnic, true); + if (rc) { + netdev_err(bn->netdev, "hwrm vnic %d set rss failure rc: %d\n", + vnic->vnic_id, rc); + return rc; + } + rc = bnge_hwrm_vnic_cfg(bn, vnic); + if (rc) + netdev_err(bn->netdev, "hwrm vnic %d cfg failure rc: %d\n", + vnic->vnic_id, rc); + return rc; +} + +static int bnge_setup_vnic(struct bnge_net *bn, struct bnge_vnic_info *vnic) +{ + struct bnge_dev *bd = bn->bd; + int rc, i, nr_ctxs; + + nr_ctxs = bnge_cal_nr_rss_ctxs(bd->rx_nr_rings); + for (i = 0; i < nr_ctxs; i++) { + rc = bnge_hwrm_vnic_ctx_alloc(bd, vnic, i); + if (rc) { + netdev_err(bn->netdev, "hwrm vnic %d ctx %d alloc failure rc: %d\n", + vnic->vnic_id, i, rc); + return -ENOMEM; + } + bn->rsscos_nr_ctxs++; + } + + rc = bnge_hwrm_vnic_rss_cfg(bn, vnic); + if (rc) + return rc; + + if (bnge_is_agg_reqd(bd)) { + rc = bnge_hwrm_vnic_set_hds(bn, vnic); + if (rc) + netdev_err(bn->netdev, "hwrm vnic %d set hds failure rc: %d\n", + vnic->vnic_id, rc); + } + return rc; +} + +static void bnge_del_l2_filter(struct bnge_net *bn, struct bnge_l2_filter *fltr) +{ + if (!refcount_dec_and_test(&fltr->refcnt)) + return; + hlist_del_rcu(&fltr->base.hash); + kfree_rcu(fltr, base.rcu); +} + +static void bnge_init_l2_filter(struct bnge_net *bn, + struct bnge_l2_filter *fltr, + struct bnge_l2_key *key, u32 idx) +{ + struct hlist_head *head; + + 
ether_addr_copy(fltr->l2_key.dst_mac_addr, key->dst_mac_addr); + fltr->l2_key.vlan = key->vlan; + fltr->base.type = BNGE_FLTR_TYPE_L2; + + head = &bn->l2_fltr_hash_tbl[idx]; + hlist_add_head_rcu(&fltr->base.hash, head); + refcount_set(&fltr->refcnt, 1); +} + +static struct bnge_l2_filter *__bnge_lookup_l2_filter(struct bnge_net *bn, + struct bnge_l2_key *key, + u32 idx) +{ + struct bnge_l2_filter *fltr; + struct hlist_head *head; + + head = &bn->l2_fltr_hash_tbl[idx]; + hlist_for_each_entry_rcu(fltr, head, base.hash) { + struct bnge_l2_key *l2_key = &fltr->l2_key; + + if (ether_addr_equal(l2_key->dst_mac_addr, key->dst_mac_addr) && + l2_key->vlan == key->vlan) + return fltr; + } + return NULL; +} + +static struct bnge_l2_filter *bnge_lookup_l2_filter(struct bnge_net *bn, + struct bnge_l2_key *key, + u32 idx) +{ + struct bnge_l2_filter *fltr; + + rcu_read_lock(); + fltr = __bnge_lookup_l2_filter(bn, key, idx); + if (fltr) + refcount_inc(&fltr->refcnt); + rcu_read_unlock(); + return fltr; +} + +static struct bnge_l2_filter *bnge_alloc_l2_filter(struct bnge_net *bn, + struct bnge_l2_key *key, + gfp_t gfp) +{ + struct bnge_l2_filter *fltr; + u32 idx; + + idx = jhash2(&key->filter_key, BNGE_L2_KEY_SIZE, bn->hash_seed) & + BNGE_L2_FLTR_HASH_MASK; + fltr = bnge_lookup_l2_filter(bn, key, idx); + if (fltr) + return fltr; + + fltr = kzalloc(sizeof(*fltr), gfp); + if (!fltr) + return ERR_PTR(-ENOMEM); + + bnge_init_l2_filter(bn, fltr, key, idx); + return fltr; +} + +static int bnge_hwrm_set_vnic_filter(struct bnge_net *bn, u16 vnic_id, u16 idx, + const u8 *mac_addr) +{ + struct bnge_l2_filter *fltr; + struct bnge_l2_key key; + int rc; + + ether_addr_copy(key.dst_mac_addr, mac_addr); + key.vlan = 0; + fltr = bnge_alloc_l2_filter(bn, &key, GFP_KERNEL); + if (IS_ERR(fltr)) + return PTR_ERR(fltr); + + fltr->base.fw_vnic_id = bn->vnic_info[vnic_id].fw_vnic_id; + rc = bnge_hwrm_l2_filter_alloc(bn->bd, fltr); + if (rc) + goto err_del_l2_filter; + bn->vnic_info[vnic_id].l2_filters[idx] = fltr; + return rc; + +err_del_l2_filter: + bnge_del_l2_filter(bn, fltr); + return rc; +} + +static bool bnge_mc_list_updated(struct bnge_net *bn, u32 *rx_mask) +{ + struct bnge_vnic_info *vnic = &bn->vnic_info[BNGE_VNIC_DEFAULT]; + struct net_device *dev = bn->netdev; + struct netdev_hw_addr *ha; + int mc_count = 0, off = 0; + bool update = false; + u8 *haddr; + + netdev_for_each_mc_addr(ha, dev) { + if (mc_count >= BNGE_MAX_MC_ADDRS) { + *rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + return false; + } + haddr = ha->addr; + if (!ether_addr_equal(haddr, vnic->mc_list + off)) { + memcpy(vnic->mc_list + off, haddr, ETH_ALEN); + update = true; + } + off += ETH_ALEN; + mc_count++; + } + if (mc_count) + *rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_MCAST; + + if (mc_count != vnic->mc_list_count) { + vnic->mc_list_count = mc_count; + update = true; + } + return update; +} + +static bool bnge_uc_list_updated(struct bnge_net *bn) +{ + struct bnge_vnic_info *vnic = &bn->vnic_info[BNGE_VNIC_DEFAULT]; + struct net_device *dev = bn->netdev; + struct netdev_hw_addr *ha; + int off = 0; + + if (netdev_uc_count(dev) != (vnic->uc_filter_count - 1)) + return true; + + netdev_for_each_uc_addr(ha, dev) { + if (!ether_addr_equal(ha->addr, vnic->uc_list + off)) + return true; + + off += ETH_ALEN; + } + return false; +} + +static bool bnge_promisc_ok(struct bnge_net *bn) +{ + return true; +} + +static int bnge_cfg_def_vnic(struct bnge_net *bn) +{ + struct bnge_vnic_info *vnic = &bn->vnic_info[BNGE_VNIC_DEFAULT]; + struct 
net_device *dev = bn->netdev; + struct bnge_dev *bd = bn->bd; + struct netdev_hw_addr *ha; + int i, off = 0, rc; + bool uc_update; + + netif_addr_lock_bh(dev); + uc_update = bnge_uc_list_updated(bn); + netif_addr_unlock_bh(dev); + + if (!uc_update) + goto skip_uc; + + for (i = 1; i < vnic->uc_filter_count; i++) { + struct bnge_l2_filter *fltr = vnic->l2_filters[i]; + + bnge_hwrm_l2_filter_free(bd, fltr); + bnge_del_l2_filter(bn, fltr); + } + + vnic->uc_filter_count = 1; + + netif_addr_lock_bh(dev); + if (netdev_uc_count(dev) > (BNGE_MAX_UC_ADDRS - 1)) { + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; + } else { + netdev_for_each_uc_addr(ha, dev) { + memcpy(vnic->uc_list + off, ha->addr, ETH_ALEN); + off += ETH_ALEN; + vnic->uc_filter_count++; + } + } + netif_addr_unlock_bh(dev); + + for (i = 1, off = 0; i < vnic->uc_filter_count; i++, off += ETH_ALEN) { + rc = bnge_hwrm_set_vnic_filter(bn, 0, i, vnic->uc_list + off); + if (rc) { + netdev_err(dev, "HWRM vnic filter failure rc: %d\n", rc); + vnic->uc_filter_count = i; + return rc; + } + } + +skip_uc: + if ((vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS) && + !bnge_promisc_ok(bn)) + vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; + rc = bnge_hwrm_cfa_l2_set_rx_mask(bd, vnic); + if (rc && (vnic->rx_mask & CFA_L2_SET_RX_MASK_REQ_MASK_MCAST)) { + netdev_info(dev, "Failed setting MC filters rc: %d, turning on ALL_MCAST mode\n", + rc); + vnic->rx_mask &= ~CFA_L2_SET_RX_MASK_REQ_MASK_MCAST; + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + rc = bnge_hwrm_cfa_l2_set_rx_mask(bd, vnic); + } + if (rc) + netdev_err(dev, "HWRM cfa l2 rx mask failure rc: %d\n", + rc); + + return rc; +} + +static void bnge_hwrm_vnic_free(struct bnge_net *bn) +{ + int i; + + for (i = 0; i < bn->nr_vnics; i++) + bnge_hwrm_vnic_free_one(bn->bd, &bn->vnic_info[i]); +} + +static void bnge_hwrm_vnic_ctx_free(struct bnge_net *bn) +{ + int i, j; + + for (i = 0; i < bn->nr_vnics; i++) { + struct bnge_vnic_info *vnic = &bn->vnic_info[i]; + + for (j = 0; j < BNGE_MAX_CTX_PER_VNIC; j++) { + if (vnic->fw_rss_cos_lb_ctx[j] != INVALID_HW_RING_ID) + bnge_hwrm_vnic_ctx_free_one(bn->bd, vnic, j); + } + } + bn->rsscos_nr_ctxs = 0; +} + +static void bnge_hwrm_clear_vnic_filter(struct bnge_net *bn) +{ + struct bnge_vnic_info *vnic = &bn->vnic_info[BNGE_VNIC_DEFAULT]; + int i; + + for (i = 0; i < vnic->uc_filter_count; i++) { + struct bnge_l2_filter *fltr = vnic->l2_filters[i]; + + bnge_hwrm_l2_filter_free(bn->bd, fltr); + bnge_del_l2_filter(bn, fltr); + } + + vnic->uc_filter_count = 0; +} + +static void bnge_clear_vnic(struct bnge_net *bn) +{ + bnge_hwrm_clear_vnic_filter(bn); + bnge_hwrm_vnic_free(bn); + bnge_hwrm_vnic_ctx_free(bn); +} + +static void bnge_hwrm_rx_ring_free(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr, + bool close_path) +{ + struct bnge_ring_struct *ring = &rxr->rx_ring_struct; + u32 grp_idx = rxr->bnapi->index; + u32 cmpl_ring_id; + + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + cmpl_ring_id = bnge_cp_ring_for_rx(rxr); + hwrm_ring_free_send_msg(bn, ring, + RING_FREE_REQ_RING_TYPE_RX, + close_path ? 
cmpl_ring_id : + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; + bn->grp_info[grp_idx].rx_fw_ring_id = INVALID_HW_RING_ID; +} + +static void bnge_hwrm_rx_agg_ring_free(struct bnge_net *bn, + struct bnge_rx_ring_info *rxr, + bool close_path) +{ + struct bnge_ring_struct *ring = &rxr->rx_agg_ring_struct; + u32 grp_idx = rxr->bnapi->index; + u32 cmpl_ring_id; + + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + cmpl_ring_id = bnge_cp_ring_for_rx(rxr); + hwrm_ring_free_send_msg(bn, ring, RING_FREE_REQ_RING_TYPE_RX_AGG, + close_path ? cmpl_ring_id : + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; + bn->grp_info[grp_idx].agg_fw_ring_id = INVALID_HW_RING_ID; +} + +static void bnge_hwrm_tx_ring_free(struct bnge_net *bn, + struct bnge_tx_ring_info *txr, + bool close_path) +{ + struct bnge_ring_struct *ring = &txr->tx_ring_struct; + u32 cmpl_ring_id; + + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + cmpl_ring_id = close_path ? bnge_cp_ring_for_tx(txr) : + INVALID_HW_RING_ID; + hwrm_ring_free_send_msg(bn, ring, RING_FREE_REQ_RING_TYPE_TX, + cmpl_ring_id); + ring->fw_ring_id = INVALID_HW_RING_ID; +} + +static void bnge_hwrm_cp_ring_free(struct bnge_net *bn, + struct bnge_cp_ring_info *cpr) +{ + struct bnge_ring_struct *ring; + + ring = &cpr->ring_struct; + if (ring->fw_ring_id == INVALID_HW_RING_ID) + return; + + hwrm_ring_free_send_msg(bn, ring, RING_FREE_REQ_RING_TYPE_L2_CMPL, + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; +} + +static void bnge_hwrm_ring_free(struct bnge_net *bn, bool close_path) +{ + struct bnge_dev *bd = bn->bd; + int i; + + if (!bn->bnapi) + return; + + for (i = 0; i < bd->tx_nr_rings; i++) + bnge_hwrm_tx_ring_free(bn, &bn->tx_ring[i], close_path); + + for (i = 0; i < bd->rx_nr_rings; i++) { + bnge_hwrm_rx_ring_free(bn, &bn->rx_ring[i], close_path); + bnge_hwrm_rx_agg_ring_free(bn, &bn->rx_ring[i], close_path); + } + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_nq_ring_info *nqr; + struct bnge_ring_struct *ring; + int j; + + nqr = &bnapi->nq_ring; + for (j = 0; j < nqr->cp_ring_count && nqr->cp_ring_arr; j++) + bnge_hwrm_cp_ring_free(bn, &nqr->cp_ring_arr[j]); + + ring = &nqr->ring_struct; + if (ring->fw_ring_id != INVALID_HW_RING_ID) { + hwrm_ring_free_send_msg(bn, ring, + RING_FREE_REQ_RING_TYPE_NQ, + INVALID_HW_RING_ID); + ring->fw_ring_id = INVALID_HW_RING_ID; + bn->grp_info[i].nq_fw_ring_id = INVALID_HW_RING_ID; + } + } +} + +static void bnge_setup_msix(struct bnge_net *bn) +{ + struct net_device *dev = bn->netdev; + struct bnge_dev *bd = bn->bd; + int len, i; + + len = sizeof(bd->irq_tbl[0].name); + for (i = 0; i < bd->nq_nr_rings; i++) { + int map_idx = bnge_cp_num_to_irq_num(bn, i); + char *attr; + + if (bd->flags & BNGE_EN_SHARED_CHNL) + attr = "TxRx"; + else if (i < bd->rx_nr_rings) + attr = "rx"; + else + attr = "tx"; + + snprintf(bd->irq_tbl[map_idx].name, len, "%s-%s-%d", dev->name, + attr, i); + bd->irq_tbl[map_idx].handler = bnge_msix; + } +} + +static int bnge_setup_interrupts(struct bnge_net *bn) +{ + struct net_device *dev = bn->netdev; + struct bnge_dev *bd = bn->bd; + + bnge_setup_msix(bn); + + return netif_set_real_num_queues(dev, bd->tx_nr_rings, bd->rx_nr_rings); +} + +static void bnge_hwrm_resource_free(struct bnge_net *bn, bool close_path) +{ + bnge_clear_vnic(bn); + bnge_hwrm_ring_free(bn, close_path); + bnge_hwrm_stat_ctx_free(bn); +} + +static void bnge_free_irq(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + struct 
bnge_irq *irq; + int i; + + for (i = 0; i < bd->nq_nr_rings; i++) { + int map_idx = bnge_cp_num_to_irq_num(bn, i); + + irq = &bd->irq_tbl[map_idx]; + if (irq->requested) { + if (irq->have_cpumask) { + irq_set_affinity_hint(irq->vector, NULL); + free_cpumask_var(irq->cpu_mask); + irq->have_cpumask = 0; + } + free_irq(irq->vector, bn->bnapi[i]); + } + + irq->requested = 0; + } +} + +static int bnge_request_irq(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i, rc; + + rc = bnge_setup_interrupts(bn); + if (rc) { + netdev_err(bn->netdev, "bnge_setup_interrupts err: %d\n", rc); + return rc; + } + for (i = 0; i < bd->nq_nr_rings; i++) { + int map_idx = bnge_cp_num_to_irq_num(bn, i); + struct bnge_irq *irq = &bd->irq_tbl[map_idx]; + + rc = request_irq(irq->vector, irq->handler, 0, irq->name, + bn->bnapi[i]); + if (rc) + goto err_free_irq; + + netif_napi_set_irq_locked(&bn->bnapi[i]->napi, irq->vector); + irq->requested = 1; + + if (zalloc_cpumask_var(&irq->cpu_mask, GFP_KERNEL)) { + int numa_node = dev_to_node(&bd->pdev->dev); + + irq->have_cpumask = 1; + cpumask_set_cpu(cpumask_local_spread(i, numa_node), + irq->cpu_mask); + rc = irq_set_affinity_hint(irq->vector, irq->cpu_mask); + if (rc) { + netdev_warn(bn->netdev, + "Set affinity failed, IRQ = %d\n", + irq->vector); + goto err_free_irq; + } + } + } + return 0; + +err_free_irq: + bnge_free_irq(bn); + return rc; +} + +static int bnge_init_chip(struct bnge_net *bn) +{ + struct bnge_vnic_info *vnic = &bn->vnic_info[BNGE_VNIC_DEFAULT]; + struct bnge_dev *bd = bn->bd; + int rc; + +#define BNGE_DEF_STATS_COAL_TICKS 1000000 + bn->stats_coal_ticks = BNGE_DEF_STATS_COAL_TICKS; + + rc = bnge_hwrm_stat_ctx_alloc(bn); + if (rc) { + netdev_err(bn->netdev, "hwrm stat ctx alloc failure rc: %d\n", rc); + goto err_out; + } + + rc = bnge_hwrm_ring_alloc(bn); + if (rc) { + netdev_err(bn->netdev, "hwrm ring alloc failure rc: %d\n", rc); + goto err_out; + } + + rc = bnge_hwrm_vnic_alloc(bd, vnic, bd->rx_nr_rings); + if (rc) { + netdev_err(bn->netdev, "hwrm vnic alloc failure rc: %d\n", rc); + goto err_out; + } + + rc = bnge_setup_vnic(bn, vnic); + if (rc) + goto err_out; + + if (bd->rss_cap & BNGE_RSS_CAP_RSS_HASH_TYPE_DELTA) + bnge_hwrm_update_rss_hash_cfg(bn); + + /* Filter for default vnic 0 */ + rc = bnge_hwrm_set_vnic_filter(bn, 0, 0, bn->netdev->dev_addr); + if (rc) { + netdev_err(bn->netdev, "HWRM vnic filter failure rc: %d\n", rc); + goto err_out; + } + vnic->uc_filter_count = 1; + + vnic->rx_mask = 0; + + if (bn->netdev->flags & IFF_BROADCAST) + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_BCAST; + + if (bn->netdev->flags & IFF_PROMISC) + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_PROMISCUOUS; + + if (bn->netdev->flags & IFF_ALLMULTI) { + vnic->rx_mask |= CFA_L2_SET_RX_MASK_REQ_MASK_ALL_MCAST; + vnic->mc_list_count = 0; + } else if (bn->netdev->flags & IFF_MULTICAST) { + u32 mask = 0; + + bnge_mc_list_updated(bn, &mask); + vnic->rx_mask |= mask; + } + + rc = bnge_cfg_def_vnic(bn); + if (rc) + goto err_out; + return 0; + +err_out: + bnge_hwrm_resource_free(bn, 0); + return rc; +} + +static int bnge_napi_poll(struct napi_struct *napi, int budget) +{ + int work_done = 0; + + /* defer NAPI implementation to next patch series */ + napi_complete_done(napi, work_done); + + return work_done; +} + +static void bnge_init_napi(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + struct bnge_napi *bnapi; + int i; + + for (i = 0; i < bd->nq_nr_rings; i++) { + bnapi = bn->bnapi[i]; + netif_napi_add_config_locked(bn->netdev, &bnapi->napi, + 
bnge_napi_poll, bnapi->index); + } +} + +static void bnge_del_napi(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i; + + for (i = 0; i < bd->rx_nr_rings; i++) + netif_queue_set_napi(bn->netdev, i, NETDEV_QUEUE_TYPE_RX, NULL); + for (i = 0; i < bd->tx_nr_rings; i++) + netif_queue_set_napi(bn->netdev, i, NETDEV_QUEUE_TYPE_TX, NULL); + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + + __netif_napi_del_locked(&bnapi->napi); + } + + /* Wait for RCU grace period after removing NAPI instances */ + synchronize_net(); +} + +static int bnge_init_nic(struct bnge_net *bn) +{ + int rc; + + bnge_init_nq_tree(bn); + + bnge_init_rx_rings(bn); + rc = bnge_alloc_rx_ring_pair_bufs(bn); + if (rc) + return rc; + + bnge_init_tx_rings(bn); + + rc = bnge_init_ring_grps(bn); + if (rc) + goto err_free_rx_ring_pair_bufs; + + bnge_init_vnics(bn); + + rc = bnge_init_chip(bn); + if (rc) + goto err_free_ring_grps; + return rc; + +err_free_ring_grps: + bnge_free_ring_grps(bn); + return rc; + +err_free_rx_ring_pair_bufs: + bnge_free_rx_ring_pair_bufs(bn); + return rc; +} + +static int bnge_open_core(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int rc; + + netif_carrier_off(bn->netdev); + + rc = bnge_reserve_rings(bd); + if (rc) { + netdev_err(bn->netdev, "bnge_reserve_rings err: %d\n", rc); + return rc; + } + + rc = bnge_alloc_core(bn); + if (rc) { + netdev_err(bn->netdev, "bnge_alloc_core err: %d\n", rc); + return rc; + } + + bnge_init_napi(bn); + rc = bnge_request_irq(bn); + if (rc) { + netdev_err(bn->netdev, "bnge_request_irq err: %d\n", rc); + goto err_del_napi; + } + + rc = bnge_init_nic(bn); + if (rc) { + netdev_err(bn->netdev, "bnge_init_nic err: %d\n", rc); + goto err_free_irq; + } + set_bit(BNGE_STATE_OPEN, &bd->state); + return 0; + +err_free_irq: + bnge_free_irq(bn); +err_del_napi: + bnge_del_napi(bn); + bnge_free_core(bn); + return rc; +} static netdev_tx_t bnge_start_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -28,11 +2212,42 @@ static netdev_tx_t bnge_start_xmit(struct sk_buff *skb, struct net_device *dev) static int bnge_open(struct net_device *dev) { + struct bnge_net *bn = netdev_priv(dev); + int rc; + + rc = bnge_open_core(bn); + if (rc) + netdev_err(dev, "bnge_open_core err: %d\n", rc); + + return rc; +} + +static int bnge_shutdown_nic(struct bnge_net *bn) +{ + /* TODO: close_path = 0 until we make NAPI functional */ + bnge_hwrm_resource_free(bn, 0); return 0; } +static void bnge_close_core(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + + clear_bit(BNGE_STATE_OPEN, &bd->state); + bnge_shutdown_nic(bn); + bnge_free_all_rings_bufs(bn); + bnge_free_irq(bn); + bnge_del_napi(bn); + + bnge_free_core(bn); +} + static int bnge_close(struct net_device *dev) { + struct bnge_net *bn = netdev_priv(dev); + + bnge_close_core(bn); + return 0; } @@ -238,6 +2453,7 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) bn->rx_ring_size = BNGE_DEFAULT_RX_RING_SIZE; bn->tx_ring_size = BNGE_DEFAULT_TX_RING_SIZE; + bn->rx_dir = DMA_FROM_DEVICE; bnge_set_tpa_flags(bd); bnge_set_ring_params(bd); @@ -245,6 +2461,7 @@ int bnge_netdev_alloc(struct bnge_dev *bd, int max_irqs) bnge_init_l2_fltr_tbl(bn); bnge_init_mac_addr(bd); + netdev->request_ops_lock = true; rc = register_netdev(netdev); if (rc) { dev_err(bd->dev, "Register netdev failed rc: %d\n", rc); diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h index a650d71a58db..fb3b961536ba 100644 --- 
a/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_netdev.h @@ -5,6 +5,9 @@ #define _BNGE_NETDEV_H_ #include <linux/bnxt/hsi.h> +#include <linux/io-64-nonatomic-lo-hi.h> +#include <linux/refcount.h> +#include "bnge_db.h" struct tx_bd { __le32 tx_bd_len_flags_type; @@ -113,11 +116,25 @@ struct bnge_sw_rx_bd { }; struct bnge_sw_rx_agg_bd { - struct page *page; + netmem_ref netmem; unsigned int offset; dma_addr_t mapping; }; +#define HWRM_RING_ALLOC_TX 0x1 +#define HWRM_RING_ALLOC_RX 0x2 +#define HWRM_RING_ALLOC_AGG 0x4 +#define HWRM_RING_ALLOC_CMPL 0x8 +#define HWRM_RING_ALLOC_NQ 0x10 + +struct bnge_ring_grp_info { + u16 fw_stats_ctx; + u16 fw_grp_id; + u16 rx_fw_ring_id; + u16 agg_fw_ring_id; + u16 nq_fw_ring_id; +}; + #define BNGE_RX_COPY_THRESH 256 #define BNGE_HW_FEATURE_VLAN_ALL_RX \ @@ -133,6 +150,32 @@ enum { #define BNGE_NET_EN_TPA (BNGE_NET_EN_GRO | BNGE_NET_EN_LRO) +/* Minimum TX BDs for a TX packet with MAX_SKB_FRAGS + 1. We need one extra + * BD because the first TX BD is always a long BD. + */ +#define BNGE_MIN_TX_DESC_CNT (MAX_SKB_FRAGS + 2) + +#define RX_RING(bn, x) (((x) & (bn)->rx_ring_mask) >> (BNGE_PAGE_SHIFT - 4)) +#define RX_AGG_RING(bn, x) (((x) & (bn)->rx_agg_ring_mask) >> \ + (BNGE_PAGE_SHIFT - 4)) +#define RX_IDX(x) ((x) & (RX_DESC_CNT - 1)) + +#define TX_RING(bn, x) (((x) & (bn)->tx_ring_mask) >> (BNGE_PAGE_SHIFT - 4)) +#define TX_IDX(x) ((x) & (TX_DESC_CNT - 1)) + +#define CP_RING(x) (((x) & ~(CP_DESC_CNT - 1)) >> (BNGE_PAGE_SHIFT - 4)) +#define CP_IDX(x) ((x) & (CP_DESC_CNT - 1)) + +#define RING_RX(bn, idx) ((idx) & (bn)->rx_ring_mask) +#define NEXT_RX(idx) ((idx) + 1) + +#define RING_RX_AGG(bn, idx) ((idx) & (bn)->rx_agg_ring_mask) +#define NEXT_RX_AGG(idx) ((idx) + 1) + +#define BNGE_NQ_HDL_TYPE_SHIFT 24 +#define BNGE_NQ_HDL_TYPE_RX 0x00 +#define BNGE_NQ_HDL_TYPE_TX 0x01 + struct bnge_net { struct bnge_dev *bd; struct net_device *netdev; @@ -164,6 +207,30 @@ struct bnge_net { struct hlist_head l2_fltr_hash_tbl[BNGE_L2_FLTR_HASH_SIZE]; u32 hash_seed; u64 toeplitz_prefix; + + struct bnge_napi **bnapi; + + struct bnge_rx_ring_info *rx_ring; + struct bnge_tx_ring_info *tx_ring; + + u16 *tx_ring_map; + enum dma_data_direction rx_dir; + + /* grp_info indexed by napi/nq index */ + struct bnge_ring_grp_info *grp_info; + struct bnge_vnic_info *vnic_info; + int nr_vnics; + int total_irqs; + + u32 tx_wake_thresh; + u16 rx_offset; + u16 rx_dma_offset; + + u8 rss_hash_key[HW_HASH_KEY_SIZE]; + u8 rss_hash_key_valid:1; + u8 rss_hash_key_updated:1; + int rsscos_nr_ctxs; + u32 stats_coal_ticks; }; #define BNGE_DEFAULT_RX_RING_SIZE 511 @@ -203,4 +270,185 @@ void bnge_set_ring_params(struct bnge_dev *bd); #define BNGE_MAX_RX_JUM_DESC_CNT (RX_DESC_CNT * MAX_RX_AGG_PAGES - 1) #define BNGE_MAX_TX_DESC_CNT (TX_DESC_CNT * MAX_TX_PAGES - 1) +#define BNGE_MAX_TXR_PER_NAPI 8 + +#define bnge_for_each_napi_tx(iter, bnapi, txr) \ + for (iter = 0, txr = (bnapi)->tx_ring[0]; txr; \ + txr = (iter < BNGE_MAX_TXR_PER_NAPI - 1) ? 
\ + (bnapi)->tx_ring[++iter] : NULL) + +#define BNGE_SET_NQ_HDL(cpr) \ + (((cpr)->cp_ring_type << BNGE_NQ_HDL_TYPE_SHIFT) | (cpr)->cp_idx) + +struct bnge_stats_mem { + u64 *sw_stats; + u64 *hw_masks; + void *hw_stats; + dma_addr_t hw_stats_map; + int len; +}; + +struct bnge_cp_ring_info { + struct bnge_napi *bnapi; + dma_addr_t *desc_mapping; + struct tx_cmp **desc_ring; + struct bnge_ring_struct ring_struct; + u8 cp_ring_type; + u8 cp_idx; + u32 cp_raw_cons; + struct bnge_db_info cp_db; +}; + +struct bnge_nq_ring_info { + struct bnge_napi *bnapi; + dma_addr_t *desc_mapping; + struct nqe_cn **desc_ring; + struct bnge_ring_struct ring_struct; + u32 nq_raw_cons; + struct bnge_db_info nq_db; + + struct bnge_stats_mem stats; + u32 hw_stats_ctx_id; + + int cp_ring_count; + struct bnge_cp_ring_info *cp_ring_arr; +}; + +struct bnge_rx_ring_info { + struct bnge_napi *bnapi; + struct bnge_cp_ring_info *rx_cpr; + u16 rx_prod; + u16 rx_agg_prod; + u16 rx_sw_agg_prod; + u16 rx_next_cons; + struct bnge_db_info rx_db; + struct bnge_db_info rx_agg_db; + + struct rx_bd *rx_desc_ring[MAX_RX_PAGES]; + struct bnge_sw_rx_bd *rx_buf_ring; + + struct rx_bd *rx_agg_desc_ring[MAX_RX_AGG_PAGES]; + struct bnge_sw_rx_agg_bd *rx_agg_buf_ring; + + unsigned long *rx_agg_bmap; + u16 rx_agg_bmap_size; + + dma_addr_t rx_desc_mapping[MAX_RX_PAGES]; + dma_addr_t rx_agg_desc_mapping[MAX_RX_AGG_PAGES]; + + struct bnge_ring_struct rx_ring_struct; + struct bnge_ring_struct rx_agg_ring_struct; + struct page_pool *page_pool; + struct page_pool *head_pool; + bool need_head_pool; +}; + +struct bnge_tx_ring_info { + struct bnge_napi *bnapi; + struct bnge_cp_ring_info *tx_cpr; + u16 tx_prod; + u16 tx_cons; + u16 tx_hw_cons; + u16 txq_index; + u8 tx_napi_idx; + u8 kick_pending; + struct bnge_db_info tx_db; + + struct tx_bd *tx_desc_ring[MAX_TX_PAGES]; + struct bnge_sw_tx_bd *tx_buf_ring; + + dma_addr_t tx_desc_mapping[MAX_TX_PAGES]; + + u32 dev_state; +#define BNGE_DEV_STATE_CLOSING 0x1 + + struct bnge_ring_struct tx_ring_struct; +}; + +struct bnge_napi { + struct napi_struct napi; + struct bnge_net *bn; + int index; + + struct bnge_nq_ring_info nq_ring; + struct bnge_rx_ring_info *rx_ring; + struct bnge_tx_ring_info *tx_ring[BNGE_MAX_TXR_PER_NAPI]; +}; + +#define INVALID_STATS_CTX_ID -1 +#define BNGE_VNIC_DEFAULT 0 +#define BNGE_MAX_UC_ADDRS 4 + +struct bnge_vnic_info { + u16 fw_vnic_id; +#define BNGE_MAX_CTX_PER_VNIC 8 + u16 fw_rss_cos_lb_ctx[BNGE_MAX_CTX_PER_VNIC]; + u16 mru; + /* index 0 always dev_addr */ + struct bnge_l2_filter *l2_filters[BNGE_MAX_UC_ADDRS]; + u16 uc_filter_count; + u8 *uc_list; + dma_addr_t rss_table_dma_addr; + __le16 *rss_table; + dma_addr_t rss_hash_key_dma_addr; + u64 *rss_hash_key; + int rss_table_size; +#define BNGE_RSS_TABLE_ENTRIES 64 +#define BNGE_RSS_TABLE_SIZE (BNGE_RSS_TABLE_ENTRIES * 4) +#define BNGE_RSS_TABLE_MAX_TBL 8 +#define BNGE_MAX_RSS_TABLE_SIZE \ + (BNGE_RSS_TABLE_SIZE * BNGE_RSS_TABLE_MAX_TBL) + u32 rx_mask; + + u8 *mc_list; + int mc_list_size; + int mc_list_count; + dma_addr_t mc_list_mapping; +#define BNGE_MAX_MC_ADDRS 16 + + u32 flags; +#define BNGE_VNIC_RSS_FLAG 1 +#define BNGE_VNIC_MCAST_FLAG 4 +#define BNGE_VNIC_UCAST_FLAG 8 + u32 vnic_id; +}; + +struct bnge_filter_base { + struct hlist_node hash; + struct list_head list; + __le64 filter_id; + u8 type; +#define BNGE_FLTR_TYPE_L2 2 + u8 flags; + u16 rxq; + u16 fw_vnic_id; + u16 vf_idx; + unsigned long state; +#define BNGE_FLTR_VALID 0 +#define BNGE_FLTR_FW_DELETED 2 + + struct rcu_head rcu; +}; + +struct bnge_l2_key { + union { + 
struct { + u8 dst_mac_addr[ETH_ALEN]; + u16 vlan; + }; + u32 filter_key; + }; +}; + +#define BNGE_L2_KEY_SIZE (sizeof(struct bnge_l2_key) / 4) +struct bnge_l2_filter { + /* base filter must be the first member */ + struct bnge_filter_base base; + struct bnge_l2_key l2_key; + refcount_t refcnt; +}; + +u16 bnge_cp_ring_for_rx(struct bnge_rx_ring_info *rxr); +u16 bnge_cp_ring_for_tx(struct bnge_tx_ring_info *txr); +void bnge_fill_hw_rss_tbl(struct bnge_net *bn, struct bnge_vnic_info *vnic); #endif /* _BNGE_NETDEV_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_resc.c b/drivers/net/ethernet/broadcom/bnge/bnge_resc.c index c79a3607a1b7..62ebe03a0dcf 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_resc.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_resc.c @@ -46,7 +46,7 @@ static int bnge_aux_get_dflt_msix(struct bnge_dev *bd) return min_t(int, roce_msix, num_online_cpus() + 1); } -static u16 bnge_aux_get_msix(struct bnge_dev *bd) +u16 bnge_aux_get_msix(struct bnge_dev *bd) { if (bnge_is_roce_en(bd)) return bd->aux_num_msix; @@ -164,7 +164,7 @@ static int bnge_adjust_rings(struct bnge_dev *bd, u16 *rx, return bnge_fix_rings_count(rx, tx, max_nq, sh); } -static int bnge_cal_nr_rss_ctxs(u16 rx_rings) +int bnge_cal_nr_rss_ctxs(u16 rx_rings) { if (!rx_rings) return 0; @@ -184,7 +184,7 @@ static u16 bnge_get_total_vnics(struct bnge_dev *bd, u16 rx_rings) return 1; } -static u32 bnge_get_rxfh_indir_size(struct bnge_dev *bd) +u32 bnge_get_rxfh_indir_size(struct bnge_dev *bd) { return bnge_cal_nr_rss_ctxs(bd->rx_nr_rings) * BNGE_RSS_TABLE_ENTRIES; diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_resc.h b/drivers/net/ethernet/broadcom/bnge/bnge_resc.h index 54ef1c7d8822..0d6213b27580 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_resc.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_resc.h @@ -72,6 +72,8 @@ void bnge_free_irqs(struct bnge_dev *bd); int bnge_net_init_dflt_config(struct bnge_dev *bd); void bnge_net_uninit_dflt_config(struct bnge_dev *bd); void bnge_aux_init_dflt_config(struct bnge_dev *bd); +u32 bnge_get_rxfh_indir_size(struct bnge_dev *bd); +int bnge_cal_nr_rss_ctxs(u16 rx_rings); static inline u32 bnge_adjust_pow_two(u32 total_ent, u16 ent_per_blk) diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c index 52ada65943a0..79f5ce2e5d08 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c +++ b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.c @@ -95,7 +95,7 @@ int bnge_alloc_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem) &rmem->dma_arr[i], GFP_KERNEL); if (!rmem->pg_arr[i]) - return -ENOMEM; + goto err_free_ring; if (rmem->ctx_mem) bnge_init_ctx_mem(rmem->ctx_mem, rmem->pg_arr[i], @@ -116,10 +116,13 @@ int bnge_alloc_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem) if (rmem->vmem_size) { *rmem->vmem = vzalloc(rmem->vmem_size); if (!(*rmem->vmem)) - return -ENOMEM; + goto err_free_ring; } - return 0; + +err_free_ring: + bnge_free_ring(bd, rmem); + return -ENOMEM; } static int bnge_alloc_ctx_one_lvl(struct bnge_dev *bd, @@ -436,3 +439,61 @@ skip_rdma: return 0; } + +void bnge_init_ring_struct(struct bnge_net *bn) +{ + struct bnge_dev *bd = bn->bd; + int i, j; + + for (i = 0; i < bd->nq_nr_rings; i++) { + struct bnge_napi *bnapi = bn->bnapi[i]; + struct bnge_ring_mem_info *rmem; + struct bnge_nq_ring_info *nqr; + struct bnge_rx_ring_info *rxr; + struct bnge_tx_ring_info *txr; + struct bnge_ring_struct *ring; + + nqr = &bnapi->nq_ring; + ring = &nqr->ring_struct; + rmem = &ring->ring_mem; 
+ rmem->nr_pages = bn->cp_nr_pages; + rmem->page_size = HW_CMPD_RING_SIZE; + rmem->pg_arr = (void **)nqr->desc_ring; + rmem->dma_arr = nqr->desc_mapping; + rmem->vmem_size = 0; + + rxr = bnapi->rx_ring; + if (!rxr) + goto skip_rx; + + ring = &rxr->rx_ring_struct; + rmem = &ring->ring_mem; + rmem->nr_pages = bn->rx_nr_pages; + rmem->page_size = HW_RXBD_RING_SIZE; + rmem->pg_arr = (void **)rxr->rx_desc_ring; + rmem->dma_arr = rxr->rx_desc_mapping; + rmem->vmem_size = SW_RXBD_RING_SIZE * bn->rx_nr_pages; + rmem->vmem = (void **)&rxr->rx_buf_ring; + + ring = &rxr->rx_agg_ring_struct; + rmem = &ring->ring_mem; + rmem->nr_pages = bn->rx_agg_nr_pages; + rmem->page_size = HW_RXBD_RING_SIZE; + rmem->pg_arr = (void **)rxr->rx_agg_desc_ring; + rmem->dma_arr = rxr->rx_agg_desc_mapping; + rmem->vmem_size = SW_RXBD_AGG_RING_SIZE * bn->rx_agg_nr_pages; + rmem->vmem = (void **)&rxr->rx_agg_buf_ring; + +skip_rx: + bnge_for_each_napi_tx(j, bnapi, txr) { + ring = &txr->tx_ring_struct; + rmem = &ring->ring_mem; + rmem->nr_pages = bn->tx_nr_pages; + rmem->page_size = HW_TXBD_RING_SIZE; + rmem->pg_arr = (void **)txr->tx_desc_ring; + rmem->dma_arr = txr->tx_desc_mapping; + rmem->vmem_size = SW_TXBD_RING_SIZE * bn->tx_nr_pages; + rmem->vmem = (void **)&txr->tx_buf_ring; + } + } +} diff --git a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.h b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.h index 300f1d8268ef..341c7f81ed09 100644 --- a/drivers/net/ethernet/broadcom/bnge/bnge_rmem.h +++ b/drivers/net/ethernet/broadcom/bnge/bnge_rmem.h @@ -6,6 +6,7 @@ struct bnge_ctx_mem_type; struct bnge_dev; +struct bnge_net; #define PTU_PTE_VALID 0x1UL #define PTU_PTE_LAST 0x2UL @@ -180,9 +181,22 @@ struct bnge_ctx_mem_info { struct bnge_ctx_mem_type ctx_arr[BNGE_CTX_V2_MAX]; }; +struct bnge_ring_struct { + struct bnge_ring_mem_info ring_mem; + + u16 fw_ring_id; + union { + u16 grp_idx; + u16 map_idx; /* Used by NQs */ + }; + u32 handle; + u8 queue_id; +}; + int bnge_alloc_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem); void bnge_free_ring(struct bnge_dev *bd, struct bnge_ring_mem_info *rmem); int bnge_alloc_ctx_mem(struct bnge_dev *bd); void bnge_free_ctx_mem(struct bnge_dev *bd); +void bnge_init_ring_struct(struct bnge_net *bn); #endif /* _BNGE_RMEM_H_ */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 207a8bb36ae5..1d0e0e7362bd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -142,6 +142,7 @@ static const struct { [NETXTREME_E_P5_VF] = { "Broadcom BCM5750X NetXtreme-E Ethernet Virtual Function" }, [NETXTREME_E_P5_VF_HV] = { "Broadcom BCM5750X NetXtreme-E Virtual Function for Hyper-V" }, [NETXTREME_E_P7_VF] = { "Broadcom BCM5760X Virtual Function" }, + [NETXTREME_E_P7_VF_HV] = { "Broadcom BCM5760X Virtual Function for Hyper-V" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { @@ -217,6 +218,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x1808), .driver_data = NETXTREME_E_P5_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x1809), .driver_data = NETXTREME_E_P5_VF_HV }, { PCI_VDEVICE(BROADCOM, 0x1819), .driver_data = NETXTREME_E_P7_VF }, + { PCI_VDEVICE(BROADCOM, 0x181b), .driver_data = NETXTREME_E_P7_VF_HV }, { PCI_VDEVICE(BROADCOM, 0xd800), .driver_data = NETXTREME_S_VF }, #endif { 0 } @@ -263,6 +265,8 @@ const u16 bnxt_bstore_to_trace[] = { [BNXT_CTX_CA1] = DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA1_TRACE, [BNXT_CTX_CA2] = DBG_LOG_BUFFER_FLUSH_REQ_TYPE_CA2_TRACE, [BNXT_CTX_RIGP1] = 
DBG_LOG_BUFFER_FLUSH_REQ_TYPE_RIGP1_TRACE, + [BNXT_CTX_KONG] = DBG_LOG_BUFFER_FLUSH_REQ_TYPE_AFM_KONG_HWRM_TRACE, + [BNXT_CTX_QPC] = DBG_LOG_BUFFER_FLUSH_REQ_TYPE_ERR_QPC_TRACE, }; static struct workqueue_struct *bnxt_pf_wq; @@ -315,7 +319,8 @@ static bool bnxt_vf_pciid(enum board_idx idx) return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF || idx == NETXTREME_S_VF || idx == NETXTREME_C_VF_HV || idx == NETXTREME_E_VF_HV || idx == NETXTREME_E_P5_VF || - idx == NETXTREME_E_P5_VF_HV || idx == NETXTREME_E_P7_VF); + idx == NETXTREME_E_P5_VF_HV || idx == NETXTREME_E_P7_VF || + idx == NETXTREME_E_P7_VF_HV); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) @@ -3797,8 +3802,7 @@ static void bnxt_free_rx_rings(struct bnxt *bp) xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); - if (bnxt_separate_head_pool(rxr)) - page_pool_destroy(rxr->head_pool); + page_pool_destroy(rxr->head_pool); rxr->page_pool = rxr->head_pool = NULL; kfree(rxr->rx_agg_bmap); @@ -3845,6 +3849,8 @@ static int bnxt_alloc_rx_page_pool(struct bnxt *bp, pool = page_pool_create(&pp); if (IS_ERR(pool)) goto err_destroy_pp; + } else { + page_pool_get(pool); } rxr->head_pool = pool; @@ -4397,7 +4403,7 @@ static void bnxt_alloc_one_rx_ring_netmem(struct bnxt *bp, for (i = 0; i < bp->rx_agg_ring_size; i++) { if (bnxt_alloc_rx_netmem(bp, rxr, prod, GFP_KERNEL)) { netdev_warn(bp->dev, "init'ed rx ring %d with %d/%d pages only\n", - ring_nr, i, bp->rx_ring_size); + ring_nr, i, bp->rx_agg_ring_size); break; } prod = NEXT_RX_AGG(prod); @@ -6832,7 +6838,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, struct bnxt_vnic_info *vnic) req->dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id); req->lb_rule = cpu_to_le16(0xffff); vnic_mru: - vnic->mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; + vnic->mru = bp->dev->mtu + VLAN_ETH_HLEN; req->mru = cpu_to_le16(vnic->mru); req->vnic_id = cpu_to_le16(vnic->fw_vnic_id); @@ -6969,6 +6975,8 @@ static int bnxt_hwrm_vnic_qcaps(struct bnxt *bp) bp->rss_cap |= BNXT_RSS_CAP_ESP_V4_RSS_CAP; if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPSEC_ESP_SPI_IPV6_CAP) bp->rss_cap |= BNXT_RSS_CAP_ESP_V6_RSS_CAP; + if (flags & VNIC_QCAPS_RESP_FLAGS_RSS_IPV6_FLOW_LABEL_CAP) + bp->rss_cap |= BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP; if (flags & VNIC_QCAPS_RESP_FLAGS_RE_FLUSH_CAP) bp->fw_cap |= BNXT_FW_CAP_VNIC_RE_FLUSH; } @@ -8016,7 +8024,8 @@ static int __bnxt_reserve_rings(struct bnxt *bp) } rx_rings = min_t(int, rx_rings, hwr.grp); hwr.cp = min_t(int, hwr.cp, bp->cp_nr_rings); - if (hwr.stat > bnxt_get_ulp_stat_ctxs(bp)) + if (bnxt_ulp_registered(bp->edev) && + hwr.stat > bnxt_get_ulp_stat_ctxs(bp)) hwr.stat -= bnxt_get_ulp_stat_ctxs(bp); hwr.cp = min_t(int, hwr.cp, hwr.stat); rc = bnxt_trim_rings(bp, &rx_rings, &hwr.tx, hwr.cp, sh); @@ -8024,6 +8033,11 @@ static int __bnxt_reserve_rings(struct bnxt *bp) hwr.rx = rx_rings << 1; tx_cp = bnxt_num_tx_to_cp(bp, hwr.tx); hwr.cp = sh ? 
max_t(int, tx_cp, rx_rings) : tx_cp + rx_rings; + if (hwr.tx != bp->tx_nr_rings) { + netdev_warn(bp->dev, + "Able to reserve only %d out of %d requested TX rings\n", + hwr.tx, bp->tx_nr_rings); + } bp->tx_nr_rings = hwr.tx; /* If we cannot reserve all the RX rings, reset the RSS map only @@ -9138,7 +9152,7 @@ static int bnxt_hwrm_func_backing_store_cfg_v2(struct bnxt *bp, return rc; } -static int bnxt_backing_store_cfg_v2(struct bnxt *bp, u32 ena) +static int bnxt_backing_store_cfg_v2(struct bnxt *bp) { struct bnxt_ctx_mem_info *ctx = bp->ctx; struct bnxt_ctx_mem_type *ctxm; @@ -9146,7 +9160,7 @@ static int bnxt_backing_store_cfg_v2(struct bnxt *bp, u32 ena) int rc = 0; u16 type; - for (type = BNXT_CTX_SRT; type <= BNXT_CTX_RIGP1; type++) { + for (type = BNXT_CTX_SRT; type <= BNXT_CTX_QPC; type++) { ctxm = &ctx->ctx_arr[type]; if (!bnxt_bs_trace_avail(bp, type)) continue; @@ -9164,12 +9178,13 @@ static int bnxt_backing_store_cfg_v2(struct bnxt *bp, u32 ena) } if (last_type == BNXT_CTX_INV) { - if (!ena) + for (type = 0; type < BNXT_CTX_MAX; type++) { + ctxm = &ctx->ctx_arr[type]; + if (ctxm->mem_valid) + last_type = type; + } + if (last_type == BNXT_CTX_INV) return 0; - else if (ena & FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM) - last_type = BNXT_CTX_MAX - 1; - else - last_type = BNXT_CTX_L2_MAX - 1; } ctx->ctx_arr[last_type].last = 1; @@ -9296,6 +9311,10 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp) if (!ctx || (ctx->flags & BNXT_CTX_FLAG_INITED)) return 0; + ena = 0; + if (!(bp->flags & BNXT_FLAG_CHIP_P5_PLUS)) + goto skip_legacy; + ctxm = &ctx->ctx_arr[BNXT_CTX_QP]; l2_qps = ctxm->qp_l2_entries; qp1_qps = ctxm->qp_qp1_entries; @@ -9304,7 +9323,6 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp) ctxm = &ctx->ctx_arr[BNXT_CTX_SRQ]; srqs = ctxm->srq_l2_entries; max_srqs = ctxm->max_entries; - ena = 0; if ((bp->flags & BNXT_FLAG_ROCE_CAP) && !is_kdump_kernel()) { pg_lvl = 2; if (BNXT_SW_RES_LMT(bp)) { @@ -9398,8 +9416,9 @@ skip_rdma: ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TQM_SP << i; ena |= FUNC_BACKING_STORE_CFG_REQ_DFLT_ENABLES; +skip_legacy: if (bp->fw_cap & BNXT_FW_CAP_BACKING_STORE_V2) - rc = bnxt_backing_store_cfg_v2(bp, ena); + rc = bnxt_backing_store_cfg_v2(bp); else rc = bnxt_hwrm_func_backing_store_cfg(bp, ena); if (rc) { @@ -9613,10 +9632,10 @@ no_ptp: static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) { + u32 flags, flags_ext, flags_ext2, flags_ext3; + struct bnxt_hw_resc *hw_resc = &bp->hw_resc; struct hwrm_func_qcaps_output *resp; struct hwrm_func_qcaps_input *req; - struct bnxt_hw_resc *hw_resc = &bp->hw_resc; - u32 flags, flags_ext, flags_ext2; int rc; rc = hwrm_req_init(bp, req, HWRM_FUNC_QCAPS); @@ -9683,6 +9702,10 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_ROCE_VF_RESOURCE_MGMT_SUPPORTED)) bp->fw_cap |= BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED; + flags_ext3 = le32_to_cpu(resp->flags_ext3); + if (flags_ext3 & FUNC_QCAPS_RESP_FLAGS_EXT3_ROCE_VF_DYN_ALLOC_SUPPORT) + bp->fw_cap |= BNXT_FW_CAP_ROCE_VF_DYN_ALLOC_SUPPORT; + bp->tx_push_thresh = 0; if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && BNXT_FW_MAJ(bp) > 217) @@ -12851,6 +12874,17 @@ static int bnxt_set_xps_mapping(struct bnxt *bp) return rc; } +static int bnxt_tx_nr_rings(struct bnxt *bp) +{ + return bp->num_tc ? bp->tx_nr_rings_per_tc * bp->num_tc : + bp->tx_nr_rings_per_tc; +} + +static int bnxt_tx_nr_rings_per_tc(struct bnxt *bp) +{ + return bp->num_tc ? 
bp->tx_nr_rings / bp->num_tc : bp->tx_nr_rings; +} + static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) { int rc = 0; @@ -12868,6 +12902,13 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) if (rc) return rc; + /* Make adjustments if reserved TX rings are less than requested */ + bp->tx_nr_rings -= bp->tx_nr_rings_xdp; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); + if (bp->tx_nr_rings_xdp) { + bp->tx_nr_rings_xdp = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings += bp->tx_nr_rings_xdp; + } rc = bnxt_alloc_mem(bp, irq_re_init); if (rc) { netdev_err(bp->dev, "bnxt_alloc_mem err: %x\n", rc); @@ -13237,12 +13278,6 @@ static int bnxt_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return bnxt_hwrm_port_phy_write(bp, mdio->phy_id, mdio->reg_num, mdio->val_in); - case SIOCSHWTSTAMP: - return bnxt_hwtstamp_set(dev, ifr); - - case SIOCGHWTSTAMP: - return bnxt_hwtstamp_get(dev, ifr); - default: /* do nothing */ break; @@ -14707,6 +14742,23 @@ static bool bnxt_fw_pre_resv_vnics(struct bnxt *bp) return false; } +static void bnxt_hwrm_pfcwd_qcaps(struct bnxt *bp) +{ + struct hwrm_queue_pfcwd_timeout_qcaps_output *resp; + struct hwrm_queue_pfcwd_timeout_qcaps_input *req; + int rc; + + bp->max_pfcwd_tmo_ms = 0; + rc = hwrm_req_init(bp, req, HWRM_QUEUE_PFCWD_TIMEOUT_QCAPS); + if (rc) + return; + resp = hwrm_req_hold(bp, req); + rc = hwrm_req_send_silent(bp, req); + if (!rc) + bp->max_pfcwd_tmo_ms = le16_to_cpu(resp->max_pfcwd_timeout); + hwrm_req_drop(bp, req); +} + static int bnxt_fw_init_one_p1(struct bnxt *bp) { int rc; @@ -14784,6 +14836,7 @@ static int bnxt_fw_init_one_p2(struct bnxt *bp) if (bnxt_fw_pre_resv_vnics(bp)) bp->fw_cap |= BNXT_FW_CAP_PRE_RESV_VNICS; + bnxt_hwrm_pfcwd_qcaps(bp); bnxt_hwrm_func_qcfg(bp); bnxt_hwrm_vnic_qcaps(bp); bnxt_hwrm_port_led_qcaps(bp); @@ -15747,6 +15800,8 @@ static const struct net_device_ops bnxt_netdev_ops = { .ndo_xdp_xmit = bnxt_xdp_xmit, .ndo_bridge_getlink = bnxt_bridge_getlink, .ndo_bridge_setlink = bnxt_bridge_setlink, + .ndo_hwtstamp_get = bnxt_hwtstamp_get, + .ndo_hwtstamp_set = bnxt_hwtstamp_set, }; static void bnxt_get_queue_stats_rx(struct net_device *dev, int i, @@ -15898,8 +15953,7 @@ err_rxq_info_unreg: xdp_rxq_info_unreg(&clone->xdp_rxq); err_page_pool_destroy: page_pool_destroy(clone->page_pool); - if (bnxt_separate_head_pool(clone)) - page_pool_destroy(clone->head_pool); + page_pool_destroy(clone->head_pool); clone->page_pool = NULL; clone->head_pool = NULL; return rc; @@ -15917,8 +15971,7 @@ static void bnxt_queue_mem_free(struct net_device *dev, void *qmem) xdp_rxq_info_unreg(&rxr->xdp_rxq); page_pool_destroy(rxr->page_pool); - if (bnxt_separate_head_pool(rxr)) - page_pool_destroy(rxr->head_pool); + page_pool_destroy(rxr->head_pool); rxr->page_pool = NULL; rxr->head_pool = NULL; @@ -16047,7 +16100,7 @@ static int bnxt_queue_start(struct net_device *dev, void *qmem, int idx) napi_enable_locked(&bnapi->napi); bnxt_db_nq_arm(bp, &cpr->cp_db, cpr->cp_raw_cons); - mru = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; + mru = bp->dev->mtu + VLAN_ETH_HLEN; for (i = 0; i < bp->nr_vnics; i++) { vnic = &bp->vnic_info[i]; @@ -16128,7 +16181,7 @@ static void bnxt_remove_one(struct pci_dev *pdev) struct bnxt *bp = netdev_priv(dev); if (BNXT_PF(bp)) - bnxt_sriov_disable(bp); + __bnxt_sriov_disable(bp); bnxt_rdma_aux_device_del(bp); @@ -16325,7 +16378,7 @@ static void bnxt_trim_dflt_sh_rings(struct bnxt *bp) bp->cp_nr_rings = min_t(int, bp->tx_nr_rings_per_tc, bp->rx_nr_rings); bp->rx_nr_rings 
= bp->cp_nr_rings; bp->tx_nr_rings_per_tc = bp->cp_nr_rings; - bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); } static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) @@ -16357,7 +16410,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) bnxt_trim_dflt_sh_rings(bp); else bp->cp_nr_rings = bp->tx_nr_rings_per_tc + bp->rx_nr_rings; - bp->tx_nr_rings = bp->tx_nr_rings_per_tc; + bp->tx_nr_rings = bnxt_tx_nr_rings(bp); avail_msix = bnxt_get_max_func_irqs(bp) - bp->cp_nr_rings; if (avail_msix >= BNXT_MIN_ROCE_CP_RINGS) { @@ -16370,7 +16423,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "Unable to reserve tx rings\n"); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); if (sh) bnxt_trim_dflt_sh_rings(bp); @@ -16379,7 +16432,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp, bool sh) rc = __bnxt_reserve_rings(bp); if (rc && rc != -ENODEV) netdev_warn(bp->dev, "2nd rings reservation failed.\n"); - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); } if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { bp->rx_nr_rings++; @@ -16413,7 +16466,7 @@ static int bnxt_init_dflt_ring_mode(struct bnxt *bp) if (rc) goto init_dflt_ring_err; - bp->tx_nr_rings_per_tc = bp->tx_nr_rings; + bp->tx_nr_rings_per_tc = bnxt_tx_nr_rings_per_tc(bp); bnxt_set_dflt_rfs(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index fda0d3cc6227..06a4c2afdf8a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -1968,10 +1968,12 @@ struct bnxt_ctx_mem_type { #define BNXT_CTX_CA1 FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA1_TRACE #define BNXT_CTX_CA2 FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_CA2_TRACE #define BNXT_CTX_RIGP1 FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_RIGP1_TRACE +#define BNXT_CTX_KONG FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_AFM_KONG_HWRM_TRACE +#define BNXT_CTX_QPC FUNC_BACKING_STORE_QCAPS_V2_REQ_TYPE_ERR_QPC_TRACE #define BNXT_CTX_MAX (BNXT_CTX_TIM + 1) #define BNXT_CTX_L2_MAX (BNXT_CTX_FTQM + 1) -#define BNXT_CTX_V2_MAX (BNXT_CTX_RIGP1 + 1) +#define BNXT_CTX_V2_MAX (BNXT_CTX_QPC + 1) #define BNXT_CTX_INV ((u16)-1) struct bnxt_ctx_mem_info { @@ -2130,6 +2132,7 @@ enum board_idx { NETXTREME_E_P5_VF, NETXTREME_E_P5_VF_HV, NETXTREME_E_P7_VF, + NETXTREME_E_P7_VF_HV, }; #define BNXT_TRACE_BUF_MAGIC_BYTE ((u8)0xbc) @@ -2407,6 +2410,7 @@ struct bnxt { #define BNXT_RSS_CAP_ESP_V4_RSS_CAP BIT(6) #define BNXT_RSS_CAP_ESP_V6_RSS_CAP BIT(7) #define BNXT_RSS_CAP_MULTI_RSS_CTX BIT(8) +#define BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP BIT(9) u8 rss_hash_key[HW_HASH_KEY_SIZE]; u8 rss_hash_key_valid:1; @@ -2422,6 +2426,8 @@ struct bnxt { u8 max_q; u8 num_tc; + u16 max_pfcwd_tmo_ms; + u8 tph_mode; unsigned int current_interval; @@ -2475,6 +2481,7 @@ struct bnxt { #define BNXT_FW_CAP_ENABLE_RDMA_SRIOV BIT_ULL(5) #define BNXT_FW_CAP_ROCE_VF_RESC_MGMT_SUPPORTED BIT_ULL(6) #define BNXT_FW_CAP_KONG_MB_CHNL BIT_ULL(7) + #define BNXT_FW_CAP_ROCE_VF_DYN_ALLOC_SUPPORT BIT_ULL(8) #define BNXT_FW_CAP_OVS_64BIT_HANDLE BIT_ULL(10) #define BNXT_FW_CAP_TRUSTED_VF BIT_ULL(11) #define BNXT_FW_CAP_ERROR_RECOVERY BIT_ULL(13) @@ -2519,6 +2526,8 @@ struct bnxt { #define BNXT_SUPPORTS_MULTI_RSS_CTX(bp) \ (BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \ ((bp)->rss_cap & BNXT_RSS_CAP_MULTI_RSS_CTX)) +#define BNXT_ROCE_VF_DYN_ALLOC_CAP(bp) \ + ((bp)->fw_cap 
& BNXT_FW_CAP_ROCE_VF_DYN_ALLOC_SUPPORT) #define BNXT_SUPPORTS_QUEUE_API(bp) \ (BNXT_PF(bp) && BNXT_SUPPORTS_NTUPLE_VNIC(bp) && \ ((bp)->fw_cap & BNXT_FW_CAP_VNIC_RE_FLUSH)) @@ -2542,6 +2551,7 @@ struct bnxt { u16 fw_rx_stats_ext_size; u16 fw_tx_stats_ext_size; u16 hw_ring_stats_size; + u16 pcie_stat_len; u8 pri2cos_idx[8]; u8 pri2cos_valid; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c index 18d6c94d5cb8..0181ab1f2dfd 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.c @@ -36,6 +36,8 @@ static const u16 bnxt_bstore_to_seg_id[] = { [BNXT_CTX_CA1] = BNXT_CTX_MEM_SEG_CA1, [BNXT_CTX_CA2] = BNXT_CTX_MEM_SEG_CA2, [BNXT_CTX_RIGP1] = BNXT_CTX_MEM_SEG_RIGP1, + [BNXT_CTX_KONG] = BNXT_CTX_MEM_SEG_KONG, + [BNXT_CTX_QPC] = BNXT_CTX_MEM_SEG_QPC, }; static int bnxt_dbg_hwrm_log_buffer_flush(struct bnxt *bp, u16 type, u32 flags, @@ -359,7 +361,7 @@ static u32 bnxt_get_ctx_coredump(struct bnxt *bp, void *buf, u32 offset, if (buf) buf += offset; - for (type = 0 ; type <= BNXT_CTX_RIGP1; type++) { + for (type = 0; type < BNXT_CTX_V2_MAX; type++) { struct bnxt_ctx_mem_type *ctxm = &ctx->ctx_arr[type]; bool trace = bnxt_bs_trace_avail(bp, type); u32 seg_id = bnxt_bstore_to_seg_id[type]; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h index d1cd6387f3ab..c087df88154a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_coredump.h @@ -102,6 +102,8 @@ struct bnxt_driver_segment_record { #define BNXT_CTX_MEM_SEG_CA1 0x9 #define BNXT_CTX_MEM_SEG_CA2 0xa #define BNXT_CTX_MEM_SEG_RIGP1 0xb +#define BNXT_CTX_MEM_SEG_QPC 0xc +#define BNXT_CTX_MEM_SEG_KONG 0xd #define BNXT_CRASH_DUMP_LEN (8 << 20) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c index 4c4581b0342e..02961d93ed35 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_devlink.c @@ -40,12 +40,6 @@ bnxt_dl_flash_update(struct devlink *dl, struct bnxt *bp = bnxt_get_bp_from_dl(dl); int rc; - if (!BNXT_PF(bp)) { - NL_SET_ERR_MSG_MOD(extack, - "flash update not supported from a VF"); - return -EPERM; - } - devlink_flash_update_status_notify(dl, "Preparing to flash", NULL, 0, 0); rc = bnxt_flash_package_from_fw_obj(bp->dev, params->fw, 0, extack); if (!rc) @@ -220,7 +214,7 @@ __bnxt_dl_reporter_create(struct bnxt *bp, { struct devlink_health_reporter *reporter; - reporter = devlink_health_reporter_create(bp->dl, ops, 0, bp); + reporter = devlink_health_reporter_create(bp->dl, ops, bp); if (IS_ERR(reporter)) { netdev_warn(bp->dev, "Failed to create %s health reporter, rc = %ld\n", ops->name, PTR_ERR(reporter)); @@ -1080,16 +1074,9 @@ static int __bnxt_hwrm_nvm_req(struct bnxt *bp, static int bnxt_hwrm_nvm_req(struct bnxt *bp, u32 param_id, void *msg, union devlink_param_value *val) { - struct hwrm_nvm_get_variable_input *req = msg; const struct bnxt_dl_nvm_param *nvm_param; int i; - /* Get/Set NVM CFG parameter is supported only on PFs */ - if (BNXT_VF(bp)) { - hwrm_req_drop(bp, req); - return -EPERM; - } - for (i = 0; i < ARRAY_SIZE(nvm_params); i++) { nvm_param = &nvm_params[i]; if (nvm_param->id == param_id) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 1b37612b1c01..41686a6f84b5 100644 --- 
a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -1584,6 +1584,8 @@ static u64 get_ethtool_ipv6_rss(struct bnxt *bp) { if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6) return RXH_IP_SRC | RXH_IP_DST; + if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL) + return RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL; return 0; } @@ -1662,13 +1664,18 @@ static int bnxt_set_rxfh_fields(struct net_device *dev, if (cmd->data == RXH_4TUPLE) tuple = 4; - else if (cmd->data == RXH_2TUPLE) + else if (cmd->data == RXH_2TUPLE || + cmd->data == (RXH_2TUPLE | RXH_IP6_FL)) tuple = 2; else if (!cmd->data) tuple = 0; else return -EINVAL; + if (cmd->data & RXH_IP6_FL && + !(bp->rss_cap & BNXT_RSS_CAP_IPV6_FLOW_LABEL_RSS_CAP)) + return -EINVAL; + if (cmd->flow_type == TCP_V4_FLOW) { rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4; if (tuple == 4) @@ -1732,10 +1739,15 @@ static int bnxt_set_rxfh_fields(struct net_device *dev, case AH_V6_FLOW: case ESP_V6_FLOW: case IPV6_FLOW: - if (tuple == 2) + rss_hash_cfg &= ~(VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL); + if (!tuple) + break; + if (cmd->data & RXH_IP6_FL) + rss_hash_cfg |= + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6_FLOW_LABEL; + else if (tuple == 2) rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6; - else if (!tuple) - rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6; break; } @@ -2049,38 +2061,52 @@ static void bnxt_get_drvinfo(struct net_device *dev, static int bnxt_get_regs_len(struct net_device *dev) { struct bnxt *bp = netdev_priv(dev); - int reg_len; if (!BNXT_PF(bp)) return -EOPNOTSUPP; - reg_len = BNXT_PXP_REG_LEN; + return BNXT_PXP_REG_LEN + bp->pcie_stat_len; +} + +static void * +__bnxt_hwrm_pcie_qstats(struct bnxt *bp, struct hwrm_pcie_qstats_input *req) +{ + struct pcie_ctx_hw_stats_v2 *hw_pcie_stats; + dma_addr_t hw_pcie_stats_addr; + int rc; + + hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats), + &hw_pcie_stats_addr); + if (!hw_pcie_stats) + return NULL; - if (bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED) - reg_len += sizeof(struct pcie_ctx_hw_stats); + req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats)); + req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); + rc = hwrm_req_send(bp, req); - return reg_len; + return rc ? 
NULL : hw_pcie_stats; } #define BNXT_PCIE_32B_ENTRY(start, end) \ - { offsetof(struct pcie_ctx_hw_stats, start), \ - offsetof(struct pcie_ctx_hw_stats, end) } + { offsetof(struct pcie_ctx_hw_stats_v2, start),\ + offsetof(struct pcie_ctx_hw_stats_v2, end) } static const struct { u16 start; u16 end; } bnxt_pcie_32b_entries[] = { BNXT_PCIE_32B_ENTRY(pcie_ltssm_histogram[0], pcie_ltssm_histogram[3]), + BNXT_PCIE_32B_ENTRY(pcie_tl_credit_nph_histogram[0], unused_1), + BNXT_PCIE_32B_ENTRY(pcie_rd_latency_histogram[0], unused_2), }; static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { - struct pcie_ctx_hw_stats *hw_pcie_stats; + struct hwrm_pcie_qstats_output *resp; struct hwrm_pcie_qstats_input *req; struct bnxt *bp = netdev_priv(dev); - dma_addr_t hw_pcie_stats_addr; - int rc; + u8 *src; regs->version = 0; if (!(bp->fw_dbg_cap & DBG_QCAPS_RESP_FLAGS_REG_ACCESS_RESTRICTED)) @@ -2092,24 +2118,21 @@ static void bnxt_get_regs(struct net_device *dev, struct ethtool_regs *regs, if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS)) return; - hw_pcie_stats = hwrm_req_dma_slice(bp, req, sizeof(*hw_pcie_stats), - &hw_pcie_stats_addr); - if (!hw_pcie_stats) { - hwrm_req_drop(bp, req); - return; - } - - regs->version = 1; - hwrm_req_hold(bp, req); /* hold on to slice */ - req->pcie_stat_size = cpu_to_le16(sizeof(*hw_pcie_stats)); - req->pcie_stat_host_addr = cpu_to_le64(hw_pcie_stats_addr); - rc = hwrm_req_send(bp, req); - if (!rc) { + resp = hwrm_req_hold(bp, req); + src = __bnxt_hwrm_pcie_qstats(bp, req); + if (src) { u8 *dst = (u8 *)(_p + BNXT_PXP_REG_LEN); - u8 *src = (u8 *)hw_pcie_stats; - int i, j; + int i, j, len; + + len = min(bp->pcie_stat_len, le16_to_cpu(resp->pcie_stat_size)); + if (len <= sizeof(struct pcie_ctx_hw_stats)) + regs->version = 1; + else if (len < sizeof(struct pcie_ctx_hw_stats_v2)) + regs->version = 2; + else + regs->version = 3; - for (i = 0, j = 0; i < sizeof(*hw_pcie_stats); ) { + for (i = 0, j = 0; i < len; ) { if (i >= bnxt_pcie_32b_entries[j].start && i <= bnxt_pcie_32b_entries[j].end) { u32 *dst32 = (u32 *)(dst + i); @@ -3185,7 +3208,8 @@ static int bnxt_get_fecparam(struct net_device *dev, } static void bnxt_get_fec_stats(struct net_device *dev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct bnxt *bp = netdev_priv(dev); u64 *rx; @@ -4376,12 +4400,42 @@ static int bnxt_get_eee(struct net_device *dev, struct ethtool_keee *edata) return 0; } +static int bnxt_hwrm_pfcwd_qcfg(struct bnxt *bp, u16 *val) +{ + struct hwrm_queue_pfcwd_timeout_qcfg_output *resp; + struct hwrm_queue_pfcwd_timeout_qcfg_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_QUEUE_PFCWD_TIMEOUT_QCFG); + if (rc) + return rc; + resp = hwrm_req_hold(bp, req); + rc = hwrm_req_send(bp, req); + if (!rc) + *val = le16_to_cpu(resp->pfcwd_timeout_value); + hwrm_req_drop(bp, req); + return rc; +} + +static int bnxt_hwrm_pfcwd_cfg(struct bnxt *bp, u16 val) +{ + struct hwrm_queue_pfcwd_timeout_cfg_input *req; + int rc; + + rc = hwrm_req_init(bp, req, HWRM_QUEUE_PFCWD_TIMEOUT_CFG); + if (rc) + return rc; + req->pfcwd_timeout_value = cpu_to_le16(val); + rc = hwrm_req_send(bp, req); + return rc; +} + static int bnxt_set_tunable(struct net_device *dev, const struct ethtool_tunable *tuna, const void *data) { struct bnxt *bp = netdev_priv(dev); - u32 rx_copybreak; + u32 rx_copybreak, val; switch (tuna->id) { case ETHTOOL_RX_COPYBREAK: @@ -4394,6 +4448,15 @@ static int bnxt_set_tunable(struct net_device *dev, 
bp->rx_copybreak = rx_copybreak; } return 0; + case ETHTOOL_PFC_PREVENTION_TOUT: + if (BNXT_VF(bp) || !bp->max_pfcwd_tmo_ms) + return -EOPNOTSUPP; + + val = *(u16 *)data; + if (val > bp->max_pfcwd_tmo_ms && + val != PFC_STORM_PREVENTION_AUTO) + return -EINVAL; + return bnxt_hwrm_pfcwd_cfg(bp, val); default: return -EOPNOTSUPP; } @@ -4408,6 +4471,10 @@ static int bnxt_get_tunable(struct net_device *dev, case ETHTOOL_RX_COPYBREAK: *(u32 *)data = bp->rx_copybreak; break; + case ETHTOOL_PFC_PREVENTION_TOUT: + if (!bp->max_pfcwd_tmo_ms) + return -EOPNOTSUPP; + return bnxt_hwrm_pfcwd_qcfg(bp, data); default: return -EOPNOTSUPP; } @@ -5254,6 +5321,26 @@ static int bnxt_get_ts_info(struct net_device *dev, return 0; } +static void bnxt_hwrm_pcie_qstats(struct bnxt *bp) +{ + struct hwrm_pcie_qstats_output *resp; + struct hwrm_pcie_qstats_input *req; + + bp->pcie_stat_len = 0; + if (!(bp->fw_cap & BNXT_FW_CAP_PCIE_STATS_SUPPORTED)) + return; + + if (hwrm_req_init(bp, req, HWRM_PCIE_QSTATS)) + return; + + resp = hwrm_req_hold(bp, req); + if (__bnxt_hwrm_pcie_qstats(bp, req)) + bp->pcie_stat_len = min_t(u16, + le16_to_cpu(resp->pcie_stat_size), + sizeof(struct pcie_ctx_hw_stats_v2)); + hwrm_req_drop(bp, req); +} + void bnxt_ethtool_init(struct bnxt *bp) { struct hwrm_selftest_qlist_output *resp; @@ -5262,6 +5349,7 @@ void bnxt_ethtool_init(struct bnxt *bp) struct net_device *dev = bp->dev; int i, rc; + bnxt_hwrm_pcie_qstats(bp); if (!(bp->fw_cap & BNXT_FW_CAP_PKG_VER)) bnxt_get_pkgver(dev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index ca660e6d28a4..db81cf6d5289 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -560,10 +560,11 @@ static int bnxt_hwrm_ptp_cfg(struct bnxt *bp) return bnxt_ptp_cfg_tstamp_filters(bp); } -int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +int bnxt_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *stmpconf, + struct netlink_ext_ack *extack) { struct bnxt *bp = netdev_priv(dev); - struct hwtstamp_config stmpconf; struct bnxt_ptp_cfg *ptp; u16 old_rxctl; int old_rx_filter, rc; @@ -573,17 +574,14 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) if (!ptp) return -EOPNOTSUPP; - if (copy_from_user(&stmpconf, ifr->ifr_data, sizeof(stmpconf))) - return -EFAULT; - - if (stmpconf.tx_type != HWTSTAMP_TX_ON && - stmpconf.tx_type != HWTSTAMP_TX_OFF) + if (stmpconf->tx_type != HWTSTAMP_TX_ON && + stmpconf->tx_type != HWTSTAMP_TX_OFF) return -ERANGE; old_rx_filter = ptp->rx_filter; old_rxctl = ptp->rxctl; old_tx_tstamp_en = ptp->tx_tstamp_en; - switch (stmpconf.rx_filter) { + switch (stmpconf->rx_filter) { case HWTSTAMP_FILTER_NONE: ptp->rxctl = 0; ptp->rx_filter = HWTSTAMP_FILTER_NONE; @@ -616,7 +614,7 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) return -ERANGE; } - if (stmpconf.tx_type == HWTSTAMP_TX_ON) + if (stmpconf->tx_type == HWTSTAMP_TX_ON) ptp->tx_tstamp_en = 1; else ptp->tx_tstamp_en = 0; @@ -625,9 +623,8 @@ int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) if (rc) goto ts_set_err; - stmpconf.rx_filter = ptp->rx_filter; - return copy_to_user(ifr->ifr_data, &stmpconf, sizeof(stmpconf)) ? 
- -EFAULT : 0; + stmpconf->rx_filter = ptp->rx_filter; + return 0; ts_set_err: ptp->rx_filter = old_rx_filter; @@ -636,22 +633,22 @@ ts_set_err: return rc; } -int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +int bnxt_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *stmpconf) { struct bnxt *bp = netdev_priv(dev); - struct hwtstamp_config stmpconf; struct bnxt_ptp_cfg *ptp; ptp = bp->ptp_cfg; if (!ptp) return -EOPNOTSUPP; - stmpconf.flags = 0; - stmpconf.tx_type = ptp->tx_tstamp_en ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + stmpconf->flags = 0; + stmpconf->tx_type = ptp->tx_tstamp_en ? HWTSTAMP_TX_ON + : HWTSTAMP_TX_OFF; - stmpconf.rx_filter = ptp->rx_filter; - return copy_to_user(ifr->ifr_data, &stmpconf, sizeof(stmpconf)) ? - -EFAULT : 0; + stmpconf->rx_filter = ptp->rx_filter; + return 0; } static int bnxt_map_regs(struct bnxt *bp, u32 *reg_arr, int count, int reg_win) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 0481161d26ef..8cc2fae47644 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -160,8 +160,11 @@ void bnxt_ptp_update_current_time(struct bnxt *bp); void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2); int bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp); void bnxt_ptp_reapply_pps(struct bnxt *bp); -int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); -int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); +int bnxt_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *stmpconf, + struct netlink_ext_ack *extack); +int bnxt_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *stmpconf); void bnxt_ptp_free_txts_skbs(struct bnxt_ptp_cfg *ptp); int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod); void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 480e18a32caa..80fed2c07b9e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -541,6 +541,13 @@ static void bnxt_hwrm_roce_sriov_cfg(struct bnxt *bp, int num_vfs) if (rc) goto err; + /* In case of VF Dynamic resource allocation, driver will provision + * maximum resources to all the VFs. FW will dynamically allocate + * resources to VFs on the fly, so always divide the resources by 1. + */ + if (BNXT_ROCE_VF_DYN_ALLOC_CAP(bp)) + num_vfs = 1; + cfg_req->fid = cpu_to_le16(0xffff); cfg_req->enables2 = cpu_to_le32(FUNC_CFG_REQ_ENABLES2_ROCE_MAX_AV_PER_VF | @@ -734,7 +741,7 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs) FUNC_CFG_REQ_ENABLES_NUM_VNICS | FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS); - mtu = bp->dev->mtu + ETH_HLEN + VLAN_HLEN; + mtu = bp->dev->mtu + VLAN_ETH_HLEN; req->mru = cpu_to_le16(mtu); req->admin_mtu = cpu_to_le16(mtu); @@ -919,7 +926,7 @@ err_out1: return rc; } -void bnxt_sriov_disable(struct bnxt *bp) +void __bnxt_sriov_disable(struct bnxt *bp) { u16 num_vfs = pci_num_vf(bp->pdev); @@ -943,6 +950,14 @@ void bnxt_sriov_disable(struct bnxt *bp) devl_unlock(bp->dl); bnxt_free_vf_resources(bp); +} + +static void bnxt_sriov_disable(struct bnxt *bp) +{ + if (!pci_num_vf(bp->pdev)) + return; + + __bnxt_sriov_disable(bp); /* Reclaim all resources for the PF. 
*/ rtnl_lock(); @@ -1321,7 +1336,7 @@ int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset) return 0; } -void bnxt_sriov_disable(struct bnxt *bp) +void __bnxt_sriov_disable(struct bnxt *bp) { } diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index 9a4bacba477b..e4979d729312 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -38,7 +38,7 @@ bool bnxt_is_trusted_vf(struct bnxt *bp, struct bnxt_vf_info *vf); int bnxt_set_vf_trust(struct net_device *dev, int vf_id, bool trust); int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs); int bnxt_cfg_hw_sriov(struct bnxt *bp, int *num_vfs, bool reset); -void bnxt_sriov_disable(struct bnxt *); +void __bnxt_sriov_disable(struct bnxt *bp); void bnxt_hwrm_exec_fwd_req(struct bnxt *); void bnxt_update_vf_mac(struct bnxt *); int bnxt_approve_mac(struct bnxt *, const u8 *, bool); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c index d72fd248f3aa..2d66bf59cd64 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c @@ -244,7 +244,7 @@ bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions, offset < offset_of_ip6_daddr + 16) { actions->nat.src_xlate = false; idx = (offset - offset_of_ip6_daddr) / 4; - actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val); + actions->nat.l3.ipv6.daddr.s6_addr32[idx] = htonl(val); } else { netdev_err(bp->dev, "%s: IPv6_hdr: Invalid pedit field\n", diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c index 58d579dca3f1..3e77a96e5a3e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c @@ -468,9 +468,8 @@ bnxt_xdp_build_skb(struct bnxt *bp, struct sk_buff *skb, u8 num_frags, if (!skb) return NULL; - xdp_update_skb_shared_info(skb, num_frags, - sinfo->xdp_frags_size, - BNXT_RX_PAGE_SIZE * num_frags, - xdp_buff_is_frag_pfmemalloc(xdp)); + xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size, + BNXT_RX_PAGE_SIZE * num_frags, + xdp_buff_get_skb_flags(xdp)); return skb; } diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c index a9040c42d2ff..6e97a5a7daaf 100644 --- a/drivers/net/ethernet/broadcom/cnic.c +++ b/drivers/net/ethernet/broadcom/cnic.c @@ -4230,8 +4230,7 @@ static void cnic_cm_stop_bnx2x_hw(struct cnic_dev *dev) cnic_bnx2x_delete_wait(dev, 0); - cancel_delayed_work(&cp->delete_task); - flush_workqueue(cnic_wq); + cancel_delayed_work_sync(&cp->delete_task); if (atomic_read(&cp->iscsi_conn) != 0) netdev_warn(dev->netdev, "%d iSCSI connections not destroyed\n", diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index b4dc93a48718..7f00ec7fd7b9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -13929,22 +13929,20 @@ static void tg3_self_test(struct net_device *dev, struct ethtool_test *etest, } -static int tg3_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) +static int tg3_hwtstamp_set(struct net_device *dev, + struct kernel_hwtstamp_config *stmpconf, + struct netlink_ext_ack *extack) { struct tg3 *tp = netdev_priv(dev); - struct hwtstamp_config stmpconf; if (!tg3_flag(tp, PTP_CAPABLE)) return -EOPNOTSUPP; - if (copy_from_user(&stmpconf, ifr->ifr_data, sizeof(stmpconf))) - return -EFAULT; - - if (stmpconf.tx_type != 
HWTSTAMP_TX_ON && - stmpconf.tx_type != HWTSTAMP_TX_OFF) + if (stmpconf->tx_type != HWTSTAMP_TX_ON && + stmpconf->tx_type != HWTSTAMP_TX_OFF) return -ERANGE; - switch (stmpconf.rx_filter) { + switch (stmpconf->rx_filter) { case HWTSTAMP_FILTER_NONE: tp->rxptpctl = 0; break; @@ -14004,74 +14002,72 @@ static int tg3_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) tw32(TG3_RX_PTP_CTL, tp->rxptpctl | TG3_RX_PTP_CTL_HWTS_INTERLOCK); - if (stmpconf.tx_type == HWTSTAMP_TX_ON) + if (stmpconf->tx_type == HWTSTAMP_TX_ON) tg3_flag_set(tp, TX_TSTAMP_EN); else tg3_flag_clear(tp, TX_TSTAMP_EN); - return copy_to_user(ifr->ifr_data, &stmpconf, sizeof(stmpconf)) ? - -EFAULT : 0; + return 0; } -static int tg3_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) +static int tg3_hwtstamp_get(struct net_device *dev, + struct kernel_hwtstamp_config *stmpconf) { struct tg3 *tp = netdev_priv(dev); - struct hwtstamp_config stmpconf; if (!tg3_flag(tp, PTP_CAPABLE)) return -EOPNOTSUPP; - stmpconf.flags = 0; - stmpconf.tx_type = (tg3_flag(tp, TX_TSTAMP_EN) ? - HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF); + stmpconf->flags = 0; + stmpconf->tx_type = tg3_flag(tp, TX_TSTAMP_EN) ? + HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; switch (tp->rxptpctl) { case 0: - stmpconf.rx_filter = HWTSTAMP_FILTER_NONE; + stmpconf->rx_filter = HWTSTAMP_FILTER_NONE; break; case TG3_RX_PTP_CTL_RX_PTP_V1_EN | TG3_RX_PTP_CTL_ALL_V1_EVENTS: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT; break; case TG3_RX_PTP_CTL_RX_PTP_V1_EN | TG3_RX_PTP_CTL_SYNC_EVNT: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_SYNC; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_SYNC; break; case TG3_RX_PTP_CTL_RX_PTP_V1_EN | TG3_RX_PTP_CTL_DELAY_REQ: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ; break; case TG3_RX_PTP_CTL_RX_PTP_V2_EN | TG3_RX_PTP_CTL_ALL_V2_EVENTS: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; break; case TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN | TG3_RX_PTP_CTL_ALL_V2_EVENTS: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT; break; case TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN | TG3_RX_PTP_CTL_ALL_V2_EVENTS: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_EVENT; break; case TG3_RX_PTP_CTL_RX_PTP_V2_EN | TG3_RX_PTP_CTL_SYNC_EVNT: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_SYNC; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_SYNC; break; case TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN | TG3_RX_PTP_CTL_SYNC_EVNT: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_SYNC; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_SYNC; break; case TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN | TG3_RX_PTP_CTL_SYNC_EVNT: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_SYNC; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_SYNC; break; case TG3_RX_PTP_CTL_RX_PTP_V2_EN | TG3_RX_PTP_CTL_DELAY_REQ: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_DELAY_REQ; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_DELAY_REQ; break; case TG3_RX_PTP_CTL_RX_PTP_V2_L2_EN | TG3_RX_PTP_CTL_DELAY_REQ: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ; break; case TG3_RX_PTP_CTL_RX_PTP_V2_L4_EN | TG3_RX_PTP_CTL_DELAY_REQ: - stmpconf.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ; + stmpconf->rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ; break; 
default: WARN_ON_ONCE(1); return -ERANGE; } - return copy_to_user(ifr->ifr_data, &stmpconf, sizeof(stmpconf)) ? - -EFAULT : 0; + return 0; } static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -14126,12 +14122,6 @@ static int tg3_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return err; - case SIOCSHWTSTAMP: - return tg3_hwtstamp_set(dev, ifr); - - case SIOCGHWTSTAMP: - return tg3_hwtstamp_get(dev, ifr); - default: /* do nothing */ break; @@ -14407,6 +14397,8 @@ static const struct net_device_ops tg3_netdev_ops = { #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = tg3_poll_controller, #endif + .ndo_hwtstamp_get = tg3_hwtstamp_get, + .ndo_hwtstamp_set = tg3_hwtstamp_set, }; static void tg3_get_eeprom_size(struct tg3 *tp) diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index c9a5c8beb2fa..0830c48973aa 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -184,6 +184,13 @@ #define GEM_DCFG8 0x029C /* Design Config 8 */ #define GEM_DCFG10 0x02A4 /* Design Config 10 */ #define GEM_DCFG12 0x02AC /* Design Config 12 */ +#define GEM_ENST_START_TIME_Q0 0x0800 /* ENST Q0 start time */ +#define GEM_ENST_START_TIME_Q1 0x0804 /* ENST Q1 start time */ +#define GEM_ENST_ON_TIME_Q0 0x0820 /* ENST Q0 on time */ +#define GEM_ENST_ON_TIME_Q1 0x0824 /* ENST Q1 on time */ +#define GEM_ENST_OFF_TIME_Q0 0x0840 /* ENST Q0 off time */ +#define GEM_ENST_OFF_TIME_Q1 0x0844 /* ENST Q1 off time */ +#define GEM_ENST_CONTROL 0x0880 /* ENST control register */ #define GEM_USX_CONTROL 0x0A80 /* High speed PCS control register */ #define GEM_USX_STATUS 0x0A88 /* High speed PCS status register */ @@ -213,14 +220,19 @@ #define GEM_ISR(hw_q) (0x0400 + ((hw_q) << 2)) #define GEM_TBQP(hw_q) (0x0440 + ((hw_q) << 2)) -#define GEM_TBQPH(hw_q) (0x04C8) #define GEM_RBQP(hw_q) (0x0480 + ((hw_q) << 2)) #define GEM_RBQS(hw_q) (0x04A0 + ((hw_q) << 2)) -#define GEM_RBQPH(hw_q) (0x04D4) #define GEM_IER(hw_q) (0x0600 + ((hw_q) << 2)) #define GEM_IDR(hw_q) (0x0620 + ((hw_q) << 2)) #define GEM_IMR(hw_q) (0x0640 + ((hw_q) << 2)) +#define GEM_ENST_START_TIME(hw_q) (0x0800 + ((hw_q) << 2)) +#define GEM_ENST_ON_TIME(hw_q) (0x0820 + ((hw_q) << 2)) +#define GEM_ENST_OFF_TIME(hw_q) (0x0840 + ((hw_q) << 2)) + +/* Bitfields in ENST_CONTROL */ +#define GEM_ENST_DISABLE_QUEUE_OFFSET 16 + /* Bitfields in NCR */ #define MACB_LB_OFFSET 0 /* reserved */ #define MACB_LB_SIZE 1 @@ -554,6 +566,23 @@ #define GEM_HIGH_SPEED_OFFSET 26 #define GEM_HIGH_SPEED_SIZE 1 +/* Bitfields in ENST_START_TIME_Qx. */ +#define GEM_START_TIME_SEC_OFFSET 30 +#define GEM_START_TIME_SEC_SIZE 2 +#define GEM_START_TIME_NSEC_OFFSET 0 +#define GEM_START_TIME_NSEC_SIZE 30 + +/* Bitfields in ENST_ON_TIME_Qx. */ +#define GEM_ON_TIME_OFFSET 0 +#define GEM_ON_TIME_SIZE 17 + +/* Bitfields in ENST_OFF_TIME_Qx. */ +#define GEM_OFF_TIME_OFFSET 0 +#define GEM_OFF_TIME_SIZE 17 + +/* Hardware ENST timing registers granularity */ +#define ENST_TIME_GRANULARITY_NS 8 + /* Bitfields in USX_CONTROL. 
*/ #define GEM_USX_CTRL_SPEED_OFFSET 14 #define GEM_USX_CTRL_SPEED_SIZE 3 @@ -739,6 +768,7 @@ #define MACB_CAPS_MIIONRGMII 0x00000200 #define MACB_CAPS_NEED_TSUCLK 0x00000400 #define MACB_CAPS_QUEUE_DISABLE 0x00000800 +#define MACB_CAPS_QBV 0x00001000 #define MACB_CAPS_PCS 0x01000000 #define MACB_CAPS_HIGH_SPEED 0x02000000 #define MACB_CAPS_CLK_HW_CHG 0x04000000 @@ -1214,10 +1244,13 @@ struct macb_queue { unsigned int IDR; unsigned int IMR; unsigned int TBQP; - unsigned int TBQPH; unsigned int RBQS; unsigned int RBQP; - unsigned int RBQPH; + + /* ENST register offsets for this queue */ + unsigned int ENST_START_TIME; + unsigned int ENST_ON_TIME; + unsigned int ENST_OFF_TIME; /* Lock to protect tx_head and tx_tail */ spinlock_t tx_ptr_lock; @@ -1397,6 +1430,19 @@ static inline bool gem_has_ptp(struct macb *bp) return IS_ENABLED(CONFIG_MACB_USE_HWSTAMP) && (bp->caps & MACB_CAPS_GEM_HAS_PTP); } +/* ENST Helper functions */ +static inline u64 enst_ns_to_hw_units(size_t ns, u32 speed_mbps) +{ + return DIV_ROUND_UP((ns) * (speed_mbps), + (ENST_TIME_GRANULARITY_NS * 1000)); +} + +static inline u64 enst_max_hw_interval(u32 speed_mbps) +{ + return DIV_ROUND_UP(GENMASK(GEM_ON_TIME_SIZE - 1, 0) * + ENST_TIME_GRANULARITY_NS * 1000, (speed_mbps)); +} + /** * struct macb_platform_data - platform data for MACB Ethernet used for PCI registration * @pclk: platform clock @@ -1407,4 +1453,21 @@ struct macb_platform_data { struct clk *hclk; }; +/** + * struct macb_queue_enst_config - Configuration for Enhanced Scheduled Traffic + * @start_time_mask: Bitmask representing the start time for the queue + * @on_time_bytes: "on" time nsec expressed in bytes + * @off_time_bytes: "off" time nsec expressed in bytes + * @queue_id: Identifier for the queue + * + * This structure holds the configuration parameters for an ENST queue, + * used to control time-based transmission scheduling in the MACB driver. 
+ */ +struct macb_queue_enst_config { + u32 start_time_mask; + u32 on_time_bytes; + u32 off_time_bytes; + u8 queue_id; +}; + #endif /* _MACB_H */ diff --git a/drivers/net/ethernet/cadence/macb_main.c b/drivers/net/ethernet/cadence/macb_main.c index 9693f0289435..ca2386b83473 100644 --- a/drivers/net/ethernet/cadence/macb_main.c +++ b/drivers/net/ethernet/cadence/macb_main.c @@ -36,6 +36,7 @@ #include <linux/reset.h> #include <linux/firmware/xlnx-zynqmp.h> #include <linux/inetdevice.h> +#include <net/pkt_sched.h> #include "macb.h" /* This structure is only used for MACB on SiFive FU540 devices */ @@ -51,14 +52,10 @@ struct sifive_fu540_macb_mgmt { #define DEFAULT_RX_RING_SIZE 512 /* must be power of 2 */ #define MIN_RX_RING_SIZE 64 #define MAX_RX_RING_SIZE 8192 -#define RX_RING_BYTES(bp) (macb_dma_desc_get_size(bp) \ - * (bp)->rx_ring_size) #define DEFAULT_TX_RING_SIZE 512 /* must be power of 2 */ #define MIN_TX_RING_SIZE 64 #define MAX_TX_RING_SIZE 4096 -#define TX_RING_BYTES(bp) (macb_dma_desc_get_size(bp) \ - * (bp)->tx_ring_size) /* level of occupied TX descriptors under which we wake up TX process */ #define MACB_TX_WAKEUP_THRESH(bp) (3 * (bp)->tx_ring_size / 4) @@ -278,9 +275,9 @@ static void macb_set_hwaddr(struct macb *bp) u32 bottom; u16 top; - bottom = cpu_to_le32(*((u32 *)bp->dev->dev_addr)); + bottom = get_unaligned_le32(bp->dev->dev_addr); macb_or_gem_writel(bp, SA1B, bottom); - top = cpu_to_le16(*((u16 *)(bp->dev->dev_addr + 4))); + top = get_unaligned_le16(bp->dev->dev_addr + 4); macb_or_gem_writel(bp, SA1T, top); if (gem_has_ptp(bp)) { @@ -495,19 +492,19 @@ static void macb_init_buffers(struct macb *bp) struct macb_queue *queue; unsigned int q; - for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma)); #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) - queue_writel(queue, RBQPH, - upper_32_bits(queue->rx_ring_dma)); + /* Single register for all queues' high 32 bits. 
*/ + if (bp->hw_dma_cap & HW_DMA_CAP_64B) { + macb_writel(bp, RBQPH, + upper_32_bits(bp->queues[0].rx_ring_dma)); + macb_writel(bp, TBQPH, + upper_32_bits(bp->queues[0].tx_ring_dma)); + } #endif + + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + queue_writel(queue, RBQP, lower_32_bits(queue->rx_ring_dma)); queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma)); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) - queue_writel(queue, TBQPH, - upper_32_bits(queue->tx_ring_dma)); -#endif } } @@ -1166,10 +1163,6 @@ static void macb_tx_error_task(struct work_struct *work) /* Reinitialize the TX desc queue */ queue_writel(queue, TBQP, lower_32_bits(queue->tx_ring_dma)); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) - queue_writel(queue, TBQPH, upper_32_bits(queue->tx_ring_dma)); -#endif /* Make TX ring reflect state of hardware */ queue->tx_head = 0; queue->tx_tail = 0; @@ -1223,12 +1216,13 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) { struct macb *bp = queue->bp; u16 queue_index = queue - bp->queues; + unsigned long flags; unsigned int tail; unsigned int head; int packets = 0; u32 bytes = 0; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); head = queue->tx_head; for (tail = queue->tx_tail; tail != head && packets < budget; tail++) { struct macb_tx_skb *tx_skb; @@ -1291,7 +1285,7 @@ static int macb_tx_complete(struct macb_queue *queue, int budget) CIRC_CNT(queue->tx_head, queue->tx_tail, bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp)) netif_wake_subqueue(bp->dev, queue_index); - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return packets; } @@ -1707,8 +1701,9 @@ static void macb_tx_restart(struct macb_queue *queue) { struct macb *bp = queue->bp; unsigned int head_idx, tbqp; + unsigned long flags; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); if (queue->tx_head == queue->tx_tail) goto out_tx_ptr_unlock; @@ -1720,19 +1715,20 @@ static void macb_tx_restart(struct macb_queue *queue) if (tbqp == head_idx) goto out_tx_ptr_unlock; - spin_lock_irq(&bp->lock); + spin_lock(&bp->lock); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irq(&bp->lock); + spin_unlock(&bp->lock); out_tx_ptr_unlock: - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); } static bool macb_tx_complete_pending(struct macb_queue *queue) { bool retval = false; + unsigned long flags; - spin_lock(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, flags); if (queue->tx_head != queue->tx_tail) { /* Make hw descriptor updates visible to CPU */ rmb(); @@ -1740,7 +1736,7 @@ static bool macb_tx_complete_pending(struct macb_queue *queue) if (macb_tx_desc(queue, queue->tx_tail)->ctrl & MACB_BIT(TX_USED)) retval = true; } - spin_unlock(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return retval; } @@ -2308,6 +2304,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) struct macb_queue *queue = &bp->queues[queue_index]; unsigned int desc_cnt, nr_frags, frag_size, f; unsigned int hdrlen; + unsigned long flags; bool is_lso; netdev_tx_t ret = NETDEV_TX_OK; @@ -2368,7 +2365,7 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length); } - spin_lock_bh(&queue->tx_ptr_lock); + spin_lock_irqsave(&queue->tx_ptr_lock, 
flags); /* This is a hard error, log it. */ if (CIRC_SPACE(queue->tx_head, queue->tx_tail, @@ -2392,15 +2389,15 @@ static netdev_tx_t macb_start_xmit(struct sk_buff *skb, struct net_device *dev) netdev_tx_sent_queue(netdev_get_tx_queue(bp->dev, queue_index), skb->len); - spin_lock_irq(&bp->lock); + spin_lock(&bp->lock); macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART)); - spin_unlock_irq(&bp->lock); + spin_unlock(&bp->lock); if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1) netif_stop_subqueue(dev, queue_index); unlock: - spin_unlock_bh(&queue->tx_ptr_lock); + spin_unlock_irqrestore(&queue->tx_ptr_lock, flags); return ret; } @@ -2470,35 +2467,42 @@ static void macb_free_rx_buffers(struct macb *bp) } } +static unsigned int macb_tx_ring_size_per_queue(struct macb *bp) +{ + return macb_dma_desc_get_size(bp) * bp->tx_ring_size + bp->tx_bd_rd_prefetch; +} + +static unsigned int macb_rx_ring_size_per_queue(struct macb *bp) +{ + return macb_dma_desc_get_size(bp) * bp->rx_ring_size + bp->rx_bd_rd_prefetch; +} + static void macb_free_consistent(struct macb *bp) { + struct device *dev = &bp->pdev->dev; struct macb_queue *queue; unsigned int q; - int size; + size_t size; if (bp->rx_ring_tieoff) { - dma_free_coherent(&bp->pdev->dev, macb_dma_desc_get_size(bp), + dma_free_coherent(dev, macb_dma_desc_get_size(bp), bp->rx_ring_tieoff, bp->rx_ring_tieoff_dma); bp->rx_ring_tieoff = NULL; } bp->macbgem_ops.mog_free_rx_buffers(bp); + size = bp->num_queues * macb_tx_ring_size_per_queue(bp); + dma_free_coherent(dev, size, bp->queues[0].tx_ring, bp->queues[0].tx_ring_dma); + + size = bp->num_queues * macb_rx_ring_size_per_queue(bp); + dma_free_coherent(dev, size, bp->queues[0].rx_ring, bp->queues[0].rx_ring_dma); + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { kfree(queue->tx_skb); queue->tx_skb = NULL; - if (queue->tx_ring) { - size = TX_RING_BYTES(bp) + bp->tx_bd_rd_prefetch; - dma_free_coherent(&bp->pdev->dev, size, - queue->tx_ring, queue->tx_ring_dma); - queue->tx_ring = NULL; - } - if (queue->rx_ring) { - size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch; - dma_free_coherent(&bp->pdev->dev, size, - queue->rx_ring, queue->rx_ring_dma); - queue->rx_ring = NULL; - } + queue->tx_ring = NULL; + queue->rx_ring = NULL; } } @@ -2540,35 +2544,45 @@ static int macb_alloc_rx_buffers(struct macb *bp) static int macb_alloc_consistent(struct macb *bp) { + struct device *dev = &bp->pdev->dev; + dma_addr_t tx_dma, rx_dma; struct macb_queue *queue; unsigned int q; - int size; + void *tx, *rx; + size_t size; + + /* + * Upper 32-bits of Tx/Rx DMA descriptor for each queues much match! + * We cannot enforce this guarantee, the best we can do is do a single + * allocation and hope it will land into alloc_pages() that guarantees + * natural alignment of physical addresses. 
+ */ + + size = bp->num_queues * macb_tx_ring_size_per_queue(bp); + tx = dma_alloc_coherent(dev, size, &tx_dma, GFP_KERNEL); + if (!tx || upper_32_bits(tx_dma) != upper_32_bits(tx_dma + size - 1)) + goto out_err; + netdev_dbg(bp->dev, "Allocated %zu bytes for %u TX rings at %08lx (mapped %p)\n", + size, bp->num_queues, (unsigned long)tx_dma, tx); + + size = bp->num_queues * macb_rx_ring_size_per_queue(bp); + rx = dma_alloc_coherent(dev, size, &rx_dma, GFP_KERNEL); + if (!rx || upper_32_bits(rx_dma) != upper_32_bits(rx_dma + size - 1)) + goto out_err; + netdev_dbg(bp->dev, "Allocated %zu bytes for %u RX rings at %08lx (mapped %p)\n", + size, bp->num_queues, (unsigned long)rx_dma, rx); for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - size = TX_RING_BYTES(bp) + bp->tx_bd_rd_prefetch; - queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size, - &queue->tx_ring_dma, - GFP_KERNEL); - if (!queue->tx_ring) - goto out_err; - netdev_dbg(bp->dev, - "Allocated TX ring for queue %u of %d bytes at %08lx (mapped %p)\n", - q, size, (unsigned long)queue->tx_ring_dma, - queue->tx_ring); + queue->tx_ring = tx + macb_tx_ring_size_per_queue(bp) * q; + queue->tx_ring_dma = tx_dma + macb_tx_ring_size_per_queue(bp) * q; + + queue->rx_ring = rx + macb_rx_ring_size_per_queue(bp) * q; + queue->rx_ring_dma = rx_dma + macb_rx_ring_size_per_queue(bp) * q; size = bp->tx_ring_size * sizeof(struct macb_tx_skb); queue->tx_skb = kmalloc(size, GFP_KERNEL); if (!queue->tx_skb) goto out_err; - - size = RX_RING_BYTES(bp) + bp->rx_bd_rd_prefetch; - queue->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size, - &queue->rx_ring_dma, GFP_KERNEL); - if (!queue->rx_ring) - goto out_err; - netdev_dbg(bp->dev, - "Allocated RX ring of %d bytes at %08lx (mapped %p)\n", - size, (unsigned long)queue->rx_ring_dma, queue->rx_ring); } if (bp->macbgem_ops.mog_alloc_rx_buffers(bp)) goto out_err; @@ -3090,7 +3104,7 @@ static void gem_update_stats(struct macb *bp) /* Add GEM_OCTTXH, GEM_OCTRXH */ val = bp->macb_reg_readl(bp, offset + 4); bp->ethtool_stats[i] += ((u64)val) << 32; - *(p++) += ((u64)val) << 32; + *p += ((u64)val) << 32; } } @@ -4084,6 +4098,223 @@ static void macb_restore_features(struct macb *bp) macb_set_rxflow_feature(bp, features); } +static int macb_taprio_setup_replace(struct net_device *ndev, + struct tc_taprio_qopt_offload *conf) +{ + u64 total_on_time = 0, start_time_sec = 0, start_time = conf->base_time; + u32 configured_queues = 0, speed = 0, start_time_nsec; + struct macb_queue_enst_config *enst_queue; + struct tc_taprio_sched_entry *entry; + struct macb *bp = netdev_priv(ndev); + struct ethtool_link_ksettings kset; + struct macb_queue *queue; + size_t i; + int err; + + if (conf->num_entries > bp->num_queues) { + netdev_err(ndev, "Too many TAPRIO entries: %zu > %d queues\n", + conf->num_entries, bp->num_queues); + return -EINVAL; + } + + if (conf->base_time < 0) { + netdev_err(ndev, "Invalid base_time: must be 0 or positive, got %lld\n", + conf->base_time); + return -ERANGE; + } + + /* Get the current link speed */ + err = phylink_ethtool_ksettings_get(bp->phylink, &kset); + if (unlikely(err)) { + netdev_err(ndev, "Failed to get link settings: %d\n", err); + return err; + } + + speed = kset.base.speed; + if (unlikely(speed <= 0)) { + netdev_err(ndev, "Invalid speed: %d\n", speed); + return -EINVAL; + } + + enst_queue = kcalloc(conf->num_entries, sizeof(*enst_queue), GFP_KERNEL); + if (unlikely(!enst_queue)) + return -ENOMEM; + + /* Pre-validate all entries before making any hardware changes */ + for (i 
= 0; i < conf->num_entries; i++) { + entry = &conf->entries[i]; + + if (entry->command != TC_TAPRIO_CMD_SET_GATES) { + netdev_err(ndev, "Entry %zu: unsupported command %d\n", + i, entry->command); + err = -EOPNOTSUPP; + goto cleanup; + } + + /* Validate gate_mask: must be nonzero, single queue, and within range */ + if (!is_power_of_2(entry->gate_mask)) { + netdev_err(ndev, "Entry %zu: gate_mask 0x%x is not a power of 2 (only one queue per entry allowed)\n", + i, entry->gate_mask); + err = -EINVAL; + goto cleanup; + } + + /* gate_mask must not select queues outside the valid queue_mask */ + if (entry->gate_mask & ~bp->queue_mask) { + netdev_err(ndev, "Entry %zu: gate_mask 0x%x exceeds queue range (max_queues=%d)\n", + i, entry->gate_mask, bp->num_queues); + err = -EINVAL; + goto cleanup; + } + + /* Check for start time limits */ + start_time_sec = start_time; + start_time_nsec = do_div(start_time_sec, NSEC_PER_SEC); + if (start_time_sec > GENMASK(GEM_START_TIME_SEC_SIZE - 1, 0)) { + netdev_err(ndev, "Entry %zu: Start time %llu s exceeds hardware limit\n", + i, start_time_sec); + err = -ERANGE; + goto cleanup; + } + + /* Check for on time limit */ + if (entry->interval > enst_max_hw_interval(speed)) { + netdev_err(ndev, "Entry %zu: interval %u ns exceeds hardware limit %llu ns\n", + i, entry->interval, enst_max_hw_interval(speed)); + err = -ERANGE; + goto cleanup; + } + + /* Check for off time limit*/ + if ((conf->cycle_time - entry->interval) > enst_max_hw_interval(speed)) { + netdev_err(ndev, "Entry %zu: off_time %llu ns exceeds hardware limit %llu ns\n", + i, conf->cycle_time - entry->interval, + enst_max_hw_interval(speed)); + err = -ERANGE; + goto cleanup; + } + + enst_queue[i].queue_id = order_base_2(entry->gate_mask); + enst_queue[i].start_time_mask = + (start_time_sec << GEM_START_TIME_SEC_OFFSET) | + start_time_nsec; + enst_queue[i].on_time_bytes = + enst_ns_to_hw_units(entry->interval, speed); + enst_queue[i].off_time_bytes = + enst_ns_to_hw_units(conf->cycle_time - entry->interval, speed); + + configured_queues |= entry->gate_mask; + total_on_time += entry->interval; + start_time += entry->interval; + } + + /* Check total interval doesn't exceed cycle time */ + if (total_on_time > conf->cycle_time) { + netdev_err(ndev, "Total ON %llu ns exceeds cycle time %llu ns\n", + total_on_time, conf->cycle_time); + err = -EINVAL; + goto cleanup; + } + + netdev_dbg(ndev, "TAPRIO setup: %zu entries, base_time=%lld ns, cycle_time=%llu ns\n", + conf->num_entries, conf->base_time, conf->cycle_time); + + /* All validations passed - proceed with hardware configuration */ + scoped_guard(spinlock_irqsave, &bp->lock) { + /* Disable ENST queues if running before configuring */ + gem_writel(bp, ENST_CONTROL, + bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET); + + for (i = 0; i < conf->num_entries; i++) { + queue = &bp->queues[enst_queue[i].queue_id]; + /* Configure queue timing registers */ + queue_writel(queue, ENST_START_TIME, + enst_queue[i].start_time_mask); + queue_writel(queue, ENST_ON_TIME, + enst_queue[i].on_time_bytes); + queue_writel(queue, ENST_OFF_TIME, + enst_queue[i].off_time_bytes); + } + + /* Enable ENST for all configured queues in one write */ + gem_writel(bp, ENST_CONTROL, configured_queues); + } + + netdev_info(ndev, "TAPRIO configuration completed successfully: %zu entries, %d queues configured\n", + conf->num_entries, hweight32(configured_queues)); + +cleanup: + kfree(enst_queue); + return err; +} + +static void macb_taprio_destroy(struct net_device *ndev) +{ + struct macb *bp = 
netdev_priv(ndev); + struct macb_queue *queue; + u32 enst_disable_mask; + unsigned int q; + + netdev_reset_tc(ndev); + enst_disable_mask = bp->queue_mask << GEM_ENST_DISABLE_QUEUE_OFFSET; + + scoped_guard(spinlock_irqsave, &bp->lock) { + /* Single disable command for all queues */ + gem_writel(bp, ENST_CONTROL, enst_disable_mask); + + /* Clear all queue ENST registers in batch */ + for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { + queue_writel(queue, ENST_START_TIME, 0); + queue_writel(queue, ENST_ON_TIME, 0); + queue_writel(queue, ENST_OFF_TIME, 0); + } + } + netdev_info(ndev, "TAPRIO destroy: All gates disabled\n"); +} + +static int macb_setup_taprio(struct net_device *ndev, + struct tc_taprio_qopt_offload *taprio) +{ + struct macb *bp = netdev_priv(ndev); + int err = 0; + + if (unlikely(!(ndev->hw_features & NETIF_F_HW_TC))) + return -EOPNOTSUPP; + + /* Check if Device is in runtime suspend */ + if (unlikely(pm_runtime_suspended(&bp->pdev->dev))) { + netdev_err(ndev, "Device is in runtime suspend\n"); + return -EOPNOTSUPP; + } + + switch (taprio->cmd) { + case TAPRIO_CMD_REPLACE: + err = macb_taprio_setup_replace(ndev, taprio); + break; + case TAPRIO_CMD_DESTROY: + macb_taprio_destroy(ndev); + break; + default: + err = -EOPNOTSUPP; + } + + return err; +} + +static int macb_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + if (!dev || !type_data) + return -EINVAL; + + switch (type) { + case TC_SETUP_QDISC_TAPRIO: + return macb_setup_taprio(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + static const struct net_device_ops macb_netdev_ops = { .ndo_open = macb_open, .ndo_stop = macb_close, @@ -4101,6 +4332,7 @@ static const struct net_device_ops macb_netdev_ops = { .ndo_features_check = macb_features_check, .ndo_hwtstamp_set = macb_hwtstamp_set, .ndo_hwtstamp_get = macb_hwtstamp_get, + .ndo_setup_tc = macb_setup_tc, }; /* Configure peripheral capabilities according to device tree @@ -4305,12 +4537,6 @@ static int macb_init(struct platform_device *pdev) queue->TBQP = GEM_TBQP(hw_q - 1); queue->RBQP = GEM_RBQP(hw_q - 1); queue->RBQS = GEM_RBQS(hw_q - 1); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) { - queue->TBQPH = GEM_TBQPH(hw_q - 1); - queue->RBQPH = GEM_RBQPH(hw_q - 1); - } -#endif } else { /* queue0 uses legacy registers */ queue->ISR = MACB_ISR; @@ -4319,14 +4545,12 @@ static int macb_init(struct platform_device *pdev) queue->IMR = MACB_IMR; queue->TBQP = MACB_TBQP; queue->RBQP = MACB_RBQP; -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - if (bp->hw_dma_cap & HW_DMA_CAP_64B) { - queue->TBQPH = MACB_TBQPH; - queue->RBQPH = MACB_RBQPH; - } -#endif } + queue->ENST_START_TIME = GEM_ENST_START_TIME(hw_q); + queue->ENST_ON_TIME = GEM_ENST_ON_TIME(hw_q); + queue->ENST_OFF_TIME = GEM_ENST_OFF_TIME(hw_q); + /* get irq: here we use the linux queue index, not the hardware * queue index. the queue irq definitions in the device tree * must remove the optional gaps that could exist in the @@ -4379,6 +4603,10 @@ static int macb_init(struct platform_device *pdev) dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM; if (bp->caps & MACB_CAPS_SG_DISABLED) dev->hw_features &= ~NETIF_F_SG; + /* Enable HW_TC if hardware supports QBV */ + if (bp->caps & MACB_CAPS_QBV) + dev->hw_features |= NETIF_F_HW_TC; + dev->features = dev->hw_features; /* Check RX Flow Filters support. 
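
For context on the TAPRIO/ENST arithmetic above: assuming the ENST on/off-time registers are programmed in byte times at the current link speed, the enst_ns_to_hw_units()/enst_max_hw_interval() helpers referenced by macb_taprio_setup_replace() reduce to something like the sketch below. This is illustrative only; the real definitions live elsewhere in the patch (macb.h) and may differ, and the field width used here is a hypothetical placeholder.

/*
 * Illustrative sketch, not part of the patch.  Assumes the ENST on/off-time
 * registers count byte times at line rate, so an interval in nanoseconds
 * maps to bytes as ns * Mbps / 8000, and the widest representable interval
 * is bounded by the register field width.
 */
#include <linux/bits.h>
#include <linux/math64.h>

#define EXAMPLE_ENST_TIME_WIDTH	17	/* hypothetical on/off-time field width */

static inline u32 example_enst_ns_to_bytes(u64 interval_ns, u32 speed_mbps)
{
	/* At speed_mbps Mb/s, one byte (8 bits) takes 8000 / speed_mbps ns */
	return div_u64(interval_ns * speed_mbps, 8000);
}

static inline u64 example_enst_max_interval_ns(u32 speed_mbps)
{
	u64 max_bytes = GENMASK(EXAMPLE_ENST_TIME_WIDTH - 1, 0);

	/* Inverse mapping: largest byte count the field can hold, back to ns */
	return div_u64(max_bytes * 8000, speed_mbps);
}

With this model, the range checks in macb_taprio_setup_replace() simply reject any gate interval (or cycle-time remainder) that would not fit in the on/off-time field at the negotiated speed.
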
@@ -4822,36 +5050,45 @@ static unsigned long fu540_macb_tx_recalc_rate(struct clk_hw *hw, return mgmt->rate; } -static long fu540_macb_tx_round_rate(struct clk_hw *hw, unsigned long rate, - unsigned long *parent_rate) -{ - if (WARN_ON(rate < 2500000)) - return 2500000; - else if (rate == 2500000) - return 2500000; - else if (WARN_ON(rate < 13750000)) - return 2500000; - else if (WARN_ON(rate < 25000000)) - return 25000000; - else if (rate == 25000000) - return 25000000; - else if (WARN_ON(rate < 75000000)) - return 25000000; - else if (WARN_ON(rate < 125000000)) - return 125000000; - else if (rate == 125000000) - return 125000000; - - WARN_ON(rate > 125000000); +static int fu540_macb_tx_determine_rate(struct clk_hw *hw, + struct clk_rate_request *req) +{ + if (WARN_ON(req->rate < 2500000)) + req->rate = 2500000; + else if (req->rate == 2500000) + req->rate = 2500000; + else if (WARN_ON(req->rate < 13750000)) + req->rate = 2500000; + else if (WARN_ON(req->rate < 25000000)) + req->rate = 25000000; + else if (req->rate == 25000000) + req->rate = 25000000; + else if (WARN_ON(req->rate < 75000000)) + req->rate = 25000000; + else if (WARN_ON(req->rate < 125000000)) + req->rate = 125000000; + else if (req->rate == 125000000) + req->rate = 125000000; + else if (WARN_ON(req->rate > 125000000)) + req->rate = 125000000; + else + req->rate = 125000000; - return 125000000; + return 0; } static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate, unsigned long parent_rate) { - rate = fu540_macb_tx_round_rate(hw, rate, &parent_rate); - if (rate != 125000000) + struct clk_rate_request req; + int ret; + + clk_hw_init_rate_request(hw, &req, rate); + ret = fu540_macb_tx_determine_rate(hw, &req); + if (ret != 0) + return ret; + + if (req.rate != 125000000) iowrite32(1, mgmt->reg); else iowrite32(0, mgmt->reg); @@ -4862,7 +5099,7 @@ static int fu540_macb_tx_set_rate(struct clk_hw *hw, unsigned long rate, static const struct clk_ops fu540_c000_ops = { .recalc_rate = fu540_macb_tx_recalc_rate, - .round_rate = fu540_macb_tx_round_rate, + .determine_rate = fu540_macb_tx_determine_rate, .set_rate = fu540_macb_tx_set_rate, }; @@ -5123,8 +5360,9 @@ static const struct macb_config sama7g5_emac_config = { static const struct macb_config versal_config = { .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_JUMBO | - MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | MACB_CAPS_NEED_TSUCLK | - MACB_CAPS_QUEUE_DISABLE, + MACB_CAPS_GEM_HAS_PTP | MACB_CAPS_BD_RD_PREFETCH | + MACB_CAPS_NEED_TSUCLK | MACB_CAPS_QUEUE_DISABLE | + MACB_CAPS_QBV, .dma_burst_length = 16, .clk_init = macb_clk_init, .init = init_reset_optional, @@ -5132,6 +5370,17 @@ static const struct macb_config versal_config = { .usrio = &macb_default_usrio, }; +static const struct macb_config raspberrypi_rp1_config = { + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_CLK_HW_CHG | + MACB_CAPS_JUMBO | + MACB_CAPS_GEM_HAS_PTP, + .dma_burst_length = 16, + .clk_init = macb_clk_init, + .init = macb_init, + .usrio = &macb_default_usrio, + .jumbo_max_len = 10240, +}; + static const struct of_device_id macb_dt_ids[] = { { .compatible = "cdns,at91sam9260-macb", .data = &at91sam9260_config }, { .compatible = "cdns,macb" }, @@ -5152,6 +5401,7 @@ static const struct of_device_id macb_dt_ids[] = { { .compatible = "microchip,mpfs-macb", .data = &mpfs_config }, { .compatible = "microchip,sama7g5-gem", .data = &sama7g5_gem_config }, { .compatible = "microchip,sama7g5-emac", .data = &sama7g5_emac_config }, + { .compatible = "raspberrypi,rp1-gem", .data = 
&raspberrypi_rp1_config }, { .compatible = "xlnx,zynqmp-gem", .data = &zynqmp_config}, { .compatible = "xlnx,zynq-gem", .data = &zynq_config }, { .compatible = "xlnx,versal-gem", .data = &versal_config}, @@ -5399,19 +5649,16 @@ static void macb_remove(struct platform_device *pdev) if (dev) { bp = netdev_priv(dev); + unregister_netdev(dev); phy_exit(bp->sgmii_phy); mdiobus_unregister(bp->mii_bus); mdiobus_free(bp->mii_bus); - unregister_netdev(dev); + device_set_wakeup_enable(&bp->pdev->dev, 0); cancel_work_sync(&bp->hresp_err_bh_work); pm_runtime_disable(&pdev->dev); pm_runtime_dont_use_autosuspend(&pdev->dev); - if (!pm_runtime_suspended(&pdev->dev)) { - macb_clks_disable(bp->pclk, bp->hclk, bp->tx_clk, - bp->rx_clk, bp->tsu_clk); - pm_runtime_set_suspended(&pdev->dev); - } + pm_runtime_set_suspended(&pdev->dev); phylink_destroy(bp->phylink); free_netdev(dev); } @@ -5451,6 +5698,11 @@ static int __maybe_unused macb_suspend(struct device *dev) */ tmp = macb_readl(bp, NCR); macb_writel(bp, NCR, tmp & ~(MACB_BIT(TE) | MACB_BIT(RE))); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (!(bp->caps & MACB_CAPS_QUEUE_DISABLE)) + macb_writel(bp, RBQPH, + upper_32_bits(bp->rx_ring_tieoff_dma)); +#endif for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { /* Disable RX queues */ @@ -5460,10 +5712,6 @@ static int __maybe_unused macb_suspend(struct device *dev) /* Tie off RX queues */ queue_writel(queue, RBQP, lower_32_bits(bp->rx_ring_tieoff_dma)); -#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT - queue_writel(queue, RBQPH, - upper_32_bits(bp->rx_ring_tieoff_dma)); -#endif } /* Disable all interrupts */ queue_writel(queue, IDR, -1); diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index 674c54831875..215dac201b4a 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -472,7 +472,7 @@ int setup_rx_oom_poll_fn(struct net_device *netdev) q_no = lio->linfo.rxpciq[q].s.q_no; wq = &lio->rxq_status_wq[q_no]; wq->wq = alloc_workqueue("rxq-oom-status", - WQ_MEM_RECLAIM, 0); + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!wq->wq) { dev_err(&oct->pci_dev->dev, "unable to create cavium rxq oom status wq\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 1d79f6eaa41f..8e2fcec26ea1 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -526,7 +526,8 @@ static inline int setup_link_status_change_wq(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; lio->link_status_wq.wq = alloc_workqueue("link-status", - WQ_MEM_RECLAIM, 0); + WQ_MEM_RECLAIM | WQ_PERCPU, + 0); if (!lio->link_status_wq.wq) { dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n"); return -1; @@ -659,7 +660,8 @@ static inline int setup_sync_octeon_time_wq(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; lio->sync_octeon_time_wq.wq = - alloc_workqueue("update-octeon-time", WQ_MEM_RECLAIM, 0); + alloc_workqueue("update-octeon-time", + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!lio->sync_octeon_time_wq.wq) { dev_err(&oct->pci_dev->dev, "Unable to create wq to update octeon time\n"); return -1; @@ -1734,7 +1736,7 @@ static inline int setup_tx_poll_fn(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; lio->txq_status_wq.wq = alloc_workqueue("txq-status", - WQ_MEM_RECLAIM, 0); + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if 
(!lio->txq_status_wq.wq) { dev_err(&oct->pci_dev->dev, "unable to create cavium txq status wq\n"); return -1; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c index 62c2eadc33e3..3230dff5ba05 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c @@ -304,7 +304,8 @@ static int setup_link_status_change_wq(struct net_device *netdev) struct octeon_device *oct = lio->oct_dev; lio->link_status_wq.wq = alloc_workqueue("link-status", - WQ_MEM_RECLAIM, 0); + WQ_MEM_RECLAIM | WQ_PERCPU, + 0); if (!lio->link_status_wq.wq) { dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n"); return -1; diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index de8a6ce86ad7..d7cfb20eea00 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -126,13 +126,13 @@ int octeon_init_instr_queue(struct octeon_device *oct, oct->io_qmask.iq |= BIT_ULL(iq_no); /* Set the 32B/64B mode for each input queue */ - oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no); + oct->io_qmask.iq64B |= ((u64)(conf->instr_type == 64) << iq_no); iq->iqcmd_64B = (conf->instr_type == 64); oct->fn_list.setup_iq_regs(oct, iq_no); oct->check_db_wq[iq_no].wq = alloc_workqueue("check_iq_db", - WQ_MEM_RECLAIM, + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!oct->check_db_wq[iq_no].wq) { vfree(iq->request_list); diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index 861050966e18..de1a8335b545 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -39,7 +39,8 @@ int octeon_setup_response_list(struct octeon_device *oct) } spin_lock_init(&oct->cmd_resp_wqlock); - oct->dma_comp_wq.wq = alloc_workqueue("dma-comp", WQ_MEM_RECLAIM, 0); + oct->dma_comp_wq.wq = alloc_workqueue("dma-comp", + WQ_MEM_RECLAIM | WQ_PERCPU, 0); if (!oct->dma_comp_wq.wq) { dev_err(&oct->pci_dev->dev, "failed to create wq thread\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 21495b5dce25..9efb60842ad1 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -1493,13 +1493,17 @@ static int bgx_init_of_phy(struct bgx *bgx) * this cortina phy, for which there is no driver * support, ignore it. */ - if (phy_np && - !of_device_is_compatible(phy_np, "cortina,cs4223-slice")) { - /* Wait until the phy drivers are available */ - pd = of_phy_find_device(phy_np); - if (!pd) - goto defer; - bgx->lmac[lmac].phydev = pd; + if (phy_np) { + if (!of_device_is_compatible(phy_np, "cortina,cs4223-slice")) { + /* Wait until the phy drivers are available */ + pd = of_phy_find_device(phy_np); + if (!pd) { + of_node_put(phy_np); + goto defer; + } + bgx->lmac[lmac].phydev = pd; + } + of_node_put(phy_np); } lmac++; @@ -1515,11 +1519,11 @@ defer: * for phy devices we may have already found. 
*/ while (lmac) { + lmac--; if (bgx->lmac[lmac].phydev) { put_device(&bgx->lmac[lmac].phydev->mdio.dev); bgx->lmac[lmac].phydev = NULL; } - lmac--; } of_node_put(node); return -EPROBE_DEFER; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 6f6525983130..4ee970f3bad6 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -171,7 +171,7 @@ static void chtls_purge_receive_queue(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { - skb_dst_set(skb, (void *)NULL); + skb_dstref_steal(skb); kfree_skb(skb); } } @@ -194,7 +194,7 @@ static void chtls_purge_recv_queue(struct sock *sk) struct sk_buff *skb; while ((skb = __skb_dequeue(&tlsk->sk_recv_queue)) != NULL) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); kfree_skb(skb); } } @@ -505,7 +505,7 @@ static void reset_listen_child(struct sock *child) chtls_send_reset(child, CPL_ABORT_SEND_RST, skb); sock_orphan(child); - INC_ORPHAN_COUNT(child); + tcp_orphan_count_inc(); if (child->sk_state == TCP_CLOSE) inet_csk_destroy_sock(child); } @@ -870,7 +870,7 @@ static void do_abort_syn_rcv(struct sock *child, struct sock *parent) * created only after 3 way handshake is done. */ sock_orphan(child); - INC_ORPHAN_COUNT(child); + tcp_orphan_count_inc(); chtls_release_resources(child); chtls_conn_done(child); } else { @@ -951,6 +951,7 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk, struct tcp_sock *tp; unsigned int mss; struct sock *sk; + u16 user_mss; mss = ntohs(req->tcpopt.mss); sk = csk->sk; @@ -969,8 +970,9 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk, tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4); tp->advmss = dst_metric_advmss(dst); - if (USER_MSS(tp) && tp->advmss > USER_MSS(tp)) - tp->advmss = USER_MSS(tp); + user_mss = USER_MSS(tp); + if (user_mss && tp->advmss > user_mss) + tp->advmss = user_mss; if (tp->advmss > pmtu - iphdrsz) tp->advmss = pmtu - iphdrsz; if (mss && tp->advmss > mss) @@ -1734,7 +1736,7 @@ static int chtls_rx_data(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_recv_data, sk, skb); return 0; } @@ -1786,7 +1788,7 @@ static int chtls_rx_pdu(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_recv_pdu, sk, skb); return 0; } @@ -1855,7 +1857,7 @@ static int chtls_rx_cmp(struct chtls_dev *cdev, struct sk_buff *skb) pr_err("can't find conn. 
for hwtid %u.\n", hwtid); return -EINVAL; } - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); process_cpl_msg(chtls_rx_hdr, sk, skb); return 0; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h index f61ca657601c..29ceff5a5fcb 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.h @@ -90,12 +90,11 @@ struct deferred_skb_cb { #define SND_WSCALE(tp) ((tp)->rx_opt.snd_wscale) #define RCV_WSCALE(tp) ((tp)->rx_opt.rcv_wscale) -#define USER_MSS(tp) ((tp)->rx_opt.user_mss) +#define USER_MSS(tp) (READ_ONCE((tp)->rx_opt.user_mss)) #define TS_RECENT_STAMP(tp) ((tp)->rx_opt.ts_recent_stamp) #define WSCALE_OK(tp) ((tp)->rx_opt.wscale_ok) #define TSTAMP_OK(tp) ((tp)->rx_opt.tstamp_ok) #define SACK_OK(tp) ((tp)->rx_opt.sack_ok) -#define INC_ORPHAN_COUNT(sk) this_cpu_inc(*(sk)->sk_prot->orphan_count) /* TLS SKB */ #define skb_ulp_tls_inline(skb) (ULP_SKB_CB(skb)->ulp.tls.ofld) @@ -171,14 +170,14 @@ static inline void chtls_set_req_addr(struct request_sock *oreq, static inline void chtls_free_skb(struct sock *sk, struct sk_buff *skb) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); __kfree_skb(skb); } static inline void chtls_kfree_skb(struct sock *sk, struct sk_buff *skb) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); } diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 465fa8077964..4036db466e18 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -1434,7 +1434,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, continue; found_ok_skb: if (!skb->len) { - skb_dst_set(skb, NULL); + skb_dstref_steal(skb); __skb_unlink(skb, &sk->sk_receive_queue); kfree_skb(skb); diff --git a/drivers/net/ethernet/dlink/Kconfig b/drivers/net/ethernet/dlink/Kconfig index e9e13654812c..0d77f84c8e7b 100644 --- a/drivers/net/ethernet/dlink/Kconfig +++ b/drivers/net/ethernet/dlink/Kconfig @@ -32,4 +32,24 @@ config DL2K To compile this driver as a module, choose M here: the module will be called dl2k. +config SUNDANCE + tristate "Sundance Alta support" + depends on PCI + select CRC32 + select MII + help + This driver is for the Sundance "Alta" chip. + More specific information and updates are available from + <http://www.scyld.com/network/sundance.html>. + +config SUNDANCE_MMIO + bool "Use MMIO instead of PIO" + depends on SUNDANCE + help + Enable memory-mapped I/O for interaction with Sundance NIC registers. + Do NOT enable this by default, PIO (enabled when MMIO is disabled) + is known to solve bugs on certain chips. + + If unsure, say N. 
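
As a usage illustration of the MMIO/PIO choice above (mirroring, not replacing, the USE_IO_OPS handling that appears later in sundance.c within this same patch):

/*
 * Sketch only: the option is consumed at compile time.  sundance.c defines
 * USE_IO_OPS when CONFIG_SUNDANCE_MMIO is not set and then maps PCI BAR 0
 * (I/O ports) instead of BAR 1 (memory space).
 */
#ifndef CONFIG_SUNDANCE_MMIO
#define USE_IO_OPS 1
#endif

#ifdef USE_IO_OPS
#define EXAMPLE_SUNDANCE_PCI_BAR 0	/* port I/O */
#else
#define EXAMPLE_SUNDANCE_PCI_BAR 1	/* memory-mapped I/O */
#endif
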
+ endif # NET_VENDOR_DLINK diff --git a/drivers/net/ethernet/dlink/Makefile b/drivers/net/ethernet/dlink/Makefile index 38c236eb6007..3ff503c747db 100644 --- a/drivers/net/ethernet/dlink/Makefile +++ b/drivers/net/ethernet/dlink/Makefile @@ -4,3 +4,4 @@ # obj-$(CONFIG_DL2K) += dl2k.o +obj-$(CONFIG_SUNDANCE) += sundance.o diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c index cc60ee454bf9..1996d2e4e3e2 100644 --- a/drivers/net/ethernet/dlink/dl2k.c +++ b/drivers/net/ethernet/dlink/dl2k.c @@ -964,15 +964,18 @@ receive_packet (struct net_device *dev) } else { struct sk_buff *skb; + skb = NULL; /* Small skbuffs for short packets */ - if (pkt_len > copy_thresh) { + if (pkt_len <= copy_thresh) + skb = netdev_alloc_skb_ip_align(dev, pkt_len); + if (!skb) { dma_unmap_single(&np->pdev->dev, desc_to_dma(desc), np->rx_buf_sz, DMA_FROM_DEVICE); skb_put (skb = np->rx_skbuff[entry], pkt_len); np->rx_skbuff[entry] = NULL; - } else if ((skb = netdev_alloc_skb_ip_align(dev, pkt_len))) { + } else { dma_sync_single_for_cpu(&np->pdev->dev, desc_to_dma(desc), np->rx_buf_sz, @@ -1099,7 +1102,7 @@ get_stats (struct net_device *dev) dev->stats.rx_bytes += dr32(OctetRcvOk); dev->stats.tx_bytes += dr32(OctetXmtOk); - dev->stats.multicast = dr32(McstFramesRcvdOk); + dev->stats.multicast += dr32(McstFramesRcvdOk); dev->stats.collisions += dr32(SingleColFrames) + dr32(MultiColFrames); diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c new file mode 100644 index 000000000000..277c50ef773f --- /dev/null +++ b/drivers/net/ethernet/dlink/sundance.c @@ -0,0 +1,1990 @@ +/* sundance.c: A Linux device driver for the Sundance ST201 "Alta". */ +/* + Written 1999-2000 by Donald Becker. + + This software may be used and distributed according to the terms of + the GNU General Public License (GPL), incorporated herein by reference. + Drivers based on or derived from this code fall under the GPL and must + retain the authorship, copyright and license notice. This file is not + a complete program and may only be used when the entire operating + system is licensed under the GPL. + + The author may be reached as becker@scyld.com, or C/O + Scyld Computing Corporation + 410 Severn Ave., Suite 210 + Annapolis MD 21403 + + Support and updates available at + http://www.scyld.com/network/sundance.html + [link no longer provides useful info -jgarzik] + Archives of the mailing list are still available at + https://www.beowulf.org/pipermail/netdrivers/ + +*/ + +#define DRV_NAME "sundance" + +/* The user-configurable values. + These may be modified when a driver module is loaded.*/ +static int debug = 1; /* 1 normal messages, 0 quiet .. 7 verbose. */ +/* Maximum number of multicast addresses to filter (vs. rx-all-multicast). + Typical is a 64 element hash table based on the Ethernet CRC. */ +static const int multicast_filter_limit = 32; + +/* Set the copy breakpoint for the copy-only-tiny-frames scheme. + Setting to > 1518 effectively disables this feature. + This chip can receive into offset buffers, so the Alpha does not + need a copy-align. */ +static int rx_copybreak; +static int flowctrl=1; + +/* media[] specifies the media type the NIC operates at. + autosense Autosensing active media. + 10mbps_hd 10Mbps half duplex. + 10mbps_fd 10Mbps full duplex. + 100mbps_hd 100Mbps half duplex. + 100mbps_fd 100Mbps full duplex. + 0 Autosensing active media. + 1 10Mbps half duplex. + 2 10Mbps full duplex. + 3 100Mbps half duplex. + 4 100Mbps full duplex. 
+*/ +#define MAX_UNITS 8 +static char *media[MAX_UNITS]; + + +/* Operational parameters that are set at compile time. */ + +/* Keep the ring sizes a power of two for compile efficiency. + The compiler will convert <unsigned>'%'<2^N> into a bit mask. + Making the Tx ring too large decreases the effectiveness of channel + bonding and packet priority, and more than 128 requires modifying the + Tx error recovery. + Large receive rings merely waste memory. */ +#define TX_RING_SIZE 32 +#define TX_QUEUE_LEN (TX_RING_SIZE - 1) /* Limit ring entries actually used. */ +#define RX_RING_SIZE 64 +#define RX_BUDGET 32 +#define TX_TOTAL_SIZE TX_RING_SIZE*sizeof(struct netdev_desc) +#define RX_TOTAL_SIZE RX_RING_SIZE*sizeof(struct netdev_desc) + +/* Operational parameters that usually are not changed. */ +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (4*HZ) +#define PKT_BUF_SZ 1536 /* Size of each temporary Rx buffer.*/ + +/* Include files, designed to support most kernel versions 2.0.0 and later. */ +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/timer.h> +#include <linux/errno.h> +#include <linux/ioport.h> +#include <linux/interrupt.h> +#include <linux/pci.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/skbuff.h> +#include <linux/init.h> +#include <linux/bitops.h> +#include <linux/uaccess.h> +#include <asm/processor.h> /* Processor type for cache alignment. */ +#include <asm/io.h> +#include <linux/delay.h> +#include <linux/spinlock.h> +#include <linux/dma-mapping.h> +#include <linux/crc32.h> +#include <linux/ethtool.h> +#include <linux/mii.h> + +MODULE_AUTHOR("Donald Becker <becker@scyld.com>"); +MODULE_DESCRIPTION("Sundance Alta Ethernet driver"); +MODULE_LICENSE("GPL"); + +module_param(debug, int, 0); +module_param(rx_copybreak, int, 0); +module_param_array(media, charp, NULL, 0); +module_param(flowctrl, int, 0); +MODULE_PARM_DESC(debug, "Sundance Alta debug level (0-5)"); +MODULE_PARM_DESC(rx_copybreak, "Sundance Alta copy breakpoint for copy-only-tiny-frames"); +MODULE_PARM_DESC(flowctrl, "Sundance Alta flow control [0|1]"); + +/* + Theory of Operation + +I. Board Compatibility + +This driver is designed for the Sundance Technologies "Alta" ST201 chip. + +II. Board-specific settings + +III. Driver operation + +IIIa. Ring buffers + +This driver uses two statically allocated fixed-size descriptor lists +formed into rings by a branch from the final descriptor to the beginning of +the list. The ring sizes are set at compile time by RX/TX_RING_SIZE. +Some chips explicitly use only 2^N sized rings, while others use a +'next descriptor' pointer that the driver forms into rings. + +IIIb/c. Transmit/Receive Structure + +This driver uses a zero-copy receive and transmit scheme. +The driver allocates full frame size skbuffs for the Rx ring buffers at +open() time and passes the skb->data field to the chip as receive data +buffers. When an incoming frame is less than RX_COPYBREAK bytes long, +a fresh skbuff is allocated and the frame is copied to the new skbuff. +When the incoming frame is larger, the skbuff is passed directly up the +protocol stack. Buffers consumed this way are replaced by newly allocated +skbuffs in a later phase of receives. + +The RX_COPYBREAK value is chosen to trade-off the memory wasted by +using a full-sized skbuff for small frames vs. the copying costs of larger +frames. 
New boards are typically used in generously configured machines +and the underfilled buffers have negligible impact compared to the benefit of +a single allocation size, so the default value of zero results in never +copying packets. When copying is done, the cost is usually mitigated by using +a combined copy/checksum routine. Copying also preloads the cache, which is +most useful with small frames. + +A subtle aspect of the operation is that the IP header at offset 14 in an +ethernet frame isn't longword aligned for further processing. +Unaligned buffers are permitted by the Sundance hardware, so +frames are received into the skbuff at an offset of "+2", 16-byte aligning +the IP header. + +IIId. Synchronization + +The driver runs as two independent, single-threaded flows of control. One +is the send-packet routine, which enforces single-threaded use by the +dev->tbusy flag. The other thread is the interrupt handler, which is single +threaded by the hardware and interrupt handling software. + +The send packet thread has partial control over the Tx ring and 'dev->tbusy' +flag. It sets the tbusy flag whenever it's queuing a Tx packet. If the next +queue slot is empty, it clears the tbusy flag when finished otherwise it sets +the 'lp->tx_full' flag. + +The interrupt handler has exclusive control over the Rx ring and records stats +from the Tx ring. After reaping the stats, it marks the Tx queue entry as +empty by incrementing the dirty_tx mark. Iff the 'lp->tx_full' flag is set, it +clears both the tx_full and tbusy flags. + +IV. Notes + +IVb. References + +The Sundance ST201 datasheet, preliminary version. +The Kendin KS8723 datasheet, preliminary version. +The ICplus IP100 datasheet, preliminary version. +http://www.scyld.com/expert/100mbps.html +http://www.scyld.com/expert/NWay.html + +IVc. Errata + +*/ + +/* Work-around for Kendin chip bugs. */ +#ifndef CONFIG_SUNDANCE_MMIO +#define USE_IO_OPS 1 +#endif + +static const struct pci_device_id sundance_pci_tbl[] = { + { 0x1186, 0x1002, 0x1186, 0x1002, 0, 0, 0 }, + { 0x1186, 0x1002, 0x1186, 0x1003, 0, 0, 1 }, + { 0x1186, 0x1002, 0x1186, 0x1012, 0, 0, 2 }, + { 0x1186, 0x1002, 0x1186, 0x1040, 0, 0, 3 }, + { 0x1186, 0x1002, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 4 }, + { 0x13F0, 0x0201, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 5 }, + { 0x13F0, 0x0200, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 6 }, + { } +}; +MODULE_DEVICE_TABLE(pci, sundance_pci_tbl); + +enum { + netdev_io_size = 128 +}; + +struct pci_id_info { + const char *name; +}; +static const struct pci_id_info pci_id_tbl[] = { + {"D-Link DFE-550TX FAST Ethernet Adapter"}, + {"D-Link DFE-550FX 100Mbps Fiber-optics Adapter"}, + {"D-Link DFE-580TX 4 port Server Adapter"}, + {"D-Link DFE-530TXS FAST Ethernet Adapter"}, + {"D-Link DL10050-based FAST Ethernet Adapter"}, + {"Sundance Technology Alta"}, + {"IC Plus Corporation IP100A FAST Ethernet Adapter"}, + { } /* terminate list. */ +}; + +/* This driver was written to use PCI memory space, however x86-oriented + hardware often uses I/O space accesses. */ + +/* Offsets to the device registers. + Unlike software-only systems, device drivers interact with complex hardware. + It's not useful to define symbolic names for every register bit in the + device. The name can only partially document the semantics and make + the driver longer and more difficult to read. + In general, only the important configuration values or bits changed + multiple times should be defined symbolically. 
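
The copy-only-tiny-frames policy described in the Theory of Operation above reduces to a per-packet length test at receive time. A minimal sketch follows (hypothetical helper, assuming an already-filled ring skb; the driver's own receive path, rx_poll() declared below, follows this pattern inline):

/*
 * Illustrative sketch of the rx_copybreak decision described above;
 * not part of the patch.
 */
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *example_rx_copybreak(struct net_device *dev,
					     struct sk_buff *ring_skb,
					     unsigned int pkt_len,
					     int copybreak)
{
	struct sk_buff *skb;

	if (pkt_len < copybreak &&
	    (skb = netdev_alloc_skb_ip_align(dev, pkt_len)) != NULL) {
		/* Tiny frame: copy into a right-sized skb, leave ring_skb in the ring */
		skb_copy_to_linear_data(skb, ring_skb->data, pkt_len);
		skb_put(skb, pkt_len);
		return skb;
	}

	/* Larger frame: hand the full-sized ring buffer straight up the stack */
	skb_put(ring_skb, pkt_len);
	return ring_skb;
}
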
+*/ +enum alta_offsets { + DMACtrl = 0x00, + TxListPtr = 0x04, + TxDMABurstThresh = 0x08, + TxDMAUrgentThresh = 0x09, + TxDMAPollPeriod = 0x0a, + RxDMAStatus = 0x0c, + RxListPtr = 0x10, + DebugCtrl0 = 0x1a, + DebugCtrl1 = 0x1c, + RxDMABurstThresh = 0x14, + RxDMAUrgentThresh = 0x15, + RxDMAPollPeriod = 0x16, + LEDCtrl = 0x1a, + ASICCtrl = 0x30, + EEData = 0x34, + EECtrl = 0x36, + FlashAddr = 0x40, + FlashData = 0x44, + WakeEvent = 0x45, + TxStatus = 0x46, + TxFrameId = 0x47, + DownCounter = 0x18, + IntrClear = 0x4a, + IntrEnable = 0x4c, + IntrStatus = 0x4e, + MACCtrl0 = 0x50, + MACCtrl1 = 0x52, + StationAddr = 0x54, + MaxFrameSize = 0x5A, + RxMode = 0x5c, + MIICtrl = 0x5e, + MulticastFilter0 = 0x60, + MulticastFilter1 = 0x64, + RxOctetsLow = 0x68, + RxOctetsHigh = 0x6a, + TxOctetsLow = 0x6c, + TxOctetsHigh = 0x6e, + TxFramesOK = 0x70, + RxFramesOK = 0x72, + StatsCarrierError = 0x74, + StatsLateColl = 0x75, + StatsMultiColl = 0x76, + StatsOneColl = 0x77, + StatsTxDefer = 0x78, + RxMissed = 0x79, + StatsTxXSDefer = 0x7a, + StatsTxAbort = 0x7b, + StatsBcastTx = 0x7c, + StatsBcastRx = 0x7d, + StatsMcastTx = 0x7e, + StatsMcastRx = 0x7f, + /* Aliased and bogus values! */ + RxStatus = 0x0c, +}; + +#define ASIC_HI_WORD(x) ((x) + 2) + +enum ASICCtrl_HiWord_bit { + GlobalReset = 0x0001, + RxReset = 0x0002, + TxReset = 0x0004, + DMAReset = 0x0008, + FIFOReset = 0x0010, + NetworkReset = 0x0020, + HostReset = 0x0040, + ResetBusy = 0x0400, +}; + +/* Bits in the interrupt status/mask registers. */ +enum intr_status_bits { + IntrSummary=0x0001, IntrPCIErr=0x0002, IntrMACCtrl=0x0008, + IntrTxDone=0x0004, IntrRxDone=0x0010, IntrRxStart=0x0020, + IntrDrvRqst=0x0040, + StatsMax=0x0080, LinkChange=0x0100, + IntrTxDMADone=0x0200, IntrRxDMADone=0x0400, +}; + +/* Bits in the RxMode register. */ +enum rx_mode_bits { + AcceptAllIPMulti=0x20, AcceptMultiHash=0x10, AcceptAll=0x08, + AcceptBroadcast=0x04, AcceptMulticast=0x02, AcceptMyPhys=0x01, +}; +/* Bits in MACCtrl. */ +enum mac_ctrl0_bits { + EnbFullDuplex=0x20, EnbRcvLargeFrame=0x40, + EnbFlowCtrl=0x100, EnbPassRxCRC=0x200, +}; +enum mac_ctrl1_bits { + StatsEnable=0x0020, StatsDisable=0x0040, StatsEnabled=0x0080, + TxEnable=0x0100, TxDisable=0x0200, TxEnabled=0x0400, + RxEnable=0x0800, RxDisable=0x1000, RxEnabled=0x2000, +}; + +/* Bits in WakeEvent register. */ +enum wake_event_bits { + WakePktEnable = 0x01, + MagicPktEnable = 0x02, + LinkEventEnable = 0x04, + WolEnable = 0x80, +}; + +/* The Rx and Tx buffer descriptors. */ +/* Note that using only 32 bit fields simplifies conversion to big-endian + architectures. */ +struct netdev_desc { + __le32 next_desc; + __le32 status; + struct desc_frag { __le32 addr, length; } frag; +}; + +/* Bits in netdev_desc.status */ +enum desc_status_bits { + DescOwn=0x8000, + DescEndPacket=0x4000, + DescEndRing=0x2000, + LastFrag=0x80000000, + DescIntrOnTx=0x8000, + DescIntrOnDMADone=0x80000000, + DisableAlign = 0x00000001, +}; + +#define PRIV_ALIGN 15 /* Required alignment mask */ +/* Use __attribute__((aligned (L1_CACHE_BYTES))) to maintain alignment + within the structure. */ +#define MII_CNT 4 +struct netdev_private { + /* Descriptor rings first for alignment. */ + struct netdev_desc *rx_ring; + struct netdev_desc *tx_ring; + struct sk_buff* rx_skbuff[RX_RING_SIZE]; + struct sk_buff* tx_skbuff[TX_RING_SIZE]; + dma_addr_t tx_ring_dma; + dma_addr_t rx_ring_dma; + struct timer_list timer; /* Media monitoring timer. 
*/ + struct net_device *ndev; /* backpointer */ + /* ethtool extra stats */ + struct { + u64 tx_multiple_collisions; + u64 tx_single_collisions; + u64 tx_late_collisions; + u64 tx_deferred; + u64 tx_deferred_excessive; + u64 tx_aborted; + u64 tx_bcasts; + u64 rx_bcasts; + u64 tx_mcasts; + u64 rx_mcasts; + } xstats; + /* Frequently used values: keep some adjacent for cache effect. */ + spinlock_t lock; + int msg_enable; + int chip_id; + unsigned int cur_rx, dirty_rx; /* Producer/consumer ring indices */ + unsigned int rx_buf_sz; /* Based on MTU+slack. */ + struct netdev_desc *last_tx; /* Last Tx descriptor used. */ + unsigned int cur_tx, dirty_tx; + /* These values are keep track of the transceiver/media in use. */ + unsigned int flowctrl:1; + unsigned int default_port:4; /* Last dev->if_port value. */ + unsigned int an_enable:1; + unsigned int speed; + unsigned int wol_enabled:1; /* Wake on LAN enabled */ + struct tasklet_struct rx_tasklet; + struct tasklet_struct tx_tasklet; + int budget; + int cur_task; + /* Multicast and receive mode. */ + spinlock_t mcastlock; /* SMP lock multicast updates. */ + u16 mcast_filter[4]; + /* MII transceiver section. */ + struct mii_if_info mii_if; + int mii_preamble_required; + unsigned char phys[MII_CNT]; /* MII device addresses, only first one used. */ + struct pci_dev *pci_dev; + void __iomem *base; + spinlock_t statlock; +}; + +/* The station address location in the EEPROM. */ +#define EEPROM_SA_OFFSET 0x10 +#define DEFAULT_INTR (IntrRxDMADone | IntrPCIErr | \ + IntrDrvRqst | IntrTxDone | StatsMax | \ + LinkChange) + +static int change_mtu(struct net_device *dev, int new_mtu); +static int eeprom_read(void __iomem *ioaddr, int location); +static int mdio_read(struct net_device *dev, int phy_id, int location); +static void mdio_write(struct net_device *dev, int phy_id, int location, int value); +static int mdio_wait_link(struct net_device *dev, int wait); +static int netdev_open(struct net_device *dev); +static void check_duplex(struct net_device *dev); +static void netdev_timer(struct timer_list *t); +static void tx_timeout(struct net_device *dev, unsigned int txqueue); +static void init_ring(struct net_device *dev); +static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev); +static int reset_tx (struct net_device *dev); +static irqreturn_t intr_handler(int irq, void *dev_instance); +static void rx_poll(struct tasklet_struct *t); +static void tx_poll(struct tasklet_struct *t); +static void refill_rx (struct net_device *dev); +static void netdev_error(struct net_device *dev, int intr_status); +static void netdev_error(struct net_device *dev, int intr_status); +static void set_rx_mode(struct net_device *dev); +static int __set_mac_addr(struct net_device *dev); +static int sundance_set_mac_addr(struct net_device *dev, void *data); +static struct net_device_stats *get_stats(struct net_device *dev); +static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd); +static int netdev_close(struct net_device *dev); +static const struct ethtool_ops ethtool_ops; + +static void sundance_reset(struct net_device *dev, unsigned long reset_cmd) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base + ASICCtrl; + int countdown; + + /* ST201 documentation states ASICCtrl is a 32bit register */ + iowrite32 (reset_cmd | ioread32 (ioaddr), ioaddr); + /* ST201 documentation states reset can take up to 1 ms */ + countdown = 10 + 1; + while (ioread32 (ioaddr) & (ResetBusy << 16)) { + if (--countdown == 0) { + 
printk(KERN_WARNING "%s : reset not completed !!\n", dev->name); + break; + } + udelay(100); + } +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void sundance_poll_controller(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + + disable_irq(np->pci_dev->irq); + intr_handler(np->pci_dev->irq, dev); + enable_irq(np->pci_dev->irq); +} +#endif + +static const struct net_device_ops netdev_ops = { + .ndo_open = netdev_open, + .ndo_stop = netdev_close, + .ndo_start_xmit = start_tx, + .ndo_get_stats = get_stats, + .ndo_set_rx_mode = set_rx_mode, + .ndo_eth_ioctl = netdev_ioctl, + .ndo_tx_timeout = tx_timeout, + .ndo_change_mtu = change_mtu, + .ndo_set_mac_address = sundance_set_mac_addr, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = sundance_poll_controller, +#endif +}; + +static int sundance_probe1(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *dev; + struct netdev_private *np; + static int card_idx; + int chip_idx = ent->driver_data; + int irq; + int i; + void __iomem *ioaddr; + u16 mii_ctl; + void *ring_space; + dma_addr_t ring_dma; +#ifdef USE_IO_OPS + int bar = 0; +#else + int bar = 1; +#endif + int phy, phy_end, phy_idx = 0; + __le16 addr[ETH_ALEN / 2]; + + if (pci_enable_device(pdev)) + return -EIO; + pci_set_master(pdev); + + irq = pdev->irq; + + dev = alloc_etherdev(sizeof(*np)); + if (!dev) + return -ENOMEM; + SET_NETDEV_DEV(dev, &pdev->dev); + + if (pci_request_regions(pdev, DRV_NAME)) + goto err_out_netdev; + + ioaddr = pci_iomap(pdev, bar, netdev_io_size); + if (!ioaddr) + goto err_out_res; + + for (i = 0; i < 3; i++) + addr[i] = + cpu_to_le16(eeprom_read(ioaddr, i + EEPROM_SA_OFFSET)); + eth_hw_addr_set(dev, (u8 *)addr); + + np = netdev_priv(dev); + np->ndev = dev; + np->base = ioaddr; + np->pci_dev = pdev; + np->chip_id = chip_idx; + np->msg_enable = (1 << debug) - 1; + spin_lock_init(&np->lock); + spin_lock_init(&np->statlock); + tasklet_setup(&np->rx_tasklet, rx_poll); + tasklet_setup(&np->tx_tasklet, tx_poll); + + ring_space = dma_alloc_coherent(&pdev->dev, TX_TOTAL_SIZE, + &ring_dma, GFP_KERNEL); + if (!ring_space) + goto err_out_cleardev; + np->tx_ring = (struct netdev_desc *)ring_space; + np->tx_ring_dma = ring_dma; + + ring_space = dma_alloc_coherent(&pdev->dev, RX_TOTAL_SIZE, + &ring_dma, GFP_KERNEL); + if (!ring_space) + goto err_out_unmap_tx; + np->rx_ring = (struct netdev_desc *)ring_space; + np->rx_ring_dma = ring_dma; + + np->mii_if.dev = dev; + np->mii_if.mdio_read = mdio_read; + np->mii_if.mdio_write = mdio_write; + np->mii_if.phy_id_mask = 0x1f; + np->mii_if.reg_num_mask = 0x1f; + + /* The chip-specific entries in the device structure. 
*/ + dev->netdev_ops = &netdev_ops; + dev->ethtool_ops = ðtool_ops; + dev->watchdog_timeo = TX_TIMEOUT; + + /* MTU range: 68 - 8191 */ + dev->min_mtu = ETH_MIN_MTU; + dev->max_mtu = 8191; + + pci_set_drvdata(pdev, dev); + + i = register_netdev(dev); + if (i) + goto err_out_unmap_rx; + + printk(KERN_INFO "%s: %s at %p, %pM, IRQ %d.\n", + dev->name, pci_id_tbl[chip_idx].name, ioaddr, + dev->dev_addr, irq); + + np->phys[0] = 1; /* Default setting */ + np->mii_preamble_required++; + + /* + * It seems some phys doesn't deal well with address 0 being accessed + * first + */ + if (sundance_pci_tbl[np->chip_id].device == 0x0200) { + phy = 0; + phy_end = 31; + } else { + phy = 1; + phy_end = 32; /* wraps to zero, due to 'phy & 0x1f' */ + } + for (; phy <= phy_end && phy_idx < MII_CNT; phy++) { + int phyx = phy & 0x1f; + int mii_status = mdio_read(dev, phyx, MII_BMSR); + if (mii_status != 0xffff && mii_status != 0x0000) { + np->phys[phy_idx++] = phyx; + np->mii_if.advertising = mdio_read(dev, phyx, MII_ADVERTISE); + if ((mii_status & 0x0040) == 0) + np->mii_preamble_required++; + printk(KERN_INFO "%s: MII PHY found at address %d, status " + "0x%4.4x advertising %4.4x.\n", + dev->name, phyx, mii_status, np->mii_if.advertising); + } + } + np->mii_preamble_required--; + + if (phy_idx == 0) { + printk(KERN_INFO "%s: No MII transceiver found, aborting. ASIC status %x\n", + dev->name, ioread32(ioaddr + ASICCtrl)); + goto err_out_unregister; + } + + np->mii_if.phy_id = np->phys[0]; + + /* Parse override configuration */ + np->an_enable = 1; + if (card_idx < MAX_UNITS) { + if (media[card_idx] != NULL) { + np->an_enable = 0; + if (strcmp (media[card_idx], "100mbps_fd") == 0 || + strcmp (media[card_idx], "4") == 0) { + np->speed = 100; + np->mii_if.full_duplex = 1; + } else if (strcmp (media[card_idx], "100mbps_hd") == 0 || + strcmp (media[card_idx], "3") == 0) { + np->speed = 100; + np->mii_if.full_duplex = 0; + } else if (strcmp (media[card_idx], "10mbps_fd") == 0 || + strcmp (media[card_idx], "2") == 0) { + np->speed = 10; + np->mii_if.full_duplex = 1; + } else if (strcmp (media[card_idx], "10mbps_hd") == 0 || + strcmp (media[card_idx], "1") == 0) { + np->speed = 10; + np->mii_if.full_duplex = 0; + } else { + np->an_enable = 1; + } + } + if (flowctrl == 1) + np->flowctrl = 1; + } + + /* Fibre PHY? */ + if (ioread32 (ioaddr + ASICCtrl) & 0x80) { + /* Default 100Mbps Full */ + if (np->an_enable) { + np->speed = 100; + np->mii_if.full_duplex = 1; + np->an_enable = 0; + } + } + /* Reset PHY */ + mdio_write (dev, np->phys[0], MII_BMCR, BMCR_RESET); + mdelay (300); + /* If flow control enabled, we need to advertise it.*/ + if (np->flowctrl) + mdio_write (dev, np->phys[0], MII_ADVERTISE, np->mii_if.advertising | 0x0400); + mdio_write (dev, np->phys[0], MII_BMCR, BMCR_ANENABLE|BMCR_ANRESTART); + /* Force media type */ + if (!np->an_enable) { + mii_ctl = 0; + mii_ctl |= (np->speed == 100) ? BMCR_SPEED100 : 0; + mii_ctl |= (np->mii_if.full_duplex) ? BMCR_FULLDPLX : 0; + mdio_write (dev, np->phys[0], MII_BMCR, mii_ctl); + printk (KERN_INFO "Override speed=%d, %s duplex\n", + np->speed, np->mii_if.full_duplex ? "Full" : "Half"); + + } + + /* Perhaps move the reset here? */ + /* Reset the chip to erase previous misconfiguration. 
*/ + if (netif_msg_hw(np)) + printk("ASIC Control is %x.\n", ioread32(ioaddr + ASICCtrl)); + sundance_reset(dev, 0x00ff << 16); + if (netif_msg_hw(np)) + printk("ASIC Control is now %x.\n", ioread32(ioaddr + ASICCtrl)); + + card_idx++; + return 0; + +err_out_unregister: + unregister_netdev(dev); +err_out_unmap_rx: + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, + np->rx_ring, np->rx_ring_dma); +err_out_unmap_tx: + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, + np->tx_ring, np->tx_ring_dma); +err_out_cleardev: + pci_iounmap(pdev, ioaddr); +err_out_res: + pci_release_regions(pdev); +err_out_netdev: + free_netdev (dev); + return -ENODEV; +} + +static int change_mtu(struct net_device *dev, int new_mtu) +{ + if (netif_running(dev)) + return -EBUSY; + WRITE_ONCE(dev->mtu, new_mtu); + return 0; +} + +#define eeprom_delay(ee_addr) ioread32(ee_addr) +/* Read the EEPROM and MII Management Data I/O (MDIO) interfaces. */ +static int eeprom_read(void __iomem *ioaddr, int location) +{ + int boguscnt = 10000; /* Typical 1900 ticks. */ + iowrite16(0x0200 | (location & 0xff), ioaddr + EECtrl); + do { + eeprom_delay(ioaddr + EECtrl); + if (! (ioread16(ioaddr + EECtrl) & 0x8000)) { + return ioread16(ioaddr + EEData); + } + } while (--boguscnt > 0); + return 0; +} + +/* MII transceiver control section. + Read and write the MII registers using software-generated serial + MDIO protocol. See the MII specifications or DP83840A data sheet + for details. + + The maximum data clock rate is 2.5 Mhz. The minimum timing is usually + met by back-to-back 33Mhz PCI cycles. */ +#define mdio_delay() ioread8(mdio_addr) + +enum mii_reg_bits { + MDIO_ShiftClk=0x0001, MDIO_Data=0x0002, MDIO_EnbOutput=0x0004, +}; +#define MDIO_EnbIn (0) +#define MDIO_WRITE0 (MDIO_EnbOutput) +#define MDIO_WRITE1 (MDIO_Data | MDIO_EnbOutput) + +/* Generate the preamble required for initial synchronization and + a few older transceivers. */ +static void mdio_sync(void __iomem *mdio_addr) +{ + int bits = 32; + + /* Establish sync by sending at least 32 logic ones. */ + while (--bits >= 0) { + iowrite8(MDIO_WRITE1, mdio_addr); + mdio_delay(); + iowrite8(MDIO_WRITE1 | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } +} + +static int mdio_read(struct net_device *dev, int phy_id, int location) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *mdio_addr = np->base + MIICtrl; + int mii_cmd = (0xf6 << 10) | (phy_id << 5) | location; + int i, retval = 0; + + if (np->mii_preamble_required) + mdio_sync(mdio_addr); + + /* Shift the read command bits out. */ + for (i = 15; i >= 0; i--) { + int dataval = (mii_cmd & (1 << i)) ? MDIO_WRITE1 : MDIO_WRITE0; + + iowrite8(dataval, mdio_addr); + mdio_delay(); + iowrite8(dataval | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + /* Read the two transition, 16 data, and wire-idle bits. */ + for (i = 19; i > 0; i--) { + iowrite8(MDIO_EnbIn, mdio_addr); + mdio_delay(); + retval = (retval << 1) | ((ioread8(mdio_addr) & MDIO_Data) ? 1 : 0); + iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + return (retval>>1) & 0xffff; +} + +static void mdio_write(struct net_device *dev, int phy_id, int location, int value) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *mdio_addr = np->base + MIICtrl; + int mii_cmd = (0x5002 << 16) | (phy_id << 23) | (location<<18) | value; + int i; + + if (np->mii_preamble_required) + mdio_sync(mdio_addr); + + /* Shift the command bits out. */ + for (i = 31; i >= 0; i--) { + int dataval = (mii_cmd & (1 << i)) ? 
MDIO_WRITE1 : MDIO_WRITE0; + + iowrite8(dataval, mdio_addr); + mdio_delay(); + iowrite8(dataval | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } + /* Clear out extra bits. */ + for (i = 2; i > 0; i--) { + iowrite8(MDIO_EnbIn, mdio_addr); + mdio_delay(); + iowrite8(MDIO_EnbIn | MDIO_ShiftClk, mdio_addr); + mdio_delay(); + } +} + +static int mdio_wait_link(struct net_device *dev, int wait) +{ + int bmsr; + int phy_id; + struct netdev_private *np; + + np = netdev_priv(dev); + phy_id = np->phys[0]; + + do { + bmsr = mdio_read(dev, phy_id, MII_BMSR); + if (bmsr & 0x0004) + return 0; + mdelay(1); + } while (--wait > 0); + return -1; +} + +static int netdev_open(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + const int irq = np->pci_dev->irq; + unsigned long flags; + int i; + + sundance_reset(dev, 0x00ff << 16); + + i = request_irq(irq, intr_handler, IRQF_SHARED, dev->name, dev); + if (i) + return i; + + if (netif_msg_ifup(np)) + printk(KERN_DEBUG "%s: netdev_open() irq %d\n", dev->name, irq); + + init_ring(dev); + + iowrite32(np->rx_ring_dma, ioaddr + RxListPtr); + /* The Tx list pointer is written as packets are queued. */ + + /* Initialize other registers. */ + __set_mac_addr(dev); +#if IS_ENABLED(CONFIG_VLAN_8021Q) + iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize); +#else + iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize); +#endif + if (dev->mtu > 2047) + iowrite32(ioread32(ioaddr + ASICCtrl) | 0x0C, ioaddr + ASICCtrl); + + /* Configure the PCI bus bursts and FIFO thresholds. */ + + if (dev->if_port == 0) + dev->if_port = np->default_port; + + spin_lock_init(&np->mcastlock); + + set_rx_mode(dev); + iowrite16(0, ioaddr + IntrEnable); + iowrite16(0, ioaddr + DownCounter); + /* Set the chip to poll every N*320nsec. */ + iowrite8(100, ioaddr + RxDMAPollPeriod); + iowrite8(127, ioaddr + TxDMAPollPeriod); + /* Fix DFE-580TX packet drop issue */ + if (np->pci_dev->revision >= 0x14) + iowrite8(0x01, ioaddr + DebugCtrl1); + netif_start_queue(dev); + + spin_lock_irqsave(&np->lock, flags); + reset_tx(dev); + spin_unlock_irqrestore(&np->lock, flags); + + iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); + + /* Disable Wol */ + iowrite8(ioread8(ioaddr + WakeEvent) | 0x00, ioaddr + WakeEvent); + np->wol_enabled = 0; + + if (netif_msg_ifup(np)) + printk(KERN_DEBUG "%s: Done netdev_open(), status: Rx %x Tx %x " + "MAC Control %x, %4.4x %4.4x.\n", + dev->name, ioread32(ioaddr + RxStatus), ioread8(ioaddr + TxStatus), + ioread32(ioaddr + MACCtrl0), + ioread16(ioaddr + MACCtrl1), ioread16(ioaddr + MACCtrl0)); + + /* Set the timer to check for link beat. */ + timer_setup(&np->timer, netdev_timer, 0); + np->timer.expires = jiffies + 3*HZ; + add_timer(&np->timer); + + /* Enable interrupts by setting the interrupt mask. 
*/ + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + + return 0; +} + +static void check_duplex(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + int mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); + int negotiated = mii_lpa & np->mii_if.advertising; + int duplex; + + /* Force media */ + if (!np->an_enable || mii_lpa == 0xffff) { + if (np->mii_if.full_duplex) + iowrite16 (ioread16 (ioaddr + MACCtrl0) | EnbFullDuplex, + ioaddr + MACCtrl0); + return; + } + + /* Autonegotiation */ + duplex = (negotiated & 0x0100) || (negotiated & 0x01C0) == 0x0040; + if (np->mii_if.full_duplex != duplex) { + np->mii_if.full_duplex = duplex; + if (netif_msg_link(np)) + printk(KERN_INFO "%s: Setting %s-duplex based on MII #%d " + "negotiated capability %4.4x.\n", dev->name, + duplex ? "full" : "half", np->phys[0], negotiated); + iowrite16(ioread16(ioaddr + MACCtrl0) | (duplex ? 0x20 : 0), ioaddr + MACCtrl0); + } +} + +static void netdev_timer(struct timer_list *t) +{ + struct netdev_private *np = timer_container_of(np, t, timer); + struct net_device *dev = np->mii_if.dev; + void __iomem *ioaddr = np->base; + int next_tick = 10*HZ; + + if (netif_msg_timer(np)) { + printk(KERN_DEBUG "%s: Media selection timer tick, intr status %4.4x, " + "Tx %x Rx %x.\n", + dev->name, ioread16(ioaddr + IntrEnable), + ioread8(ioaddr + TxStatus), ioread32(ioaddr + RxStatus)); + } + check_duplex(dev); + np->timer.expires = jiffies + next_tick; + add_timer(&np->timer); +} + +static void tx_timeout(struct net_device *dev, unsigned int txqueue) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + unsigned long flag; + + netif_stop_queue(dev); + tasklet_disable_in_atomic(&np->tx_tasklet); + iowrite16(0, ioaddr + IntrEnable); + printk(KERN_WARNING "%s: Transmit timed out, TxStatus %2.2x " + "TxFrameId %2.2x," + " resetting...\n", dev->name, ioread8(ioaddr + TxStatus), + ioread8(ioaddr + TxFrameId)); + + { + int i; + for (i=0; i<TX_RING_SIZE; i++) { + printk(KERN_DEBUG "%02x %08llx %08x %08x(%02x) %08x %08x\n", i, + (unsigned long long)(np->tx_ring_dma + i*sizeof(*np->tx_ring)), + le32_to_cpu(np->tx_ring[i].next_desc), + le32_to_cpu(np->tx_ring[i].status), + (le32_to_cpu(np->tx_ring[i].status) >> 2) & 0xff, + le32_to_cpu(np->tx_ring[i].frag.addr), + le32_to_cpu(np->tx_ring[i].frag.length)); + } + printk(KERN_DEBUG "TxListPtr=%08x netif_queue_stopped=%d\n", + ioread32(np->base + TxListPtr), + netif_queue_stopped(dev)); + printk(KERN_DEBUG "cur_tx=%d(%02x) dirty_tx=%d(%02x)\n", + np->cur_tx, np->cur_tx % TX_RING_SIZE, + np->dirty_tx, np->dirty_tx % TX_RING_SIZE); + printk(KERN_DEBUG "cur_rx=%d dirty_rx=%d\n", np->cur_rx, np->dirty_rx); + printk(KERN_DEBUG "cur_task=%d\n", np->cur_task); + } + spin_lock_irqsave(&np->lock, flag); + + /* Stop and restart the chip's Tx processes . */ + reset_tx(dev); + spin_unlock_irqrestore(&np->lock, flag); + + dev->if_port = 0; + + netif_trans_update(dev); /* prevent tx timeout */ + dev->stats.tx_errors++; + if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { + netif_wake_queue(dev); + } + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + tasklet_enable(&np->tx_tasklet); +} + + +/* Initialize the Rx and Tx rings, along with various 'dev' bits. */ +static void init_ring(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int i; + + np->cur_rx = np->cur_tx = 0; + np->dirty_rx = np->dirty_tx = 0; + np->cur_task = 0; + + np->rx_buf_sz = (dev->mtu <= 1520 ? 
PKT_BUF_SZ : dev->mtu + 16); + + /* Initialize all Rx descriptors. */ + for (i = 0; i < RX_RING_SIZE; i++) { + np->rx_ring[i].next_desc = cpu_to_le32(np->rx_ring_dma + + ((i+1)%RX_RING_SIZE)*sizeof(*np->rx_ring)); + np->rx_ring[i].status = 0; + np->rx_ring[i].frag.length = 0; + np->rx_skbuff[i] = NULL; + } + + /* Fill in the Rx buffers. Handle allocation failure gracefully. */ + for (i = 0; i < RX_RING_SIZE; i++) { + dma_addr_t addr; + + struct sk_buff *skb = + netdev_alloc_skb(dev, np->rx_buf_sz + 2); + np->rx_skbuff[i] = skb; + if (skb == NULL) + break; + skb_reserve(skb, 2); /* 16 byte align the IP header. */ + addr = dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) { + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + break; + } + np->rx_ring[i].frag.addr = cpu_to_le32(addr); + np->rx_ring[i].frag.length = cpu_to_le32(np->rx_buf_sz | LastFrag); + } + np->dirty_rx = (unsigned int)(i - RX_RING_SIZE); + + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_skbuff[i] = NULL; + np->tx_ring[i].status = 0; + } +} + +static void tx_poll(struct tasklet_struct *t) +{ + struct netdev_private *np = from_tasklet(np, t, tx_tasklet); + unsigned head = np->cur_task % TX_RING_SIZE; + struct netdev_desc *txdesc = + &np->tx_ring[(np->cur_tx - 1) % TX_RING_SIZE]; + + /* Chain the next pointer */ + for (; np->cur_tx - np->cur_task > 0; np->cur_task++) { + int entry = np->cur_task % TX_RING_SIZE; + txdesc = &np->tx_ring[entry]; + if (np->last_tx) { + np->last_tx->next_desc = cpu_to_le32(np->tx_ring_dma + + entry*sizeof(struct netdev_desc)); + } + np->last_tx = txdesc; + } + /* Indicate the latest descriptor of tx ring */ + txdesc->status |= cpu_to_le32(DescIntrOnTx); + + if (ioread32 (np->base + TxListPtr) == 0) + iowrite32 (np->tx_ring_dma + head * sizeof(struct netdev_desc), + np->base + TxListPtr); +} + +static netdev_tx_t +start_tx (struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + struct netdev_desc *txdesc; + dma_addr_t addr; + unsigned entry; + + /* Calculate the next Tx descriptor entry. */ + entry = np->cur_tx % TX_RING_SIZE; + np->tx_skbuff[entry] = skb; + txdesc = &np->tx_ring[entry]; + + addr = dma_map_single(&np->pci_dev->dev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) + goto drop_frame; + + txdesc->next_desc = 0; + txdesc->status = cpu_to_le32 ((entry << 2) | DisableAlign); + txdesc->frag.addr = cpu_to_le32(addr); + txdesc->frag.length = cpu_to_le32 (skb->len | LastFrag); + + /* Increment cur_tx before tasklet_schedule() */ + np->cur_tx++; + mb(); + /* Schedule a tx_poll() task */ + tasklet_schedule(&np->tx_tasklet); + + /* On some architectures: explicitly flush cache lines here. 
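The producer/consumer bookkeeping used by start_tx() and the completion paths is done with free-running counters: cur_tx and dirty_tx only ever increase, the ring slot is the counter modulo the ring size, and the occupancy is their unsigned difference. A small sketch of why that stays correct across wraparound (the ring size here is illustrative, not necessarily the driver's constant):

#include <stdio.h>

#define TX_RING_SIZE_SKETCH 32u  /* illustrative ring size */

/* Occupancy is the unsigned difference of the free-running counters;
 * it remains correct even after cur_tx wraps around 32 bits. */
static unsigned int tx_occupancy(unsigned int cur_tx, unsigned int dirty_tx)
{
	return cur_tx - dirty_tx;
}

int main(void)
{
	unsigned int cur = 5u, dirty = 0xfffffff7u; /* cur_tx already wrapped past 0 */

	printf("entry=%u occupancy=%u\n",
	       cur % TX_RING_SIZE_SKETCH, tx_occupancy(cur, dirty)); /* occupancy = 14 */
	return 0;
}
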
*/ + if (np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 1 && + !netif_queue_stopped(dev)) { + /* do nothing */ + } else { + netif_stop_queue (dev); + } + if (netif_msg_tx_queued(np)) { + printk (KERN_DEBUG + "%s: Transmit frame #%d queued in slot %d.\n", + dev->name, np->cur_tx, entry); + } + return NETDEV_TX_OK; + +drop_frame: + dev_kfree_skb_any(skb); + np->tx_skbuff[entry] = NULL; + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +} + +/* Reset hardware tx and free all of tx buffers */ +static int +reset_tx (struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + struct sk_buff *skb; + int i; + + /* Reset tx logic, TxListPtr will be cleaned */ + iowrite16 (TxDisable, ioaddr + MACCtrl1); + sundance_reset(dev, (NetworkReset|FIFOReset|DMAReset|TxReset) << 16); + + /* free all tx skbuff */ + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_ring[i].next_desc = 0; + + skb = np->tx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[i].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_kfree_skb_any(skb); + np->tx_skbuff[i] = NULL; + dev->stats.tx_dropped++; + } + } + np->cur_tx = np->dirty_tx = 0; + np->cur_task = 0; + + np->last_tx = NULL; + iowrite8(127, ioaddr + TxDMAPollPeriod); + + iowrite16 (StatsEnable | RxEnable | TxEnable, ioaddr + MACCtrl1); + return 0; +} + +/* The interrupt handler cleans up after the Tx thread, + and schedule a Rx thread work */ +static irqreturn_t intr_handler(int irq, void *dev_instance) +{ + struct net_device *dev = (struct net_device *)dev_instance; + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + int hw_frame_id; + int tx_cnt; + int tx_status; + int handled = 0; + int i; + + do { + int intr_status = ioread16(ioaddr + IntrStatus); + iowrite16(intr_status, ioaddr + IntrStatus); + + if (netif_msg_intr(np)) + printk(KERN_DEBUG "%s: Interrupt, status %4.4x.\n", + dev->name, intr_status); + + if (!(intr_status & DEFAULT_INTR)) + break; + + handled = 1; + + if (intr_status & (IntrRxDMADone)) { + iowrite16(DEFAULT_INTR & ~(IntrRxDone|IntrRxDMADone), + ioaddr + IntrEnable); + if (np->budget < 0) + np->budget = RX_BUDGET; + tasklet_schedule(&np->rx_tasklet); + } + if (intr_status & (IntrTxDone | IntrDrvRqst)) { + tx_status = ioread16 (ioaddr + TxStatus); + for (tx_cnt=32; tx_status & 0x80; --tx_cnt) { + if (netif_msg_tx_done(np)) + printk + ("%s: Transmit status is %2.2x.\n", + dev->name, tx_status); + if (tx_status & 0x1e) { + if (netif_msg_tx_err(np)) + printk("%s: Transmit error status %4.4x.\n", + dev->name, tx_status); + dev->stats.tx_errors++; + if (tx_status & 0x10) + dev->stats.tx_fifo_errors++; + if (tx_status & 0x08) + dev->stats.collisions++; + if (tx_status & 0x04) + dev->stats.tx_fifo_errors++; + if (tx_status & 0x02) + dev->stats.tx_window_errors++; + + /* + ** This reset has been verified on + ** DFE-580TX boards ! phdm@macqel.be. + */ + if (tx_status & 0x10) { /* TxUnderrun */ + /* Restart Tx FIFO and transmitter */ + sundance_reset(dev, (NetworkReset|FIFOReset|TxReset) << 16); + /* No need to reset the Tx pointer here */ + } + /* Restart the Tx. Need to make sure tx enabled */ + i = 10; + do { + iowrite16(ioread16(ioaddr + MACCtrl1) | TxEnable, ioaddr + MACCtrl1); + if (ioread16(ioaddr + MACCtrl1) & TxEnabled) + break; + mdelay(1); + } while (--i); + } + /* Yup, this is a documentation bug. It cost me *hours*. 
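The completion matching a little further down relies on the frame id that start_tx() packed into each descriptor: the ring entry is shifted left by two into the status word and recovered as (status >> 2) & 0xff, then compared against the id the MAC reports in TxStatus/TxFrameId. A minimal sketch of just that encode/decode step; the flag value below is a stand-in, not the hardware's DisableAlign constant:

#include <stdio.h>
#include <stdint.h>

#define DISABLE_ALIGN_SKETCH 0x00000001u  /* placeholder flag bit */

static uint32_t encode_status(unsigned int entry)
{
	return (entry << 2) | DISABLE_ALIGN_SKETCH;
}

static unsigned int decode_frame_id(uint32_t status)
{
	return (status >> 2) & 0xff;
}

int main(void)
{
	uint32_t status = encode_status(17);

	printf("frame id recovered: %u\n", decode_frame_id(status)); /* 17 */
	return 0;
}
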
*/ + iowrite16 (0, ioaddr + TxStatus); + if (tx_cnt < 0) { + iowrite32(5000, ioaddr + DownCounter); + break; + } + tx_status = ioread16 (ioaddr + TxStatus); + } + hw_frame_id = (tx_status >> 8) & 0xff; + } else { + hw_frame_id = ioread8(ioaddr + TxFrameId); + } + + if (np->pci_dev->revision >= 0x14) { + spin_lock(&np->lock); + for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { + int entry = np->dirty_tx % TX_RING_SIZE; + struct sk_buff *skb; + int sw_frame_id; + sw_frame_id = (le32_to_cpu( + np->tx_ring[entry].status) >> 2) & 0xff; + if (sw_frame_id == hw_frame_id && + !(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) + break; + if (sw_frame_id == (hw_frame_id + 1) % + TX_RING_SIZE) + break; + skb = np->tx_skbuff[entry]; + /* Free the original skb. */ + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[entry].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_consume_skb_irq(np->tx_skbuff[entry]); + np->tx_skbuff[entry] = NULL; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; + } + spin_unlock(&np->lock); + } else { + spin_lock(&np->lock); + for (; np->cur_tx - np->dirty_tx > 0; np->dirty_tx++) { + int entry = np->dirty_tx % TX_RING_SIZE; + struct sk_buff *skb; + if (!(le32_to_cpu(np->tx_ring[entry].status) + & 0x00010000)) + break; + skb = np->tx_skbuff[entry]; + /* Free the original skb. */ + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[entry].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_consume_skb_irq(np->tx_skbuff[entry]); + np->tx_skbuff[entry] = NULL; + np->tx_ring[entry].frag.addr = 0; + np->tx_ring[entry].frag.length = 0; + } + spin_unlock(&np->lock); + } + + if (netif_queue_stopped(dev) && + np->cur_tx - np->dirty_tx < TX_QUEUE_LEN - 4) { + /* The ring is no longer full, clear busy flag. */ + netif_wake_queue (dev); + } + /* Abnormal error summary/uncommon events handlers. */ + if (intr_status & (IntrPCIErr | LinkChange | StatsMax)) + netdev_error(dev, intr_status); + } while (0); + if (netif_msg_intr(np)) + printk(KERN_DEBUG "%s: exiting interrupt, status=%#4.4x.\n", + dev->name, ioread16(ioaddr + IntrStatus)); + return IRQ_RETVAL(handled); +} + +static void rx_poll(struct tasklet_struct *t) +{ + struct netdev_private *np = from_tasklet(np, t, rx_tasklet); + struct net_device *dev = np->ndev; + int entry = np->cur_rx % RX_RING_SIZE; + int boguscnt = np->budget; + void __iomem *ioaddr = np->base; + int received = 0; + + /* If EOP is set on the next entry, it's a new packet. Send it up. */ + while (1) { + struct netdev_desc *desc = &(np->rx_ring[entry]); + u32 frame_status = le32_to_cpu(desc->status); + int pkt_len; + + if (--boguscnt < 0) { + goto not_done; + } + if (!(frame_status & DescOwn)) + break; + pkt_len = frame_status & 0x1fff; /* Chip omits the CRC. */ + if (netif_msg_rx_status(np)) + printk(KERN_DEBUG " netdev_rx() status was %8.8x.\n", + frame_status); + if (frame_status & 0x001f4000) { + /* There was a error. 
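rx_poll() treats the descriptor status as a packed word: the low 13 bits carry the frame length (the chip has already stripped the CRC) and 0x001f4000 is the error summary, with the individual cause bits tested afterwards. A standalone sketch of that decode, reusing the masks that appear in the hunk:

#include <stdio.h>
#include <stdint.h>

/* Masks as used in rx_poll(). */
#define RX_LEN_MASK     0x00001fffu  /* frame length, CRC already stripped */
#define RX_ERR_SUMMARY  0x001f4000u
#define RX_ERR_OVERSIZE 0x00100000u
#define RX_ERR_FIFO     0x00010000u
#define RX_ERR_FRAME    0x00060000u
#define RX_ERR_CRC      0x00080000u

static void decode_rx_status(uint32_t frame_status)
{
	if (frame_status & RX_ERR_SUMMARY) {
		printf("error frame:%s%s%s%s\n",
		       (frame_status & RX_ERR_OVERSIZE) ? " oversize" : "",
		       (frame_status & RX_ERR_FIFO)     ? " fifo"     : "",
		       (frame_status & RX_ERR_FRAME)    ? " framing"  : "",
		       (frame_status & RX_ERR_CRC)      ? " crc"      : "");
		return;
	}
	printf("good frame, %u bytes\n", frame_status & RX_LEN_MASK);
}

int main(void)
{
	decode_rx_status(0x00000042); /* 66-byte good frame */
	decode_rx_status(0x00080040); /* CRC error */
	return 0;
}
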
*/ + if (netif_msg_rx_err(np)) + printk(KERN_DEBUG " netdev_rx() Rx error was %8.8x.\n", + frame_status); + dev->stats.rx_errors++; + if (frame_status & 0x00100000) + dev->stats.rx_length_errors++; + if (frame_status & 0x00010000) + dev->stats.rx_fifo_errors++; + if (frame_status & 0x00060000) + dev->stats.rx_frame_errors++; + if (frame_status & 0x00080000) + dev->stats.rx_crc_errors++; + if (frame_status & 0x00100000) { + printk(KERN_WARNING "%s: Oversized Ethernet frame," + " status %8.8x.\n", + dev->name, frame_status); + } + } else { + struct sk_buff *skb; +#ifndef final_version + if (netif_msg_rx_status(np)) + printk(KERN_DEBUG " netdev_rx() normal Rx pkt length %d" + ", bogus_cnt %d.\n", + pkt_len, boguscnt); +#endif + /* Check if the packet is long enough to accept without copying + to a minimally-sized skbuff. */ + if (pkt_len < rx_copybreak && + (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { + skb_reserve(skb, 2); /* 16 byte align the IP header */ + dma_sync_single_for_cpu(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, np->rx_skbuff[entry]->data, pkt_len); + dma_sync_single_for_device(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_put(skb, pkt_len); + } else { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(desc->frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + skb_put(skb = np->rx_skbuff[entry], pkt_len); + np->rx_skbuff[entry] = NULL; + } + skb->protocol = eth_type_trans(skb, dev); + /* Note: checksum -> skb->ip_summed = CHECKSUM_UNNECESSARY; */ + netif_rx(skb); + } + entry = (entry + 1) % RX_RING_SIZE; + received++; + } + np->cur_rx = entry; + refill_rx (dev); + np->budget -= received; + iowrite16(DEFAULT_INTR, ioaddr + IntrEnable); + return; + +not_done: + np->cur_rx = entry; + refill_rx (dev); + if (!received) + received = 1; + np->budget -= received; + if (np->budget <= 0) + np->budget = RX_BUDGET; + tasklet_schedule(&np->rx_tasklet); +} + +static void refill_rx (struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + int entry; + + /* Refill the Rx ring buffers. */ + for (;(np->cur_rx - np->dirty_rx + RX_RING_SIZE) % RX_RING_SIZE > 0; + np->dirty_rx = (np->dirty_rx + 1) % RX_RING_SIZE) { + struct sk_buff *skb; + dma_addr_t addr; + + entry = np->dirty_rx % RX_RING_SIZE; + if (np->rx_skbuff[entry] == NULL) { + skb = netdev_alloc_skb(dev, np->rx_buf_sz + 2); + np->rx_skbuff[entry] = skb; + if (skb == NULL) + break; /* Better luck next round. */ + skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ + addr = dma_map_single(&np->pci_dev->dev, skb->data, + np->rx_buf_sz, DMA_FROM_DEVICE); + if (dma_mapping_error(&np->pci_dev->dev, addr)) { + dev_kfree_skb_irq(skb); + np->rx_skbuff[entry] = NULL; + break; + } + + np->rx_ring[entry].frag.addr = cpu_to_le32(addr); + } + /* Perhaps we need not reset this field. 
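The receive path just above applies the usual copybreak trade-off: frames shorter than rx_copybreak are copied into a small freshly allocated skb after a CPU sync, so the mapped ring buffer never leaves the ring, while larger frames are unmapped and handed up, leaving a hole for refill_rx() to plug. A trivial sketch of that decision; the threshold value here is invented, the driver's rx_copybreak is configured elsewhere in the file:

#include <stdbool.h>
#include <stdio.h>

static unsigned int rx_copybreak_sketch = 256; /* illustrative threshold */

/* True when the packet should be copied into a small skb and the
 * original receive buffer left in place for reuse. */
static bool should_copy(unsigned int pkt_len)
{
	return pkt_len < rx_copybreak_sketch;
}

int main(void)
{
	printf("60-byte frame:   %s\n", should_copy(60) ? "copy" : "hand off buffer");
	printf("1514-byte frame: %s\n", should_copy(1514) ? "copy" : "hand off buffer");
	return 0;
}
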
*/ + np->rx_ring[entry].frag.length = + cpu_to_le32(np->rx_buf_sz | LastFrag); + np->rx_ring[entry].status = 0; + } +} +static void netdev_error(struct net_device *dev, int intr_status) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u16 mii_ctl, mii_advertise, mii_lpa; + int speed; + + if (intr_status & LinkChange) { + if (mdio_wait_link(dev, 10) == 0) { + printk(KERN_INFO "%s: Link up\n", dev->name); + if (np->an_enable) { + mii_advertise = mdio_read(dev, np->phys[0], + MII_ADVERTISE); + mii_lpa = mdio_read(dev, np->phys[0], MII_LPA); + mii_advertise &= mii_lpa; + printk(KERN_INFO "%s: Link changed: ", + dev->name); + if (mii_advertise & ADVERTISE_100FULL) { + np->speed = 100; + printk("100Mbps, full duplex\n"); + } else if (mii_advertise & ADVERTISE_100HALF) { + np->speed = 100; + printk("100Mbps, half duplex\n"); + } else if (mii_advertise & ADVERTISE_10FULL) { + np->speed = 10; + printk("10Mbps, full duplex\n"); + } else if (mii_advertise & ADVERTISE_10HALF) { + np->speed = 10; + printk("10Mbps, half duplex\n"); + } else + printk("\n"); + + } else { + mii_ctl = mdio_read(dev, np->phys[0], MII_BMCR); + speed = (mii_ctl & BMCR_SPEED100) ? 100 : 10; + np->speed = speed; + printk(KERN_INFO "%s: Link changed: %dMbps ,", + dev->name, speed); + printk("%s duplex.\n", + (mii_ctl & BMCR_FULLDPLX) ? + "full" : "half"); + } + check_duplex(dev); + if (np->flowctrl && np->mii_if.full_duplex) { + iowrite16(ioread16(ioaddr + MulticastFilter1+2) | 0x0200, + ioaddr + MulticastFilter1+2); + iowrite16(ioread16(ioaddr + MACCtrl0) | EnbFlowCtrl, + ioaddr + MACCtrl0); + } + netif_carrier_on(dev); + } else { + printk(KERN_INFO "%s: Link down\n", dev->name); + netif_carrier_off(dev); + } + } + if (intr_status & StatsMax) { + get_stats(dev); + } + if (intr_status & IntrPCIErr) { + printk(KERN_ERR "%s: Something Wicked happened! %4.4x.\n", + dev->name, intr_status); + /* We must do a global reset of DMA to continue. */ + } +} + +static struct net_device_stats *get_stats(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + unsigned long flags; + u8 late_coll, single_coll, mult_coll; + + spin_lock_irqsave(&np->statlock, flags); + /* The chip only need report frame silently dropped. 
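get_stats() below folds the MAC's split octet counters into the netdev stats by adding the low 16-bit register and the high register shifted left by 16. A tiny sketch of that combination, with invented register values standing in for the ioread16() results:

#include <stdio.h>
#include <stdint.h>

/* Combine a split low/high 16-bit octet counter pair the same way
 * get_stats() does; the register values are made up for illustration. */
static uint64_t add_octets(uint64_t total, uint16_t lo, uint16_t hi)
{
	total += lo;
	total += (uint32_t)hi << 16;
	return total;
}

int main(void)
{
	uint64_t tx_bytes = 0;

	tx_bytes = add_octets(tx_bytes, 0x3412, 0x0002); /* 0x00023412 bytes */
	printf("tx_bytes = %llu\n", (unsigned long long)tx_bytes);
	return 0;
}
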
*/ + dev->stats.rx_missed_errors += ioread8(ioaddr + RxMissed); + dev->stats.tx_packets += ioread16(ioaddr + TxFramesOK); + dev->stats.rx_packets += ioread16(ioaddr + RxFramesOK); + dev->stats.tx_carrier_errors += ioread8(ioaddr + StatsCarrierError); + + mult_coll = ioread8(ioaddr + StatsMultiColl); + np->xstats.tx_multiple_collisions += mult_coll; + single_coll = ioread8(ioaddr + StatsOneColl); + np->xstats.tx_single_collisions += single_coll; + late_coll = ioread8(ioaddr + StatsLateColl); + np->xstats.tx_late_collisions += late_coll; + dev->stats.collisions += mult_coll + + single_coll + + late_coll; + + np->xstats.tx_deferred += ioread8(ioaddr + StatsTxDefer); + np->xstats.tx_deferred_excessive += ioread8(ioaddr + StatsTxXSDefer); + np->xstats.tx_aborted += ioread8(ioaddr + StatsTxAbort); + np->xstats.tx_bcasts += ioread8(ioaddr + StatsBcastTx); + np->xstats.rx_bcasts += ioread8(ioaddr + StatsBcastRx); + np->xstats.tx_mcasts += ioread8(ioaddr + StatsMcastTx); + np->xstats.rx_mcasts += ioread8(ioaddr + StatsMcastRx); + + dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsLow); + dev->stats.tx_bytes += ioread16(ioaddr + TxOctetsHigh) << 16; + dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsLow); + dev->stats.rx_bytes += ioread16(ioaddr + RxOctetsHigh) << 16; + + spin_unlock_irqrestore(&np->statlock, flags); + + return &dev->stats; +} + +static void set_rx_mode(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u16 mc_filter[4]; /* Multicast hash filter */ + u32 rx_mode; + int i; + + if (dev->flags & IFF_PROMISC) { /* Set promiscuous. */ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptAll | AcceptMyPhys; + } else if ((netdev_mc_count(dev) > multicast_filter_limit) || + (dev->flags & IFF_ALLMULTI)) { + /* Too many to match, or accept all multicasts. 
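The multicast path below hashes each address into a 64-bit filter spread across four 16-bit registers: the top six bits of the little-endian CRC are taken most-significant-first and packed LSB-first into an index from 0 to 63. A standalone sketch of that index computation, feeding in a made-up CRC value instead of calling the kernel's ether_crc_le():

#include <stdio.h>
#include <stdint.h>

/* Mirror of the hash-index loop in set_rx_mode(): walk the top 6 CRC bits,
 * most significant first, and pack them LSB-first into the index. */
static unsigned int mc_hash_index(uint32_t crc)
{
	unsigned int index = 0;
	int bit;

	for (bit = 0; bit < 6; bit++, crc <<= 1)
		if (crc & 0x80000000u)
			index |= 1u << bit;

	return index; /* 0..63 */
}

int main(void)
{
	uint16_t mc_filter[4] = { 0 };                  /* the four filter registers */
	unsigned int index = mc_hash_index(0xdeadbeef); /* invented CRC value */

	mc_filter[index / 16] |= 1u << (index % 16);
	printf("index=%u -> filter word %u, bit %u\n",
	       index, index / 16, index % 16);
	return 0;
}
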
*/ + memset(mc_filter, 0xff, sizeof(mc_filter)); + rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys; + } else if (!netdev_mc_empty(dev)) { + struct netdev_hw_addr *ha; + int bit; + int index; + int crc; + memset (mc_filter, 0, sizeof (mc_filter)); + netdev_for_each_mc_addr(ha, dev) { + crc = ether_crc_le(ETH_ALEN, ha->addr); + for (index=0, bit=0; bit < 6; bit++, crc <<= 1) + if (crc & 0x80000000) index |= 1 << bit; + mc_filter[index/16] |= (1 << (index % 16)); + } + rx_mode = AcceptBroadcast | AcceptMultiHash | AcceptMyPhys; + } else { + iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode); + return; + } + if (np->mii_if.full_duplex && np->flowctrl) + mc_filter[3] |= 0x0200; + + for (i = 0; i < 4; i++) + iowrite16(mc_filter[i], ioaddr + MulticastFilter0 + i*2); + iowrite8(rx_mode, ioaddr + RxMode); +} + +static int __set_mac_addr(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + u16 addr16; + + addr16 = (dev->dev_addr[0] | (dev->dev_addr[1] << 8)); + iowrite16(addr16, np->base + StationAddr); + addr16 = (dev->dev_addr[2] | (dev->dev_addr[3] << 8)); + iowrite16(addr16, np->base + StationAddr+2); + addr16 = (dev->dev_addr[4] | (dev->dev_addr[5] << 8)); + iowrite16(addr16, np->base + StationAddr+4); + return 0; +} + +/* Invoked with rtnl_lock held */ +static int sundance_set_mac_addr(struct net_device *dev, void *data) +{ + const struct sockaddr *addr = data; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + eth_hw_addr_set(dev, addr->sa_data); + __set_mac_addr(dev); + + return 0; +} + +static const struct { + const char name[ETH_GSTRING_LEN]; +} sundance_stats[] = { + { "tx_multiple_collisions" }, + { "tx_single_collisions" }, + { "tx_late_collisions" }, + { "tx_deferred" }, + { "tx_deferred_excessive" }, + { "tx_aborted" }, + { "tx_bcasts" }, + { "rx_bcasts" }, + { "tx_mcasts" }, + { "rx_mcasts" }, +}; + +static int check_if_running(struct net_device *dev) +{ + if (!netif_running(dev)) + return -EINVAL; + return 0; +} + +static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct netdev_private *np = netdev_priv(dev); + strscpy(info->driver, DRV_NAME, sizeof(info->driver)); + strscpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info)); +} + +static int get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + spin_lock_irq(&np->lock); + mii_ethtool_get_link_ksettings(&np->mii_if, cmd); + spin_unlock_irq(&np->lock); + return 0; +} + +static int set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int res; + spin_lock_irq(&np->lock); + res = mii_ethtool_set_link_ksettings(&np->mii_if, cmd); + spin_unlock_irq(&np->lock); + return res; +} + +static int nway_reset(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_nway_restart(&np->mii_if); +} + +static u32 get_link(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return mii_link_ok(&np->mii_if); +} + +static u32 get_msglevel(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + return np->msg_enable; +} + +static void set_msglevel(struct net_device *dev, u32 val) +{ + struct netdev_private *np = netdev_priv(dev); + np->msg_enable = val; +} + +static void get_strings(struct net_device *dev, u32 stringset, + u8 *data) +{ + if (stringset == ETH_SS_STATS) + memcpy(data, sundance_stats, 
sizeof(sundance_stats)); +} + +static int get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(sundance_stats); + default: + return -EOPNOTSUPP; + } +} + +static void get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct netdev_private *np = netdev_priv(dev); + int i = 0; + + get_stats(dev); + data[i++] = np->xstats.tx_multiple_collisions; + data[i++] = np->xstats.tx_single_collisions; + data[i++] = np->xstats.tx_late_collisions; + data[i++] = np->xstats.tx_deferred; + data[i++] = np->xstats.tx_deferred_excessive; + data[i++] = np->xstats.tx_aborted; + data[i++] = np->xstats.tx_bcasts; + data[i++] = np->xstats.rx_bcasts; + data[i++] = np->xstats.tx_mcasts; + data[i++] = np->xstats.rx_mcasts; +} + +#ifdef CONFIG_PM + +static void sundance_get_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u8 wol_bits; + + wol->wolopts = 0; + + wol->supported = (WAKE_PHY | WAKE_MAGIC); + if (!np->wol_enabled) + return; + + wol_bits = ioread8(ioaddr + WakeEvent); + if (wol_bits & MagicPktEnable) + wol->wolopts |= WAKE_MAGIC; + if (wol_bits & LinkEventEnable) + wol->wolopts |= WAKE_PHY; +} + +static int sundance_set_wol(struct net_device *dev, + struct ethtool_wolinfo *wol) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + u8 wol_bits; + + if (!device_can_wakeup(&np->pci_dev->dev)) + return -EOPNOTSUPP; + + np->wol_enabled = !!(wol->wolopts); + wol_bits = ioread8(ioaddr + WakeEvent); + wol_bits &= ~(WakePktEnable | MagicPktEnable | + LinkEventEnable | WolEnable); + + if (np->wol_enabled) { + if (wol->wolopts & WAKE_MAGIC) + wol_bits |= (MagicPktEnable | WolEnable); + if (wol->wolopts & WAKE_PHY) + wol_bits |= (LinkEventEnable | WolEnable); + } + iowrite8(wol_bits, ioaddr + WakeEvent); + + device_set_wakeup_enable(&np->pci_dev->dev, np->wol_enabled); + + return 0; +} +#else +#define sundance_get_wol NULL +#define sundance_set_wol NULL +#endif /* CONFIG_PM */ + +static const struct ethtool_ops ethtool_ops = { + .begin = check_if_running, + .get_drvinfo = get_drvinfo, + .nway_reset = nway_reset, + .get_link = get_link, + .get_wol = sundance_get_wol, + .set_wol = sundance_set_wol, + .get_msglevel = get_msglevel, + .set_msglevel = set_msglevel, + .get_strings = get_strings, + .get_sset_count = get_sset_count, + .get_ethtool_stats = get_ethtool_stats, + .get_link_ksettings = get_link_ksettings, + .set_link_ksettings = set_link_ksettings, +}; + +static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + struct netdev_private *np = netdev_priv(dev); + int rc; + + if (!netif_running(dev)) + return -EINVAL; + + spin_lock_irq(&np->lock); + rc = generic_mii_ioctl(&np->mii_if, if_mii(rq), cmd, NULL); + spin_unlock_irq(&np->lock); + + return rc; +} + +static int netdev_close(struct net_device *dev) +{ + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + struct sk_buff *skb; + int i; + + /* Wait and kill tasklet */ + tasklet_kill(&np->rx_tasklet); + tasklet_kill(&np->tx_tasklet); + np->cur_tx = 0; + np->dirty_tx = 0; + np->cur_task = 0; + np->last_tx = NULL; + + netif_stop_queue(dev); + + if (netif_msg_ifdown(np)) { + printk(KERN_DEBUG "%s: Shutting down ethercard, status was Tx %2.2x " + "Rx %4.4x Int %2.2x.\n", + dev->name, ioread8(ioaddr + TxStatus), + ioread32(ioaddr + RxStatus), ioread16(ioaddr + IntrStatus)); + printk(KERN_DEBUG "%s: 
Queue pointers were Tx %d / %d, Rx %d / %d.\n", + dev->name, np->cur_tx, np->dirty_tx, np->cur_rx, np->dirty_rx); + } + + /* Disable interrupts by clearing the interrupt mask. */ + iowrite16(0x0000, ioaddr + IntrEnable); + + /* Disable Rx and Tx DMA for safely release resource */ + iowrite32(0x500, ioaddr + DMACtrl); + + /* Stop the chip's Tx and Rx processes. */ + iowrite16(TxDisable | RxDisable | StatsDisable, ioaddr + MACCtrl1); + + for (i = 2000; i > 0; i--) { + if ((ioread32(ioaddr + DMACtrl) & 0xc000) == 0) + break; + mdelay(1); + } + + iowrite16(GlobalReset | DMAReset | FIFOReset | NetworkReset, + ioaddr + ASIC_HI_WORD(ASICCtrl)); + + for (i = 2000; i > 0; i--) { + if ((ioread16(ioaddr + ASIC_HI_WORD(ASICCtrl)) & ResetBusy) == 0) + break; + mdelay(1); + } + +#ifdef __i386__ + if (netif_msg_hw(np)) { + printk(KERN_DEBUG " Tx ring at %8.8x:\n", + (int)(np->tx_ring_dma)); + for (i = 0; i < TX_RING_SIZE; i++) + printk(KERN_DEBUG " #%d desc. %4.4x %8.8x %8.8x.\n", + i, np->tx_ring[i].status, np->tx_ring[i].frag.addr, + np->tx_ring[i].frag.length); + printk(KERN_DEBUG " Rx ring %8.8x:\n", + (int)(np->rx_ring_dma)); + for (i = 0; i < /*RX_RING_SIZE*/4 ; i++) { + printk(KERN_DEBUG " #%d desc. %4.4x %4.4x %8.8x\n", + i, np->rx_ring[i].status, np->rx_ring[i].frag.addr, + np->rx_ring[i].frag.length); + } + } +#endif /* __i386__ debugging only */ + + free_irq(np->pci_dev->irq, dev); + + timer_delete_sync(&np->timer); + + /* Free all the skbuffs in the Rx queue. */ + for (i = 0; i < RX_RING_SIZE; i++) { + np->rx_ring[i].status = 0; + skb = np->rx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->rx_ring[i].frag.addr), + np->rx_buf_sz, DMA_FROM_DEVICE); + dev_kfree_skb(skb); + np->rx_skbuff[i] = NULL; + } + np->rx_ring[i].frag.addr = cpu_to_le32(0xBADF00D0); /* poison */ + } + for (i = 0; i < TX_RING_SIZE; i++) { + np->tx_ring[i].next_desc = 0; + skb = np->tx_skbuff[i]; + if (skb) { + dma_unmap_single(&np->pci_dev->dev, + le32_to_cpu(np->tx_ring[i].frag.addr), + skb->len, DMA_TO_DEVICE); + dev_kfree_skb(skb); + np->tx_skbuff[i] = NULL; + } + } + + return 0; +} + +static void sundance_remove1(struct pci_dev *pdev) +{ + struct net_device *dev = pci_get_drvdata(pdev); + + if (dev) { + struct netdev_private *np = netdev_priv(dev); + unregister_netdev(dev); + dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, + np->rx_ring, np->rx_ring_dma); + dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, + np->tx_ring, np->tx_ring_dma); + pci_iounmap(pdev, np->base); + pci_release_regions(pdev); + free_netdev(dev); + } +} + +static int __maybe_unused sundance_suspend(struct device *dev_d) +{ + struct net_device *dev = dev_get_drvdata(dev_d); + struct netdev_private *np = netdev_priv(dev); + void __iomem *ioaddr = np->base; + + if (!netif_running(dev)) + return 0; + + netdev_close(dev); + netif_device_detach(dev); + + if (np->wol_enabled) { + iowrite8(AcceptBroadcast | AcceptMyPhys, ioaddr + RxMode); + iowrite16(RxEnable, ioaddr + MACCtrl1); + } + + device_set_wakeup_enable(dev_d, np->wol_enabled); + + return 0; +} + +static int __maybe_unused sundance_resume(struct device *dev_d) +{ + struct net_device *dev = dev_get_drvdata(dev_d); + int err = 0; + + if (!netif_running(dev)) + return 0; + + err = netdev_open(dev); + if (err) { + printk(KERN_ERR "%s: Can't resume interface!\n", + dev->name); + goto out; + } + + netif_device_attach(dev); + +out: + return err; +} + +static SIMPLE_DEV_PM_OPS(sundance_pm_ops, sundance_suspend, sundance_resume); + +static struct pci_driver sundance_driver = { + 
.name = DRV_NAME, + .id_table = sundance_pci_tbl, + .probe = sundance_probe1, + .remove = sundance_remove1, + .driver.pm = &sundance_pm_ops, +}; + +module_pci_driver(sundance_driver); diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c index 0f4efd505332..c96d1d6ba8fe 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c @@ -4884,7 +4884,7 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev) priv->tx_tstamp_type = HWTSTAMP_TX_OFF; priv->rx_tstamp = false; - priv->dpaa2_ptp_wq = alloc_workqueue("dpaa2_ptp_wq", 0, 0); + priv->dpaa2_ptp_wq = alloc_workqueue("dpaa2_ptp_wq", WQ_PERCPU, 0); if (!priv->dpaa2_ptp_wq) { err = -ENOMEM; goto err_wq_alloc; diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c index 4643a3380618..b1e1ad9e4b48 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-switch.c @@ -2736,7 +2736,7 @@ static int dpaa2_switch_setup_dpbp(struct ethsw_core *ethsw) dev_err(dev, "dpsw_ctrl_if_set_pools() failed\n"); goto err_get_attr; } - ethsw->bpid = dpbp_attrs.id; + ethsw->bpid = dpbp_attrs.bpid; return 0; diff --git a/drivers/net/ethernet/freescale/enetc/Kconfig b/drivers/net/ethernet/freescale/enetc/Kconfig index 54b0f0a5a6bb..117038104b69 100644 --- a/drivers/net/ethernet/freescale/enetc/Kconfig +++ b/drivers/net/ethernet/freescale/enetc/Kconfig @@ -28,6 +28,7 @@ config NXP_NTMP config FSL_ENETC tristate "ENETC PF driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_MSI select FSL_ENETC_CORE select FSL_ENETC_IERB @@ -45,6 +46,7 @@ config FSL_ENETC config NXP_ENETC4 tristate "ENETC4 PF driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_MSI select FSL_ENETC_CORE select FSL_ENETC_MDIO @@ -62,6 +64,7 @@ config NXP_ENETC4 config FSL_ENETC_VF tristate "ENETC VF driver" + depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_MSI select FSL_ENETC_CORE select FSL_ENETC_MDIO diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c index e4287725832e..aae462a0cf5a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.c +++ b/drivers/net/ethernet/freescale/enetc/enetc.c @@ -221,22 +221,111 @@ static void enetc_unwind_tx_frame(struct enetc_bdr *tx_ring, int count, int i) } } +static void enetc_set_one_step_ts(struct enetc_si *si, bool udp, int offset) +{ + u32 val = ENETC_PM0_SINGLE_STEP_EN; + + val |= ENETC_SET_SINGLE_STEP_OFFSET(offset); + if (udp) + val |= ENETC_PM0_SINGLE_STEP_CH; + + /* The "Correction" field of a packet is updated based on the + * current time and the timestamp provided + */ + enetc_port_mac_wr(si, ENETC_PM0_SINGLE_STEP, val); +} + +static void enetc4_set_one_step_ts(struct enetc_si *si, bool udp, int offset) +{ + u32 val = PM_SINGLE_STEP_EN; + + val |= PM_SINGLE_STEP_OFFSET_SET(offset); + if (udp) + val |= PM_SINGLE_STEP_CH; + + enetc_port_mac_wr(si, ENETC4_PM_SINGLE_STEP(0), val); +} + +static u32 enetc_update_ptp_sync_msg(struct enetc_ndev_priv *priv, + struct sk_buff *skb, bool csum_offload) +{ + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); + u16 tstamp_off = enetc_cb->origin_tstamp_off; + u16 corr_off = enetc_cb->correction_off; + struct enetc_si *si = priv->si; + struct enetc_hw *hw = &si->hw; + __be32 new_sec_l, new_nsec; + __be16 new_sec_h; + u32 lo, hi, nsec; + u8 *data; + u64 sec; + + lo = enetc_rd_hot(hw, ENETC_SICTR0); + hi = 
enetc_rd_hot(hw, ENETC_SICTR1); + sec = (u64)hi << 32 | lo; + nsec = do_div(sec, 1000000000); + + /* Update originTimestamp field of Sync packet + * - 48 bits seconds field + * - 32 bits nanseconds field + * + * In addition, if csum_offload is false, the UDP checksum needs + * to be updated by software after updating originTimestamp field, + * otherwise the hardware will calculate the wrong checksum when + * updating the correction field and update it to the packet. + */ + + data = skb_mac_header(skb); + new_sec_h = htons((sec >> 32) & 0xffff); + new_sec_l = htonl(sec & 0xffffffff); + new_nsec = htonl(nsec); + if (enetc_cb->udp && !csum_offload) { + struct udphdr *uh = udp_hdr(skb); + __be32 old_sec_l, old_nsec; + __be16 old_sec_h; + + old_sec_h = *(__be16 *)(data + tstamp_off); + inet_proto_csum_replace2(&uh->check, skb, old_sec_h, + new_sec_h, false); + + old_sec_l = *(__be32 *)(data + tstamp_off + 2); + inet_proto_csum_replace4(&uh->check, skb, old_sec_l, + new_sec_l, false); + + old_nsec = *(__be32 *)(data + tstamp_off + 6); + inet_proto_csum_replace4(&uh->check, skb, old_nsec, + new_nsec, false); + } + + *(__be16 *)(data + tstamp_off) = new_sec_h; + *(__be32 *)(data + tstamp_off + 2) = new_sec_l; + *(__be32 *)(data + tstamp_off + 6) = new_nsec; + + /* Configure single-step register */ + if (is_enetc_rev1(si)) + enetc_set_one_step_ts(si, enetc_cb->udp, corr_off); + else + enetc4_set_one_step_ts(si, enetc_cb->udp, corr_off); + + return lo & ENETC_TXBD_TSTAMP; +} + static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) { bool do_vlan, do_onestep_tstamp = false, do_twostep_tstamp = false; struct enetc_ndev_priv *priv = netdev_priv(tx_ring->ndev); - struct enetc_hw *hw = &priv->si->hw; + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); struct enetc_tx_swbd *tx_swbd; int len = skb_headlen(skb); union enetc_tx_bd temp_bd; - u8 msgtype, twostep, udp; + bool csum_offload = false; union enetc_tx_bd *txbd; - u16 offset1, offset2; int i, count = 0; skb_frag_t *frag; unsigned int f; dma_addr_t dma; u8 flags = 0; + u32 tstamp; enetc_clear_tx_bd(&temp_bd); if (skb->ip_summed == CHECKSUM_PARTIAL) { @@ -256,11 +345,19 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) temp_bd.l4_aux = FIELD_PREP(ENETC_TX_BD_L4T, ENETC_TXBD_L4T_UDP); flags |= ENETC_TXBD_FLAGS_CSUM_LSO | ENETC_TXBD_FLAGS_L4CS; + csum_offload = true; } else if (skb_checksum_help(skb)) { return 0; } } + if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + do_onestep_tstamp = true; + tstamp = enetc_update_ptp_sync_msg(priv, skb, csum_offload); + } else if (enetc_cb->flag & ENETC_F_TX_TSTAMP) { + do_twostep_tstamp = true; + } + i = tx_ring->next_to_use; txbd = ENETC_TXBD(*tx_ring, i); prefetchw(txbd); @@ -280,17 +377,6 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) count++; do_vlan = skb_vlan_tag_present(skb); - if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { - if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1, - &offset2) || - msgtype != PTP_MSGTYPE_SYNC || twostep) - WARN_ONCE(1, "Bad packet for one-step timestamping\n"); - else - do_onestep_tstamp = true; - } else if (skb->cb[0] & ENETC_F_TX_TSTAMP) { - do_twostep_tstamp = true; - } - tx_swbd->do_twostep_tstamp = do_twostep_tstamp; tx_swbd->qbv_en = !!(priv->active_offloads & ENETC_F_QBV); tx_swbd->check_wb = tx_swbd->do_twostep_tstamp || tx_swbd->qbv_en; @@ -333,65 +419,9 @@ static int enetc_map_tx_buffs(struct enetc_bdr *tx_ring, struct sk_buff *skb) } if (do_onestep_tstamp) { - 
__be32 new_sec_l, new_nsec; - u32 lo, hi, nsec, val; - __be16 new_sec_h; - u8 *data; - u64 sec; - - lo = enetc_rd_hot(hw, ENETC_SICTR0); - hi = enetc_rd_hot(hw, ENETC_SICTR1); - sec = (u64)hi << 32 | lo; - nsec = do_div(sec, 1000000000); - /* Configure extension BD */ - temp_bd.ext.tstamp = cpu_to_le32(lo & 0x3fffffff); + temp_bd.ext.tstamp = cpu_to_le32(tstamp); e_flags |= ENETC_TXBD_E_FLAGS_ONE_STEP_PTP; - - /* Update originTimestamp field of Sync packet - * - 48 bits seconds field - * - 32 bits nanseconds field - * - * In addition, the UDP checksum needs to be updated - * by software after updating originTimestamp field, - * otherwise the hardware will calculate the wrong - * checksum when updating the correction field and - * update it to the packet. - */ - data = skb_mac_header(skb); - new_sec_h = htons((sec >> 32) & 0xffff); - new_sec_l = htonl(sec & 0xffffffff); - new_nsec = htonl(nsec); - if (udp) { - struct udphdr *uh = udp_hdr(skb); - __be32 old_sec_l, old_nsec; - __be16 old_sec_h; - - old_sec_h = *(__be16 *)(data + offset2); - inet_proto_csum_replace2(&uh->check, skb, old_sec_h, - new_sec_h, false); - - old_sec_l = *(__be32 *)(data + offset2 + 2); - inet_proto_csum_replace4(&uh->check, skb, old_sec_l, - new_sec_l, false); - - old_nsec = *(__be32 *)(data + offset2 + 6); - inet_proto_csum_replace4(&uh->check, skb, old_nsec, - new_nsec, false); - } - - *(__be16 *)(data + offset2) = new_sec_h; - *(__be32 *)(data + offset2 + 2) = new_sec_l; - *(__be32 *)(data + offset2 + 6) = new_nsec; - - /* Configure single-step register */ - val = ENETC_PM0_SINGLE_STEP_EN; - val |= ENETC_SET_SINGLE_STEP_OFFSET(offset1); - if (udp) - val |= ENETC_PM0_SINGLE_STEP_CH; - - enetc_port_mac_wr(priv->si, ENETC_PM0_SINGLE_STEP, - val); } else if (do_twostep_tstamp) { skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; e_flags |= ENETC_TXBD_E_FLAGS_TWO_STEP_PTP; @@ -938,12 +968,13 @@ err_chained_bd: static netdev_tx_t enetc_start_xmit(struct sk_buff *skb, struct net_device *ndev) { + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); struct enetc_ndev_priv *priv = netdev_priv(ndev); struct enetc_bdr *tx_ring; int count; /* Queue one-step Sync packet if already locked */ - if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { if (test_and_set_bit_lock(ENETC_TX_ONESTEP_TSTAMP_IN_PROGRESS, &priv->flags)) { skb_queue_tail(&priv->tx_skbs, skb); @@ -1005,24 +1036,29 @@ drop_packet_err: netdev_tx_t enetc_xmit(struct sk_buff *skb, struct net_device *ndev) { + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); struct enetc_ndev_priv *priv = netdev_priv(ndev); u8 udp, msgtype, twostep; u16 offset1, offset2; - /* Mark tx timestamp type on skb->cb[0] if requires */ + /* Mark tx timestamp type on enetc_cb->flag if requires */ if ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && - (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) { - skb->cb[0] = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK; - } else { - skb->cb[0] = 0; - } + (priv->active_offloads & ENETC_F_TX_TSTAMP_MASK)) + enetc_cb->flag = priv->active_offloads & ENETC_F_TX_TSTAMP_MASK; + else + enetc_cb->flag = 0; /* Fall back to two-step timestamp if not one-step Sync packet */ - if (skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { + if (enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) { if (enetc_ptp_parse(skb, &udp, &msgtype, &twostep, &offset1, &offset2) || - msgtype != PTP_MSGTYPE_SYNC || twostep != 0) - skb->cb[0] = ENETC_F_TX_TSTAMP; + msgtype != PTP_MSGTYPE_SYNC || twostep != 0) { + enetc_cb->flag = 
ENETC_F_TX_TSTAMP; + } else { + enetc_cb->udp = !!udp; + enetc_cb->correction_off = offset1; + enetc_cb->origin_tstamp_off = offset2; + } } return enetc_start_xmit(skb, ndev); @@ -1214,7 +1250,9 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget) if (xdp_frame) { xdp_return_frame(xdp_frame); } else if (skb) { - if (unlikely(skb->cb[0] & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { + struct enetc_skb_cb *enetc_cb = ENETC_SKB_CB(skb); + + if (unlikely(enetc_cb->flag & ENETC_F_TX_ONESTEP_SYNC_TSTAMP)) { /* Start work to release lock for next one-step * timestamping packet. And send one skb in * tx_skbs queue if has. @@ -1397,8 +1435,7 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring, __vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt)); } - if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && - (priv->active_offloads & ENETC_F_RX_TSTAMP)) + if (priv->active_offloads & ENETC_F_RX_TSTAMP) enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb); } @@ -3301,7 +3338,7 @@ int enetc_hwtstamp_set(struct net_device *ndev, struct enetc_ndev_priv *priv = netdev_priv(ndev); int err, new_offloads = priv->active_offloads; - if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) + if (!enetc_ptp_clock_is_enabled(priv->si)) return -EOPNOTSUPP; switch (config->tx_type) { @@ -3351,7 +3388,7 @@ int enetc_hwtstamp_get(struct net_device *ndev, { struct enetc_ndev_priv *priv = netdev_priv(ndev); - if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) + if (!enetc_ptp_clock_is_enabled(priv->si)) return -EOPNOTSUPP; if (priv->active_offloads & ENETC_F_TX_ONESTEP_SYNC_TSTAMP) diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h index 62e8ee4d2f04..0ec010a7d640 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc.h +++ b/drivers/net/ethernet/freescale/enetc/enetc.h @@ -54,6 +54,15 @@ struct enetc_tx_swbd { u8 qbv_en:1; }; +struct enetc_skb_cb { + u8 flag; + bool udp; + u16 correction_off; + u16 origin_tstamp_off; +}; + +#define ENETC_SKB_CB(skb) ((struct enetc_skb_cb *)((skb)->cb)) + struct enetc_lso_t { bool ipv6; bool tcp; @@ -217,7 +226,7 @@ static inline union enetc_rx_bd *enetc_rxbd(struct enetc_bdr *rx_ring, int i) { int hw_idx = i; - if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en) + if (rx_ring->ext_en) hw_idx = 2 * i; return &(((union enetc_rx_bd *)rx_ring->bd_base)[hw_idx]); @@ -231,7 +240,7 @@ static inline void enetc_rxbd_next(struct enetc_bdr *rx_ring, new_rxbd++; - if (IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK) && rx_ring->ext_en) + if (rx_ring->ext_en) new_rxbd++; if (unlikely(++new_index == rx_ring->bd_count)) { @@ -484,9 +493,6 @@ struct enetc_msg_cmd_set_primary_mac { #define ENETC_CBDR_TIMEOUT 1000 /* usecs */ -/* PTP driver exports */ -extern int enetc_phc_index; - /* SI common */ u32 enetc_port_mac_rd(struct enetc_si *si, u32 reg); void enetc_port_mac_wr(struct enetc_si *si, u32 reg, u32 val); @@ -589,6 +595,14 @@ static inline void enetc_cbd_free_data_mem(struct enetc_si *si, int size, void enetc_reset_ptcmsdur(struct enetc_hw *hw); void enetc_set_ptcmsdur(struct enetc_hw *hw, u32 *queue_max_sdu); +static inline bool enetc_ptp_clock_is_enabled(struct enetc_si *si) +{ + if (is_enetc_rev1(si)) + return IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK); + + return IS_ENABLED(CONFIG_PTP_NETC_V4_TIMER); +} + #ifdef CONFIG_FSL_ENETC_QOS int enetc_qos_query_caps(struct net_device *ndev, void *type_data); int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data); diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h 
b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h index aa25b445d301..19bf0e89cdc2 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc4_hw.h @@ -171,6 +171,12 @@ /* Port MAC 0/1 Pause Quanta Threshold Register */ #define ENETC4_PM_PAUSE_THRESH(mac) (0x5064 + (mac) * 0x400) +#define ENETC4_PM_SINGLE_STEP(mac) (0x50c0 + (mac) * 0x400) +#define PM_SINGLE_STEP_CH BIT(6) +#define PM_SINGLE_STEP_OFFSET GENMASK(15, 7) +#define PM_SINGLE_STEP_OFFSET_SET(o) FIELD_PREP(PM_SINGLE_STEP_OFFSET, o) +#define PM_SINGLE_STEP_EN BIT(31) + /* Port MAC 0 Interface Mode Control Register */ #define ENETC4_PM_IF_MODE(mac) (0x5300 + (mac) * 0x400) #define PM_IF_MODE_IFMODE GENMASK(2, 0) diff --git a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c index b3dc1afeefd1..82c443b28b15 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc4_pf.c +++ b/drivers/net/ethernet/freescale/enetc/enetc4_pf.c @@ -569,6 +569,9 @@ static const struct net_device_ops enetc4_ndev_ops = { .ndo_set_features = enetc4_pf_set_features, .ndo_vlan_rx_add_vid = enetc_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = enetc_vlan_rx_del_vid, + .ndo_eth_ioctl = enetc_ioctl, + .ndo_hwtstamp_get = enetc_hwtstamp_get, + .ndo_hwtstamp_set = enetc_hwtstamp_set, }; static struct phylink_pcs * @@ -1016,8 +1019,7 @@ static int enetc4_pf_probe(struct pci_dev *pdev, err = devm_add_action_or_reset(dev, enetc4_pci_remove, pdev); if (err) - return dev_err_probe(dev, err, - "Add enetc4_pci_remove() action failed\n"); + return err; /* si is the private data. */ si = pci_get_drvdata(pdev); @@ -1030,7 +1032,7 @@ static int enetc4_pf_probe(struct pci_dev *pdev, err = enetc_get_driver_data(si); if (err) return dev_err_probe(dev, err, - "Could not get VF driver data\n"); + "Could not get PF driver data\n"); err = enetc4_pf_struct_init(si); if (err) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 961e76cd8489..71d052de669a 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -4,6 +4,9 @@ #include <linux/ethtool_netlink.h> #include <linux/net_tstamp.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/ptp_clock_kernel.h> + #include "enetc.h" static const u32 enetc_si_regs[] = { @@ -877,23 +880,58 @@ static int enetc_set_coalesce(struct net_device *ndev, return 0; } -static int enetc_get_ts_info(struct net_device *ndev, - struct kernel_ethtool_ts_info *info) +static int enetc_get_phc_index_by_pdev(struct enetc_si *si) { - struct enetc_ndev_priv *priv = netdev_priv(ndev); - int *phc_idx; - - phc_idx = symbol_get(enetc_phc_index); - if (phc_idx) { - info->phc_index = *phc_idx; - symbol_put(enetc_phc_index); + struct pci_bus *bus = si->pdev->bus; + struct pci_dev *timer_pdev; + unsigned int devfn; + int phc_index; + + switch (si->revision) { + case ENETC_REV_1_0: + devfn = PCI_DEVFN(0, 4); + break; + case ENETC_REV_4_1: + devfn = PCI_DEVFN(24, 0); + break; + default: + return -1; } - if (!IS_ENABLED(CONFIG_FSL_ENETC_PTP_CLOCK)) { - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; + timer_pdev = pci_get_domain_bus_and_slot(pci_domain_nr(bus), + bus->number, devfn); + if (!timer_pdev) + return -1; - return 0; - } + phc_index = ptp_clock_index_by_dev(&timer_pdev->dev); + pci_dev_put(timer_pdev); + + return phc_index; +} + +static int enetc_get_phc_index(struct enetc_si *si) +{ + struct device_node *np 
= si->pdev->dev.of_node; + struct device_node *timer_np; + int phc_index; + + if (!np) + return enetc_get_phc_index_by_pdev(si); + + timer_np = of_parse_phandle(np, "ptp-timer", 0); + if (!timer_np) + return enetc_get_phc_index_by_pdev(si); + + phc_index = ptp_clock_index_by_of_node(timer_np); + of_node_put(timer_np); + + return phc_index; +} + +static void enetc_get_ts_generic_info(struct net_device *ndev, + struct kernel_ethtool_ts_info *info) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | SOF_TIMESTAMPING_RX_HARDWARE | @@ -908,6 +946,27 @@ static int enetc_get_ts_info(struct net_device *ndev, info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) | (1 << HWTSTAMP_FILTER_ALL); +} + +static int enetc_get_ts_info(struct net_device *ndev, + struct kernel_ethtool_ts_info *info) +{ + struct enetc_ndev_priv *priv = netdev_priv(ndev); + struct enetc_si *si = priv->si; + + if (!enetc_ptp_clock_is_enabled(si)) + goto timestamp_tx_sw; + + info->phc_index = enetc_get_phc_index(si); + if (info->phc_index < 0) + goto timestamp_tx_sw; + + enetc_get_ts_generic_info(ndev, info); + + return 0; + +timestamp_tx_sw: + info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE; return 0; } @@ -1296,6 +1355,7 @@ const struct ethtool_ops enetc4_pf_ethtool_ops = { .get_rxfh = enetc_get_rxfh, .set_rxfh = enetc_set_rxfh, .get_rxfh_fields = enetc_get_rxfh_fields, + .get_ts_info = enetc_get_ts_info, }; void enetc_set_ethtool_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index 73763e8f4879..377c96325814 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -614,6 +614,7 @@ enum enetc_txbd_flags { #define ENETC_TXBD_STATS_WIN BIT(7) #define ENETC_TXBD_TXSTART_MASK GENMASK(24, 0) #define ENETC_TXBD_FLAGS_OFFSET 24 +#define ENETC_TXBD_TSTAMP GENMASK(29, 0) static inline __le32 enetc_txbd_set_tx_start(u64 tx_start, u8 flags) { diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c index 5243fc031058..b8413d3b4f16 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ptp.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ptp.c @@ -7,9 +7,6 @@ #include "enetc.h" -int enetc_phc_index = -1; -EXPORT_SYMBOL_GPL(enetc_phc_index); - static struct ptp_clock_info enetc_ptp_caps = { .owner = THIS_MODULE, .name = "ENETC PTP clock", @@ -92,7 +89,6 @@ static int enetc_ptp_probe(struct pci_dev *pdev, if (err) goto err_no_clock; - enetc_phc_index = ptp_qoriq->phc_index; pci_set_drvdata(pdev, ptp_qoriq); return 0; @@ -118,7 +114,6 @@ static void enetc_ptp_remove(struct pci_dev *pdev) { struct ptp_qoriq *ptp_qoriq = pci_get_drvdata(pdev); - enetc_phc_index = -1; ptp_qoriq_free(ptp_qoriq); pci_free_irq_vectors(pdev); kfree(ptp_qoriq); diff --git a/drivers/net/ethernet/freescale/enetc/ntmp.c b/drivers/net/ethernet/freescale/enetc/ntmp.c index ba32c1bbd9e1..0c1d343253bf 100644 --- a/drivers/net/ethernet/freescale/enetc/ntmp.c +++ b/drivers/net/ethernet/freescale/enetc/ntmp.c @@ -52,24 +52,19 @@ int ntmp_init_cbdr(struct netc_cbdr *cbdr, struct device *dev, cbdr->addr_base_align = PTR_ALIGN(cbdr->addr_base, NTMP_BASE_ADDR_ALIGN); - cbdr->next_to_clean = 0; - cbdr->next_to_use = 0; spin_lock_init(&cbdr->ring_lock); + cbdr->next_to_use = netc_read(cbdr->regs.pir); + cbdr->next_to_clean = netc_read(cbdr->regs.cir); + /* Step 1: Configure the base address of the Control BD Ring */ 
netc_write(cbdr->regs.bar0, lower_32_bits(cbdr->dma_base_align)); netc_write(cbdr->regs.bar1, upper_32_bits(cbdr->dma_base_align)); - /* Step 2: Configure the producer index register */ - netc_write(cbdr->regs.pir, cbdr->next_to_clean); - - /* Step 3: Configure the consumer index register */ - netc_write(cbdr->regs.cir, cbdr->next_to_use); - - /* Step4: Configure the number of BDs of the Control BD Ring */ + /* Step 2: Configure the number of BDs of the Control BD Ring */ netc_write(cbdr->regs.lenr, cbdr->bd_num); - /* Step 5: Enable the Control BD Ring */ + /* Step 3: Enable the Control BD Ring */ netc_write(cbdr->regs.mr, NETC_CBDR_MR_EN); return 0; diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 5c8fdcef759b..41e0d85d15da 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -348,10 +348,11 @@ struct bufdesc_ex { * the skbuffer directly. */ +#define FEC_DRV_RESERVE_SPACE (XDP_PACKET_HEADROOM + \ + SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) #define FEC_ENET_XDP_HEADROOM (XDP_PACKET_HEADROOM) #define FEC_ENET_RX_PAGES 256 -#define FEC_ENET_RX_FRSIZE (PAGE_SIZE - FEC_ENET_XDP_HEADROOM \ - - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) +#define FEC_ENET_RX_FRSIZE (PAGE_SIZE - FEC_DRV_RESERVE_SPACE) #define FEC_ENET_RX_FRPPG (PAGE_SIZE / FEC_ENET_RX_FRSIZE) #define RX_RING_SIZE (FEC_ENET_RX_FRPPG * FEC_ENET_RX_PAGES) #define FEC_ENET_TX_FRSIZE 2048 @@ -513,6 +514,9 @@ struct bufdesc_ex { */ #define FEC_QUIRK_HAS_MDIO_C45 BIT(24) +/* Jumbo Frame support */ +#define FEC_QUIRK_JUMBO_FRAME BIT(25) + struct bufdesc_prop { int qid; /* Address of Rx and Tx buffers */ @@ -619,6 +623,9 @@ struct fec_enet_private { unsigned int total_tx_ring_size; unsigned int total_rx_ring_size; + unsigned int max_buf_size; + unsigned int pagepool_order; + unsigned int rx_frame_size; struct platform_device *pdev; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 1383918f8a3f..1edcfaee6819 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -167,7 +167,8 @@ static const struct fec_devinfo fec_imx8qm_info = { FEC_QUIRK_ERR007885 | FEC_QUIRK_BUG_CAPTURE | FEC_QUIRK_HAS_RACC | FEC_QUIRK_HAS_COALESCE | FEC_QUIRK_CLEAR_SETUP_MII | FEC_QUIRK_HAS_MULTI_QUEUES | - FEC_QUIRK_DELAYED_CLKS_SUPPORT | FEC_QUIRK_HAS_MDIO_C45, + FEC_QUIRK_DELAYED_CLKS_SUPPORT | FEC_QUIRK_HAS_MDIO_C45 | + FEC_QUIRK_JUMBO_FRAME, }; static const struct fec_devinfo fec_s32v234_info = { @@ -233,6 +234,7 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); * 2048 byte skbufs are allocated. However, alignment requirements * varies between FEC variants. Worst case is 64, so round down by 64. 
*/ +#define MAX_JUMBO_BUF_SIZE (round_down(16384 - FEC_DRV_RESERVE_SPACE - 64, 64)) #define PKT_MAXBUF_SIZE (round_down(2048 - 64, 64)) #define PKT_MINBUF_SIZE 64 @@ -253,9 +255,9 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ defined(CONFIG_M520x) || defined(CONFIG_M532x) || defined(CONFIG_ARM) || \ defined(CONFIG_ARM64) -#define OPT_FRAME_SIZE (PKT_MAXBUF_SIZE << 16) +#define OPT_ARCH_HAS_MAX_FL 1 #else -#define OPT_FRAME_SIZE 0 +#define OPT_ARCH_HAS_MAX_FL 0 #endif /* FEC MII MMFR bits definition */ @@ -470,14 +472,14 @@ fec_enet_create_page_pool(struct fec_enet_private *fep, { struct bpf_prog *xdp_prog = READ_ONCE(fep->xdp_prog); struct page_pool_params pp_params = { - .order = 0, + .order = fep->pagepool_order, .flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV, .pool_size = size, .nid = dev_to_node(&fep->pdev->dev), .dev = &fep->pdev->dev, .dma_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE, .offset = FEC_ENET_XDP_HEADROOM, - .max_len = FEC_ENET_RX_FRSIZE, + .max_len = fep->rx_frame_size, }; int err; @@ -1083,7 +1085,7 @@ static void fec_enet_enable_ring(struct net_device *ndev) for (i = 0; i < fep->num_rx_queues; i++) { rxq = fep->rx_queue[i]; writel(rxq->bd.dma, fep->hwp + FEC_R_DES_START(i)); - writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_R_BUFF_SIZE(i)); + writel(fep->max_buf_size, fep->hwp + FEC_R_BUFF_SIZE(i)); /* enable DMA1/2 */ if (i) @@ -1145,8 +1147,11 @@ static void fec_restart(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); - u32 rcntl = OPT_FRAME_SIZE | FEC_RCR_MII; u32 ecntl = FEC_ECR_ETHEREN; + u32 rcntl = FEC_RCR_MII; + + if (OPT_ARCH_HAS_MAX_FL) + rcntl |= (fep->netdev->mtu + ETH_HLEN + ETH_FCS_LEN) << 16; if (fep->bufdesc_ex) fec_ptp_save_state(fep); @@ -1191,7 +1196,7 @@ fec_restart(struct net_device *ndev) else val &= ~FEC_RACC_OPTIONS; writel(val, fep->hwp + FEC_RACC); - writel(PKT_MAXBUF_SIZE, fep->hwp + FEC_FTRL); + writel(min(fep->rx_frame_size, fep->max_buf_size), fep->hwp + FEC_FTRL); } #endif @@ -1278,8 +1283,18 @@ fec_restart(struct net_device *ndev) if (fep->quirks & FEC_QUIRK_ENET_MAC) { /* enable ENET endian swap */ ecntl |= FEC_ECR_BYTESWP; - /* enable ENET store and forward mode */ - writel(FEC_TXWMRK_STRFWD, fep->hwp + FEC_X_WMRK); + + /* When Jumbo Frame is enabled, the FIFO may not be large enough + * to hold an entire frame. In such cases, if the MTU exceeds + * (PKT_MAXBUF_SIZE - ETH_HLEN - ETH_FCS_LEN), configure the interface + * to operate in cut-through mode, triggered by the FIFO threshold. + * Otherwise, enable the ENET store-and-forward mode. + */ + if ((fep->quirks & FEC_QUIRK_JUMBO_FRAME) && + (ndev->mtu > (PKT_MAXBUF_SIZE - ETH_HLEN - ETH_FCS_LEN))) + writel(0xF, fep->hwp + FEC_X_WMRK); + else + writel(FEC_TXWMRK_STRFWD, fep->hwp + FEC_X_WMRK); } if (fep->bufdesc_ex) @@ -1780,7 +1795,7 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) * These get messed up if we get called due to a busy condition. */ bdp = rxq->bd.cur; - xdp_init_buff(&xdp, PAGE_SIZE, &rxq->xdp_rxq); + xdp_init_buff(&xdp, PAGE_SIZE << fep->pagepool_order, &rxq->xdp_rxq); while (!((status = fec16_to_cpu(bdp->cbd_sc)) & BD_ENET_RX_EMPTY)) { @@ -1850,7 +1865,8 @@ fec_enet_rx_queue(struct net_device *ndev, u16 queue_id, int budget) * include that when passing upstream as it messes up * bridging applications. 
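The FEC jumbo-frame changes size the receive buffers from the MTU: fec_change_mtu() (in the hunk just below) picks a page-pool page order large enough for the frame plus the driver's reserved headroom and shared-info tail, and the rx path then builds skbs over PAGE_SIZE << order bytes. A rough userspace sketch of that sizing, with the page size and reserve written as assumed constants rather than the kernel's XDP_PACKET_HEADROOM and SKB_DATA_ALIGN(sizeof(struct skb_shared_info)):

#include <stdio.h>

/* Assumed values for illustration only -- the real reserve depends on
 * XDP_PACKET_HEADROOM and the aligned size of struct skb_shared_info. */
#define PAGE_SIZE_SKETCH   4096u
#define DRV_RESERVE_SKETCH 576u        /* e.g. 256 headroom + ~320 shared info */
#define ETH_OVERHEAD       (14u + 4u)  /* Ethernet header + FCS */

/* Smallest page order whose buffer fits mtu + overhead + reserve. */
static unsigned int rx_page_order(unsigned int mtu)
{
	unsigned int need = mtu + ETH_OVERHEAD + DRV_RESERVE_SKETCH;
	unsigned int order = 0;

	while ((PAGE_SIZE_SKETCH << order) < need)
		order++;

	return order;
}

int main(void)
{
	printf("mtu 1500 -> order %u\n", rx_page_order(1500)); /* 0: one 4 KiB page */
	printf("mtu 9000 -> order %u\n", rx_page_order(9000)); /* 2: four pages     */
	return 0;
}
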
*/ - skb = build_skb(page_address(page), PAGE_SIZE); + skb = build_skb(page_address(page), + PAGE_SIZE << fep->pagepool_order); if (unlikely(!skb)) { page_pool_recycle_direct(rxq->page_pool, page); ndev->stats.rx_dropped++; @@ -2363,7 +2379,8 @@ static void fec_enet_phy_reset_after_clk_enable(struct net_device *ndev) */ phy_dev = of_phy_find_device(fep->phy_node); phy_reset_after_clk_enable(phy_dev); - put_device(&phy_dev->mdio.dev); + if (phy_dev) + put_device(&phy_dev->mdio.dev); } } @@ -4020,6 +4037,23 @@ static int fec_hwtstamp_set(struct net_device *ndev, return fec_ptp_set(ndev, config, extack); } +static int fec_change_mtu(struct net_device *ndev, int new_mtu) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + int order; + + if (netif_running(ndev)) + return -EBUSY; + + order = get_order(new_mtu + ETH_HLEN + ETH_FCS_LEN + + FEC_DRV_RESERVE_SPACE); + fep->rx_frame_size = (PAGE_SIZE << order) - FEC_DRV_RESERVE_SPACE; + fep->pagepool_order = order; + WRITE_ONCE(ndev->mtu, new_mtu); + + return 0; +} + static const struct net_device_ops fec_netdev_ops = { .ndo_open = fec_enet_open, .ndo_stop = fec_enet_close, @@ -4029,6 +4063,7 @@ static const struct net_device_ops fec_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_tx_timeout = fec_timeout, .ndo_set_mac_address = fec_set_mac_address, + .ndo_change_mtu = fec_change_mtu, .ndo_eth_ioctl = phy_do_ioctl_running, .ndo_set_features = fec_set_features, .ndo_bpf = fec_enet_bpf, @@ -4559,7 +4594,15 @@ fec_probe(struct platform_device *pdev) fec_enet_clk_enable(ndev, false); pinctrl_pm_select_sleep_state(&pdev->dev); - ndev->max_mtu = PKT_MAXBUF_SIZE - ETH_HLEN - ETH_FCS_LEN; + fep->pagepool_order = 0; + fep->rx_frame_size = FEC_ENET_RX_FRSIZE; + + if (fep->quirks & FEC_QUIRK_JUMBO_FRAME) + fep->max_buf_size = MAX_JUMBO_BUF_SIZE; + else + fep->max_buf_size = PKT_MAXBUF_SIZE; + + ndev->max_mtu = fep->max_buf_size - ETH_HLEN - ETH_FCS_LEN; ret = register_netdev(ndev); if (ret) diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c index a39fcea6a77a..f27ff625fe29 100644 --- a/drivers/net/ethernet/freescale/fman/mac.c +++ b/drivers/net/ethernet/freescale/fman/mac.c @@ -14,8 +14,6 @@ #include <linux/device.h> #include <linux/phy.h> #include <linux/netdevice.h> -#include <linux/phy_fixed.h> -#include <linux/phylink.h> #include <linux/etherdevice.h> #include <linux/libfdt_env.h> #include <linux/platform_device.h> diff --git a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c index ba83dbf4ed22..1966dba512f8 100644 --- a/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c +++ b/drivers/net/ethernet/fungible/funeth/funeth_ethtool.c @@ -930,7 +930,8 @@ static void fun_get_rmon_stats(struct net_device *netdev, } static void fun_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *stats) + struct ethtool_fec_stats *stats, + struct ethtool_fec_hist *hist) { const struct funeth_priv *fp = netdev_priv(netdev); diff --git a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c index 8f5021e59e0a..0e2b703c673a 100644 --- a/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_buffer_mgmt_dqo.c @@ -260,6 +260,11 @@ struct page_pool *gve_rx_create_page_pool(struct gve_priv *priv, .offset = xdp ? 
XDP_PACKET_HEADROOM : 0, }; + if (priv->header_split_enabled) { + pp.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; + pp.queue_idx = rx->q_num; + } + return page_pool_create(&pp); } diff --git a/drivers/net/ethernet/google/gve/gve_rx_dqo.c b/drivers/net/ethernet/google/gve/gve_rx_dqo.c index 7380c2b7a2d8..55393b784317 100644 --- a/drivers/net/ethernet/google/gve/gve_rx_dqo.c +++ b/drivers/net/ethernet/google/gve/gve_rx_dqo.c @@ -718,6 +718,24 @@ static int gve_rx_xsk_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, return 0; } +static void gve_dma_sync(struct gve_priv *priv, struct gve_rx_ring *rx, + struct gve_rx_buf_state_dqo *buf_state, u16 buf_len) +{ + struct gve_rx_slot_page_info *page_info = &buf_state->page_info; + + if (rx->dqo.page_pool) { + page_pool_dma_sync_netmem_for_cpu(rx->dqo.page_pool, + page_info->netmem, + page_info->page_offset, + buf_len); + } else { + dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, + page_info->page_offset + + page_info->pad, + buf_len, DMA_FROM_DEVICE); + } +} + /* Returns 0 if descriptor is completed successfully. * Returns -EINVAL if descriptor is invalid. * Returns -ENOMEM if data cannot be copied to skb. @@ -793,13 +811,18 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, rx->rx_hsplit_unsplit_pkt += unsplit; rx->rx_hsplit_bytes += hdr_len; u64_stats_update_end(&rx->statss); + } else if (!rx->ctx.skb_head && rx->dqo.page_pool && + netmem_is_net_iov(buf_state->page_info.netmem)) { + /* when header split is disabled, the header went to the packet + * buffer. If the packet buffer is a net_iov, those can't be + * easily mapped into the kernel space to access the header + * required to process the packet. + */ + goto error; } /* Sync the portion of dma buffer for CPU to read. */ - dma_sync_single_range_for_cpu(&priv->pdev->dev, buf_state->addr, - buf_state->page_info.page_offset + - buf_state->page_info.pad, - buf_len, DMA_FROM_DEVICE); + gve_dma_sync(priv, rx, buf_state, buf_len); /* Append to current skb if one exists. 
*/ if (rx->ctx.skb_head) { @@ -837,7 +860,9 @@ static int gve_rx_dqo(struct napi_struct *napi, struct gve_rx_ring *rx, u64_stats_update_end(&rx->statss); } - if (eop && buf_len <= priv->rx_copybreak) { + if (eop && buf_len <= priv->rx_copybreak && + !(rx->dqo.page_pool && + netmem_is_net_iov(buf_state->page_info.netmem))) { rx->ctx.skb_head = gve_rx_copy(priv->dev, napi, &buf_state->page_info, buf_len); if (unlikely(!rx->ctx.skb_head)) diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c index 2e64dc1ab355..0b92a2e5e986 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_main.c @@ -417,7 +417,7 @@ static int hbg_pci_init(struct pci_dev *pdev) priv->io_base = pcim_iomap_table(pdev)[0]; if (!priv->io_base) - return dev_err_probe(dev, -ENOMEM, "failed to get io base\n"); + return -ENOMEM; pci_set_master(pdev); return 0; diff --git a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c index 8b7b476ed7fb..37791de47f6f 100644 --- a/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c +++ b/drivers/net/ethernet/hisilicon/hibmcge/hbg_mdio.c @@ -278,8 +278,7 @@ int hbg_mdio_init(struct hbg_priv *priv) mdio_bus = devm_mdiobus_alloc(dev); if (!mdio_bus) - return dev_err_probe(dev, -ENOMEM, - "failed to alloc MDIO bus\n"); + return -ENOMEM; mdio_bus->parent = dev; mdio_bus->priv = priv; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c index 0255c8acb744..4cce4f4ba6b0 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c @@ -843,7 +843,7 @@ static int hns3_dbg_bd_file_init(struct hnae3_handle *handle, u32 cmd) entry_dir = hns3_dbg_dentry[hns3_dbg_cmd[cmd].dentry].dentry; max_queue_num = hns3_get_max_available_channels(handle); - data = devm_kzalloc(&handle->pdev->dev, max_queue_num * sizeof(*data), + data = devm_kcalloc(&handle->pdev->dev, max_queue_num, sizeof(*data), GFP_KERNEL); if (!data) return -ENOMEM; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index d5454e126c85..a5eefa28454c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -1659,7 +1659,8 @@ static void hns3_set_msglevel(struct net_device *netdev, u32 msg_level) } static void hns3_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct hnae3_handle *handle = hns3_get_handle(netdev); struct hnae3_ae_dev *ae_dev = hns3_get_ae_dev(handle); @@ -1927,6 +1928,31 @@ static int hns3_set_tx_spare_buf_size(struct net_device *netdev, return ret; } +static int hns3_check_tx_copybreak(struct net_device *netdev, u32 copybreak) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + + if (copybreak < priv->min_tx_copybreak) { + netdev_err(netdev, "tx copybreak %u should be no less than %u!\n", + copybreak, priv->min_tx_copybreak); + return -EINVAL; + } + return 0; +} + +static int hns3_check_tx_spare_buf_size(struct net_device *netdev, u32 buf_size) +{ + struct hns3_nic_priv *priv = netdev_priv(netdev); + + if (buf_size < priv->min_tx_spare_buf_size) { + netdev_err(netdev, + "tx spare buf size %u should be no less than %u!\n", + buf_size, priv->min_tx_spare_buf_size); + return -EINVAL; + } + return 0; +} + 
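The gve hunks above share one rule: buffers that the page pool hands out as unreadable netmem (net_iov, e.g. device memory) must never be touched by the CPU, so header processing and copybreak copies bail out for them. A minimal sketch of that guard using the in-tree netmem_is_net_iov() helper; the surrounding function and the -EFAULT policy are hypothetical, not gve code:

#include <net/netmem.h>

/* Hypothetical RX helper: refuse any path that needs a kernel-readable
 * buffer when the frame landed in an unreadable net_iov.
 */
static int example_check_cpu_readable(netmem_ref netmem)
{
	if (netmem_is_net_iov(netmem))
		return -EFAULT;	/* caller drops the frame, as gve does */

	return 0;
}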
static int hns3_set_tunable(struct net_device *netdev, const struct ethtool_tunable *tuna, const void *data) @@ -1943,6 +1969,10 @@ static int hns3_set_tunable(struct net_device *netdev, switch (tuna->id) { case ETHTOOL_TX_COPYBREAK: + ret = hns3_check_tx_copybreak(netdev, *(u32 *)data); + if (ret) + return ret; + priv->tx_copybreak = *(u32 *)data; for (i = 0; i < h->kinfo.num_tqps; i++) @@ -1957,6 +1987,10 @@ static int hns3_set_tunable(struct net_device *netdev, break; case ETHTOOL_TX_COPYBREAK_BUF_SIZE: + ret = hns3_check_tx_spare_buf_size(netdev, *(u32 *)data); + if (ret) + return ret; + old_tx_spare_buf_size = h->kinfo.tx_spare_buf_size; new_tx_spare_buf_size = *(u32 *)data; netdev_info(netdev, "request to set tx spare buf size from %u to %u\n", diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index f209a05e2033..9d34d28ff168 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -2182,8 +2182,8 @@ static bool hclge_drop_pfc_buf_till_fit(struct hclge_dev *hdev, return hclge_is_rx_buf_ok(hdev, buf_alloc, rx_all); } -static int hclge_only_alloc_priv_buff(struct hclge_dev *hdev, - struct hclge_pkt_buf_alloc *buf_alloc) +static bool hclge_only_alloc_priv_buff(struct hclge_dev *hdev, + struct hclge_pkt_buf_alloc *buf_alloc) { #define COMPENSATE_BUFFER 0x3C00 #define COMPENSATE_HALF_MPS_NUM 5 @@ -12912,7 +12912,8 @@ static int __init hclge_init(void) { pr_debug("%s is initializing\n", HCLGE_NAME); - hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, HCLGE_NAME); + hclge_wq = alloc_workqueue("%s", WQ_UNBOUND, 0, + HCLGE_NAME); if (!hclge_wq) { pr_err("%s: failed to create workqueue\n", HCLGE_NAME); return -ENOMEM; diff --git a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c index 03e42512a2d5..300bc267a259 100644 --- a/drivers/net/ethernet/huawei/hinic/hinic_devlink.c +++ b/drivers/net/ethernet/huawei/hinic/hinic_devlink.c @@ -443,8 +443,9 @@ int hinic_health_reporters_create(struct hinic_devlink_priv *priv) struct devlink *devlink = priv_to_devlink(priv); priv->hw_fault_reporter = - devlink_health_reporter_create(devlink, &hinic_hw_fault_reporter_ops, - 0, priv); + devlink_health_reporter_create(devlink, + &hinic_hw_fault_reporter_ops, + priv); if (IS_ERR(priv->hw_fault_reporter)) { dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create hw fault reporter, err: %ld\n", PTR_ERR(priv->hw_fault_reporter)); @@ -452,8 +453,9 @@ int hinic_health_reporters_create(struct hinic_devlink_priv *priv) } priv->fw_fault_reporter = - devlink_health_reporter_create(devlink, &hinic_fw_fault_reporter_ops, - 0, priv); + devlink_health_reporter_create(devlink, + &hinic_fw_fault_reporter_ops, + priv); if (IS_ERR(priv->fw_fault_reporter)) { dev_warn(&priv->hwdev->hwif->pdev->dev, "Failed to create fw fault reporter, err: %ld\n", PTR_ERR(priv->fw_fault_reporter)); diff --git a/drivers/net/ethernet/huawei/hinic3/Makefile b/drivers/net/ethernet/huawei/hinic3/Makefile index 509dfbfb0e96..c3efa45a6a42 100644 --- a/drivers/net/ethernet/huawei/hinic3/Makefile +++ b/drivers/net/ethernet/huawei/hinic3/Makefile @@ -3,7 +3,9 @@ obj-$(CONFIG_HINIC3) += hinic3.o -hinic3-objs := hinic3_common.o \ +hinic3-objs := hinic3_cmdq.o \ + hinic3_common.o \ + hinic3_eqs.o \ hinic3_hw_cfg.o \ hinic3_hw_comm.o \ hinic3_hwdev.o \ @@ -12,10 +14,12 @@ hinic3-objs := hinic3_common.o \ hinic3_lld.o \ hinic3_main.o \ hinic3_mbox.o \ + 
hinic3_mgmt.o \ hinic3_netdev_ops.o \ hinic3_nic_cfg.o \ hinic3_nic_io.o \ hinic3_queue_common.o \ + hinic3_rss.o \ hinic3_rx.o \ hinic3_tx.o \ hinic3_wq.o diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c new file mode 100644 index 000000000000..ef539d1b69a3 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.c @@ -0,0 +1,915 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. + +#include <linux/bitfield.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> + +#include "hinic3_cmdq.h" +#include "hinic3_hwdev.h" +#include "hinic3_hwif.h" +#include "hinic3_mbox.h" + +#define CMDQ_BUF_SIZE 2048 +#define CMDQ_WQEBB_SIZE 64 + +#define CMDQ_CMD_TIMEOUT 5000 +#define CMDQ_ENABLE_WAIT_TIMEOUT 300 + +#define CMDQ_CTXT_CURR_WQE_PAGE_PFN_MASK GENMASK_ULL(51, 0) +#define CMDQ_CTXT_EQ_ID_MASK GENMASK_ULL(60, 53) +#define CMDQ_CTXT_CEQ_ARM_MASK BIT_ULL(61) +#define CMDQ_CTXT_CEQ_EN_MASK BIT_ULL(62) +#define CMDQ_CTXT_HW_BUSY_BIT_MASK BIT_ULL(63) + +#define CMDQ_CTXT_WQ_BLOCK_PFN_MASK GENMASK_ULL(51, 0) +#define CMDQ_CTXT_CI_MASK GENMASK_ULL(63, 52) +#define CMDQ_CTXT_SET(val, member) \ + FIELD_PREP(CMDQ_CTXT_##member##_MASK, val) + +#define CMDQ_WQE_HDR_BUFDESC_LEN_MASK GENMASK(7, 0) +#define CMDQ_WQE_HDR_COMPLETE_FMT_MASK BIT(15) +#define CMDQ_WQE_HDR_DATA_FMT_MASK BIT(22) +#define CMDQ_WQE_HDR_COMPLETE_REQ_MASK BIT(23) +#define CMDQ_WQE_HDR_COMPLETE_SECT_LEN_MASK GENMASK(28, 27) +#define CMDQ_WQE_HDR_CTRL_LEN_MASK GENMASK(30, 29) +#define CMDQ_WQE_HDR_HW_BUSY_BIT_MASK BIT(31) +#define CMDQ_WQE_HDR_SET(val, member) \ + FIELD_PREP(CMDQ_WQE_HDR_##member##_MASK, val) +#define CMDQ_WQE_HDR_GET(val, member) \ + FIELD_GET(CMDQ_WQE_HDR_##member##_MASK, le32_to_cpu(val)) + +#define CMDQ_CTRL_PI_MASK GENMASK(15, 0) +#define CMDQ_CTRL_CMD_MASK GENMASK(23, 16) +#define CMDQ_CTRL_MOD_MASK GENMASK(28, 24) +#define CMDQ_CTRL_HW_BUSY_BIT_MASK BIT(31) +#define CMDQ_CTRL_SET(val, member) \ + FIELD_PREP(CMDQ_CTRL_##member##_MASK, val) +#define CMDQ_CTRL_GET(val, member) \ + FIELD_GET(CMDQ_CTRL_##member##_MASK, val) + +#define CMDQ_WQE_ERRCODE_VAL_MASK GENMASK(30, 0) +#define CMDQ_WQE_ERRCODE_GET(val, member) \ + FIELD_GET(CMDQ_WQE_ERRCODE_##member##_MASK, le32_to_cpu(val)) + +#define CMDQ_DB_INFO_HI_PROD_IDX_MASK GENMASK(7, 0) +#define CMDQ_DB_INFO_SET(val, member) \ + FIELD_PREP(CMDQ_DB_INFO_##member##_MASK, val) + +#define CMDQ_DB_HEAD_QUEUE_TYPE_MASK BIT(23) +#define CMDQ_DB_HEAD_CMDQ_TYPE_MASK GENMASK(26, 24) +#define CMDQ_DB_HEAD_SET(val, member) \ + FIELD_PREP(CMDQ_DB_HEAD_##member##_MASK, val) + +#define CMDQ_CEQE_TYPE_MASK GENMASK(2, 0) +#define CMDQ_CEQE_GET(val, member) \ + FIELD_GET(CMDQ_CEQE_##member##_MASK, le32_to_cpu(val)) + +#define CMDQ_WQE_HEADER(wqe) ((struct cmdq_header *)(wqe)) +#define CMDQ_WQE_COMPLETED(ctrl_info) \ + CMDQ_CTRL_GET(le32_to_cpu(ctrl_info), HW_BUSY_BIT) + +#define CMDQ_PFN(addr) ((addr) >> 12) + +/* cmdq work queue's chip logical address table is up to 512B */ +#define CMDQ_WQ_CLA_SIZE 512 + +/* Completion codes: send, direct sync, force stop */ +#define CMDQ_SEND_CMPT_CODE 10 +#define CMDQ_DIRECT_SYNC_CMPT_CODE 11 +#define CMDQ_FORCE_STOP_CMPT_CODE 12 + +enum cmdq_data_format { + CMDQ_DATA_SGE = 0, + CMDQ_DATA_DIRECT = 1, +}; + +enum cmdq_ctrl_sect_len { + CMDQ_CTRL_SECT_LEN = 1, + CMDQ_CTRL_DIRECT_SECT_LEN = 2, +}; + +enum cmdq_bufdesc_len { + CMDQ_BUFDESC_LCMD_LEN = 2, + CMDQ_BUFDESC_SCMD_LEN = 3, +}; + +enum cmdq_completion_format { 
+ CMDQ_COMPLETE_DIRECT = 0, + CMDQ_COMPLETE_SGE = 1, +}; + +enum cmdq_cmd_type { + CMDQ_CMD_DIRECT_RESP, + CMDQ_CMD_SGE_RESP, +}; + +#define CMDQ_WQE_NUM_WQEBBS 1 + +static struct cmdq_wqe *cmdq_read_wqe(struct hinic3_wq *wq, u16 *ci) +{ + if (hinic3_wq_get_used(wq) == 0) + return NULL; + + *ci = wq->cons_idx & wq->idx_mask; + + return get_q_element(&wq->qpages, wq->cons_idx, NULL); +} + +struct hinic3_cmd_buf *hinic3_alloc_cmd_buf(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmd_buf *cmd_buf; + struct hinic3_cmdqs *cmdqs; + + cmdqs = hwdev->cmdqs; + + cmd_buf = kmalloc(sizeof(*cmd_buf), GFP_ATOMIC); + if (!cmd_buf) + return NULL; + + cmd_buf->buf = dma_pool_alloc(cmdqs->cmd_buf_pool, GFP_ATOMIC, + &cmd_buf->dma_addr); + if (!cmd_buf->buf) { + dev_err(hwdev->dev, "Failed to allocate cmdq cmd buf from the pool\n"); + goto err_free_cmd_buf; + } + + cmd_buf->size = cpu_to_le16(CMDQ_BUF_SIZE); + refcount_set(&cmd_buf->ref_cnt, 1); + + return cmd_buf; + +err_free_cmd_buf: + kfree(cmd_buf); + + return NULL; +} + +void hinic3_free_cmd_buf(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf *cmd_buf) +{ + struct hinic3_cmdqs *cmdqs; + + if (!refcount_dec_and_test(&cmd_buf->ref_cnt)) + return; + + cmdqs = hwdev->cmdqs; + + dma_pool_free(cmdqs->cmd_buf_pool, cmd_buf->buf, cmd_buf->dma_addr); + kfree(cmd_buf); +} + +static void cmdq_clear_cmd_buf(struct hinic3_cmdq_cmd_info *cmd_info, + struct hinic3_hwdev *hwdev) +{ + if (cmd_info->buf_in) { + hinic3_free_cmd_buf(hwdev, cmd_info->buf_in); + cmd_info->buf_in = NULL; + } +} + +static void clear_wqe_complete_bit(struct hinic3_cmdq *cmdq, + struct cmdq_wqe *wqe, u16 ci) +{ + struct cmdq_header *hdr = CMDQ_WQE_HEADER(wqe); + __le32 header_info = hdr->header_info; + enum cmdq_data_format df; + struct cmdq_ctrl *ctrl; + + df = CMDQ_WQE_HDR_GET(header_info, DATA_FMT); + if (df == CMDQ_DATA_SGE) + ctrl = &wqe->wqe_lcmd.ctrl; + else + ctrl = &wqe->wqe_scmd.ctrl; + + /* clear HW busy bit */ + ctrl->ctrl_info = 0; + cmdq->cmd_infos[ci].cmd_type = HINIC3_CMD_TYPE_NONE; + wmb(); /* verify wqe is clear before updating ci */ + hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS); +} + +static void cmdq_update_cmd_status(struct hinic3_cmdq *cmdq, u16 prod_idx, + struct cmdq_wqe *wqe) +{ + struct hinic3_cmdq_cmd_info *cmd_info; + struct cmdq_wqe_lcmd *wqe_lcmd; + __le32 status_info; + + wqe_lcmd = &wqe->wqe_lcmd; + cmd_info = &cmdq->cmd_infos[prod_idx]; + if (cmd_info->errcode) { + status_info = wqe_lcmd->status.status_info; + *cmd_info->errcode = CMDQ_WQE_ERRCODE_GET(status_info, VAL); + } + + if (cmd_info->direct_resp) + *cmd_info->direct_resp = wqe_lcmd->completion.resp.direct.val; +} + +static void cmdq_sync_cmd_handler(struct hinic3_cmdq *cmdq, + struct cmdq_wqe *wqe, u16 ci) +{ + spin_lock(&cmdq->cmdq_lock); + cmdq_update_cmd_status(cmdq, ci, wqe); + if (cmdq->cmd_infos[ci].cmpt_code) { + *cmdq->cmd_infos[ci].cmpt_code = CMDQ_DIRECT_SYNC_CMPT_CODE; + cmdq->cmd_infos[ci].cmpt_code = NULL; + } + + /* Ensure that completion code has been updated before updating done */ + smp_wmb(); + if (cmdq->cmd_infos[ci].done) { + complete(cmdq->cmd_infos[ci].done); + cmdq->cmd_infos[ci].done = NULL; + } + spin_unlock(&cmdq->cmdq_lock); + + cmdq_clear_cmd_buf(&cmdq->cmd_infos[ci], cmdq->hwdev); + clear_wqe_complete_bit(cmdq, wqe, ci); +} + +void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data) +{ + enum hinic3_cmdq_type cmdq_type = CMDQ_CEQE_GET(ceqe_data, TYPE); + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + struct hinic3_cmdq_cmd_info *cmd_info; + 
struct cmdq_wqe_lcmd *wqe_lcmd; + struct hinic3_cmdq *cmdq; + struct cmdq_wqe *wqe; + __le32 ctrl_info; + u16 ci; + + if (unlikely(cmdq_type >= ARRAY_SIZE(cmdqs->cmdq))) + return; + + cmdq = &cmdqs->cmdq[cmdq_type]; + while ((wqe = cmdq_read_wqe(&cmdq->wq, &ci)) != NULL) { + cmd_info = &cmdq->cmd_infos[ci]; + switch (cmd_info->cmd_type) { + case HINIC3_CMD_TYPE_NONE: + return; + case HINIC3_CMD_TYPE_TIMEOUT: + dev_warn(hwdev->dev, "Cmdq timeout, q_id: %u, ci: %u\n", + cmdq_type, ci); + fallthrough; + case HINIC3_CMD_TYPE_FAKE_TIMEOUT: + cmdq_clear_cmd_buf(cmd_info, hwdev); + clear_wqe_complete_bit(cmdq, wqe, ci); + break; + default: + /* only arm bit is using scmd wqe, + * the other wqe is lcmd + */ + wqe_lcmd = &wqe->wqe_lcmd; + ctrl_info = wqe_lcmd->ctrl.ctrl_info; + if (!CMDQ_WQE_COMPLETED(ctrl_info)) + return; + + dma_rmb(); + /* For FORCE_STOP cmd_type, we also need to wait for + * the firmware processing to complete to prevent the + * firmware from accessing the released cmd_buf + */ + if (cmd_info->cmd_type == HINIC3_CMD_TYPE_FORCE_STOP) { + cmdq_clear_cmd_buf(cmd_info, hwdev); + clear_wqe_complete_bit(cmdq, wqe, ci); + } else { + cmdq_sync_cmd_handler(cmdq, wqe, ci); + } + + break; + } + } +} + +static int wait_cmdqs_enable(struct hinic3_cmdqs *cmdqs) +{ + unsigned long end; + + end = jiffies + msecs_to_jiffies(CMDQ_ENABLE_WAIT_TIMEOUT); + do { + if (cmdqs->status & HINIC3_CMDQ_ENABLE) + return 0; + usleep_range(1000, 2000); + } while (time_before(jiffies, end) && !cmdqs->disable_flag); + + cmdqs->disable_flag = 1; + + return -EBUSY; +} + +static void cmdq_set_completion(struct cmdq_completion *complete, + struct hinic3_cmd_buf *buf_out) +{ + struct hinic3_sge *sge = &complete->resp.sge; + + hinic3_set_sge(sge, buf_out->dma_addr, cpu_to_le32(CMDQ_BUF_SIZE)); +} + +static struct cmdq_wqe *cmdq_get_wqe(struct hinic3_wq *wq, u16 *pi) +{ + if (!hinic3_wq_free_wqebbs(wq)) + return NULL; + + return hinic3_wq_get_one_wqebb(wq, pi); +} + +static void cmdq_set_lcmd_bufdesc(struct cmdq_wqe_lcmd *wqe, + struct hinic3_cmd_buf *buf_in) +{ + hinic3_set_sge(&wqe->buf_desc.sge, buf_in->dma_addr, + (__force __le32)buf_in->size); +} + +static void cmdq_set_db(struct hinic3_cmdq *cmdq, + enum hinic3_cmdq_type cmdq_type, u16 prod_idx) +{ + u8 __iomem *db_base = cmdq->hwdev->cmdqs->cmdqs_db_base; + u16 db_ofs = (prod_idx & 0xFF) << 3; + struct cmdq_db db; + + db.db_info = cpu_to_le32(CMDQ_DB_INFO_SET(prod_idx >> 8, HI_PROD_IDX)); + db.db_head = cpu_to_le32(CMDQ_DB_HEAD_SET(1, QUEUE_TYPE) | + CMDQ_DB_HEAD_SET(cmdq_type, CMDQ_TYPE)); + writeq(*(u64 *)&db, db_base + db_ofs); +} + +static void cmdq_wqe_fill(struct cmdq_wqe *hw_wqe, + const struct cmdq_wqe *shadow_wqe) +{ + const struct cmdq_header *src = (struct cmdq_header *)shadow_wqe; + struct cmdq_header *dst = (struct cmdq_header *)hw_wqe; + size_t len; + + len = sizeof(struct cmdq_wqe) - sizeof(struct cmdq_header); + memcpy(dst + 1, src + 1, len); + /* Ensure buffer len before updating header */ + wmb(); + WRITE_ONCE(*dst, *src); +} + +static void cmdq_prepare_wqe_ctrl(struct cmdq_wqe *wqe, u8 wrapped, + u8 mod, u8 cmd, u16 prod_idx, + enum cmdq_completion_format complete_format, + enum cmdq_data_format data_format, + enum cmdq_bufdesc_len buf_len) +{ + struct cmdq_header *hdr = CMDQ_WQE_HEADER(wqe); + enum cmdq_ctrl_sect_len ctrl_len; + struct cmdq_wqe_lcmd *wqe_lcmd; + struct cmdq_wqe_scmd *wqe_scmd; + struct cmdq_ctrl *ctrl; + + if (data_format == CMDQ_DATA_SGE) { + wqe_lcmd = &wqe->wqe_lcmd; + wqe_lcmd->status.status_info = 0; + ctrl = 
&wqe_lcmd->ctrl; + ctrl_len = CMDQ_CTRL_SECT_LEN; + } else { + wqe_scmd = &wqe->wqe_scmd; + wqe_scmd->status.status_info = 0; + ctrl = &wqe_scmd->ctrl; + ctrl_len = CMDQ_CTRL_DIRECT_SECT_LEN; + } + + ctrl->ctrl_info = + cpu_to_le32(CMDQ_CTRL_SET(prod_idx, PI) | + CMDQ_CTRL_SET(cmd, CMD) | + CMDQ_CTRL_SET(mod, MOD)); + + hdr->header_info = + cpu_to_le32(CMDQ_WQE_HDR_SET(buf_len, BUFDESC_LEN) | + CMDQ_WQE_HDR_SET(complete_format, COMPLETE_FMT) | + CMDQ_WQE_HDR_SET(data_format, DATA_FMT) | + CMDQ_WQE_HDR_SET(1, COMPLETE_REQ) | + CMDQ_WQE_HDR_SET(3, COMPLETE_SECT_LEN) | + CMDQ_WQE_HDR_SET(ctrl_len, CTRL_LEN) | + CMDQ_WQE_HDR_SET(wrapped, HW_BUSY_BIT)); +} + +static void cmdq_set_lcmd_wqe(struct cmdq_wqe *wqe, + enum cmdq_cmd_type cmd_type, + struct hinic3_cmd_buf *buf_in, + struct hinic3_cmd_buf *buf_out, + u8 wrapped, u8 mod, u8 cmd, u16 prod_idx) +{ + enum cmdq_completion_format complete_format = CMDQ_COMPLETE_DIRECT; + struct cmdq_wqe_lcmd *wqe_lcmd = &wqe->wqe_lcmd; + + switch (cmd_type) { + case CMDQ_CMD_DIRECT_RESP: + wqe_lcmd->completion.resp.direct.val = 0; + break; + case CMDQ_CMD_SGE_RESP: + if (buf_out) { + complete_format = CMDQ_COMPLETE_SGE; + cmdq_set_completion(&wqe_lcmd->completion, buf_out); + } + break; + } + + cmdq_prepare_wqe_ctrl(wqe, wrapped, mod, cmd, prod_idx, complete_format, + CMDQ_DATA_SGE, CMDQ_BUFDESC_LCMD_LEN); + cmdq_set_lcmd_bufdesc(wqe_lcmd, buf_in); +} + +static int hinic3_cmdq_sync_timeout_check(struct hinic3_cmdq *cmdq, + struct cmdq_wqe *wqe, u16 pi) +{ + struct cmdq_wqe_lcmd *wqe_lcmd; + struct cmdq_ctrl *ctrl; + __le32 ctrl_info; + + wqe_lcmd = &wqe->wqe_lcmd; + ctrl = &wqe_lcmd->ctrl; + ctrl_info = ctrl->ctrl_info; + if (!CMDQ_WQE_COMPLETED(ctrl_info)) { + dev_dbg(cmdq->hwdev->dev, "Cmdq sync command check busy bit not set\n"); + return -EFAULT; + } + cmdq_update_cmd_status(cmdq, pi, wqe); + + return 0; +} + +static void clear_cmd_info(struct hinic3_cmdq_cmd_info *cmd_info, + const struct hinic3_cmdq_cmd_info *saved_cmd_info) +{ + if (cmd_info->errcode == saved_cmd_info->errcode) + cmd_info->errcode = NULL; + + if (cmd_info->done == saved_cmd_info->done) + cmd_info->done = NULL; + + if (cmd_info->direct_resp == saved_cmd_info->direct_resp) + cmd_info->direct_resp = NULL; +} + +static int wait_cmdq_sync_cmd_completion(struct hinic3_cmdq *cmdq, + struct hinic3_cmdq_cmd_info *cmd_info, + struct hinic3_cmdq_cmd_info *saved_cmd_info, + u64 curr_msg_id, u16 curr_prod_idx, + struct cmdq_wqe *curr_wqe, + u32 timeout) +{ + ulong timeo = msecs_to_jiffies(timeout); + int err; + + if (wait_for_completion_timeout(saved_cmd_info->done, timeo)) + return 0; + + spin_lock_bh(&cmdq->cmdq_lock); + if (cmd_info->cmpt_code == saved_cmd_info->cmpt_code) + cmd_info->cmpt_code = NULL; + + if (*saved_cmd_info->cmpt_code == CMDQ_DIRECT_SYNC_CMPT_CODE) { + dev_dbg(cmdq->hwdev->dev, "Cmdq direct sync command has been completed\n"); + spin_unlock_bh(&cmdq->cmdq_lock); + return 0; + } + + if (curr_msg_id == cmd_info->cmdq_msg_id) { + err = hinic3_cmdq_sync_timeout_check(cmdq, curr_wqe, + curr_prod_idx); + if (err) + cmd_info->cmd_type = HINIC3_CMD_TYPE_TIMEOUT; + else + cmd_info->cmd_type = HINIC3_CMD_TYPE_FAKE_TIMEOUT; + } else { + err = -ETIMEDOUT; + dev_err(cmdq->hwdev->dev, + "Cmdq sync command current msg id mismatch cmd_info msg id\n"); + } + + clear_cmd_info(cmd_info, saved_cmd_info); + spin_unlock_bh(&cmdq->cmdq_lock); + + return err; +} + +static int cmdq_sync_cmd_direct_resp(struct hinic3_cmdq *cmdq, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, + __le64 *out_param) +{ + 
struct hinic3_cmdq_cmd_info *cmd_info, saved_cmd_info; + int cmpt_code = CMDQ_SEND_CMPT_CODE; + struct cmdq_wqe *curr_wqe, wqe = {}; + struct hinic3_wq *wq = &cmdq->wq; + u16 curr_prod_idx, next_prod_idx; + struct completion done; + u64 curr_msg_id; + int errcode; + u8 wrapped; + int err; + + spin_lock_bh(&cmdq->cmdq_lock); + curr_wqe = cmdq_get_wqe(wq, &curr_prod_idx); + if (!curr_wqe) { + spin_unlock_bh(&cmdq->cmdq_lock); + return -EBUSY; + } + + wrapped = cmdq->wrapped; + next_prod_idx = curr_prod_idx + CMDQ_WQE_NUM_WQEBBS; + if (next_prod_idx >= wq->q_depth) { + cmdq->wrapped ^= 1; + next_prod_idx -= wq->q_depth; + } + + cmd_info = &cmdq->cmd_infos[curr_prod_idx]; + init_completion(&done); + refcount_inc(&buf_in->ref_cnt); + cmd_info->cmd_type = HINIC3_CMD_TYPE_DIRECT_RESP; + cmd_info->done = &done; + cmd_info->errcode = &errcode; + cmd_info->direct_resp = out_param; + cmd_info->cmpt_code = &cmpt_code; + cmd_info->buf_in = buf_in; + saved_cmd_info = *cmd_info; + cmdq_set_lcmd_wqe(&wqe, CMDQ_CMD_DIRECT_RESP, buf_in, NULL, + wrapped, mod, cmd, curr_prod_idx); + + cmdq_wqe_fill(curr_wqe, &wqe); + (cmd_info->cmdq_msg_id)++; + curr_msg_id = cmd_info->cmdq_msg_id; + cmdq_set_db(cmdq, HINIC3_CMDQ_SYNC, next_prod_idx); + spin_unlock_bh(&cmdq->cmdq_lock); + + err = wait_cmdq_sync_cmd_completion(cmdq, cmd_info, &saved_cmd_info, + curr_msg_id, curr_prod_idx, + curr_wqe, CMDQ_CMD_TIMEOUT); + if (err) { + dev_err(cmdq->hwdev->dev, + "Cmdq sync command timeout, mod: %u, cmd: %u, prod idx: 0x%x\n", + mod, cmd, curr_prod_idx); + err = -ETIMEDOUT; + } + + if (cmpt_code == CMDQ_FORCE_STOP_CMPT_CODE) { + dev_dbg(cmdq->hwdev->dev, + "Force stop cmdq cmd, mod: %u, cmd: %u\n", mod, cmd); + err = -EAGAIN; + } + + smp_rmb(); /* read error code after completion */ + + return err ? 
err : errcode; +} + +int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, __le64 *out_param) +{ + struct hinic3_cmdqs *cmdqs; + int err; + + cmdqs = hwdev->cmdqs; + err = wait_cmdqs_enable(cmdqs); + if (err) { + dev_err(hwdev->dev, "Cmdq is disabled\n"); + return err; + } + + err = cmdq_sync_cmd_direct_resp(&cmdqs->cmdq[HINIC3_CMDQ_SYNC], + mod, cmd, buf_in, out_param); + + return err; +} + +static void cmdq_init_queue_ctxt(struct hinic3_hwdev *hwdev, u8 cmdq_id, + struct comm_cmdq_ctxt_info *ctxt_info) +{ + const struct hinic3_cmdqs *cmdqs; + u64 cmdq_first_block_paddr, pfn; + const struct hinic3_wq *wq; + + cmdqs = hwdev->cmdqs; + wq = &cmdqs->cmdq[cmdq_id].wq; + pfn = CMDQ_PFN(hinic3_wq_get_first_wqe_page_addr(wq)); + + ctxt_info->curr_wqe_page_pfn = + cpu_to_le64(CMDQ_CTXT_SET(1, HW_BUSY_BIT) | + CMDQ_CTXT_SET(1, CEQ_EN) | + CMDQ_CTXT_SET(1, CEQ_ARM) | + CMDQ_CTXT_SET(0, EQ_ID) | + CMDQ_CTXT_SET(pfn, CURR_WQE_PAGE_PFN)); + + if (!hinic3_wq_is_0_level_cla(wq)) { + cmdq_first_block_paddr = cmdqs->wq_block_paddr; + pfn = CMDQ_PFN(cmdq_first_block_paddr); + } + + ctxt_info->wq_block_pfn = cpu_to_le64(CMDQ_CTXT_SET(wq->cons_idx, CI) | + CMDQ_CTXT_SET(pfn, WQ_BLOCK_PFN)); +} + +static int init_cmdq(struct hinic3_cmdq *cmdq, struct hinic3_hwdev *hwdev, + enum hinic3_cmdq_type q_type) +{ + int err; + + cmdq->cmdq_type = q_type; + cmdq->wrapped = 1; + cmdq->hwdev = hwdev; + + spin_lock_init(&cmdq->cmdq_lock); + + cmdq->cmd_infos = kcalloc(cmdq->wq.q_depth, sizeof(*cmdq->cmd_infos), + GFP_KERNEL); + if (!cmdq->cmd_infos) { + err = -ENOMEM; + return err; + } + + return 0; +} + +static int hinic3_set_cmdq_ctxt(struct hinic3_hwdev *hwdev, u8 cmdq_id) +{ + struct comm_cmd_set_cmdq_ctxt cmdq_ctxt = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + cmdq_init_queue_ctxt(hwdev, cmdq_id, &cmdq_ctxt.ctxt); + cmdq_ctxt.func_id = hinic3_global_func_id(hwdev); + cmdq_ctxt.cmdq_id = cmdq_id; + + mgmt_msg_params_init_default(&msg_params, &cmdq_ctxt, + sizeof(cmdq_ctxt)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_CMDQ_CTXT, &msg_params); + if (err || cmdq_ctxt.head.status) { + dev_err(hwdev->dev, "Failed to set cmdq ctxt, err: %d, status: 0x%x\n", + err, cmdq_ctxt.head.status); + return -EFAULT; + } + + return 0; +} + +static int hinic3_set_cmdq_ctxts(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + u8 cmdq_type; + int err; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + err = hinic3_set_cmdq_ctxt(hwdev, cmdq_type); + if (err) + return err; + } + + cmdqs->status |= HINIC3_CMDQ_ENABLE; + cmdqs->disable_flag = 0; + + return 0; +} + +static int create_cmdq_wq(struct hinic3_hwdev *hwdev, + struct hinic3_cmdqs *cmdqs) +{ + u8 cmdq_type; + int err; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + err = hinic3_wq_create(hwdev, &cmdqs->cmdq[cmdq_type].wq, + CMDQ_DEPTH, CMDQ_WQEBB_SIZE); + if (err) { + dev_err(hwdev->dev, "Failed to create cmdq wq\n"); + goto err_destroy_wq; + } + } + + /* 1-level Chip Logical Address (CLA) must put all + * cmdq's wq page addr in one wq block + */ + if (!hinic3_wq_is_0_level_cla(&cmdqs->cmdq[HINIC3_CMDQ_SYNC].wq)) { + if (cmdqs->cmdq[HINIC3_CMDQ_SYNC].wq.qpages.num_pages > + CMDQ_WQ_CLA_SIZE / sizeof(u64)) { + err = -EINVAL; + dev_err(hwdev->dev, + "Cmdq number of wq pages exceeds limit: %lu\n", + CMDQ_WQ_CLA_SIZE / sizeof(u64)); + goto err_destroy_wq; + } + + cmdqs->wq_block_vaddr = + dma_alloc_coherent(hwdev->dev, 
HINIC3_MIN_PAGE_SIZE, + &cmdqs->wq_block_paddr, GFP_KERNEL); + if (!cmdqs->wq_block_vaddr) { + err = -ENOMEM; + goto err_destroy_wq; + } + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) + memcpy((u8 *)cmdqs->wq_block_vaddr + + CMDQ_WQ_CLA_SIZE * cmdq_type, + cmdqs->cmdq[cmdq_type].wq.wq_block_vaddr, + cmdqs->cmdq[cmdq_type].wq.qpages.num_pages * + sizeof(__be64)); + } + + return 0; + +err_destroy_wq: + while (cmdq_type > 0) { + cmdq_type--; + hinic3_wq_destroy(hwdev, &cmdqs->cmdq[cmdq_type].wq); + } + + return err; +} + +static void destroy_cmdq_wq(struct hinic3_hwdev *hwdev, + struct hinic3_cmdqs *cmdqs) +{ + u8 cmdq_type; + + if (cmdqs->wq_block_vaddr) + dma_free_coherent(hwdev->dev, HINIC3_MIN_PAGE_SIZE, + cmdqs->wq_block_vaddr, cmdqs->wq_block_paddr); + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) + hinic3_wq_destroy(hwdev, &cmdqs->cmdq[cmdq_type].wq); +} + +static int init_cmdqs(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs; + + cmdqs = kzalloc(sizeof(*cmdqs), GFP_KERNEL); + if (!cmdqs) + return -ENOMEM; + + hwdev->cmdqs = cmdqs; + cmdqs->hwdev = hwdev; + cmdqs->cmdq_num = hwdev->max_cmdq; + + cmdqs->cmd_buf_pool = dma_pool_create("hinic3_cmdq", hwdev->dev, + CMDQ_BUF_SIZE, CMDQ_BUF_SIZE, 0); + if (!cmdqs->cmd_buf_pool) { + dev_err(hwdev->dev, "Failed to create cmdq buffer pool\n"); + kfree(cmdqs); + return -ENOMEM; + } + + return 0; +} + +static void cmdq_flush_sync_cmd(struct hinic3_cmdq_cmd_info *cmd_info) +{ + if (cmd_info->cmd_type != HINIC3_CMD_TYPE_DIRECT_RESP) + return; + + cmd_info->cmd_type = HINIC3_CMD_TYPE_FORCE_STOP; + + if (cmd_info->cmpt_code && + *cmd_info->cmpt_code == CMDQ_SEND_CMPT_CODE) + *cmd_info->cmpt_code = CMDQ_FORCE_STOP_CMPT_CODE; + + if (cmd_info->done) { + complete(cmd_info->done); + cmd_info->done = NULL; + cmd_info->cmpt_code = NULL; + cmd_info->direct_resp = NULL; + cmd_info->errcode = NULL; + } +} + +static void hinic3_cmdq_flush_cmd(struct hinic3_cmdq *cmdq) +{ + struct hinic3_cmdq_cmd_info *cmd_info; + u16 ci; + + spin_lock_bh(&cmdq->cmdq_lock); + while (cmdq_read_wqe(&cmdq->wq, &ci)) { + hinic3_wq_put_wqebbs(&cmdq->wq, CMDQ_WQE_NUM_WQEBBS); + cmd_info = &cmdq->cmd_infos[ci]; + if (cmd_info->cmd_type == HINIC3_CMD_TYPE_DIRECT_RESP) + cmdq_flush_sync_cmd(cmd_info); + } + spin_unlock_bh(&cmdq->cmdq_lock); +} + +void hinic3_cmdq_flush_sync_cmd(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdq *cmdq; + u16 wqe_cnt, wqe_idx, i; + struct hinic3_wq *wq; + + cmdq = &hwdev->cmdqs->cmdq[HINIC3_CMDQ_SYNC]; + spin_lock_bh(&cmdq->cmdq_lock); + wq = &cmdq->wq; + wqe_cnt = hinic3_wq_get_used(wq); + for (i = 0; i < wqe_cnt; i++) { + wqe_idx = (wq->cons_idx + i) & wq->idx_mask; + cmdq_flush_sync_cmd(cmdq->cmd_infos + wqe_idx); + } + spin_unlock_bh(&cmdq->cmdq_lock); +} + +static void hinic3_cmdq_reset_all_cmd_buf(struct hinic3_cmdq *cmdq) +{ + u16 i; + + for (i = 0; i < cmdq->wq.q_depth; i++) + cmdq_clear_cmd_buf(&cmdq->cmd_infos[i], cmdq->hwdev); +} + +int hinic3_reinit_cmdq_ctxts(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + u8 cmdq_type; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + hinic3_cmdq_flush_cmd(&cmdqs->cmdq[cmdq_type]); + hinic3_cmdq_reset_all_cmd_buf(&cmdqs->cmdq[cmdq_type]); + cmdqs->cmdq[cmdq_type].wrapped = 1; + hinic3_wq_reset(&cmdqs->cmdq[cmdq_type].wq); + } + + return hinic3_set_cmdq_ctxts(hwdev); +} + +int hinic3_cmdqs_init(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs; + void __iomem *db_base; + u8 cmdq_type; + int err; + + 
err = init_cmdqs(hwdev); + if (err) + goto err_out; + + cmdqs = hwdev->cmdqs; + err = create_cmdq_wq(hwdev, cmdqs); + if (err) + goto err_free_cmdqs; + + err = hinic3_alloc_db_addr(hwdev, &db_base, NULL); + if (err) { + dev_err(hwdev->dev, "Failed to allocate doorbell address\n"); + goto err_destroy_cmdq_wq; + } + cmdqs->cmdqs_db_base = db_base; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + err = init_cmdq(&cmdqs->cmdq[cmdq_type], hwdev, cmdq_type); + if (err) { + dev_err(hwdev->dev, + "Failed to initialize cmdq type : %d\n", + cmdq_type); + goto err_free_cmd_infos; + } + } + + err = hinic3_set_cmdq_ctxts(hwdev); + if (err) + goto err_free_cmd_infos; + + return 0; + +err_free_cmd_infos: + while (cmdq_type > 0) { + cmdq_type--; + kfree(cmdqs->cmdq[cmdq_type].cmd_infos); + } + + hinic3_free_db_addr(hwdev, cmdqs->cmdqs_db_base); + +err_destroy_cmdq_wq: + destroy_cmdq_wq(hwdev, cmdqs); + +err_free_cmdqs: + dma_pool_destroy(cmdqs->cmd_buf_pool); + kfree(cmdqs); + +err_out: + return err; +} + +void hinic3_cmdqs_free(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + u8 cmdq_type; + + cmdqs->status &= ~HINIC3_CMDQ_ENABLE; + + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + hinic3_cmdq_flush_cmd(&cmdqs->cmdq[cmdq_type]); + hinic3_cmdq_reset_all_cmd_buf(&cmdqs->cmdq[cmdq_type]); + kfree(cmdqs->cmdq[cmdq_type].cmd_infos); + } + + hinic3_free_db_addr(hwdev, cmdqs->cmdqs_db_base); + destroy_cmdq_wq(hwdev, cmdqs); + dma_pool_destroy(cmdqs->cmd_buf_pool); + kfree(cmdqs); +} + +bool hinic3_cmdq_idle(struct hinic3_cmdq *cmdq) +{ + return hinic3_wq_get_used(&cmdq->wq) == 0; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h new file mode 100644 index 000000000000..f99c386a2780 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_cmdq.h @@ -0,0 +1,156 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
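A minimal caller-side sketch of the command-queue API added above (hinic3_alloc_cmd_buf(), hinic3_cmdq_direct_resp(), hinic3_free_cmd_buf()); the module/command IDs and the 16-byte request are hypothetical placeholders, and because hinic3_cmdq_direct_resp() takes its own reference on the buffer, the single hinic3_free_cmd_buf() below is sufficient:

#define EXAMPLE_MOD	0	/* hypothetical module ID */
#define EXAMPLE_CMD	0	/* hypothetical command ID */

static int example_send_direct_cmd(struct hinic3_hwdev *hwdev)
{
	struct hinic3_cmd_buf *buf;
	__le64 out_param = 0;
	int err;

	buf = hinic3_alloc_cmd_buf(hwdev);
	if (!buf)
		return -ENOMEM;

	/* Build the request in buf->buf and record its length. */
	memset(buf->buf, 0, 16);
	buf->size = cpu_to_le16(16);

	err = hinic3_cmdq_direct_resp(hwdev, EXAMPLE_MOD, EXAMPLE_CMD,
				      buf, &out_param);

	hinic3_free_cmd_buf(hwdev, buf);
	return err;
}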
*/ + +#ifndef _HINIC3_CMDQ_H_ +#define _HINIC3_CMDQ_H_ + +#include <linux/dmapool.h> + +#include "hinic3_hw_intf.h" +#include "hinic3_wq.h" + +#define CMDQ_DEPTH 4096 + +struct cmdq_db { + __le32 db_head; + __le32 db_info; +}; + +/* hw defined cmdq wqe header */ +struct cmdq_header { + __le32 header_info; + __le32 saved_data; +}; + +struct cmdq_lcmd_bufdesc { + struct hinic3_sge sge; + __le64 rsvd2; + __le64 rsvd3; +}; + +struct cmdq_status { + __le32 status_info; +}; + +struct cmdq_ctrl { + __le32 ctrl_info; +}; + +struct cmdq_direct_resp { + __le64 val; + __le64 rsvd; +}; + +struct cmdq_completion { + union { + struct hinic3_sge sge; + struct cmdq_direct_resp direct; + } resp; +}; + +struct cmdq_wqe_scmd { + struct cmdq_header header; + __le64 rsvd3; + struct cmdq_status status; + struct cmdq_ctrl ctrl; + struct cmdq_completion completion; + __le32 rsvd10[6]; +}; + +struct cmdq_wqe_lcmd { + struct cmdq_header header; + struct cmdq_status status; + struct cmdq_ctrl ctrl; + struct cmdq_completion completion; + struct cmdq_lcmd_bufdesc buf_desc; +}; + +struct cmdq_wqe { + union { + struct cmdq_wqe_scmd wqe_scmd; + struct cmdq_wqe_lcmd wqe_lcmd; + }; +}; + +static_assert(sizeof(struct cmdq_wqe) == 64); + +enum hinic3_cmdq_type { + HINIC3_CMDQ_SYNC = 0, + HINIC3_MAX_CMDQ_TYPES = 4 +}; + +enum hinic3_cmdq_status { + HINIC3_CMDQ_ENABLE = BIT(0), +}; + +enum hinic3_cmdq_cmd_type { + HINIC3_CMD_TYPE_NONE, + HINIC3_CMD_TYPE_DIRECT_RESP, + HINIC3_CMD_TYPE_FAKE_TIMEOUT, + HINIC3_CMD_TYPE_TIMEOUT, + HINIC3_CMD_TYPE_FORCE_STOP, +}; + +struct hinic3_cmd_buf { + void *buf; + dma_addr_t dma_addr; + __le16 size; + refcount_t ref_cnt; +}; + +struct hinic3_cmdq_cmd_info { + enum hinic3_cmdq_cmd_type cmd_type; + struct completion *done; + int *errcode; + /* completion code */ + int *cmpt_code; + __le64 *direct_resp; + u64 cmdq_msg_id; + struct hinic3_cmd_buf *buf_in; +}; + +struct hinic3_cmdq { + struct hinic3_wq wq; + enum hinic3_cmdq_type cmdq_type; + u8 wrapped; + /* synchronize command submission with completions via event queue */ + spinlock_t cmdq_lock; + struct hinic3_cmdq_cmd_info *cmd_infos; + struct hinic3_hwdev *hwdev; +}; + +struct hinic3_cmdqs { + struct hinic3_hwdev *hwdev; + struct hinic3_cmdq cmdq[HINIC3_MAX_CMDQ_TYPES]; + struct dma_pool *cmd_buf_pool; + /* doorbell area */ + u8 __iomem *cmdqs_db_base; + + /* When command queue uses multiple memory pages (1-level CLA), this + * block will hold aggregated indirection table for all command queues + * of cmdqs. Not used for small cmdq (0-level CLA). 
+ */ + dma_addr_t wq_block_paddr; + void *wq_block_vaddr; + + u32 status; + u32 disable_flag; + u8 cmdq_num; +}; + +int hinic3_cmdqs_init(struct hinic3_hwdev *hwdev); +void hinic3_cmdqs_free(struct hinic3_hwdev *hwdev); + +struct hinic3_cmd_buf *hinic3_alloc_cmd_buf(struct hinic3_hwdev *hwdev); +void hinic3_free_cmd_buf(struct hinic3_hwdev *hwdev, + struct hinic3_cmd_buf *cmd_buf); +void hinic3_cmdq_ceq_handler(struct hinic3_hwdev *hwdev, __le32 ceqe_data); + +int hinic3_cmdq_direct_resp(struct hinic3_hwdev *hwdev, u8 mod, u8 cmd, + struct hinic3_cmd_buf *buf_in, __le64 *out_param); + +void hinic3_cmdq_flush_sync_cmd(struct hinic3_hwdev *hwdev); +int hinic3_reinit_cmdq_ctxts(struct hinic3_hwdev *hwdev); +bool hinic3_cmdq_idle(struct hinic3_cmdq *cmdq); + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c index 0aa42068728c..fe4778d152cf 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.c @@ -3,6 +3,7 @@ #include <linux/delay.h> #include <linux/dma-mapping.h> +#include <linux/iopoll.h> #include "hinic3_common.h" @@ -51,3 +52,25 @@ void hinic3_dma_free_coherent_align(struct device *dev, dma_free_coherent(dev, mem_align->real_size, mem_align->ori_vaddr, mem_align->ori_paddr); } + +int hinic3_wait_for_timeout(void *priv_data, wait_cpl_handler handler, + u32 wait_total_ms, u32 wait_once_us) +{ + enum hinic3_wait_return ret; + int err; + + err = read_poll_timeout(handler, ret, ret == HINIC3_WAIT_PROCESS_CPL, + wait_once_us, wait_total_ms * USEC_PER_MSEC, + false, priv_data); + + return err; +} + +/* Data provided to/by cmdq is arranged in structs with little endian fields but + * every dword (32bits) should be swapped since HW swaps it again when it + * copies it from/to host memory. + */ +void hinic3_cmdq_buf_swab32(void *data, int len) +{ + swab32_array(data, len / sizeof(u32)); +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h index bb795dace04c..a8fabfae90fb 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_common.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_common.h @@ -18,10 +18,37 @@ struct hinic3_dma_addr_align { dma_addr_t align_paddr; }; +enum hinic3_wait_return { + HINIC3_WAIT_PROCESS_CPL = 0, + HINIC3_WAIT_PROCESS_WAITING = 1, +}; + +struct hinic3_sge { + __le32 hi_addr; + __le32 lo_addr; + __le32 len; + __le32 rsvd; +}; + +static inline void hinic3_set_sge(struct hinic3_sge *sge, dma_addr_t addr, + __le32 len) +{ + sge->hi_addr = cpu_to_le32(upper_32_bits(addr)); + sge->lo_addr = cpu_to_le32(lower_32_bits(addr)); + sge->len = len; + sge->rsvd = 0; +} + int hinic3_dma_zalloc_coherent_align(struct device *dev, u32 size, u32 align, gfp_t flag, struct hinic3_dma_addr_align *mem_align); void hinic3_dma_free_coherent_align(struct device *dev, struct hinic3_dma_addr_align *mem_align); +typedef enum hinic3_wait_return (*wait_cpl_handler)(void *priv_data); +int hinic3_wait_for_timeout(void *priv_data, wait_cpl_handler handler, + u32 wait_total_ms, u32 wait_once_us); + +void hinic3_cmdq_buf_swab32(void *data, int len); + #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h new file mode 100644 index 000000000000..e7417e8efa99 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_csr.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. 
All rights reserved. */ + +#ifndef _HINIC3_CSR_H_ +#define _HINIC3_CSR_H_ + +#define HINIC3_CFG_REGS_FLAG 0x40000000 +#define HINIC3_REGS_FLAG_MASK 0x3FFFFFFF + +#define HINIC3_VF_CFG_REG_OFFSET 0x2000 + +/* HW interface registers */ +#define HINIC3_CSR_FUNC_ATTR0_ADDR (HINIC3_CFG_REGS_FLAG + 0x0) +#define HINIC3_CSR_FUNC_ATTR1_ADDR (HINIC3_CFG_REGS_FLAG + 0x4) +#define HINIC3_CSR_FUNC_ATTR2_ADDR (HINIC3_CFG_REGS_FLAG + 0x8) +#define HINIC3_CSR_FUNC_ATTR3_ADDR (HINIC3_CFG_REGS_FLAG + 0xC) +#define HINIC3_CSR_FUNC_ATTR4_ADDR (HINIC3_CFG_REGS_FLAG + 0x10) +#define HINIC3_CSR_FUNC_ATTR5_ADDR (HINIC3_CFG_REGS_FLAG + 0x14) +#define HINIC3_CSR_FUNC_ATTR6_ADDR (HINIC3_CFG_REGS_FLAG + 0x18) + +#define HINIC3_FUNC_CSR_MAILBOX_DATA_OFF 0x80 +#define HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF (HINIC3_CFG_REGS_FLAG + 0x0100) +#define HINIC3_FUNC_CSR_MAILBOX_INT_OFF (HINIC3_CFG_REGS_FLAG + 0x0104) +#define HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF (HINIC3_CFG_REGS_FLAG + 0x0108) +#define HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF (HINIC3_CFG_REGS_FLAG + 0x010C) + +#define HINIC3_CSR_DMA_ATTR_TBL_ADDR (HINIC3_CFG_REGS_FLAG + 0x380) +#define HINIC3_CSR_DMA_ATTR_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x390) + +/* MSI-X registers */ +#define HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR (HINIC3_CFG_REGS_FLAG + 0x58) + +#define HINIC3_MSI_CLR_INDIR_RESEND_TIMER_CLR_MASK BIT(0) +#define HINIC3_MSI_CLR_INDIR_INT_MSK_SET_MASK BIT(1) +#define HINIC3_MSI_CLR_INDIR_INT_MSK_CLR_MASK BIT(2) +#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_SET_MASK BIT(3) +#define HINIC3_MSI_CLR_INDIR_AUTO_MSK_CLR_MASK BIT(4) +#define HINIC3_MSI_CLR_INDIR_SIMPLE_INDIR_IDX_MASK GENMASK(31, 22) +#define HINIC3_MSI_CLR_INDIR_SET(val, member) \ + FIELD_PREP(HINIC3_MSI_CLR_INDIR_##member##_MASK, val) + +/* EQ registers */ +#define HINIC3_AEQ_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x210) +#define HINIC3_CEQ_INDIR_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x290) + +#define HINIC3_EQ_INDIR_IDX_ADDR(type) \ + ((type == HINIC3_AEQ) ? HINIC3_AEQ_INDIR_IDX_ADDR : \ + HINIC3_CEQ_INDIR_IDX_ADDR) + +#define HINIC3_AEQ_MTT_OFF_BASE_ADDR (HINIC3_CFG_REGS_FLAG + 0x240) +#define HINIC3_CEQ_MTT_OFF_BASE_ADDR (HINIC3_CFG_REGS_FLAG + 0x2C0) + +#define HINIC3_CSR_EQ_PAGE_OFF_STRIDE 8 + +#define HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num) \ + (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE) + +#define HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num) \ + (HINIC3_AEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE + 4) + +#define HINIC3_CEQ_HI_PHYS_ADDR_REG(pg_num) \ + (HINIC3_CEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE) + +#define HINIC3_CEQ_LO_PHYS_ADDR_REG(pg_num) \ + (HINIC3_CEQ_MTT_OFF_BASE_ADDR + (pg_num) * \ + HINIC3_CSR_EQ_PAGE_OFF_STRIDE + 4) + +#define HINIC3_CSR_AEQ_CTRL_0_ADDR (HINIC3_CFG_REGS_FLAG + 0x200) +#define HINIC3_CSR_AEQ_CTRL_1_ADDR (HINIC3_CFG_REGS_FLAG + 0x204) +#define HINIC3_CSR_AEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x20C) +#define HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x50) + +#define HINIC3_CSR_CEQ_PROD_IDX_ADDR (HINIC3_CFG_REGS_FLAG + 0x28c) +#define HINIC3_CSR_CEQ_CI_SIMPLE_INDIR_ADDR (HINIC3_CFG_REGS_FLAG + 0x54) + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c new file mode 100644 index 000000000000..01686472985b --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.c @@ -0,0 +1,776 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
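The register map above is consumed through FIELD_PREP-style SET macros; as one hedged example (hinic3_msix_intr_clear_resend_bit() itself is not part of this diff, so this is only a plausible shape), a resend-timer clear for one MSI-X entry could be composed like this:

static void example_clear_msix_resend(struct hinic3_hwif *hwif, u16 msix_idx)
{
	u32 val;

	val = HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX) |
	      HINIC3_MSI_CLR_INDIR_SET(1, RESEND_TIMER_CLR);

	hinic3_hwif_write_reg(hwif, HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR, val);
}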
+ +#include <linux/delay.h> + +#include "hinic3_csr.h" +#include "hinic3_eqs.h" +#include "hinic3_hwdev.h" +#include "hinic3_hwif.h" +#include "hinic3_mbox.h" + +#define AEQ_CTRL_0_INTR_IDX_MASK GENMASK(9, 0) +#define AEQ_CTRL_0_DMA_ATTR_MASK GENMASK(17, 12) +#define AEQ_CTRL_0_PCI_INTF_IDX_MASK GENMASK(22, 20) +#define AEQ_CTRL_0_INTR_MODE_MASK BIT(31) +#define AEQ_CTRL_0_SET(val, member) \ + FIELD_PREP(AEQ_CTRL_0_##member##_MASK, val) + +#define AEQ_CTRL_1_LEN_MASK GENMASK(20, 0) +#define AEQ_CTRL_1_ELEM_SIZE_MASK GENMASK(25, 24) +#define AEQ_CTRL_1_PAGE_SIZE_MASK GENMASK(31, 28) +#define AEQ_CTRL_1_SET(val, member) \ + FIELD_PREP(AEQ_CTRL_1_##member##_MASK, val) + +#define CEQ_CTRL_0_INTR_IDX_MASK GENMASK(9, 0) +#define CEQ_CTRL_0_DMA_ATTR_MASK GENMASK(17, 12) +#define CEQ_CTRL_0_LIMIT_KICK_MASK GENMASK(23, 20) +#define CEQ_CTRL_0_PCI_INTF_IDX_MASK GENMASK(25, 24) +#define CEQ_CTRL_0_PAGE_SIZE_MASK GENMASK(30, 27) +#define CEQ_CTRL_0_INTR_MODE_MASK BIT(31) +#define CEQ_CTRL_0_SET(val, member) \ + FIELD_PREP(CEQ_CTRL_0_##member##_MASK, val) + +#define CEQ_CTRL_1_LEN_MASK GENMASK(19, 0) +#define CEQ_CTRL_1_SET(val, member) \ + FIELD_PREP(CEQ_CTRL_1_##member##_MASK, val) + +#define CEQE_TYPE_MASK GENMASK(25, 23) +#define CEQE_TYPE(type) \ + FIELD_GET(CEQE_TYPE_MASK, le32_to_cpu(type)) + +#define CEQE_DATA_MASK GENMASK(25, 0) +#define CEQE_DATA(data) ((data) & cpu_to_le32(CEQE_DATA_MASK)) + +#define EQ_ELEM_DESC_TYPE_MASK GENMASK(6, 0) +#define EQ_ELEM_DESC_SRC_MASK BIT(7) +#define EQ_ELEM_DESC_SIZE_MASK GENMASK(15, 8) +#define EQ_ELEM_DESC_WRAPPED_MASK BIT(31) +#define EQ_ELEM_DESC_GET(val, member) \ + FIELD_GET(EQ_ELEM_DESC_##member##_MASK, le32_to_cpu(val)) + +#define EQ_CI_SIMPLE_INDIR_CI_MASK GENMASK(20, 0) +#define EQ_CI_SIMPLE_INDIR_ARMED_MASK BIT(21) +#define EQ_CI_SIMPLE_INDIR_AEQ_IDX_MASK GENMASK(31, 30) +#define EQ_CI_SIMPLE_INDIR_CEQ_IDX_MASK GENMASK(31, 24) +#define EQ_CI_SIMPLE_INDIR_SET(val, member) \ + FIELD_PREP(EQ_CI_SIMPLE_INDIR_##member##_MASK, val) + +#define EQ_CI_SIMPLE_INDIR_REG_ADDR(eq) \ + (((eq)->type == HINIC3_AEQ) ? \ + HINIC3_CSR_AEQ_CI_SIMPLE_INDIR_ADDR : \ + HINIC3_CSR_CEQ_CI_SIMPLE_INDIR_ADDR) + +#define EQ_PROD_IDX_REG_ADDR(eq) \ + (((eq)->type == HINIC3_AEQ) ? \ + HINIC3_CSR_AEQ_PROD_IDX_ADDR : HINIC3_CSR_CEQ_PROD_IDX_ADDR) + +#define EQ_HI_PHYS_ADDR_REG(type, pg_num) \ + (((type) == HINIC3_AEQ) ? \ + HINIC3_AEQ_HI_PHYS_ADDR_REG(pg_num) : \ + HINIC3_CEQ_HI_PHYS_ADDR_REG(pg_num)) + +#define EQ_LO_PHYS_ADDR_REG(type, pg_num) \ + (((type) == HINIC3_AEQ) ? \ + HINIC3_AEQ_LO_PHYS_ADDR_REG(pg_num) : \ + HINIC3_CEQ_LO_PHYS_ADDR_REG(pg_num)) + +#define EQ_MSIX_RESEND_TIMER_CLEAR 1 + +#define HINIC3_EQ_MAX_PAGES(eq) \ + ((eq)->type == HINIC3_AEQ ? 
\ + HINIC3_AEQ_MAX_PAGES : HINIC3_CEQ_MAX_PAGES) + +#define HINIC3_TASK_PROCESS_EQE_LIMIT 1024 +#define HINIC3_EQ_UPDATE_CI_STEP 64 +#define HINIC3_EQS_WQ_NAME "hinic3_eqs" + +#define HINIC3_EQ_VALID_SHIFT 31 +#define HINIC3_EQ_WRAPPED(eq) \ + ((eq)->wrapped << HINIC3_EQ_VALID_SHIFT) + +#define HINIC3_EQ_WRAPPED_SHIFT 20 +#define HINIC3_EQ_CONS_IDX(eq) \ + ((eq)->cons_idx | ((eq)->wrapped << HINIC3_EQ_WRAPPED_SHIFT)) + +static const struct hinic3_aeq_elem *get_curr_aeq_elem(const struct hinic3_eq *eq) +{ + return get_q_element(&eq->qpages, eq->cons_idx, NULL); +} + +static const __be32 *get_curr_ceq_elem(const struct hinic3_eq *eq) +{ + return get_q_element(&eq->qpages, eq->cons_idx, NULL); +} + +int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event, + hinic3_aeq_event_cb hwe_cb) +{ + struct hinic3_aeqs *aeqs; + + aeqs = hwdev->aeqs; + aeqs->aeq_cb[event] = hwe_cb; + spin_lock_init(&aeqs->aeq_lock); + + return 0; +} + +void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event) +{ + struct hinic3_aeqs *aeqs; + + aeqs = hwdev->aeqs; + + spin_lock_bh(&aeqs->aeq_lock); + aeqs->aeq_cb[event] = NULL; + spin_unlock_bh(&aeqs->aeq_lock); +} + +int hinic3_ceq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event, + hinic3_ceq_event_cb callback) +{ + struct hinic3_ceqs *ceqs; + + ceqs = hwdev->ceqs; + ceqs->ceq_cb[event] = callback; + spin_lock_init(&ceqs->ceq_lock); + + return 0; +} + +void hinic3_ceq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event) +{ + struct hinic3_ceqs *ceqs; + + ceqs = hwdev->ceqs; + + spin_lock_bh(&ceqs->ceq_lock); + ceqs->ceq_cb[event] = NULL; + spin_unlock_bh(&ceqs->ceq_lock); +} + +/* Set consumer index in the hw. */ +static void set_eq_cons_idx(struct hinic3_eq *eq, u32 arm_state) +{ + u32 addr = EQ_CI_SIMPLE_INDIR_REG_ADDR(eq); + u32 eq_wrap_ci, val; + + eq_wrap_ci = HINIC3_EQ_CONS_IDX(eq); + val = EQ_CI_SIMPLE_INDIR_SET(arm_state, ARMED); + if (eq->type == HINIC3_AEQ) { + val = val | + EQ_CI_SIMPLE_INDIR_SET(eq_wrap_ci, CI) | + EQ_CI_SIMPLE_INDIR_SET(eq->q_id, AEQ_IDX); + } else { + val = val | + EQ_CI_SIMPLE_INDIR_SET(eq_wrap_ci, CI) | + EQ_CI_SIMPLE_INDIR_SET(eq->q_id, CEQ_IDX); + } + + hinic3_hwif_write_reg(eq->hwdev->hwif, addr, val); +} + +static struct hinic3_ceqs *ceq_to_ceqs(const struct hinic3_eq *eq) +{ + return container_of(eq, struct hinic3_ceqs, ceq[eq->q_id]); +} + +static void ceq_event_handler(struct hinic3_ceqs *ceqs, __le32 ceqe) +{ + enum hinic3_ceq_event event = CEQE_TYPE(ceqe); + struct hinic3_hwdev *hwdev = ceqs->hwdev; + __le32 ceqe_data = CEQE_DATA(ceqe); + + if (event >= HINIC3_MAX_CEQ_EVENTS) { + dev_warn(hwdev->dev, "Ceq unknown event:%d, ceqe data: 0x%x\n", + event, ceqe_data); + return; + } + + spin_lock_bh(&ceqs->ceq_lock); + if (ceqs->ceq_cb[event]) + ceqs->ceq_cb[event](hwdev, ceqe_data); + + spin_unlock_bh(&ceqs->ceq_lock); +} + +static struct hinic3_aeqs *aeq_to_aeqs(const struct hinic3_eq *eq) +{ + return container_of(eq, struct hinic3_aeqs, aeq[eq->q_id]); +} + +static void aeq_event_handler(struct hinic3_aeqs *aeqs, __le32 aeqe, + const struct hinic3_aeq_elem *aeqe_pos) +{ + struct hinic3_hwdev *hwdev = aeqs->hwdev; + u8 data[HINIC3_AEQE_DATA_SIZE], size; + enum hinic3_aeq_type event; + hinic3_aeq_event_cb hwe_cb; + + if (EQ_ELEM_DESC_GET(aeqe, SRC)) + return; + + event = EQ_ELEM_DESC_GET(aeqe, TYPE); + if (event >= HINIC3_MAX_AEQ_EVENTS) { + dev_warn(hwdev->dev, "Aeq unknown event:%d\n", event); + return; + } + + memcpy(data, 
aeqe_pos->aeqe_data, HINIC3_AEQE_DATA_SIZE); + swab32_array((u32 *)data, HINIC3_AEQE_DATA_SIZE / sizeof(u32)); + size = EQ_ELEM_DESC_GET(aeqe, SIZE); + + spin_lock_bh(&aeqs->aeq_lock); + hwe_cb = aeqs->aeq_cb[event]; + if (hwe_cb) + hwe_cb(aeqs->hwdev, data, size); + spin_unlock_bh(&aeqs->aeq_lock); +} + +static int aeq_irq_handler(struct hinic3_eq *eq) +{ + const struct hinic3_aeq_elem *aeqe_pos; + struct hinic3_aeqs *aeqs; + u32 i, eqe_cnt = 0; + __le32 aeqe; + + aeqs = aeq_to_aeqs(eq); + for (i = 0; i < HINIC3_TASK_PROCESS_EQE_LIMIT; i++) { + aeqe_pos = get_curr_aeq_elem(eq); + aeqe = (__force __le32)swab32((__force __u32)aeqe_pos->desc); + /* HW updates wrapped bit, when it adds eq element event */ + if (EQ_ELEM_DESC_GET(aeqe, WRAPPED) == eq->wrapped) + return 0; + + /* Prevent speculative reads from element */ + dma_rmb(); + aeq_event_handler(aeqs, aeqe, aeqe_pos); + eq->cons_idx++; + if (eq->cons_idx == eq->eq_len) { + eq->cons_idx = 0; + eq->wrapped = !eq->wrapped; + } + + if (++eqe_cnt >= HINIC3_EQ_UPDATE_CI_STEP) { + eqe_cnt = 0; + set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED); + } + } + + return -EAGAIN; +} + +static int ceq_irq_handler(struct hinic3_eq *eq) +{ + struct hinic3_ceqs *ceqs; + u32 eqe_cnt = 0; + __be32 ceqe_raw; + __le32 ceqe; + u32 i; + + ceqs = ceq_to_ceqs(eq); + for (i = 0; i < HINIC3_TASK_PROCESS_EQE_LIMIT; i++) { + ceqe_raw = *get_curr_ceq_elem(eq); + ceqe = (__force __le32)swab32((__force __u32)ceqe_raw); + + /* HW updates wrapped bit, when it adds eq element event */ + if (EQ_ELEM_DESC_GET(ceqe, WRAPPED) == eq->wrapped) + return 0; + + ceq_event_handler(ceqs, ceqe); + eq->cons_idx++; + if (eq->cons_idx == eq->eq_len) { + eq->cons_idx = 0; + eq->wrapped = !eq->wrapped; + } + + if (++eqe_cnt >= HINIC3_EQ_UPDATE_CI_STEP) { + eqe_cnt = 0; + set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED); + } + } + + return -EAGAIN; +} + +static void reschedule_aeq_handler(struct hinic3_eq *eq) +{ + struct hinic3_aeqs *aeqs = aeq_to_aeqs(eq); + + queue_work(aeqs->workq, &eq->aeq_work); +} + +static int eq_irq_handler(struct hinic3_eq *eq) +{ + int err; + + if (eq->type == HINIC3_AEQ) + err = aeq_irq_handler(eq); + else + err = ceq_irq_handler(eq); + + set_eq_cons_idx(eq, err ? 
HINIC3_EQ_NOT_ARMED : + HINIC3_EQ_ARMED); + + return err; +} + +static void aeq_irq_work(struct work_struct *work) +{ + struct hinic3_eq *eq = container_of(work, struct hinic3_eq, aeq_work); + int err; + + err = eq_irq_handler(eq); + if (err) + reschedule_aeq_handler(eq); +} + +static irqreturn_t aeq_interrupt(int irq, void *data) +{ + struct workqueue_struct *workq; + struct hinic3_eq *aeq = data; + struct hinic3_hwdev *hwdev; + struct hinic3_aeqs *aeqs; + + aeqs = aeq_to_aeqs(aeq); + hwdev = aeq->hwdev; + + /* clear resend timer cnt register */ + workq = aeqs->workq; + hinic3_msix_intr_clear_resend_bit(hwdev, aeq->msix_entry_idx, + EQ_MSIX_RESEND_TIMER_CLEAR); + queue_work(workq, &aeq->aeq_work); + + return IRQ_HANDLED; +} + +static irqreturn_t ceq_interrupt(int irq, void *data) +{ + struct hinic3_eq *ceq = data; + int err; + + /* clear resend timer counters */ + hinic3_msix_intr_clear_resend_bit(ceq->hwdev, ceq->msix_entry_idx, + EQ_MSIX_RESEND_TIMER_CLEAR); + err = eq_irq_handler(ceq); + if (err) + return IRQ_NONE; + + return IRQ_HANDLED; +} + +static int hinic3_set_ceq_ctrl_reg(struct hinic3_hwdev *hwdev, u16 q_id, + u32 ctrl0, u32 ctrl1) +{ + struct comm_cmd_set_ceq_ctrl_reg ceq_ctrl = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + ceq_ctrl.func_id = hinic3_global_func_id(hwdev); + ceq_ctrl.q_id = q_id; + ceq_ctrl.ctrl0 = ctrl0; + ceq_ctrl.ctrl1 = ctrl1; + + mgmt_msg_params_init_default(&msg_params, &ceq_ctrl, sizeof(ceq_ctrl)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_CEQ_CTRL_REG, &msg_params); + if (err || ceq_ctrl.head.status) { + dev_err(hwdev->dev, "Failed to set ceq %u ctrl reg, err: %d status: 0x%x\n", + q_id, err, ceq_ctrl.head.status); + return -EFAULT; + } + + return 0; +} + +static int set_eq_ctrls(struct hinic3_eq *eq) +{ + struct hinic3_hwif *hwif = eq->hwdev->hwif; + struct hinic3_queue_pages *qpages; + u8 pci_intf_idx, elem_size; + u32 mask, ctrl0, ctrl1; + u32 page_size_val; + int err; + + qpages = &eq->qpages; + page_size_val = ilog2(qpages->page_size / HINIC3_MIN_PAGE_SIZE); + pci_intf_idx = hwif->attr.pci_intf_idx; + + if (eq->type == HINIC3_AEQ) { + /* set ctrl0 using read-modify-write */ + mask = AEQ_CTRL_0_INTR_IDX_MASK | + AEQ_CTRL_0_DMA_ATTR_MASK | + AEQ_CTRL_0_PCI_INTF_IDX_MASK | + AEQ_CTRL_0_INTR_MODE_MASK; + ctrl0 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR); + ctrl0 = (ctrl0 & ~mask) | + AEQ_CTRL_0_SET(eq->msix_entry_idx, INTR_IDX) | + AEQ_CTRL_0_SET(0, DMA_ATTR) | + AEQ_CTRL_0_SET(pci_intf_idx, PCI_INTF_IDX) | + AEQ_CTRL_0_SET(HINIC3_INTR_MODE_ARMED, INTR_MODE); + hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_0_ADDR, ctrl0); + + /* HW expects log2(number of 32 byte units). 
*/ + elem_size = qpages->elem_size_shift - 5; + ctrl1 = AEQ_CTRL_1_SET(eq->eq_len, LEN) | + AEQ_CTRL_1_SET(elem_size, ELEM_SIZE) | + AEQ_CTRL_1_SET(page_size_val, PAGE_SIZE); + hinic3_hwif_write_reg(hwif, HINIC3_CSR_AEQ_CTRL_1_ADDR, ctrl1); + } else { + ctrl0 = CEQ_CTRL_0_SET(eq->msix_entry_idx, INTR_IDX) | + CEQ_CTRL_0_SET(0, DMA_ATTR) | + CEQ_CTRL_0_SET(0, LIMIT_KICK) | + CEQ_CTRL_0_SET(pci_intf_idx, PCI_INTF_IDX) | + CEQ_CTRL_0_SET(page_size_val, PAGE_SIZE) | + CEQ_CTRL_0_SET(HINIC3_INTR_MODE_ARMED, INTR_MODE); + + ctrl1 = CEQ_CTRL_1_SET(eq->eq_len, LEN); + + /* set ceq ctrl reg through mgmt cpu */ + err = hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, ctrl0, + ctrl1); + if (err) + return err; + } + + return 0; +} + +static void ceq_elements_init(struct hinic3_eq *eq, u32 init_val) +{ + __be32 *ceqe; + u32 i; + + for (i = 0; i < eq->eq_len; i++) { + ceqe = get_q_element(&eq->qpages, i, NULL); + *ceqe = cpu_to_be32(init_val); + } + + wmb(); /* Clear ceq elements bit */ +} + +static void aeq_elements_init(struct hinic3_eq *eq, u32 init_val) +{ + struct hinic3_aeq_elem *aeqe; + u32 i; + + for (i = 0; i < eq->eq_len; i++) { + aeqe = get_q_element(&eq->qpages, i, NULL); + aeqe->desc = cpu_to_be32(init_val); + } + + wmb(); /* Clear aeq elements bit */ +} + +static void eq_elements_init(struct hinic3_eq *eq, u32 init_val) +{ + if (eq->type == HINIC3_AEQ) + aeq_elements_init(eq, init_val); + else + ceq_elements_init(eq, init_val); +} + +static int alloc_eq_pages(struct hinic3_eq *eq) +{ + struct hinic3_hwif *hwif = eq->hwdev->hwif; + struct hinic3_queue_pages *qpages; + dma_addr_t page_paddr; + u32 reg, init_val; + u16 pg_idx; + int err; + + qpages = &eq->qpages; + err = hinic3_queue_pages_alloc(eq->hwdev, qpages, HINIC3_MIN_PAGE_SIZE); + if (err) + return err; + + for (pg_idx = 0; pg_idx < qpages->num_pages; pg_idx++) { + page_paddr = qpages->pages[pg_idx].align_paddr; + reg = EQ_HI_PHYS_ADDR_REG(eq->type, pg_idx); + hinic3_hwif_write_reg(hwif, reg, upper_32_bits(page_paddr)); + reg = EQ_LO_PHYS_ADDR_REG(eq->type, pg_idx); + hinic3_hwif_write_reg(hwif, reg, lower_32_bits(page_paddr)); + } + + init_val = HINIC3_EQ_WRAPPED(eq); + eq_elements_init(eq, init_val); + + return 0; +} + +static void eq_calc_page_size_and_num(struct hinic3_eq *eq, u32 elem_size) +{ + u32 max_pages, min_page_size, page_size, total_size; + + /* No need for complicated arithmetic. All values must be power of 2. + * Multiplications give power of 2 and divisions give power of 2 without + * remainder. 
+ */ + max_pages = HINIC3_EQ_MAX_PAGES(eq); + min_page_size = HINIC3_MIN_PAGE_SIZE; + total_size = eq->eq_len * elem_size; + + if (total_size <= max_pages * min_page_size) + page_size = min_page_size; + else + page_size = total_size / max_pages; + + hinic3_queue_pages_init(&eq->qpages, eq->eq_len, page_size, elem_size); +} + +static int request_eq_irq(struct hinic3_eq *eq) +{ + int err; + + if (eq->type == HINIC3_AEQ) { + INIT_WORK(&eq->aeq_work, aeq_irq_work); + snprintf(eq->irq_name, sizeof(eq->irq_name), + "hinic3_aeq%u@pci:%s", eq->q_id, + pci_name(eq->hwdev->pdev)); + err = request_irq(eq->irq_id, aeq_interrupt, 0, + eq->irq_name, eq); + } else { + snprintf(eq->irq_name, sizeof(eq->irq_name), + "hinic3_ceq%u@pci:%s", eq->q_id, + pci_name(eq->hwdev->pdev)); + err = request_threaded_irq(eq->irq_id, NULL, ceq_interrupt, + IRQF_ONESHOT, eq->irq_name, eq); + } + + return err; +} + +static void reset_eq(struct hinic3_eq *eq) +{ + /* clear eq_len to force eqe drop in hardware */ + if (eq->type == HINIC3_AEQ) + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_CSR_AEQ_CTRL_1_ADDR, 0); + else + hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, 0, 0); + + hinic3_hwif_write_reg(eq->hwdev->hwif, EQ_PROD_IDX_REG_ADDR(eq), 0); +} + +static int init_eq(struct hinic3_eq *eq, struct hinic3_hwdev *hwdev, u16 q_id, + u32 q_len, enum hinic3_eq_type type, + struct msix_entry *msix_entry) +{ + u32 elem_size; + int err; + + eq->hwdev = hwdev; + eq->q_id = q_id; + eq->type = type; + eq->eq_len = q_len; + + /* Indirect access should set q_id first */ + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_EQ_INDIR_IDX_ADDR(eq->type), + eq->q_id); + + reset_eq(eq); + + eq->cons_idx = 0; + eq->wrapped = 0; + + elem_size = (type == HINIC3_AEQ) ? HINIC3_AEQE_SIZE : HINIC3_CEQE_SIZE; + eq_calc_page_size_and_num(eq, elem_size); + + err = alloc_eq_pages(eq); + if (err) { + dev_err(hwdev->dev, "Failed to allocate pages for eq\n"); + return err; + } + + eq->msix_entry_idx = msix_entry->entry; + eq->irq_id = msix_entry->vector; + + err = set_eq_ctrls(eq); + if (err) { + dev_err(hwdev->dev, "Failed to set ctrls for eq\n"); + goto err_free_queue_pages; + } + + set_eq_cons_idx(eq, HINIC3_EQ_ARMED); + + err = request_eq_irq(eq); + if (err) { + dev_err(hwdev->dev, + "Failed to request irq for the eq, err: %d\n", err); + goto err_free_queue_pages; + } + + hinic3_set_msix_state(hwdev, eq->msix_entry_idx, HINIC3_MSIX_DISABLE); + + return 0; + +err_free_queue_pages: + hinic3_queue_pages_free(hwdev, &eq->qpages); + + return err; +} + +static void remove_eq(struct hinic3_eq *eq) +{ + hinic3_set_msix_state(eq->hwdev, eq->msix_entry_idx, + HINIC3_MSIX_DISABLE); + free_irq(eq->irq_id, eq); + /* Indirect access should set q_id first */ + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_EQ_INDIR_IDX_ADDR(eq->type), + eq->q_id); + + if (eq->type == HINIC3_AEQ) { + disable_work_sync(&eq->aeq_work); + /* clear eq_len to avoid hw access host memory */ + hinic3_hwif_write_reg(eq->hwdev->hwif, + HINIC3_CSR_AEQ_CTRL_1_ADDR, 0); + } else { + hinic3_set_ceq_ctrl_reg(eq->hwdev, eq->q_id, 0, 0); + } + + /* update consumer index to avoid invalid interrupt */ + eq->cons_idx = hinic3_hwif_read_reg(eq->hwdev->hwif, + EQ_PROD_IDX_REG_ADDR(eq)); + set_eq_cons_idx(eq, HINIC3_EQ_NOT_ARMED); + hinic3_queue_pages_free(eq->hwdev, &eq->qpages); +} + +int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs, + struct msix_entry *msix_entries) +{ + struct hinic3_aeqs *aeqs; + u16 q_id; + int err; + + aeqs = kzalloc(sizeof(*aeqs), GFP_KERNEL); + if (!aeqs) + return -ENOMEM; 
+ + hwdev->aeqs = aeqs; + aeqs->hwdev = hwdev; + aeqs->num_aeqs = num_aeqs; + aeqs->workq = alloc_workqueue(HINIC3_EQS_WQ_NAME, WQ_MEM_RECLAIM, + HINIC3_MAX_AEQS); + if (!aeqs->workq) { + dev_err(hwdev->dev, "Failed to initialize aeq workqueue\n"); + err = -ENOMEM; + goto err_free_aeqs; + } + + for (q_id = 0; q_id < num_aeqs; q_id++) { + err = init_eq(&aeqs->aeq[q_id], hwdev, q_id, + HINIC3_DEFAULT_AEQ_LEN, HINIC3_AEQ, + &msix_entries[q_id]); + if (err) { + dev_err(hwdev->dev, "Failed to init aeq %u\n", + q_id); + goto err_remove_eqs; + } + } + for (q_id = 0; q_id < num_aeqs; q_id++) + hinic3_set_msix_state(hwdev, aeqs->aeq[q_id].msix_entry_idx, + HINIC3_MSIX_ENABLE); + + return 0; + +err_remove_eqs: + while (q_id > 0) { + q_id--; + remove_eq(&aeqs->aeq[q_id]); + } + + destroy_workqueue(aeqs->workq); + +err_free_aeqs: + kfree(aeqs); + + return err; +} + +void hinic3_aeqs_free(struct hinic3_hwdev *hwdev) +{ + struct hinic3_aeqs *aeqs = hwdev->aeqs; + enum hinic3_aeq_type aeq_event; + struct hinic3_eq *eq; + u16 q_id; + + for (q_id = 0; q_id < aeqs->num_aeqs; q_id++) { + eq = aeqs->aeq + q_id; + remove_eq(eq); + hinic3_free_irq(hwdev, eq->irq_id); + } + + for (aeq_event = 0; aeq_event < HINIC3_MAX_AEQ_EVENTS; aeq_event++) + hinic3_aeq_unregister_cb(hwdev, aeq_event); + + destroy_workqueue(aeqs->workq); + + kfree(aeqs); +} + +int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs, + struct msix_entry *msix_entries) +{ + struct hinic3_ceqs *ceqs; + u16 q_id; + int err; + + ceqs = kzalloc(sizeof(*ceqs), GFP_KERNEL); + if (!ceqs) + return -ENOMEM; + + hwdev->ceqs = ceqs; + ceqs->hwdev = hwdev; + ceqs->num_ceqs = num_ceqs; + + for (q_id = 0; q_id < num_ceqs; q_id++) { + err = init_eq(&ceqs->ceq[q_id], hwdev, q_id, + HINIC3_DEFAULT_CEQ_LEN, HINIC3_CEQ, + &msix_entries[q_id]); + if (err) { + dev_err(hwdev->dev, "Failed to init ceq %u\n", + q_id); + goto err_free_ceqs; + } + } + for (q_id = 0; q_id < num_ceqs; q_id++) + hinic3_set_msix_state(hwdev, ceqs->ceq[q_id].msix_entry_idx, + HINIC3_MSIX_ENABLE); + + return 0; + +err_free_ceqs: + while (q_id > 0) { + q_id--; + remove_eq(&ceqs->ceq[q_id]); + } + + kfree(ceqs); + + return err; +} + +void hinic3_ceqs_free(struct hinic3_hwdev *hwdev) +{ + struct hinic3_ceqs *ceqs = hwdev->ceqs; + enum hinic3_ceq_event ceq_event; + struct hinic3_eq *eq; + u16 q_id; + + for (q_id = 0; q_id < ceqs->num_ceqs; q_id++) { + eq = ceqs->ceq + q_id; + remove_eq(eq); + hinic3_free_irq(hwdev, eq->irq_id); + } + + for (ceq_event = 0; ceq_event < HINIC3_MAX_CEQ_EVENTS; ceq_event++) + hinic3_ceq_unregister_cb(hwdev, ceq_event); + + kfree(ceqs); +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h new file mode 100644 index 000000000000..005a6e0745b3 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_eqs.h @@ -0,0 +1,122 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
*/ + +#ifndef _HINIC3_EQS_H_ +#define _HINIC3_EQS_H_ + +#include <linux/interrupt.h> + +#include "hinic3_hw_cfg.h" +#include "hinic3_queue_common.h" + +#define HINIC3_MAX_AEQS 4 +#define HINIC3_MAX_CEQS 32 + +#define HINIC3_AEQ_MAX_PAGES 4 +#define HINIC3_CEQ_MAX_PAGES 8 + +#define HINIC3_AEQE_SIZE 64 +#define HINIC3_CEQE_SIZE 4 + +#define HINIC3_AEQE_DESC_SIZE 4 +#define HINIC3_AEQE_DATA_SIZE (HINIC3_AEQE_SIZE - HINIC3_AEQE_DESC_SIZE) + +#define HINIC3_DEFAULT_AEQ_LEN 0x10000 +#define HINIC3_DEFAULT_CEQ_LEN 0x10000 + +#define HINIC3_EQ_IRQ_NAME_LEN 64 + +#define HINIC3_EQ_USLEEP_LOW_BOUND 900 +#define HINIC3_EQ_USLEEP_HIGH_BOUND 1000 + +enum hinic3_eq_type { + HINIC3_AEQ = 0, + HINIC3_CEQ = 1, +}; + +enum hinic3_eq_intr_mode { + HINIC3_INTR_MODE_ARMED = 0, + HINIC3_INTR_MODE_ALWAYS = 1, +}; + +enum hinic3_eq_ci_arm_state { + HINIC3_EQ_NOT_ARMED = 0, + HINIC3_EQ_ARMED = 1, +}; + +struct hinic3_eq { + struct hinic3_hwdev *hwdev; + struct hinic3_queue_pages qpages; + u16 q_id; + enum hinic3_eq_type type; + u32 eq_len; + u32 cons_idx; + u8 wrapped; + u32 irq_id; + u16 msix_entry_idx; + char irq_name[HINIC3_EQ_IRQ_NAME_LEN]; + struct work_struct aeq_work; +}; + +struct hinic3_aeq_elem { + u8 aeqe_data[HINIC3_AEQE_DATA_SIZE]; + __be32 desc; +}; + +enum hinic3_aeq_type { + HINIC3_HW_INTER_INT = 0, + HINIC3_MBX_FROM_FUNC = 1, + HINIC3_MSG_FROM_FW = 2, + HINIC3_MAX_AEQ_EVENTS = 6, +}; + +typedef void (*hinic3_aeq_event_cb)(struct hinic3_hwdev *hwdev, u8 *data, + u8 size); + +struct hinic3_aeqs { + struct hinic3_hwdev *hwdev; + hinic3_aeq_event_cb aeq_cb[HINIC3_MAX_AEQ_EVENTS]; + struct hinic3_eq aeq[HINIC3_MAX_AEQS]; + u16 num_aeqs; + struct workqueue_struct *workq; + /* lock for aeq event flag */ + spinlock_t aeq_lock; +}; + +enum hinic3_ceq_event { + HINIC3_CMDQ = 3, + HINIC3_MAX_CEQ_EVENTS = 6, +}; + +typedef void (*hinic3_ceq_event_cb)(struct hinic3_hwdev *hwdev, + __le32 ceqe_data); + +struct hinic3_ceqs { + struct hinic3_hwdev *hwdev; + + hinic3_ceq_event_cb ceq_cb[HINIC3_MAX_CEQ_EVENTS]; + + struct hinic3_eq ceq[HINIC3_MAX_CEQS]; + u16 num_ceqs; + /* lock for ceq event flag */ + spinlock_t ceq_lock; +}; + +int hinic3_aeqs_init(struct hinic3_hwdev *hwdev, u16 num_aeqs, + struct msix_entry *msix_entries); +void hinic3_aeqs_free(struct hinic3_hwdev *hwdev); +int hinic3_aeq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event, + hinic3_aeq_event_cb hwe_cb); +void hinic3_aeq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_aeq_type event); +int hinic3_ceqs_init(struct hinic3_hwdev *hwdev, u16 num_ceqs, + struct msix_entry *msix_entries); +void hinic3_ceqs_free(struct hinic3_hwdev *hwdev); +int hinic3_ceq_register_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event, + hinic3_ceq_event_cb callback); +void hinic3_ceq_unregister_cb(struct hinic3_hwdev *hwdev, + enum hinic3_ceq_event event); + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c index 87d9450c30ca..7827c1f626db 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.c @@ -8,6 +8,217 @@ #include "hinic3_hwif.h" #include "hinic3_mbox.h" +#define HINIC3_CFG_MAX_QP 256 + +static void hinic3_parse_pub_res_cap(struct hinic3_hwdev *hwdev, + struct hinic3_dev_cap *cap, + const struct cfg_cmd_dev_cap *dev_cap, + enum hinic3_func_type type) +{ + cap->port_id = dev_cap->port_id; + cap->supp_svcs_bitmap = dev_cap->svc_cap_en; +} + +static void hinic3_parse_l2nic_res_cap(struct 
hinic3_hwdev *hwdev, + struct hinic3_dev_cap *cap, + const struct cfg_cmd_dev_cap *dev_cap, + enum hinic3_func_type type) +{ + struct hinic3_nic_service_cap *nic_svc_cap = &cap->nic_svc_cap; + + nic_svc_cap->max_sqs = min(dev_cap->nic_max_sq_id + 1, + HINIC3_CFG_MAX_QP); +} + +static void hinic3_parse_dev_cap(struct hinic3_hwdev *hwdev, + const struct cfg_cmd_dev_cap *dev_cap, + enum hinic3_func_type type) +{ + struct hinic3_dev_cap *cap = &hwdev->cfg_mgmt->cap; + + /* Public resource */ + hinic3_parse_pub_res_cap(hwdev, cap, dev_cap, type); + + /* L2 NIC resource */ + if (hinic3_support_nic(hwdev)) + hinic3_parse_l2nic_res_cap(hwdev, cap, dev_cap, type); +} + +static int get_cap_from_fw(struct hinic3_hwdev *hwdev, + enum hinic3_func_type type) +{ + struct mgmt_msg_params msg_params = {}; + struct cfg_cmd_dev_cap dev_cap = {}; + int err; + + dev_cap.func_id = hinic3_global_func_id(hwdev); + + mgmt_msg_params_init_default(&msg_params, &dev_cap, sizeof(dev_cap)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_CFGM, + CFG_CMD_GET_DEV_CAP, &msg_params); + if (err || dev_cap.head.status) { + dev_err(hwdev->dev, + "Failed to get capability from FW, err: %d, status: 0x%x\n", + err, dev_cap.head.status); + return -EIO; + } + + hinic3_parse_dev_cap(hwdev, &dev_cap, type); + + return 0; +} + +static int hinic3_init_irq_info(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cfg_mgmt_info *cfg_mgmt = hwdev->cfg_mgmt; + struct hinic3_hwif *hwif = hwdev->hwif; + u16 intr_num = hwif->attr.num_irqs; + struct hinic3_irq_info *irq_info; + u16 intr_needed; + + intr_needed = hwif->attr.msix_flex_en ? (hwif->attr.num_aeqs + + hwif->attr.num_ceqs + hwif->attr.num_sq) : intr_num; + if (intr_needed > intr_num) { + dev_warn(hwdev->dev, "Irq num cfg %d is less than the needed irq num %d msix_flex_en %d\n", + intr_num, intr_needed, hwdev->hwif->attr.msix_flex_en); + intr_needed = intr_num; + } + + irq_info = &cfg_mgmt->irq_info; + irq_info->irq = kcalloc(intr_num, sizeof(struct hinic3_irq), + GFP_KERNEL); + if (!irq_info->irq) + return -ENOMEM; + + irq_info->num_irq_hw = intr_needed; + mutex_init(&irq_info->irq_mutex); + + return 0; +} + +static int hinic3_init_irq_alloc_info(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cfg_mgmt_info *cfg_mgmt = hwdev->cfg_mgmt; + struct hinic3_irq *irq = cfg_mgmt->irq_info.irq; + u16 nreq = cfg_mgmt->irq_info.num_irq_hw; + struct pci_dev *pdev = hwdev->pdev; + int actual_irq; + u16 i; + + actual_irq = pci_alloc_irq_vectors(pdev, 2, nreq, PCI_IRQ_MSIX); + if (actual_irq < 0) { + dev_err(hwdev->dev, "Alloc msix entries with threshold 2 failed. 
actual_irq: %d\n", + actual_irq); + return -ENOMEM; + } + + nreq = actual_irq; + cfg_mgmt->irq_info.num_irq = nreq; + + for (i = 0; i < nreq; ++i) { + irq[i].msix_entry_idx = i; + irq[i].irq_id = pci_irq_vector(pdev, i); + irq[i].allocated = false; + } + + return 0; +} + +int hinic3_init_cfg_mgmt(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cfg_mgmt_info *cfg_mgmt; + int err; + + cfg_mgmt = kzalloc(sizeof(*cfg_mgmt), GFP_KERNEL); + if (!cfg_mgmt) + return -ENOMEM; + + hwdev->cfg_mgmt = cfg_mgmt; + + err = hinic3_init_irq_info(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init hinic3_irq_mgmt_info, err: %d\n", + err); + goto err_free_cfg_mgmt; + } + + err = hinic3_init_irq_alloc_info(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init hinic3_irq_info, err: %d\n", + err); + goto err_free_irq_info; + } + + return 0; + +err_free_irq_info: + kfree(cfg_mgmt->irq_info.irq); + cfg_mgmt->irq_info.irq = NULL; +err_free_cfg_mgmt: + kfree(cfg_mgmt); + + return err; +} + +void hinic3_free_cfg_mgmt(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cfg_mgmt_info *cfg_mgmt = hwdev->cfg_mgmt; + + pci_free_irq_vectors(hwdev->pdev); + kfree(cfg_mgmt->irq_info.irq); + cfg_mgmt->irq_info.irq = NULL; + kfree(cfg_mgmt); +} + +int hinic3_alloc_irqs(struct hinic3_hwdev *hwdev, u16 num, + struct msix_entry *alloc_arr, u16 *act_num) +{ + struct hinic3_irq_info *irq_info; + struct hinic3_irq *curr; + u16 i, found = 0; + + irq_info = &hwdev->cfg_mgmt->irq_info; + mutex_lock(&irq_info->irq_mutex); + for (i = 0; i < irq_info->num_irq && found < num; i++) { + curr = irq_info->irq + i; + if (curr->allocated) + continue; + curr->allocated = true; + alloc_arr[found].vector = curr->irq_id; + alloc_arr[found].entry = curr->msix_entry_idx; + found++; + } + mutex_unlock(&irq_info->irq_mutex); + + *act_num = found; + + return found == 0 ? 
-ENOMEM : 0; +} + +void hinic3_free_irq(struct hinic3_hwdev *hwdev, u32 irq_id) +{ + struct hinic3_irq_info *irq_info; + struct hinic3_irq *curr; + u16 i; + + irq_info = &hwdev->cfg_mgmt->irq_info; + mutex_lock(&irq_info->irq_mutex); + for (i = 0; i < irq_info->num_irq; i++) { + curr = irq_info->irq + i; + if (curr->irq_id == irq_id) { + curr->allocated = false; + break; + } + } + mutex_unlock(&irq_info->irq_mutex); +} + +int hinic3_init_capability(struct hinic3_hwdev *hwdev) +{ + return get_cap_from_fw(hwdev, HINIC3_FUNC_TYPE_VF); +} + bool hinic3_support_nic(struct hinic3_hwdev *hwdev) { return hwdev->cfg_mgmt->cap.supp_svcs_bitmap & diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h index e017b1ae9f05..58806199bf54 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_cfg.h @@ -42,10 +42,14 @@ struct hinic3_cfg_mgmt_info { struct hinic3_dev_cap cap; }; +int hinic3_init_cfg_mgmt(struct hinic3_hwdev *hwdev); +void hinic3_free_cfg_mgmt(struct hinic3_hwdev *hwdev); + int hinic3_alloc_irqs(struct hinic3_hwdev *hwdev, u16 num, struct msix_entry *alloc_arr, u16 *act_num); void hinic3_free_irq(struct hinic3_hwdev *hwdev, u32 irq_id); +int hinic3_init_capability(struct hinic3_hwdev *hwdev); bool hinic3_support_nic(struct hinic3_hwdev *hwdev); u16 hinic3_func_max_qnum(struct hinic3_hwdev *hwdev); u8 hinic3_physical_port_id(struct hinic3_hwdev *hwdev); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c index 434696ce7dc2..89638813df40 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.c @@ -3,11 +3,43 @@ #include <linux/delay.h> +#include "hinic3_cmdq.h" #include "hinic3_hw_comm.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" #include "hinic3_mbox.h" +int hinic3_set_interrupt_cfg_direct(struct hinic3_hwdev *hwdev, + const struct hinic3_interrupt_info *info) +{ + struct comm_cmd_cfg_msix_ctrl_reg msix_cfg = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + msix_cfg.func_id = hinic3_global_func_id(hwdev); + msix_cfg.msix_index = info->msix_index; + msix_cfg.opcode = MGMT_MSG_CMD_OP_SET; + + msix_cfg.lli_credit_cnt = info->lli_credit_limit; + msix_cfg.lli_timer_cnt = info->lli_timer_cfg; + msix_cfg.pending_cnt = info->pending_limit; + msix_cfg.coalesce_timer_cnt = info->coalesc_timer_cfg; + msix_cfg.resend_timer_cnt = info->resend_timer_cfg; + + mgmt_msg_params_init_default(&msg_params, &msix_cfg, sizeof(msix_cfg)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_CFG_MSIX_CTRL_REG, &msg_params); + if (err || msix_cfg.head.status) { + dev_err(hwdev->dev, + "Failed to set interrupt config, err: %d, status: 0x%x\n", + err, msix_cfg.head.status); + return -EINVAL; + } + + return 0; +} + int hinic3_func_reset(struct hinic3_hwdev *hwdev, u16 func_id, u64 reset_flag) { struct comm_cmd_func_reset func_reset = {}; @@ -30,3 +62,365 @@ int hinic3_func_reset(struct hinic3_hwdev *hwdev, u16 func_id, u64 reset_flag) return 0; } + +static int hinic3_comm_features_nego(struct hinic3_hwdev *hwdev, u8 opcode, + u64 *s_feature, u16 size) +{ + struct comm_cmd_feature_nego feature_nego = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + feature_nego.func_id = hinic3_global_func_id(hwdev); + feature_nego.opcode = opcode; + if (opcode == MGMT_MSG_CMD_OP_SET) + memcpy(feature_nego.s_feature, s_feature, + 
array_size(size, sizeof(u64))); + + mgmt_msg_params_init_default(&msg_params, &feature_nego, + sizeof(feature_nego)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_FEATURE_NEGO, &msg_params); + if (err || feature_nego.head.status) { + dev_err(hwdev->dev, "Failed to negotiate feature, err: %d, status: 0x%x\n", + err, feature_nego.head.status); + return -EINVAL; + } + + if (opcode == MGMT_MSG_CMD_OP_GET) + memcpy(s_feature, feature_nego.s_feature, + array_size(size, sizeof(u64))); + + return 0; +} + +int hinic3_get_comm_features(struct hinic3_hwdev *hwdev, u64 *s_feature, + u16 size) +{ + return hinic3_comm_features_nego(hwdev, MGMT_MSG_CMD_OP_GET, s_feature, + size); +} + +int hinic3_set_comm_features(struct hinic3_hwdev *hwdev, u64 *s_feature, + u16 size) +{ + return hinic3_comm_features_nego(hwdev, MGMT_MSG_CMD_OP_SET, s_feature, + size); +} + +int hinic3_get_global_attr(struct hinic3_hwdev *hwdev, + struct comm_global_attr *attr) +{ + struct comm_cmd_get_glb_attr get_attr = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + mgmt_msg_params_init_default(&msg_params, &get_attr, sizeof(get_attr)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_GET_GLOBAL_ATTR, &msg_params); + if (err || get_attr.head.status) { + dev_err(hwdev->dev, + "Failed to get global attribute, err: %d, status: 0x%x\n", + err, get_attr.head.status); + return -EIO; + } + + memcpy(attr, &get_attr.attr, sizeof(*attr)); + + return 0; +} + +int hinic3_set_func_svc_used_state(struct hinic3_hwdev *hwdev, u16 svc_type, + u8 state) +{ + struct comm_cmd_set_func_svc_used_state used_state = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + used_state.func_id = hinic3_global_func_id(hwdev); + used_state.svc_type = svc_type; + used_state.used_state = state; + + mgmt_msg_params_init_default(&msg_params, &used_state, + sizeof(used_state)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_FUNC_SVC_USED_STATE, + &msg_params); + if (err || used_state.head.status) { + dev_err(hwdev->dev, + "Failed to set func service used state, err: %d, status: 0x%x\n", + err, used_state.head.status); + return -EIO; + } + + return 0; +} + +int hinic3_set_dma_attr_tbl(struct hinic3_hwdev *hwdev, u8 entry_idx, u8 st, + u8 at, u8 ph, u8 no_snooping, u8 tph_en) +{ + struct comm_cmd_set_dma_attr dma_attr = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + dma_attr.func_id = hinic3_global_func_id(hwdev); + dma_attr.entry_idx = entry_idx; + dma_attr.st = st; + dma_attr.at = at; + dma_attr.ph = ph; + dma_attr.no_snooping = no_snooping; + dma_attr.tph_en = tph_en; + + mgmt_msg_params_init_default(&msg_params, &dma_attr, sizeof(dma_attr)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_DMA_ATTR, &msg_params); + if (err || dma_attr.head.status) { + dev_err(hwdev->dev, "Failed to set dma attr, err: %d, status: 0x%x\n", + err, dma_attr.head.status); + return -EIO; + } + + return 0; +} + +int hinic3_set_wq_page_size(struct hinic3_hwdev *hwdev, u16 func_idx, + u32 page_size) +{ + struct comm_cmd_cfg_wq_page_size page_size_info = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + page_size_info.func_id = func_idx; + page_size_info.page_size = ilog2(page_size / HINIC3_MIN_PAGE_SIZE); + page_size_info.opcode = MGMT_MSG_CMD_OP_SET; + + mgmt_msg_params_init_default(&msg_params, &page_size_info, + sizeof(page_size_info)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_CFG_PAGESIZE, &msg_params); + if (err || 
page_size_info.head.status) { + dev_err(hwdev->dev, + "Failed to set wq page size, err: %d, status: 0x%x\n", + err, page_size_info.head.status); + return -EFAULT; + } + + return 0; +} + +int hinic3_set_cmdq_depth(struct hinic3_hwdev *hwdev, u16 cmdq_depth) +{ + struct comm_cmd_set_root_ctxt root_ctxt = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + root_ctxt.func_id = hinic3_global_func_id(hwdev); + + root_ctxt.set_cmdq_depth = 1; + root_ctxt.cmdq_depth = ilog2(cmdq_depth); + + mgmt_msg_params_init_default(&msg_params, &root_ctxt, + sizeof(root_ctxt)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_VAT, &msg_params); + if (err || root_ctxt.head.status) { + dev_err(hwdev->dev, + "Failed to set cmdq depth, err: %d, status: 0x%x\n", + err, root_ctxt.head.status); + return -EFAULT; + } + + return 0; +} + +#define HINIC3_WAIT_CMDQ_IDLE_TIMEOUT 5000 + +static enum hinic3_wait_return check_cmdq_stop_handler(void *priv_data) +{ + struct hinic3_hwdev *hwdev = priv_data; + enum hinic3_cmdq_type cmdq_type; + struct hinic3_cmdqs *cmdqs; + + cmdqs = hwdev->cmdqs; + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + if (!hinic3_cmdq_idle(&cmdqs->cmdq[cmdq_type])) + return HINIC3_WAIT_PROCESS_WAITING; + } + + return HINIC3_WAIT_PROCESS_CPL; +} + +static int wait_cmdq_stop(struct hinic3_hwdev *hwdev) +{ + struct hinic3_cmdqs *cmdqs = hwdev->cmdqs; + enum hinic3_cmdq_type cmdq_type; + int err; + + if (!(cmdqs->status & HINIC3_CMDQ_ENABLE)) + return 0; + + cmdqs->status &= ~HINIC3_CMDQ_ENABLE; + err = hinic3_wait_for_timeout(hwdev, check_cmdq_stop_handler, + HINIC3_WAIT_CMDQ_IDLE_TIMEOUT, + USEC_PER_MSEC); + + if (err) + goto err_reenable_cmdq; + + return 0; + +err_reenable_cmdq: + for (cmdq_type = 0; cmdq_type < cmdqs->cmdq_num; cmdq_type++) { + if (!hinic3_cmdq_idle(&cmdqs->cmdq[cmdq_type])) + dev_err(hwdev->dev, "Cmdq %d is busy\n", cmdq_type); + } + cmdqs->status |= HINIC3_CMDQ_ENABLE; + + return err; +} + +int hinic3_func_rx_tx_flush(struct hinic3_hwdev *hwdev) +{ + struct comm_cmd_clear_resource clear_db = {}; + struct comm_cmd_clear_resource clr_res = {}; + struct hinic3_hwif *hwif = hwdev->hwif; + struct mgmt_msg_params msg_params = {}; + int ret = 0; + int err; + + err = wait_cmdq_stop(hwdev); + if (err) { + dev_warn(hwdev->dev, "CMDQ is still working, CMDQ timeout value is unreasonable\n"); + ret = err; + } + + hinic3_toggle_doorbell(hwif, DISABLE_DOORBELL); + + clear_db.func_id = hwif->attr.func_global_idx; + mgmt_msg_params_init_default(&msg_params, &clear_db, sizeof(clear_db)); + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_FLUSH_DOORBELL, &msg_params); + if (err || clear_db.head.status) { + dev_warn(hwdev->dev, "Failed to flush doorbell, err: %d, status: 0x%x\n", + err, clear_db.head.status); + if (err) + ret = err; + else + ret = -EFAULT; + } + + clr_res.func_id = hwif->attr.func_global_idx; + msg_params.buf_in = &clr_res; + msg_params.in_size = sizeof(clr_res); + err = hinic3_send_mbox_to_mgmt_no_ack(hwdev, MGMT_MOD_COMM, + COMM_CMD_START_FLUSH, + &msg_params); + if (err) { + dev_warn(hwdev->dev, "Failed to notice flush message, err: %d\n", + err); + ret = err; + } + + hinic3_toggle_doorbell(hwif, ENABLE_DOORBELL); + + err = hinic3_reinit_cmdq_ctxts(hwdev); + if (err) { + dev_warn(hwdev->dev, "Failed to reinit cmdq\n"); + ret = err; + } + + return ret; +} + +static int get_hw_rx_buf_size_idx(int rx_buf_sz, u16 *buf_sz_idx) +{ + /* Supported RX buffer sizes in bytes. Configured by array index. 
*/ + static const int supported_sizes[16] = { + [0] = 32, [1] = 64, [2] = 96, [3] = 128, + [4] = 192, [5] = 256, [6] = 384, [7] = 512, + [8] = 768, [9] = 1024, [10] = 1536, [11] = 2048, + [12] = 3072, [13] = 4096, [14] = 8192, [15] = 16384, + }; + u16 idx; + + /* Scan from biggest to smallest. Choose supported size that is equal or + * smaller. For smaller value HW will under-utilize posted buffers. For + * bigger value HW may overrun posted buffers. + */ + idx = ARRAY_SIZE(supported_sizes); + while (idx > 0) { + idx--; + if (supported_sizes[idx] <= rx_buf_sz) { + *buf_sz_idx = idx; + return 0; + } + } + + return -EINVAL; +} + +int hinic3_set_root_ctxt(struct hinic3_hwdev *hwdev, u32 rq_depth, u32 sq_depth, + int rx_buf_sz) +{ + struct comm_cmd_set_root_ctxt root_ctxt = {}; + struct mgmt_msg_params msg_params = {}; + u16 buf_sz_idx; + int err; + + err = get_hw_rx_buf_size_idx(rx_buf_sz, &buf_sz_idx); + if (err) + return err; + + root_ctxt.func_id = hinic3_global_func_id(hwdev); + + root_ctxt.set_cmdq_depth = 0; + root_ctxt.cmdq_depth = 0; + + root_ctxt.lro_en = 1; + + root_ctxt.rq_depth = ilog2(rq_depth); + root_ctxt.rx_buf_sz = buf_sz_idx; + root_ctxt.sq_depth = ilog2(sq_depth); + + mgmt_msg_params_init_default(&msg_params, &root_ctxt, + sizeof(root_ctxt)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_VAT, &msg_params); + if (err || root_ctxt.head.status) { + dev_err(hwdev->dev, + "Failed to set root context, err: %d, status: 0x%x\n", + err, root_ctxt.head.status); + return -EFAULT; + } + + return 0; +} + +int hinic3_clean_root_ctxt(struct hinic3_hwdev *hwdev) +{ + struct comm_cmd_set_root_ctxt root_ctxt = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + root_ctxt.func_id = hinic3_global_func_id(hwdev); + + mgmt_msg_params_init_default(&msg_params, &root_ctxt, + sizeof(root_ctxt)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_COMM, + COMM_CMD_SET_VAT, &msg_params); + if (err || root_ctxt.head.status) { + dev_err(hwdev->dev, + "Failed to set root context, err: %d, status: 0x%x\n", + err, root_ctxt.head.status); + return -EFAULT; + } + + return 0; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h index c33a1c77da9c..304f5691f0c2 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_comm.h @@ -8,6 +8,40 @@ struct hinic3_hwdev; +#define HINIC3_WQ_PAGE_SIZE_ORDER 8 + +struct hinic3_interrupt_info { + u32 lli_set; + u32 interrupt_coalesc_set; + u16 msix_index; + u8 lli_credit_limit; + u8 lli_timer_cfg; + u8 pending_limit; + u8 coalesc_timer_cfg; + u8 resend_timer_cfg; +}; + +int hinic3_set_interrupt_cfg_direct(struct hinic3_hwdev *hwdev, + const struct hinic3_interrupt_info *info); int hinic3_func_reset(struct hinic3_hwdev *hwdev, u16 func_id, u64 reset_flag); +int hinic3_get_comm_features(struct hinic3_hwdev *hwdev, u64 *s_feature, + u16 size); +int hinic3_set_comm_features(struct hinic3_hwdev *hwdev, u64 *s_feature, + u16 size); +int hinic3_get_global_attr(struct hinic3_hwdev *hwdev, + struct comm_global_attr *attr); +int hinic3_set_func_svc_used_state(struct hinic3_hwdev *hwdev, u16 svc_type, + u8 state); +int hinic3_set_dma_attr_tbl(struct hinic3_hwdev *hwdev, u8 entry_idx, u8 st, + u8 at, u8 ph, u8 no_snooping, u8 tph_en); + +int hinic3_set_wq_page_size(struct hinic3_hwdev *hwdev, u16 func_idx, + u32 page_size); +int hinic3_set_cmdq_depth(struct hinic3_hwdev *hwdev, u16 cmdq_depth); +int 
hinic3_func_rx_tx_flush(struct hinic3_hwdev *hwdev); +int hinic3_set_root_ctxt(struct hinic3_hwdev *hwdev, u32 rq_depth, u32 sq_depth, + int rx_buf_sz); +int hinic3_clean_root_ctxt(struct hinic3_hwdev *hwdev); + #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h index 22c84093efa2..623cf2d14cbc 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hw_intf.h @@ -51,6 +51,48 @@ static inline void mgmt_msg_params_init_default(struct mgmt_msg_params *msg_para msg_params->timeout_ms = 0; } +enum cfg_cmd { + CFG_CMD_GET_DEV_CAP = 0, +}; + +/* Device capabilities, defined by hw */ +struct cfg_cmd_dev_cap { + struct mgmt_msg_head head; + + u16 func_id; + u16 rsvd1; + + /* Public resources */ + u8 host_id; + u8 ep_id; + u8 er_id; + u8 port_id; + + u16 host_total_func; + u8 host_pf_num; + u8 pf_id_start; + u16 host_vf_num; + u16 vf_id_start; + u8 host_oq_id_mask_val; + u8 timer_en; + u8 host_valid_bitmap; + u8 rsvd_host; + + u16 svc_cap_en; + u16 max_vf; + u8 flexq_en; + u8 valid_cos_bitmap; + u8 port_cos_valid_bitmap; + u8 rsvd2[45]; + + /* l2nic */ + u16 nic_max_sq_id; + u16 nic_max_rq_id; + u16 nic_default_num_queues; + + u8 rsvd3[250]; +}; + /* COMM Commands between Driver to fw */ enum comm_cmd { /* Commands for clearing FLR and resources */ @@ -70,6 +112,20 @@ enum comm_cmd { COMM_CMD_SET_DMA_ATTR = 25, }; +struct comm_cmd_cfg_msix_ctrl_reg { + struct mgmt_msg_head head; + u16 func_id; + u8 opcode; + u8 rsvd1; + u16 msix_index; + u8 pending_cnt; + u8 coalesce_timer_cnt; + u8 resend_timer_cnt; + u8 lli_timer_cnt; + u8 lli_credit_cnt; + u8 rsvd2[5]; +}; + enum comm_func_reset_bits { COMM_FUNC_RESET_BIT_FLUSH = BIT(0), COMM_FUNC_RESET_BIT_MQM = BIT(1), @@ -84,6 +140,11 @@ enum comm_func_reset_bits { COMM_FUNC_RESET_BIT_NIC = BIT(13), }; +#define COMM_FUNC_RESET_FLAG \ + (COMM_FUNC_RESET_BIT_COMM | COMM_FUNC_RESET_BIT_COMM_CMD_CH | \ + COMM_FUNC_RESET_BIT_FLUSH | COMM_FUNC_RESET_BIT_MQM | \ + COMM_FUNC_RESET_BIT_SMF | COMM_FUNC_RESET_BIT_PF_BW_CFG) + struct comm_cmd_func_reset { struct mgmt_msg_head head; u16 func_id; @@ -100,6 +161,96 @@ struct comm_cmd_feature_nego { u64 s_feature[COMM_MAX_FEATURE_QWORD]; }; +struct comm_global_attr { + u8 max_host_num; + u8 max_pf_num; + u16 vf_id_start; + /* for api cmd to mgmt cpu */ + u8 mgmt_host_node_id; + u8 cmdq_num; + u8 rsvd1[34]; +}; + +struct comm_cmd_get_glb_attr { + struct mgmt_msg_head head; + struct comm_global_attr attr; +}; + +enum comm_func_svc_type { + COMM_FUNC_SVC_T_COMM = 0, + COMM_FUNC_SVC_T_NIC = 1, +}; + +struct comm_cmd_set_func_svc_used_state { + struct mgmt_msg_head head; + u16 func_id; + u16 svc_type; + u8 used_state; + u8 rsvd[35]; +}; + +struct comm_cmd_set_dma_attr { + struct mgmt_msg_head head; + u16 func_id; + u8 entry_idx; + u8 st; + u8 at; + u8 ph; + u8 no_snooping; + u8 tph_en; + u32 resv1; +}; + +struct comm_cmd_set_ceq_ctrl_reg { + struct mgmt_msg_head head; + u16 func_id; + u16 q_id; + u32 ctrl0; + u32 ctrl1; + u32 rsvd1; +}; + +struct comm_cmd_cfg_wq_page_size { + struct mgmt_msg_head head; + u16 func_id; + u8 opcode; + /* real_size=4KB*2^page_size, range(0~20) must be checked by driver */ + u8 page_size; + u32 rsvd1; +}; + +struct comm_cmd_set_root_ctxt { + struct mgmt_msg_head head; + u16 func_id; + u8 set_cmdq_depth; + u8 cmdq_depth; + u16 rx_buf_sz; + u8 lro_en; + u8 rsvd1; + u16 sq_depth; + u16 rq_depth; + u64 rsvd2; +}; + +struct comm_cmdq_ctxt_info { + __le64 curr_wqe_page_pfn; + 
__le64 wq_block_pfn; +}; + +struct comm_cmd_set_cmdq_ctxt { + struct mgmt_msg_head head; + u16 func_id; + u8 cmdq_id; + u8 rsvd1[5]; + struct comm_cmdq_ctxt_info ctxt; +}; + +struct comm_cmd_clear_resource { + struct mgmt_msg_head head; + u16 func_id; + u16 rsvd1[3]; +}; + /* Services supported by HW. HW uses these values when delivering events. * HW supports multiple services that are not yet supported by driver * (e.g. RoCE). diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c index 6e8788a64925..95a213133be9 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwdev.c @@ -1,24 +1,557 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. +#include "hinic3_cmdq.h" +#include "hinic3_csr.h" +#include "hinic3_eqs.h" #include "hinic3_hw_comm.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" #include "hinic3_mbox.h" #include "hinic3_mgmt.h" +#define HINIC3_PCIE_SNOOP 0 +#define HINIC3_PCIE_TPH_DISABLE 0 + +#define HINIC3_DMA_ATTR_INDIR_IDX_MASK GENMASK(9, 0) +#define HINIC3_DMA_ATTR_INDIR_IDX_SET(val, member) \ + FIELD_PREP(HINIC3_DMA_ATTR_INDIR_##member##_MASK, val) + +#define HINIC3_DMA_ATTR_ENTRY_ST_MASK GENMASK(7, 0) +#define HINIC3_DMA_ATTR_ENTRY_AT_MASK GENMASK(9, 8) +#define HINIC3_DMA_ATTR_ENTRY_PH_MASK GENMASK(11, 10) +#define HINIC3_DMA_ATTR_ENTRY_NO_SNOOPING_MASK BIT(12) +#define HINIC3_DMA_ATTR_ENTRY_TPH_EN_MASK BIT(13) +#define HINIC3_DMA_ATTR_ENTRY_SET(val, member) \ + FIELD_PREP(HINIC3_DMA_ATTR_ENTRY_##member##_MASK, val) + +#define HINIC3_PCIE_ST_DISABLE 0 +#define HINIC3_PCIE_AT_DISABLE 0 +#define HINIC3_PCIE_PH_DISABLE 0 +#define HINIC3_PCIE_MSIX_ATTR_ENTRY 0 + +#define HINIC3_DEFAULT_EQ_MSIX_PENDING_LIMIT 0 +#define HINIC3_DEFAULT_EQ_MSIX_COALESC_TIMER_CFG 0xFF +#define HINIC3_DEFAULT_EQ_MSIX_RESEND_TIMER_CFG 7 + +#define HINIC3_HWDEV_WQ_NAME "hinic3_hardware" +#define HINIC3_WQ_MAX_REQ 10 + +enum hinic3_hwdev_init_state { + HINIC3_HWDEV_MBOX_INITED = 2, + HINIC3_HWDEV_CMDQ_INITED = 3, +}; + +static int hinic3_comm_aeqs_init(struct hinic3_hwdev *hwdev) +{ + struct msix_entry aeq_msix_entries[HINIC3_MAX_AEQS]; + u16 num_aeqs, resp_num_irq, i; + int err; + + num_aeqs = hwdev->hwif->attr.num_aeqs; + if (num_aeqs > HINIC3_MAX_AEQS) { + dev_warn(hwdev->dev, "Adjust aeq num to %d\n", + HINIC3_MAX_AEQS); + num_aeqs = HINIC3_MAX_AEQS; + } + err = hinic3_alloc_irqs(hwdev, num_aeqs, aeq_msix_entries, + &resp_num_irq); + if (err) { + dev_err(hwdev->dev, "Failed to alloc aeq irqs, num_aeqs: %u\n", + num_aeqs); + return err; + } + + if (resp_num_irq < num_aeqs) { + dev_warn(hwdev->dev, "Adjust aeq num to %u\n", + resp_num_irq); + num_aeqs = resp_num_irq; + } + + err = hinic3_aeqs_init(hwdev, num_aeqs, aeq_msix_entries); + if (err) { + dev_err(hwdev->dev, "Failed to init aeqs\n"); + goto err_free_irqs; + } + + return 0; + +err_free_irqs: + for (i = 0; i < num_aeqs; i++) + hinic3_free_irq(hwdev, aeq_msix_entries[i].vector); + + return err; +} + +static int hinic3_comm_ceqs_init(struct hinic3_hwdev *hwdev) +{ + struct msix_entry ceq_msix_entries[HINIC3_MAX_CEQS]; + u16 num_ceqs, resp_num_irq, i; + int err; + + num_ceqs = hwdev->hwif->attr.num_ceqs; + if (num_ceqs > HINIC3_MAX_CEQS) { + dev_warn(hwdev->dev, "Adjust ceq num to %d\n", + HINIC3_MAX_CEQS); + num_ceqs = HINIC3_MAX_CEQS; + } + + err = hinic3_alloc_irqs(hwdev, num_ceqs, ceq_msix_entries, + &resp_num_irq); + if (err) { + dev_err(hwdev->dev, "Failed to 
alloc ceq irqs, num_ceqs: %u\n", + num_ceqs); + return err; + } + + if (resp_num_irq < num_ceqs) { + dev_warn(hwdev->dev, "Adjust ceq num to %u\n", + resp_num_irq); + num_ceqs = resp_num_irq; + } + + err = hinic3_ceqs_init(hwdev, num_ceqs, ceq_msix_entries); + if (err) { + dev_err(hwdev->dev, + "Failed to init ceqs, err:%d\n", err); + goto err_free_irqs; + } + + return 0; + +err_free_irqs: + for (i = 0; i < num_ceqs; i++) + hinic3_free_irq(hwdev, ceq_msix_entries[i].vector); + + return err; +} + +static int hinic3_comm_mbox_init(struct hinic3_hwdev *hwdev) +{ + int err; + + err = hinic3_init_mbox(hwdev); + if (err) + return err; + + hinic3_aeq_register_cb(hwdev, HINIC3_MBX_FROM_FUNC, + hinic3_mbox_func_aeqe_handler); + hinic3_aeq_register_cb(hwdev, HINIC3_MSG_FROM_FW, + hinic3_mgmt_msg_aeqe_handler); + + set_bit(HINIC3_HWDEV_MBOX_INITED, &hwdev->func_state); + + return 0; +} + +static void hinic3_comm_mbox_free(struct hinic3_hwdev *hwdev) +{ + spin_lock_bh(&hwdev->channel_lock); + clear_bit(HINIC3_HWDEV_MBOX_INITED, &hwdev->func_state); + spin_unlock_bh(&hwdev->channel_lock); + hinic3_aeq_unregister_cb(hwdev, HINIC3_MBX_FROM_FUNC); + hinic3_aeq_unregister_cb(hwdev, HINIC3_MSG_FROM_FW); + hinic3_free_mbox(hwdev); +} + +static int init_aeqs_msix_attr(struct hinic3_hwdev *hwdev) +{ + struct hinic3_aeqs *aeqs = hwdev->aeqs; + struct hinic3_interrupt_info info = {}; + struct hinic3_eq *eq; + u16 q_id; + int err; + + info.interrupt_coalesc_set = 1; + info.pending_limit = HINIC3_DEFAULT_EQ_MSIX_PENDING_LIMIT; + info.coalesc_timer_cfg = HINIC3_DEFAULT_EQ_MSIX_COALESC_TIMER_CFG; + info.resend_timer_cfg = HINIC3_DEFAULT_EQ_MSIX_RESEND_TIMER_CFG; + + for (q_id = 0; q_id < aeqs->num_aeqs; q_id++) { + eq = &aeqs->aeq[q_id]; + info.msix_index = eq->msix_entry_idx; + err = hinic3_set_interrupt_cfg_direct(hwdev, &info); + if (err) { + dev_err(hwdev->dev, "Set msix attr for aeq %d failed\n", + q_id); + return err; + } + } + + return 0; +} + +static int init_ceqs_msix_attr(struct hinic3_hwdev *hwdev) +{ + struct hinic3_ceqs *ceqs = hwdev->ceqs; + struct hinic3_interrupt_info info = {}; + struct hinic3_eq *eq; + u16 q_id; + int err; + + info.interrupt_coalesc_set = 1; + info.pending_limit = HINIC3_DEFAULT_EQ_MSIX_PENDING_LIMIT; + info.coalesc_timer_cfg = HINIC3_DEFAULT_EQ_MSIX_COALESC_TIMER_CFG; + info.resend_timer_cfg = HINIC3_DEFAULT_EQ_MSIX_RESEND_TIMER_CFG; + + for (q_id = 0; q_id < ceqs->num_ceqs; q_id++) { + eq = &ceqs->ceq[q_id]; + info.msix_index = eq->msix_entry_idx; + err = hinic3_set_interrupt_cfg_direct(hwdev, &info); + if (err) { + dev_err(hwdev->dev, "Set msix attr for ceq %u failed\n", + q_id); + return err; + } + } + + return 0; +} + +static int init_basic_mgmt_channel(struct hinic3_hwdev *hwdev) +{ + int err; + + err = hinic3_comm_aeqs_init(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init async event queues\n"); + return err; + } + + err = hinic3_comm_mbox_init(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init mailbox\n"); + goto err_free_comm_aeqs; + } + + err = init_aeqs_msix_attr(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init aeqs msix attr\n"); + goto err_free_comm_mbox; + } + + return 0; + +err_free_comm_mbox: + hinic3_comm_mbox_free(hwdev); +err_free_comm_aeqs: + hinic3_aeqs_free(hwdev); + + return err; +} + +static void free_base_mgmt_channel(struct hinic3_hwdev *hwdev) +{ + hinic3_comm_mbox_free(hwdev); + hinic3_aeqs_free(hwdev); +} + +static int dma_attr_table_init(struct hinic3_hwdev *hwdev) +{ + u32 addr, val, dst_attr; + + /* Indirect access, set 
entry_idx first */ + addr = HINIC3_CSR_DMA_ATTR_INDIR_IDX_ADDR; + val = hinic3_hwif_read_reg(hwdev->hwif, addr); + val &= ~HINIC3_DMA_ATTR_ENTRY_AT_MASK; + val |= HINIC3_DMA_ATTR_INDIR_IDX_SET(HINIC3_PCIE_MSIX_ATTR_ENTRY, IDX); + hinic3_hwif_write_reg(hwdev->hwif, addr, val); + + addr = HINIC3_CSR_DMA_ATTR_TBL_ADDR; + val = hinic3_hwif_read_reg(hwdev->hwif, addr); + + dst_attr = HINIC3_DMA_ATTR_ENTRY_SET(HINIC3_PCIE_ST_DISABLE, ST) | + HINIC3_DMA_ATTR_ENTRY_SET(HINIC3_PCIE_AT_DISABLE, AT) | + HINIC3_DMA_ATTR_ENTRY_SET(HINIC3_PCIE_PH_DISABLE, PH) | + HINIC3_DMA_ATTR_ENTRY_SET(HINIC3_PCIE_SNOOP, NO_SNOOPING) | + HINIC3_DMA_ATTR_ENTRY_SET(HINIC3_PCIE_TPH_DISABLE, TPH_EN); + if (val == dst_attr) + return 0; + + return hinic3_set_dma_attr_tbl(hwdev, + HINIC3_PCIE_MSIX_ATTR_ENTRY, + HINIC3_PCIE_ST_DISABLE, + HINIC3_PCIE_AT_DISABLE, + HINIC3_PCIE_PH_DISABLE, + HINIC3_PCIE_SNOOP, + HINIC3_PCIE_TPH_DISABLE); +} + +static int init_basic_attributes(struct hinic3_hwdev *hwdev) +{ + struct comm_global_attr glb_attr; + int err; + + err = hinic3_func_reset(hwdev, hinic3_global_func_id(hwdev), + COMM_FUNC_RESET_FLAG); + if (err) + return err; + + err = hinic3_get_comm_features(hwdev, hwdev->features, + COMM_MAX_FEATURE_QWORD); + if (err) + return err; + + dev_dbg(hwdev->dev, "Comm hw features: 0x%llx\n", hwdev->features[0]); + + err = hinic3_get_global_attr(hwdev, &glb_attr); + if (err) + return err; + + err = hinic3_set_func_svc_used_state(hwdev, COMM_FUNC_SVC_T_COMM, 1); + if (err) + return err; + + err = dma_attr_table_init(hwdev); + if (err) + return err; + + hwdev->max_cmdq = min(glb_attr.cmdq_num, HINIC3_MAX_CMDQ_TYPES); + dev_dbg(hwdev->dev, + "global attribute: max_host: 0x%x, max_pf: 0x%x, vf_id_start: 0x%x, mgmt node id: 0x%x, cmdq_num: 0x%x\n", + glb_attr.max_host_num, glb_attr.max_pf_num, + glb_attr.vf_id_start, glb_attr.mgmt_host_node_id, + glb_attr.cmdq_num); + + return 0; +} + +static int hinic3_comm_cmdqs_init(struct hinic3_hwdev *hwdev) +{ + int err; + + err = hinic3_cmdqs_init(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init cmd queues\n"); + return err; + } + + hinic3_ceq_register_cb(hwdev, HINIC3_CMDQ, hinic3_cmdq_ceq_handler); + + err = hinic3_set_cmdq_depth(hwdev, CMDQ_DEPTH); + if (err) { + dev_err(hwdev->dev, "Failed to set cmdq depth\n"); + goto err_free_cmdqs; + } + + set_bit(HINIC3_HWDEV_CMDQ_INITED, &hwdev->func_state); + + return 0; + +err_free_cmdqs: + hinic3_cmdqs_free(hwdev); + + return err; +} + +static void hinic3_comm_cmdqs_free(struct hinic3_hwdev *hwdev) +{ + spin_lock_bh(&hwdev->channel_lock); + clear_bit(HINIC3_HWDEV_CMDQ_INITED, &hwdev->func_state); + spin_unlock_bh(&hwdev->channel_lock); + + hinic3_ceq_unregister_cb(hwdev, HINIC3_CMDQ); + hinic3_cmdqs_free(hwdev); +} + +static int init_cmdqs_channel(struct hinic3_hwdev *hwdev) +{ + int err; + + err = hinic3_comm_ceqs_init(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init completion event queues\n"); + return err; + } + + err = init_ceqs_msix_attr(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init ceqs msix attr\n"); + goto err_free_ceqs; + } + + hwdev->wq_page_size = HINIC3_MIN_PAGE_SIZE << HINIC3_WQ_PAGE_SIZE_ORDER; + err = hinic3_set_wq_page_size(hwdev, hinic3_global_func_id(hwdev), + hwdev->wq_page_size); + if (err) { + dev_err(hwdev->dev, "Failed to set wq page size\n"); + goto err_free_ceqs; + } + + err = hinic3_comm_cmdqs_init(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init cmd queues\n"); + goto err_reset_wq_page_size; + } + + return 0; + +err_reset_wq_page_size: + 
hinic3_set_wq_page_size(hwdev, hinic3_global_func_id(hwdev), + HINIC3_MIN_PAGE_SIZE); +err_free_ceqs: + hinic3_ceqs_free(hwdev); + + return err; +} + +static void hinic3_free_cmdqs_channel(struct hinic3_hwdev *hwdev) +{ + hinic3_comm_cmdqs_free(hwdev); + hinic3_ceqs_free(hwdev); +} + +static int hinic3_init_comm_ch(struct hinic3_hwdev *hwdev) +{ + int err; + + err = init_basic_mgmt_channel(hwdev); + if (err) + return err; + + err = init_basic_attributes(hwdev); + if (err) + goto err_free_basic_mgmt_ch; + + err = init_cmdqs_channel(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init cmdq channel\n"); + goto err_clear_func_svc_used_state; + } + + return 0; + +err_clear_func_svc_used_state: + hinic3_set_func_svc_used_state(hwdev, COMM_FUNC_SVC_T_COMM, 0); +err_free_basic_mgmt_ch: + free_base_mgmt_channel(hwdev); + + return err; +} + +static void hinic3_uninit_comm_ch(struct hinic3_hwdev *hwdev) +{ + hinic3_free_cmdqs_channel(hwdev); + hinic3_set_func_svc_used_state(hwdev, COMM_FUNC_SVC_T_COMM, 0); + free_base_mgmt_channel(hwdev); +} + +static DEFINE_IDA(hinic3_adev_ida); + +static int hinic3_adev_idx_alloc(void) +{ + return ida_alloc(&hinic3_adev_ida, GFP_KERNEL); +} + +static void hinic3_adev_idx_free(int id) +{ + ida_free(&hinic3_adev_ida, id); +} + int hinic3_init_hwdev(struct pci_dev *pdev) { - /* Completed by later submission due to LoC limit. */ - return -EFAULT; + struct hinic3_pcidev *pci_adapter = pci_get_drvdata(pdev); + struct hinic3_hwdev *hwdev; + int err; + + hwdev = kzalloc(sizeof(*hwdev), GFP_KERNEL); + if (!hwdev) + return -ENOMEM; + + pci_adapter->hwdev = hwdev; + hwdev->adapter = pci_adapter; + hwdev->pdev = pci_adapter->pdev; + hwdev->dev = &pci_adapter->pdev->dev; + hwdev->func_state = 0; + hwdev->dev_id = hinic3_adev_idx_alloc(); + spin_lock_init(&hwdev->channel_lock); + + err = hinic3_init_hwif(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init hwif\n"); + goto err_free_hwdev; + } + + hwdev->workq = alloc_workqueue(HINIC3_HWDEV_WQ_NAME, WQ_MEM_RECLAIM, + HINIC3_WQ_MAX_REQ); + if (!hwdev->workq) { + dev_err(hwdev->dev, "Failed to alloc hardware workq\n"); + err = -ENOMEM; + goto err_free_hwif; + } + + err = hinic3_init_cfg_mgmt(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init config mgmt\n"); + goto err_destroy_workqueue; + } + + err = hinic3_init_comm_ch(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init communication channel\n"); + goto err_free_cfg_mgmt; + } + + err = hinic3_init_capability(hwdev); + if (err) { + dev_err(hwdev->dev, "Failed to init capability\n"); + goto err_uninit_comm_ch; + } + + err = hinic3_set_comm_features(hwdev, hwdev->features, + COMM_MAX_FEATURE_QWORD); + if (err) { + dev_err(hwdev->dev, "Failed to set comm features\n"); + goto err_uninit_comm_ch; + } + + return 0; + +err_uninit_comm_ch: + hinic3_uninit_comm_ch(hwdev); +err_free_cfg_mgmt: + hinic3_free_cfg_mgmt(hwdev); +err_destroy_workqueue: + destroy_workqueue(hwdev->workq); +err_free_hwif: + hinic3_free_hwif(hwdev); +err_free_hwdev: + pci_adapter->hwdev = NULL; + hinic3_adev_idx_free(hwdev->dev_id); + kfree(hwdev); + + return err; } void hinic3_free_hwdev(struct hinic3_hwdev *hwdev) { - /* Completed by later submission due to LoC limit. 
*/ + u64 drv_features[COMM_MAX_FEATURE_QWORD] = {}; + + hinic3_set_comm_features(hwdev, drv_features, COMM_MAX_FEATURE_QWORD); + hinic3_func_rx_tx_flush(hwdev); + hinic3_uninit_comm_ch(hwdev); + hinic3_free_cfg_mgmt(hwdev); + destroy_workqueue(hwdev->workq); + hinic3_free_hwif(hwdev); + hinic3_adev_idx_free(hwdev->dev_id); + kfree(hwdev); } void hinic3_set_api_stop(struct hinic3_hwdev *hwdev) { - /* Completed by later submission due to LoC limit. */ + struct hinic3_mbox *mbox; + + spin_lock_bh(&hwdev->channel_lock); + if (test_bit(HINIC3_HWDEV_MBOX_INITED, &hwdev->func_state)) { + mbox = hwdev->mbox; + spin_lock(&mbox->mbox_lock); + if (mbox->event_flag == MBOX_EVENT_START) + mbox->event_flag = MBOX_EVENT_TIMEOUT; + spin_unlock(&mbox->mbox_lock); + } + + if (test_bit(HINIC3_HWDEV_CMDQ_INITED, &hwdev->func_state)) + hinic3_cmdq_flush_sync_cmd(hwdev); + + spin_unlock_bh(&hwdev->channel_lock); } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c index 0865453bf0e7..f76f140fb6f7 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.c @@ -6,13 +6,428 @@ #include <linux/io.h> #include "hinic3_common.h" +#include "hinic3_csr.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" +#define HINIC3_HWIF_READY_TIMEOUT 10000 +#define HINIC3_DB_AND_OUTBOUND_EN_TIMEOUT 60000 +#define HINIC3_PCIE_LINK_DOWN 0xFFFFFFFF + +/* config BAR4/5 4MB, DB & DWQE both 2MB */ +#define HINIC3_DB_DWQE_SIZE 0x00400000 + +/* db/dwqe page size: 4K */ +#define HINIC3_DB_PAGE_SIZE 0x00001000 +#define HINIC3_DWQE_OFFSET 0x00000800 +#define HINIC3_DB_MAX_AREAS (HINIC3_DB_DWQE_SIZE / HINIC3_DB_PAGE_SIZE) + +#define HINIC3_MAX_MSIX_ENTRY 2048 + +#define HINIC3_AF0_FUNC_GLOBAL_IDX_MASK GENMASK(11, 0) +#define HINIC3_AF0_P2P_IDX_MASK GENMASK(16, 12) +#define HINIC3_AF0_PCI_INTF_IDX_MASK GENMASK(19, 17) +#define HINIC3_AF0_FUNC_TYPE_MASK BIT(28) +#define HINIC3_AF0_GET(val, member) \ + FIELD_GET(HINIC3_AF0_##member##_MASK, val) + +#define HINIC3_AF1_AEQS_PER_FUNC_MASK GENMASK(9, 8) +#define HINIC3_AF1_MGMT_INIT_STATUS_MASK BIT(30) +#define HINIC3_AF1_GET(val, member) \ + FIELD_GET(HINIC3_AF1_##member##_MASK, val) + +#define HINIC3_AF2_CEQS_PER_FUNC_MASK GENMASK(8, 0) +#define HINIC3_AF2_IRQS_PER_FUNC_MASK GENMASK(26, 16) +#define HINIC3_AF2_GET(val, member) \ + FIELD_GET(HINIC3_AF2_##member##_MASK, val) + +#define HINIC3_AF4_DOORBELL_CTRL_MASK BIT(0) +#define HINIC3_AF4_GET(val, member) \ + FIELD_GET(HINIC3_AF4_##member##_MASK, val) +#define HINIC3_AF4_SET(val, member) \ + FIELD_PREP(HINIC3_AF4_##member##_MASK, val) + +#define HINIC3_AF5_OUTBOUND_CTRL_MASK BIT(0) +#define HINIC3_AF5_GET(val, member) \ + FIELD_GET(HINIC3_AF5_##member##_MASK, val) + +#define HINIC3_AF6_PF_STATUS_MASK GENMASK(15, 0) +#define HINIC3_AF6_FUNC_MAX_SQ_MASK GENMASK(31, 23) +#define HINIC3_AF6_MSIX_FLEX_EN_MASK BIT(22) +#define HINIC3_AF6_GET(val, member) \ + FIELD_GET(HINIC3_AF6_##member##_MASK, val) + +#define HINIC3_GET_REG_ADDR(reg) ((reg) & (HINIC3_REGS_FLAG_MASK)) + +static void __iomem *hinic3_reg_addr(struct hinic3_hwif *hwif, u32 reg) +{ + return hwif->cfg_regs_base + HINIC3_GET_REG_ADDR(reg); +} + +u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg) +{ + void __iomem *addr = hinic3_reg_addr(hwif, reg); + + return ioread32be(addr); +} + +void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val) +{ + void __iomem *addr = hinic3_reg_addr(hwif, reg); + + iowrite32be(val, addr); +} + +static enum 
hinic3_wait_return check_hwif_ready_handler(void *priv_data) +{ + struct hinic3_hwdev *hwdev = priv_data; + u32 attr1; + + attr1 = hinic3_hwif_read_reg(hwdev->hwif, HINIC3_CSR_FUNC_ATTR1_ADDR); + + return HINIC3_AF1_GET(attr1, MGMT_INIT_STATUS) ? + HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING; +} + +static int wait_hwif_ready(struct hinic3_hwdev *hwdev) +{ + return hinic3_wait_for_timeout(hwdev, check_hwif_ready_handler, + HINIC3_HWIF_READY_TIMEOUT, + USEC_PER_MSEC); +} + +/* Set attr struct from HW attr values. */ +static void set_hwif_attr(struct hinic3_func_attr *attr, u32 attr0, u32 attr1, + u32 attr2, u32 attr3, u32 attr6) +{ + attr->func_global_idx = HINIC3_AF0_GET(attr0, FUNC_GLOBAL_IDX); + attr->port_to_port_idx = HINIC3_AF0_GET(attr0, P2P_IDX); + attr->pci_intf_idx = HINIC3_AF0_GET(attr0, PCI_INTF_IDX); + attr->func_type = HINIC3_AF0_GET(attr0, FUNC_TYPE); + + attr->num_aeqs = BIT(HINIC3_AF1_GET(attr1, AEQS_PER_FUNC)); + attr->num_ceqs = HINIC3_AF2_GET(attr2, CEQS_PER_FUNC); + attr->num_irqs = HINIC3_AF2_GET(attr2, IRQS_PER_FUNC); + if (attr->num_irqs > HINIC3_MAX_MSIX_ENTRY) + attr->num_irqs = HINIC3_MAX_MSIX_ENTRY; + + attr->num_sq = HINIC3_AF6_GET(attr6, FUNC_MAX_SQ); + attr->msix_flex_en = HINIC3_AF6_GET(attr6, MSIX_FLEX_EN); +} + +/* Read attributes from HW and set attribute struct. */ +static int init_hwif_attr(struct hinic3_hwdev *hwdev) +{ + u32 attr0, attr1, attr2, attr3, attr6; + struct hinic3_hwif *hwif; + + hwif = hwdev->hwif; + attr0 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR0_ADDR); + if (attr0 == HINIC3_PCIE_LINK_DOWN) + return -EFAULT; + + attr1 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR1_ADDR); + if (attr1 == HINIC3_PCIE_LINK_DOWN) + return -EFAULT; + + attr2 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR2_ADDR); + if (attr2 == HINIC3_PCIE_LINK_DOWN) + return -EFAULT; + + attr3 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR3_ADDR); + if (attr3 == HINIC3_PCIE_LINK_DOWN) + return -EFAULT; + + attr6 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR6_ADDR); + if (attr6 == HINIC3_PCIE_LINK_DOWN) + return -EFAULT; + + set_hwif_attr(&hwif->attr, attr0, attr1, attr2, attr3, attr6); + + if (!hwif->attr.num_ceqs) { + dev_err(hwdev->dev, "Ceq num cfg in fw is zero\n"); + return -EFAULT; + } + + if (!hwif->attr.num_irqs) { + dev_err(hwdev->dev, + "Irq num cfg in fw is zero, msix_flex_en %d\n", + hwif->attr.msix_flex_en); + return -EFAULT; + } + + return 0; +} + +static enum hinic3_doorbell_ctrl hinic3_get_doorbell_ctrl_status(struct hinic3_hwif *hwif) +{ + u32 attr4 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR4_ADDR); + + return HINIC3_AF4_GET(attr4, DOORBELL_CTRL); +} + +static enum hinic3_outbound_ctrl hinic3_get_outbound_ctrl_status(struct hinic3_hwif *hwif) +{ + u32 attr5 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR5_ADDR); + + return HINIC3_AF5_GET(attr5, OUTBOUND_CTRL); +} + +void hinic3_toggle_doorbell(struct hinic3_hwif *hwif, + enum hinic3_doorbell_ctrl flag) +{ + u32 addr, attr4; + + addr = HINIC3_CSR_FUNC_ATTR4_ADDR; + attr4 = hinic3_hwif_read_reg(hwif, addr); + + attr4 &= ~HINIC3_AF4_DOORBELL_CTRL_MASK; + attr4 |= HINIC3_AF4_SET(flag, DOORBELL_CTRL); + + hinic3_hwif_write_reg(hwif, addr, attr4); +} + +static int db_area_idx_init(struct hinic3_hwif *hwif, u64 db_base_phy, + u8 __iomem *db_base, u64 db_dwqe_len) +{ + struct hinic3_db_area *db_area = &hwif->db_area; + u32 db_max_areas; + + hwif->db_base_phy = db_base_phy; + hwif->db_base = db_base; + hwif->db_dwqe_len = db_dwqe_len; + + db_max_areas = db_dwqe_len > 
HINIC3_DB_DWQE_SIZE ? + HINIC3_DB_MAX_AREAS : db_dwqe_len / HINIC3_DB_PAGE_SIZE; + db_area->db_bitmap_array = bitmap_zalloc(db_max_areas, GFP_KERNEL); + if (!db_area->db_bitmap_array) + return -ENOMEM; + + db_area->db_max_areas = db_max_areas; + spin_lock_init(&db_area->idx_lock); + + return 0; +} + +static void db_area_idx_free(struct hinic3_db_area *db_area) +{ + bitmap_free(db_area->db_bitmap_array); +} + +static int get_db_idx(struct hinic3_hwif *hwif, u32 *idx) +{ + struct hinic3_db_area *db_area = &hwif->db_area; + u32 pg_idx; + + spin_lock(&db_area->idx_lock); + pg_idx = find_first_zero_bit(db_area->db_bitmap_array, + db_area->db_max_areas); + if (pg_idx == db_area->db_max_areas) { + spin_unlock(&db_area->idx_lock); + return -ENOMEM; + } + set_bit(pg_idx, db_area->db_bitmap_array); + spin_unlock(&db_area->idx_lock); + + *idx = pg_idx; + + return 0; +} + +static void free_db_idx(struct hinic3_hwif *hwif, u32 idx) +{ + struct hinic3_db_area *db_area = &hwif->db_area; + + spin_lock(&db_area->idx_lock); + clear_bit(idx, db_area->db_bitmap_array); + spin_unlock(&db_area->idx_lock); +} + +void hinic3_free_db_addr(struct hinic3_hwdev *hwdev, const u8 __iomem *db_base) +{ + struct hinic3_hwif *hwif; + uintptr_t distance; + u32 idx; + + hwif = hwdev->hwif; + distance = db_base - hwif->db_base; + idx = distance / HINIC3_DB_PAGE_SIZE; + + free_db_idx(hwif, idx); +} + +int hinic3_alloc_db_addr(struct hinic3_hwdev *hwdev, void __iomem **db_base, + void __iomem **dwqe_base) +{ + struct hinic3_hwif *hwif; + u8 __iomem *addr; + u32 idx; + int err; + + hwif = hwdev->hwif; + + err = get_db_idx(hwif, &idx); + if (err) + return err; + + addr = hwif->db_base + idx * HINIC3_DB_PAGE_SIZE; + *db_base = addr; + + if (dwqe_base) + *dwqe_base = addr + HINIC3_DWQE_OFFSET; + + return 0; +} + void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx, enum hinic3_msix_state flag) { - /* Completed by later submission due to LoC limit. 
*/ + struct hinic3_hwif *hwif; + u8 int_msk = 1; + u32 mask_bits; + u32 addr; + + hwif = hwdev->hwif; + + if (flag) + mask_bits = HINIC3_MSI_CLR_INDIR_SET(int_msk, INT_MSK_SET); + else + mask_bits = HINIC3_MSI_CLR_INDIR_SET(int_msk, INT_MSK_CLR); + mask_bits = mask_bits | + HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX); + + addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR; + hinic3_hwif_write_reg(hwif, addr, mask_bits); +} + +static void disable_all_msix(struct hinic3_hwdev *hwdev) +{ + u16 num_irqs = hwdev->hwif->attr.num_irqs; + u16 i; + + for (i = 0; i < num_irqs; i++) + hinic3_set_msix_state(hwdev, i, HINIC3_MSIX_DISABLE); +} + +void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx, + u8 clear_resend_en) +{ + struct hinic3_hwif *hwif; + u32 msix_ctrl, addr; + + hwif = hwdev->hwif; + + msix_ctrl = HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX) | + HINIC3_MSI_CLR_INDIR_SET(clear_resend_en, RESEND_TIMER_CLR); + + addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR; + hinic3_hwif_write_reg(hwif, addr, msix_ctrl); +} + +void hinic3_set_msix_auto_mask_state(struct hinic3_hwdev *hwdev, u16 msix_idx, + enum hinic3_msix_auto_mask flag) +{ + struct hinic3_hwif *hwif; + u32 mask_bits; + u32 addr; + + hwif = hwdev->hwif; + + if (flag) + mask_bits = HINIC3_MSI_CLR_INDIR_SET(1, AUTO_MSK_SET); + else + mask_bits = HINIC3_MSI_CLR_INDIR_SET(1, AUTO_MSK_CLR); + + mask_bits = mask_bits | + HINIC3_MSI_CLR_INDIR_SET(msix_idx, SIMPLE_INDIR_IDX); + + addr = HINIC3_CSR_FUNC_MSI_CLR_WR_ADDR; + hinic3_hwif_write_reg(hwif, addr, mask_bits); +} + +static enum hinic3_wait_return check_db_outbound_enable_handler(void *priv_data) +{ + enum hinic3_outbound_ctrl outbound_ctrl; + struct hinic3_hwif *hwif = priv_data; + enum hinic3_doorbell_ctrl db_ctrl; + + db_ctrl = hinic3_get_doorbell_ctrl_status(hwif); + outbound_ctrl = hinic3_get_outbound_ctrl_status(hwif); + if (outbound_ctrl == ENABLE_OUTBOUND && db_ctrl == ENABLE_DOORBELL) + return HINIC3_WAIT_PROCESS_CPL; + + return HINIC3_WAIT_PROCESS_WAITING; +} + +static int wait_until_doorbell_and_outbound_enabled(struct hinic3_hwif *hwif) +{ + return hinic3_wait_for_timeout(hwif, check_db_outbound_enable_handler, + HINIC3_DB_AND_OUTBOUND_EN_TIMEOUT, + USEC_PER_MSEC); +} + +int hinic3_init_hwif(struct hinic3_hwdev *hwdev) +{ + struct hinic3_pcidev *pci_adapter = hwdev->adapter; + struct hinic3_hwif *hwif; + u32 attr1, attr4, attr5; + int err; + + hwif = kzalloc(sizeof(*hwif), GFP_KERNEL); + if (!hwif) + return -ENOMEM; + + hwdev->hwif = hwif; + hwif->cfg_regs_base = (u8 __iomem *)pci_adapter->cfg_reg_base + + HINIC3_VF_CFG_REG_OFFSET; + + err = db_area_idx_init(hwif, pci_adapter->db_base_phy, + pci_adapter->db_base, + pci_adapter->db_dwqe_len); + if (err) { + dev_err(hwdev->dev, "Failed to init db area.\n"); + goto err_free_hwif; + } + + err = wait_hwif_ready(hwdev); + if (err) { + attr1 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR1_ADDR); + dev_err(hwdev->dev, "Chip status is not ready, attr1:0x%x\n", + attr1); + goto err_free_db_area_idx; + } + + err = init_hwif_attr(hwdev); + if (err) { + dev_err(hwdev->dev, "Init hwif attr failed\n"); + goto err_free_db_area_idx; + } + + err = wait_until_doorbell_and_outbound_enabled(hwif); + if (err) { + attr4 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR4_ADDR); + attr5 = hinic3_hwif_read_reg(hwif, HINIC3_CSR_FUNC_ATTR5_ADDR); + dev_err(hwdev->dev, "HW doorbell/outbound is disabled, attr4 0x%x attr5 0x%x\n", + attr4, attr5); + goto err_free_db_area_idx; + } + + disable_all_msix(hwdev); + + return 0; + 
+err_free_db_area_idx: + db_area_idx_free(&hwif->db_area); +err_free_hwif: + kfree(hwif); + + return err; +} + +void hinic3_free_hwif(struct hinic3_hwdev *hwdev) +{ + db_area_idx_free(&hwdev->hwif->db_area); + kfree(hwdev->hwif); } u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h index 513c9680e6b6..c02904e861cc 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_hwif.h @@ -45,13 +45,45 @@ struct hinic3_hwif { struct hinic3_func_attr attr; }; +enum hinic3_outbound_ctrl { + ENABLE_OUTBOUND = 0x0, + DISABLE_OUTBOUND = 0x1, +}; + +enum hinic3_doorbell_ctrl { + ENABLE_DOORBELL = 0, + DISABLE_DOORBELL = 1, +}; + enum hinic3_msix_state { HINIC3_MSIX_ENABLE, HINIC3_MSIX_DISABLE, }; +enum hinic3_msix_auto_mask { + HINIC3_CLR_MSIX_AUTO_MASK, + HINIC3_SET_MSIX_AUTO_MASK, +}; + +u32 hinic3_hwif_read_reg(struct hinic3_hwif *hwif, u32 reg); +void hinic3_hwif_write_reg(struct hinic3_hwif *hwif, u32 reg, u32 val); + +void hinic3_toggle_doorbell(struct hinic3_hwif *hwif, + enum hinic3_doorbell_ctrl flag); + +int hinic3_alloc_db_addr(struct hinic3_hwdev *hwdev, void __iomem **db_base, + void __iomem **dwqe_base); +void hinic3_free_db_addr(struct hinic3_hwdev *hwdev, const u8 __iomem *db_base); + +int hinic3_init_hwif(struct hinic3_hwdev *hwdev); +void hinic3_free_hwif(struct hinic3_hwdev *hwdev); + void hinic3_set_msix_state(struct hinic3_hwdev *hwdev, u16 msix_idx, enum hinic3_msix_state flag); +void hinic3_msix_intr_clear_resend_bit(struct hinic3_hwdev *hwdev, u16 msix_idx, + u8 clear_resend_en); +void hinic3_set_msix_auto_mask_state(struct hinic3_hwdev *hwdev, u16 msix_idx, + enum hinic3_msix_auto_mask flag); u16 hinic3_global_func_id(struct hinic3_hwdev *hwdev); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c index 8b92eed25edf..a69b361225e9 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_irq.c @@ -38,19 +38,19 @@ static int hinic3_poll(struct napi_struct *napi, int budget) return work_done; } -void qp_add_napi(struct hinic3_irq_cfg *irq_cfg) +static void qp_add_napi(struct hinic3_irq_cfg *irq_cfg) { struct hinic3_nic_dev *nic_dev = netdev_priv(irq_cfg->netdev); + netif_napi_add(nic_dev->netdev, &irq_cfg->napi, hinic3_poll); netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, NETDEV_QUEUE_TYPE_RX, &irq_cfg->napi); netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, NETDEV_QUEUE_TYPE_TX, &irq_cfg->napi); - netif_napi_add(nic_dev->netdev, &irq_cfg->napi, hinic3_poll); napi_enable(&irq_cfg->napi); } -void qp_del_napi(struct hinic3_irq_cfg *irq_cfg) +static void qp_del_napi(struct hinic3_irq_cfg *irq_cfg) { napi_disable(&irq_cfg->napi); netif_queue_set_napi(irq_cfg->netdev, irq_cfg->irq_id, @@ -60,3 +60,135 @@ void qp_del_napi(struct hinic3_irq_cfg *irq_cfg) netif_stop_subqueue(irq_cfg->netdev, irq_cfg->irq_id); netif_napi_del(&irq_cfg->napi); } + +static irqreturn_t qp_irq(int irq, void *data) +{ + struct hinic3_irq_cfg *irq_cfg = data; + struct hinic3_nic_dev *nic_dev; + + nic_dev = netdev_priv(irq_cfg->netdev); + hinic3_msix_intr_clear_resend_bit(nic_dev->hwdev, + irq_cfg->msix_entry_idx, 1); + + napi_schedule(&irq_cfg->napi); + + return IRQ_HANDLED; +} + +static int hinic3_request_irq(struct hinic3_irq_cfg *irq_cfg, u16 q_id) +{ + struct hinic3_interrupt_info info = {}; + struct hinic3_nic_dev 
*nic_dev; + struct net_device *netdev; + int err; + + netdev = irq_cfg->netdev; + nic_dev = netdev_priv(netdev); + qp_add_napi(irq_cfg); + + info.msix_index = irq_cfg->msix_entry_idx; + info.interrupt_coalesc_set = 1; + info.pending_limit = nic_dev->intr_coalesce[q_id].pending_limit; + info.coalesc_timer_cfg = + nic_dev->intr_coalesce[q_id].coalesce_timer_cfg; + info.resend_timer_cfg = nic_dev->intr_coalesce[q_id].resend_timer_cfg; + err = hinic3_set_interrupt_cfg_direct(nic_dev->hwdev, &info); + if (err) { + netdev_err(netdev, "Failed to set RX interrupt coalescing attribute.\n"); + qp_del_napi(irq_cfg); + return err; + } + + err = request_irq(irq_cfg->irq_id, qp_irq, 0, irq_cfg->irq_name, + irq_cfg); + if (err) { + qp_del_napi(irq_cfg); + return err; + } + + irq_set_affinity_hint(irq_cfg->irq_id, &irq_cfg->affinity_mask); + + return 0; +} + +static void hinic3_release_irq(struct hinic3_irq_cfg *irq_cfg) +{ + irq_set_affinity_hint(irq_cfg->irq_id, NULL); + free_irq(irq_cfg->irq_id, irq_cfg); +} + +int hinic3_qps_irq_init(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct pci_dev *pdev = nic_dev->pdev; + struct hinic3_irq_cfg *irq_cfg; + struct msix_entry *msix_entry; + u32 local_cpu; + u16 q_id; + int err; + + for (q_id = 0; q_id < nic_dev->q_params.num_qps; q_id++) { + msix_entry = &nic_dev->qps_msix_entries[q_id]; + irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; + + irq_cfg->irq_id = msix_entry->vector; + irq_cfg->msix_entry_idx = msix_entry->entry; + irq_cfg->netdev = netdev; + irq_cfg->txq = &nic_dev->txqs[q_id]; + irq_cfg->rxq = &nic_dev->rxqs[q_id]; + nic_dev->rxqs[q_id].irq_cfg = irq_cfg; + + local_cpu = cpumask_local_spread(q_id, dev_to_node(&pdev->dev)); + cpumask_set_cpu(local_cpu, &irq_cfg->affinity_mask); + + snprintf(irq_cfg->irq_name, sizeof(irq_cfg->irq_name), + "%s_qp%u", netdev->name, q_id); + + err = hinic3_request_irq(irq_cfg, q_id); + if (err) { + netdev_err(netdev, "Failed to request Rx irq\n"); + goto err_release_irqs; + } + + hinic3_set_msix_auto_mask_state(nic_dev->hwdev, + irq_cfg->msix_entry_idx, + HINIC3_SET_MSIX_AUTO_MASK); + hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, + HINIC3_MSIX_ENABLE); + } + + return 0; + +err_release_irqs: + while (q_id > 0) { + q_id--; + irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; + qp_del_napi(irq_cfg); + hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, + HINIC3_MSIX_DISABLE); + hinic3_set_msix_auto_mask_state(nic_dev->hwdev, + irq_cfg->msix_entry_idx, + HINIC3_CLR_MSIX_AUTO_MASK); + hinic3_release_irq(irq_cfg); + } + + return err; +} + +void hinic3_qps_irq_uninit(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_irq_cfg *irq_cfg; + u16 q_id; + + for (q_id = 0; q_id < nic_dev->q_params.num_qps; q_id++) { + irq_cfg = &nic_dev->q_params.irq_cfg[q_id]; + qp_del_napi(irq_cfg); + hinic3_set_msix_state(nic_dev->hwdev, irq_cfg->msix_entry_idx, + HINIC3_MSIX_DISABLE); + hinic3_set_msix_auto_mask_state(nic_dev->hwdev, + irq_cfg->msix_entry_idx, + HINIC3_CLR_MSIX_AUTO_MASK); + hinic3_release_irq(irq_cfg); + } +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c b/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c index 4827326e6a59..3db8241a3b0c 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_lld.c @@ -8,6 +8,7 @@ #include "hinic3_hwdev.h" #include "hinic3_lld.h" #include "hinic3_mgmt.h" +#include "hinic3_pci_id_tbl.h" #define HINIC3_VF_PCI_CFG_REG_BAR 0 
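
For reference, a minimal sketch of how a queue owner might consume the doorbell allocator exported above (hinic3_alloc_db_addr()/hinic3_free_db_addr()); the example_* helpers, their error handling and the cast on free are illustrative assumptions for kernel context, not part of this patch:

static int example_alloc_queue_doorbell(struct hinic3_hwdev *hwdev,
                                        void __iomem **db, void __iomem **dwqe)
{
        int err;

        /* Reserves one HINIC3_DB_PAGE_SIZE page; *dwqe points
         * HINIC3_DWQE_OFFSET bytes into the same mapping.
         */
        err = hinic3_alloc_db_addr(hwdev, db, dwqe);
        if (err)
                return err;

        /* ... program the queue context with this doorbell address ... */
        return 0;
}

static void example_free_queue_doorbell(struct hinic3_hwdev *hwdev,
                                        void __iomem *db)
{
        /* hinic3_free_db_addr() recovers the bitmap index from the address */
        hinic3_free_db_addr(hwdev, (u8 __iomem *)db);
}
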
#define HINIC3_PCI_INTR_REG_BAR 2 @@ -121,6 +122,7 @@ static int hinic3_attach_aux_devices(struct hinic3_hwdev *hwdev) goto err_del_adevs; } mutex_unlock(&pci_adapter->pdev_mutex); + return 0; err_del_adevs: @@ -132,6 +134,7 @@ err_del_adevs: } } mutex_unlock(&pci_adapter->pdev_mutex); + return -ENOMEM; } @@ -153,6 +156,7 @@ struct hinic3_hwdev *hinic3_adev_get_hwdev(struct auxiliary_device *adev) struct hinic3_adev *hadev; hadev = container_of(adev, struct hinic3_adev, adev); + return hadev->hwdev; } @@ -307,6 +311,7 @@ static void hinic3_func_uninit(struct pci_dev *pdev) { struct hinic3_pcidev *pci_adapter = pci_get_drvdata(pdev); + hinic3_flush_mgmt_workq(pci_adapter->hwdev); hinic3_detach_aux_devices(pci_adapter->hwdev); hinic3_free_hwdev(pci_adapter->hwdev); } @@ -333,6 +338,7 @@ err_unmap_bar: err_out: dev_err(&pdev->dev, "PCIe device probe function failed\n"); + return err; } @@ -365,6 +371,7 @@ err_uninit_pci: err_out: dev_err(&pdev->dev, "PCIe device probe failed\n"); + return err; } @@ -377,7 +384,7 @@ static void hinic3_remove(struct pci_dev *pdev) } static const struct pci_device_id hinic3_pci_table[] = { - /* Completed by later submission due to LoC limit. */ + {PCI_VDEVICE(HUAWEI, PCI_DEV_ID_HINIC3_VF), 0}, {0, 0} }; diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c index 497f2a36f35d..6d87d4d895ba 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_main.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_main.c @@ -12,17 +12,59 @@ #include "hinic3_nic_cfg.h" #include "hinic3_nic_dev.h" #include "hinic3_nic_io.h" +#include "hinic3_rss.h" #include "hinic3_rx.h" #include "hinic3_tx.h" #define HINIC3_NIC_DRV_DESC "Intelligent Network Interface Card Driver" -#define HINIC3_RX_BUF_LEN 2048 -#define HINIC3_LRO_REPLENISH_THLD 256 -#define HINIC3_NIC_DEV_WQ_NAME "hinic3_nic_dev_wq" +#define HINIC3_RX_BUF_LEN 2048 +#define HINIC3_LRO_REPLENISH_THLD 256 +#define HINIC3_NIC_DEV_WQ_NAME "hinic3_nic_dev_wq" -#define HINIC3_SQ_DEPTH 1024 -#define HINIC3_RQ_DEPTH 1024 +#define HINIC3_SQ_DEPTH 1024 +#define HINIC3_RQ_DEPTH 1024 + +#define HINIC3_DEFAULT_TXRX_MSIX_PENDING_LIMIT 2 +#define HINIC3_DEFAULT_TXRX_MSIX_COALESC_TIMER_CFG 25 +#define HINIC3_DEFAULT_TXRX_MSIX_RESEND_TIMER_CFG 7 + +static void init_intr_coal_param(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_intr_coal_info *info; + u16 i; + + for (i = 0; i < nic_dev->max_qps; i++) { + info = &nic_dev->intr_coalesce[i]; + info->pending_limit = HINIC3_DEFAULT_TXRX_MSIX_PENDING_LIMIT; + info->coalesce_timer_cfg = HINIC3_DEFAULT_TXRX_MSIX_COALESC_TIMER_CFG; + info->resend_timer_cfg = HINIC3_DEFAULT_TXRX_MSIX_RESEND_TIMER_CFG; + } +} + +static int hinic3_init_intr_coalesce(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + nic_dev->intr_coalesce = kcalloc(nic_dev->max_qps, + sizeof(*nic_dev->intr_coalesce), + GFP_KERNEL); + + if (!nic_dev->intr_coalesce) + return -ENOMEM; + + init_intr_coal_param(netdev); + + return 0; +} + +static void hinic3_free_intr_coalesce(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + kfree(nic_dev->intr_coalesce); +} static int hinic3_alloc_txrxqs(struct net_device *netdev) { @@ -42,8 +84,17 @@ static int hinic3_alloc_txrxqs(struct net_device *netdev) goto err_free_txqs; } + err = hinic3_init_intr_coalesce(netdev); + if (err) { + dev_err(hwdev->dev, "Failed to init_intr_coalesce\n"); + goto err_free_rxqs; + } + 
return 0; +err_free_rxqs: + hinic3_free_rxqs(netdev); + err_free_txqs: hinic3_free_txqs(netdev); @@ -52,6 +103,7 @@ err_free_txqs: static void hinic3_free_txrxqs(struct net_device *netdev) { + hinic3_free_intr_coalesce(netdev); hinic3_free_rxqs(netdev); hinic3_free_txqs(netdev); } @@ -83,6 +135,8 @@ static int hinic3_sw_init(struct net_device *netdev) nic_dev->q_params.sq_depth = HINIC3_SQ_DEPTH; nic_dev->q_params.rq_depth = HINIC3_RQ_DEPTH; + hinic3_try_to_enable_rss(netdev); + /* VF driver always uses random MAC address. During VM migration to a * new device, the new device should learn the VMs old MAC rather than * provide its own MAC. The product design assumes that every VF is @@ -94,7 +148,7 @@ static int hinic3_sw_init(struct net_device *netdev) hinic3_global_func_id(hwdev)); if (err) { dev_err(hwdev->dev, "Failed to set default MAC\n"); - return err; + goto err_clear_rss_config; } err = hinic3_alloc_txrxqs(netdev); @@ -108,6 +162,8 @@ static int hinic3_sw_init(struct net_device *netdev) err_del_mac: hinic3_del_mac(hwdev, netdev->dev_addr, 0, hinic3_global_func_id(hwdev)); +err_clear_rss_config: + hinic3_clear_rss_config(netdev); return err; } @@ -119,6 +175,7 @@ static void hinic3_sw_uninit(struct net_device *netdev) hinic3_free_txrxqs(netdev); hinic3_del_mac(nic_dev->hwdev, netdev->dev_addr, 0, hinic3_global_func_id(nic_dev->hwdev)); + hinic3_clear_rss_config(netdev); } static void hinic3_assign_netdev_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c index e74d1eb09730..cf67e26acece 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.c @@ -4,13 +4,857 @@ #include <linux/dma-mapping.h> #include "hinic3_common.h" +#include "hinic3_csr.h" #include "hinic3_hwdev.h" #include "hinic3_hwif.h" #include "hinic3_mbox.h" +#define MBOX_INT_DST_AEQN_MASK GENMASK(11, 10) +#define MBOX_INT_SRC_RESP_AEQN_MASK GENMASK(13, 12) +#define MBOX_INT_STAT_DMA_MASK GENMASK(19, 14) +/* TX size, expressed in 4 bytes units */ +#define MBOX_INT_TX_SIZE_MASK GENMASK(24, 20) +/* SO_RO == strong order, relaxed order */ +#define MBOX_INT_STAT_DMA_SO_RO_MASK GENMASK(26, 25) +#define MBOX_INT_WB_EN_MASK BIT(28) +#define MBOX_INT_SET(val, field) \ + FIELD_PREP(MBOX_INT_##field##_MASK, val) + +#define MBOX_CTRL_TRIGGER_AEQE_MASK BIT(0) +#define MBOX_CTRL_TX_STATUS_MASK BIT(1) +#define MBOX_CTRL_DST_FUNC_MASK GENMASK(28, 16) +#define MBOX_CTRL_SET(val, field) \ + FIELD_PREP(MBOX_CTRL_##field##_MASK, val) + +#define MBOX_MSG_POLLING_TIMEOUT_MS 8000 // send msg seg timeout +#define MBOX_COMP_POLLING_TIMEOUT_MS 40000 // response + +#define MBOX_MAX_BUF_SZ 2048 +#define MBOX_HEADER_SZ 8 + +/* MBOX size is 64B, 8B for mbox_header, 8B reserved */ +#define MBOX_SEG_LEN 48 +#define MBOX_SEG_LEN_ALIGN 4 +#define MBOX_WB_STATUS_LEN 16 + +#define MBOX_SEQ_ID_START_VAL 0 +#define MBOX_SEQ_ID_MAX_VAL 42 +#define MBOX_LAST_SEG_MAX_LEN \ + (MBOX_MAX_BUF_SZ - MBOX_SEQ_ID_MAX_VAL * MBOX_SEG_LEN) + +/* mbox write back status is 16B, only first 4B is used */ +#define MBOX_WB_STATUS_ERRCODE_MASK 0xFFFF +#define MBOX_WB_STATUS_MASK 0xFF +#define MBOX_WB_ERROR_CODE_MASK 0xFF00 +#define MBOX_WB_STATUS_FINISHED_SUCCESS 0xFF +#define MBOX_WB_STATUS_NOT_FINISHED 0x00 + +#define MBOX_STATUS_FINISHED(wb) \ + ((FIELD_PREP(MBOX_WB_STATUS_MASK, (wb))) != MBOX_WB_STATUS_NOT_FINISHED) +#define MBOX_STATUS_SUCCESS(wb) \ + ((FIELD_PREP(MBOX_WB_STATUS_MASK, (wb))) == \ + 
MBOX_WB_STATUS_FINISHED_SUCCESS) +#define MBOX_STATUS_ERRCODE(wb) \ + ((wb) & MBOX_WB_ERROR_CODE_MASK) + +#define MBOX_DMA_MSG_QUEUE_DEPTH 32 +#define MBOX_AREA(hwif) \ + ((hwif)->cfg_regs_base + HINIC3_FUNC_CSR_MAILBOX_DATA_OFF) + +#define MBOX_MQ_CI_OFFSET \ + (HINIC3_CFG_REGS_FLAG + HINIC3_FUNC_CSR_MAILBOX_DATA_OFF + \ + MBOX_HEADER_SZ + MBOX_SEG_LEN) + +#define MBOX_MQ_SYNC_CI_MASK GENMASK(7, 0) +#define MBOX_MQ_ASYNC_CI_MASK GENMASK(15, 8) +#define MBOX_MQ_CI_GET(val, field) \ + FIELD_GET(MBOX_MQ_##field##_CI_MASK, val) + +#define MBOX_MGMT_FUNC_ID 0x1FFF +#define MBOX_COMM_F_MBOX_SEGMENT BIT(3) + +static u8 *get_mobx_body_from_hdr(u8 *header) +{ + return header + MBOX_HEADER_SZ; +} + +static struct hinic3_msg_desc *get_mbox_msg_desc(struct hinic3_mbox *mbox, + enum mbox_msg_direction_type dir, + u16 src_func_id) +{ + struct hinic3_msg_channel *msg_ch; + + msg_ch = (src_func_id == MBOX_MGMT_FUNC_ID) ? + &mbox->mgmt_msg : mbox->func_msg; + + return (dir == MBOX_MSG_SEND) ? + &msg_ch->recv_msg : &msg_ch->resp_msg; +} + +static void resp_mbox_handler(struct hinic3_mbox *mbox, + const struct hinic3_msg_desc *msg_desc) +{ + spin_lock(&mbox->mbox_lock); + if (msg_desc->msg_info.msg_id == mbox->send_msg_id && + mbox->event_flag == MBOX_EVENT_START) + mbox->event_flag = MBOX_EVENT_SUCCESS; + spin_unlock(&mbox->mbox_lock); +} + +static bool mbox_segment_valid(struct hinic3_mbox *mbox, + struct hinic3_msg_desc *msg_desc, + __le64 mbox_header) +{ + u8 seq_id, seg_len, msg_id, mod; + __le16 src_func_idx, cmd; + + seq_id = MBOX_MSG_HEADER_GET(mbox_header, SEQID); + seg_len = MBOX_MSG_HEADER_GET(mbox_header, SEG_LEN); + msg_id = MBOX_MSG_HEADER_GET(mbox_header, MSG_ID); + mod = MBOX_MSG_HEADER_GET(mbox_header, MODULE); + cmd = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header, CMD)); + src_func_idx = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header, + SRC_GLB_FUNC_IDX)); + + if (seq_id > MBOX_SEQ_ID_MAX_VAL || seg_len > MBOX_SEG_LEN || + (seq_id == MBOX_SEQ_ID_MAX_VAL && seg_len > MBOX_LAST_SEG_MAX_LEN)) + goto err_seg; + + if (seq_id == 0) { + msg_desc->seq_id = seq_id; + msg_desc->msg_info.msg_id = msg_id; + msg_desc->mod = mod; + msg_desc->cmd = cmd; + } else { + if (seq_id != msg_desc->seq_id + 1 || + msg_id != msg_desc->msg_info.msg_id || + mod != msg_desc->mod || cmd != msg_desc->cmd) + goto err_seg; + + msg_desc->seq_id = seq_id; + } + + return true; + +err_seg: + dev_err(mbox->hwdev->dev, + "Mailbox segment check failed, src func id: 0x%x, front seg info: seq id: 0x%x, msg id: 0x%x, mod: 0x%x, cmd: 0x%x\n", + src_func_idx, msg_desc->seq_id, msg_desc->msg_info.msg_id, + msg_desc->mod, msg_desc->cmd); + dev_err(mbox->hwdev->dev, + "Current seg info: seg len: 0x%x, seq id: 0x%x, msg id: 0x%x, mod: 0x%x, cmd: 0x%x\n", + seg_len, seq_id, msg_id, mod, cmd); + + return false; +} + +static void recv_mbox_handler(struct hinic3_mbox *mbox, + u8 *header, struct hinic3_msg_desc *msg_desc) +{ + __le64 mbox_header = *((__force __le64 *)header); + u8 *mbox_body = get_mobx_body_from_hdr(header); + u8 seq_id, seg_len; + int pos; + + if (!mbox_segment_valid(mbox, msg_desc, mbox_header)) { + msg_desc->seq_id = MBOX_SEQ_ID_MAX_VAL; + return; + } + + seq_id = MBOX_MSG_HEADER_GET(mbox_header, SEQID); + seg_len = MBOX_MSG_HEADER_GET(mbox_header, SEG_LEN); + + pos = seq_id * MBOX_SEG_LEN; + memcpy(msg_desc->msg + pos, mbox_body, seg_len); + + if (!MBOX_MSG_HEADER_GET(mbox_header, LAST)) + return; + + msg_desc->msg_len = cpu_to_le16(MBOX_MSG_HEADER_GET(mbox_header, + MSG_LEN)); + msg_desc->msg_info.status = 
MBOX_MSG_HEADER_GET(mbox_header, STATUS); + + if (MBOX_MSG_HEADER_GET(mbox_header, DIRECTION) == MBOX_MSG_RESP) + resp_mbox_handler(mbox, msg_desc); +} + +void hinic3_mbox_func_aeqe_handler(struct hinic3_hwdev *hwdev, u8 *header, + u8 size) +{ + __le64 mbox_header = *((__force __le64 *)header); + enum mbox_msg_direction_type dir; + struct hinic3_msg_desc *msg_desc; + struct hinic3_mbox *mbox; + u16 src_func_id; + + mbox = hwdev->mbox; + dir = MBOX_MSG_HEADER_GET(mbox_header, DIRECTION); + src_func_id = MBOX_MSG_HEADER_GET(mbox_header, SRC_GLB_FUNC_IDX); + msg_desc = get_mbox_msg_desc(mbox, dir, src_func_id); + recv_mbox_handler(mbox, header, msg_desc); +} + +static int init_mbox_dma_queue(struct hinic3_hwdev *hwdev, + struct mbox_dma_queue *mq) +{ + u32 size; + + mq->depth = MBOX_DMA_MSG_QUEUE_DEPTH; + mq->prod_idx = 0; + mq->cons_idx = 0; + + size = mq->depth * MBOX_MAX_BUF_SZ; + mq->dma_buf_vaddr = dma_alloc_coherent(hwdev->dev, size, + &mq->dma_buf_paddr, + GFP_KERNEL); + if (!mq->dma_buf_vaddr) + return -ENOMEM; + + return 0; +} + +static void uninit_mbox_dma_queue(struct hinic3_hwdev *hwdev, + struct mbox_dma_queue *mq) +{ + dma_free_coherent(hwdev->dev, mq->depth * MBOX_MAX_BUF_SZ, + mq->dma_buf_vaddr, mq->dma_buf_paddr); +} + +static int hinic3_init_mbox_dma_queue(struct hinic3_mbox *mbox) +{ + u32 val; + int err; + + err = init_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue); + if (err) + return err; + + err = init_mbox_dma_queue(mbox->hwdev, &mbox->async_msg_queue); + if (err) { + uninit_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue); + return err; + } + + val = hinic3_hwif_read_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET); + val &= ~MBOX_MQ_SYNC_CI_MASK; + val &= ~MBOX_MQ_ASYNC_CI_MASK; + hinic3_hwif_write_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET, val); + + return 0; +} + +static void hinic3_uninit_mbox_dma_queue(struct hinic3_mbox *mbox) +{ + uninit_mbox_dma_queue(mbox->hwdev, &mbox->sync_msg_queue); + uninit_mbox_dma_queue(mbox->hwdev, &mbox->async_msg_queue); +} + +static int alloc_mbox_msg_channel(struct hinic3_msg_channel *msg_ch) +{ + msg_ch->resp_msg.msg = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL); + if (!msg_ch->resp_msg.msg) + return -ENOMEM; + + msg_ch->recv_msg.msg = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL); + if (!msg_ch->recv_msg.msg) { + kfree(msg_ch->resp_msg.msg); + return -ENOMEM; + } + + msg_ch->resp_msg.seq_id = MBOX_SEQ_ID_MAX_VAL; + msg_ch->recv_msg.seq_id = MBOX_SEQ_ID_MAX_VAL; + + return 0; +} + +static void free_mbox_msg_channel(struct hinic3_msg_channel *msg_ch) +{ + kfree(msg_ch->recv_msg.msg); + kfree(msg_ch->resp_msg.msg); +} + +static int init_mgmt_msg_channel(struct hinic3_mbox *mbox) +{ + int err; + + err = alloc_mbox_msg_channel(&mbox->mgmt_msg); + if (err) { + dev_err(mbox->hwdev->dev, "Failed to alloc mgmt message channel\n"); + return err; + } + + err = hinic3_init_mbox_dma_queue(mbox); + if (err) { + dev_err(mbox->hwdev->dev, "Failed to init mbox dma queue\n"); + free_mbox_msg_channel(&mbox->mgmt_msg); + return err; + } + + return 0; +} + +static void uninit_mgmt_msg_channel(struct hinic3_mbox *mbox) +{ + hinic3_uninit_mbox_dma_queue(mbox); + free_mbox_msg_channel(&mbox->mgmt_msg); +} + +static int hinic3_init_func_mbox_msg_channel(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox; + int err; + + mbox = hwdev->mbox; + mbox->func_msg = kzalloc(sizeof(*mbox->func_msg), GFP_KERNEL); + if (!mbox->func_msg) + return -ENOMEM; + + err = alloc_mbox_msg_channel(mbox->func_msg); + if (err) + goto err_free_func_msg; + + return 0; + +err_free_func_msg: 
+ kfree(mbox->func_msg); + mbox->func_msg = NULL; + + return err; +} + +static void hinic3_uninit_func_mbox_msg_channel(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox = hwdev->mbox; + + free_mbox_msg_channel(mbox->func_msg); + kfree(mbox->func_msg); + mbox->func_msg = NULL; +} + +static void prepare_send_mbox(struct hinic3_mbox *mbox) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + + send_mbox->data = MBOX_AREA(mbox->hwdev->hwif); +} + +static int alloc_mbox_wb_status(struct hinic3_mbox *mbox) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + struct hinic3_hwdev *hwdev = mbox->hwdev; + u32 addr_h, addr_l; + + send_mbox->wb_vaddr = dma_alloc_coherent(hwdev->dev, + MBOX_WB_STATUS_LEN, + &send_mbox->wb_paddr, + GFP_KERNEL); + if (!send_mbox->wb_vaddr) + return -ENOMEM; + + addr_h = upper_32_bits(send_mbox->wb_paddr); + addr_l = lower_32_bits(send_mbox->wb_paddr); + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF, + addr_h); + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF, + addr_l); + + return 0; +} + +static void free_mbox_wb_status(struct hinic3_mbox *mbox) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + struct hinic3_hwdev *hwdev = mbox->hwdev; + + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_H_OFF, + 0); + hinic3_hwif_write_reg(hwdev->hwif, HINIC3_FUNC_CSR_MAILBOX_RESULT_L_OFF, + 0); + + dma_free_coherent(hwdev->dev, MBOX_WB_STATUS_LEN, + send_mbox->wb_vaddr, send_mbox->wb_paddr); +} + +static int hinic3_mbox_pre_init(struct hinic3_hwdev *hwdev, + struct hinic3_mbox *mbox) +{ + mbox->hwdev = hwdev; + mutex_init(&mbox->mbox_send_lock); + spin_lock_init(&mbox->mbox_lock); + + mbox->workq = create_singlethread_workqueue(HINIC3_MBOX_WQ_NAME); + if (!mbox->workq) { + dev_err(hwdev->dev, "Failed to initialize MBOX workqueue\n"); + return -ENOMEM; + } + hwdev->mbox = mbox; + + return 0; +} + +int hinic3_init_mbox(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox; + int err; + + mbox = kzalloc(sizeof(*mbox), GFP_KERNEL); + if (!mbox) + return -ENOMEM; + + err = hinic3_mbox_pre_init(hwdev, mbox); + if (err) + goto err_free_mbox; + + err = init_mgmt_msg_channel(mbox); + if (err) + goto err_destroy_workqueue; + + err = hinic3_init_func_mbox_msg_channel(hwdev); + if (err) + goto err_uninit_mgmt_msg_ch; + + err = alloc_mbox_wb_status(mbox); + if (err) { + dev_err(hwdev->dev, "Failed to alloc mbox write back status\n"); + goto err_uninit_func_mbox_msg_ch; + } + + prepare_send_mbox(mbox); + + return 0; + +err_uninit_func_mbox_msg_ch: + hinic3_uninit_func_mbox_msg_channel(hwdev); + +err_uninit_mgmt_msg_ch: + uninit_mgmt_msg_channel(mbox); + +err_destroy_workqueue: + destroy_workqueue(mbox->workq); + +err_free_mbox: + kfree(mbox); + + return err; +} + +void hinic3_free_mbox(struct hinic3_hwdev *hwdev) +{ + struct hinic3_mbox *mbox = hwdev->mbox; + + destroy_workqueue(mbox->workq); + free_mbox_wb_status(mbox); + hinic3_uninit_func_mbox_msg_channel(hwdev); + uninit_mgmt_msg_channel(mbox); + kfree(mbox); +} + +#define MBOX_DMA_MSG_INIT_XOR_VAL 0x5a5a5a5a +#define MBOX_XOR_DATA_ALIGN 4 +static u32 mbox_dma_msg_xor(u32 *data, u32 msg_len) +{ + u32 xor = MBOX_DMA_MSG_INIT_XOR_VAL; + u32 dw_len = msg_len / sizeof(u32); + u32 i; + + for (i = 0; i < dw_len; i++) + xor ^= data[i]; + + return xor; +} + +#define MBOX_MQ_ID_MASK(mq, idx) ((idx) & ((mq)->depth - 1)) + +static bool is_msg_queue_full(struct mbox_dma_queue *mq) +{ + return MBOX_MQ_ID_MASK(mq, (mq)->prod_idx + 1) == + 
MBOX_MQ_ID_MASK(mq, (mq)->cons_idx); +} + +static int mbox_prepare_dma_entry(struct hinic3_mbox *mbox, + struct mbox_dma_queue *mq, + struct mbox_dma_msg *dma_msg, + const void *msg, u32 msg_len) +{ + u64 dma_addr, offset; + void *dma_vaddr; + + if (is_msg_queue_full(mq)) { + dev_err(mbox->hwdev->dev, "Mbox sync message queue is busy, pi: %u, ci: %u\n", + mq->prod_idx, MBOX_MQ_ID_MASK(mq, mq->cons_idx)); + return -EBUSY; + } + + /* copy data to DMA buffer */ + offset = mq->prod_idx * MBOX_MAX_BUF_SZ; + dma_vaddr = (u8 *)mq->dma_buf_vaddr + offset; + memcpy(dma_vaddr, msg, msg_len); + dma_addr = mq->dma_buf_paddr + offset; + dma_msg->dma_addr_high = cpu_to_le32(upper_32_bits(dma_addr)); + dma_msg->dma_addr_low = cpu_to_le32(lower_32_bits(dma_addr)); + dma_msg->msg_len = cpu_to_le32(msg_len); + /* The firmware obtains message based on 4B alignment. */ + dma_msg->xor = cpu_to_le32(mbox_dma_msg_xor(dma_vaddr, + ALIGN(msg_len, MBOX_XOR_DATA_ALIGN))); + mq->prod_idx++; + mq->prod_idx = MBOX_MQ_ID_MASK(mq, mq->prod_idx); + + return 0; +} + +static int mbox_prepare_dma_msg(struct hinic3_mbox *mbox, + enum mbox_msg_ack_type ack_type, + struct mbox_dma_msg *dma_msg, const void *msg, + u32 msg_len) +{ + struct mbox_dma_queue *mq; + u32 val; + + val = hinic3_hwif_read_reg(mbox->hwdev->hwif, MBOX_MQ_CI_OFFSET); + if (ack_type == MBOX_MSG_ACK) { + mq = &mbox->sync_msg_queue; + mq->cons_idx = MBOX_MQ_CI_GET(val, SYNC); + } else { + mq = &mbox->async_msg_queue; + mq->cons_idx = MBOX_MQ_CI_GET(val, ASYNC); + } + + return mbox_prepare_dma_entry(mbox, mq, dma_msg, msg, msg_len); +} + +static void clear_mbox_status(struct hinic3_send_mbox *mbox) +{ + __be64 *wb_status = mbox->wb_vaddr; + + *wb_status = 0; + /* clear mailbox write back status */ + wmb(); +} + +static void mbox_dword_write(const void *src, void __iomem *dst, u32 count) +{ + const __le32 *src32 = src; + u32 __iomem *dst32 = dst; + u32 i; + + /* Data written to mbox is arranged in structs with little endian fields + * but when written to HW every dword (32bits) should be swapped since + * the HW will swap it again. 
+ */ + for (i = 0; i < count; i++) + __raw_writel(swab32((__force __u32)src32[i]), dst32 + i); +} + +static void mbox_copy_header(struct hinic3_hwdev *hwdev, + struct hinic3_send_mbox *mbox, __le64 *header) +{ + mbox_dword_write(header, mbox->data, MBOX_HEADER_SZ / sizeof(__le32)); +} + +static void mbox_copy_send_data(struct hinic3_hwdev *hwdev, + struct hinic3_send_mbox *mbox, void *seg, + u32 seg_len) +{ + u32 __iomem *dst = (u32 __iomem *)(mbox->data + MBOX_HEADER_SZ); + u32 count, leftover, last_dword; + const __le32 *src = seg; + + count = seg_len / sizeof(u32); + leftover = seg_len % sizeof(u32); + if (count > 0) + mbox_dword_write(src, dst, count); + + if (leftover > 0) { + last_dword = 0; + memcpy(&last_dword, src + count, leftover); + mbox_dword_write(&last_dword, dst + count, 1); + } +} + +static void write_mbox_msg_attr(struct hinic3_mbox *mbox, + u16 dst_func, u16 dst_aeqn, u32 seg_len) +{ + struct hinic3_hwif *hwif = mbox->hwdev->hwif; + u32 mbox_int, mbox_ctrl, tx_size; + + tx_size = ALIGN(seg_len + MBOX_HEADER_SZ, MBOX_SEG_LEN_ALIGN) >> 2; + + mbox_int = MBOX_INT_SET(dst_aeqn, DST_AEQN) | + MBOX_INT_SET(0, STAT_DMA) | + MBOX_INT_SET(tx_size, TX_SIZE) | + MBOX_INT_SET(0, STAT_DMA_SO_RO) | + MBOX_INT_SET(1, WB_EN); + + mbox_ctrl = MBOX_CTRL_SET(1, TX_STATUS) | + MBOX_CTRL_SET(0, TRIGGER_AEQE) | + MBOX_CTRL_SET(dst_func, DST_FUNC); + + hinic3_hwif_write_reg(hwif, HINIC3_FUNC_CSR_MAILBOX_INT_OFF, mbox_int); + hinic3_hwif_write_reg(hwif, HINIC3_FUNC_CSR_MAILBOX_CONTROL_OFF, + mbox_ctrl); +} + +static u16 get_mbox_status(const struct hinic3_send_mbox *mbox) +{ + __be64 *wb_status = mbox->wb_vaddr; + u64 wb_val; + + wb_val = be64_to_cpu(*wb_status); + /* verify reading before check */ + rmb(); + + return wb_val & MBOX_WB_STATUS_ERRCODE_MASK; +} + +static enum hinic3_wait_return check_mbox_wb_status(void *priv_data) +{ + struct hinic3_mbox *mbox = priv_data; + u16 wb_status; + + wb_status = get_mbox_status(&mbox->send_mbox); + + return MBOX_STATUS_FINISHED(wb_status) ? + HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING; +} + +static int send_mbox_seg(struct hinic3_mbox *mbox, __le64 header, + u16 dst_func, void *seg, u32 seg_len, void *msg_info) +{ + struct hinic3_send_mbox *send_mbox = &mbox->send_mbox; + struct hinic3_hwdev *hwdev = mbox->hwdev; + u8 num_aeqs = hwdev->hwif->attr.num_aeqs; + enum mbox_msg_direction_type dir; + u16 dst_aeqn, wb_status, errcode; + int err; + + /* mbox to mgmt cpu, hardware doesn't care about dst aeq id */ + if (num_aeqs > MBOX_MSG_AEQ_FOR_MBOX) { + dir = MBOX_MSG_HEADER_GET(header, DIRECTION); + dst_aeqn = (dir == MBOX_MSG_SEND) ? + MBOX_MSG_AEQ_FOR_EVENT : MBOX_MSG_AEQ_FOR_MBOX; + } else { + dst_aeqn = 0; + } + + clear_mbox_status(send_mbox); + mbox_copy_header(hwdev, send_mbox, &header); + mbox_copy_send_data(hwdev, send_mbox, seg, seg_len); + write_mbox_msg_attr(mbox, dst_func, dst_aeqn, seg_len); + + err = hinic3_wait_for_timeout(mbox, check_mbox_wb_status, + MBOX_MSG_POLLING_TIMEOUT_MS, + USEC_PER_MSEC); + wb_status = get_mbox_status(send_mbox); + if (err) { + dev_err(hwdev->dev, "Send mailbox segment timeout, wb status: 0x%x\n", + wb_status); + return err; + } + + if (!MBOX_STATUS_SUCCESS(wb_status)) { + dev_err(hwdev->dev, + "Send mailbox segment to function %u error, wb status: 0x%x\n", + dst_func, wb_status); + errcode = MBOX_STATUS_ERRCODE(wb_status); + return errcode ? 
errcode : -EFAULT; + } + + return 0; +} + +static int send_mbox_msg(struct hinic3_mbox *mbox, u8 mod, u16 cmd, + const void *msg, u32 msg_len, u16 dst_func, + enum mbox_msg_direction_type direction, + enum mbox_msg_ack_type ack_type, + struct mbox_msg_info *msg_info) +{ + enum mbox_msg_data_type data_type = MBOX_MSG_DATA_INLINE; + struct hinic3_hwdev *hwdev = mbox->hwdev; + struct mbox_dma_msg dma_msg; + u32 seg_len = MBOX_SEG_LEN; + __le64 header = 0; + u32 seq_id = 0; + u16 rsp_aeq_id; + u8 *msg_seg; + int err = 0; + u32 left; + + if (hwdev->hwif->attr.num_aeqs > MBOX_MSG_AEQ_FOR_MBOX) + rsp_aeq_id = MBOX_MSG_AEQ_FOR_MBOX; + else + rsp_aeq_id = 0; + + if (dst_func == MBOX_MGMT_FUNC_ID && + !(hwdev->features[0] & MBOX_COMM_F_MBOX_SEGMENT)) { + err = mbox_prepare_dma_msg(mbox, ack_type, &dma_msg, + msg, msg_len); + if (err) + goto err_send; + + msg = &dma_msg; + msg_len = sizeof(dma_msg); + data_type = MBOX_MSG_DATA_DMA; + } + + msg_seg = (u8 *)msg; + left = msg_len; + + header = cpu_to_le64(MBOX_MSG_HEADER_SET(msg_len, MSG_LEN) | + MBOX_MSG_HEADER_SET(mod, MODULE) | + MBOX_MSG_HEADER_SET(seg_len, SEG_LEN) | + MBOX_MSG_HEADER_SET(ack_type, NO_ACK) | + MBOX_MSG_HEADER_SET(data_type, DATA_TYPE) | + MBOX_MSG_HEADER_SET(MBOX_SEQ_ID_START_VAL, SEQID) | + MBOX_MSG_HEADER_SET(direction, DIRECTION) | + MBOX_MSG_HEADER_SET(cmd, CMD) | + MBOX_MSG_HEADER_SET(msg_info->msg_id, MSG_ID) | + MBOX_MSG_HEADER_SET(rsp_aeq_id, AEQ_ID) | + MBOX_MSG_HEADER_SET(MBOX_MSG_FROM_MBOX, SOURCE) | + MBOX_MSG_HEADER_SET(!!msg_info->status, STATUS)); + + while (!(MBOX_MSG_HEADER_GET(header, LAST))) { + if (left <= MBOX_SEG_LEN) { + header &= cpu_to_le64(~MBOX_MSG_HEADER_SEG_LEN_MASK); + header |= + cpu_to_le64(MBOX_MSG_HEADER_SET(left, SEG_LEN) | + MBOX_MSG_HEADER_SET(1, LAST)); + seg_len = left; + } + + err = send_mbox_seg(mbox, header, dst_func, msg_seg, + seg_len, msg_info); + if (err) { + dev_err(hwdev->dev, "Failed to send mbox seg, seq_id=0x%llx\n", + MBOX_MSG_HEADER_GET(header, SEQID)); + goto err_send; + } + + left -= MBOX_SEG_LEN; + msg_seg += MBOX_SEG_LEN; + seq_id++; + header &= cpu_to_le64(~MBOX_MSG_HEADER_SEG_LEN_MASK); + header |= cpu_to_le64(MBOX_MSG_HEADER_SET(seq_id, SEQID)); + } + +err_send: + return err; +} + +static void set_mbox_to_func_event(struct hinic3_mbox *mbox, + enum mbox_event_state event_flag) +{ + spin_lock(&mbox->mbox_lock); + mbox->event_flag = event_flag; + spin_unlock(&mbox->mbox_lock); +} + +static enum hinic3_wait_return check_mbox_msg_finish(void *priv_data) +{ + struct hinic3_mbox *mbox = priv_data; + + return (mbox->event_flag == MBOX_EVENT_SUCCESS) ? + HINIC3_WAIT_PROCESS_CPL : HINIC3_WAIT_PROCESS_WAITING; +} + +static int wait_mbox_msg_completion(struct hinic3_mbox *mbox, + u32 timeout) +{ + u32 wait_time; + int err; + + wait_time = (timeout != 0) ? timeout : MBOX_COMP_POLLING_TIMEOUT_MS; + err = hinic3_wait_for_timeout(mbox, check_mbox_msg_finish, + wait_time, USEC_PER_MSEC); + if (err) { + set_mbox_to_func_event(mbox, MBOX_EVENT_TIMEOUT); + return err; + } + set_mbox_to_func_event(mbox, MBOX_EVENT_END); + + return 0; +} + int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, const struct mgmt_msg_params *msg_params) { - /* Completed by later submission due to LoC limit. 
*/ - return -EFAULT; + struct hinic3_mbox *mbox = hwdev->mbox; + struct mbox_msg_info msg_info = {}; + struct hinic3_msg_desc *msg_desc; + u32 msg_len; + int err; + + /* expect response message */ + msg_desc = get_mbox_msg_desc(mbox, MBOX_MSG_RESP, MBOX_MGMT_FUNC_ID); + mutex_lock(&mbox->mbox_send_lock); + msg_info.msg_id = (mbox->send_msg_id + 1) & 0xF; + mbox->send_msg_id = msg_info.msg_id; + set_mbox_to_func_event(mbox, MBOX_EVENT_START); + + err = send_mbox_msg(mbox, mod, cmd, msg_params->buf_in, + msg_params->in_size, MBOX_MGMT_FUNC_ID, + MBOX_MSG_SEND, MBOX_MSG_ACK, &msg_info); + if (err) { + dev_err(hwdev->dev, "Send mailbox mod %u, cmd %u failed, msg_id: %u, err: %d\n", + mod, cmd, msg_info.msg_id, err); + set_mbox_to_func_event(mbox, MBOX_EVENT_FAIL); + goto err_send; + } + + if (wait_mbox_msg_completion(mbox, msg_params->timeout_ms)) { + dev_err(hwdev->dev, + "Send mbox msg timeout, msg_id: %u\n", msg_info.msg_id); + err = -ETIMEDOUT; + goto err_send; + } + + if (mod != msg_desc->mod || cmd != le16_to_cpu(msg_desc->cmd)) { + dev_err(hwdev->dev, + "Invalid response mbox message, mod: 0x%x, cmd: 0x%x, expect mod: 0x%x, cmd: 0x%x\n", + msg_desc->mod, msg_desc->cmd, mod, cmd); + err = -EFAULT; + goto err_send; + } + + if (msg_desc->msg_info.status) { + err = msg_desc->msg_info.status; + goto err_send; + } + + if (msg_params->buf_out) { + msg_len = le16_to_cpu(msg_desc->msg_len); + if (msg_len != msg_params->expected_out_size) { + dev_err(hwdev->dev, + "Invalid response mbox message length: %u for mod %d cmd %u, expected length: %u\n", + msg_desc->msg_len, mod, cmd, + msg_params->expected_out_size); + err = -EFAULT; + goto err_send; + } + + memcpy(msg_params->buf_out, msg_desc->msg, msg_len); + } + +err_send: + mutex_unlock(&mbox->mbox_send_lock); + + return err; +} + +int hinic3_send_mbox_to_mgmt_no_ack(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, + const struct mgmt_msg_params *msg_params) +{ + struct hinic3_mbox *mbox = hwdev->mbox; + struct mbox_msg_info msg_info = {}; + int err; + + mutex_lock(&mbox->mbox_send_lock); + err = send_mbox_msg(mbox, mod, cmd, msg_params->buf_in, + msg_params->in_size, MBOX_MGMT_FUNC_ID, + MBOX_MSG_SEND, MBOX_MSG_NO_ACK, &msg_info); + if (err) + dev_err(hwdev->dev, "Send mailbox no ack failed\n"); + + mutex_unlock(&mbox->mbox_send_lock); + + return err; } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h index d7a6c37b7eff..e71629e95086 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mbox.h @@ -8,8 +8,134 @@ #include <linux/mutex.h> struct hinic3_hwdev; +struct mgmt_msg_params; + +#define MBOX_MSG_HEADER_SRC_GLB_FUNC_IDX_MASK GENMASK_ULL(12, 0) +#define MBOX_MSG_HEADER_STATUS_MASK BIT_ULL(13) +#define MBOX_MSG_HEADER_SOURCE_MASK BIT_ULL(15) +#define MBOX_MSG_HEADER_AEQ_ID_MASK GENMASK_ULL(17, 16) +#define MBOX_MSG_HEADER_MSG_ID_MASK GENMASK_ULL(21, 18) +#define MBOX_MSG_HEADER_CMD_MASK GENMASK_ULL(31, 22) +#define MBOX_MSG_HEADER_MSG_LEN_MASK GENMASK_ULL(42, 32) +#define MBOX_MSG_HEADER_MODULE_MASK GENMASK_ULL(47, 43) +#define MBOX_MSG_HEADER_SEG_LEN_MASK GENMASK_ULL(53, 48) +#define MBOX_MSG_HEADER_NO_ACK_MASK BIT_ULL(54) +#define MBOX_MSG_HEADER_DATA_TYPE_MASK BIT_ULL(55) +#define MBOX_MSG_HEADER_SEQID_MASK GENMASK_ULL(61, 56) +#define MBOX_MSG_HEADER_LAST_MASK BIT_ULL(62) +#define MBOX_MSG_HEADER_DIRECTION_MASK BIT_ULL(63) + +#define MBOX_MSG_HEADER_SET(val, member) \ + FIELD_PREP(MBOX_MSG_HEADER_##member##_MASK, val) +#define 
MBOX_MSG_HEADER_GET(val, member) \ + FIELD_GET(MBOX_MSG_HEADER_##member##_MASK, le64_to_cpu(val)) + +/* identifies if a segment belongs to a message or to a response. A VF is only + * expected to send messages and receive responses. PF driver could receive + * messages and send responses. + */ +enum mbox_msg_direction_type { + MBOX_MSG_SEND = 0, + MBOX_MSG_RESP = 1, +}; + +/* Indicates if mbox message expects a response (ack) or not */ +enum mbox_msg_ack_type { + MBOX_MSG_ACK = 0, + MBOX_MSG_NO_ACK = 1, +}; + +enum mbox_msg_data_type { + MBOX_MSG_DATA_INLINE = 0, + MBOX_MSG_DATA_DMA = 1, +}; + +enum mbox_msg_src_type { + MBOX_MSG_FROM_MBOX = 1, +}; + +enum mbox_msg_aeq_type { + MBOX_MSG_AEQ_FOR_EVENT = 0, + MBOX_MSG_AEQ_FOR_MBOX = 1, +}; + +#define HINIC3_MBOX_WQ_NAME "hinic3_mbox" + +struct mbox_msg_info { + u8 msg_id; + u8 status; +}; + +struct hinic3_msg_desc { + u8 *msg; + __le16 msg_len; + u8 seq_id; + u8 mod; + __le16 cmd; + struct mbox_msg_info msg_info; +}; + +struct hinic3_msg_channel { + struct hinic3_msg_desc resp_msg; + struct hinic3_msg_desc recv_msg; +}; + +struct hinic3_send_mbox { + u8 __iomem *data; + void *wb_vaddr; + dma_addr_t wb_paddr; +}; + +enum mbox_event_state { + MBOX_EVENT_START = 0, + MBOX_EVENT_FAIL = 1, + MBOX_EVENT_SUCCESS = 2, + MBOX_EVENT_TIMEOUT = 3, + MBOX_EVENT_END = 4, +}; + +struct mbox_dma_msg { + __le32 xor; + __le32 dma_addr_high; + __le32 dma_addr_low; + __le32 msg_len; + __le64 rsvd; +}; + +struct mbox_dma_queue { + void *dma_buf_vaddr; + dma_addr_t dma_buf_paddr; + u16 depth; + u16 prod_idx; + u16 cons_idx; +}; + +struct hinic3_mbox { + struct hinic3_hwdev *hwdev; + /* lock for send mbox message and ack message */ + struct mutex mbox_send_lock; + struct hinic3_send_mbox send_mbox; + struct mbox_dma_queue sync_msg_queue; + struct mbox_dma_queue async_msg_queue; + struct workqueue_struct *workq; + /* driver and MGMT CPU */ + struct hinic3_msg_channel mgmt_msg; + /* VF to PF */ + struct hinic3_msg_channel *func_msg; + u8 send_msg_id; + enum mbox_event_state event_flag; + /* lock for mbox event flag */ + spinlock_t mbox_lock; +}; + +void hinic3_mbox_func_aeqe_handler(struct hinic3_hwdev *hwdev, u8 *header, + u8 size); +int hinic3_init_mbox(struct hinic3_hwdev *hwdev); +void hinic3_free_mbox(struct hinic3_hwdev *hwdev); int hinic3_send_mbox_to_mgmt(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, const struct mgmt_msg_params *msg_params); +int hinic3_send_mbox_to_mgmt_no_ack(struct hinic3_hwdev *hwdev, u8 mod, u16 cmd, + const struct mgmt_msg_params *msg_params); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt.c b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt.c new file mode 100644 index 000000000000..c38d10cd7fac --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
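
For orientation, the segmentation constants above imply that a 2048-byte message body is carried in at most 43 mailbox segments (sequence ids 0 through MBOX_SEQ_ID_MAX_VAL, i.e. 42), with the final segment limited to 2048 - 42 * 48 = 32 bytes, which is where MBOX_LAST_SEG_MAX_LEN comes from. The sketch below is illustrative only and not part of the patch: it shows how a short, single-segment SEND header could be packed and inspected with the MBOX_MSG_HEADER_SET()/MBOX_MSG_HEADER_GET() helpers defined in hinic3_mbox.h above; the example_* names and the chosen field values are assumptions.

static __le64 example_single_seg_header(u8 mod, u16 cmd, u8 msg_id, u32 len)
{
        /* Assumes len <= MBOX_SEG_LEN, so SEG_LEN equals MSG_LEN and LAST is
         * set; msg_id is expected to fit the 4-bit MSG_ID field (the driver
         * masks it with 0xF).
         */
        return cpu_to_le64(MBOX_MSG_HEADER_SET(len, MSG_LEN) |
                           MBOX_MSG_HEADER_SET(mod, MODULE) |
                           MBOX_MSG_HEADER_SET(len, SEG_LEN) |
                           MBOX_MSG_HEADER_SET(MBOX_MSG_ACK, NO_ACK) |
                           MBOX_MSG_HEADER_SET(MBOX_MSG_DATA_INLINE, DATA_TYPE) |
                           MBOX_MSG_HEADER_SET(1, LAST) |
                           MBOX_MSG_HEADER_SET(MBOX_MSG_SEND, DIRECTION) |
                           MBOX_MSG_HEADER_SET(cmd, CMD) |
                           MBOX_MSG_HEADER_SET(msg_id, MSG_ID) |
                           MBOX_MSG_HEADER_SET(MBOX_MSG_AEQ_FOR_MBOX, AEQ_ID) |
                           MBOX_MSG_HEADER_SET(MBOX_MSG_FROM_MBOX, SOURCE));
}

/* Receive side: check the header of an incoming segment. */
static bool example_is_last_resp_seg(__le64 header)
{
        return MBOX_MSG_HEADER_GET(header, LAST) &&
               MBOX_MSG_HEADER_GET(header, DIRECTION) == MBOX_MSG_RESP;
}
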
+ +#include "hinic3_eqs.h" +#include "hinic3_hwdev.h" +#include "hinic3_mbox.h" +#include "hinic3_mgmt.h" + +void hinic3_flush_mgmt_workq(struct hinic3_hwdev *hwdev) +{ + if (hwdev->aeqs) + flush_workqueue(hwdev->aeqs->workq); +} + +void hinic3_mgmt_msg_aeqe_handler(struct hinic3_hwdev *hwdev, u8 *header, + u8 size) +{ + if (MBOX_MSG_HEADER_GET(*(__force __le64 *)header, SOURCE) == + MBOX_MSG_FROM_MBOX) + hinic3_mbox_func_aeqe_handler(hwdev, header, size); +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt.h b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt.h index 4edabeb32112..bbef3b32a6ec 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt.h @@ -9,5 +9,7 @@ struct hinic3_hwdev; void hinic3_flush_mgmt_workq(struct hinic3_hwdev *hwdev); +void hinic3_mgmt_msg_aeqe_handler(struct hinic3_hwdev *hwdev, + u8 *header, u8 size); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h index c4434efdc7f7..6cc0345c39e4 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_mgmt_interface.h @@ -56,12 +56,105 @@ struct l2nic_cmd_update_mac { u8 new_mac[ETH_ALEN]; }; +struct l2nic_cmd_set_ci_attr { + struct mgmt_msg_head msg_head; + u16 func_idx; + u8 dma_attr_off; + u8 pending_limit; + u8 coalescing_time; + u8 intr_en; + u16 intr_idx; + u32 l2nic_sqn; + u32 rsvd; + u64 ci_addr; +}; + +struct l2nic_cmd_clear_qp_resource { + struct mgmt_msg_head msg_head; + u16 func_id; + u16 rsvd1; +}; + struct l2nic_cmd_force_pkt_drop { struct mgmt_msg_head msg_head; u8 port; u8 rsvd1[3]; }; +struct l2nic_cmd_set_vport_state { + struct mgmt_msg_head msg_head; + u16 func_id; + u16 rsvd1; + /* 0--disable, 1--enable */ + u8 state; + u8 rsvd2[3]; +}; + +struct l2nic_cmd_set_dcb_state { + struct mgmt_msg_head head; + u16 func_id; + /* 0 - get dcb state, 1 - set dcb state */ + u8 op_code; + /* 0 - disable, 1 - enable dcb */ + u8 state; + /* 0 - disable, 1 - enable dcb */ + u8 port_state; + u8 rsvd[7]; +}; + +#define L2NIC_RSS_TYPE_VALID_MASK BIT(23) +#define L2NIC_RSS_TYPE_TCP_IPV6_EXT_MASK BIT(24) +#define L2NIC_RSS_TYPE_IPV6_EXT_MASK BIT(25) +#define L2NIC_RSS_TYPE_TCP_IPV6_MASK BIT(26) +#define L2NIC_RSS_TYPE_IPV6_MASK BIT(27) +#define L2NIC_RSS_TYPE_TCP_IPV4_MASK BIT(28) +#define L2NIC_RSS_TYPE_IPV4_MASK BIT(29) +#define L2NIC_RSS_TYPE_UDP_IPV6_MASK BIT(30) +#define L2NIC_RSS_TYPE_UDP_IPV4_MASK BIT(31) +#define L2NIC_RSS_TYPE_SET(val, member) \ + FIELD_PREP(L2NIC_RSS_TYPE_##member##_MASK, val) +#define L2NIC_RSS_TYPE_GET(val, member) \ + FIELD_GET(L2NIC_RSS_TYPE_##member##_MASK, val) + +#define L2NIC_RSS_INDIR_SIZE 256 +#define L2NIC_RSS_KEY_SIZE 40 + +/* IEEE 802.1Qaz std */ +#define L2NIC_DCB_COS_MAX 0x8 + +struct l2nic_cmd_set_rss_ctx_tbl { + struct mgmt_msg_head msg_head; + u16 func_id; + u16 rsvd1; + u32 context; +}; + +struct l2nic_cmd_cfg_rss_engine { + struct mgmt_msg_head msg_head; + u16 func_id; + u8 opcode; + u8 hash_engine; + u8 rsvd1[4]; +}; + +struct l2nic_cmd_cfg_rss_hash_key { + struct mgmt_msg_head msg_head; + u16 func_id; + u8 opcode; + u8 rsvd1; + u8 key[L2NIC_RSS_KEY_SIZE]; +}; + +struct l2nic_cmd_cfg_rss { + struct mgmt_msg_head msg_head; + u16 func_id; + u8 rss_en; + u8 rq_priority_number; + u8 prio_tc[L2NIC_DCB_COS_MAX]; + u16 num_qps; + u16 rsvd1; +}; + /* Commands between NIC to fw */ enum l2nic_cmd { /* FUNC CFG */ @@ -82,6 +175,32 @@ enum l2nic_cmd { L2NIC_CMD_MAX = 256, }; 
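
The 256-entry indirection table command below (struct l2nic_cmd_rss_set_indir_tbl) maps RSS hash buckets to receive queues. A common default is a round-robin spread over the active queues; the helper below is only a sketch of that idea under the assumption num_qps is non-zero, and is not necessarily how hinic3_rss_init() populates the table:

static void example_fill_rss_indir(struct l2nic_cmd_rss_set_indir_tbl *cmd,
                                   u16 num_qps)
{
        u16 i;

        /* Spread the 256 hash buckets round-robin over the active queues;
         * entries are little endian on the wire.
         */
        for (i = 0; i < L2NIC_RSS_INDIR_SIZE; i++)
                cmd->entry[i] = cpu_to_le16(i % num_qps);
}
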
+struct l2nic_cmd_rss_set_indir_tbl { + __le32 rsvd[4]; + __le16 entry[L2NIC_RSS_INDIR_SIZE]; +}; + +/* NIC CMDQ MODE */ +enum l2nic_ucode_cmd { + L2NIC_UCODE_CMD_MODIFY_QUEUE_CTX = 0, + L2NIC_UCODE_CMD_CLEAN_QUEUE_CTX = 1, + L2NIC_UCODE_CMD_SET_RSS_INDIR_TBL = 4, +}; + +/* hilink mac group command */ +enum mag_cmd { + MAG_CMD_GET_LINK_STATUS = 7, +}; + +/* firmware also use this cmd report link event to driver */ +struct mag_cmd_get_link_status { + struct mgmt_msg_head head; + u8 port_id; + /* 0:link down 1:link up */ + u8 status; + u8 rsvd0[2]; +}; + enum hinic3_nic_feature_cap { HINIC3_NIC_F_CSUM = BIT(0), HINIC3_NIC_F_SCTP_CRC = BIT(1), diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c index 71104a6b8bef..0fa3c7900225 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_netdev_ops.c @@ -8,19 +8,437 @@ #include "hinic3_nic_cfg.h" #include "hinic3_nic_dev.h" #include "hinic3_nic_io.h" +#include "hinic3_rss.h" #include "hinic3_rx.h" #include "hinic3_tx.h" +/* try to modify the number of irq to the target number, + * and return the actual number of irq. + */ +static u16 hinic3_qp_irq_change(struct net_device *netdev, + u16 dst_num_qp_irq) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct msix_entry *qps_msix_entries; + u16 resp_irq_num, irq_num_gap, i; + u16 idx; + int err; + + qps_msix_entries = nic_dev->qps_msix_entries; + if (dst_num_qp_irq > nic_dev->num_qp_irq) { + irq_num_gap = dst_num_qp_irq - nic_dev->num_qp_irq; + err = hinic3_alloc_irqs(nic_dev->hwdev, irq_num_gap, + &qps_msix_entries[nic_dev->num_qp_irq], + &resp_irq_num); + if (err) { + netdev_err(netdev, "Failed to alloc irqs\n"); + return nic_dev->num_qp_irq; + } + + nic_dev->num_qp_irq += resp_irq_num; + } else if (dst_num_qp_irq < nic_dev->num_qp_irq) { + irq_num_gap = nic_dev->num_qp_irq - dst_num_qp_irq; + for (i = 0; i < irq_num_gap; i++) { + idx = (nic_dev->num_qp_irq - i) - 1; + hinic3_free_irq(nic_dev->hwdev, + qps_msix_entries[idx].vector); + qps_msix_entries[idx].vector = 0; + qps_msix_entries[idx].entry = 0; + } + nic_dev->num_qp_irq = dst_num_qp_irq; + } + + return nic_dev->num_qp_irq; +} + +static void hinic3_config_num_qps(struct net_device *netdev, + struct hinic3_dyna_txrxq_params *q_params) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + u16 alloc_num_irq, cur_num_irq; + u16 dst_num_irq; + + if (!test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags)) + q_params->num_qps = 1; + + if (nic_dev->num_qp_irq >= q_params->num_qps) + goto out; + + cur_num_irq = nic_dev->num_qp_irq; + + alloc_num_irq = hinic3_qp_irq_change(netdev, q_params->num_qps); + if (alloc_num_irq < q_params->num_qps) { + q_params->num_qps = alloc_num_irq; + netdev_warn(netdev, "Can not get enough irqs, adjust num_qps to %u\n", + q_params->num_qps); + + /* The current irq may be in use, we must keep it */ + dst_num_irq = max_t(u16, cur_num_irq, q_params->num_qps); + hinic3_qp_irq_change(netdev, dst_num_irq); + } + +out: + netdev_dbg(netdev, "No need to change irqs, num_qps is %u\n", + q_params->num_qps); +} + +static int hinic3_setup_num_qps(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + nic_dev->num_qp_irq = 0; + + nic_dev->qps_msix_entries = kcalloc(nic_dev->max_qps, + sizeof(struct msix_entry), + GFP_KERNEL); + if (!nic_dev->qps_msix_entries) + return -ENOMEM; + + hinic3_config_num_qps(netdev, &nic_dev->q_params); + + return 0; +} + +static 
void hinic3_destroy_num_qps(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + u16 i; + + for (i = 0; i < nic_dev->num_qp_irq; i++) + hinic3_free_irq(nic_dev->hwdev, + nic_dev->qps_msix_entries[i].vector); + + kfree(nic_dev->qps_msix_entries); +} + +static int hinic3_alloc_txrxq_resources(struct net_device *netdev, + struct hinic3_dyna_txrxq_params *q_params) +{ + int err; + + q_params->txqs_res = kcalloc(q_params->num_qps, + sizeof(*q_params->txqs_res), GFP_KERNEL); + if (!q_params->txqs_res) + return -ENOMEM; + + q_params->rxqs_res = kcalloc(q_params->num_qps, + sizeof(*q_params->rxqs_res), GFP_KERNEL); + if (!q_params->rxqs_res) { + err = -ENOMEM; + goto err_free_txqs_res_arr; + } + + q_params->irq_cfg = kcalloc(q_params->num_qps, + sizeof(*q_params->irq_cfg), GFP_KERNEL); + if (!q_params->irq_cfg) { + err = -ENOMEM; + goto err_free_rxqs_res_arr; + } + + err = hinic3_alloc_txqs_res(netdev, q_params->num_qps, + q_params->sq_depth, q_params->txqs_res); + if (err) { + netdev_err(netdev, "Failed to alloc txqs resource\n"); + goto err_free_irq_cfg; + } + + err = hinic3_alloc_rxqs_res(netdev, q_params->num_qps, + q_params->rq_depth, q_params->rxqs_res); + if (err) { + netdev_err(netdev, "Failed to alloc rxqs resource\n"); + goto err_free_txqs_res; + } + + return 0; + +err_free_txqs_res: + hinic3_free_txqs_res(netdev, q_params->num_qps, q_params->sq_depth, + q_params->txqs_res); +err_free_irq_cfg: + kfree(q_params->irq_cfg); + q_params->irq_cfg = NULL; +err_free_rxqs_res_arr: + kfree(q_params->rxqs_res); + q_params->rxqs_res = NULL; +err_free_txqs_res_arr: + kfree(q_params->txqs_res); + q_params->txqs_res = NULL; + + return err; +} + +static void hinic3_free_txrxq_resources(struct net_device *netdev, + struct hinic3_dyna_txrxq_params *q_params) +{ + hinic3_free_rxqs_res(netdev, q_params->num_qps, q_params->rq_depth, + q_params->rxqs_res); + hinic3_free_txqs_res(netdev, q_params->num_qps, q_params->sq_depth, + q_params->txqs_res); + + kfree(q_params->irq_cfg); + q_params->irq_cfg = NULL; + + kfree(q_params->rxqs_res); + q_params->rxqs_res = NULL; + + kfree(q_params->txqs_res); + q_params->txqs_res = NULL; +} + +static int hinic3_configure_txrxqs(struct net_device *netdev, + struct hinic3_dyna_txrxq_params *q_params) +{ + int err; + + err = hinic3_configure_txqs(netdev, q_params->num_qps, + q_params->sq_depth, q_params->txqs_res); + if (err) { + netdev_err(netdev, "Failed to configure txqs\n"); + return err; + } + + err = hinic3_configure_rxqs(netdev, q_params->num_qps, + q_params->rq_depth, q_params->rxqs_res); + if (err) { + netdev_err(netdev, "Failed to configure rxqs\n"); + return err; + } + + return 0; +} + +static int hinic3_configure(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + int err; + + netdev->min_mtu = HINIC3_MIN_MTU_SIZE; + netdev->max_mtu = HINIC3_MAX_JUMBO_FRAME_SIZE; + err = hinic3_set_port_mtu(netdev, netdev->mtu); + if (err) { + netdev_err(netdev, "Failed to set mtu\n"); + return err; + } + + /* Ensure DCB is disabled */ + hinic3_sync_dcb_state(nic_dev->hwdev, 1, 0); + + if (test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags)) { + err = hinic3_rss_init(netdev); + if (err) { + netdev_err(netdev, "Failed to init rss\n"); + return err; + } + } + + return 0; +} + +static void hinic3_remove_configure(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + if (test_bit(HINIC3_RSS_ENABLE, &nic_dev->flags)) + hinic3_rss_uninit(netdev); +} + +static int 
hinic3_alloc_channel_resources(struct net_device *netdev, + struct hinic3_dyna_qp_params *qp_params, + struct hinic3_dyna_txrxq_params *trxq_params) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + int err; + + qp_params->num_qps = trxq_params->num_qps; + qp_params->sq_depth = trxq_params->sq_depth; + qp_params->rq_depth = trxq_params->rq_depth; + + err = hinic3_alloc_qps(nic_dev, qp_params); + if (err) { + netdev_err(netdev, "Failed to alloc qps\n"); + return err; + } + + err = hinic3_alloc_txrxq_resources(netdev, trxq_params); + if (err) { + netdev_err(netdev, "Failed to alloc txrxq resources\n"); + hinic3_free_qps(nic_dev, qp_params); + return err; + } + + return 0; +} + +static void hinic3_free_channel_resources(struct net_device *netdev, + struct hinic3_dyna_qp_params *qp_params, + struct hinic3_dyna_txrxq_params *trxq_params) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + hinic3_free_txrxq_resources(netdev, trxq_params); + hinic3_free_qps(nic_dev, qp_params); +} + +static int hinic3_open_channel(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + int err; + + err = hinic3_init_qp_ctxts(nic_dev); + if (err) { + netdev_err(netdev, "Failed to init qps\n"); + return err; + } + + err = hinic3_configure_txrxqs(netdev, &nic_dev->q_params); + if (err) { + netdev_err(netdev, "Failed to configure txrxqs\n"); + goto err_free_qp_ctxts; + } + + err = hinic3_qps_irq_init(netdev); + if (err) { + netdev_err(netdev, "Failed to init txrxq irq\n"); + goto err_free_qp_ctxts; + } + + err = hinic3_configure(netdev); + if (err) { + netdev_err(netdev, "Failed to init txrxq irq\n"); + goto err_uninit_qps_irq; + } + + return 0; + +err_uninit_qps_irq: + hinic3_qps_irq_uninit(netdev); +err_free_qp_ctxts: + hinic3_free_qp_ctxts(nic_dev); + + return err; +} + +static void hinic3_close_channel(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + hinic3_remove_configure(netdev); + hinic3_qps_irq_uninit(netdev); + hinic3_free_qp_ctxts(nic_dev); +} + +static int hinic3_vport_up(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + bool link_status_up; + u16 glb_func_id; + int err; + + glb_func_id = hinic3_global_func_id(nic_dev->hwdev); + err = hinic3_set_vport_enable(nic_dev->hwdev, glb_func_id, true); + if (err) { + netdev_err(netdev, "Failed to enable vport\n"); + goto err_flush_qps_res; + } + + err = netif_set_real_num_queues(netdev, nic_dev->q_params.num_qps, + nic_dev->q_params.num_qps); + if (err) { + netdev_err(netdev, "Failed to set real number of queues\n"); + goto err_flush_qps_res; + } + netif_tx_start_all_queues(netdev); + + err = hinic3_get_link_status(nic_dev->hwdev, &link_status_up); + if (!err && link_status_up) + netif_carrier_on(netdev); + + return 0; + +err_flush_qps_res: + hinic3_flush_qps_res(nic_dev->hwdev); + /* wait to guarantee that no packets will be sent to host */ + msleep(100); + + return err; +} + +static void hinic3_vport_down(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + u16 glb_func_id; + + netif_carrier_off(netdev); + netif_tx_disable(netdev); + + glb_func_id = hinic3_global_func_id(nic_dev->hwdev); + hinic3_set_vport_enable(nic_dev->hwdev, glb_func_id, false); + + hinic3_flush_txqs(netdev); + /* wait to guarantee that no packets will be sent to host */ + msleep(100); + hinic3_flush_qps_res(nic_dev->hwdev); +} + static int hinic3_open(struct net_device *netdev) { - /* Completed by later submission due to LoC 
limit. */ - return -EFAULT; + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_dyna_qp_params qp_params; + int err; + + err = hinic3_init_nicio_res(nic_dev); + if (err) { + netdev_err(netdev, "Failed to init nicio resources\n"); + return err; + } + + err = hinic3_setup_num_qps(netdev); + if (err) { + netdev_err(netdev, "Failed to setup num_qps\n"); + goto err_free_nicio_res; + } + + err = hinic3_alloc_channel_resources(netdev, &qp_params, + &nic_dev->q_params); + if (err) + goto err_destroy_num_qps; + + hinic3_init_qps(nic_dev, &qp_params); + + err = hinic3_open_channel(netdev); + if (err) + goto err_uninit_qps; + + err = hinic3_vport_up(netdev); + if (err) + goto err_close_channel; + + return 0; + +err_close_channel: + hinic3_close_channel(netdev); +err_uninit_qps: + hinic3_uninit_qps(nic_dev, &qp_params); + hinic3_free_channel_resources(netdev, &qp_params, &nic_dev->q_params); +err_destroy_num_qps: + hinic3_destroy_num_qps(netdev); +err_free_nicio_res: + hinic3_free_nicio_res(nic_dev); + + return err; } static int hinic3_close(struct net_device *netdev) { - /* Completed by later submission due to LoC limit. */ - return -EFAULT; + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_dyna_qp_params qp_params; + + hinic3_vport_down(netdev); + hinic3_close_channel(netdev); + hinic3_uninit_qps(nic_dev, &qp_params); + hinic3_free_channel_resources(netdev, &qp_params, &nic_dev->q_params); + + return 0; } static int hinic3_change_mtu(struct net_device *netdev, int new_mtu) diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c index 5b1a91a18c67..979f47ca77f9 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.c @@ -39,6 +39,12 @@ static int hinic3_feature_nego(struct hinic3_hwdev *hwdev, u8 opcode, return 0; } +int hinic3_get_nic_feature_from_hw(struct hinic3_nic_dev *nic_dev) +{ + return hinic3_feature_nego(nic_dev->hwdev, MGMT_MSG_CMD_OP_GET, + &nic_dev->nic_io->feature_cap, 1); +} + int hinic3_set_nic_feature_to_hw(struct hinic3_nic_dev *nic_dev) { return hinic3_feature_nego(nic_dev->hwdev, MGMT_MSG_CMD_OP_SET, @@ -82,6 +88,23 @@ static int hinic3_set_function_table(struct hinic3_hwdev *hwdev, u32 cfg_bitmap, return 0; } +int hinic3_init_function_table(struct hinic3_nic_dev *nic_dev) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct l2nic_func_tbl_cfg func_tbl_cfg = {}; + u32 cfg_bitmap; + + func_tbl_cfg.mtu = 0x3FFF; /* default, max mtu */ + func_tbl_cfg.rx_wqe_buf_size = nic_io->rx_buf_len; + + cfg_bitmap = BIT(L2NIC_FUNC_TBL_CFG_INIT) | + BIT(L2NIC_FUNC_TBL_CFG_MTU) | + BIT(L2NIC_FUNC_TBL_CFG_RX_BUF_SIZE); + + return hinic3_set_function_table(nic_dev->hwdev, cfg_bitmap, + &func_tbl_cfg); +} + int hinic3_set_port_mtu(struct net_device *netdev, u16 new_mtu) { struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); @@ -89,6 +112,7 @@ int hinic3_set_port_mtu(struct net_device *netdev, u16 new_mtu) struct hinic3_hwdev *hwdev = nic_dev->hwdev; func_tbl_cfg.mtu = new_mtu; + return hinic3_set_function_table(hwdev, BIT(L2NIC_FUNC_TBL_CFG_MTU), &func_tbl_cfg); } @@ -206,6 +230,63 @@ int hinic3_update_mac(struct hinic3_hwdev *hwdev, const u8 *old_mac, err, mac_info.msg_head.status); return -EIO; } + + return 0; +} + +int hinic3_set_ci_table(struct hinic3_hwdev *hwdev, struct hinic3_sq_attr *attr) +{ + struct l2nic_cmd_set_ci_attr cons_idx_attr = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + 
cons_idx_attr.func_idx = hinic3_global_func_id(hwdev); + cons_idx_attr.dma_attr_off = attr->dma_attr_off; + cons_idx_attr.pending_limit = attr->pending_limit; + cons_idx_attr.coalescing_time = attr->coalescing_time; + + if (attr->intr_en) { + cons_idx_attr.intr_en = attr->intr_en; + cons_idx_attr.intr_idx = attr->intr_idx; + } + + cons_idx_attr.l2nic_sqn = attr->l2nic_sqn; + cons_idx_attr.ci_addr = attr->ci_dma_base; + + mgmt_msg_params_init_default(&msg_params, &cons_idx_attr, + sizeof(cons_idx_attr)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_SET_SQ_CI_ATTR, &msg_params); + if (err || cons_idx_attr.msg_head.status) { + dev_err(hwdev->dev, + "Failed to set ci attribute table, err: %d, status: 0x%x\n", + err, cons_idx_attr.msg_head.status); + return -EFAULT; + } + + return 0; +} + +int hinic3_flush_qps_res(struct hinic3_hwdev *hwdev) +{ + struct l2nic_cmd_clear_qp_resource sq_res = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + sq_res.func_id = hinic3_global_func_id(hwdev); + + mgmt_msg_params_init_default(&msg_params, &sq_res, sizeof(sq_res)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_CLEAR_QP_RESOURCE, + &msg_params); + if (err || sq_res.msg_head.status) { + dev_err(hwdev->dev, "Failed to clear sq resources, err: %d, status: 0x%x\n", + err, sq_res.msg_head.status); + return -EINVAL; + } + return 0; } @@ -231,3 +312,74 @@ int hinic3_force_drop_tx_pkt(struct hinic3_hwdev *hwdev) return pkt_drop.msg_head.status; } + +int hinic3_sync_dcb_state(struct hinic3_hwdev *hwdev, u8 op_code, u8 state) +{ + struct l2nic_cmd_set_dcb_state dcb_state = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + dcb_state.op_code = op_code; + dcb_state.state = state; + dcb_state.func_id = hinic3_global_func_id(hwdev); + + mgmt_msg_params_init_default(&msg_params, &dcb_state, + sizeof(dcb_state)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_QOS_DCB_STATE, &msg_params); + if (err || dcb_state.head.status) { + dev_err(hwdev->dev, + "Failed to set dcb state, err: %d, status: 0x%x\n", + err, dcb_state.head.status); + return -EFAULT; + } + + return 0; +} + +int hinic3_get_link_status(struct hinic3_hwdev *hwdev, bool *link_status_up) +{ + struct mag_cmd_get_link_status get_link = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + get_link.port_id = hinic3_physical_port_id(hwdev); + + mgmt_msg_params_init_default(&msg_params, &get_link, sizeof(get_link)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_HILINK, + MAG_CMD_GET_LINK_STATUS, &msg_params); + if (err || get_link.head.status) { + dev_err(hwdev->dev, "Failed to get link state, err: %d, status: 0x%x\n", + err, get_link.head.status); + return -EIO; + } + + *link_status_up = !!get_link.status; + + return 0; +} + +int hinic3_set_vport_enable(struct hinic3_hwdev *hwdev, u16 func_id, + bool enable) +{ + struct l2nic_cmd_set_vport_state en_state = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + en_state.func_id = func_id; + en_state.state = enable ? 
1 : 0; + + mgmt_msg_params_init_default(&msg_params, &en_state, sizeof(en_state)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_SET_VPORT_ENABLE, &msg_params); + if (err || en_state.msg_head.status) { + dev_err(hwdev->dev, "Failed to set vport state, err: %d, status: 0x%x\n", + err, en_state.msg_head.status); + return -EINVAL; + } + + return 0; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h index bf9ce51dc401..b83b567fa542 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_cfg.h @@ -22,11 +22,23 @@ enum hinic3_nic_event_type { HINIC3_NIC_EVENT_LINK_UP = 1, }; +struct hinic3_sq_attr { + u8 dma_attr_off; + u8 pending_limit; + u8 coalescing_time; + u8 intr_en; + u16 intr_idx; + u32 l2nic_sqn; + u64 ci_dma_base; +}; + +int hinic3_get_nic_feature_from_hw(struct hinic3_nic_dev *nic_dev); int hinic3_set_nic_feature_to_hw(struct hinic3_nic_dev *nic_dev); bool hinic3_test_support(struct hinic3_nic_dev *nic_dev, enum hinic3_nic_feature_cap feature_bits); void hinic3_update_nic_feature(struct hinic3_nic_dev *nic_dev, u64 feature_cap); +int hinic3_init_function_table(struct hinic3_nic_dev *nic_dev); int hinic3_set_port_mtu(struct net_device *netdev, u16 new_mtu); int hinic3_set_mac(struct hinic3_hwdev *hwdev, const u8 *mac_addr, u16 vlan_id, @@ -36,6 +48,14 @@ int hinic3_del_mac(struct hinic3_hwdev *hwdev, const u8 *mac_addr, u16 vlan_id, int hinic3_update_mac(struct hinic3_hwdev *hwdev, const u8 *old_mac, u8 *new_mac, u16 vlan_id, u16 func_id); +int hinic3_set_ci_table(struct hinic3_hwdev *hwdev, + struct hinic3_sq_attr *attr); +int hinic3_flush_qps_res(struct hinic3_hwdev *hwdev); int hinic3_force_drop_tx_pkt(struct hinic3_hwdev *hwdev); +int hinic3_sync_dcb_state(struct hinic3_hwdev *hwdev, u8 op_code, u8 state); +int hinic3_get_link_status(struct hinic3_hwdev *hwdev, bool *link_status_up); +int hinic3_set_vport_enable(struct hinic3_hwdev *hwdev, u16 func_id, + bool enable); + #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h index c994fc9b6ee0..5ba83261616c 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_dev.h @@ -51,6 +51,12 @@ struct hinic3_dyna_txrxq_params { struct hinic3_irq_cfg *irq_cfg; }; +struct hinic3_intr_coal_info { + u8 pending_limit; + u8 coalesce_timer_cfg; + u8 resend_timer_cfg; +}; + struct hinic3_nic_dev { struct pci_dev *pdev; struct net_device *netdev; @@ -67,16 +73,21 @@ struct hinic3_nic_dev { struct hinic3_txq *txqs; struct hinic3_rxq *rxqs; + enum hinic3_rss_hash_type rss_hash_type; + struct hinic3_rss_type rss_type; + u8 *rss_hkey; + u16 *rss_indir; + u16 num_qp_irq; struct msix_entry *qps_msix_entries; + struct hinic3_intr_coal_info *intr_coalesce; + bool link_status_up; }; void hinic3_set_netdev_ops(struct net_device *netdev); - -/* Temporary prototypes. Functions become static in later submission. 
*/ -void qp_add_napi(struct hinic3_irq_cfg *irq_cfg); -void qp_del_napi(struct hinic3_irq_cfg *irq_cfg); +int hinic3_qps_irq_init(struct net_device *netdev); +void hinic3_qps_irq_uninit(struct net_device *netdev); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.c b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.c index 34a1f5bd5ac1..d86cd1ba4605 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 // Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. +#include "hinic3_cmdq.h" #include "hinic3_hw_comm.h" #include "hinic3_hw_intf.h" #include "hinic3_hwdev.h" @@ -9,13 +10,876 @@ #include "hinic3_nic_dev.h" #include "hinic3_nic_io.h" +#define HINIC3_DEFAULT_TX_CI_PENDING_LIMIT 1 +#define HINIC3_DEFAULT_TX_CI_COALESCING_TIME 1 +#define HINIC3_DEFAULT_DROP_THD_ON (0xFFFF) +#define HINIC3_DEFAULT_DROP_THD_OFF 0 + +#define HINIC3_CI_Q_ADDR_SIZE (64) + +#define HINIC3_CI_TABLE_SIZE(num_qps) \ + (ALIGN((num_qps) * HINIC3_CI_Q_ADDR_SIZE, HINIC3_MIN_PAGE_SIZE)) + +#define HINIC3_CI_VADDR(base_addr, q_id) \ + ((u8 *)(base_addr) + (q_id) * HINIC3_CI_Q_ADDR_SIZE) + +#define HINIC3_CI_PADDR(base_paddr, q_id) \ + ((base_paddr) + (q_id) * HINIC3_CI_Q_ADDR_SIZE) + +#define SQ_WQ_PREFETCH_MAX 1 +#define SQ_WQ_PREFETCH_MIN 1 +#define SQ_WQ_PREFETCH_THRESHOLD 16 + +#define RQ_WQ_PREFETCH_MAX 4 +#define RQ_WQ_PREFETCH_MIN 1 +#define RQ_WQ_PREFETCH_THRESHOLD 256 + +/* (2048 - 8) / 64 */ +#define HINIC3_Q_CTXT_MAX 31 + +enum hinic3_qp_ctxt_type { + HINIC3_QP_CTXT_TYPE_SQ = 0, + HINIC3_QP_CTXT_TYPE_RQ = 1, +}; + +struct hinic3_qp_ctxt_hdr { + __le16 num_queues; + __le16 queue_type; + __le16 start_qid; + __le16 rsvd; +}; + +struct hinic3_sq_ctxt { + __le32 ci_pi; + __le32 drop_mode_sp; + __le32 wq_pfn_hi_owner; + __le32 wq_pfn_lo; + + __le32 rsvd0; + __le32 pkt_drop_thd; + __le32 global_sq_id; + __le32 vlan_ceq_attr; + + __le32 pref_cache; + __le32 pref_ci_owner; + __le32 pref_wq_pfn_hi_ci; + __le32 pref_wq_pfn_lo; + + __le32 rsvd8; + __le32 rsvd9; + __le32 wq_block_pfn_hi; + __le32 wq_block_pfn_lo; +}; + +struct hinic3_rq_ctxt { + __le32 ci_pi; + __le32 ceq_attr; + __le32 wq_pfn_hi_type_owner; + __le32 wq_pfn_lo; + + __le32 rsvd[3]; + __le32 cqe_sge_len; + + __le32 pref_cache; + __le32 pref_ci_owner; + __le32 pref_wq_pfn_hi_ci; + __le32 pref_wq_pfn_lo; + + __le32 pi_paddr_hi; + __le32 pi_paddr_lo; + __le32 wq_block_pfn_hi; + __le32 wq_block_pfn_lo; +}; + +struct hinic3_sq_ctxt_block { + struct hinic3_qp_ctxt_hdr cmdq_hdr; + struct hinic3_sq_ctxt sq_ctxt[HINIC3_Q_CTXT_MAX]; +}; + +struct hinic3_rq_ctxt_block { + struct hinic3_qp_ctxt_hdr cmdq_hdr; + struct hinic3_rq_ctxt rq_ctxt[HINIC3_Q_CTXT_MAX]; +}; + +struct hinic3_clean_queue_ctxt { + struct hinic3_qp_ctxt_hdr cmdq_hdr; + __le32 rsvd; +}; + +#define SQ_CTXT_SIZE(num_sqs) \ + (sizeof(struct hinic3_qp_ctxt_hdr) + \ + (num_sqs) * sizeof(struct hinic3_sq_ctxt)) + +#define RQ_CTXT_SIZE(num_rqs) \ + (sizeof(struct hinic3_qp_ctxt_hdr) + \ + (num_rqs) * sizeof(struct hinic3_rq_ctxt)) + +#define SQ_CTXT_PREF_CI_HI_SHIFT 12 +#define SQ_CTXT_PREF_CI_HI(val) ((val) >> SQ_CTXT_PREF_CI_HI_SHIFT) + +#define SQ_CTXT_PI_IDX_MASK GENMASK(15, 0) +#define SQ_CTXT_CI_IDX_MASK GENMASK(31, 16) +#define SQ_CTXT_CI_PI_SET(val, member) \ + FIELD_PREP(SQ_CTXT_##member##_MASK, val) + +#define SQ_CTXT_MODE_SP_FLAG_MASK BIT(0) +#define SQ_CTXT_MODE_PKT_DROP_MASK BIT(1) +#define SQ_CTXT_MODE_SET(val, member) \ + 
FIELD_PREP(SQ_CTXT_MODE_##member##_MASK, val) + +#define SQ_CTXT_WQ_PAGE_HI_PFN_MASK GENMASK(19, 0) +#define SQ_CTXT_WQ_PAGE_OWNER_MASK BIT(23) +#define SQ_CTXT_WQ_PAGE_SET(val, member) \ + FIELD_PREP(SQ_CTXT_WQ_PAGE_##member##_MASK, val) + +#define SQ_CTXT_PKT_DROP_THD_ON_MASK GENMASK(15, 0) +#define SQ_CTXT_PKT_DROP_THD_OFF_MASK GENMASK(31, 16) +#define SQ_CTXT_PKT_DROP_THD_SET(val, member) \ + FIELD_PREP(SQ_CTXT_PKT_DROP_##member##_MASK, val) + +#define SQ_CTXT_GLOBAL_SQ_ID_MASK GENMASK(12, 0) +#define SQ_CTXT_GLOBAL_QUEUE_ID_SET(val, member) \ + FIELD_PREP(SQ_CTXT_##member##_MASK, val) + +#define SQ_CTXT_VLAN_INSERT_MODE_MASK GENMASK(20, 19) +#define SQ_CTXT_VLAN_CEQ_EN_MASK BIT(23) +#define SQ_CTXT_VLAN_CEQ_SET(val, member) \ + FIELD_PREP(SQ_CTXT_VLAN_##member##_MASK, val) + +#define SQ_CTXT_PREF_CACHE_THRESHOLD_MASK GENMASK(13, 0) +#define SQ_CTXT_PREF_CACHE_MAX_MASK GENMASK(24, 14) +#define SQ_CTXT_PREF_CACHE_MIN_MASK GENMASK(31, 25) + +#define SQ_CTXT_PREF_CI_HI_MASK GENMASK(3, 0) +#define SQ_CTXT_PREF_OWNER_MASK BIT(4) + +#define SQ_CTXT_PREF_WQ_PFN_HI_MASK GENMASK(19, 0) +#define SQ_CTXT_PREF_CI_LOW_MASK GENMASK(31, 20) +#define SQ_CTXT_PREF_SET(val, member) \ + FIELD_PREP(SQ_CTXT_PREF_##member##_MASK, val) + +#define SQ_CTXT_WQ_BLOCK_PFN_HI_MASK GENMASK(22, 0) +#define SQ_CTXT_WQ_BLOCK_SET(val, member) \ + FIELD_PREP(SQ_CTXT_WQ_BLOCK_##member##_MASK, val) + +#define RQ_CTXT_PI_IDX_MASK GENMASK(15, 0) +#define RQ_CTXT_CI_IDX_MASK GENMASK(31, 16) +#define RQ_CTXT_CI_PI_SET(val, member) \ + FIELD_PREP(RQ_CTXT_##member##_MASK, val) + +#define RQ_CTXT_CEQ_ATTR_INTR_MASK GENMASK(30, 21) +#define RQ_CTXT_CEQ_ATTR_EN_MASK BIT(31) +#define RQ_CTXT_CEQ_ATTR_SET(val, member) \ + FIELD_PREP(RQ_CTXT_CEQ_ATTR_##member##_MASK, val) + +#define RQ_CTXT_WQ_PAGE_HI_PFN_MASK GENMASK(19, 0) +#define RQ_CTXT_WQ_PAGE_WQE_TYPE_MASK GENMASK(29, 28) +#define RQ_CTXT_WQ_PAGE_OWNER_MASK BIT(31) +#define RQ_CTXT_WQ_PAGE_SET(val, member) \ + FIELD_PREP(RQ_CTXT_WQ_PAGE_##member##_MASK, val) + +#define RQ_CTXT_CQE_LEN_MASK GENMASK(29, 28) +#define RQ_CTXT_CQE_LEN_SET(val, member) \ + FIELD_PREP(RQ_CTXT_##member##_MASK, val) + +#define RQ_CTXT_PREF_CACHE_THRESHOLD_MASK GENMASK(13, 0) +#define RQ_CTXT_PREF_CACHE_MAX_MASK GENMASK(24, 14) +#define RQ_CTXT_PREF_CACHE_MIN_MASK GENMASK(31, 25) + +#define RQ_CTXT_PREF_CI_HI_MASK GENMASK(3, 0) +#define RQ_CTXT_PREF_OWNER_MASK BIT(4) + +#define RQ_CTXT_PREF_WQ_PFN_HI_MASK GENMASK(19, 0) +#define RQ_CTXT_PREF_CI_LOW_MASK GENMASK(31, 20) +#define RQ_CTXT_PREF_SET(val, member) \ + FIELD_PREP(RQ_CTXT_PREF_##member##_MASK, val) + +#define RQ_CTXT_WQ_BLOCK_PFN_HI_MASK GENMASK(22, 0) +#define RQ_CTXT_WQ_BLOCK_SET(val, member) \ + FIELD_PREP(RQ_CTXT_WQ_BLOCK_##member##_MASK, val) + +#define WQ_PAGE_PFN_SHIFT 12 +#define WQ_BLOCK_PFN_SHIFT 9 +#define WQ_PAGE_PFN(page_addr) ((page_addr) >> WQ_PAGE_PFN_SHIFT) +#define WQ_BLOCK_PFN(page_addr) ((page_addr) >> WQ_BLOCK_PFN_SHIFT) + int hinic3_init_nic_io(struct hinic3_nic_dev *nic_dev) { - /* Completed by later submission due to LoC limit. 
*/ - return -EFAULT; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + struct hinic3_nic_io *nic_io; + int err; + + nic_io = kzalloc(sizeof(*nic_io), GFP_KERNEL); + if (!nic_io) + return -ENOMEM; + + nic_dev->nic_io = nic_io; + + err = hinic3_set_func_svc_used_state(hwdev, COMM_FUNC_SVC_T_NIC, 1); + if (err) { + dev_err(hwdev->dev, "Failed to set function svc used state\n"); + goto err_free_nicio; + } + + err = hinic3_init_function_table(nic_dev); + if (err) { + dev_err(hwdev->dev, "Failed to init function table\n"); + goto err_clear_func_svc_used_state; + } + + nic_io->rx_buf_len = nic_dev->rx_buf_len; + + err = hinic3_get_nic_feature_from_hw(nic_dev); + if (err) { + dev_err(hwdev->dev, "Failed to get nic features\n"); + goto err_clear_func_svc_used_state; + } + + nic_io->feature_cap &= HINIC3_NIC_F_ALL_MASK; + nic_io->feature_cap &= HINIC3_NIC_DRV_DEFAULT_FEATURE; + dev_dbg(hwdev->dev, "nic features: 0x%llx\n\n", nic_io->feature_cap); + + return 0; + +err_clear_func_svc_used_state: + hinic3_set_func_svc_used_state(hwdev, COMM_FUNC_SVC_T_NIC, 0); +err_free_nicio: + nic_dev->nic_io = NULL; + kfree(nic_io); + + return err; } void hinic3_free_nic_io(struct hinic3_nic_dev *nic_dev) { - /* Completed by later submission due to LoC limit. */ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + + hinic3_set_func_svc_used_state(nic_dev->hwdev, COMM_FUNC_SVC_T_NIC, 0); + nic_dev->nic_io = NULL; + kfree(nic_io); +} + +int hinic3_init_nicio_res(struct hinic3_nic_dev *nic_dev) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + void __iomem *db_base; + int err; + + nic_io->max_qps = hinic3_func_max_qnum(hwdev); + + err = hinic3_alloc_db_addr(hwdev, &db_base, NULL); + if (err) { + dev_err(hwdev->dev, "Failed to allocate doorbell for sqs\n"); + return err; + } + nic_io->sqs_db_addr = db_base; + + err = hinic3_alloc_db_addr(hwdev, &db_base, NULL); + if (err) { + hinic3_free_db_addr(hwdev, nic_io->sqs_db_addr); + dev_err(hwdev->dev, "Failed to allocate doorbell for rqs\n"); + return err; + } + nic_io->rqs_db_addr = db_base; + + nic_io->ci_vaddr_base = + dma_alloc_coherent(hwdev->dev, + HINIC3_CI_TABLE_SIZE(nic_io->max_qps), + &nic_io->ci_dma_base, + GFP_KERNEL); + if (!nic_io->ci_vaddr_base) { + hinic3_free_db_addr(hwdev, nic_io->sqs_db_addr); + hinic3_free_db_addr(hwdev, nic_io->rqs_db_addr); + return -ENOMEM; + } + + return 0; +} + +void hinic3_free_nicio_res(struct hinic3_nic_dev *nic_dev) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + + dma_free_coherent(hwdev->dev, + HINIC3_CI_TABLE_SIZE(nic_io->max_qps), + nic_io->ci_vaddr_base, nic_io->ci_dma_base); + + hinic3_free_db_addr(hwdev, nic_io->sqs_db_addr); + hinic3_free_db_addr(hwdev, nic_io->rqs_db_addr); +} + +static int hinic3_create_sq(struct hinic3_hwdev *hwdev, + struct hinic3_io_queue *sq, + u16 q_id, u32 sq_depth, u16 sq_msix_idx) +{ + int err; + + /* sq used & hardware request init 1 */ + sq->owner = 1; + + sq->q_id = q_id; + sq->msix_entry_idx = sq_msix_idx; + + err = hinic3_wq_create(hwdev, &sq->wq, sq_depth, + BIT(HINIC3_SQ_WQEBB_SHIFT)); + if (err) { + dev_err(hwdev->dev, "Failed to create tx queue %u wq\n", + q_id); + return err; + } + + return 0; +} + +static int hinic3_create_rq(struct hinic3_hwdev *hwdev, + struct hinic3_io_queue *rq, + u16 q_id, u32 rq_depth, u16 rq_msix_idx) +{ + int err; + + rq->q_id = q_id; + rq->msix_entry_idx = rq_msix_idx; + + err = hinic3_wq_create(hwdev, &rq->wq, rq_depth, + BIT(HINIC3_RQ_WQEBB_SHIFT + + 
HINIC3_NORMAL_RQ_WQE)); + if (err) { + dev_err(hwdev->dev, "Failed to create rx queue %u wq\n", + q_id); + return err; + } + + return 0; +} + +static int hinic3_create_qp(struct hinic3_hwdev *hwdev, + struct hinic3_io_queue *sq, + struct hinic3_io_queue *rq, u16 q_id, u32 sq_depth, + u32 rq_depth, u16 qp_msix_idx) +{ + int err; + + err = hinic3_create_sq(hwdev, sq, q_id, sq_depth, qp_msix_idx); + if (err) { + dev_err(hwdev->dev, "Failed to create sq, qid: %u\n", + q_id); + return err; + } + + err = hinic3_create_rq(hwdev, rq, q_id, rq_depth, qp_msix_idx); + if (err) { + dev_err(hwdev->dev, "Failed to create rq, qid: %u\n", + q_id); + goto err_destroy_sq_wq; + } + + return 0; + +err_destroy_sq_wq: + hinic3_wq_destroy(hwdev, &sq->wq); + + return err; +} + +static void hinic3_destroy_qp(struct hinic3_hwdev *hwdev, + struct hinic3_io_queue *sq, + struct hinic3_io_queue *rq) +{ + hinic3_wq_destroy(hwdev, &sq->wq); + hinic3_wq_destroy(hwdev, &rq->wq); +} + +int hinic3_alloc_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params) +{ + struct msix_entry *qps_msix_entries = nic_dev->qps_msix_entries; + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + struct hinic3_io_queue *sqs; + struct hinic3_io_queue *rqs; + u16 q_id; + int err; + + if (qp_params->num_qps > nic_io->max_qps || !qp_params->num_qps) + return -EINVAL; + + sqs = kcalloc(qp_params->num_qps, sizeof(*sqs), GFP_KERNEL); + if (!sqs) { + err = -ENOMEM; + goto err_out; + } + + rqs = kcalloc(qp_params->num_qps, sizeof(*rqs), GFP_KERNEL); + if (!rqs) { + err = -ENOMEM; + goto err_free_sqs; + } + + for (q_id = 0; q_id < qp_params->num_qps; q_id++) { + err = hinic3_create_qp(hwdev, &sqs[q_id], &rqs[q_id], q_id, + qp_params->sq_depth, qp_params->rq_depth, + qps_msix_entries[q_id].entry); + if (err) { + dev_err(hwdev->dev, "Failed to allocate qp %u, err: %d\n", + q_id, err); + goto err_destroy_qp; + } + } + + qp_params->sqs = sqs; + qp_params->rqs = rqs; + + return 0; + +err_destroy_qp: + while (q_id > 0) { + q_id--; + hinic3_destroy_qp(hwdev, &sqs[q_id], &rqs[q_id]); + } + kfree(rqs); +err_free_sqs: + kfree(sqs); +err_out: + return err; +} + +void hinic3_free_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params) +{ + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + u16 q_id; + + for (q_id = 0; q_id < qp_params->num_qps; q_id++) + hinic3_destroy_qp(hwdev, &qp_params->sqs[q_id], + &qp_params->rqs[q_id]); + + kfree(qp_params->sqs); + kfree(qp_params->rqs); +} + +void hinic3_init_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_io_queue *sqs = qp_params->sqs; + struct hinic3_io_queue *rqs = qp_params->rqs; + u16 q_id; + + nic_io->num_qps = qp_params->num_qps; + nic_io->sq = qp_params->sqs; + nic_io->rq = qp_params->rqs; + for (q_id = 0; q_id < nic_io->num_qps; q_id++) { + sqs[q_id].cons_idx_addr = + (u16 *)HINIC3_CI_VADDR(nic_io->ci_vaddr_base, q_id); + /* clear ci value */ + WRITE_ONCE(*sqs[q_id].cons_idx_addr, 0); + + sqs[q_id].db_addr = nic_io->sqs_db_addr; + rqs[q_id].db_addr = nic_io->rqs_db_addr; + } +} + +void hinic3_uninit_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + + qp_params->sqs = nic_io->sq; + qp_params->rqs = nic_io->rq; + qp_params->num_qps = nic_io->num_qps; +} + +static void hinic3_qp_prepare_cmdq_header(struct hinic3_qp_ctxt_hdr *qp_ctxt_hdr, + enum 
hinic3_qp_ctxt_type ctxt_type, + u16 num_queues, u16 q_id) +{ + qp_ctxt_hdr->queue_type = cpu_to_le16(ctxt_type); + qp_ctxt_hdr->num_queues = cpu_to_le16(num_queues); + qp_ctxt_hdr->start_qid = cpu_to_le16(q_id); + qp_ctxt_hdr->rsvd = 0; +} + +static void hinic3_sq_prepare_ctxt(struct hinic3_io_queue *sq, u16 sq_id, + struct hinic3_sq_ctxt *sq_ctxt) +{ + u64 wq_page_addr, wq_page_pfn, wq_block_pfn; + u32 wq_block_pfn_hi, wq_block_pfn_lo; + u32 wq_page_pfn_hi, wq_page_pfn_lo; + u16 pi_start, ci_start; + + ci_start = hinic3_get_sq_local_ci(sq); + pi_start = hinic3_get_sq_local_pi(sq); + + wq_page_addr = hinic3_wq_get_first_wqe_page_addr(&sq->wq); + + wq_page_pfn = WQ_PAGE_PFN(wq_page_addr); + wq_page_pfn_hi = upper_32_bits(wq_page_pfn); + wq_page_pfn_lo = lower_32_bits(wq_page_pfn); + + wq_block_pfn = WQ_BLOCK_PFN(sq->wq.wq_block_paddr); + wq_block_pfn_hi = upper_32_bits(wq_block_pfn); + wq_block_pfn_lo = lower_32_bits(wq_block_pfn); + + sq_ctxt->ci_pi = + cpu_to_le32(SQ_CTXT_CI_PI_SET(ci_start, CI_IDX) | + SQ_CTXT_CI_PI_SET(pi_start, PI_IDX)); + + sq_ctxt->drop_mode_sp = + cpu_to_le32(SQ_CTXT_MODE_SET(0, SP_FLAG) | + SQ_CTXT_MODE_SET(0, PKT_DROP)); + + sq_ctxt->wq_pfn_hi_owner = + cpu_to_le32(SQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) | + SQ_CTXT_WQ_PAGE_SET(1, OWNER)); + + sq_ctxt->wq_pfn_lo = cpu_to_le32(wq_page_pfn_lo); + + sq_ctxt->pkt_drop_thd = + cpu_to_le32(SQ_CTXT_PKT_DROP_THD_SET(HINIC3_DEFAULT_DROP_THD_ON, THD_ON) | + SQ_CTXT_PKT_DROP_THD_SET(HINIC3_DEFAULT_DROP_THD_OFF, THD_OFF)); + + sq_ctxt->global_sq_id = + cpu_to_le32(SQ_CTXT_GLOBAL_QUEUE_ID_SET((u32)sq_id, + GLOBAL_SQ_ID)); + + /* enable insert c-vlan by default */ + sq_ctxt->vlan_ceq_attr = + cpu_to_le32(SQ_CTXT_VLAN_CEQ_SET(0, CEQ_EN) | + SQ_CTXT_VLAN_CEQ_SET(1, INSERT_MODE)); + + sq_ctxt->rsvd0 = 0; + + sq_ctxt->pref_cache = + cpu_to_le32(SQ_CTXT_PREF_SET(SQ_WQ_PREFETCH_MIN, CACHE_MIN) | + SQ_CTXT_PREF_SET(SQ_WQ_PREFETCH_MAX, CACHE_MAX) | + SQ_CTXT_PREF_SET(SQ_WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD)); + + sq_ctxt->pref_ci_owner = + cpu_to_le32(SQ_CTXT_PREF_SET(SQ_CTXT_PREF_CI_HI(ci_start), CI_HI) | + SQ_CTXT_PREF_SET(1, OWNER)); + + sq_ctxt->pref_wq_pfn_hi_ci = + cpu_to_le32(SQ_CTXT_PREF_SET(ci_start, CI_LOW) | + SQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_PFN_HI)); + + sq_ctxt->pref_wq_pfn_lo = cpu_to_le32(wq_page_pfn_lo); + + sq_ctxt->wq_block_pfn_hi = + cpu_to_le32(SQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, PFN_HI)); + + sq_ctxt->wq_block_pfn_lo = cpu_to_le32(wq_block_pfn_lo); +} + +static void hinic3_rq_prepare_ctxt_get_wq_info(struct hinic3_io_queue *rq, + u32 *wq_page_pfn_hi, + u32 *wq_page_pfn_lo, + u32 *wq_block_pfn_hi, + u32 *wq_block_pfn_lo) +{ + u64 wq_page_addr, wq_page_pfn, wq_block_pfn; + + wq_page_addr = hinic3_wq_get_first_wqe_page_addr(&rq->wq); + + wq_page_pfn = WQ_PAGE_PFN(wq_page_addr); + *wq_page_pfn_hi = upper_32_bits(wq_page_pfn); + *wq_page_pfn_lo = lower_32_bits(wq_page_pfn); + + wq_block_pfn = WQ_BLOCK_PFN(rq->wq.wq_block_paddr); + *wq_block_pfn_hi = upper_32_bits(wq_block_pfn); + *wq_block_pfn_lo = lower_32_bits(wq_block_pfn); +} + +static void hinic3_rq_prepare_ctxt(struct hinic3_io_queue *rq, + struct hinic3_rq_ctxt *rq_ctxt) +{ + u32 wq_block_pfn_hi, wq_block_pfn_lo; + u32 wq_page_pfn_hi, wq_page_pfn_lo; + u16 pi_start, ci_start; + + ci_start = (rq->wq.cons_idx & rq->wq.idx_mask) << HINIC3_NORMAL_RQ_WQE; + pi_start = (rq->wq.prod_idx & rq->wq.idx_mask) << HINIC3_NORMAL_RQ_WQE; + + hinic3_rq_prepare_ctxt_get_wq_info(rq, &wq_page_pfn_hi, &wq_page_pfn_lo, + &wq_block_pfn_hi, &wq_block_pfn_lo); + + 
rq_ctxt->ci_pi = + cpu_to_le32(RQ_CTXT_CI_PI_SET(ci_start, CI_IDX) | + RQ_CTXT_CI_PI_SET(pi_start, PI_IDX)); + + rq_ctxt->ceq_attr = + cpu_to_le32(RQ_CTXT_CEQ_ATTR_SET(0, EN) | + RQ_CTXT_CEQ_ATTR_SET(rq->msix_entry_idx, INTR)); + + rq_ctxt->wq_pfn_hi_type_owner = + cpu_to_le32(RQ_CTXT_WQ_PAGE_SET(wq_page_pfn_hi, HI_PFN) | + RQ_CTXT_WQ_PAGE_SET(1, OWNER)); + + /* use 16Byte WQE */ + rq_ctxt->wq_pfn_hi_type_owner |= + cpu_to_le32(RQ_CTXT_WQ_PAGE_SET(2, WQE_TYPE)); + rq_ctxt->cqe_sge_len = cpu_to_le32(RQ_CTXT_CQE_LEN_SET(1, CQE_LEN)); + + rq_ctxt->wq_pfn_lo = cpu_to_le32(wq_page_pfn_lo); + + rq_ctxt->pref_cache = + cpu_to_le32(RQ_CTXT_PREF_SET(RQ_WQ_PREFETCH_MIN, CACHE_MIN) | + RQ_CTXT_PREF_SET(RQ_WQ_PREFETCH_MAX, CACHE_MAX) | + RQ_CTXT_PREF_SET(RQ_WQ_PREFETCH_THRESHOLD, CACHE_THRESHOLD)); + + rq_ctxt->pref_ci_owner = + cpu_to_le32(RQ_CTXT_PREF_SET(SQ_CTXT_PREF_CI_HI(ci_start), CI_HI) | + RQ_CTXT_PREF_SET(1, OWNER)); + + rq_ctxt->pref_wq_pfn_hi_ci = + cpu_to_le32(RQ_CTXT_PREF_SET(wq_page_pfn_hi, WQ_PFN_HI) | + RQ_CTXT_PREF_SET(ci_start, CI_LOW)); + + rq_ctxt->pref_wq_pfn_lo = cpu_to_le32(wq_page_pfn_lo); + + rq_ctxt->wq_block_pfn_hi = + cpu_to_le32(RQ_CTXT_WQ_BLOCK_SET(wq_block_pfn_hi, PFN_HI)); + + rq_ctxt->wq_block_pfn_lo = cpu_to_le32(wq_block_pfn_lo); +} + +static int init_sq_ctxts(struct hinic3_nic_dev *nic_dev) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + struct hinic3_sq_ctxt_block *sq_ctxt_block; + u16 q_id, curr_id, max_ctxts, i; + struct hinic3_sq_ctxt *sq_ctxt; + struct hinic3_cmd_buf *cmd_buf; + struct hinic3_io_queue *sq; + __le64 out_param; + int err = 0; + + cmd_buf = hinic3_alloc_cmd_buf(hwdev); + if (!cmd_buf) { + dev_err(hwdev->dev, "Failed to allocate cmd buf\n"); + return -ENOMEM; + } + + q_id = 0; + while (q_id < nic_io->num_qps) { + sq_ctxt_block = cmd_buf->buf; + sq_ctxt = sq_ctxt_block->sq_ctxt; + + max_ctxts = (nic_io->num_qps - q_id) > HINIC3_Q_CTXT_MAX ? + HINIC3_Q_CTXT_MAX : (nic_io->num_qps - q_id); + + hinic3_qp_prepare_cmdq_header(&sq_ctxt_block->cmdq_hdr, + HINIC3_QP_CTXT_TYPE_SQ, max_ctxts, + q_id); + + for (i = 0; i < max_ctxts; i++) { + curr_id = q_id + i; + sq = &nic_io->sq[curr_id]; + hinic3_sq_prepare_ctxt(sq, curr_id, &sq_ctxt[i]); + } + + hinic3_cmdq_buf_swab32(sq_ctxt_block, sizeof(*sq_ctxt_block)); + + cmd_buf->size = cpu_to_le16(SQ_CTXT_SIZE(max_ctxts)); + err = hinic3_cmdq_direct_resp(hwdev, MGMT_MOD_L2NIC, + L2NIC_UCODE_CMD_MODIFY_QUEUE_CTX, + cmd_buf, &out_param); + if (err || out_param) { + dev_err(hwdev->dev, "Failed to set SQ ctxts, err: %d, out_param: 0x%llx\n", + err, out_param); + err = -EFAULT; + break; + } + + q_id += max_ctxts; + } + + hinic3_free_cmd_buf(hwdev, cmd_buf); + + return err; +} + +static int init_rq_ctxts(struct hinic3_nic_dev *nic_dev) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + struct hinic3_rq_ctxt_block *rq_ctxt_block; + u16 q_id, curr_id, max_ctxts, i; + struct hinic3_rq_ctxt *rq_ctxt; + struct hinic3_cmd_buf *cmd_buf; + struct hinic3_io_queue *rq; + __le64 out_param; + int err = 0; + + cmd_buf = hinic3_alloc_cmd_buf(hwdev); + if (!cmd_buf) { + dev_err(hwdev->dev, "Failed to allocate cmd buf\n"); + return -ENOMEM; + } + + q_id = 0; + while (q_id < nic_io->num_qps) { + rq_ctxt_block = cmd_buf->buf; + rq_ctxt = rq_ctxt_block->rq_ctxt; + + max_ctxts = (nic_io->num_qps - q_id) > HINIC3_Q_CTXT_MAX ? 
+ HINIC3_Q_CTXT_MAX : (nic_io->num_qps - q_id); + + hinic3_qp_prepare_cmdq_header(&rq_ctxt_block->cmdq_hdr, + HINIC3_QP_CTXT_TYPE_RQ, max_ctxts, + q_id); + + for (i = 0; i < max_ctxts; i++) { + curr_id = q_id + i; + rq = &nic_io->rq[curr_id]; + hinic3_rq_prepare_ctxt(rq, &rq_ctxt[i]); + } + + hinic3_cmdq_buf_swab32(rq_ctxt_block, sizeof(*rq_ctxt_block)); + + cmd_buf->size = cpu_to_le16(RQ_CTXT_SIZE(max_ctxts)); + + err = hinic3_cmdq_direct_resp(hwdev, MGMT_MOD_L2NIC, + L2NIC_UCODE_CMD_MODIFY_QUEUE_CTX, + cmd_buf, &out_param); + if (err || out_param) { + dev_err(hwdev->dev, "Failed to set RQ ctxts, err: %d, out_param: 0x%llx\n", + err, out_param); + err = -EFAULT; + break; + } + + q_id += max_ctxts; + } + + hinic3_free_cmd_buf(hwdev, cmd_buf); + + return err; +} + +static int init_qp_ctxts(struct hinic3_nic_dev *nic_dev) +{ + int err; + + err = init_sq_ctxts(nic_dev); + if (err) + return err; + + err = init_rq_ctxts(nic_dev); + if (err) + return err; + + return 0; +} + +static int clean_queue_offload_ctxt(struct hinic3_nic_dev *nic_dev, + enum hinic3_qp_ctxt_type ctxt_type) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + struct hinic3_clean_queue_ctxt *ctxt_block; + struct hinic3_cmd_buf *cmd_buf; + __le64 out_param; + int err; + + cmd_buf = hinic3_alloc_cmd_buf(hwdev); + if (!cmd_buf) { + dev_err(hwdev->dev, "Failed to allocate cmd buf\n"); + return -ENOMEM; + } + + ctxt_block = cmd_buf->buf; + ctxt_block->cmdq_hdr.num_queues = cpu_to_le16(nic_io->max_qps); + ctxt_block->cmdq_hdr.queue_type = cpu_to_le16(ctxt_type); + ctxt_block->cmdq_hdr.start_qid = 0; + ctxt_block->cmdq_hdr.rsvd = 0; + ctxt_block->rsvd = 0; + + hinic3_cmdq_buf_swab32(ctxt_block, sizeof(*ctxt_block)); + + cmd_buf->size = cpu_to_le16(sizeof(*ctxt_block)); + + err = hinic3_cmdq_direct_resp(hwdev, MGMT_MOD_L2NIC, + L2NIC_UCODE_CMD_CLEAN_QUEUE_CTX, + cmd_buf, &out_param); + if (err || out_param) { + dev_err(hwdev->dev, "Failed to clean queue offload ctxts, err: %d,out_param: 0x%llx\n", + err, out_param); + + err = -EFAULT; + } + + hinic3_free_cmd_buf(hwdev, cmd_buf); + + return err; +} + +static int clean_qp_offload_ctxt(struct hinic3_nic_dev *nic_dev) +{ + /* clean LRO/TSO context space */ + return clean_queue_offload_ctxt(nic_dev, HINIC3_QP_CTXT_TYPE_SQ) || + clean_queue_offload_ctxt(nic_dev, HINIC3_QP_CTXT_TYPE_RQ); +} + +/* init qps ctxt and set sq ci attr and arm all sq */ +int hinic3_init_qp_ctxts(struct hinic3_nic_dev *nic_dev) +{ + struct hinic3_nic_io *nic_io = nic_dev->nic_io; + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + struct hinic3_sq_attr sq_attr; + u32 rq_depth; + u16 q_id; + int err; + + err = init_qp_ctxts(nic_dev); + if (err) { + dev_err(hwdev->dev, "Failed to init QP ctxts\n"); + return err; + } + + /* clean LRO/TSO context space */ + err = clean_qp_offload_ctxt(nic_dev); + if (err) { + dev_err(hwdev->dev, "Failed to clean qp offload ctxts\n"); + return err; + } + + rq_depth = nic_io->rq[0].wq.q_depth << HINIC3_NORMAL_RQ_WQE; + + err = hinic3_set_root_ctxt(hwdev, rq_depth, nic_io->sq[0].wq.q_depth, + nic_io->rx_buf_len); + if (err) { + dev_err(hwdev->dev, "Failed to set root context\n"); + return err; + } + + for (q_id = 0; q_id < nic_io->num_qps; q_id++) { + sq_attr.ci_dma_base = + HINIC3_CI_PADDR(nic_io->ci_dma_base, q_id) >> 0x2; + sq_attr.pending_limit = HINIC3_DEFAULT_TX_CI_PENDING_LIMIT; + sq_attr.coalescing_time = HINIC3_DEFAULT_TX_CI_COALESCING_TIME; + sq_attr.intr_en = 1; + sq_attr.intr_idx = nic_io->sq[q_id].msix_entry_idx; + 
sq_attr.l2nic_sqn = q_id; + sq_attr.dma_attr_off = 0; + err = hinic3_set_ci_table(hwdev, &sq_attr); + if (err) { + dev_err(hwdev->dev, "Failed to set ci table\n"); + goto err_clean_root_ctxt; + } + } + + return 0; + +err_clean_root_ctxt: + hinic3_clean_root_ctxt(hwdev); + + return err; +} + +void hinic3_free_qp_ctxts(struct hinic3_nic_dev *nic_dev) +{ + hinic3_clean_root_ctxt(nic_dev->hwdev); } diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h index 865ba6878c48..12eefabcf1db 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_nic_io.h @@ -75,8 +75,8 @@ static inline u16 hinic3_get_sq_hw_ci(const struct hinic3_io_queue *sq) #define DB_CFLAG_DP_RQ 1 struct hinic3_nic_db { - u32 db_info; - u32 pi_hi; + __le32 db_info; + __le32 pi_hi; }; static inline void hinic3_write_db(struct hinic3_io_queue *queue, int cos, @@ -84,15 +84,25 @@ static inline void hinic3_write_db(struct hinic3_io_queue *queue, int cos, { struct hinic3_nic_db db; - db.db_info = DB_INFO_SET(DB_SRC_TYPE, TYPE) | - DB_INFO_SET(cflag, CFLAG) | - DB_INFO_SET(cos, COS) | - DB_INFO_SET(queue->q_id, QID); - db.pi_hi = DB_PI_HIGH(pi); + db.db_info = + cpu_to_le32(DB_INFO_SET(DB_SRC_TYPE, TYPE) | + DB_INFO_SET(cflag, CFLAG) | + DB_INFO_SET(cos, COS) | + DB_INFO_SET(queue->q_id, QID)); + db.pi_hi = cpu_to_le32(DB_PI_HIGH(pi)); writeq(*((u64 *)&db), DB_ADDR(queue, pi)); } +struct hinic3_dyna_qp_params { + u16 num_qps; + u32 sq_depth; + u32 rq_depth; + + struct hinic3_io_queue *sqs; + struct hinic3_io_queue *rqs; +}; + struct hinic3_nic_io { struct hinic3_io_queue *sq; struct hinic3_io_queue *rq; @@ -117,4 +127,19 @@ struct hinic3_nic_io { int hinic3_init_nic_io(struct hinic3_nic_dev *nic_dev); void hinic3_free_nic_io(struct hinic3_nic_dev *nic_dev); +int hinic3_init_nicio_res(struct hinic3_nic_dev *nic_dev); +void hinic3_free_nicio_res(struct hinic3_nic_dev *nic_dev); + +int hinic3_alloc_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params); +void hinic3_free_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params); +void hinic3_init_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params); +void hinic3_uninit_qps(struct hinic3_nic_dev *nic_dev, + struct hinic3_dyna_qp_params *qp_params); + +int hinic3_init_qp_ctxts(struct hinic3_nic_dev *nic_dev); +void hinic3_free_qp_ctxts(struct hinic3_nic_dev *nic_dev); + #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_pci_id_tbl.h b/drivers/net/ethernet/huawei/hinic3/hinic3_pci_id_tbl.h new file mode 100644 index 000000000000..86c88d0bb4bd --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_pci_id_tbl.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. */ + +#ifndef _HINIC3_PCI_ID_TBL_H_ +#define _HINIC3_PCI_ID_TBL_H_ + +#define PCI_DEV_ID_HINIC3_VF 0x375F + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c new file mode 100644 index 000000000000..4ff1b2f79838 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. 
+ +#include <linux/ethtool.h> + +#include "hinic3_cmdq.h" +#include "hinic3_hwdev.h" +#include "hinic3_hwif.h" +#include "hinic3_mbox.h" +#include "hinic3_nic_cfg.h" +#include "hinic3_nic_dev.h" +#include "hinic3_rss.h" + +static void hinic3_fillout_indir_tbl(struct net_device *netdev, u16 *indir) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + u16 i, num_qps; + + num_qps = nic_dev->q_params.num_qps; + for (i = 0; i < L2NIC_RSS_INDIR_SIZE; i++) + indir[i] = ethtool_rxfh_indir_default(i, num_qps); +} + +static int hinic3_rss_cfg(struct hinic3_hwdev *hwdev, u8 rss_en, u16 num_qps) +{ + struct mgmt_msg_params msg_params = {}; + struct l2nic_cmd_cfg_rss rss_cfg = {}; + int err; + + rss_cfg.func_id = hinic3_global_func_id(hwdev); + rss_cfg.rss_en = rss_en; + rss_cfg.rq_priority_number = 0; + rss_cfg.num_qps = num_qps; + + mgmt_msg_params_init_default(&msg_params, &rss_cfg, sizeof(rss_cfg)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_CFG_RSS, &msg_params); + if (err || rss_cfg.msg_head.status) { + dev_err(hwdev->dev, "Failed to set rss cfg, err: %d, status: 0x%x\n", + err, rss_cfg.msg_head.status); + return -EINVAL; + } + + return 0; +} + +static void hinic3_init_rss_parameters(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + nic_dev->rss_hash_type = HINIC3_RSS_HASH_ENGINE_TYPE_XOR; + nic_dev->rss_type.tcp_ipv6_ext = 1; + nic_dev->rss_type.ipv6_ext = 1; + nic_dev->rss_type.tcp_ipv6 = 1; + nic_dev->rss_type.ipv6 = 1; + nic_dev->rss_type.tcp_ipv4 = 1; + nic_dev->rss_type.ipv4 = 1; + nic_dev->rss_type.udp_ipv6 = 1; + nic_dev->rss_type.udp_ipv4 = 1; +} + +static void decide_num_qps(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + unsigned int dev_cpus; + + dev_cpus = netif_get_num_default_rss_queues(); + nic_dev->q_params.num_qps = min(dev_cpus, nic_dev->max_qps); +} + +static int alloc_rss_resource(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + nic_dev->rss_hkey = kmalloc_array(L2NIC_RSS_KEY_SIZE, + sizeof(nic_dev->rss_hkey[0]), + GFP_KERNEL); + if (!nic_dev->rss_hkey) + return -ENOMEM; + + netdev_rss_key_fill(nic_dev->rss_hkey, L2NIC_RSS_KEY_SIZE); + + nic_dev->rss_indir = kcalloc(L2NIC_RSS_INDIR_SIZE, sizeof(u16), + GFP_KERNEL); + if (!nic_dev->rss_indir) { + kfree(nic_dev->rss_hkey); + nic_dev->rss_hkey = NULL; + return -ENOMEM; + } + + return 0; +} + +static int hinic3_rss_set_indir_tbl(struct hinic3_hwdev *hwdev, + const u16 *indir_table) +{ + struct l2nic_cmd_rss_set_indir_tbl *indir_tbl; + struct hinic3_cmd_buf *cmd_buf; + __le64 out_param; + int err; + u32 i; + + cmd_buf = hinic3_alloc_cmd_buf(hwdev); + if (!cmd_buf) { + dev_err(hwdev->dev, "Failed to allocate cmd buf\n"); + return -ENOMEM; + } + + cmd_buf->size = cpu_to_le16(sizeof(struct l2nic_cmd_rss_set_indir_tbl)); + indir_tbl = cmd_buf->buf; + memset(indir_tbl, 0, sizeof(*indir_tbl)); + + for (i = 0; i < L2NIC_RSS_INDIR_SIZE; i++) + indir_tbl->entry[i] = cpu_to_le16(indir_table[i]); + + hinic3_cmdq_buf_swab32(indir_tbl, sizeof(*indir_tbl)); + + err = hinic3_cmdq_direct_resp(hwdev, MGMT_MOD_L2NIC, + L2NIC_UCODE_CMD_SET_RSS_INDIR_TBL, + cmd_buf, &out_param); + if (err || out_param) { + dev_err(hwdev->dev, "Failed to set rss indir table\n"); + err = -EFAULT; + } + + hinic3_free_cmd_buf(hwdev, cmd_buf); + + return err; +} + +static int hinic3_set_rss_type(struct hinic3_hwdev *hwdev, + struct hinic3_rss_type rss_type) +{ + struct l2nic_cmd_set_rss_ctx_tbl ctx_tbl = {}; + struct 
mgmt_msg_params msg_params = {}; + u32 ctx; + int err; + + ctx_tbl.func_id = hinic3_global_func_id(hwdev); + ctx = L2NIC_RSS_TYPE_SET(1, VALID) | + L2NIC_RSS_TYPE_SET(rss_type.ipv4, IPV4) | + L2NIC_RSS_TYPE_SET(rss_type.ipv6, IPV6) | + L2NIC_RSS_TYPE_SET(rss_type.ipv6_ext, IPV6_EXT) | + L2NIC_RSS_TYPE_SET(rss_type.tcp_ipv4, TCP_IPV4) | + L2NIC_RSS_TYPE_SET(rss_type.tcp_ipv6, TCP_IPV6) | + L2NIC_RSS_TYPE_SET(rss_type.tcp_ipv6_ext, TCP_IPV6_EXT) | + L2NIC_RSS_TYPE_SET(rss_type.udp_ipv4, UDP_IPV4) | + L2NIC_RSS_TYPE_SET(rss_type.udp_ipv6, UDP_IPV6); + ctx_tbl.context = ctx; + + mgmt_msg_params_init_default(&msg_params, &ctx_tbl, sizeof(ctx_tbl)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_SET_RSS_CTX_TBL, &msg_params); + + if (ctx_tbl.msg_head.status == MGMT_STATUS_CMD_UNSUPPORTED) { + return MGMT_STATUS_CMD_UNSUPPORTED; + } else if (err || ctx_tbl.msg_head.status) { + dev_err(hwdev->dev, "mgmt Failed to set rss context offload, err: %d, status: 0x%x\n", + err, ctx_tbl.msg_head.status); + return -EINVAL; + } + + return 0; +} + +static int hinic3_rss_cfg_hash_type(struct hinic3_hwdev *hwdev, u8 opcode, + enum hinic3_rss_hash_type *type) +{ + struct l2nic_cmd_cfg_rss_engine hash_type_cmd = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + hash_type_cmd.func_id = hinic3_global_func_id(hwdev); + hash_type_cmd.opcode = opcode; + + if (opcode == MGMT_MSG_CMD_OP_SET) + hash_type_cmd.hash_engine = *type; + + mgmt_msg_params_init_default(&msg_params, &hash_type_cmd, + sizeof(hash_type_cmd)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_CFG_RSS_HASH_ENGINE, + &msg_params); + if (err || hash_type_cmd.msg_head.status) { + dev_err(hwdev->dev, "Failed to %s hash engine, err: %d, status: 0x%x\n", + opcode == MGMT_MSG_CMD_OP_SET ? "set" : "get", + err, hash_type_cmd.msg_head.status); + return -EIO; + } + + if (opcode == MGMT_MSG_CMD_OP_GET) + *type = hash_type_cmd.hash_engine; + + return 0; +} + +static int hinic3_rss_set_hash_type(struct hinic3_hwdev *hwdev, + enum hinic3_rss_hash_type type) +{ + return hinic3_rss_cfg_hash_type(hwdev, MGMT_MSG_CMD_OP_SET, &type); +} + +static int hinic3_config_rss_hw_resource(struct net_device *netdev, + u16 *indir_tbl) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + int err; + + err = hinic3_rss_set_indir_tbl(nic_dev->hwdev, indir_tbl); + if (err) + return err; + + err = hinic3_set_rss_type(nic_dev->hwdev, nic_dev->rss_type); + if (err) + return err; + + return hinic3_rss_set_hash_type(nic_dev->hwdev, nic_dev->rss_hash_type); +} + +static int hinic3_rss_cfg_hash_key(struct hinic3_hwdev *hwdev, u8 opcode, + u8 *key) +{ + struct l2nic_cmd_cfg_rss_hash_key hash_key = {}; + struct mgmt_msg_params msg_params = {}; + int err; + + hash_key.func_id = hinic3_global_func_id(hwdev); + hash_key.opcode = opcode; + + if (opcode == MGMT_MSG_CMD_OP_SET) + memcpy(hash_key.key, key, L2NIC_RSS_KEY_SIZE); + + mgmt_msg_params_init_default(&msg_params, &hash_key, sizeof(hash_key)); + + err = hinic3_send_mbox_to_mgmt(hwdev, MGMT_MOD_L2NIC, + L2NIC_CMD_CFG_RSS_HASH_KEY, &msg_params); + if (err || hash_key.msg_head.status) { + dev_err(hwdev->dev, "Failed to %s hash key, err: %d, status: 0x%x\n", + opcode == MGMT_MSG_CMD_OP_SET ? 
"set" : "get", + err, hash_key.msg_head.status); + return -EINVAL; + } + + if (opcode == MGMT_MSG_CMD_OP_GET) + memcpy(key, hash_key.key, L2NIC_RSS_KEY_SIZE); + + return 0; +} + +static int hinic3_rss_set_hash_key(struct hinic3_hwdev *hwdev, u8 *key) +{ + return hinic3_rss_cfg_hash_key(hwdev, MGMT_MSG_CMD_OP_SET, key); +} + +static int hinic3_set_hw_rss_parameters(struct net_device *netdev, u8 rss_en) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + int err; + + err = hinic3_rss_set_hash_key(nic_dev->hwdev, nic_dev->rss_hkey); + if (err) + return err; + + hinic3_fillout_indir_tbl(netdev, nic_dev->rss_indir); + + err = hinic3_config_rss_hw_resource(netdev, nic_dev->rss_indir); + if (err) + return err; + + err = hinic3_rss_cfg(nic_dev->hwdev, rss_en, nic_dev->q_params.num_qps); + if (err) + return err; + + return 0; +} + +int hinic3_rss_init(struct net_device *netdev) +{ + return hinic3_set_hw_rss_parameters(netdev, 1); +} + +void hinic3_rss_uninit(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + hinic3_rss_cfg(nic_dev->hwdev, 0, 0); +} + +void hinic3_clear_rss_config(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + kfree(nic_dev->rss_hkey); + nic_dev->rss_hkey = NULL; + + kfree(nic_dev->rss_indir); + nic_dev->rss_indir = NULL; +} + +void hinic3_try_to_enable_rss(struct net_device *netdev) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_hwdev *hwdev = nic_dev->hwdev; + int err; + + nic_dev->max_qps = hinic3_func_max_qnum(hwdev); + if (nic_dev->max_qps <= 1 || + !hinic3_test_support(nic_dev, HINIC3_NIC_F_RSS)) + goto err_reset_q_params; + + err = alloc_rss_resource(netdev); + if (err) { + nic_dev->max_qps = 1; + goto err_reset_q_params; + } + + set_bit(HINIC3_RSS_ENABLE, &nic_dev->flags); + decide_num_qps(netdev); + hinic3_init_rss_parameters(netdev); + err = hinic3_set_hw_rss_parameters(netdev, 0); + if (err) { + dev_err(hwdev->dev, "Failed to set hardware rss parameters\n"); + hinic3_clear_rss_config(netdev); + nic_dev->max_qps = 1; + goto err_reset_q_params; + } + + return; + +err_reset_q_params: + clear_bit(HINIC3_RSS_ENABLE, &nic_dev->flags); + nic_dev->q_params.num_qps = nic_dev->max_qps; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rss.h b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.h new file mode 100644 index 000000000000..78d82c2aca06 --- /dev/null +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rss.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) Huawei Technologies Co., Ltd. 2025. All rights reserved. */ + +#ifndef _HINIC3_RSS_H_ +#define _HINIC3_RSS_H_ + +#include <linux/netdevice.h> + +int hinic3_rss_init(struct net_device *netdev); +void hinic3_rss_uninit(struct net_device *netdev); +void hinic3_try_to_enable_rss(struct net_device *netdev); +void hinic3_clear_rss_config(struct net_device *netdev); + +#endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c index 860163e9d66c..16c00c3bb1ed 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.c @@ -35,13 +35,35 @@ int hinic3_alloc_rxqs(struct net_device *netdev) { - /* Completed by later submission due to LoC limit. 
*/ - return -EFAULT; + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct pci_dev *pdev = nic_dev->pdev; + u16 num_rxqs = nic_dev->max_qps; + struct hinic3_rxq *rxq; + u16 q_id; + + nic_dev->rxqs = kcalloc(num_rxqs, sizeof(*nic_dev->rxqs), GFP_KERNEL); + if (!nic_dev->rxqs) + return -ENOMEM; + + for (q_id = 0; q_id < num_rxqs; q_id++) { + rxq = &nic_dev->rxqs[q_id]; + rxq->netdev = netdev; + rxq->dev = &pdev->dev; + rxq->q_id = q_id; + rxq->buf_len = nic_dev->rx_buf_len; + rxq->buf_len_shift = ilog2(nic_dev->rx_buf_len); + rxq->q_depth = nic_dev->q_params.rq_depth; + rxq->q_mask = nic_dev->q_params.rq_depth - 1; + } + + return 0; } void hinic3_free_rxqs(struct net_device *netdev) { - /* Completed by later submission due to LoC limit. */ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + + kfree(nic_dev->rxqs); } static int rx_alloc_mapped_page(struct page_pool *page_pool, @@ -50,6 +72,9 @@ static int rx_alloc_mapped_page(struct page_pool *page_pool, struct page *page; u32 page_offset; + if (likely(rx_info->page)) + return 0; + page = page_pool_dev_alloc_frag(page_pool, &page_offset, buf_len); if (unlikely(!page)) return -ENOMEM; @@ -60,14 +85,35 @@ static int rx_alloc_mapped_page(struct page_pool *page_pool, return 0; } +/* Associate fixed completion element to every wqe in the rq. Every rq wqe will + * always post completion to the same place. + */ +static void rq_associate_cqes(struct hinic3_rxq *rxq) +{ + struct hinic3_queue_pages *qpages; + struct hinic3_rq_wqe *rq_wqe; + dma_addr_t cqe_dma; + u32 i; + + qpages = &rxq->rq->wq.qpages; + + for (i = 0; i < rxq->q_depth; i++) { + rq_wqe = get_q_element(qpages, i, NULL); + cqe_dma = rxq->cqe_start_paddr + + i * sizeof(struct hinic3_rq_cqe); + rq_wqe->cqe_hi_addr = cpu_to_le32(upper_32_bits(cqe_dma)); + rq_wqe->cqe_lo_addr = cpu_to_le32(lower_32_bits(cqe_dma)); + } +} + static void rq_wqe_buf_set(struct hinic3_io_queue *rq, uint32_t wqe_idx, dma_addr_t dma_addr, u16 len) { struct hinic3_rq_wqe *rq_wqe; rq_wqe = get_q_element(&rq->wq.qpages, wqe_idx, NULL); - rq_wqe->buf_hi_addr = upper_32_bits(dma_addr); - rq_wqe->buf_lo_addr = lower_32_bits(dma_addr); + rq_wqe->buf_hi_addr = cpu_to_le32(upper_32_bits(dma_addr)); + rq_wqe->buf_lo_addr = cpu_to_le32(lower_32_bits(dma_addr)); } static u32 hinic3_rx_fill_buffers(struct hinic3_rxq *rxq) @@ -102,6 +148,41 @@ static u32 hinic3_rx_fill_buffers(struct hinic3_rxq *rxq) return i; } +static u32 hinic3_alloc_rx_buffers(struct hinic3_dyna_rxq_res *rqres, + u32 rq_depth, u16 buf_len) +{ + u32 free_wqebbs = rq_depth - 1; + u32 idx; + int err; + + for (idx = 0; idx < free_wqebbs; idx++) { + err = rx_alloc_mapped_page(rqres->page_pool, + &rqres->rx_info[idx], buf_len); + if (err) + break; + } + + return idx; +} + +static void hinic3_free_rx_buffers(struct hinic3_dyna_rxq_res *rqres, + u32 q_depth) +{ + struct hinic3_rx_info *rx_info; + u32 i; + + /* Free all the Rx ring sk_buffs */ + for (i = 0; i < q_depth; i++) { + rx_info = &rqres->rx_info[i]; + + if (rx_info->page) { + page_pool_put_full_page(rqres->page_pool, + rx_info->page, false); + rx_info->page = NULL; + } + } +} + static void hinic3_add_rx_frag(struct hinic3_rxq *rxq, struct hinic3_rx_info *rx_info, struct sk_buff *skb, u32 size) @@ -279,7 +360,7 @@ static int recv_one_pkt(struct hinic3_rxq *rxq, struct hinic3_rq_cqe *rx_cqe, if (skb_is_nonlinear(skb)) hinic3_pull_tail(skb); - offload_type = rx_cqe->offload_type; + offload_type = le32_to_cpu(rx_cqe->offload_type); hinic3_rx_csum(rxq, offload_type, status, skb); num_lro = 
RQ_CQE_STATUS_GET(status, NUM_LRO); @@ -299,6 +380,135 @@ static int recv_one_pkt(struct hinic3_rxq *rxq, struct hinic3_rq_cqe *rx_cqe, return 0; } +int hinic3_alloc_rxqs_res(struct net_device *netdev, u16 num_rq, + u32 rq_depth, struct hinic3_dyna_rxq_res *rxqs_res) +{ + u64 cqe_mem_size = sizeof(struct hinic3_rq_cqe) * rq_depth; + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct page_pool_params pp_params = {}; + struct hinic3_dyna_rxq_res *rqres; + u32 pkt_idx; + int idx; + + for (idx = 0; idx < num_rq; idx++) { + rqres = &rxqs_res[idx]; + rqres->rx_info = kcalloc(rq_depth, sizeof(*rqres->rx_info), + GFP_KERNEL); + if (!rqres->rx_info) + goto err_free_rqres; + + rqres->cqe_start_vaddr = + dma_alloc_coherent(&nic_dev->pdev->dev, cqe_mem_size, + &rqres->cqe_start_paddr, GFP_KERNEL); + if (!rqres->cqe_start_vaddr) { + netdev_err(netdev, "Failed to alloc rxq%d rx cqe\n", + idx); + goto err_free_rx_info; + } + + pp_params.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pp_params.pool_size = rq_depth * nic_dev->rx_buf_len / + PAGE_SIZE; + pp_params.nid = dev_to_node(&nic_dev->pdev->dev); + pp_params.dev = &nic_dev->pdev->dev; + pp_params.dma_dir = DMA_FROM_DEVICE; + pp_params.max_len = PAGE_SIZE; + rqres->page_pool = page_pool_create(&pp_params); + if (IS_ERR(rqres->page_pool)) { + netdev_err(netdev, "Failed to create rxq%d page pool\n", + idx); + goto err_free_cqe; + } + + pkt_idx = hinic3_alloc_rx_buffers(rqres, rq_depth, + nic_dev->rx_buf_len); + if (!pkt_idx) { + netdev_err(netdev, "Failed to alloc rxq%d rx buffers\n", + idx); + goto err_destroy_page_pool; + } + rqres->next_to_alloc = pkt_idx; + } + + return 0; + +err_destroy_page_pool: + page_pool_destroy(rqres->page_pool); +err_free_cqe: + dma_free_coherent(&nic_dev->pdev->dev, cqe_mem_size, + rqres->cqe_start_vaddr, + rqres->cqe_start_paddr); +err_free_rx_info: + kfree(rqres->rx_info); +err_free_rqres: + hinic3_free_rxqs_res(netdev, idx, rq_depth, rxqs_res); + + return -ENOMEM; +} + +void hinic3_free_rxqs_res(struct net_device *netdev, u16 num_rq, + u32 rq_depth, struct hinic3_dyna_rxq_res *rxqs_res) +{ + u64 cqe_mem_size = sizeof(struct hinic3_rq_cqe) * rq_depth; + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_dyna_rxq_res *rqres; + int idx; + + for (idx = 0; idx < num_rq; idx++) { + rqres = &rxqs_res[idx]; + + hinic3_free_rx_buffers(rqres, rq_depth); + page_pool_destroy(rqres->page_pool); + dma_free_coherent(&nic_dev->pdev->dev, cqe_mem_size, + rqres->cqe_start_vaddr, + rqres->cqe_start_paddr); + kfree(rqres->rx_info); + } +} + +int hinic3_configure_rxqs(struct net_device *netdev, u16 num_rq, + u32 rq_depth, struct hinic3_dyna_rxq_res *rxqs_res) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_dyna_rxq_res *rqres; + struct msix_entry *msix_entry; + struct hinic3_rxq *rxq; + u16 q_id; + u32 pkts; + + for (q_id = 0; q_id < num_rq; q_id++) { + rxq = &nic_dev->rxqs[q_id]; + rqres = &rxqs_res[q_id]; + msix_entry = &nic_dev->qps_msix_entries[q_id]; + + rxq->irq_id = msix_entry->vector; + rxq->msix_entry_idx = msix_entry->entry; + rxq->next_to_update = 0; + rxq->next_to_alloc = rqres->next_to_alloc; + rxq->q_depth = rq_depth; + rxq->delta = rxq->q_depth; + rxq->q_mask = rxq->q_depth - 1; + rxq->cons_idx = 0; + + rxq->cqe_arr = rqres->cqe_start_vaddr; + rxq->cqe_start_paddr = rqres->cqe_start_paddr; + rxq->rx_info = rqres->rx_info; + rxq->page_pool = rqres->page_pool; + + rxq->rq = &nic_dev->nic_io->rq[rxq->q_id]; + + rq_associate_cqes(rxq); + + pkts = 
hinic3_rx_fill_buffers(rxq); + if (!pkts) { + netdev_err(netdev, "Failed to fill Rx buffer\n"); + return -ENOMEM; + } + } + + return 0; +} + int hinic3_rx_poll(struct hinic3_rxq *rxq, int budget) { struct hinic3_nic_dev *nic_dev = netdev_priv(rxq->netdev); @@ -311,14 +521,14 @@ int hinic3_rx_poll(struct hinic3_rxq *rxq, int budget) while (likely(nr_pkts < budget)) { sw_ci = rxq->cons_idx & rxq->q_mask; rx_cqe = rxq->cqe_arr + sw_ci; - status = rx_cqe->status; + status = le32_to_cpu(rx_cqe->status); if (!RQ_CQE_STATUS_GET(status, RXDONE)) break; /* make sure we read rx_done before packet length */ rmb(); - vlan_len = rx_cqe->vlan_len; + vlan_len = le32_to_cpu(rx_cqe->vlan_len); pkt_len = RQ_CQE_SGE_GET(vlan_len, LEN); if (recv_one_pkt(rxq, rx_cqe, pkt_len, vlan_len, status)) break; diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h index 1cca21858d40..44ae841a3648 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_rx.h @@ -27,21 +27,21 @@ /* RX Completion information that is provided by HW for a specific RX WQE */ struct hinic3_rq_cqe { - u32 status; - u32 vlan_len; - u32 offload_type; - u32 rsvd3; - u32 rsvd4; - u32 rsvd5; - u32 rsvd6; - u32 pkt_info; + __le32 status; + __le32 vlan_len; + __le32 offload_type; + __le32 rsvd3; + __le32 rsvd4; + __le32 rsvd5; + __le32 rsvd6; + __le32 pkt_info; }; struct hinic3_rq_wqe { - u32 buf_hi_addr; - u32 buf_lo_addr; - u32 cqe_hi_addr; - u32 cqe_lo_addr; + __le32 buf_hi_addr; + __le32 buf_lo_addr; + __le32 cqe_hi_addr; + __le32 cqe_lo_addr; }; struct hinic3_rx_info { @@ -82,9 +82,23 @@ struct hinic3_rxq { dma_addr_t cqe_start_paddr; } ____cacheline_aligned; +struct hinic3_dyna_rxq_res { + u16 next_to_alloc; + struct hinic3_rx_info *rx_info; + dma_addr_t cqe_start_paddr; + void *cqe_start_vaddr; + struct page_pool *page_pool; +}; + int hinic3_alloc_rxqs(struct net_device *netdev); void hinic3_free_rxqs(struct net_device *netdev); +int hinic3_alloc_rxqs_res(struct net_device *netdev, u16 num_rq, + u32 rq_depth, struct hinic3_dyna_rxq_res *rxqs_res); +void hinic3_free_rxqs_res(struct net_device *netdev, u16 num_rq, + u32 rq_depth, struct hinic3_dyna_rxq_res *rxqs_res); +int hinic3_configure_rxqs(struct net_device *netdev, u16 num_rq, + u32 rq_depth, struct hinic3_dyna_rxq_res *rxqs_res); int hinic3_rx_poll(struct hinic3_rxq *rxq, int budget); #endif diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c index 3f7f73430be4..92c43c05e3f2 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.c @@ -55,9 +55,9 @@ void hinic3_free_txqs(struct net_device *netdev) static void hinic3_set_buf_desc(struct hinic3_sq_bufdesc *buf_descs, dma_addr_t addr, u32 len) { - buf_descs->hi_addr = upper_32_bits(addr); - buf_descs->lo_addr = lower_32_bits(addr); - buf_descs->len = len; + buf_descs->hi_addr = cpu_to_le32(upper_32_bits(addr)); + buf_descs->lo_addr = cpu_to_le32(lower_32_bits(addr)); + buf_descs->len = cpu_to_le32(len); } static int hinic3_tx_map_skb(struct net_device *netdev, struct sk_buff *skb, @@ -81,10 +81,10 @@ static int hinic3_tx_map_skb(struct net_device *netdev, struct sk_buff *skb, dma_info[0].len = skb_headlen(skb); - wqe_desc->hi_addr = upper_32_bits(dma_info[0].dma); - wqe_desc->lo_addr = lower_32_bits(dma_info[0].dma); + wqe_desc->hi_addr = cpu_to_le32(upper_32_bits(dma_info[0].dma)); + wqe_desc->lo_addr = 
cpu_to_le32(lower_32_bits(dma_info[0].dma)); - wqe_desc->ctrl_len = dma_info[0].len; + wqe_desc->ctrl_len = cpu_to_le32(dma_info[0].len); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { frag = &(skb_shinfo(skb)->frags[i]); @@ -116,6 +116,7 @@ err_unmap_page: } dma_unmap_single(&pdev->dev, dma_info[0].dma, dma_info[0].len, DMA_TO_DEVICE); + return err; } @@ -138,6 +139,23 @@ static void hinic3_tx_unmap_skb(struct net_device *netdev, dma_info[0].len, DMA_TO_DEVICE); } +static void free_all_tx_skbs(struct net_device *netdev, u32 sq_depth, + struct hinic3_tx_info *tx_info_arr) +{ + struct hinic3_tx_info *tx_info; + u32 idx; + + for (idx = 0; idx < sq_depth; idx++) { + tx_info = &tx_info_arr[idx]; + if (tx_info->skb) { + hinic3_tx_unmap_skb(netdev, tx_info->skb, + tx_info->dma_info); + dev_kfree_skb_any(tx_info->skb); + tx_info->skb = NULL; + } + } +} + union hinic3_ip { struct iphdr *v4; struct ipv6hdr *v6; @@ -197,7 +215,8 @@ static int hinic3_tx_csum(struct hinic3_txq *txq, struct hinic3_sq_task *task, union hinic3_ip ip; u8 l4_proto; - task->pkt_info0 |= SQ_TASK_INFO0_SET(1, TUNNEL_FLAG); + task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, + TUNNEL_FLAG)); ip.hdr = skb_network_header(skb); if (ip.v4->version == 4) { @@ -226,7 +245,7 @@ static int hinic3_tx_csum(struct hinic3_txq *txq, struct hinic3_sq_task *task, } } - task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L4_EN); + task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, INNER_L4_EN)); return 1; } @@ -255,26 +274,28 @@ static void get_inner_l3_l4_type(struct sk_buff *skb, union hinic3_ip *ip, } } -static void hinic3_set_tso_info(struct hinic3_sq_task *task, u32 *queue_info, +static void hinic3_set_tso_info(struct hinic3_sq_task *task, __le32 *queue_info, enum hinic3_l4_offload_type l4_offload, u32 offset, u32 mss) { if (l4_offload == HINIC3_L4_OFFLOAD_TCP) { - *queue_info |= SQ_CTRL_QUEUE_INFO_SET(1, TSO); - task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L4_EN); + *queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(1, TSO)); + task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, + INNER_L4_EN)); } else if (l4_offload == HINIC3_L4_OFFLOAD_UDP) { - *queue_info |= SQ_CTRL_QUEUE_INFO_SET(1, UFO); - task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L4_EN); + *queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(1, UFO)); + task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, + INNER_L4_EN)); } /* enable L3 calculation */ - task->pkt_info0 |= SQ_TASK_INFO0_SET(1, INNER_L3_EN); + task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, INNER_L3_EN)); - *queue_info |= SQ_CTRL_QUEUE_INFO_SET(offset >> 1, PLDOFF); + *queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(offset >> 1, PLDOFF)); /* set MSS value */ - *queue_info &= ~SQ_CTRL_QUEUE_INFO_MSS_MASK; - *queue_info |= SQ_CTRL_QUEUE_INFO_SET(mss, MSS); + *queue_info &= cpu_to_le32(~SQ_CTRL_QUEUE_INFO_MSS_MASK); + *queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(mss, MSS)); } static __sum16 csum_magic(union hinic3_ip *ip, unsigned short proto) @@ -284,7 +305,7 @@ static __sum16 csum_magic(union hinic3_ip *ip, unsigned short proto) csum_ipv6_magic(&ip->v6->saddr, &ip->v6->daddr, 0, proto, 0); } -static int hinic3_tso(struct hinic3_sq_task *task, u32 *queue_info, +static int hinic3_tso(struct hinic3_sq_task *task, __le32 *queue_info, struct sk_buff *skb) { enum hinic3_l4_offload_type l4_offload; @@ -305,15 +326,17 @@ static int hinic3_tso(struct hinic3_sq_task *task, u32 *queue_info, if (skb->encapsulation) { u32 gso_type = skb_shinfo(skb)->gso_type; /* L3 checksum is always enabled */ - task->pkt_info0 |= 
SQ_TASK_INFO0_SET(1, OUT_L3_EN); - task->pkt_info0 |= SQ_TASK_INFO0_SET(1, TUNNEL_FLAG); + task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, OUT_L3_EN)); + task->pkt_info0 |= cpu_to_le32(SQ_TASK_INFO0_SET(1, + TUNNEL_FLAG)); l4.hdr = skb_transport_header(skb); ip.hdr = skb_network_header(skb); if (gso_type & SKB_GSO_UDP_TUNNEL_CSUM) { l4.udp->check = ~csum_magic(&ip, IPPROTO_UDP); - task->pkt_info0 |= SQ_TASK_INFO0_SET(1, OUT_L4_EN); + task->pkt_info0 |= + cpu_to_le32(SQ_TASK_INFO0_SET(1, OUT_L4_EN)); } ip.hdr = skb_inner_network_header(skb); @@ -343,13 +366,14 @@ static void hinic3_set_vlan_tx_offload(struct hinic3_sq_task *task, * 2=select TPID2 in IPSU, 3=select TPID3 in IPSU, * 4=select TPID4 in IPSU */ - task->vlan_offload = SQ_TASK_INFO3_SET(vlan_tag, VLAN_TAG) | - SQ_TASK_INFO3_SET(vlan_tpid, VLAN_TPID) | - SQ_TASK_INFO3_SET(1, VLAN_TAG_VALID); + task->vlan_offload = + cpu_to_le32(SQ_TASK_INFO3_SET(vlan_tag, VLAN_TAG) | + SQ_TASK_INFO3_SET(vlan_tpid, VLAN_TPID) | + SQ_TASK_INFO3_SET(1, VLAN_TAG_VALID)); } static u32 hinic3_tx_offload(struct sk_buff *skb, struct hinic3_sq_task *task, - u32 *queue_info, struct hinic3_txq *txq) + __le32 *queue_info, struct hinic3_txq *txq) { u32 offload = 0; int tso_cs_en; @@ -440,39 +464,41 @@ static u16 hinic3_set_wqe_combo(struct hinic3_txq *txq, } static void hinic3_prepare_sq_ctrl(struct hinic3_sq_wqe_combo *wqe_combo, - u32 queue_info, int nr_descs, u16 owner) + __le32 queue_info, int nr_descs, u16 owner) { struct hinic3_sq_wqe_desc *wqe_desc = wqe_combo->ctrl_bd0; if (wqe_combo->wqe_type == SQ_WQE_COMPACT_TYPE) { wqe_desc->ctrl_len |= - SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | - SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) | - SQ_CTRL_SET(owner, OWNER); + cpu_to_le32(SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | + SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) | + SQ_CTRL_SET(owner, OWNER)); /* compact wqe queue_info will transfer to chip */ wqe_desc->queue_info = 0; return; } - wqe_desc->ctrl_len |= SQ_CTRL_SET(nr_descs, BUFDESC_NUM) | - SQ_CTRL_SET(wqe_combo->task_type, TASKSECT_LEN) | - SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | - SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) | - SQ_CTRL_SET(owner, OWNER); + wqe_desc->ctrl_len |= + cpu_to_le32(SQ_CTRL_SET(nr_descs, BUFDESC_NUM) | + SQ_CTRL_SET(wqe_combo->task_type, TASKSECT_LEN) | + SQ_CTRL_SET(SQ_NORMAL_WQE, DATA_FORMAT) | + SQ_CTRL_SET(wqe_combo->wqe_type, EXTENDED) | + SQ_CTRL_SET(owner, OWNER)); wqe_desc->queue_info = queue_info; - wqe_desc->queue_info |= SQ_CTRL_QUEUE_INFO_SET(1, UC); + wqe_desc->queue_info |= cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(1, UC)); if (!SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS)) { wqe_desc->queue_info |= - SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_DEFAULT, MSS); + cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_DEFAULT, MSS)); } else if (SQ_CTRL_QUEUE_INFO_GET(wqe_desc->queue_info, MSS) < HINIC3_TX_MSS_MIN) { /* mss should not be less than 80 */ - wqe_desc->queue_info &= ~SQ_CTRL_QUEUE_INFO_MSS_MASK; + wqe_desc->queue_info &= + cpu_to_le32(~SQ_CTRL_QUEUE_INFO_MSS_MASK); wqe_desc->queue_info |= - SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_MIN, MSS); + cpu_to_le32(SQ_CTRL_QUEUE_INFO_SET(HINIC3_TX_MSS_MIN, MSS)); } } @@ -482,12 +508,13 @@ static netdev_tx_t hinic3_send_one_skb(struct sk_buff *skb, { struct hinic3_sq_wqe_combo wqe_combo = {}; struct hinic3_tx_info *tx_info; - u32 offload, queue_info = 0; struct hinic3_sq_task task; u16 wqebb_cnt, num_sge; + __le32 queue_info = 0; u16 saved_wq_prod_idx; u16 owner, pi = 0; u8 saved_sq_owner; + u32 offload; int err; if 
(unlikely(skb->len < MIN_SKB_LEN)) { @@ -575,6 +602,7 @@ netdev_tx_t hinic3_xmit_frame(struct sk_buff *skb, struct net_device *netdev) err_drop_pkt: dev_kfree_skb_any(skb); + return NETDEV_TX_OK; } @@ -624,6 +652,90 @@ void hinic3_flush_txqs(struct net_device *netdev) #define HINIC3_BDS_PER_SQ_WQEBB \ (HINIC3_SQ_WQEBB_SIZE / sizeof(struct hinic3_sq_bufdesc)) +int hinic3_alloc_txqs_res(struct net_device *netdev, u16 num_sq, + u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res) +{ + struct hinic3_dyna_txq_res *tqres; + int idx; + + for (idx = 0; idx < num_sq; idx++) { + tqres = &txqs_res[idx]; + + tqres->tx_info = kcalloc(sq_depth, sizeof(*tqres->tx_info), + GFP_KERNEL); + if (!tqres->tx_info) + goto err_free_tqres; + + tqres->bds = kcalloc(sq_depth * HINIC3_BDS_PER_SQ_WQEBB + + HINIC3_MAX_SQ_SGE, sizeof(*tqres->bds), + GFP_KERNEL); + if (!tqres->bds) { + kfree(tqres->tx_info); + goto err_free_tqres; + } + } + + return 0; + +err_free_tqres: + while (idx > 0) { + idx--; + tqres = &txqs_res[idx]; + + kfree(tqres->bds); + kfree(tqres->tx_info); + } + + return -ENOMEM; +} + +void hinic3_free_txqs_res(struct net_device *netdev, u16 num_sq, + u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res) +{ + struct hinic3_dyna_txq_res *tqres; + int idx; + + for (idx = 0; idx < num_sq; idx++) { + tqres = &txqs_res[idx]; + + free_all_tx_skbs(netdev, sq_depth, tqres->tx_info); + kfree(tqres->bds); + kfree(tqres->tx_info); + } +} + +int hinic3_configure_txqs(struct net_device *netdev, u16 num_sq, + u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res) +{ + struct hinic3_nic_dev *nic_dev = netdev_priv(netdev); + struct hinic3_dyna_txq_res *tqres; + struct hinic3_txq *txq; + u16 q_id; + u32 idx; + + for (q_id = 0; q_id < num_sq; q_id++) { + txq = &nic_dev->txqs[q_id]; + tqres = &txqs_res[q_id]; + + txq->q_depth = sq_depth; + txq->q_mask = sq_depth - 1; + + txq->tx_stop_thrs = min(HINIC3_DEFAULT_STOP_THRS, + sq_depth / 20); + txq->tx_start_thrs = min(HINIC3_DEFAULT_START_THRS, + sq_depth / 10); + + txq->tx_info = tqres->tx_info; + for (idx = 0; idx < sq_depth; idx++) + txq->tx_info[idx].dma_info = + &tqres->bds[idx * HINIC3_BDS_PER_SQ_WQEBB]; + + txq->sq = &nic_dev->nic_io->sq[q_id]; + } + + return 0; +} + bool hinic3_tx_poll(struct hinic3_txq *txq, int budget) { struct net_device *netdev = txq->netdev; diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.h b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.h index 9e505cc19dd5..7e1b872ba752 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_tx.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_tx.h @@ -58,7 +58,7 @@ enum hinic3_tx_offload_type { #define SQ_CTRL_QUEUE_INFO_SET(val, member) \ FIELD_PREP(SQ_CTRL_QUEUE_INFO_##member##_MASK, val) #define SQ_CTRL_QUEUE_INFO_GET(val, member) \ - FIELD_GET(SQ_CTRL_QUEUE_INFO_##member##_MASK, val) + FIELD_GET(SQ_CTRL_QUEUE_INFO_##member##_MASK, le32_to_cpu(val)) #define SQ_CTRL_MAX_PLDOFF 221 @@ -77,17 +77,17 @@ enum hinic3_tx_offload_type { FIELD_PREP(SQ_TASK_INFO3_##member##_MASK, val) struct hinic3_sq_wqe_desc { - u32 ctrl_len; - u32 queue_info; - u32 hi_addr; - u32 lo_addr; + __le32 ctrl_len; + __le32 queue_info; + __le32 hi_addr; + __le32 lo_addr; }; struct hinic3_sq_task { - u32 pkt_info0; - u32 ip_identify; - u32 rsvd; - u32 vlan_offload; + __le32 pkt_info0; + __le32 ip_identify; + __le32 rsvd; + __le32 vlan_offload; }; struct hinic3_sq_wqe_combo { @@ -125,9 +125,21 @@ struct hinic3_txq { struct hinic3_io_queue *sq; } ____cacheline_aligned; +struct hinic3_dyna_txq_res { + struct hinic3_tx_info *tx_info; + struct 
hinic3_dma_info *bds; +}; + int hinic3_alloc_txqs(struct net_device *netdev); void hinic3_free_txqs(struct net_device *netdev); +int hinic3_alloc_txqs_res(struct net_device *netdev, u16 num_sq, + u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res); +void hinic3_free_txqs_res(struct net_device *netdev, u16 num_sq, + u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res); +int hinic3_configure_txqs(struct net_device *netdev, u16 num_sq, + u32 sq_depth, struct hinic3_dyna_txq_res *txqs_res); + netdev_tx_t hinic3_xmit_frame(struct sk_buff *skb, struct net_device *netdev); bool hinic3_tx_poll(struct hinic3_txq *txq, int budget); void hinic3_flush_txqs(struct net_device *netdev); diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c index 2ac7efcd1365..bc3ffdc25cf6 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.c @@ -6,6 +6,110 @@ #include "hinic3_hwdev.h" #include "hinic3_wq.h" +#define WQ_MIN_DEPTH 64 +#define WQ_MAX_DEPTH 65536 +#define WQ_PAGE_ADDR_SIZE sizeof(u64) +#define WQ_MAX_NUM_PAGES (HINIC3_MIN_PAGE_SIZE / WQ_PAGE_ADDR_SIZE) + +static int wq_init_wq_block(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + struct hinic3_queue_pages *qpages = &wq->qpages; + int i; + + if (hinic3_wq_is_0_level_cla(wq)) { + wq->wq_block_paddr = qpages->pages[0].align_paddr; + wq->wq_block_vaddr = qpages->pages[0].align_vaddr; + + return 0; + } + + if (wq->qpages.num_pages > WQ_MAX_NUM_PAGES) { + dev_err(hwdev->dev, "wq num_pages exceed limit: %lu\n", + WQ_MAX_NUM_PAGES); + return -EFAULT; + } + + wq->wq_block_vaddr = dma_alloc_coherent(hwdev->dev, + HINIC3_MIN_PAGE_SIZE, + &wq->wq_block_paddr, + GFP_KERNEL); + if (!wq->wq_block_vaddr) + return -ENOMEM; + + for (i = 0; i < qpages->num_pages; i++) + wq->wq_block_vaddr[i] = cpu_to_be64(qpages->pages[i].align_paddr); + + return 0; +} + +static int wq_alloc_pages(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + int err; + + err = hinic3_queue_pages_alloc(hwdev, &wq->qpages, 0); + if (err) + return err; + + err = wq_init_wq_block(hwdev, wq); + if (err) { + hinic3_queue_pages_free(hwdev, &wq->qpages); + return err; + } + + return 0; +} + +static void wq_free_pages(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + if (!hinic3_wq_is_0_level_cla(wq)) + dma_free_coherent(hwdev->dev, + HINIC3_MIN_PAGE_SIZE, + wq->wq_block_vaddr, + wq->wq_block_paddr); + + hinic3_queue_pages_free(hwdev, &wq->qpages); +} + +int hinic3_wq_create(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq, + u32 q_depth, u16 wqebb_size) +{ + u32 wq_page_size; + + if (q_depth < WQ_MIN_DEPTH || q_depth > WQ_MAX_DEPTH || + !is_power_of_2(q_depth) || !is_power_of_2(wqebb_size)) { + dev_err(hwdev->dev, "Invalid WQ: q_depth %u, wqebb_size %u\n", + q_depth, wqebb_size); + return -EINVAL; + } + + wq_page_size = ALIGN(hwdev->wq_page_size, HINIC3_MIN_PAGE_SIZE); + + memset(wq, 0, sizeof(*wq)); + wq->q_depth = q_depth; + wq->idx_mask = q_depth - 1; + + hinic3_queue_pages_init(&wq->qpages, q_depth, wq_page_size, wqebb_size); + + return wq_alloc_pages(hwdev, wq); +} + +void hinic3_wq_destroy(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq) +{ + wq_free_pages(hwdev, wq); +} + +void hinic3_wq_reset(struct hinic3_wq *wq) +{ + struct hinic3_queue_pages *qpages = &wq->qpages; + u16 pg_idx; + + wq->cons_idx = 0; + wq->prod_idx = 0; + + for (pg_idx = 0; pg_idx < qpages->num_pages; pg_idx++) + memset(qpages->pages[pg_idx].align_vaddr, 0, qpages->page_size); +} + void 
hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq, u16 num_wqebbs, u16 *prod_idx, struct hinic3_sq_bufdesc **first_part_wqebbs, @@ -27,3 +131,8 @@ void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq, *second_part_wqebbs = get_q_element(&wq->qpages, idx, NULL); } } + +bool hinic3_wq_is_0_level_cla(const struct hinic3_wq *wq) +{ + return wq->qpages.num_pages == 1; +} diff --git a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h index ab37893efd7e..9b3f012bec80 100644 --- a/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h +++ b/drivers/net/ethernet/huawei/hinic3/hinic3_wq.h @@ -10,10 +10,10 @@ struct hinic3_sq_bufdesc { /* 31-bits Length, L2NIC only uses length[17:0] */ - u32 len; - u32 rsvd; - u32 hi_addr; - u32 lo_addr; + __le32 len; + __le32 rsvd; + __le32 hi_addr; + __le32 lo_addr; }; /* Work queue is used to submit elements (tx, rx, cmd) to hw. @@ -59,6 +59,7 @@ static inline void *hinic3_wq_get_one_wqebb(struct hinic3_wq *wq, u16 *pi) { *pi = wq->prod_idx & wq->idx_mask; wq->prod_idx++; + return get_q_element(&wq->qpages, *pi, NULL); } @@ -67,10 +68,20 @@ static inline void hinic3_wq_put_wqebbs(struct hinic3_wq *wq, u16 num_wqebbs) wq->cons_idx += num_wqebbs; } +static inline u64 hinic3_wq_get_first_wqe_page_addr(const struct hinic3_wq *wq) +{ + return wq->qpages.pages[0].align_paddr; +} + +int hinic3_wq_create(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq, + u32 q_depth, u16 wqebb_size); +void hinic3_wq_destroy(struct hinic3_hwdev *hwdev, struct hinic3_wq *wq); +void hinic3_wq_reset(struct hinic3_wq *wq); void hinic3_wq_get_multi_wqebbs(struct hinic3_wq *wq, u16 num_wqebbs, u16 *prod_idx, struct hinic3_sq_bufdesc **first_part_wqebbs, struct hinic3_sq_bufdesc **second_part_wqebbs, u16 *first_part_wqebbs_num); +bool hinic3_wq_is_0_level_cla(const struct hinic3_wq *wq); #endif diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index eec971567aac..3808148c1fc7 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -756,6 +756,17 @@ static void deactivate_rx_pools(struct ibmvnic_adapter *adapter) adapter->rx_pool[i].active = 0; } +static void ibmvnic_set_safe_max_ind_descs(struct ibmvnic_adapter *adapter) +{ + if (adapter->cur_max_ind_descs > IBMVNIC_SAFE_IND_DESC) { + netdev_info(adapter->netdev, + "set max ind descs from %u to safe limit %u\n", + adapter->cur_max_ind_descs, + IBMVNIC_SAFE_IND_DESC); + adapter->cur_max_ind_descs = IBMVNIC_SAFE_IND_DESC; + } +} + static void replenish_rx_pool(struct ibmvnic_adapter *adapter, struct ibmvnic_rx_pool *pool) { @@ -843,7 +854,7 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, sub_crq->rx_add.len = cpu_to_be32(pool->buff_size << shift); /* if send_subcrq_indirect queue is full, flush to VIOS */ - if (ind_bufp->index == IBMVNIC_MAX_IND_DESCS || + if (ind_bufp->index == adapter->cur_max_ind_descs || i == count - 1) { lpar_rc = send_subcrq_indirect(adapter, handle, @@ -862,6 +873,14 @@ static void replenish_rx_pool(struct ibmvnic_adapter *adapter, failure: if (lpar_rc != H_PARAMETER && lpar_rc != H_CLOSED) dev_err_ratelimited(dev, "rx: replenish packet buffer failed\n"); + + /* Detect platform limit H_PARAMETER */ + if (lpar_rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ for (i = ind_bufp->index - 1; i >= 0; --i) { struct ibmvnic_rx_buff *rx_buff; @@ -2381,16 +2400,28 @@ static int 
ibmvnic_tx_scrq_flush(struct ibmvnic_adapter *adapter, rc = send_subcrq_direct(adapter, handle, (u64 *)ind_bufp->indir_arr); - if (rc) + if (rc) { + dev_err_ratelimited(&adapter->vdev->dev, + "tx_flush failed, rc=%u (%llu entries dma=%pad handle=%llx)\n", + rc, entries, &dma_addr, handle); + /* Detect platform limit H_PARAMETER */ + if (rc == H_PARAMETER) + ibmvnic_set_safe_max_ind_descs(adapter); + + /* For all error case, temporarily drop only this batch + * Rely on TCP/IP retransmissions to retry and recover + */ ibmvnic_tx_scrq_clean_buffer(adapter, tx_scrq); - else + } else { ind_bufp->index = 0; + } return rc; } static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ibmvnic_adapter *adapter = netdev_priv(netdev); + u32 cur_max_ind_descs = adapter->cur_max_ind_descs; int queue_num = skb_get_queue_mapping(skb); u8 *hdrs = (u8 *)&adapter->tx_rx_desc_req; struct device *dev = &adapter->vdev->dev; @@ -2590,7 +2621,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) tx_crq.v1.n_crq_elem = num_entries; tx_buff->num_entries = num_entries; /* flush buffer if current entry can not fit */ - if (num_entries + ind_bufp->index > IBMVNIC_MAX_IND_DESCS) { + if (num_entries + ind_bufp->index > cur_max_ind_descs) { lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_flush_err; @@ -2603,7 +2634,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev) ind_bufp->index += num_entries; if (__netdev_tx_sent_queue(txq, skb->len, netdev_xmit_more() && - ind_bufp->index < IBMVNIC_MAX_IND_DESCS)) { + ind_bufp->index < cur_max_ind_descs)) { lpar_rc = ibmvnic_tx_scrq_flush(adapter, tx_scrq, true); if (lpar_rc != H_SUCCESS) goto tx_err; @@ -4006,7 +4037,7 @@ static void release_sub_crq_queue(struct ibmvnic_adapter *adapter, } dma_free_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, scrq->ind_buf.indir_arr, scrq->ind_buf.indir_dma); @@ -4063,7 +4094,7 @@ static struct ibmvnic_sub_crq_queue *init_sub_crq_queue(struct ibmvnic_adapter scrq->ind_buf.indir_arr = dma_alloc_coherent(dev, - IBMVNIC_IND_ARR_SZ, + IBMVNIC_IND_MAX_ARR_SZ, &scrq->ind_buf.indir_dma, GFP_KERNEL); @@ -6369,6 +6400,19 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset) rc = reset_sub_crq_queues(adapter); } } else { + if (adapter->reset_reason == VNIC_RESET_MOBILITY) { + /* After an LPM, reset the max number of indirect + * subcrq descriptors per H_SEND_SUB_CRQ_INDIRECT + * hcall to the default max (e.g POWER8 -> POWER10) + * + * If the new destination platform does not support + * the higher limit max (e.g. POWER10-> POWER8 LPM) + * H_PARAMETER will trigger automatic fallback to the + * safe minimum limit. 
+ */ + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; + } + rc = init_sub_crqs(adapter); } @@ -6520,6 +6564,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) adapter->wait_for_reset = false; adapter->last_reset_time = jiffies; + adapter->cur_max_ind_descs = IBMVNIC_MAX_IND_DESCS; rc = register_netdev(netdev); if (rc) { diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index 246ddce753f9..480dc587078f 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -29,8 +29,9 @@ #define IBMVNIC_BUFFS_PER_POOL 100 #define IBMVNIC_MAX_QUEUES 16 #define IBMVNIC_MAX_QUEUE_SZ 4096 -#define IBMVNIC_MAX_IND_DESCS 16 -#define IBMVNIC_IND_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) +#define IBMVNIC_MAX_IND_DESCS 128 +#define IBMVNIC_SAFE_IND_DESC 16 +#define IBMVNIC_IND_MAX_ARR_SZ (IBMVNIC_MAX_IND_DESCS * 32) #define IBMVNIC_TSO_BUF_SZ 65536 #define IBMVNIC_TSO_BUFS 64 @@ -930,6 +931,7 @@ struct ibmvnic_adapter { struct ibmvnic_control_ip_offload_buffer ip_offload_ctrl; dma_addr_t ip_offload_ctrl_tok; u32 msg_enable; + u32 cur_max_ind_descs; /* Vital Product Data (VPD) */ struct ibmvnic_vpd *vpd; diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index b05cc0d7a15d..a563a94e2780 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -146,6 +146,7 @@ config IXGBE tristate "Intel(R) 10GbE PCI Express adapters support" depends on PCI depends on PTP_1588_CLOCK_OPTIONAL + select LIBIE_FWLOG select MDIO select NET_DEVLINK select PLDMFW @@ -297,6 +298,7 @@ config ICE select DIMLIB select LIBIE select LIBIE_ADMINQ + select LIBIE_FWLOG select NET_DEVLINK select PACKING select PLDMFW diff --git a/drivers/net/ethernet/intel/Makefile b/drivers/net/ethernet/intel/Makefile index 04c844ef4964..9a37dc76aef0 100644 --- a/drivers/net/ethernet/intel/Makefile +++ b/drivers/net/ethernet/intel/Makefile @@ -4,7 +4,7 @@ # obj-$(CONFIG_LIBETH) += libeth/ -obj-$(CONFIG_LIBIE) += libie/ +obj-y += libie/ obj-$(CONFIG_E100) += e100.o obj-$(CONFIG_E1000) += e1000/ diff --git a/drivers/net/ethernet/intel/e1000/e1000.h b/drivers/net/ethernet/intel/e1000/e1000.h index 75f3fd1d8d6e..ea6ccf4b728b 100644 --- a/drivers/net/ethernet/intel/e1000/e1000.h +++ b/drivers/net/ethernet/intel/e1000/e1000.h @@ -116,7 +116,7 @@ struct e1000_adapter; #define E1000_MASTER_SLAVE e1000_ms_hw_default #endif -#define E1000_MNG_VLAN_NONE (-1) +#define E1000_MNG_VLAN_NONE 0xFFFF /* wrapper around a pointer to a socket buffer, * so a DMA handle can be stored along with the buffer diff --git a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c index d06d29c6c037..726365c567ef 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_ethtool.c +++ b/drivers/net/ethernet/intel/e1000/e1000_ethtool.c @@ -806,7 +806,7 @@ static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data) } /* If Checksum is not Correct return error else test passed */ - if ((checksum != (u16)EEPROM_SUM) && !(*data)) + if (checksum != EEPROM_SUM && !(*data)) *data = 2; return *data; diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c index f9328f2e669f..0e5de52b1067 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_hw.c +++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c @@ -3970,7 +3970,7 @@ s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw) return E1000_SUCCESS; #endif - if (checksum == (u16)EEPROM_SUM) + if 
(checksum == EEPROM_SUM) return E1000_SUCCESS; else { e_dbg("EEPROM Checksum Invalid\n"); @@ -3997,7 +3997,7 @@ s32 e1000_update_eeprom_checksum(struct e1000_hw *hw) } checksum += eeprom_data; } - checksum = (u16)EEPROM_SUM - checksum; + checksum = EEPROM_SUM - checksum; if (e1000_write_eeprom(hw, EEPROM_CHECKSUM_REG, 1, &checksum) < 0) { e_dbg("EEPROM Write Error\n"); return -E1000_ERR_EEPROM; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index d8595e84326d..292389aceb2d 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -313,8 +313,7 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) } else { adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; } - if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && - (vid != old_vid) && + if (old_vid != E1000_MNG_VLAN_NONE && vid != old_vid && !test_bit(old_vid, adapter->active_vlans)) e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), old_vid); diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 952898151565..018e61aea787 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -64,7 +64,7 @@ struct e1000_info; #define AUTO_ALL_MODES 0 #define E1000_EEPROM_APME 0x0400 -#define E1000_MNG_VLAN_NONE (-1) +#define E1000_MNG_VLAN_NONE 0xFFFF #define DEFAULT_JUMBO 9234 diff --git a/drivers/net/ethernet/intel/e1000e/ethtool.c b/drivers/net/ethernet/intel/e1000e/ethtool.c index c0bbb12eed2e..8e40bb50a01e 100644 --- a/drivers/net/ethernet/intel/e1000e/ethtool.c +++ b/drivers/net/ethernet/intel/e1000e/ethtool.c @@ -549,12 +549,12 @@ static int e1000_set_eeprom(struct net_device *netdev, { struct e1000_adapter *adapter = netdev_priv(netdev); struct e1000_hw *hw = &adapter->hw; + size_t total_len, max_len; u16 *eeprom_buff; - void *ptr; - int max_len; + int ret_val = 0; int first_word; int last_word; - int ret_val = 0; + void *ptr; u16 i; if (eeprom->len == 0) @@ -569,6 +569,10 @@ static int e1000_set_eeprom(struct net_device *netdev, max_len = hw->nvm.word_size * 2; + if (check_add_overflow(eeprom->offset, eeprom->len, &total_len) || + total_len > max_len) + return -EFBIG; + first_word = eeprom->offset >> 1; last_word = (eeprom->offset + eeprom->len - 1) >> 1; eeprom_buff = kmalloc(max_len, GFP_KERNEL); @@ -959,7 +963,7 @@ static int e1000_eeprom_test(struct e1000_adapter *adapter, u64 *data) } /* If Checksum is not Correct return error else test passed */ - if ((checksum != (u16)NVM_SUM) && !(*data)) + if (checksum != NVM_SUM && !(*data)) *data = 2; return *data; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index b27a61fab371..201322dac233 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -2761,7 +2761,7 @@ static void e1000e_vlan_filter_disable(struct e1000_adapter *adapter) rctl &= ~(E1000_RCTL_VFE | E1000_RCTL_CFIEN); ew32(RCTL, rctl); - if (adapter->mng_vlan_id != (u16)E1000_MNG_VLAN_NONE) { + if (adapter->mng_vlan_id != E1000_MNG_VLAN_NONE) { e1000_vlan_rx_kill_vid(netdev, htons(ETH_P_8021Q), adapter->mng_vlan_id); adapter->mng_vlan_id = E1000_MNG_VLAN_NONE; @@ -2828,7 +2828,7 @@ static void e1000_update_mng_vlan(struct e1000_adapter *adapter) adapter->mng_vlan_id = vid; } - if ((old_vid != (u16)E1000_MNG_VLAN_NONE) && (vid != old_vid)) + if (old_vid != E1000_MNG_VLAN_NONE && vid != old_vid) e1000_vlan_rx_kill_vid(netdev, 
htons(ETH_P_8021Q), old_vid); } diff --git a/drivers/net/ethernet/intel/e1000e/nvm.c b/drivers/net/ethernet/intel/e1000e/nvm.c index 16369e6d245a..4bde1c9de1b9 100644 --- a/drivers/net/ethernet/intel/e1000e/nvm.c +++ b/drivers/net/ethernet/intel/e1000e/nvm.c @@ -564,7 +564,7 @@ s32 e1000e_validate_nvm_checksum_generic(struct e1000_hw *hw) return 0; } - if (checksum != (u16)NVM_SUM) { + if (checksum != NVM_SUM) { e_dbg("NVM Checksum Invalid\n"); return -E1000_ERR_NVM; } @@ -594,7 +594,7 @@ s32 e1000e_update_nvm_checksum_generic(struct e1000_hw *hw) } checksum += nvm_data; } - checksum = (u16)NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = e1000_write_nvm(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) e_dbg("NVM Write Error while updating checksum.\n"); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index f51a63fca513..1f919a50c765 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -447,17 +447,16 @@ void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q, /** * fm10k_unbind_hw_stats_q - Unbind the queue counters from their queues * @q: pointer to the ring of hardware statistics queue - * @idx: index pointing to the start of the ring iteration * @count: number of queues to iterate over * * Function invalidates the index values for the queues so any updates that * may have happened are ignored and the base for the queue stats is reset. **/ -void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx, u32 count) +void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 count) { u32 i; - for (i = 0; i < count; i++, idx++, q++) { + for (i = 0; i < count; i++, q++) { q->rx_stats_idx = 0; q->tx_stats_idx = 0; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h index 4c48fb73b3e7..13fca6a91a01 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h @@ -43,6 +43,6 @@ u32 fm10k_read_hw_stats_32b(struct fm10k_hw *hw, u32 addr, void fm10k_update_hw_stats_q(struct fm10k_hw *hw, struct fm10k_hw_stats_q *q, u32 idx, u32 count); #define fm10k_unbind_hw_stats_32b(s) ((s)->base_h = 0) -void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 idx, u32 count); +void fm10k_unbind_hw_stats_q(struct fm10k_hw_stats_q *q, u32 count); s32 fm10k_get_host_state_generic(struct fm10k_hw *hw, bool *host_ready); #endif /* _FM10K_COMMON_H_ */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index 1954a04460d1..bf2029144c1d 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -560,7 +560,7 @@ static int fm10k_set_ringparam(struct net_device *netdev, /* allocate temporary buffer to store rings in */ i = max_t(int, interface->num_tx_queues, interface->num_rx_queues); - temp_ring = vmalloc(array_size(i, sizeof(struct fm10k_ring))); + temp_ring = vmalloc_array(i, sizeof(struct fm10k_ring)); if (!temp_ring) { err = -ENOMEM; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index 142f07ca8bc0..b8c15b837fda 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -37,7 +37,7 @@ static int __init fm10k_init_module(void) pr_info("%s\n", fm10k_copyright); /* create driver workqueue */ - 
fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_PERCPU, 0, fm10k_driver_name); if (!fm10k_workqueue) return -ENOMEM; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index b9dd7b719832..3394645a18fe 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -1389,7 +1389,7 @@ static void fm10k_rebind_hw_stats_pf(struct fm10k_hw *hw, fm10k_unbind_hw_stats_32b(&stats->nodesc_drop); /* Unbind Queue Statistics */ - fm10k_unbind_hw_stats_q(stats->q, 0, hw->mac.max_queues); + fm10k_unbind_hw_stats_q(stats->q, hw->mac.max_queues); /* Reinitialize bases for all stats */ fm10k_update_hw_stats_pf(hw, stats); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c index 7fb1961f2921..6861a0bdc14e 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_vf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_vf.c @@ -465,7 +465,7 @@ static void fm10k_rebind_hw_stats_vf(struct fm10k_hw *hw, struct fm10k_hw_stats *stats) { /* Unbind Queue Statistics */ - fm10k_unbind_hw_stats_q(stats->q, 0, hw->mac.max_queues); + fm10k_unbind_hw_stats_q(stats->q, hw->mac.max_queues); /* Reinitialize bases for all stats */ fm10k_update_hw_stats_vf(hw, stats); diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 49aa4497efce..801a57a925da 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -1278,7 +1278,8 @@ struct i40e_mac_filter *i40e_add_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr); int i40e_del_mac_filter(struct i40e_vsi *vsi, const u8 *macaddr); bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi); -int i40e_count_filters(struct i40e_vsi *vsi); +int i40e_count_all_filters(struct i40e_vsi *vsi); +int i40e_count_active_filters(struct i40e_vsi *vsi); struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr); void i40e_vlan_stripping_enable(struct i40e_vsi *vsi); static inline bool i40e_is_sw_dcb(struct i40e_pf *pf) diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 76d872b91a38..cc02a85ad42b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -1561,6 +1561,7 @@ I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config); struct i40e_aq_set_mac_config { __le16 max_frame_size; u8 params; +#define I40E_AQ_SET_MAC_CONFIG_CRC_EN BIT(2) u8 tx_timer_priority; /* bitmap */ __le16 tx_timer_value; __le16 fc_refresh_threshold; diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 5f1a405cbbf8..518bc738ea3b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -359,8 +359,8 @@ static void i40e_client_add_instance(struct i40e_pf *pf) if (i40e_client_get_params(vsi, &cdev->lan_info.params)) goto free_cdev; - mac = list_first_entry(&cdev->lan_info.netdev->dev_addrs.list, - struct netdev_hw_addr, list); + mac = list_first_entry_or_null(&cdev->lan_info.netdev->dev_addrs.list, + struct netdev_hw_addr, list); if (mac) ether_addr_copy(cdev->lan_info.lanmac, mac->addr); else diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index 270e7e8cf9cf..59f5c1e810eb 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ 
b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1190,6 +1190,40 @@ int i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures, } /** + * i40e_aq_set_mac_config - Configure MAC settings + * @hw: pointer to the hw struct + * @max_frame_size: Maximum Frame Size to be supported by the port + * @cmd_details: pointer to command details structure or NULL + * + * Set MAC configuration (0x0603). Note that max_frame_size must be greater + * than zero. + * + * Return: 0 on success, or a negative error code on failure. + */ +int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, + struct i40e_asq_cmd_details *cmd_details) +{ + struct i40e_aq_set_mac_config *cmd; + struct libie_aq_desc desc; + + cmd = libie_aq_raw(&desc); + + if (max_frame_size == 0) + return -EINVAL; + + i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_set_mac_config); + + cmd->max_frame_size = cpu_to_le16(max_frame_size); + cmd->params = I40E_AQ_SET_MAC_CONFIG_CRC_EN; + +#define I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD 0x7FFF + cmd->fc_refresh_threshold = + cpu_to_le16(I40E_AQ_SET_MAC_CONFIG_FC_DEFAULT_THRESHOLD); + + return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details); +} + +/** * i40e_aq_clear_pxe_mode * @hw: pointer to the hw struct * @cmd_details: pointer to command details structure or NULL diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 6cd6f23d42a6..c17b5d290f0a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -40,48 +40,6 @@ static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid) * setup, adding or removing filters, or other things. Many of * these will be useful for some forms of unit testing. **************************************************************/ -static char i40e_dbg_command_buf[256] = ""; - -/** - * i40e_dbg_command_read - read for command datum - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - struct i40e_vsi *main_vsi; - int bytes_not_copied; - int buf_size = 256; - char *buf; - int len; - - /* don't allow partial reads */ - if (*ppos != 0) - return 0; - if (count < buf_size) - return -ENOSPC; - - buf = kzalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOSPC; - - main_vsi = i40e_pf_get_main_vsi(pf); - len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, - i40e_dbg_command_buf); - - bytes_not_copied = copy_to_user(buffer, buf, len); - kfree(buf); - - if (bytes_not_copied) - return -EFAULT; - - *ppos = len; - return len; -} static char *i40e_filter_state_string[] = { "INVALID", @@ -1621,7 +1579,6 @@ command_write_done: static const struct file_operations i40e_dbg_command_fops = { .owner = THIS_MODULE, .open = simple_open, - .read = i40e_dbg_command_read, .write = i40e_dbg_command_write, }; @@ -1630,48 +1587,6 @@ static const struct file_operations i40e_dbg_command_fops = { * The netdev_ops entry in debugfs is for giving the driver commands * to be executed from the netdev operations. 
**************************************************************/ -static char i40e_dbg_netdev_ops_buf[256] = ""; - -/** - * i40e_dbg_netdev_ops_read - read for netdev_ops datum - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - **/ -static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct i40e_pf *pf = filp->private_data; - struct i40e_vsi *main_vsi; - int bytes_not_copied; - int buf_size = 256; - char *buf; - int len; - - /* don't allow partal reads */ - if (*ppos != 0) - return 0; - if (count < buf_size) - return -ENOSPC; - - buf = kzalloc(buf_size, GFP_KERNEL); - if (!buf) - return -ENOSPC; - - main_vsi = i40e_pf_get_main_vsi(pf); - len = snprintf(buf, buf_size, "%s: %s\n", main_vsi->netdev->name, - i40e_dbg_netdev_ops_buf); - - bytes_not_copied = copy_to_user(buffer, buf, len); - kfree(buf); - - if (bytes_not_copied) - return -EFAULT; - - *ppos = len; - return len; -} /** * i40e_dbg_netdev_ops_write - write into netdev_ops datum @@ -1685,35 +1600,36 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, size_t count, loff_t *ppos) { struct i40e_pf *pf = filp->private_data; + char *cmd_buf, *buf_tmp; int bytes_not_copied; struct i40e_vsi *vsi; - char *buf_tmp; int vsi_seid; int i, cnt; /* don't allow partial writes */ if (*ppos != 0) return 0; - if (count >= sizeof(i40e_dbg_netdev_ops_buf)) - return -ENOSPC; - memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf)); - bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf, - buffer, count); - if (bytes_not_copied) + cmd_buf = kzalloc(count + 1, GFP_KERNEL); + if (!cmd_buf) + return count; + bytes_not_copied = copy_from_user(cmd_buf, buffer, count); + if (bytes_not_copied) { + kfree(cmd_buf); return -EFAULT; - i40e_dbg_netdev_ops_buf[count] = '\0'; + } + cmd_buf[count] = '\0'; - buf_tmp = strchr(i40e_dbg_netdev_ops_buf, '\n'); + buf_tmp = strchr(cmd_buf, '\n'); if (buf_tmp) { *buf_tmp = '\0'; - count = buf_tmp - i40e_dbg_netdev_ops_buf + 1; + count = buf_tmp - cmd_buf + 1; } - if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) { + if (strncmp(cmd_buf, "change_mtu", 10) == 0) { int mtu; - cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i %i", + cnt = sscanf(&cmd_buf[11], "%i %i", &vsi_seid, &mtu); if (cnt != 2) { dev_info(&pf->pdev->dev, "change_mtu <vsi_seid> <mtu>\n"); @@ -1735,8 +1651,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n"); } - } else if (strncmp(i40e_dbg_netdev_ops_buf, "set_rx_mode", 11) == 0) { - cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid); + } else if (strncmp(cmd_buf, "set_rx_mode", 11) == 0) { + cnt = sscanf(&cmd_buf[11], "%i", &vsi_seid); if (cnt != 1) { dev_info(&pf->pdev->dev, "set_rx_mode <vsi_seid>\n"); goto netdev_ops_write_done; @@ -1756,8 +1672,8 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n"); } - } else if (strncmp(i40e_dbg_netdev_ops_buf, "napi", 4) == 0) { - cnt = sscanf(&i40e_dbg_netdev_ops_buf[4], "%i", &vsi_seid); + } else if (strncmp(cmd_buf, "napi", 4) == 0) { + cnt = sscanf(&cmd_buf[4], "%i", &vsi_seid); if (cnt != 1) { dev_info(&pf->pdev->dev, "napi <vsi_seid>\n"); goto netdev_ops_write_done; @@ -1775,21 +1691,20 @@ static ssize_t i40e_dbg_netdev_ops_write(struct file *filp, dev_info(&pf->pdev->dev, "napi 
called\n"); } } else { - dev_info(&pf->pdev->dev, "unknown command '%s'\n", - i40e_dbg_netdev_ops_buf); + dev_info(&pf->pdev->dev, "unknown command '%s'\n", cmd_buf); dev_info(&pf->pdev->dev, "available commands\n"); dev_info(&pf->pdev->dev, " change_mtu <vsi_seid> <mtu>\n"); dev_info(&pf->pdev->dev, " set_rx_mode <vsi_seid>\n"); dev_info(&pf->pdev->dev, " napi <vsi_seid>\n"); } netdev_ops_write_done: + kfree(cmd_buf); return count; } static const struct file_operations i40e_dbg_netdev_ops_fops = { .owner = THIS_MODULE, .open = simple_open, - .read = i40e_dbg_netdev_ops_read, .write = i40e_dbg_netdev_ops_write, }; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index b83f823e4917..50be0a60ae13 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1243,12 +1243,30 @@ void i40e_update_stats(struct i40e_vsi *vsi) } /** - * i40e_count_filters - counts VSI mac filters + * i40e_count_all_filters - counts VSI MAC filters * @vsi: the VSI to be searched * - * Returns count of mac filters - **/ -int i40e_count_filters(struct i40e_vsi *vsi) + * Return: count of MAC filters in any state. + */ +int i40e_count_all_filters(struct i40e_vsi *vsi) +{ + struct i40e_mac_filter *f; + struct hlist_node *h; + int bkt, cnt = 0; + + hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) + cnt++; + + return cnt; +} + +/** + * i40e_count_active_filters - counts VSI MAC filters + * @vsi: the VSI to be searched + * + * Return: count of active MAC filters. + */ +int i40e_count_active_filters(struct i40e_vsi *vsi) { struct i40e_mac_filter *f; struct hlist_node *h; @@ -4156,7 +4174,7 @@ free_queue_irqs: irq_num = pf->msix_entries[base + vector].vector; irq_set_affinity_notifier(irq_num, NULL); irq_update_affinity_hint(irq_num, NULL); - free_irq(irq_num, &vsi->q_vectors[vector]); + free_irq(irq_num, vsi->q_vectors[vector]); } return err; } @@ -16045,13 +16063,17 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_dbg(&pf->pdev->dev, "get supported phy types ret = %pe last_status = %s\n", ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); - /* make sure the MFS hasn't been set lower than the default */ #define MAX_FRAME_SIZE_DEFAULT 0x2600 - val = FIELD_GET(I40E_PRTGL_SAH_MFS_MASK, - rd32(&pf->hw, I40E_PRTGL_SAH)); - if (val < MAX_FRAME_SIZE_DEFAULT) - dev_warn(&pdev->dev, "MFS for port %x (%d) has been set below the default (%d)\n", - pf->hw.port, val, MAX_FRAME_SIZE_DEFAULT); + + err = i40e_aq_set_mac_config(hw, MAX_FRAME_SIZE_DEFAULT, NULL); + if (err) + dev_warn(&pdev->dev, "set mac config ret = %pe last_status = %s\n", + ERR_PTR(err), libie_aq_str(pf->hw.aq.asq_last_status)); + + /* Make sure the MFS is set to the expected value */ + val = rd32(hw, I40E_PRTGL_SAH); + FIELD_MODIFY(I40E_PRTGL_SAH_MFS_MASK, &val, MAX_FRAME_SIZE_DEFAULT); + wr32(hw, I40E_PRTGL_SAH, val); /* Add a filter to drop all Flow control frames from any VSI from being * transmitted. By doing so we stop a malicious VF from sending out @@ -16613,7 +16635,7 @@ static int __init i40e_init_module(void) * since we need to be able to guarantee forward progress even under * memory pressure. 
*/ - i40e_wq = alloc_workqueue("%s", 0, 0, i40e_driver_name); + i40e_wq = alloc_workqueue("%s", WQ_PERCPU, 0, i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h index aef5de53ce3b..26bb7bffe361 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h +++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h @@ -98,6 +98,8 @@ int i40e_aq_set_mac_loopback(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); int i40e_aq_set_phy_int_mask(struct i40e_hw *hw, u16 mask, struct i40e_asq_cmd_details *cmd_details); +int i40e_aq_set_mac_config(struct i40e_hw *hw, u16 max_frame_size, + struct i40e_asq_cmd_details *cmd_details); int i40e_aq_clear_pxe_mode(struct i40e_hw *hw, struct i40e_asq_cmd_details *cmd_details); int i40e_aq_set_link_restart_an(struct i40e_hw *hw, diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 048c33039130..cc0b9efc2637 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -948,9 +948,6 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, if (!eop_desc) break; - /* prevent any other reads prior to eop_desc */ - smp_rmb(); - i40e_trace(clean_tx_irq, tx_ring, tx_desc, tx_buf); /* we have caught up to head, no work left to do */ if (tx_head == tx_desc) @@ -2151,10 +2148,10 @@ static struct sk_buff *i40e_construct_skb(struct i40e_ring *rx_ring, memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], sizeof(skb_frag_t) * nr_frags); - xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags, - sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_is_frag_pfmemalloc(xdp)); + xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_get_skb_flags(xdp)); /* First buffer has already been processed, so bump ntc */ if (++rx_ring->next_to_clean == rx_ring->count) @@ -2206,10 +2203,9 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, skb_metadata_set(skb, metasize); if (unlikely(xdp_buff_has_frags(xdp))) { - xdp_update_skb_shared_info(skb, nr_frags, - sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_is_frag_pfmemalloc(xdp)); + xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_get_skb_flags(xdp)); i40e_process_rx_buffs(rx_ring, I40E_XDP_PASS, xdp); } else { diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 9b8efdeafbcf..081a4526a2f0 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -448,7 +448,7 @@ static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_id, (qtype << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) | (pf_queue_id << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) | BIT(I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) | - (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT); + FIELD_PREP(I40E_QINT_RQCTL_ITR_INDX_MASK, itr_idx); wr32(hw, reg_idx, reg); } @@ -653,6 +653,13 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, /* only set the required fields */ tx_ctx.base = info->dma_ring_addr / 128; + + /* ring_len has to be multiple of 8 */ + if (!IS_ALIGNED(info->ring_len, 8) || + info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) { + ret = -EINVAL; + goto error_context; + } tx_ctx.qlen = 
info->ring_len; tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[0]); tx_ctx.rdylist_act = 0; @@ -716,6 +723,13 @@ static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_id, /* only set the required fields */ rx_ctx.base = info->dma_ring_addr / 128; + + /* ring_len has to be multiple of 32 */ + if (!IS_ALIGNED(info->ring_len, 32) || + info->ring_len > I40E_MAX_NUM_DESCRIPTORS_XL710) { + ret = -EINVAL; + goto error_param; + } rx_ctx.qlen = info->ring_len; if (info->splithdr_enabled) { @@ -1450,6 +1464,7 @@ static void i40e_trigger_vf_reset(struct i40e_vf *vf, bool flr) * functions that may still be running at this point. */ clear_bit(I40E_VF_STATE_INIT, &vf->vf_states); + clear_bit(I40E_VF_STATE_RESOURCES_LOADED, &vf->vf_states); /* In the case of a VFLR, the HW has already reset the VF and we * just need to clean up, so don't hit the VFRTRIG register. @@ -2116,7 +2131,10 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) size_t len = 0; int ret; - if (!i40e_sync_vf_state(vf, I40E_VF_STATE_INIT)) { + i40e_sync_vf_state(vf, I40E_VF_STATE_INIT); + + if (!test_bit(I40E_VF_STATE_INIT, &vf->vf_states) || + test_bit(I40E_VF_STATE_RESOURCES_LOADED, &vf->vf_states)) { aq_ret = -EINVAL; goto err; } @@ -2219,6 +2237,7 @@ static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf, u8 *msg) vf->default_lan_addr.addr); } set_bit(I40E_VF_STATE_ACTIVE, &vf->vf_states); + set_bit(I40E_VF_STATE_RESOURCES_LOADED, &vf->vf_states); err: /* send the response back to the VF */ @@ -2381,7 +2400,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) } if (vf->adq_enabled) { - if (idx >= ARRAY_SIZE(vf->ch)) { + if (idx >= vf->num_tc) { aq_ret = -ENODEV; goto error_param; } @@ -2402,7 +2421,7 @@ static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg) * to its appropriate VSIs based on TC mapping */ if (vf->adq_enabled) { - if (idx >= ARRAY_SIZE(vf->ch)) { + if (idx >= vf->num_tc) { aq_ret = -ENODEV; goto error_param; } @@ -2452,8 +2471,10 @@ static int i40e_validate_queue_map(struct i40e_vf *vf, u16 vsi_id, u16 vsi_queue_id, queue_id; for_each_set_bit(vsi_queue_id, &queuemap, I40E_MAX_VSI_QP) { - if (vf->adq_enabled) { - vsi_id = vf->ch[vsi_queue_id / I40E_MAX_VF_VSI].vsi_id; + u16 idx = vsi_queue_id / I40E_MAX_VF_VSI; + + if (vf->adq_enabled && idx < vf->num_tc) { + vsi_id = vf->ch[idx].vsi_id; queue_id = (vsi_queue_id % I40E_DEFAULT_QUEUES_PER_VF); } else { queue_id = vsi_queue_id; @@ -2841,24 +2862,6 @@ error_param: (u8 *)&stats, sizeof(stats)); } -/** - * i40e_can_vf_change_mac - * @vf: pointer to the VF info - * - * Return true if the VF is allowed to change its MAC filters, false otherwise - */ -static bool i40e_can_vf_change_mac(struct i40e_vf *vf) -{ - /* If the VF MAC address has been set administratively (via the - * ndo_set_vf_mac command), then deny permission to the VF to - * add/delete unicast MAC addresses, unless the VF is trusted - */ - if (vf->pf_set_mac && !vf->trusted) - return false; - - return true; -} - #define I40E_MAX_MACVLAN_PER_HW 3072 #define I40E_MAX_MACVLAN_PER_PF(num_ports) (I40E_MAX_MACVLAN_PER_HW / \ (num_ports)) @@ -2897,8 +2900,10 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, struct i40e_pf *pf = vf->pf; struct i40e_vsi *vsi = pf->vsi[vf->lan_vsi_idx]; struct i40e_hw *hw = &pf->hw; - int mac2add_cnt = 0; - int i; + int i, mac_add_max, mac_add_cnt = 0; + bool vf_trusted; + + vf_trusted = test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps); for (i = 0; i < al->num_elements; i++) { struct i40e_mac_filter 
*f; @@ -2918,9 +2923,8 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, * The VF may request to set the MAC address filter already * assigned to it so do not return an error in that case. */ - if (!i40e_can_vf_change_mac(vf) && - !is_multicast_ether_addr(addr) && - !ether_addr_equal(addr, vf->default_lan_addr.addr)) { + if (!vf_trusted && !is_multicast_ether_addr(addr) && + vf->pf_set_mac && !ether_addr_equal(addr, vf->default_lan_addr.addr)) { dev_err(&pf->pdev->dev, "VF attempting to override administratively set MAC address, bring down and up the VF interface to resume normal operation\n"); return -EPERM; @@ -2929,29 +2933,33 @@ static inline int i40e_check_vf_permission(struct i40e_vf *vf, /*count filters that really will be added*/ f = i40e_find_mac(vsi, addr); if (!f) - ++mac2add_cnt; + ++mac_add_cnt; } /* If this VF is not privileged, then we can't add more than a limited - * number of addresses. Check to make sure that the additions do not - * push us over the limit. - */ - if (!test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) { - if ((i40e_count_filters(vsi) + mac2add_cnt) > - I40E_VC_MAX_MAC_ADDR_PER_VF) { - dev_err(&pf->pdev->dev, - "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n"); - return -EPERM; - } - /* If this VF is trusted, it can use more resources than untrusted. + * number of addresses. + * + * If this VF is trusted, it can use more resources than untrusted. * However to ensure that every trusted VF has appropriate number of * resources, divide whole pool of resources per port and then across * all VFs. */ - } else { - if ((i40e_count_filters(vsi) + mac2add_cnt) > - I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs, - hw->num_ports)) { + if (!vf_trusted) + mac_add_max = I40E_VC_MAX_MAC_ADDR_PER_VF; + else + mac_add_max = I40E_VC_MAX_MACVLAN_PER_TRUSTED_VF(pf->num_alloc_vfs, hw->num_ports); + + /* VF can replace all its filters in one step, in this case mac_add_max + * will be added as active and another mac_add_max will be in + * a to-be-removed state. Account for that. 
+ */ + if ((i40e_count_active_filters(vsi) + mac_add_cnt) > mac_add_max || + (i40e_count_all_filters(vsi) + mac_add_cnt) > 2 * mac_add_max) { + if (!vf_trusted) { + dev_err(&pf->pdev->dev, + "Cannot add more MAC addresses, VF is not trusted, switch the VF to trusted to add more functionality\n"); + return -EPERM; + } else { dev_err(&pf->pdev->dev, "Cannot add more MAC addresses, trusted VF exhausted it's resources\n"); return -EPERM; @@ -3587,7 +3595,7 @@ static int i40e_validate_cloud_filter(struct i40e_vf *vf, /* action_meta is TC number here to which the filter is applied */ if (!tc_filter->action_meta || - tc_filter->action_meta > vf->num_tc) { + tc_filter->action_meta >= vf->num_tc) { dev_info(&pf->pdev->dev, "VF %d: Invalid TC number %u\n", vf->vf_id, tc_filter->action_meta); goto err; @@ -3884,6 +3892,8 @@ err: aq_ret); } +#define I40E_MAX_VF_CLOUD_FILTER 0xFF00 + /** * i40e_vc_add_cloud_filter * @vf: pointer to the VF info @@ -3923,6 +3933,14 @@ static int i40e_vc_add_cloud_filter(struct i40e_vf *vf, u8 *msg) goto err_out; } + if (vf->num_cloud_filters >= I40E_MAX_VF_CLOUD_FILTER) { + dev_warn(&pf->pdev->dev, + "VF %d: Max number of filters reached, can't apply cloud filter\n", + vf->vf_id); + aq_ret = -ENOSPC; + goto err_out; + } + cfilter = kzalloc(sizeof(*cfilter), GFP_KERNEL); if (!cfilter) { aq_ret = -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 5cf74f16f433..f558b45725c8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -41,7 +41,8 @@ enum i40e_vf_states { I40E_VF_STATE_MC_PROMISC, I40E_VF_STATE_UC_PROMISC, I40E_VF_STATE_PRE_ENABLE, - I40E_VF_STATE_RESETTING + I40E_VF_STATE_RESETTING, + I40E_VF_STATE_RESOURCES_LOADED, }; /* VF capabilities */ diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index 69054af4689a..c2fbe443ef85 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -5491,7 +5491,7 @@ static int iavf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); struct iavf_adapter *adapter; - u32 err; + int err; adapter = iavf_pdev_to_adapter(pdev); diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index d0f9c9492363..5b2c666496e7 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -42,14 +42,15 @@ ice-y := ice_main.o \ ice_ethtool.o \ ice_repr.o \ ice_tc_lib.o \ - ice_fwlog.o \ ice_debugfs.o \ ice_adapter.o ice-$(CONFIG_PCI_IOV) += \ ice_sriov.o \ - ice_virtchnl.o \ - ice_virtchnl_allowlist.o \ - ice_virtchnl_fdir.o \ + virt/allowlist.o \ + virt/fdir.o \ + virt/queues.o \ + virt/virtchnl.o \ + virt/rss.o \ ice_vf_mbx.o \ ice_vf_vsi_vlan_ops.o \ ice_vf_lib.o diff --git a/drivers/net/ethernet/intel/ice/devlink/health.c b/drivers/net/ethernet/intel/ice/devlink/health.c index ab519c0f28bf..8e9a8a8178d4 100644 --- a/drivers/net/ethernet/intel/ice/devlink/health.c +++ b/drivers/net/ethernet/intel/ice/devlink/health.c @@ -450,9 +450,8 @@ ice_init_devlink_rep(struct ice_pf *pf, { struct devlink *devlink = priv_to_devlink(pf); struct devlink_health_reporter *rep; - const u64 graceful_period = 0; - rep = devl_health_reporter_create(devlink, ops, graceful_period, pf); + rep = devl_health_reporter_create(devlink, ops, pf); if (IS_ERR(rep)) { struct device *dev = ice_pf_to_dev(pf); diff --git 
a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 2098f00b3cd3..22b8323ff0d0 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -84,7 +84,11 @@ #define ICE_BAR0 0 #define ICE_REQ_DESC_MULTIPLE 32 #define ICE_MIN_NUM_DESC 64 -#define ICE_MAX_NUM_DESC 8160 +#define ICE_MAX_NUM_DESC_E810 8160 +#define ICE_MAX_NUM_DESC_E830 8096 +#define ICE_MAX_NUM_DESC_BY_MAC(hw) ((hw)->mac_type == ICE_MAC_E830 ? \ + ICE_MAX_NUM_DESC_E830 : \ + ICE_MAX_NUM_DESC_E810) #define ICE_DFLT_MIN_RX_DESC 512 #define ICE_DFLT_NUM_TX_DESC 256 #define ICE_DFLT_NUM_RX_DESC 2048 @@ -200,9 +204,11 @@ enum ice_feature { ICE_F_SMA_CTRL, ICE_F_CGU, ICE_F_GNSS, + ICE_F_TXTIME, ICE_F_GCS, ICE_F_ROCE_LAG, ICE_F_SRIOV_LAG, + ICE_F_SRIOV_AA_LAG, ICE_F_MBX_LIMIT, ICE_F_MAX }; @@ -510,6 +516,7 @@ enum ice_pf_flags { ICE_FLAG_LINK_LENIENT_MODE_ENA, ICE_FLAG_PLUG_AUX_DEV, ICE_FLAG_UNPLUG_AUX_DEV, + ICE_FLAG_AUX_DEV_CREATED, ICE_FLAG_MTU_CHANGED, ICE_FLAG_GNSS, /* GNSS successfully initialized */ ICE_FLAG_DPLL, /* SyncE/PTP dplls initialized */ @@ -566,9 +573,6 @@ struct ice_pf { struct ice_sw *first_sw; /* first switch created by firmware */ u16 eswitch_mode; /* current mode of eswitch */ struct dentry *ice_debugfs_pf; - struct dentry *ice_debugfs_pf_fwlog; - /* keep track of all the dentrys for FW log modules */ - struct dentry **ice_debugfs_pf_fwlog_modules; struct ice_vfs vfs; DECLARE_BITMAP(features, ICE_F_MAX); DECLARE_BITMAP(state, ICE_STATE_NBITS); @@ -576,6 +580,7 @@ struct ice_pf { DECLARE_BITMAP(misc_thread, ICE_MISC_THREAD_NBITS); unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ unsigned long *avail_rxqs; /* bitmap to track PF Rx queue usage */ + unsigned long *txtime_txqs; /* bitmap to track PF Tx Time queue */ unsigned long serv_tmr_period; unsigned long serv_tmr_prev; struct timer_list serv_tmr; @@ -750,6 +755,31 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) } /** + * ice_is_txtime_ena - check if Tx Time is enabled on the Tx ring + * @ring: pointer to Tx ring + * + * Return: true if the Tx ring has Tx Time enabled, false otherwise. + */ +static inline bool ice_is_txtime_ena(const struct ice_tx_ring *ring) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_pf *pf = vsi->back; + + return test_bit(ring->q_index, pf->txtime_txqs); +} + +/** + * ice_is_txtime_cfg - check if Tx Time is configured on the Tx ring + * @ring: pointer to Tx ring + * + * Return: true if the Tx ring is configured for Tx ring, false otherwise. 
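A minimal sketch (the enable/disable wrapper is hypothetical) of how the new per-PF txtime_txqs bitmap is meant to gate a queue, paired with the ice_is_txtime_ena() helper above:

/* Illustrative only: mark or clear a queue in pf->txtime_txqs so that
 * ice_is_txtime_ena() reports the Tx Time state for that ring.
 */
static void ice_txtime_mark_queue(struct ice_pf *pf, u16 q_index, bool ena)
{
	if (ena)
		set_bit(q_index, pf->txtime_txqs);
	else
		clear_bit(q_index, pf->txtime_txqs);
}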
+ */ +static inline bool ice_is_txtime_cfg(const struct ice_tx_ring *ring) +{ + return !!(ring->flags & ICE_TX_FLAGS_TXTIME); +} + +/** * ice_get_xp_from_qid - get ZC XSK buffer pool bound to a queue ID * @vsi: pointer to VSI * @qid: index of a queue to look at XSK buff pool presence @@ -906,11 +936,10 @@ static inline bool ice_is_adq_active(struct ice_pf *pf) return false; } -void ice_debugfs_fwlog_init(struct ice_pf *pf); +int ice_debugfs_pf_init(struct ice_pf *pf); void ice_debugfs_pf_deinit(struct ice_pf *pf); void ice_debugfs_init(void); void ice_debugfs_exit(void); -void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module); bool netif_is_ice(const struct net_device *dev); int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 9e4adc43e474..b53561c34708 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -13,16 +13,45 @@ static DEFINE_XARRAY(ice_adapters); static DEFINE_MUTEX(ice_adapters_mutex); -static unsigned long ice_adapter_index(u64 dsn) +#define ICE_ADAPTER_FIXED_INDEX BIT_ULL(63) + +#define ICE_ADAPTER_INDEX_E825C \ + (ICE_DEV_ID_E825C_BACKPLANE | ICE_ADAPTER_FIXED_INDEX) + +static u64 ice_adapter_index(struct pci_dev *pdev) { + switch (pdev->device) { + case ICE_DEV_ID_E825C_BACKPLANE: + case ICE_DEV_ID_E825C_QSFP: + case ICE_DEV_ID_E825C_SFP: + case ICE_DEV_ID_E825C_SGMII: + /* E825C devices have multiple NACs which are connected to the + * same clock source, and which must share the same + * ice_adapter structure. We can't use the serial number since + * each NAC has its own NVM generated with its own unique + * Device Serial Number. Instead, rely on the embedded nature + * of the E825C devices, and use a fixed index. This relies on + * the fact that all E825C physical functions in a given + * system are part of the same overall device. 
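A minimal sketch (the comparison helper is hypothetical) of what the fixed E825C index above implies for adapter sharing:

/* Illustrative only: any two E825C physical functions in a system map
 * to the same index, and therefore to the same shared ice_adapter via
 * ice_adapter_get(); other devices continue to key off their DSN.
 */
static bool ice_adapters_shared_example(struct pci_dev *a, struct pci_dev *b)
{
	return ice_adapter_index(a) == ice_adapter_index(b);
}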
+ */ + return ICE_ADAPTER_INDEX_E825C; + default: + return pci_get_dsn(pdev) & ~ICE_ADAPTER_FIXED_INDEX; + } +} + +static unsigned long ice_adapter_xa_index(struct pci_dev *pdev) +{ + u64 index = ice_adapter_index(pdev); + #if BITS_PER_LONG == 64 - return dsn; + return index; #else - return (u32)dsn ^ (u32)(dsn >> 32); + return (u32)index ^ (u32)(index >> 32); #endif } -static struct ice_adapter *ice_adapter_new(u64 dsn) +static struct ice_adapter *ice_adapter_new(struct pci_dev *pdev) { struct ice_adapter *adapter; @@ -30,7 +59,7 @@ static struct ice_adapter *ice_adapter_new(u64 dsn) if (!adapter) return NULL; - adapter->device_serial_number = dsn; + adapter->index = ice_adapter_index(pdev); spin_lock_init(&adapter->ptp_gltsyn_time_lock); spin_lock_init(&adapter->txq_ctx_lock); refcount_set(&adapter->refcount, 1); @@ -64,24 +93,23 @@ static void ice_adapter_free(struct ice_adapter *adapter) */ struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) { - u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; unsigned long index; int err; - index = ice_adapter_index(dsn); + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); if (err == -EBUSY) { adapter = xa_load(&ice_adapters, index); refcount_inc(&adapter->refcount); - WARN_ON_ONCE(adapter->device_serial_number != dsn); + WARN_ON_ONCE(adapter->index != ice_adapter_index(pdev)); return adapter; } if (err) return ERR_PTR(err); - adapter = ice_adapter_new(dsn); + adapter = ice_adapter_new(pdev); if (!adapter) return ERR_PTR(-ENOMEM); xa_store(&ice_adapters, index, adapter, GFP_KERNEL); @@ -100,11 +128,10 @@ struct ice_adapter *ice_adapter_get(struct pci_dev *pdev) */ void ice_adapter_put(struct pci_dev *pdev) { - u64 dsn = pci_get_dsn(pdev); struct ice_adapter *adapter; unsigned long index; - index = ice_adapter_index(dsn); + index = ice_adapter_xa_index(pdev); scoped_guard(mutex, &ice_adapters_mutex) { adapter = xa_load(&ice_adapters, index); if (WARN_ON(!adapter)) diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.h b/drivers/net/ethernet/intel/ice/ice_adapter.h index db66d03c9f96..e95266c7f20b 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.h +++ b/drivers/net/ethernet/intel/ice/ice_adapter.h @@ -33,7 +33,7 @@ struct ice_port_list { * @txq_ctx_lock: Spinlock protecting access to the GLCOMM_QTX_CNTX_CTL register * @ctrl_pf: Control PF of the adapter * @ports: Ports list - * @device_serial_number: DSN cached for collision detection on 32bit systems + * @index: 64-bit index cached for collision detection on 32bit systems */ struct ice_adapter { refcount_t refcount; @@ -44,7 +44,7 @@ struct ice_adapter { struct ice_pf *ctrl_pf; struct ice_port_list ports; - u64 device_serial_number; + u64 index; }; struct ice_adapter *ice_adapter_get(struct pci_dev *pdev); diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 3bd3ea3af888..859e9c66f3e7 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -33,6 +33,10 @@ typedef struct __packed { u8 buf[ICE_TXQ_CTX_SZ]; } ice_txq_ctx_buf_t; typedef struct __packed { u8 buf[ICE_TXQ_CTX_FULL_SZ]; } ice_txq_ctx_buf_full_t; +#define ICE_TXTIME_CTX_SZ 25 + +typedef struct __packed { u8 buf[ICE_TXTIME_CTX_SZ]; } ice_txtime_ctx_buf_t; + /* Queue Shutdown (direct 0x0003) */ struct ice_aqc_q_shutdown { u8 driver_unloading; @@ -2060,6 +2064,10 @@ struct ice_aqc_cfg_txqs { #define 
ICE_AQC_Q_CFG_SRC_PRT_M 0x7 #define ICE_AQC_Q_CFG_DST_PRT_S 3 #define ICE_AQC_Q_CFG_DST_PRT_M (0x7 << ICE_AQC_Q_CFG_DST_PRT_S) +#define ICE_AQC_Q_CFG_MODE_M GENMASK(7, 6) +#define ICE_AQC_Q_CFG_MODE_SAME_PF 0x0 +#define ICE_AQC_Q_CFG_MODE_GIVE_OWN 0x1 +#define ICE_AQC_Q_CFG_MODE_KEEP_OWN 0x2 u8 time_out; #define ICE_AQC_Q_CFG_TIMEOUT_S 2 #define ICE_AQC_Q_CFG_TIMEOUT_M (0x1F << ICE_AQC_Q_CFG_TIMEOUT_S) @@ -2113,6 +2121,34 @@ struct ice_aqc_add_rdma_qset_data { struct ice_aqc_add_tx_rdma_qset_entry rdma_qsets[]; }; +/* Set Tx Time LAN Queue (indirect 0x0C35) */ +struct ice_aqc_set_txtimeqs { + __le16 q_id; + __le16 q_amount; + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +/* This is the descriptor of each queue entry for the Set Tx Time Queue + * command (0x0C35). Only used within struct ice_aqc_set_txtime_qgrp. + */ +struct ice_aqc_set_txtimeqs_perq { + u8 reserved[4]; + ice_txtime_ctx_buf_t txtime_ctx; + u8 reserved1[3]; +}; + +/* The format of the command buffer for Set Tx Time Queue (0x0C35) + * is an array of the following structs. Please note that the length of + * each struct ice_aqc_set_txtime_qgrp is variable due to the variable + * number of queues in each group! + */ +struct ice_aqc_set_txtime_qgrp { + u8 reserved[8]; + struct ice_aqc_set_txtimeqs_perq txtimeqs[]; +}; + /* Download Package (indirect 0x0C40) */ /* Also used for Update Package (indirect 0x0C41 and 0x0C42) */ struct ice_aqc_download_pkg { @@ -2395,42 +2431,6 @@ struct ice_aqc_event_lan_overflow { u8 reserved[8]; }; -enum ice_aqc_fw_logging_mod { - ICE_AQC_FW_LOG_ID_GENERAL = 0, - ICE_AQC_FW_LOG_ID_CTRL, - ICE_AQC_FW_LOG_ID_LINK, - ICE_AQC_FW_LOG_ID_LINK_TOPO, - ICE_AQC_FW_LOG_ID_DNL, - ICE_AQC_FW_LOG_ID_I2C, - ICE_AQC_FW_LOG_ID_SDP, - ICE_AQC_FW_LOG_ID_MDIO, - ICE_AQC_FW_LOG_ID_ADMINQ, - ICE_AQC_FW_LOG_ID_HDMA, - ICE_AQC_FW_LOG_ID_LLDP, - ICE_AQC_FW_LOG_ID_DCBX, - ICE_AQC_FW_LOG_ID_DCB, - ICE_AQC_FW_LOG_ID_XLR, - ICE_AQC_FW_LOG_ID_NVM, - ICE_AQC_FW_LOG_ID_AUTH, - ICE_AQC_FW_LOG_ID_VPD, - ICE_AQC_FW_LOG_ID_IOSF, - ICE_AQC_FW_LOG_ID_PARSER, - ICE_AQC_FW_LOG_ID_SW, - ICE_AQC_FW_LOG_ID_SCHEDULER, - ICE_AQC_FW_LOG_ID_TXQ, - ICE_AQC_FW_LOG_ID_RSVD, - ICE_AQC_FW_LOG_ID_POST, - ICE_AQC_FW_LOG_ID_WATCHDOG, - ICE_AQC_FW_LOG_ID_TASK_DISPATCH, - ICE_AQC_FW_LOG_ID_MNG, - ICE_AQC_FW_LOG_ID_SYNCE, - ICE_AQC_FW_LOG_ID_HEALTH, - ICE_AQC_FW_LOG_ID_TSDRV, - ICE_AQC_FW_LOG_ID_PFREG, - ICE_AQC_FW_LOG_ID_MDLVER, - ICE_AQC_FW_LOG_ID_MAX, -}; - enum ice_aqc_health_status_mask { ICE_AQC_HEALTH_STATUS_SET_PF_SPECIFIC_MASK = BIT(0), ICE_AQC_HEALTH_STATUS_SET_ALL_PF_MASK = BIT(1), @@ -2512,48 +2512,6 @@ struct ice_aqc_health_status_elem { __le32 internal_data2; }; -/* Set FW Logging configuration (indirect 0xFF30) - * Register for FW Logging (indirect 0xFF31) - * Query FW Logging (indirect 0xFF32) - * FW Log Event (indirect 0xFF33) - */ -struct ice_aqc_fw_log { - u8 cmd_flags; -#define ICE_AQC_FW_LOG_CONF_UART_EN BIT(0) -#define ICE_AQC_FW_LOG_CONF_AQ_EN BIT(1) -#define ICE_AQC_FW_LOG_QUERY_REGISTERED BIT(2) -#define ICE_AQC_FW_LOG_CONF_SET_VALID BIT(3) -#define ICE_AQC_FW_LOG_AQ_REGISTER BIT(0) -#define ICE_AQC_FW_LOG_AQ_QUERY BIT(2) - - u8 rsp_flag; - __le16 fw_rt_msb; - union { - struct { - __le32 fw_rt_lsb; - } sync; - struct { - __le16 log_resolution; -#define ICE_AQC_FW_LOG_MIN_RESOLUTION (1) -#define ICE_AQC_FW_LOG_MAX_RESOLUTION (128) - - __le16 mdl_cnt; - } cfg; - } ops; - __le32 addr_high; - __le32 addr_low; -}; - -/* Response Buffer for: - * Set Firmware Logging Configuration (0xFF30) - * Query FW Logging (0xFF32) - 
*/ -struct ice_aqc_fw_log_cfg_resp { - __le16 module_identifier; - u8 log_level; - u8 rsvd0; -}; - /* Admin Queue command opcodes */ enum ice_adminq_opc { /* AQ commands */ @@ -2688,6 +2646,9 @@ enum ice_adminq_opc { ice_aqc_opc_cfg_txqs = 0x0C32, ice_aqc_opc_add_rdma_qset = 0x0C33, + /* Tx Time queue commands */ + ice_aqc_opc_set_txtimeqs = 0x0C35, + /* package commands */ ice_aqc_opc_download_pkg = 0x0C40, ice_aqc_opc_upload_section = 0x0C41, diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index c5da8e9cc0a0..2d35a278c555 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -242,7 +242,8 @@ static void ice_cfg_itr_gran(struct ice_hw *hw) * @ring: ring to get the absolute queue index * @tc: traffic class number */ -static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 tc) +static u16 +ice_calc_txq_handle(const struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 tc) { WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n"); @@ -278,30 +279,20 @@ static void ice_cfg_xps_tx_ring(struct ice_tx_ring *ring) } /** - * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance - * @ring: The Tx ring to configure - * @tlan_ctx: Pointer to the Tx LAN queue context structure to be initialized - * @pf_q: queue index in the PF space + * ice_set_txq_ctx_vmvf - set queue context VM/VF type and number by VSI type + * @ring: the Tx ring to configure + * @vmvf_type: VM/VF type + * @vmvf_num: VM/VF number * - * Configure the Tx descriptor ring in TLAN context. + * Return: 0 on success and a negative value on error. */ -static void -ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +static int +ice_set_txq_ctx_vmvf(struct ice_tx_ring *ring, u8 *vmvf_type, u16 *vmvf_num) { struct ice_vsi *vsi = ring->vsi; - struct ice_hw *hw = &vsi->back->hw; - - tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; - - tlan_ctx->port_num = vsi->port_info->lport; - - /* Transmit Queue Length */ - tlan_ctx->qlen = ring->count; - - ice_set_cgd_num(tlan_ctx, ring->dcb_tc); + struct ice_hw *hw; - /* PF number */ - tlan_ctx->pf_num = hw->pf_id; + hw = &vsi->back->hw; /* queue belongs to a specific VSI type * VF / VM index should be programmed per vmvf_type setting: @@ -314,21 +305,60 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf case ICE_VSI_CTRL: case ICE_VSI_PF: if (ring->ch) - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; + *vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; else - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + *vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; case ICE_VSI_VF: /* Firmware expects vmvf_num to be absolute VF ID */ - tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id; - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; + *vmvf_num = hw->func_caps.vf_base_id + vsi->vf->vf_id; + *vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; break; case ICE_VSI_SF: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; + *vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; break; default: - return; + dev_info(ice_pf_to_dev(vsi->back), + "Unable to set VMVF type for VSI type %d\n", + vsi->type); + return -EINVAL; } + return 0; +} + +/** + * ice_setup_tx_ctx - setup a struct ice_tlan_ctx instance + * @ring: the Tx ring to configure + * @tlan_ctx: pointer to the Tx LAN queue context structure to be initialized + * @pf_q: queue index in the PF space + * + * Configure the Tx descriptor ring in TLAN context. 
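For reference, a minimal sketch (the allocation helper is hypothetical) of how the variable-length Set Tx Time Queue (0x0C35) buffer defined earlier in this diff is sized; the ice_aq_set_txtimeq() wrapper added later rejects any buffer whose size does not match struct_size() for the queue count:

/* Illustrative only: 8 reserved header bytes plus 32 bytes per queue
 * entry (4 + 25 + 3), so a single-queue buffer is 40 bytes.
 */
static struct ice_aqc_set_txtime_qgrp *
ice_alloc_txtime_qgrp_example(u8 q_count, u16 *buf_len)
{
	struct ice_aqc_set_txtime_qgrp *qg;

	*buf_len = struct_size(qg, txtimeqs, q_count);
	qg = kzalloc(*buf_len, GFP_KERNEL);

	return qg;
}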
+ * + * Return: 0 on success and a negative value on error. + */ +static int +ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +{ + struct ice_vsi *vsi = ring->vsi; + struct ice_hw *hw; + int err; + + hw = &vsi->back->hw; + tlan_ctx->base = ring->dma >> ICE_TLAN_CTX_BASE_S; + tlan_ctx->port_num = vsi->port_info->lport; + + /* Transmit Queue Length */ + tlan_ctx->qlen = ring->count; + + ice_set_cgd_num(tlan_ctx, ring->dcb_tc); + + /* PF number */ + tlan_ctx->pf_num = hw->pf_id; + + err = ice_set_txq_ctx_vmvf(ring, &tlan_ctx->vmvf_type, + &tlan_ctx->vmvf_num); + if (err) + return err; /* make sure the context is associated with the right VSI */ if (ring->ch) @@ -355,6 +385,80 @@ ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf * 1: Legacy Host Interface */ tlan_ctx->legacy_int = ICE_TX_LEGACY; + + return 0; +} + +/** + * ice_setup_txtime_ctx - setup a struct ice_txtime_ctx instance + * @ring: the tstamp ring to configure + * @txtime_ctx: pointer to the Tx time queue context structure to be initialized + * + * Return: 0 on success and a negative value on error. + */ +static int +ice_setup_txtime_ctx(const struct ice_tstamp_ring *ring, + struct ice_txtime_ctx *txtime_ctx) +{ + struct ice_tx_ring *tx_ring = ring->tx_ring; + struct ice_vsi *vsi = tx_ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + int err; + + txtime_ctx->base = ring->dma >> ICE_TXTIME_CTX_BASE_S; + + /* Tx time Queue Length */ + txtime_ctx->qlen = ring->count; + txtime_ctx->txtime_ena_q = 1; + + /* PF number */ + txtime_ctx->pf_num = hw->pf_id; + + err = ice_set_txq_ctx_vmvf(tx_ring, &txtime_ctx->vmvf_type, + &txtime_ctx->vmvf_num); + if (err) + return err; + + /* make sure the context is associated with the right VSI */ + if (tx_ring->ch) + txtime_ctx->src_vsi = tx_ring->ch->vsi_num; + else + txtime_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); + + txtime_ctx->ts_res = ICE_TXTIME_CTX_RESOLUTION_128NS; + txtime_ctx->drbell_mode_32 = ICE_TXTIME_CTX_DRBELL_MODE_32; + txtime_ctx->ts_fetch_prof_id = ICE_TXTIME_CTX_FETCH_PROF_ID_0; + + return 0; +} + +/** + * ice_calc_ts_ring_count - calculate the number of Tx time stamp descriptors + * @tx_ring: Tx ring to calculate the count for + * + * Return: the number of Tx time stamp descriptors. + */ +u16 ice_calc_ts_ring_count(struct ice_tx_ring *tx_ring) +{ + u16 prof = ICE_TXTIME_CTX_FETCH_PROF_ID_0; + struct ice_vsi *vsi = tx_ring->vsi; + struct ice_hw *hw = &vsi->back->hw; + u16 max_fetch_desc = 0, fetch, i; + u32 reg; + + for (i = 0; i < ICE_TXTIME_FETCH_PROFILE_CNT; i++) { + reg = rd32(hw, E830_GLTXTIME_FETCH_PROFILE(prof, 0)); + fetch = FIELD_GET(E830_GLTXTIME_FETCH_PROFILE_FETCH_TS_DESC_M, + reg); + max_fetch_desc = max(fetch, max_fetch_desc); + } + + if (!max_fetch_desc) + max_fetch_desc = ICE_TXTIME_FETCH_TS_DESC_DFLT; + + max_fetch_desc = ALIGN(max_fetch_desc, ICE_REQ_DESC_MULTIPLE); + + return tx_ring->count + max_fetch_desc; } /** @@ -882,13 +986,49 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi) } /** + * ice_cfg_tstamp - Configure Tx time stamp queue + * @tx_ring: Tx ring to be configured with timestamping + * + * Return: 0 on success and a negative value on error. 
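As a worked example of the descriptor budgeting in ice_calc_ts_ring_count() above, a minimal sketch (the helper and the 60-descriptor figure are assumptions for illustration):

/* Illustrative only: with a largest hardware fetch profile of e.g. 60
 * descriptors, ALIGN(60, ICE_REQ_DESC_MULTIPLE) = 64, so a 256-entry
 * Tx ring is paired with a 256 + 64 = 320 entry timestamp ring.
 */
static u16 ice_ts_ring_count_example(u16 tx_count, u16 max_fetch_desc)
{
	if (!max_fetch_desc)
		max_fetch_desc = ICE_TXTIME_FETCH_TS_DESC_DFLT;

	return tx_count + ALIGN(max_fetch_desc, ICE_REQ_DESC_MULTIPLE);
}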
+ */ +static int +ice_cfg_tstamp(struct ice_tx_ring *tx_ring) +{ + DEFINE_RAW_FLEX(struct ice_aqc_set_txtime_qgrp, txtime_qg_buf, + txtimeqs, 1); + u8 txtime_buf_len = struct_size(txtime_qg_buf, txtimeqs, 1); + struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring; + struct ice_txtime_ctx txtime_ctx = {}; + struct ice_vsi *vsi = tx_ring->vsi; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u16 pf_q = tx_ring->reg_idx; + int err; + + err = ice_setup_txtime_ctx(tstamp_ring, &txtime_ctx); + if (err) { + dev_err(ice_pf_to_dev(pf), "Failed to setup Tx time queue context for queue %d, error: %d\n", + pf_q, err); + return err; + } + ice_pack_txtime_ctx(&txtime_ctx, + &txtime_qg_buf->txtimeqs[0].txtime_ctx); + + tstamp_ring->tail = hw->hw_addr + E830_GLQTX_TXTIME_DBELL_LSB(pf_q); + return ice_aq_set_txtimeq(hw, pf_q, 1, txtime_qg_buf, + txtime_buf_len, NULL); +} + +/** * ice_vsi_cfg_txq - Configure single Tx queue * @vsi: the VSI that queue belongs to * @ring: Tx ring to be configured * @qg_buf: queue group buffer + * + * Return: 0 on success and a negative value on error. */ static int -ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, +ice_vsi_cfg_txq(const struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_aqc_add_tx_qgrp *qg_buf) { u8 buf_len = struct_size(qg_buf, txqs, 1); @@ -897,15 +1037,20 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_channel *ch = ring->ch; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; + u32 pf_q, vsi_idx; int status; - u16 pf_q; u8 tc; /* Configure XPS */ ice_cfg_xps_tx_ring(ring); pf_q = ring->reg_idx; - ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); + status = ice_setup_tx_ctx(ring, &tlan_ctx, pf_q); + if (status) { + dev_err(ice_pf_to_dev(pf), "Failed to setup Tx context for queue %d, error: %d\n", + pf_q, status); + return status; + } /* copy context contents into the qg_buf */ qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q); ice_pack_txq_ctx(&tlan_ctx, &qg_buf->txqs[0].txq_ctx); @@ -925,14 +1070,15 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, */ ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); - if (ch) - status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0, - ring->q_handle, 1, qg_buf, buf_len, - NULL); - else - status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, - ring->q_handle, 1, qg_buf, buf_len, - NULL); + if (ch) { + tc = 0; + vsi_idx = ch->ch_vsi->idx; + } else { + vsi_idx = vsi->idx; + } + + status = ice_ena_vsi_txq(vsi->port_info, vsi_idx, tc, ring->q_handle, + 1, qg_buf, buf_len, NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n", status); @@ -947,7 +1093,32 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, if (pf_q == le16_to_cpu(txq->txq_id)) ring->txq_teid = le32_to_cpu(txq->q_teid); + if (ice_is_txtime_ena(ring)) { + status = ice_alloc_setup_tstamp_ring(ring); + if (status) { + dev_err(ice_pf_to_dev(pf), + "Failed to allocate Tx timestamp ring, error: %d\n", + status); + goto err_setup_tstamp; + } + + status = ice_cfg_tstamp(ring); + if (status) { + dev_err(ice_pf_to_dev(pf), "Failed to set Tx Time queue context, error: %d\n", + status); + goto err_cfg_tstamp; + } + } return 0; + +err_cfg_tstamp: + ice_free_tx_tstamp_ring(ring); +err_setup_tstamp: + ice_dis_vsi_txq(vsi->port_info, vsi_idx, tc, 1, &ring->q_handle, + &ring->reg_idx, &ring->txq_teid, ICE_NO_RESET, + tlan_ctx.vmvf_num, NULL); + + return status; } int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring 
**tx_rings, @@ -1206,3 +1377,148 @@ ice_fill_txq_meta(const struct ice_vsi *vsi, struct ice_tx_ring *ring, txq_meta->tc = tc; } } + +/** + * ice_qp_reset_stats - Resets all stats for rings of given index + * @vsi: VSI that contains rings of interest + * @q_idx: ring index in array + */ +static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) +{ + struct ice_vsi_stats *vsi_stat; + struct ice_pf *pf; + + pf = vsi->back; + if (!pf->vsi_stats) + return; + + vsi_stat = pf->vsi_stats[vsi->idx]; + if (!vsi_stat) + return; + + memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0, + sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); + memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, + sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); + if (vsi->xdp_rings) + memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0, + sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); +} + +/** + * ice_qp_clean_rings - Cleans all the rings of a given index + * @vsi: VSI that contains rings of interest + * @q_idx: ring index in array + */ +static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) +{ + ice_clean_tx_ring(vsi->tx_rings[q_idx]); + if (vsi->xdp_rings) + ice_clean_tx_ring(vsi->xdp_rings[q_idx]); + ice_clean_rx_ring(vsi->rx_rings[q_idx]); +} + +/** + * ice_qp_dis - Disables a queue pair + * @vsi: VSI of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. + */ +int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) +{ + struct ice_txq_meta txq_meta = { }; + struct ice_q_vector *q_vector; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + int fail = 0; + int err; + + if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) + return -EINVAL; + + tx_ring = vsi->tx_rings[q_idx]; + rx_ring = vsi->rx_rings[q_idx]; + q_vector = rx_ring->q_vector; + + synchronize_net(); + netif_carrier_off(vsi->netdev); + netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + + ice_qvec_dis_irq(vsi, rx_ring, q_vector); + ice_qvec_toggle_napi(vsi, q_vector, false); + + ice_fill_txq_meta(vsi, tx_ring, &txq_meta); + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); + if (!fail) + fail = err; + if (vsi->xdp_rings) { + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; + + memset(&txq_meta, 0, sizeof(txq_meta)); + ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, + &txq_meta); + if (!fail) + fail = err; + } + + ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); + ice_qp_clean_rings(vsi, q_idx); + ice_qp_reset_stats(vsi, q_idx); + + return fail; +} + +/** + * ice_qp_ena - Enables a queue pair + * @vsi: VSI of interest + * @q_idx: ring index in array + * + * Returns 0 on success, negative on failure. 
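A minimal usage sketch of the queue-pair helpers now exported from ice_base.c (the wrapper itself is hypothetical), showing the intended ice_qp_dis()/ice_qp_ena() pairing around a per-queue resource swap such as an XSK buffer pool change:

/* Illustrative only: quiesce one queue pair, swap its resources, then
 * bring it back up; both helpers return 0 on success.
 */
static int ice_qp_reconfigure_example(struct ice_vsi *vsi, u16 q_idx)
{
	int err;

	err = ice_qp_dis(vsi, q_idx);
	if (err)
		return err;

	/* ...swap or reprogram per-queue resources here... */

	return ice_qp_ena(vsi, q_idx);
}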
+ */ +int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) +{ + struct ice_q_vector *q_vector; + int fail = 0; + bool link_up; + int err; + + err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx); + if (!fail) + fail = err; + + if (ice_is_xdp_ena_vsi(vsi)) { + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; + + err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx); + if (!fail) + fail = err; + ice_set_ring_xdp(xdp_ring); + ice_tx_xsk_pool(vsi, q_idx); + } + + err = ice_vsi_cfg_single_rxq(vsi, q_idx); + if (!fail) + fail = err; + + q_vector = vsi->rx_rings[q_idx]->q_vector; + ice_qvec_cfg_msix(vsi, q_vector, q_idx); + + err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true); + if (!fail) + fail = err; + + ice_qvec_toggle_napi(vsi, q_vector, true); + ice_qvec_ena_irq(vsi, q_vector); + + /* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */ + synchronize_net(); + ice_get_link_status(vsi->port_info, &link_up); + if (link_up) { + netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); + netif_carrier_on(vsi->netdev); + } + + return fail; +} diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h index b711bc921928..d28294247599 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.h +++ b/drivers/net/ethernet/intel/ice/ice_base.h @@ -32,4 +32,7 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, void ice_fill_txq_meta(const struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta); +int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx); +int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx); +u16 ice_calc_ts_ring_count(struct ice_tx_ring *tx_ring); #endif /* _ICE_BASE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index 003d60a4db21..2250426ec91b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -984,6 +984,37 @@ static int ice_wait_for_fw(struct ice_hw *hw, u32 timeout) return -ETIMEDOUT; } +static int __fwlog_send_cmd(void *priv, struct libie_aq_desc *desc, void *buf, + u16 size) +{ + struct ice_hw *hw = priv; + + return ice_aq_send_cmd(hw, desc, buf, size, NULL); +} + +static int __fwlog_init(struct ice_hw *hw) +{ + struct ice_pf *pf = hw->back; + struct libie_fwlog_api api = { + .pdev = pf->pdev, + .send_cmd = __fwlog_send_cmd, + .priv = hw, + }; + int err; + + /* only support fw log commands on PF 0 */ + if (hw->bus.func) + return -EINVAL; + + err = ice_debugfs_pf_init(pf); + if (err) + return err; + + api.debugfs_root = pf->ice_debugfs_pf; + + return libie_fwlog_init(&hw->fwlog, &api); +} + /** * ice_init_hw - main hardware initialization routine * @hw: pointer to the hardware structure @@ -1012,7 +1043,7 @@ int ice_init_hw(struct ice_hw *hw) if (status) goto err_unroll_cqinit; - status = ice_fwlog_init(hw); + status = __fwlog_init(hw); if (status) ice_debug(hw, ICE_DBG_FW_LOG, "Error initializing FW logging: %d\n", status); @@ -1159,6 +1190,16 @@ err_unroll_cqinit: return status; } +static void __fwlog_deinit(struct ice_hw *hw) +{ + /* only support fw log commands on PF 0 */ + if (hw->bus.func) + return; + + ice_debugfs_pf_deinit(hw->back); + libie_fwlog_deinit(&hw->fwlog); +} + /** * ice_deinit_hw - unroll initialization operations done by ice_init_hw * @hw: pointer to the hardware structure @@ -1177,8 +1218,7 @@ void ice_deinit_hw(struct ice_hw *hw) ice_free_seg(hw); ice_free_hw_tbls(hw); mutex_destroy(&hw->tnl_lock); - - ice_fwlog_deinit(hw); + __fwlog_deinit(hw); 
ice_destroy_all_ctrlq(hw); /* Clear VSI contexts if not already cleared */ @@ -1693,6 +1733,44 @@ int ice_write_txq_ctx(struct ice_hw *hw, struct ice_tlan_ctx *tlan_ctx, return 0; } +/* Tx time Queue Context */ +static const struct packed_field_u8 ice_txtime_ctx_fields[] = { + /* Field Width LSB */ + ICE_CTX_STORE(ice_txtime_ctx, base, 57, 0), + ICE_CTX_STORE(ice_txtime_ctx, pf_num, 3, 57), + ICE_CTX_STORE(ice_txtime_ctx, vmvf_num, 10, 60), + ICE_CTX_STORE(ice_txtime_ctx, vmvf_type, 2, 70), + ICE_CTX_STORE(ice_txtime_ctx, src_vsi, 10, 72), + ICE_CTX_STORE(ice_txtime_ctx, cpuid, 8, 82), + ICE_CTX_STORE(ice_txtime_ctx, tphrd_desc, 1, 90), + ICE_CTX_STORE(ice_txtime_ctx, qlen, 13, 91), + ICE_CTX_STORE(ice_txtime_ctx, timer_num, 1, 104), + ICE_CTX_STORE(ice_txtime_ctx, txtime_ena_q, 1, 105), + ICE_CTX_STORE(ice_txtime_ctx, drbell_mode_32, 1, 106), + ICE_CTX_STORE(ice_txtime_ctx, ts_res, 4, 107), + ICE_CTX_STORE(ice_txtime_ctx, ts_round_type, 2, 111), + ICE_CTX_STORE(ice_txtime_ctx, ts_pacing_slot, 3, 113), + ICE_CTX_STORE(ice_txtime_ctx, merging_ena, 1, 116), + ICE_CTX_STORE(ice_txtime_ctx, ts_fetch_prof_id, 4, 117), + ICE_CTX_STORE(ice_txtime_ctx, ts_fetch_cache_line_aln_thld, 4, 121), + ICE_CTX_STORE(ice_txtime_ctx, tx_pipe_delay_mode, 1, 125), +}; + +/** + * ice_pack_txtime_ctx - pack Tx time queue context into a HW buffer + * @ctx: the Tx time queue context to pack + * @buf: the HW buffer to pack into + * + * Pack the Tx time queue context from the CPU-friendly unpacked buffer into + * its bit-packed HW layout. + */ +void ice_pack_txtime_ctx(const struct ice_txtime_ctx *ctx, + ice_txtime_ctx_buf_t *buf) +{ + pack_fields(buf, sizeof(*buf), ctx, ice_txtime_ctx_fields, + QUIRK_LITTLE_ENDIAN | QUIRK_LSW32_IS_FIRST); +} + /* Sideband Queue command wrappers */ /** @@ -2418,12 +2496,15 @@ ice_parse_common_caps(struct ice_hw *hw, struct ice_hw_common_caps *caps, caps->reset_restrict_support); break; case LIBIE_AQC_CAPS_FW_LAG_SUPPORT: - caps->roce_lag = !!(number & LIBIE_AQC_BIT_ROCEV2_LAG); + caps->roce_lag = number & LIBIE_AQC_BIT_ROCEV2_LAG; ice_debug(hw, ICE_DBG_INIT, "%s: roce_lag = %u\n", prefix, caps->roce_lag); - caps->sriov_lag = !!(number & LIBIE_AQC_BIT_SRIOV_LAG); + caps->sriov_lag = number & LIBIE_AQC_BIT_SRIOV_LAG; ice_debug(hw, ICE_DBG_INIT, "%s: sriov_lag = %u\n", prefix, caps->sriov_lag); + caps->sriov_aa_lag = number & LIBIE_AQC_BIT_SRIOV_AA_LAG; + ice_debug(hw, ICE_DBG_INIT, "%s: sriov_aa_lag = %u\n", + prefix, caps->sriov_aa_lag); break; case LIBIE_AQC_CAPS_TX_SCHED_TOPO_COMP_MODE: caps->tx_sched_topo_comp_mode_en = (number == 1); @@ -4712,24 +4793,24 @@ do_aq: } /** - * ice_aq_cfg_lan_txq + * ice_aq_cfg_lan_txq - send AQ command 0x0C32 to FW * @hw: pointer to the hardware structure * @buf: buffer for command * @buf_size: size of buffer in bytes * @num_qs: number of queues being configured * @oldport: origination lport * @newport: destination lport + * @mode: cmd_type for move to use * @cd: pointer to command details structure or NULL * * Move/Configure LAN Tx queue (0x0C32) * - * There is a better AQ command to use for moving nodes, so only coding - * this one for configuring the node. + * Return: Zero on success, associated error code on failure. 
*/ int ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, u16 buf_size, u16 num_qs, u8 oldport, u8 newport, - struct ice_sq_cd *cd) + u8 mode, struct ice_sq_cd *cd) { struct ice_aqc_cfg_txqs *cmd; struct libie_aq_desc desc; @@ -4742,10 +4823,12 @@ ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, if (!buf) return -EINVAL; - cmd->cmd_type = ICE_AQC_Q_CFG_TC_CHNG; + cmd->cmd_type = mode; cmd->num_qs = num_qs; cmd->port_num_chng = (oldport & ICE_AQC_Q_CFG_SRC_PRT_M); cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_DST_PRT_M, newport); + cmd->port_num_chng |= FIELD_PREP(ICE_AQC_Q_CFG_MODE_M, + ICE_AQC_Q_CFG_MODE_KEEP_OWN); cmd->time_out = FIELD_PREP(ICE_AQC_Q_CFG_TIMEOUT_M, 5); cmd->blocked_cgds = 0; @@ -4801,6 +4884,46 @@ ice_aq_add_rdma_qsets(struct ice_hw *hw, u8 num_qset_grps, return ice_aq_send_cmd(hw, &desc, qset_list, buf_size, cd); } +/** + * ice_aq_set_txtimeq - set Tx time queues + * @hw: pointer to the hardware structure + * @txtimeq: first Tx time queue id to configure + * @q_count: number of queues to configure + * @txtime_qg: queue group to be set + * @buf_size: size of buffer for indirect command + * @cd: pointer to command details structure or NULL + * + * Set Tx Time queue (0x0C35) + * Return: 0 on success or negative value on failure. + */ +int +ice_aq_set_txtimeq(struct ice_hw *hw, u16 txtimeq, u8 q_count, + struct ice_aqc_set_txtime_qgrp *txtime_qg, u16 buf_size, + struct ice_sq_cd *cd) +{ + struct ice_aqc_set_txtimeqs *cmd; + struct libie_aq_desc desc; + u16 size; + + if (!txtime_qg || txtimeq > ICE_TXTIME_MAX_QUEUE || + q_count < 1 || q_count > ICE_SET_TXTIME_MAX_Q_AMOUNT) + return -EINVAL; + + size = struct_size(txtime_qg, txtimeqs, q_count); + if (buf_size != size) + return -EINVAL; + + cmd = libie_aq_raw(&desc); + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_txtimeqs); + + desc.flags |= cpu_to_le16(LIBIE_AQ_FLAG_RD); + + cmd->q_id = cpu_to_le16(txtimeq); + cmd->q_amount = cpu_to_le16(q_count); + return ice_aq_send_cmd(hw, &desc, txtime_qg, buf_size, cd); +} + /* End of FW Admin Queue command wrappers */ /** diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 60320cdf7804..e700ac0dc347 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -270,11 +270,17 @@ ice_ena_vsi_txq(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, int ice_aq_cfg_lan_txq(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *buf, u16 buf_size, u16 num_qs, u8 oldport, u8 newport, - struct ice_sq_cd *cd); + u8 mode, struct ice_sq_cd *cd); int ice_replay_vsi(struct ice_hw *hw, u16 vsi_handle); void ice_replay_post(struct ice_hw *hw); struct ice_q_ctx * ice_get_lan_q_ctx(struct ice_hw *hw, u16 vsi_handle, u8 tc, u16 q_handle); +int +ice_aq_set_txtimeq(struct ice_hw *hw, u16 txtimeq, u8 q_count, + struct ice_aqc_set_txtime_qgrp *txtime_qg, + u16 buf_size, struct ice_sq_cd *cd); +void ice_pack_txtime_ctx(const struct ice_txtime_ctx *ctx, + ice_txtime_ctx_buf_t *buf); int ice_sbq_rw_reg(struct ice_hw *hw, struct ice_sbq_msg_input *in, u16 flag); int ice_aq_get_cgu_input_pin_measure(struct ice_hw *hw, u8 dpll_idx, struct ice_cgu_input_measure *meas, diff --git a/drivers/net/ethernet/intel/ice/ice_ddp.c b/drivers/net/ethernet/intel/ice/ice_ddp.c index e2a036ce76ca..3b2d9c436979 100644 --- a/drivers/net/ethernet/intel/ice/ice_ddp.c +++ b/drivers/net/ethernet/intel/ice/ice_ddp.c @@ -2377,7 +2377,13 @@ ice_get_set_tx_topo(struct ice_hw *hw, 
u8 *buf, u16 buf_size, * The function will apply the new Tx topology from the package buffer * if available. * - * Return: zero when update was successful, negative values otherwise. + * Return: + * * 0 - Successfully applied topology configuration. + * * -EBUSY - Failed to acquire global configuration lock. + * * -EEXIST - Topology configuration has already been applied. + * * -EIO - Unable to apply topology configuration. + * * -ENODEV - Failed to re-initialize device after applying configuration. + * * Other negative error codes indicate unexpected failures. */ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) { @@ -2410,7 +2416,7 @@ int ice_cfg_tx_topo(struct ice_hw *hw, const void *buf, u32 len) if (status) { ice_debug(hw, ICE_DBG_INIT, "Get current topology is failed\n"); - return status; + return -EIO; } /* Is default topology already applied ? */ @@ -2497,31 +2503,45 @@ update_topo: ICE_GLOBAL_CFG_LOCK_TIMEOUT); if (status) { ice_debug(hw, ICE_DBG_INIT, "Failed to acquire global lock\n"); - return status; + return -EBUSY; } /* Check if reset was triggered already. */ reg = rd32(hw, GLGEN_RSTAT); if (reg & GLGEN_RSTAT_DEVSTATE_M) { - /* Reset is in progress, re-init the HW again */ ice_debug(hw, ICE_DBG_INIT, "Reset is in progress. Layer topology might be applied already\n"); ice_check_reset(hw); - return 0; + /* Reset is in progress, re-init the HW again */ + goto reinit_hw; } /* Set new topology */ status = ice_get_set_tx_topo(hw, new_topo, size, NULL, NULL, true); if (status) { - ice_debug(hw, ICE_DBG_INIT, "Failed setting Tx topology\n"); - return status; + ice_debug(hw, ICE_DBG_INIT, "Failed to set Tx topology, status %pe\n", + ERR_PTR(status)); + /* only report -EIO here as the caller checks the error value + * and reports an informational error message informing that + * the driver failed to program Tx topology. + */ + status = -EIO; } - /* New topology is updated, delay 1 second before issuing the CORER */ + /* Even if Tx topology config failed, we need to CORE reset here to + * clear the global configuration lock. Delay 1 second to allow + * hardware to settle then issue a CORER + */ msleep(1000); ice_reset(hw, ICE_RESET_CORER); - /* CORER will clear the global lock, so no explicit call - * required for release. - */ + ice_check_reset(hw); + +reinit_hw: + /* Since we triggered a CORER, re-initialize hardware */ + ice_deinit_hw(hw); + if (ice_init_hw(hw)) { + ice_debug(hw, ICE_DBG_INIT, "Failed to re-init hardware after setting Tx topology\n"); + return -ENODEV; + } - return 0; + return status; } diff --git a/drivers/net/ethernet/intel/ice/ice_debugfs.c b/drivers/net/ethernet/intel/ice/ice_debugfs.c index cb71eca6a85b..f450250fc827 100644 --- a/drivers/net/ethernet/intel/ice/ice_debugfs.c +++ b/drivers/net/ethernet/intel/ice/ice_debugfs.c @@ -1,647 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2022, Intel Corporation. */ -#include <linux/fs.h> #include <linux/debugfs.h> -#include <linux/random.h> -#include <linux/vmalloc.h> #include "ice.h" static struct dentry *ice_debugfs_root; -/* create a define that has an extra module that doesn't really exist. this - * is so we can add a module 'all' to easily enable/disable all the modules - */ -#define ICE_NR_FW_LOG_MODULES (ICE_AQC_FW_LOG_ID_MAX + 1) - -/* the ordering in this array is important. 
it matches the ordering of the - * values in the FW so the index is the same value as in ice_aqc_fw_logging_mod - */ -static const char * const ice_fwlog_module_string[] = { - "general", - "ctrl", - "link", - "link_topo", - "dnl", - "i2c", - "sdp", - "mdio", - "adminq", - "hdma", - "lldp", - "dcbx", - "dcb", - "xlr", - "nvm", - "auth", - "vpd", - "iosf", - "parser", - "sw", - "scheduler", - "txq", - "rsvd", - "post", - "watchdog", - "task_dispatch", - "mng", - "synce", - "health", - "tsdrv", - "pfreg", - "mdlver", - "all", -}; - -/* the ordering in this array is important. it matches the ordering of the - * values in the FW so the index is the same value as in ice_fwlog_level - */ -static const char * const ice_fwlog_level_string[] = { - "none", - "error", - "warning", - "normal", - "verbose", -}; - -static const char * const ice_fwlog_log_size[] = { - "128K", - "256K", - "512K", - "1M", - "2M", -}; - -/** - * ice_fwlog_print_module_cfg - print current FW logging module configuration - * @hw: pointer to the HW structure - * @module: module to print - * @s: the seq file to put data into - */ -static void -ice_fwlog_print_module_cfg(struct ice_hw *hw, int module, struct seq_file *s) -{ - struct ice_fwlog_cfg *cfg = &hw->fwlog_cfg; - struct ice_fwlog_module_entry *entry; - - if (module != ICE_AQC_FW_LOG_ID_MAX) { - entry = &cfg->module_entries[module]; - - seq_printf(s, "\tModule: %s, Log Level: %s\n", - ice_fwlog_module_string[entry->module_id], - ice_fwlog_level_string[entry->log_level]); - } else { - int i; - - for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) { - entry = &cfg->module_entries[i]; - - seq_printf(s, "\tModule: %s, Log Level: %s\n", - ice_fwlog_module_string[entry->module_id], - ice_fwlog_level_string[entry->log_level]); - } - } -} - -static int ice_find_module_by_dentry(struct ice_pf *pf, struct dentry *d) -{ - int i, module; - - module = -1; - /* find the module based on the dentry */ - for (i = 0; i < ICE_NR_FW_LOG_MODULES; i++) { - if (d == pf->ice_debugfs_pf_fwlog_modules[i]) { - module = i; - break; - } - } - - return module; -} - -/** - * ice_debugfs_module_show - read from 'module' file - * @s: the opened file - * @v: pointer to the offset - */ -static int ice_debugfs_module_show(struct seq_file *s, void *v) -{ - const struct file *filp = s->file; - struct dentry *dentry; - struct ice_pf *pf; - int module; - - dentry = file_dentry(filp); - pf = s->private; - - module = ice_find_module_by_dentry(pf, dentry); - if (module < 0) { - dev_info(ice_pf_to_dev(pf), "unknown module\n"); - return -EINVAL; - } - - ice_fwlog_print_module_cfg(&pf->hw, module, s); - - return 0; -} - -static int ice_debugfs_module_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, ice_debugfs_module_show, inode->i_private); -} - -/** - * ice_debugfs_module_write - write into 'module' file - * @filp: the opened file - * @buf: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - */ -static ssize_t -ice_debugfs_module_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ice_pf *pf = file_inode(filp)->i_private; - struct dentry *dentry = file_dentry(filp); - struct device *dev = ice_pf_to_dev(pf); - char user_val[16], *cmd_buf; - int module, log_level, cnt; - - /* don't allow partial writes or invalid input */ - if (*ppos != 0 || count > 8) - return -EINVAL; - - cmd_buf = memdup_user_nul(buf, count); - if (IS_ERR(cmd_buf)) - return PTR_ERR(cmd_buf); - - module = ice_find_module_by_dentry(pf, 
dentry); - if (module < 0) { - dev_info(dev, "unknown module\n"); - return -EINVAL; - } - - cnt = sscanf(cmd_buf, "%s", user_val); - if (cnt != 1) - return -EINVAL; - - log_level = sysfs_match_string(ice_fwlog_level_string, user_val); - if (log_level < 0) { - dev_info(dev, "unknown log level '%s'\n", user_val); - return -EINVAL; - } - - if (module != ICE_AQC_FW_LOG_ID_MAX) { - ice_pf_fwlog_update_module(pf, log_level, module); - } else { - /* the module 'all' is a shortcut so that we can set - * all of the modules to the same level quickly - */ - int i; - - for (i = 0; i < ICE_AQC_FW_LOG_ID_MAX; i++) - ice_pf_fwlog_update_module(pf, log_level, i); - } - - return count; -} - -static const struct file_operations ice_debugfs_module_fops = { - .owner = THIS_MODULE, - .open = ice_debugfs_module_open, - .read = seq_read, - .release = single_release, - .write = ice_debugfs_module_write, -}; - -/** - * ice_debugfs_nr_messages_read - read from 'nr_messages' file - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - */ -static ssize_t ice_debugfs_nr_messages_read(struct file *filp, - char __user *buffer, size_t count, - loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct ice_hw *hw = &pf->hw; - char buff[32] = {}; - - snprintf(buff, sizeof(buff), "%d\n", - hw->fwlog_cfg.log_resolution); - - return simple_read_from_buffer(buffer, count, ppos, buff, strlen(buff)); -} - -/** - * ice_debugfs_nr_messages_write - write into 'nr_messages' file - * @filp: the opened file - * @buf: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - */ -static ssize_t -ice_debugfs_nr_messages_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - char user_val[8], *cmd_buf; - s16 nr_messages; - ssize_t ret; - - /* don't allow partial writes or invalid input */ - if (*ppos != 0 || count > 4) - return -EINVAL; - - cmd_buf = memdup_user_nul(buf, count); - if (IS_ERR(cmd_buf)) - return PTR_ERR(cmd_buf); - - ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; - - ret = kstrtos16(user_val, 0, &nr_messages); - if (ret) - return ret; - - if (nr_messages < ICE_AQC_FW_LOG_MIN_RESOLUTION || - nr_messages > ICE_AQC_FW_LOG_MAX_RESOLUTION) { - dev_err(dev, "Invalid FW log number of messages %d, value must be between %d - %d\n", - nr_messages, ICE_AQC_FW_LOG_MIN_RESOLUTION, - ICE_AQC_FW_LOG_MAX_RESOLUTION); - return -EINVAL; - } - - hw->fwlog_cfg.log_resolution = nr_messages; - - return count; -} - -static const struct file_operations ice_debugfs_nr_messages_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = ice_debugfs_nr_messages_read, - .write = ice_debugfs_nr_messages_write, -}; - -/** - * ice_debugfs_enable_read - read from 'enable' file - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - */ -static ssize_t ice_debugfs_enable_read(struct file *filp, - char __user *buffer, size_t count, - loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct ice_hw *hw = &pf->hw; - char buff[32] = {}; - - snprintf(buff, sizeof(buff), "%u\n", - (u16)(hw->fwlog_cfg.options & - ICE_FWLOG_OPTION_IS_REGISTERED) >> 3); - - return simple_read_from_buffer(buffer, count, ppos, buff, 
strlen(buff)); -} - -/** - * ice_debugfs_enable_write - write into 'enable' file - * @filp: the opened file - * @buf: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - */ -static ssize_t -ice_debugfs_enable_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct ice_hw *hw = &pf->hw; - char user_val[8], *cmd_buf; - bool enable; - ssize_t ret; - - /* don't allow partial writes or invalid input */ - if (*ppos != 0 || count > 2) - return -EINVAL; - - cmd_buf = memdup_user_nul(buf, count); - if (IS_ERR(cmd_buf)) - return PTR_ERR(cmd_buf); - - ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; - - ret = kstrtobool(user_val, &enable); - if (ret) - goto enable_write_error; - - if (enable) - hw->fwlog_cfg.options |= ICE_FWLOG_OPTION_ARQ_ENA; - else - hw->fwlog_cfg.options &= ~ICE_FWLOG_OPTION_ARQ_ENA; - - ret = ice_fwlog_set(hw, &hw->fwlog_cfg); - if (ret) - goto enable_write_error; - - if (enable) - ret = ice_fwlog_register(hw); - else - ret = ice_fwlog_unregister(hw); - - if (ret) - goto enable_write_error; - - /* if we get here, nothing went wrong; return count since we didn't - * really write anything - */ - ret = (ssize_t)count; - -enable_write_error: - /* This function always consumes all of the written input, or produces - * an error. Check and enforce this. Otherwise, the write operation - * won't complete properly. - */ - if (WARN_ON(ret != (ssize_t)count && ret >= 0)) - ret = -EIO; - - return ret; -} - -static const struct file_operations ice_debugfs_enable_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = ice_debugfs_enable_read, - .write = ice_debugfs_enable_write, -}; - -/** - * ice_debugfs_log_size_read - read from 'log_size' file - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - */ -static ssize_t ice_debugfs_log_size_read(struct file *filp, - char __user *buffer, size_t count, - loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct ice_hw *hw = &pf->hw; - char buff[32] = {}; - int index; - - index = hw->fwlog_ring.index; - snprintf(buff, sizeof(buff), "%s\n", ice_fwlog_log_size[index]); - - return simple_read_from_buffer(buffer, count, ppos, buff, strlen(buff)); -} - -/** - * ice_debugfs_log_size_write - write into 'log_size' file - * @filp: the opened file - * @buf: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - */ -static ssize_t -ice_debugfs_log_size_write(struct file *filp, const char __user *buf, - size_t count, loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - char user_val[8], *cmd_buf; - ssize_t ret; - int index; - - /* don't allow partial writes or invalid input */ - if (*ppos != 0 || count > 5) - return -EINVAL; - - cmd_buf = memdup_user_nul(buf, count); - if (IS_ERR(cmd_buf)) - return PTR_ERR(cmd_buf); - - ret = sscanf(cmd_buf, "%s", user_val); - if (ret != 1) - return -EINVAL; - - index = sysfs_match_string(ice_fwlog_log_size, user_val); - if (index < 0) { - dev_info(dev, "Invalid log size '%s'. The value must be one of 128K, 256K, 512K, 1M, 2M\n", - user_val); - ret = -EINVAL; - goto log_size_write_error; - } else if (hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED) { - dev_info(dev, "FW logging is currently running. 
Please disable FW logging to change log_size\n"); - ret = -EINVAL; - goto log_size_write_error; - } - - /* free all the buffers and the tracking info and resize */ - ice_fwlog_realloc_rings(hw, index); - - /* if we get here, nothing went wrong; return count since we didn't - * really write anything - */ - ret = (ssize_t)count; - -log_size_write_error: - /* This function always consumes all of the written input, or produces - * an error. Check and enforce this. Otherwise, the write operation - * won't complete properly. - */ - if (WARN_ON(ret != (ssize_t)count && ret >= 0)) - ret = -EIO; - - return ret; -} - -static const struct file_operations ice_debugfs_log_size_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = ice_debugfs_log_size_read, - .write = ice_debugfs_log_size_write, -}; - -/** - * ice_debugfs_data_read - read from 'data' file - * @filp: the opened file - * @buffer: where to write the data for the user to read - * @count: the size of the user's buffer - * @ppos: file position offset - */ -static ssize_t ice_debugfs_data_read(struct file *filp, char __user *buffer, - size_t count, loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct ice_hw *hw = &pf->hw; - int data_copied = 0; - bool done = false; - - if (ice_fwlog_ring_empty(&hw->fwlog_ring)) - return 0; - - while (!ice_fwlog_ring_empty(&hw->fwlog_ring) && !done) { - struct ice_fwlog_data *log; - u16 cur_buf_len; - - log = &hw->fwlog_ring.rings[hw->fwlog_ring.head]; - cur_buf_len = log->data_size; - if (cur_buf_len >= count) { - done = true; - continue; - } - - if (copy_to_user(buffer, log->data, cur_buf_len)) { - /* if there is an error then bail and return whatever - * the driver has copied so far - */ - done = true; - continue; - } - - data_copied += cur_buf_len; - buffer += cur_buf_len; - count -= cur_buf_len; - *ppos += cur_buf_len; - ice_fwlog_ring_increment(&hw->fwlog_ring.head, - hw->fwlog_ring.size); - } - - return data_copied; -} - -/** - * ice_debugfs_data_write - write into 'data' file - * @filp: the opened file - * @buf: where to find the user's data - * @count: the length of the user's data - * @ppos: file position offset - */ -static ssize_t -ice_debugfs_data_write(struct file *filp, const char __user *buf, size_t count, - loff_t *ppos) -{ - struct ice_pf *pf = filp->private_data; - struct device *dev = ice_pf_to_dev(pf); - struct ice_hw *hw = &pf->hw; - ssize_t ret; - - /* don't allow partial writes */ - if (*ppos != 0) - return 0; - - /* any value is allowed to clear the buffer so no need to even look at - * what the value is - */ - if (!(hw->fwlog_cfg.options & ICE_FWLOG_OPTION_IS_REGISTERED)) { - hw->fwlog_ring.head = 0; - hw->fwlog_ring.tail = 0; - } else { - dev_info(dev, "Can't clear FW log data while FW log running\n"); - ret = -EINVAL; - goto nr_buffs_write_error; - } - - /* if we get here, nothing went wrong; return count since we didn't - * really write anything - */ - ret = (ssize_t)count; - -nr_buffs_write_error: - /* This function always consumes all of the written input, or produces - * an error. Check and enforce this. Otherwise, the write operation - * won't complete properly. 
- */ - if (WARN_ON(ret != (ssize_t)count && ret >= 0)) - ret = -EIO; - - return ret; -} - -static const struct file_operations ice_debugfs_data_fops = { - .owner = THIS_MODULE, - .open = simple_open, - .read = ice_debugfs_data_read, - .write = ice_debugfs_data_write, -}; - -/** - * ice_debugfs_fwlog_init - setup the debugfs directory - * @pf: the ice that is starting up - */ -void ice_debugfs_fwlog_init(struct ice_pf *pf) +int ice_debugfs_pf_init(struct ice_pf *pf) { const char *name = pci_name(pf->pdev); - struct dentry *fw_modules_dir; - struct dentry **fw_modules; - int i; - - /* only support fw log commands on PF 0 */ - if (pf->hw.bus.func) - return; - - /* allocate space for this first because if it fails then we don't - * need to unwind - */ - fw_modules = kcalloc(ICE_NR_FW_LOG_MODULES, sizeof(*fw_modules), - GFP_KERNEL); - if (!fw_modules) - return; pf->ice_debugfs_pf = debugfs_create_dir(name, ice_debugfs_root); if (IS_ERR(pf->ice_debugfs_pf)) - goto err_create_module_files; - - pf->ice_debugfs_pf_fwlog = debugfs_create_dir("fwlog", - pf->ice_debugfs_pf); - if (IS_ERR(pf->ice_debugfs_pf_fwlog)) - goto err_create_module_files; + return PTR_ERR(pf->ice_debugfs_pf); - fw_modules_dir = debugfs_create_dir("modules", - pf->ice_debugfs_pf_fwlog); - if (IS_ERR(fw_modules_dir)) - goto err_create_module_files; - - for (i = 0; i < ICE_NR_FW_LOG_MODULES; i++) { - fw_modules[i] = debugfs_create_file(ice_fwlog_module_string[i], - 0600, fw_modules_dir, pf, - &ice_debugfs_module_fops); - if (IS_ERR(fw_modules[i])) - goto err_create_module_files; - } - - debugfs_create_file("nr_messages", 0600, - pf->ice_debugfs_pf_fwlog, pf, - &ice_debugfs_nr_messages_fops); - - pf->ice_debugfs_pf_fwlog_modules = fw_modules; - - debugfs_create_file("enable", 0600, pf->ice_debugfs_pf_fwlog, - pf, &ice_debugfs_enable_fops); - - debugfs_create_file("log_size", 0600, pf->ice_debugfs_pf_fwlog, - pf, &ice_debugfs_log_size_fops); - - debugfs_create_file("data", 0600, pf->ice_debugfs_pf_fwlog, - pf, &ice_debugfs_data_fops); - - return; - -err_create_module_files: - debugfs_remove_recursive(pf->ice_debugfs_pf_fwlog); - kfree(fw_modules); + return 0; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 55e0f2c6af9e..dc131779d426 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -3147,9 +3147,11 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; + struct ice_hw *hw; - ring->rx_max_pending = ICE_MAX_NUM_DESC; - ring->tx_max_pending = ICE_MAX_NUM_DESC; + hw = &vsi->back->hw; + ring->rx_max_pending = ICE_MAX_NUM_DESC_BY_MAC(hw); + ring->tx_max_pending = ICE_MAX_NUM_DESC_BY_MAC(hw); if (vsi->tx_rings && vsi->rx_rings) { ring->rx_pending = vsi->rx_rings[0]->count; ring->tx_pending = vsi->tx_rings[0]->count; @@ -3177,15 +3179,16 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; int i, timeout = 50, err = 0; + struct ice_hw *hw = &pf->hw; u16 new_rx_cnt, new_tx_cnt; - if (ring->tx_pending > ICE_MAX_NUM_DESC || + if (ring->tx_pending > ICE_MAX_NUM_DESC_BY_MAC(hw) || ring->tx_pending < ICE_MIN_NUM_DESC || - ring->rx_pending > ICE_MAX_NUM_DESC || + ring->rx_pending > ICE_MAX_NUM_DESC_BY_MAC(hw) || ring->rx_pending < ICE_MIN_NUM_DESC) { netdev_err(netdev, "Descriptors requested (Tx: %d / Rx: %d) out of range 
[%d-%d] (increment %d)\n", ring->tx_pending, ring->rx_pending, - ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC, + ICE_MIN_NUM_DESC, ICE_MAX_NUM_DESC_BY_MAC(hw), ICE_REQ_DESC_MULTIPLE); return -EINVAL; } @@ -3258,6 +3261,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, tx_rings[i].count = new_tx_cnt; tx_rings[i].desc = NULL; tx_rings[i].tx_buf = NULL; + tx_rings[i].tstamp_ring = NULL; tx_rings[i].tx_tstamps = &pf->ptp.port.tx; err = ice_setup_tx_ring(&tx_rings[i]); if (err) { @@ -4620,10 +4624,12 @@ static int ice_get_port_fec_stats(struct ice_hw *hw, u16 pcs_quad, u16 pcs_port, * ice_get_fec_stats - returns FEC correctable, uncorrectable stats per netdev * @netdev: network interface device structure * @fec_stats: buffer to hold FEC statistics for given port + * @hist: buffer to put FEC histogram statistics for given port * */ static void ice_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_port_topology port_topology; diff --git a/drivers/net/ethernet/intel/ice/ice_fwlog.c b/drivers/net/ethernet/intel/ice/ice_fwlog.c deleted file mode 100644 index a31bb026ad34..000000000000 --- a/drivers/net/ethernet/intel/ice/ice_fwlog.c +++ /dev/null @@ -1,474 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2022, Intel Corporation. */ - -#include <linux/vmalloc.h> -#include "ice.h" -#include "ice_common.h" -#include "ice_fwlog.h" - -bool ice_fwlog_ring_full(struct ice_fwlog_ring *rings) -{ - u16 head, tail; - - head = rings->head; - tail = rings->tail; - - if (head < tail && (tail - head == (rings->size - 1))) - return true; - else if (head > tail && (tail == (head - 1))) - return true; - - return false; -} - -bool ice_fwlog_ring_empty(struct ice_fwlog_ring *rings) -{ - return rings->head == rings->tail; -} - -void ice_fwlog_ring_increment(u16 *item, u16 size) -{ - *item = (*item + 1) & (size - 1); -} - -static int ice_fwlog_alloc_ring_buffs(struct ice_fwlog_ring *rings) -{ - int i, nr_bytes; - u8 *mem; - - nr_bytes = rings->size * ICE_AQ_MAX_BUF_LEN; - mem = vzalloc(nr_bytes); - if (!mem) - return -ENOMEM; - - for (i = 0; i < rings->size; i++) { - struct ice_fwlog_data *ring = &rings->rings[i]; - - ring->data_size = ICE_AQ_MAX_BUF_LEN; - ring->data = mem; - mem += ICE_AQ_MAX_BUF_LEN; - } - - return 0; -} - -static void ice_fwlog_free_ring_buffs(struct ice_fwlog_ring *rings) -{ - int i; - - for (i = 0; i < rings->size; i++) { - struct ice_fwlog_data *ring = &rings->rings[i]; - - /* the first ring is the base memory for the whole range so - * free it - */ - if (!i) - vfree(ring->data); - - ring->data = NULL; - ring->data_size = 0; - } -} - -#define ICE_FWLOG_INDEX_TO_BYTES(n) ((128 * 1024) << (n)) -/** - * ice_fwlog_realloc_rings - reallocate the FW log rings - * @hw: pointer to the HW structure - * @index: the new index to use to allocate memory for the log data - * - */ -void ice_fwlog_realloc_rings(struct ice_hw *hw, int index) -{ - struct ice_fwlog_ring ring; - int status, ring_size; - - /* convert the number of bytes into a number of 4K buffers. externally - * the driver presents the interface to the FW log data as a number of - * bytes because that's easy for users to understand. internally the - * driver uses a ring of buffers because the driver doesn't know where - * the beginning and end of any line of log data is so the driver has - * to overwrite data as complete blocks. 
when the data is returned to - * the user the driver knows that the data is correct and the FW log - * can be correctly parsed by the tools - */ - ring_size = ICE_FWLOG_INDEX_TO_BYTES(index) / ICE_AQ_MAX_BUF_LEN; - if (ring_size == hw->fwlog_ring.size) - return; - - /* allocate space for the new rings and buffers then release the - * old rings and buffers. that way if we don't have enough - * memory then we at least have what we had before - */ - ring.rings = kcalloc(ring_size, sizeof(*ring.rings), GFP_KERNEL); - if (!ring.rings) - return; - - ring.size = ring_size; - - status = ice_fwlog_alloc_ring_buffs(&ring); - if (status) { - dev_warn(ice_hw_to_dev(hw), "Unable to allocate memory for FW log ring data buffers\n"); - ice_fwlog_free_ring_buffs(&ring); - kfree(ring.rings); - return; - } - - ice_fwlog_free_ring_buffs(&hw->fwlog_ring); - kfree(hw->fwlog_ring.rings); - - hw->fwlog_ring.rings = ring.rings; - hw->fwlog_ring.size = ring.size; - hw->fwlog_ring.index = index; - hw->fwlog_ring.head = 0; - hw->fwlog_ring.tail = 0; -} - -/** - * ice_fwlog_init - Initialize FW logging configuration - * @hw: pointer to the HW structure - * - * This function should be called on driver initialization during - * ice_init_hw(). - */ -int ice_fwlog_init(struct ice_hw *hw) -{ - /* only support fw log commands on PF 0 */ - if (hw->bus.func) - return -EINVAL; - - ice_fwlog_set_supported(hw); - - if (ice_fwlog_supported(hw)) { - int status; - - /* read the current config from the FW and store it */ - status = ice_fwlog_get(hw, &hw->fwlog_cfg); - if (status) - return status; - - hw->fwlog_ring.rings = kcalloc(ICE_FWLOG_RING_SIZE_DFLT, - sizeof(*hw->fwlog_ring.rings), - GFP_KERNEL); - if (!hw->fwlog_ring.rings) { - dev_warn(ice_hw_to_dev(hw), "Unable to allocate memory for FW log rings\n"); - return -ENOMEM; - } - - hw->fwlog_ring.size = ICE_FWLOG_RING_SIZE_DFLT; - hw->fwlog_ring.index = ICE_FWLOG_RING_SIZE_INDEX_DFLT; - - status = ice_fwlog_alloc_ring_buffs(&hw->fwlog_ring); - if (status) { - dev_warn(ice_hw_to_dev(hw), "Unable to allocate memory for FW log ring data buffers\n"); - ice_fwlog_free_ring_buffs(&hw->fwlog_ring); - kfree(hw->fwlog_ring.rings); - return status; - } - - ice_debugfs_fwlog_init(hw->back); - } else { - dev_warn(ice_hw_to_dev(hw), "FW logging is not supported in this NVM image. Please update the NVM to get FW log support\n"); - } - - return 0; -} - -/** - * ice_fwlog_deinit - unroll FW logging configuration - * @hw: pointer to the HW structure - * - * This function should be called in ice_deinit_hw(). 
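[Editorial aside, not part of the patch above.] The removed ice_fwlog code sizes its ring from a user-chosen index and advances head/tail with a power-of-two mask rather than a modulo. A minimal userspace sketch of that arithmetic, assuming ICE_AQ_MAX_BUF_LEN is 4096 bytes (consistent with the default index 3 producing the 256-entry ICE_FWLOG_RING_SIZE_DFLT defined in the deleted header):

#include <stdio.h>

#define ICE_AQ_MAX_BUF_LEN		4096	/* assumed AQ buffer size */
#define ICE_FWLOG_INDEX_TO_BYTES(n)	((128 * 1024) << (n))

/* power-of-two ring: advance head/tail with a mask instead of a modulo */
static void fwlog_ring_increment(unsigned short *item, unsigned short size)
{
	*item = (*item + 1) & (size - 1);
}

int main(void)
{
	unsigned short head = 255;

	for (int i = 0; i <= 3; i++)
		printf("index %d -> %d bytes -> %d ring entries\n", i,
		       ICE_FWLOG_INDEX_TO_BYTES(i),
		       ICE_FWLOG_INDEX_TO_BYTES(i) / ICE_AQ_MAX_BUF_LEN);

	/* index 3 gives 256 entries, so a head of 255 wraps back to 0 */
	fwlog_ring_increment(&head, 256);
	printf("head after wrap: %u\n", head);
	return 0;
}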
- */ -void ice_fwlog_deinit(struct ice_hw *hw) -{ - struct ice_pf *pf = hw->back; - int status; - - /* only support fw log commands on PF 0 */ - if (hw->bus.func) - return; - - ice_debugfs_pf_deinit(hw->back); - - /* make sure FW logging is disabled to not put the FW in a weird state - * for the next driver load - */ - hw->fwlog_cfg.options &= ~ICE_FWLOG_OPTION_ARQ_ENA; - status = ice_fwlog_set(hw, &hw->fwlog_cfg); - if (status) - dev_warn(ice_hw_to_dev(hw), "Unable to turn off FW logging, status: %d\n", - status); - - kfree(pf->ice_debugfs_pf_fwlog_modules); - - pf->ice_debugfs_pf_fwlog_modules = NULL; - - status = ice_fwlog_unregister(hw); - if (status) - dev_warn(ice_hw_to_dev(hw), "Unable to unregister FW logging, status: %d\n", - status); - - if (hw->fwlog_ring.rings) { - ice_fwlog_free_ring_buffs(&hw->fwlog_ring); - kfree(hw->fwlog_ring.rings); - } -} - -/** - * ice_fwlog_supported - Cached for whether FW supports FW logging or not - * @hw: pointer to the HW structure - * - * This will always return false if called before ice_init_hw(), so it must be - * called after ice_init_hw(). - */ -bool ice_fwlog_supported(struct ice_hw *hw) -{ - return hw->fwlog_supported; -} - -/** - * ice_aq_fwlog_set - Set FW logging configuration AQ command (0xFF30) - * @hw: pointer to the HW structure - * @entries: entries to configure - * @num_entries: number of @entries - * @options: options from ice_fwlog_cfg->options structure - * @log_resolution: logging resolution - */ -static int -ice_aq_fwlog_set(struct ice_hw *hw, struct ice_fwlog_module_entry *entries, - u16 num_entries, u16 options, u16 log_resolution) -{ - struct ice_aqc_fw_log_cfg_resp *fw_modules; - struct ice_aqc_fw_log *cmd; - struct libie_aq_desc desc; - int status; - int i; - - fw_modules = kcalloc(num_entries, sizeof(*fw_modules), GFP_KERNEL); - if (!fw_modules) - return -ENOMEM; - - for (i = 0; i < num_entries; i++) { - fw_modules[i].module_identifier = - cpu_to_le16(entries[i].module_id); - fw_modules[i].log_level = entries[i].log_level; - } - - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logs_config); - desc.flags |= cpu_to_le16(LIBIE_AQ_FLAG_RD); - - cmd = libie_aq_raw(&desc); - - cmd->cmd_flags = ICE_AQC_FW_LOG_CONF_SET_VALID; - cmd->ops.cfg.log_resolution = cpu_to_le16(log_resolution); - cmd->ops.cfg.mdl_cnt = cpu_to_le16(num_entries); - - if (options & ICE_FWLOG_OPTION_ARQ_ENA) - cmd->cmd_flags |= ICE_AQC_FW_LOG_CONF_AQ_EN; - if (options & ICE_FWLOG_OPTION_UART_ENA) - cmd->cmd_flags |= ICE_AQC_FW_LOG_CONF_UART_EN; - - status = ice_aq_send_cmd(hw, &desc, fw_modules, - sizeof(*fw_modules) * num_entries, - NULL); - - kfree(fw_modules); - - return status; -} - -/** - * ice_fwlog_set - Set the firmware logging settings - * @hw: pointer to the HW structure - * @cfg: config used to set firmware logging - * - * This function should be called whenever the driver needs to set the firmware - * logging configuration. It can be called on initialization, reset, or during - * runtime. - * - * If the PF wishes to receive FW logging then it must register via - * ice_fwlog_register. Note, that ice_fwlog_register does not need to be called - * for init. 
- */ -int ice_fwlog_set(struct ice_hw *hw, struct ice_fwlog_cfg *cfg) -{ - if (!ice_fwlog_supported(hw)) - return -EOPNOTSUPP; - - return ice_aq_fwlog_set(hw, cfg->module_entries, - ICE_AQC_FW_LOG_ID_MAX, cfg->options, - cfg->log_resolution); -} - -/** - * ice_aq_fwlog_get - Get the current firmware logging configuration (0xFF32) - * @hw: pointer to the HW structure - * @cfg: firmware logging configuration to populate - */ -static int ice_aq_fwlog_get(struct ice_hw *hw, struct ice_fwlog_cfg *cfg) -{ - struct ice_aqc_fw_log_cfg_resp *fw_modules; - struct ice_aqc_fw_log *cmd; - struct libie_aq_desc desc; - u16 module_id_cnt; - int status; - void *buf; - int i; - - memset(cfg, 0, sizeof(*cfg)); - - buf = kzalloc(ICE_AQ_MAX_BUF_LEN, GFP_KERNEL); - if (!buf) - return -ENOMEM; - - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logs_query); - cmd = libie_aq_raw(&desc); - - cmd->cmd_flags = ICE_AQC_FW_LOG_AQ_QUERY; - - status = ice_aq_send_cmd(hw, &desc, buf, ICE_AQ_MAX_BUF_LEN, NULL); - if (status) { - ice_debug(hw, ICE_DBG_FW_LOG, "Failed to get FW log configuration\n"); - goto status_out; - } - - module_id_cnt = le16_to_cpu(cmd->ops.cfg.mdl_cnt); - if (module_id_cnt < ICE_AQC_FW_LOG_ID_MAX) { - ice_debug(hw, ICE_DBG_FW_LOG, "FW returned less than the expected number of FW log module IDs\n"); - } else if (module_id_cnt > ICE_AQC_FW_LOG_ID_MAX) { - ice_debug(hw, ICE_DBG_FW_LOG, "FW returned more than expected number of FW log module IDs, setting module_id_cnt to software expected max %u\n", - ICE_AQC_FW_LOG_ID_MAX); - module_id_cnt = ICE_AQC_FW_LOG_ID_MAX; - } - - cfg->log_resolution = le16_to_cpu(cmd->ops.cfg.log_resolution); - if (cmd->cmd_flags & ICE_AQC_FW_LOG_CONF_AQ_EN) - cfg->options |= ICE_FWLOG_OPTION_ARQ_ENA; - if (cmd->cmd_flags & ICE_AQC_FW_LOG_CONF_UART_EN) - cfg->options |= ICE_FWLOG_OPTION_UART_ENA; - if (cmd->cmd_flags & ICE_AQC_FW_LOG_QUERY_REGISTERED) - cfg->options |= ICE_FWLOG_OPTION_IS_REGISTERED; - - fw_modules = (struct ice_aqc_fw_log_cfg_resp *)buf; - - for (i = 0; i < module_id_cnt; i++) { - struct ice_aqc_fw_log_cfg_resp *fw_module = &fw_modules[i]; - - cfg->module_entries[i].module_id = - le16_to_cpu(fw_module->module_identifier); - cfg->module_entries[i].log_level = fw_module->log_level; - } - -status_out: - kfree(buf); - return status; -} - -/** - * ice_fwlog_get - Get the firmware logging settings - * @hw: pointer to the HW structure - * @cfg: config to populate based on current firmware logging settings - */ -int ice_fwlog_get(struct ice_hw *hw, struct ice_fwlog_cfg *cfg) -{ - if (!ice_fwlog_supported(hw)) - return -EOPNOTSUPP; - - return ice_aq_fwlog_get(hw, cfg); -} - -/** - * ice_aq_fwlog_register - Register PF for firmware logging events (0xFF31) - * @hw: pointer to the HW structure - * @reg: true to register and false to unregister - */ -static int ice_aq_fwlog_register(struct ice_hw *hw, bool reg) -{ - struct ice_aqc_fw_log *cmd; - struct libie_aq_desc desc; - - ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_fw_logs_register); - cmd = libie_aq_raw(&desc); - - if (reg) - cmd->cmd_flags = ICE_AQC_FW_LOG_AQ_REGISTER; - - return ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); -} - -/** - * ice_fwlog_register - Register the PF for firmware logging - * @hw: pointer to the HW structure - * - * After this call the PF will start to receive firmware logging based on the - * configuration set in ice_fwlog_set. 
- */ -int ice_fwlog_register(struct ice_hw *hw) -{ - int status; - - if (!ice_fwlog_supported(hw)) - return -EOPNOTSUPP; - - status = ice_aq_fwlog_register(hw, true); - if (status) - ice_debug(hw, ICE_DBG_FW_LOG, "Failed to register for firmware logging events over ARQ\n"); - else - hw->fwlog_cfg.options |= ICE_FWLOG_OPTION_IS_REGISTERED; - - return status; -} - -/** - * ice_fwlog_unregister - Unregister the PF from firmware logging - * @hw: pointer to the HW structure - */ -int ice_fwlog_unregister(struct ice_hw *hw) -{ - int status; - - if (!ice_fwlog_supported(hw)) - return -EOPNOTSUPP; - - status = ice_aq_fwlog_register(hw, false); - if (status) - ice_debug(hw, ICE_DBG_FW_LOG, "Failed to unregister from firmware logging events over ARQ\n"); - else - hw->fwlog_cfg.options &= ~ICE_FWLOG_OPTION_IS_REGISTERED; - - return status; -} - -/** - * ice_fwlog_set_supported - Set if FW logging is supported by FW - * @hw: pointer to the HW struct - * - * If FW returns success to the ice_aq_fwlog_get call then it supports FW - * logging, else it doesn't. Set the fwlog_supported flag accordingly. - * - * This function is only meant to be called during driver init to determine if - * the FW support FW logging. - */ -void ice_fwlog_set_supported(struct ice_hw *hw) -{ - struct ice_fwlog_cfg *cfg; - int status; - - hw->fwlog_supported = false; - - cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); - if (!cfg) - return; - - /* don't call ice_fwlog_get() because that would check to see if FW - * logging is supported which is what the driver is determining now - */ - status = ice_aq_fwlog_get(hw, cfg); - if (status) - ice_debug(hw, ICE_DBG_FW_LOG, "ice_aq_fwlog_get failed, FW logging is not supported on this version of FW, status %d\n", - status); - else - hw->fwlog_supported = true; - - kfree(cfg); -} diff --git a/drivers/net/ethernet/intel/ice/ice_fwlog.h b/drivers/net/ethernet/intel/ice/ice_fwlog.h deleted file mode 100644 index 287e71fa4b86..000000000000 --- a/drivers/net/ethernet/intel/ice/ice_fwlog.h +++ /dev/null @@ -1,79 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Copyright (C) 2022, Intel Corporation. */ - -#ifndef _ICE_FWLOG_H_ -#define _ICE_FWLOG_H_ -#include "ice_adminq_cmd.h" - -struct ice_hw; - -/* Only a single log level should be set and all log levels under the set value - * are enabled, e.g. 
if log level is set to ICE_FW_LOG_LEVEL_VERBOSE, then all - * other log levels are included (except ICE_FW_LOG_LEVEL_NONE) - */ -enum ice_fwlog_level { - ICE_FWLOG_LEVEL_NONE = 0, - ICE_FWLOG_LEVEL_ERROR = 1, - ICE_FWLOG_LEVEL_WARNING = 2, - ICE_FWLOG_LEVEL_NORMAL = 3, - ICE_FWLOG_LEVEL_VERBOSE = 4, - ICE_FWLOG_LEVEL_INVALID, /* all values >= this entry are invalid */ -}; - -struct ice_fwlog_module_entry { - /* module ID for the corresponding firmware logging event */ - u16 module_id; - /* verbosity level for the module_id */ - u8 log_level; -}; - -struct ice_fwlog_cfg { - /* list of modules for configuring log level */ - struct ice_fwlog_module_entry module_entries[ICE_AQC_FW_LOG_ID_MAX]; - /* options used to configure firmware logging */ - u16 options; -#define ICE_FWLOG_OPTION_ARQ_ENA BIT(0) -#define ICE_FWLOG_OPTION_UART_ENA BIT(1) - /* set before calling ice_fwlog_init() so the PF registers for firmware - * logging on initialization - */ -#define ICE_FWLOG_OPTION_REGISTER_ON_INIT BIT(2) - /* set in the ice_fwlog_get() response if the PF is registered for FW - * logging events over ARQ - */ -#define ICE_FWLOG_OPTION_IS_REGISTERED BIT(3) - - /* minimum number of log events sent per Admin Receive Queue event */ - u16 log_resolution; -}; - -struct ice_fwlog_data { - u16 data_size; - u8 *data; -}; - -struct ice_fwlog_ring { - struct ice_fwlog_data *rings; - u16 index; - u16 size; - u16 head; - u16 tail; -}; - -#define ICE_FWLOG_RING_SIZE_INDEX_DFLT 3 -#define ICE_FWLOG_RING_SIZE_DFLT 256 -#define ICE_FWLOG_RING_SIZE_MAX 512 - -bool ice_fwlog_ring_full(struct ice_fwlog_ring *rings); -bool ice_fwlog_ring_empty(struct ice_fwlog_ring *rings); -void ice_fwlog_ring_increment(u16 *item, u16 size); -void ice_fwlog_set_supported(struct ice_hw *hw); -bool ice_fwlog_supported(struct ice_hw *hw); -int ice_fwlog_init(struct ice_hw *hw); -void ice_fwlog_deinit(struct ice_hw *hw); -int ice_fwlog_set(struct ice_hw *hw, struct ice_fwlog_cfg *cfg); -int ice_fwlog_get(struct ice_hw *hw, struct ice_fwlog_cfg *cfg); -int ice_fwlog_register(struct ice_hw *hw); -int ice_fwlog_unregister(struct ice_hw *hw); -void ice_fwlog_realloc_rings(struct ice_hw *hw, int index); -#endif /* _ICE_FWLOG_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index dd520aa4d1d6..082ad33c53dc 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -19,6 +19,7 @@ #define QTX_COMM_HEAD_MAX_INDEX 16383 #define QTX_COMM_HEAD_HEAD_S 0 #define QTX_COMM_HEAD_HEAD_M ICE_M(0x1FFF, 0) +#define E830_GLQTX_TXTIME_DBELL_LSB(_DBQM) (0x002E0000 + ((_DBQM) * 8)) #define PF_FW_ARQBAH 0x00080180 #define PF_FW_ARQBAL 0x00080080 #define PF_FW_ARQH 0x00080380 @@ -571,6 +572,8 @@ #define E830_PFPTM_SEM_BUSY_M BIT(0) #define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4)) #define VFINT_DYN_CTLN_CLEARPBA_M BIT(1) +#define E830_GLTXTIME_FETCH_PROFILE(_i, _j) (0x002D3500 + ((_i) * 4 + (_j) * 64)) +#define E830_GLTXTIME_FETCH_PROFILE_FETCH_TS_DESC_M ICE_M(0x1FF, 0) #define E830_MBX_PF_IN_FLIGHT_VF_MSGS_THRESH 0x00234000 #define E830_MBX_VF_DEC_TRIG(_VF) (0x00233800 + (_VF) * 4) #define E830_MBX_VF_IN_FLIGHT_MSGS_AT_PF_CNT(_VF) (0x00233000 + (_VF) * 4) diff --git a/drivers/net/ethernet/intel/ice/ice_idc.c b/drivers/net/ethernet/intel/ice/ice_idc.c index 6ab53e430f91..420d45c2558b 100644 --- a/drivers/net/ethernet/intel/ice/ice_idc.c +++ b/drivers/net/ethernet/intel/ice/ice_idc.c @@ -336,6 +336,7 @@ int ice_plug_aux_dev(struct ice_pf *pf) 
mutex_lock(&pf->adev_mutex); cdev->adev = adev; mutex_unlock(&pf->adev_mutex); + set_bit(ICE_FLAG_AUX_DEV_CREATED, pf->flags); return 0; } @@ -347,15 +348,16 @@ void ice_unplug_aux_dev(struct ice_pf *pf) { struct auxiliary_device *adev; + if (!test_and_clear_bit(ICE_FLAG_AUX_DEV_CREATED, pf->flags)) + return; + mutex_lock(&pf->adev_mutex); adev = pf->cdev_info->adev; pf->cdev_info->adev = NULL; mutex_unlock(&pf->adev_mutex); - if (adev) { - auxiliary_device_delete(adev); - auxiliary_device_uninit(adev); - } + auxiliary_device_delete(adev); + auxiliary_device_uninit(adev); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lag.c b/drivers/net/ethernet/intel/ice/ice_lag.c index b1129da72139..aebf8e08a297 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.c +++ b/drivers/net/ethernet/intel/ice/ice_lag.c @@ -10,12 +10,17 @@ #define ICE_LAG_RES_SHARED BIT(14) #define ICE_LAG_RES_VALID BIT(15) -#define LACP_TRAIN_PKT_LEN 16 -static const u8 lacp_train_pkt[LACP_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, - 0x88, 0x09, 0, 0 }; +#define ICE_TRAIN_PKT_LEN 16 +static const u8 lacp_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0x88, 0x09, 0, 0 }; +static const u8 act_act_train_pkt[ICE_TRAIN_PKT_LEN] = { 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0 }; #define ICE_RECIPE_LEN 64 +#define ICE_LAG_SRIOV_CP_RECIPE 10 + static const u8 ice_dflt_vsi_rcp[ICE_RECIPE_LEN] = { 0x05, 0, 0, 0, 0x20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x85, 0, 0x01, 0, 0, 0, 0xff, 0xff, 0x08, 0, 0, 0, 0, 0, 0, 0, @@ -46,10 +51,10 @@ static void ice_lag_set_primary(struct ice_lag *lag) } /** - * ice_lag_set_backup - set PF LAG state to Backup + * ice_lag_set_bkup - set PF LAG state to Backup * @lag: LAG info struct */ -static void ice_lag_set_backup(struct ice_lag *lag) +static void ice_lag_set_bkup(struct ice_lag *lag) { struct ice_pf *pf = lag->pf; @@ -99,6 +104,28 @@ static bool netif_is_same_ice(struct ice_pf *pf, struct net_device *netdev) } /** + * ice_lag_config_eswitch - configure eswitch to work with LAG + * @lag: lag info struct + * @netdev: active network interface device struct + * + * Updates all port representors in eswitch to use @netdev for Tx. + * + * Configures the netdev to keep dst metadata (also used in representor Tx). + * This is required for an uplink without switchdev mode configured. 
+ */ +static void ice_lag_config_eswitch(struct ice_lag *lag, + struct net_device *netdev) +{ + struct ice_repr *repr; + unsigned long id; + + xa_for_each(&lag->pf->eswitch.reprs, id, repr) + repr->dst->u.port_info.lower_dev = netdev; + + netif_keep_dst(netdev); +} + +/** * ice_netdev_to_lag - return pointer to associated lag struct from netdev * @netdev: pointer to net_device struct to query */ @@ -210,13 +237,12 @@ ice_lag_cfg_fltr(struct ice_lag *lag, u32 act, u16 recipe_id, u16 *rule_idx, u8 direction, bool add) { struct ice_sw_rule_lkup_rx_tx *s_rule; + struct ice_hw *hw = &lag->pf->hw; u16 s_rule_sz, vsi_num; - struct ice_hw *hw; u8 *eth_hdr; u32 opc; int err; - hw = &lag->pf->hw; vsi_num = ice_get_hw_vsi_num(hw, 0); s_rule_sz = ICE_SW_RULE_RX_TX_ETH_HDR_SIZE(s_rule); @@ -314,26 +340,15 @@ ice_lag_cfg_drop_fltr(struct ice_lag *lag, bool add) } /** - * ice_lag_cfg_pf_fltrs - set filters up for new active port + * ice_lag_cfg_pf_fltrs_act_bkup - set filters up for new active port * @lag: local interfaces lag struct - * @ptr: opaque data containing notifier event + * @bonding_info: netdev event bonding info */ static void -ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) +ice_lag_cfg_pf_fltrs_act_bkup(struct ice_lag *lag, + struct netdev_bonding_info *bonding_info) { - struct netdev_notifier_bonding_info *info; - struct netdev_bonding_info *bonding_info; - struct net_device *event_netdev; - struct device *dev; - - event_netdev = netdev_notifier_info_to_dev(ptr); - /* not for this netdev */ - if (event_netdev != lag->netdev) - return; - - info = (struct netdev_notifier_bonding_info *)ptr; - bonding_info = &info->bonding_info; - dev = ice_pf_to_dev(lag->pf); + struct device *dev = ice_pf_to_dev(lag->pf); /* interface not active - remove old default VSI rule */ if (bonding_info->slave.state && lag->pf_rx_rule_id) { @@ -354,6 +369,105 @@ ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) } /** + * ice_lag_cfg_lp_fltr - configure lport filters + * @lag: local interface's lag struct + * @add: add or remove rule + * @cp: control packet only or general PF lport rule + */ +static void +ice_lag_cfg_lp_fltr(struct ice_lag *lag, bool add, bool cp) +{ + struct ice_sw_rule_lkup_rx_tx *s_rule; + struct ice_vsi *vsi = lag->pf->vsi[0]; + u16 buf_len, opc; + + buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, ICE_TRAIN_PKT_LEN); + s_rule = kzalloc(buf_len, GFP_KERNEL); + if (!s_rule) { + netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n"); + return; + } + + if (add) { + if (cp) { + s_rule->recipe_id = + cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE); + memcpy(s_rule->hdr_data, lacp_train_pkt, + ICE_TRAIN_PKT_LEN); + } else { + s_rule->recipe_id = cpu_to_le16(lag->act_act_recipe); + memcpy(s_rule->hdr_data, act_act_train_pkt, + ICE_TRAIN_PKT_LEN); + } + + s_rule->src = cpu_to_le16(vsi->port_info->lport); + s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI | + ICE_SINGLE_ACT_LAN_ENABLE | + ICE_SINGLE_ACT_VALID_BIT | + FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, + vsi->vsi_num)); + s_rule->hdr_len = cpu_to_le16(ICE_TRAIN_PKT_LEN); + s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + opc = ice_aqc_opc_add_sw_rules; + } else { + opc = ice_aqc_opc_remove_sw_rules; + if (cp) + s_rule->index = cpu_to_le16(lag->cp_rule_idx); + else + s_rule->index = cpu_to_le16(lag->act_act_rule_idx); + } + if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) { + netdev_warn(lag->netdev, "Error %s %s rule for aggregate\n", + add ? "ADDING" : "REMOVING", + cp ? 
"CONTROL PACKET" : "LPORT"); + goto err_cp_free; + } + + if (add) { + if (cp) + lag->cp_rule_idx = le16_to_cpu(s_rule->index); + else + lag->act_act_rule_idx = le16_to_cpu(s_rule->index); + } else { + if (cp) + lag->cp_rule_idx = 0; + else + lag->act_act_rule_idx = 0; + } + +err_cp_free: + kfree(s_rule); +} + +/** + * ice_lag_cfg_pf_fltrs - set filters up for PF traffic + * @lag: local interfaces lag struct + * @ptr: opaque data containing notifier event + */ +static void +ice_lag_cfg_pf_fltrs(struct ice_lag *lag, void *ptr) +{ + struct netdev_notifier_bonding_info *info = ptr; + struct netdev_bonding_info *bonding_info; + struct net_device *event_netdev; + + event_netdev = netdev_notifier_info_to_dev(ptr); + if (event_netdev != lag->netdev) + return; + + bonding_info = &info->bonding_info; + + if (lag->bond_aa) { + if (lag->need_fltr_cfg) { + ice_lag_cfg_lp_fltr(lag, true, false); + lag->need_fltr_cfg = false; + } + } else { + ice_lag_cfg_pf_fltrs_act_bkup(lag, bonding_info); + } +} + +/** * ice_display_lag_info - print LAG info * @lag: LAG info struct */ @@ -402,12 +516,11 @@ static u16 ice_lag_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf, u16 vsi_num, u16 numq, u8 tc) { + struct ice_pf *pf = hw->back; struct ice_q_ctx *q_ctx; u16 qid, count = 0; - struct ice_pf *pf; int i; - pf = hw->back; for (i = 0; i < numq; i++) { q_ctx = ice_get_lan_q_ctx(hw, vsi_num, tc, i); if (!q_ctx) { @@ -577,7 +690,7 @@ ice_lag_move_vf_node_tc(struct ice_lag *lag, u8 oldport, u8 newport, } if (ice_aq_cfg_lan_txq(&lag->pf->hw, qbuf, qbuf_size, valq, oldport, - newport, NULL)) { + newport, ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG failover\n"); goto qbuf_err; } @@ -677,54 +790,6 @@ ice_lag_move_single_vf_nodes(struct ice_lag *lag, u8 oldport, u8 newport, } /** - * ice_lag_move_new_vf_nodes - Move Tx scheduling nodes for a VF if required - * @vf: the VF to move Tx nodes for - * - * Called just after configuring new VF queues. Check whether the VF Tx - * scheduling nodes need to be updated to fail over to the active port. If so, - * move them now. 
- */ -void ice_lag_move_new_vf_nodes(struct ice_vf *vf) -{ - struct ice_lag_netdev_list ndlist; - u8 pri_port, act_port; - struct ice_lag *lag; - struct ice_vsi *vsi; - struct ice_pf *pf; - - vsi = ice_get_vf_vsi(vf); - - if (WARN_ON(!vsi)) - return; - - if (WARN_ON(vsi->type != ICE_VSI_VF)) - return; - - pf = vf->pf; - lag = pf->lag; - - mutex_lock(&pf->lag_mutex); - if (!lag->bonded) - goto new_vf_unlock; - - pri_port = pf->hw.port_info->lport; - act_port = lag->active_port; - - if (lag->upper_netdev) - ice_lag_build_netdev_list(lag, &ndlist); - - if (ice_is_feature_supported(pf, ICE_F_SRIOV_LAG) && - lag->bonded && lag->primary && pri_port != act_port && - !list_empty(lag->netdev_head)) - ice_lag_move_single_vf_nodes(lag, pri_port, act_port, vsi->idx); - - ice_lag_destroy_netdev_list(lag, &ndlist); - -new_vf_unlock: - mutex_unlock(&pf->lag_mutex); -} - -/** * ice_lag_move_vf_nodes - move Tx scheduling nodes for all VFs to new port * @lag: lag info struct * @oldport: lport of previous interface @@ -767,61 +832,6 @@ void ice_lag_move_vf_nodes_cfg(struct ice_lag *lag, u8 src_prt, u8 dst_prt) ice_lag_destroy_netdev_list(lag, &ndlist); } -#define ICE_LAG_SRIOV_CP_RECIPE 10 -#define ICE_LAG_SRIOV_TRAIN_PKT_LEN 16 - -/** - * ice_lag_cfg_cp_fltr - configure filter for control packets - * @lag: local interface's lag struct - * @add: add or remove rule - */ -static void -ice_lag_cfg_cp_fltr(struct ice_lag *lag, bool add) -{ - struct ice_sw_rule_lkup_rx_tx *s_rule = NULL; - struct ice_vsi *vsi; - u16 buf_len, opc; - - vsi = lag->pf->vsi[0]; - - buf_len = ICE_SW_RULE_RX_TX_HDR_SIZE(s_rule, - ICE_LAG_SRIOV_TRAIN_PKT_LEN); - s_rule = kzalloc(buf_len, GFP_KERNEL); - if (!s_rule) { - netdev_warn(lag->netdev, "-ENOMEM error configuring CP filter\n"); - return; - } - - if (add) { - s_rule->hdr.type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); - s_rule->recipe_id = cpu_to_le16(ICE_LAG_SRIOV_CP_RECIPE); - s_rule->src = cpu_to_le16(vsi->port_info->lport); - s_rule->act = cpu_to_le32(ICE_FWD_TO_VSI | - ICE_SINGLE_ACT_LAN_ENABLE | - ICE_SINGLE_ACT_VALID_BIT | - FIELD_PREP(ICE_SINGLE_ACT_VSI_ID_M, vsi->vsi_num)); - s_rule->hdr_len = cpu_to_le16(ICE_LAG_SRIOV_TRAIN_PKT_LEN); - memcpy(s_rule->hdr_data, lacp_train_pkt, LACP_TRAIN_PKT_LEN); - opc = ice_aqc_opc_add_sw_rules; - } else { - opc = ice_aqc_opc_remove_sw_rules; - s_rule->index = cpu_to_le16(lag->cp_rule_idx); - } - if (ice_aq_sw_rules(&lag->pf->hw, s_rule, buf_len, 1, opc, NULL)) { - netdev_warn(lag->netdev, "Error %s CP rule for fail-over\n", - add ? 
"ADDING" : "REMOVING"); - goto cp_free; - } - - if (add) - lag->cp_rule_idx = le16_to_cpu(s_rule->index); - else - lag->cp_rule_idx = 0; - -cp_free: - kfree(s_rule); -} - /** * ice_lag_prepare_vf_reset - helper to adjust vf lag for reset * @lag: lag struct for interface that owns VF @@ -835,11 +845,20 @@ u8 ice_lag_prepare_vf_reset(struct ice_lag *lag) u8 pri_prt, act_prt; if (lag && lag->bonded && lag->primary && lag->upper_netdev) { - pri_prt = lag->pf->hw.port_info->lport; - act_prt = lag->active_port; - if (act_prt != pri_prt && act_prt != ICE_LAG_INVALID_PORT) { - ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); - return act_prt; + if (!lag->bond_aa) { + pri_prt = lag->pf->hw.port_info->lport; + act_prt = lag->active_port; + if (act_prt != pri_prt && + act_prt != ICE_LAG_INVALID_PORT) { + ice_lag_move_vf_nodes_cfg(lag, act_prt, pri_prt); + return act_prt; + } + } else { + if (lag->port_bitmap & ICE_LAGS_M) { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, NULL); + lag->port_bitmap |= ICE_LAGS_M; + } } } @@ -857,10 +876,15 @@ void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt) { u8 pri_prt; - if (lag && lag->bonded && lag->primary && - act_prt != ICE_LAG_INVALID_PORT) { - pri_prt = lag->pf->hw.port_info->lport; - ice_lag_move_vf_nodes_cfg(lag, pri_prt, act_prt); + if (lag && lag->bonded && lag->primary) { + if (!lag->bond_aa) { + pri_prt = lag->pf->hw.port_info->lport; + if (act_prt != ICE_LAG_INVALID_PORT) + ice_lag_move_vf_nodes_cfg(lag, pri_prt, + act_prt); + } else { + ice_lag_aa_failover(lag, ICE_LAGS_IDX, NULL); + } } } @@ -873,13 +897,12 @@ void ice_lag_complete_vf_reset(struct ice_lag *lag, u8 act_prt) */ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_bonding_info *info; + struct netdev_notifier_bonding_info *info = ptr; struct netdev_bonding_info *bonding_info; struct net_device *event_netdev; const char *lag_netdev_name; event_netdev = netdev_notifier_info_to_dev(ptr); - info = ptr; lag_netdev_name = netdev_name(lag->netdev); bonding_info = &info->bonding_info; @@ -897,7 +920,7 @@ static void ice_lag_info_event(struct ice_lag *lag, void *ptr) } if (bonding_info->slave.state) - ice_lag_set_backup(lag); + ice_lag_set_bkup(lag); else ice_lag_set_primary(lag); @@ -906,6 +929,295 @@ lag_out: } /** + * ice_lag_aa_qbuf_recfg - fill a single queue buffer for recfg cmd + * @hw: HW struct that contains the queue context + * @qbuf: pointer to single queue buffer + * @vsi_num: index of the VF VSI in PF space + * @qnum: queue index + * + * Return: Zero on success, error code on failure. 
+ */ +static int +ice_lag_aa_qbuf_recfg(struct ice_hw *hw, struct ice_aqc_cfg_txqs_buf *qbuf, + u16 vsi_num, int qnum) +{ + struct ice_pf *pf = hw->back; + struct ice_q_ctx *q_ctx; + u16 q_id; + + q_ctx = ice_get_lan_q_ctx(hw, vsi_num, 0, qnum); + if (!q_ctx) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d no Q context\n", qnum); + return -ENOENT; + } + + if (q_ctx->q_teid == ICE_INVAL_TEID) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL TEID\n", qnum); + return -EINVAL; + } + + if (q_ctx->q_handle == ICE_INVAL_Q_HANDLE) { + dev_dbg(ice_hw_to_dev(hw), "LAG queue %d INVAL Q HANDLE\n", qnum); + return -EINVAL; + } + + q_id = pf->vsi[vsi_num]->txq_map[q_ctx->q_handle]; + qbuf->queue_info[0].q_handle = cpu_to_le16(q_id); + qbuf->queue_info[0].tc = 0; + qbuf->queue_info[0].q_teid = cpu_to_le32(q_ctx->q_teid); + + return 0; +} + +/** + * ice_lag_aa_move_vf_qs - Move some/all VF queues to destination + * @lag: primary interface's lag struct + * @dest: index of destination port + * @vsi_num: index of VF VSI in PF space + * @all: if true move all queues to destination + * @odd: VF wide q indicator for odd/even + * @e_pf: PF struct for the event interface + * + * the parameter "all" is to control whether we are splitting the queues + * between two interfaces or moving them all to the destination interface + */ +static void ice_lag_aa_move_vf_qs(struct ice_lag *lag, u8 dest, u16 vsi_num, + bool all, bool *odd, struct ice_pf *e_pf) +{ + DEFINE_RAW_FLEX(struct ice_aqc_cfg_txqs_buf, qbuf, queue_info, 1); + struct ice_hw *old_hw, *new_hw, *pri_hw, *sec_hw; + struct device *dev = ice_pf_to_dev(lag->pf); + struct ice_vsi_ctx *pv_ctx, *sv_ctx; + struct ice_lag_netdev_list ndlist; + u16 num_q, qbuf_size, sec_vsi_num; + u8 pri_lport, sec_lport; + u32 pvf_teid, svf_teid; + u16 vf_id; + + vf_id = lag->pf->vsi[vsi_num]->vf->vf_id; + /* If sec_vf[] not defined, then no second interface to share with */ + if (lag->sec_vf[vf_id]) + sec_vsi_num = lag->sec_vf[vf_id]->idx; + else + return; + + pri_lport = lag->bond_lport_pri; + sec_lport = lag->bond_lport_sec; + + if (pri_lport == ICE_LAG_INVALID_PORT || + sec_lport == ICE_LAG_INVALID_PORT) + return; + + if (!e_pf) + ice_lag_build_netdev_list(lag, &ndlist); + + pri_hw = &lag->pf->hw; + if (e_pf && lag->pf != e_pf) + sec_hw = &e_pf->hw; + else + sec_hw = ice_lag_find_hw_by_lport(lag, sec_lport); + + if (!pri_hw || !sec_hw) + return; + + if (dest == ICE_LAGP_IDX) { + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(lag->pf); + if (!vsi) + return; + + old_hw = sec_hw; + new_hw = pri_hw; + ice_lag_config_eswitch(lag, vsi->netdev); + } else { + struct ice_pf *sec_pf = sec_hw->back; + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(sec_pf); + if (!vsi) + return; + + old_hw = pri_hw; + new_hw = sec_hw; + ice_lag_config_eswitch(lag, vsi->netdev); + } + + pv_ctx = ice_get_vsi_ctx(pri_hw, vsi_num); + if (!pv_ctx) { + dev_warn(dev, "Unable to locate primary VSI %d context for LAG failover\n", + vsi_num); + return; + } + + sv_ctx = ice_get_vsi_ctx(sec_hw, sec_vsi_num); + if (!sv_ctx) { + dev_warn(dev, "Unable to locate secondary VSI %d context for LAG failover\n", + vsi_num); + return; + } + + num_q = pv_ctx->num_lan_q_entries[0]; + qbuf_size = __struct_size(qbuf); + + /* Suspend traffic for primary VSI VF */ + pvf_teid = le32_to_cpu(pv_ctx->sched.vsi_node[0]->info.node_teid); + ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, true); + + /* Suspend traffic for secondary VSI VF */ + svf_teid = le32_to_cpu(sv_ctx->sched.vsi_node[0]->info.node_teid); + 
ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, true); + + for (int i = 0; i < num_q; i++) { + struct ice_sched_node *n_prt, *q_node, *parent; + struct ice_port_info *pi, *new_pi; + struct ice_vsi_ctx *src_ctx; + struct ice_sched_node *p; + struct ice_q_ctx *q_ctx; + u16 dst_vsi_num; + + pi = old_hw->port_info; + new_pi = new_hw->port_info; + + *odd = !(*odd); + if ((dest == ICE_LAGP_IDX && *odd && !all) || + (dest == ICE_LAGS_IDX && !(*odd) && !all) || + lag->q_home[vf_id][i] == dest) + continue; + + if (dest == ICE_LAGP_IDX) + dst_vsi_num = vsi_num; + else + dst_vsi_num = sec_vsi_num; + + n_prt = ice_sched_get_free_qparent(new_hw->port_info, + dst_vsi_num, 0, + ICE_SCHED_NODE_OWNER_LAN); + if (!n_prt) + continue; + + q_ctx = ice_get_lan_q_ctx(pri_hw, vsi_num, 0, i); + if (!q_ctx) + continue; + + if (dest == ICE_LAGP_IDX) + src_ctx = sv_ctx; + else + src_ctx = pv_ctx; + + q_node = ice_sched_find_node_by_teid(src_ctx->sched.vsi_node[0], + q_ctx->q_teid); + if (!q_node) + continue; + + qbuf->src_parent_teid = q_node->info.parent_teid; + qbuf->dst_parent_teid = n_prt->info.node_teid; + + /* Move the node in the HW/FW */ + if (ice_lag_aa_qbuf_recfg(pri_hw, qbuf, vsi_num, i)) + continue; + + if (dest == ICE_LAGP_IDX) + ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1, + sec_lport, pri_lport, + ICE_AQC_Q_CFG_MOVE_TC_CHNG, + NULL); + else + ice_aq_cfg_lan_txq(pri_hw, qbuf, qbuf_size, 1, + pri_lport, sec_lport, + ICE_AQC_Q_CFG_MOVE_TC_CHNG, + NULL); + + /* Move the node in the SW */ + parent = q_node->parent; + if (!parent) + continue; + + for (int n = 0; n < parent->num_children; n++) { + int j; + + if (parent->children[n] != q_node) + continue; + + for (j = n + 1; j < parent->num_children; + j++) { + parent->children[j - 1] = + parent->children[j]; + } + parent->children[j] = NULL; + parent->num_children--; + break; + } + + p = pi->sib_head[0][q_node->tx_sched_layer]; + while (p) { + if (p->sibling == q_node) { + p->sibling = q_node->sibling; + break; + } + p = p->sibling; + } + + if (pi->sib_head[0][q_node->tx_sched_layer] == q_node) + pi->sib_head[0][q_node->tx_sched_layer] = + q_node->sibling; + + q_node->parent = n_prt; + q_node->info.parent_teid = n_prt->info.node_teid; + q_node->sibling = NULL; + p = new_pi->sib_head[0][q_node->tx_sched_layer]; + if (p) { + while (p) { + if (!p->sibling) { + p->sibling = q_node; + break; + } + p = p->sibling; + } + } else { + new_pi->sib_head[0][q_node->tx_sched_layer] = + q_node; + } + + n_prt->children[n_prt->num_children++] = q_node; + lag->q_home[vf_id][i] = dest; + } + + ice_sched_suspend_resume_elems(pri_hw, 1, &pvf_teid, false); + ice_sched_suspend_resume_elems(sec_hw, 1, &svf_teid, false); + + if (!e_pf) + ice_lag_destroy_netdev_list(lag, &ndlist); +} + +/** + * ice_lag_aa_failover - move VF queues in A/A mode + * @lag: primary lag struct + * @dest: index of destination port + * @e_pf: PF struct for event port + */ +void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf) +{ + bool odd = true, all = false; + int i; + + /* Primary can be a target if down (cleanup), but secondary can't */ + if (dest == ICE_LAGS_IDX && !(lag->port_bitmap & ICE_LAGS_M)) + return; + + /* Move all queues to a destination if only one port is active, + * or no ports are active and dest is primary. 
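[Editorial aside, not part of the patch above.] ice_lag_aa_failover() decides between a full move and an odd/even split from port_bitmap, and ice_lag_aa_move_vf_qs() then alternates queues between the two legs with a shared toggle. A simplified sketch of that decision and split, using eight hypothetical queues and none of the VF/VSI context the real code walks:

#include <stdbool.h>
#include <stdio.h>

#define LAGP_M 0x1	/* primary port up */
#define LAGS_M 0x2	/* secondary port up */

enum { LAG_PRI, LAG_SEC };

int main(void)
{
	unsigned char port_bitmap = LAGP_M | LAGS_M;
	bool all, odd = true;	/* same initial toggle value as the driver */
	int dest = LAG_PRI;

	/* full move unless both legs are up (mirrors ice_lag_aa_failover) */
	all = (port_bitmap ^ (LAGP_M | LAGS_M)) ||
	      (!port_bitmap && dest == LAG_PRI);

	for (int q = 0; q < 8; q++) {
		odd = !odd;
		if (all) {
			printf("queue %d -> destination (full move)\n", q);
			continue;
		}
		/* split: the toggle leaves alternating queues on each leg */
		printf("queue %d -> %s\n", q, odd ? "secondary" : "primary");
	}
	return 0;
}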
+ */ + if ((lag->port_bitmap ^ (ICE_LAGP_M | ICE_LAGS_M)) || + (!lag->port_bitmap && dest == ICE_LAGP_IDX)) + all = true; + + ice_for_each_vsi(lag->pf, i) + if (lag->pf->vsi[i] && lag->pf->vsi[i]->type == ICE_VSI_VF) + ice_lag_aa_move_vf_qs(lag, dest, i, all, &odd, e_pf); +} + +/** * ice_lag_reclaim_vf_tc - move scheduling nodes back to primary interface * @lag: primary interface lag struct * @src_hw: HW struct current node location @@ -921,13 +1233,12 @@ ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num, u16 numq, valq, num_moved, qbuf_size; u16 buf_size = __struct_size(buf); struct ice_aqc_cfg_txqs_buf *qbuf; + struct ice_hw *hw = &lag->pf->hw; struct ice_sched_node *n_prt; __le32 teid, parent_teid; struct ice_vsi_ctx *ctx; - struct ice_hw *hw; u32 tmp_teid; - hw = &lag->pf->hw; ctx = ice_get_vsi_ctx(hw, vsi_num); if (!ctx) { dev_warn(dev, "Unable to locate VSI context for LAG reclaim\n"); @@ -968,7 +1279,7 @@ ice_lag_reclaim_vf_tc(struct ice_lag *lag, struct ice_hw *src_hw, u16 vsi_num, if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, src_hw->port_info->lport, hw->port_info->lport, - NULL)) { + ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG failover\n"); goto reclaim_qerr; } @@ -1039,36 +1350,15 @@ static void ice_lag_link(struct ice_lag *lag) lag->bonded = true; lag->role = ICE_LAG_UNSET; + lag->need_fltr_cfg = true; netdev_info(lag->netdev, "Shared SR-IOV resources in bond are active\n"); } /** - * ice_lag_config_eswitch - configure eswitch to work with LAG - * @lag: lag info struct - * @netdev: active network interface device struct - * - * Updates all port representors in eswitch to use @netdev for Tx. - * - * Configures the netdev to keep dst metadata (also used in representor Tx). - * This is required for an uplink without switchdev mode configured. 
- */ -static void ice_lag_config_eswitch(struct ice_lag *lag, - struct net_device *netdev) -{ - struct ice_repr *repr; - unsigned long id; - - xa_for_each(&lag->pf->eswitch.reprs, id, repr) - repr->dst->u.port_info.lower_dev = netdev; - - netif_keep_dst(netdev); -} - -/** - * ice_lag_unlink - handle unlink event + * ice_lag_act_bkup_unlink - handle unlink event for A/B bond * @lag: LAG info struct */ -static void ice_lag_unlink(struct ice_lag *lag) +static void ice_lag_act_bkup_unlink(struct ice_lag *lag) { u8 pri_port, act_port, loc_port; struct ice_pf *pf = lag->pf; @@ -1104,10 +1394,32 @@ static void ice_lag_unlink(struct ice_lag *lag) } } } +} - lag->bonded = false; - lag->role = ICE_LAG_NONE; - lag->upper_netdev = NULL; +/** + * ice_lag_aa_unlink - handle unlink event for Active-Active bond + * @lag: LAG info struct + */ +static void ice_lag_aa_unlink(struct ice_lag *lag) +{ + struct ice_lag *pri_lag; + + if (lag->primary) { + pri_lag = lag; + lag->port_bitmap &= ~ICE_LAGP_M; + } else { + pri_lag = ice_lag_find_primary(lag); + if (pri_lag) + pri_lag->port_bitmap &= ICE_LAGS_M; + } + + if (pri_lag) { + ice_lag_aa_failover(pri_lag, ICE_LAGP_IDX, lag->pf); + if (lag->primary) + pri_lag->bond_lport_pri = ICE_LAG_INVALID_PORT; + else + pri_lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + } } /** @@ -1123,10 +1435,20 @@ static void ice_lag_link_unlink(struct ice_lag *lag, void *ptr) if (netdev != lag->netdev) return; - if (info->linking) + if (info->linking) { ice_lag_link(lag); - else - ice_lag_unlink(lag); + } else { + if (lag->bond_aa) + ice_lag_aa_unlink(lag); + else + ice_lag_act_bkup_unlink(lag); + + lag->bonded = false; + lag->role = ICE_LAG_NONE; + lag->upper_netdev = NULL; + lag->bond_aa = false; + lag->need_fltr_cfg = false; + } } /** @@ -1224,11 +1546,8 @@ ice_lag_set_swid(u16 primary_swid, struct ice_lag *local_lag, */ static void ice_lag_primary_swid(struct ice_lag *lag, bool link) { - struct ice_hw *hw; - u16 swid; - - hw = &lag->pf->hw; - swid = hw->port_info->sw_id; + struct ice_hw *hw = &lag->pf->hw; + u16 swid = hw->port_info->sw_id; if (ice_share_res(hw, ICE_AQC_RES_TYPE_SWID, link, swid)) dev_warn(ice_pf_to_dev(lag->pf), "Failure to set primary interface shared status\n"); @@ -1241,12 +1560,10 @@ static void ice_lag_primary_swid(struct ice_lag *lag, bool link) */ static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf) { - u16 num_vsi, rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx; - struct ice_sw_rule_vsi_list *s_rule = NULL; + u16 rule_buf_sz, vsi_list_id, event_vsi_num, prim_vsi_idx, num_vsi = 1; + struct ice_sw_rule_vsi_list *s_rule; struct device *dev; - num_vsi = 1; - dev = ice_pf_to_dev(lag->pf); event_vsi_num = event_pf->vsi[0]->vsi_num; prim_vsi_idx = lag->pf->vsi[0]->idx; @@ -1282,12 +1599,10 @@ static void ice_lag_add_prune_list(struct ice_lag *lag, struct ice_pf *event_pf) */ static void ice_lag_del_prune_list(struct ice_lag *lag, struct ice_pf *event_pf) { - u16 num_vsi, vsi_num, vsi_idx, rule_buf_sz, vsi_list_id; - struct ice_sw_rule_vsi_list *s_rule = NULL; + u16 vsi_num, vsi_idx, rule_buf_sz, vsi_list_id, num_vsi = 1; + struct ice_sw_rule_vsi_list *s_rule; struct device *dev; - num_vsi = 1; - dev = ice_pf_to_dev(lag->pf); vsi_num = event_pf->vsi[0]->vsi_num; vsi_idx = lag->pf->vsi[0]->idx; @@ -1335,6 +1650,11 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf) ice_set_feature_support(pf, ICE_F_SRIOV_LAG); else ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); + + if (caps->sriov_aa_lag && 
ice_pkg_has_lport_extract(&pf->hw)) + ice_set_feature_support(pf, ICE_F_SRIOV_AA_LAG); + else + ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); } /** @@ -1344,11 +1664,10 @@ static void ice_lag_init_feature_support_flag(struct ice_pf *pf) */ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; struct ice_lag *primary_lag; struct net_device *netdev; - info = ptr; netdev = netdev_notifier_info_to_dev(ptr); /* not for this netdev */ @@ -1369,6 +1688,9 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) /* Configure primary's SWID to be shared */ ice_lag_primary_swid(lag, true); primary_lag = lag; + lag->bond_lport_pri = lag->pf->hw.port_info->lport; + lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + lag->port_bitmap = 0; } else { u16 swid; @@ -1378,16 +1700,29 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) swid = primary_lag->pf->hw.port_info->sw_id; ice_lag_set_swid(swid, lag, true); ice_lag_add_prune_list(primary_lag, lag->pf); - ice_lag_cfg_drop_fltr(lag, true); + primary_lag->bond_lport_sec = + lag->pf->hw.port_info->lport; } /* add filter for primary control packets */ - ice_lag_cfg_cp_fltr(lag, true); + ice_lag_cfg_lp_fltr(lag, true, true); } else { if (!primary_lag && lag->primary) primary_lag = lag; + if (primary_lag) { + for (int i = 0; i < ICE_MAX_SRIOV_VFS; i++) { + if (primary_lag->sec_vf[i]) { + ice_vsi_release(primary_lag->sec_vf[i]); + primary_lag->sec_vf[i] = NULL; + } + } + } + if (!lag->primary) { ice_lag_set_swid(0, lag, false); + if (primary_lag) + primary_lag->bond_lport_sec = + ICE_LAG_INVALID_PORT; } else { if (primary_lag && lag->primary) { ice_lag_primary_swid(lag, false); @@ -1395,7 +1730,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) } } /* remove filter for control packets */ - ice_lag_cfg_cp_fltr(lag, false); + ice_lag_cfg_lp_fltr(lag, false, !lag->bond_aa); } } @@ -1408,7 +1743,7 @@ static void ice_lag_changeupper_event(struct ice_lag *lag, void *ptr) */ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; struct ice_hw *prim_hw, *active_hw; struct net_device *event_netdev; struct ice_pf *pf; @@ -1421,19 +1756,34 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr) if (!netif_is_same_ice(lag->pf, event_netdev)) return; + if (info->upper_dev != lag->upper_netdev) + return; + + if (info->linking) + return; + pf = lag->pf; prim_hw = &pf->hw; prim_port = prim_hw->port_info->lport; - info = (struct netdev_notifier_changeupper_info *)ptr; - if (info->upper_dev != lag->upper_netdev) - return; - - if (!info->linking) { - /* Since there are only two interfaces allowed in SRIOV+LAG, if - * one port is leaving, then nodes need to be on primary - * interface. - */ + /* Since there are only two interfaces allowed in SRIOV+LAG, if + * one port is leaving, then nodes need to be on primary + * interface. 
+ */ + if (lag->bond_aa) { + struct ice_netdev_priv *e_ndp; + struct ice_pf *e_pf; + + e_ndp = netdev_priv(event_netdev); + e_pf = e_ndp->vsi->back; + + if (lag->bond_lport_pri != ICE_LAG_INVALID_PORT && + lag->port_bitmap & ICE_LAGS_M) { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, e_pf); + lag->bond_lport_sec = ICE_LAG_INVALID_PORT; + } + } else { if (prim_port != lag->active_port && lag->active_port != ICE_LAG_INVALID_PORT) { active_hw = ice_lag_find_hw_by_lport(lag, @@ -1445,45 +1795,32 @@ static void ice_lag_monitor_link(struct ice_lag *lag, void *ptr) } /** - * ice_lag_monitor_active - main PF keep track of which port is active + * ice_lag_monitor_act_bkup - keep track of which port is active in A/B LAG * @lag: lag info struct - * @ptr: opaque data containing notifier event + * @b_info: bonding info + * @event_netdev: net_device got target netdev * * This function is for the primary PF to monitor changes in which port is * active and handle changes for SRIOV VF functionality */ -static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) +static void ice_lag_monitor_act_bkup(struct ice_lag *lag, + struct netdev_bonding_info *b_info, + struct net_device *event_netdev) { - struct net_device *event_netdev, *event_upper; - struct netdev_notifier_bonding_info *info; - struct netdev_bonding_info *bonding_info; struct ice_netdev_priv *event_np; struct ice_pf *pf, *event_pf; u8 prim_port, event_port; - if (!lag->primary) - return; - pf = lag->pf; if (!pf) return; - event_netdev = netdev_notifier_info_to_dev(ptr); - rcu_read_lock(); - event_upper = netdev_master_upper_dev_get_rcu(event_netdev); - rcu_read_unlock(); - if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev) - return; - event_np = netdev_priv(event_netdev); event_pf = event_np->vsi->back; event_port = event_pf->hw.port_info->lport; prim_port = pf->hw.port_info->lport; - info = (struct netdev_notifier_bonding_info *)ptr; - bonding_info = &info->bonding_info; - - if (!bonding_info->slave.state) { + if (!b_info->slave.state) { /* if no port is currently active, then nodes and filters exist * on primary port, check if we need to move them */ @@ -1520,6 +1857,128 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) } /** + * ice_lag_aa_clear_spoof - adjust the placeholder VSI spoofing for A/A LAG + * @vsi: placeholder VSI to adjust + */ +static void ice_lag_aa_clear_spoof(struct ice_vsi *vsi) +{ + ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); +} + +/** + * ice_lag_monitor_act_act - Keep track of active ports in A/A LAG + * @lag: lag struct for primary interface + * @b_info: bonding_info for event + * @event_netdev: net_device for target netdev + */ +static void ice_lag_monitor_act_act(struct ice_lag *lag, + struct netdev_bonding_info *b_info, + struct net_device *event_netdev) +{ + struct ice_netdev_priv *event_np; + u8 prim_port, event_port; + struct ice_pf *event_pf; + + event_np = netdev_priv(event_netdev); + event_pf = event_np->vsi->back; + event_port = event_pf->hw.port_info->lport; + prim_port = lag->pf->hw.port_info->lport; + + if (b_info->slave.link == BOND_LINK_UP) { + /* Port is coming up */ + if (prim_port == event_port) { + /* Processing event for primary interface */ + if (lag->bond_lport_pri == ICE_LAG_INVALID_PORT) + return; + + if (!(lag->port_bitmap & ICE_LAGP_M)) { + /* Primary port was not marked up before, move + * some|all VF queues to it and mark as up + */ + lag->port_bitmap |= ICE_LAGP_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, 
event_pf); + } + } else { + if (lag->bond_lport_sec == ICE_LAG_INVALID_PORT) + return; + + /* Create placeholder VSIs on secondary PF. + * The placeholder is necessary so that we have + * an element that represents the VF on the secondary + * interface's scheduling tree. This will be a tree + * root for scheduling nodes when they are moved to + * the secondary interface. + */ + if (!lag->sec_vf[0]) { + struct ice_vsi_cfg_params params = {}; + struct ice_vsi *nvsi; + struct ice_vf *vf; + unsigned int bkt; + + params.type = ICE_VSI_VF; + params.port_info = event_pf->hw.port_info; + params.flags = ICE_VSI_FLAG_INIT; + + ice_for_each_vf(lag->pf, bkt, vf) { + params.vf = vf; + nvsi = ice_vsi_setup(event_pf, + ¶ms); + ice_lag_aa_clear_spoof(nvsi); + lag->sec_vf[vf->vf_id] = nvsi; + } + } + + if (!(lag->port_bitmap & ICE_LAGS_M)) { + /* Secondary port was not marked up before, + * move some|all VF queues to it and mark as up + */ + lag->port_bitmap |= ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf); + } + } + } else { + /* Port is going down */ + if (prim_port == event_port) { + lag->port_bitmap &= ~ICE_LAGP_M; + ice_lag_aa_failover(lag, ICE_LAGS_IDX, event_pf); + } else { + lag->port_bitmap &= ~ICE_LAGS_M; + ice_lag_aa_failover(lag, ICE_LAGP_IDX, event_pf); + } + } +} + +/** + * ice_lag_monitor_info - Calls relevant A/A or A/B monitoring function + * @lag: lag info struct + * @ptr: opaque data containing notifier event + * + * This function is for the primary PF to monitor changes in which port is + * active and handle changes for SRIOV VF functionality + */ +static void ice_lag_monitor_info(struct ice_lag *lag, void *ptr) +{ + struct netdev_notifier_bonding_info *info = ptr; + struct net_device *event_netdev, *event_upper; + struct netdev_bonding_info *bonding_info; + + if (!lag->primary) + return; + + event_netdev = netdev_notifier_info_to_dev(ptr); + bonding_info = &info->bonding_info; + rcu_read_lock(); + event_upper = netdev_master_upper_dev_get_rcu(event_netdev); + rcu_read_unlock(); + if (!netif_is_ice(event_netdev) || event_upper != lag->upper_netdev) + return; + + if (lag->bond_aa) + ice_lag_monitor_act_act(lag, bonding_info, event_netdev); + else + ice_lag_monitor_act_bkup(lag, bonding_info, event_netdev); +} +/** * ice_lag_chk_comp - evaluate bonded interface for feature support * @lag: lag info struct * @ptr: opaque data for netdev event info @@ -1527,13 +1986,21 @@ static void ice_lag_monitor_active(struct ice_lag *lag, void *ptr) static bool ice_lag_chk_comp(struct ice_lag *lag, void *ptr) { + struct netdev_notifier_bonding_info *info = ptr; struct net_device *event_netdev, *event_upper; - struct netdev_notifier_bonding_info *info; struct netdev_bonding_info *bonding_info; struct list_head *tmp; struct device *dev; int count = 0; + /* All members need to know if bond A/A or A/B */ + bonding_info = &info->bonding_info; + lag->bond_mode = bonding_info->master.bond_mode; + if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) + lag->bond_aa = true; + else + lag->bond_aa = false; + if (!lag->primary) return true; @@ -1554,13 +2021,9 @@ ice_lag_chk_comp(struct ice_lag *lag, void *ptr) return false; } - info = (struct netdev_notifier_bonding_info *)ptr; - bonding_info = &info->bonding_info; - lag->bond_mode = bonding_info->master.bond_mode; - if (lag->bond_mode != BOND_MODE_ACTIVEBACKUP) { - dev_info(dev, "Bond Mode not ACTIVE-BACKUP - VF LAG disabled\n"); + if (lag->bond_aa && !ice_is_feature_supported(lag->pf, + ICE_F_SRIOV_AA_LAG)) return false; - } list_for_each(tmp, 
lag->netdev_head) { struct ice_dcbx_cfg *dcb_cfg, *peer_dcb_cfg; @@ -1664,10 +2127,9 @@ ice_lag_unregister(struct ice_lag *lag, struct net_device *event_netdev) static void ice_lag_monitor_rdma(struct ice_lag *lag, void *ptr) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; struct net_device *netdev; - info = ptr; netdev = netdev_notifier_info_to_dev(ptr); if (netdev != lag->netdev) @@ -1715,12 +2177,30 @@ static void ice_lag_chk_disabled_bond(struct ice_lag *lag, void *ptr) */ static void ice_lag_disable_sriov_bond(struct ice_lag *lag) { - struct ice_netdev_priv *np; - struct ice_pf *pf; + struct ice_netdev_priv *np = netdev_priv(lag->netdev); + struct ice_pf *pf = np->vsi->back; - np = netdev_priv(lag->netdev); - pf = np->vsi->back; ice_clear_feature_support(pf, ICE_F_SRIOV_LAG); + ice_clear_feature_support(pf, ICE_F_SRIOV_AA_LAG); +} + +/** + * ice_lag_preset_drop_fltr - preset drop filter for A/B bonds + * @lag: local lag struct + * @ptr: opaque data containing event + * + * Sets the initial drop filter for secondary interface in an + * active-backup bond + */ +static void ice_lag_preset_drop_fltr(struct ice_lag *lag, void *ptr) +{ + struct net_device *netdev = netdev_notifier_info_to_dev(ptr); + + if (netdev != lag->netdev || lag->primary || !lag->need_fltr_cfg) + return; + + ice_lag_cfg_drop_fltr(lag, true); + lag->need_fltr_cfg = false; } /** @@ -1761,10 +2241,12 @@ static void ice_lag_process_event(struct work_struct *work) ice_lag_unregister(lag_work->lag, netdev); goto lag_cleanup; } - ice_lag_monitor_active(lag_work->lag, - &lag_work->info.bonding_info); ice_lag_cfg_pf_fltrs(lag_work->lag, &lag_work->info.bonding_info); + ice_lag_preset_drop_fltr(lag_work->lag, + &lag_work->info.bonding_info); + ice_lag_monitor_info(lag_work->lag, + &lag_work->info.bonding_info); } ice_lag_info_event(lag_work->lag, &lag_work->info.bonding_info); break; @@ -1837,9 +2319,8 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, lag_work->lag = lag; lag_work->event = event; if (event == NETDEV_CHANGEUPPER) { - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *info = ptr; - info = ptr; upper_netdev = info->upper_dev; } else { upper_netdev = netdev_master_upper_dev_get(netdev); @@ -1889,10 +2370,8 @@ ice_lag_event_handler(struct notifier_block *notif_blk, unsigned long event, */ static int ice_register_lag_handler(struct ice_lag *lag) { + struct notifier_block *notif_blk = &lag->notif_block; struct device *dev = ice_pf_to_dev(lag->pf); - struct notifier_block *notif_blk; - - notif_blk = &lag->notif_block; if (!notif_blk->notifier_call) { notif_blk->notifier_call = ice_lag_event_handler; @@ -1912,10 +2391,9 @@ static int ice_register_lag_handler(struct ice_lag *lag) */ static void ice_unregister_lag_handler(struct ice_lag *lag) { + struct notifier_block *notif_blk = &lag->notif_block; struct device *dev = ice_pf_to_dev(lag->pf); - struct notifier_block *notif_blk; - notif_blk = &lag->notif_block; if (notif_blk->notifier_call) { unregister_netdevice_notifier(notif_blk); dev_dbg(dev, "LAG event handler unregistered\n"); @@ -1977,13 +2455,12 @@ ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw, u16 numq, valq, num_moved, qbuf_size; u16 buf_size = __struct_size(buf); struct ice_aqc_cfg_txqs_buf *qbuf; + struct ice_hw *hw = &lag->pf->hw; struct ice_sched_node *n_prt; __le32 teid, parent_teid; struct ice_vsi_ctx *ctx; - struct ice_hw *hw; u32 tmp_teid; - hw = 
&lag->pf->hw; ctx = ice_get_vsi_ctx(hw, vsi_num); if (!ctx) { dev_warn(dev, "LAG rebuild failed after reset due to VSI Context failure\n"); @@ -2020,7 +2497,8 @@ ice_lag_move_vf_nodes_tc_sync(struct ice_lag *lag, struct ice_hw *dest_hw, } if (ice_aq_cfg_lan_txq(hw, qbuf, qbuf_size, numq, hw->port_info->lport, - dest_hw->port_info->lport, NULL)) { + dest_hw->port_info->lport, + ICE_AQC_Q_CFG_TC_CHNG, NULL)) { dev_warn(dev, "Failure to configure queues for LAG reset rebuild\n"); goto sync_qerr; } @@ -2116,9 +2594,13 @@ int ice_init_lag(struct ice_pf *pf) lag->netdev = vsi->netdev; lag->role = ICE_LAG_NONE; lag->active_port = ICE_LAG_INVALID_PORT; + lag->port_bitmap = 0x0; lag->bonded = false; + lag->bond_aa = false; + lag->need_fltr_cfg = false; lag->upper_netdev = NULL; lag->notif_block.notifier_call = NULL; + memset(lag->sec_vf, 0, sizeof(lag->sec_vf)); err = ice_register_lag_handler(lag); if (err) { @@ -2136,6 +2618,11 @@ int ice_init_lag(struct ice_pf *pf) if (err) goto free_rcp_res; + err = ice_create_lag_recipe(&pf->hw, &lag->act_act_recipe, + ice_lport_rcp, 1); + if (err) + goto free_lport_res; + /* associate recipes to profiles */ for (n = 0; n < ICE_PROFID_IPV6_GTPU_IPV6_TCP_INNER; n++) { err = ice_aq_get_recipe_to_profile(&pf->hw, n, @@ -2145,7 +2632,8 @@ int ice_init_lag(struct ice_pf *pf) if (recipe_bits & BIT(ICE_SW_LKUP_DFLT)) { recipe_bits |= BIT(lag->pf_recipe) | - BIT(lag->lport_recipe); + BIT(lag->lport_recipe) | + BIT(lag->act_act_recipe); ice_aq_map_recipe_to_profile(&pf->hw, n, recipe_bits, NULL); } @@ -2156,9 +2644,13 @@ int ice_init_lag(struct ice_pf *pf) dev_dbg(dev, "INIT LAG complete\n"); return 0; +free_lport_res: + ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1, + &lag->lport_recipe); + free_rcp_res: ice_free_hw_res(&pf->hw, ICE_AQC_RES_TYPE_RECIPE, 1, - &pf->lag->pf_recipe); + &lag->pf_recipe); lag_error: kfree(lag); pf->lag = NULL; @@ -2174,9 +2666,7 @@ lag_error: */ void ice_deinit_lag(struct ice_pf *pf) { - struct ice_lag *lag; - - lag = pf->lag; + struct ice_lag *lag = pf->lag; if (!lag) return; @@ -2245,11 +2735,15 @@ void ice_lag_rebuild(struct ice_pf *pf) ice_lag_move_vf_nodes_sync(prim_lag, &pf->hw); } - ice_lag_cfg_cp_fltr(lag, true); + if (!lag->bond_aa) { + ice_lag_cfg_lp_fltr(lag, true, true); + if (lag->pf_rx_rule_id) + if (ice_lag_cfg_dflt_fltr(lag, true)) + dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n"); + } else { + ice_lag_cfg_lp_fltr(lag, true, false); + } - if (lag->pf_rx_rule_id) - if (ice_lag_cfg_dflt_fltr(lag, true)) - dev_err(ice_pf_to_dev(pf), "Error adding default VSI rule in rebuild\n"); ice_clear_rdma_cap(pf); lag_rebuild_out: diff --git a/drivers/net/ethernet/intel/ice/ice_lag.h b/drivers/net/ethernet/intel/ice/ice_lag.h index 69347d9f986b..f77ebcd61042 100644 --- a/drivers/net/ethernet/intel/ice/ice_lag.h +++ b/drivers/net/ethernet/intel/ice/ice_lag.h @@ -14,7 +14,11 @@ enum ice_lag_role { ICE_LAG_UNSET }; -#define ICE_LAG_INVALID_PORT 0xFF +#define ICE_LAG_INVALID_PORT 0xFF +#define ICE_LAGP_IDX 0 +#define ICE_LAGS_IDX 1 +#define ICE_LAGP_M 0x1 +#define ICE_LAGS_M 0x2 #define ICE_LAG_RESET_RETRIES 5 #define ICE_SW_DEFAULT_PROFILE 0 @@ -41,12 +45,26 @@ struct ice_lag { u8 active_port; /* lport value for the current active port */ u8 bonded:1; /* currently bonded */ u8 primary:1; /* this is primary */ + u8 bond_aa:1; /* is this bond active-active */ + u8 need_fltr_cfg:1; /* fltrs for A/A bond still need to be make */ + u8 port_bitmap:2; /* bitmap of active ports */ + u8 bond_lport_pri; /* lport values for 
primary PF */ + u8 bond_lport_sec; /* lport values for secondary PF */ + + /* q_home keeps track of which interface the q is currently on */ + u8 q_home[ICE_MAX_SRIOV_VFS][ICE_MAX_RSS_QS_PER_VF]; + + /* placeholder VSI for hanging VF queues from on secondary interface */ + struct ice_vsi *sec_vf[ICE_MAX_SRIOV_VFS]; + u16 pf_recipe; u16 lport_recipe; + u16 act_act_recipe; u16 pf_rx_rule_id; u16 pf_tx_rule_id; u16 cp_rule_idx; u16 lport_rule_idx; + u16 act_act_rule_idx; u8 role; }; @@ -64,7 +82,7 @@ struct ice_lag_work { } info; }; -void ice_lag_move_new_vf_nodes(struct ice_vf *vf); +void ice_lag_aa_failover(struct ice_lag *lag, u8 dest, struct ice_pf *e_pf); int ice_init_lag(struct ice_pf *pf); void ice_deinit_lag(struct ice_pf *pf); void ice_lag_rebuild(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 77ba26538b07..10c312d49e05 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -569,4 +569,45 @@ struct ice_tlan_ctx { u8 pkt_shaper_prof_idx; }; +#define ICE_TXTIME_TX_DESC_IDX_M GENMASK(12, 0) +#define ICE_TXTIME_STAMP_M GENMASK(31, 13) + +/* Tx time stamp descriptor */ +struct ice_ts_desc { + __le32 tx_desc_idx_tstamp; +}; + +#define ICE_TS_DESC(R, i) (&(((struct ice_ts_desc *)((R)->desc))[i])) + +#define ICE_TXTIME_MAX_QUEUE 2047 +#define ICE_SET_TXTIME_MAX_Q_AMOUNT 127 +#define ICE_TXTIME_FETCH_TS_DESC_DFLT 8 +#define ICE_TXTIME_FETCH_PROFILE_CNT 16 + +/* Tx Time queue context data */ +struct ice_txtime_ctx { +#define ICE_TXTIME_CTX_BASE_S 7 + u64 base; /* base is defined in 128-byte units */ + u8 pf_num; + u16 vmvf_num; + u8 vmvf_type; + u16 src_vsi; + u8 cpuid; + u8 tphrd_desc; + u16 qlen; + u8 timer_num; + u8 txtime_ena_q; + u8 drbell_mode_32; +#define ICE_TXTIME_CTX_DRBELL_MODE_32 1 + u8 ts_res; +#define ICE_TXTIME_CTX_RESOLUTION_128NS 7 + u8 ts_round_type; + u8 ts_pacing_slot; +#define ICE_TXTIME_CTX_FETCH_PROF_ID_0 0 + u8 merging_ena; + u8 ts_fetch_prof_id; + u8 ts_fetch_cache_line_aln_thld; + u8 tx_pipe_delay_mode; +}; + #endif /* _ICE_LAN_TX_RX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index a439b5a61a56..4479c824561e 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3950,6 +3950,7 @@ void ice_init_feature_support(struct ice_pf *pf) if (pf->hw.mac_type == ICE_MAC_E830) { ice_set_feature_support(pf, ICE_F_MBX_LIMIT); ice_set_feature_support(pf, ICE_F_GCS); + ice_set_feature_support(pf, ICE_F_TXTIME); } } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 8e0b06c1e02b..86f5859e88ef 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -39,6 +39,7 @@ static const char ice_copyright[] = "Copyright (c) 2018, Intel Corporation."; MODULE_DESCRIPTION(DRV_SUMMARY); MODULE_IMPORT_NS("LIBIE"); MODULE_IMPORT_NS("LIBIE_ADMINQ"); +MODULE_IMPORT_NS("LIBIE_FWLOG"); MODULE_LICENSE("GPL v2"); MODULE_FIRMWARE(ICE_DDP_PKG_FILE); @@ -1251,32 +1252,6 @@ ice_handle_link_event(struct ice_pf *pf, struct ice_rq_event_info *event) } /** - * ice_get_fwlog_data - copy the FW log data from ARQ event - * @pf: PF that the FW log event is associated with - * @event: event structure containing FW log data - */ -static void -ice_get_fwlog_data(struct ice_pf *pf, struct ice_rq_event_info *event) -{ - struct ice_fwlog_data *fwlog; - struct ice_hw *hw 
= &pf->hw; - - fwlog = &hw->fwlog_ring.rings[hw->fwlog_ring.tail]; - - memset(fwlog->data, 0, PAGE_SIZE); - fwlog->data_size = le16_to_cpu(event->desc.datalen); - - memcpy(fwlog->data, event->msg_buf, fwlog->data_size); - ice_fwlog_ring_increment(&hw->fwlog_ring.tail, hw->fwlog_ring.size); - - if (ice_fwlog_ring_full(&hw->fwlog_ring)) { - /* the rings are full so bump the head to create room */ - ice_fwlog_ring_increment(&hw->fwlog_ring.head, - hw->fwlog_ring.size); - } -} - -/** * ice_aq_prep_for_event - Prepare to wait for an AdminQ event from firmware * @pf: pointer to the PF private structure * @task: intermediate helper storage and identifier for waiting @@ -1566,7 +1541,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type) } break; case ice_aqc_opc_fw_logs_event: - ice_get_fwlog_data(pf, &event); + libie_get_fwlog_data(&hw->fwlog, event.msg_buf, + le16_to_cpu(event.desc.datalen)); break; case ice_aqc_opc_lldp_set_mib_change: ice_dcb_process_lldp_set_mib_change(pf, &event); @@ -3176,12 +3152,14 @@ static irqreturn_t ice_ll_ts_intr(int __always_unused irq, void *data) hw = &pf->hw; tx = &pf->ptp.port.tx; spin_lock_irqsave(&tx->lock, flags); - ice_ptp_complete_tx_single_tstamp(tx); + if (tx->init) { + ice_ptp_complete_tx_single_tstamp(tx); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + idx = find_next_bit_wrap(tx->in_use, tx->len, + tx->last_ll_ts_idx_read + 1); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock_irqrestore(&tx->lock, flags); val = GLINT_DYN_CTL_INTENA_M | GLINT_DYN_CTL_CLEARPBA_M | @@ -3991,6 +3969,11 @@ static void ice_deinit_pf(struct ice_pf *pf) pf->avail_rxqs = NULL; } + if (pf->txtime_txqs) { + bitmap_free(pf->txtime_txqs); + pf->txtime_txqs = NULL; + } + if (pf->ptp.clock) ptp_clock_unregister(pf->ptp.clock); @@ -4084,6 +4067,15 @@ static int ice_init_pf(struct ice_pf *pf) return -ENOMEM; } + pf->txtime_txqs = bitmap_zalloc(pf->max_pf_txqs, GFP_KERNEL); + if (!pf->txtime_txqs) { + bitmap_free(pf->avail_txqs); + pf->avail_txqs = NULL; + bitmap_free(pf->avail_rxqs); + pf->avail_rxqs = NULL; + return -ENOMEM; + } + mutex_init(&pf->vfs.table_lock); hash_init(pf->vfs.table); if (ice_is_feature_supported(pf, ICE_F_MBX_LIMIT)) @@ -4536,17 +4528,23 @@ ice_init_tx_topology(struct ice_hw *hw, const struct firmware *firmware) dev_info(dev, "Tx scheduling layers switching feature disabled\n"); else dev_info(dev, "Tx scheduling layers switching feature enabled\n"); - /* if there was a change in topology ice_cfg_tx_topo triggered - * a CORER and we need to re-init hw + return 0; + } else if (err == -ENODEV) { + /* If we failed to re-initialize the device, we can no longer + * continue loading. */ - ice_deinit_hw(hw); - err = ice_init_hw(hw); - + dev_warn(dev, "Failed to initialize hardware after applying Tx scheduling configuration.\n"); return err; } else if (err == -EIO) { dev_info(dev, "DDP package does not support Tx scheduling layers switching feature - please update to the latest DDP package and try again\n"); + return 0; + } else if (err == -EEXIST) { + return 0; } + /* Do not treat this as a fatal error. 
*/ + dev_info(dev, "Failed to apply Tx scheduling configuration, err %pe\n", + ERR_PTR(err)); return 0; } @@ -4646,19 +4644,6 @@ static void ice_print_wake_reason(struct ice_pf *pf) } /** - * ice_pf_fwlog_update_module - update 1 module - * @pf: pointer to the PF struct - * @log_level: log_level to use for the @module - * @module: module to update - */ -void ice_pf_fwlog_update_module(struct ice_pf *pf, int log_level, int module) -{ - struct ice_hw *hw = &pf->hw; - - hw->fwlog_cfg.module_entries[module].log_level = log_level; -} - -/** * ice_register_netdev - register netdev * @vsi: pointer to the VSI struct */ @@ -7513,7 +7498,8 @@ int ice_vsi_open(struct ice_vsi *vsi) if (err) goto err_setup_rx; - ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); + if (bitmap_empty(pf->txtime_txqs, pf->max_pf_txqs)) + ice_vsi_cfg_netdev_tc(vsi, vsi->tc_cfg.ena_tc); if (vsi->type == ICE_VSI_PF || vsi->type == ICE_VSI_SF) { /* Notify the stack of the actual queue counts. */ @@ -9117,7 +9103,7 @@ static int ice_create_q_channels(struct ice_vsi *vsi) list_add_tail(&ch->list, &vsi->ch_list); vsi->tc_map_vsi[i] = ch->ch_vsi; dev_dbg(ice_pf_to_dev(pf), - "successfully created channel: VSI %pK\n", ch->ch_vsi); + "successfully created channel: VSI %p\n", ch->ch_vsi); } return 0; @@ -9302,6 +9288,96 @@ exit: return ret; } +/** + * ice_cfg_txtime - configure Tx Time for the Tx ring + * @tx_ring: pointer to the Tx ring structure + * + * Return: 0 on success, negative value on failure. + */ +static int ice_cfg_txtime(struct ice_tx_ring *tx_ring) +{ + int err, timeout = 50; + struct ice_vsi *vsi; + struct device *dev; + struct ice_pf *pf; + u32 queue; + + if (!tx_ring) + return -EINVAL; + + vsi = tx_ring->vsi; + pf = vsi->back; + while (test_and_set_bit(ICE_CFG_BUSY, pf->state)) { + timeout--; + if (!timeout) + return -EBUSY; + usleep_range(1000, 2000); + } + + queue = tx_ring->q_index; + dev = ice_pf_to_dev(pf); + + /* Ignore return value, and always attempt to enable queue. */ + ice_qp_dis(vsi, queue); + + err = ice_qp_ena(vsi, queue); + if (err) + dev_err(dev, "Failed to enable Tx queue %d for TxTime configuration\n", + queue); + + clear_bit(ICE_CFG_BUSY, pf->state); + return err; +} + +/** + * ice_offload_txtime - set earliest TxTime first + * @netdev: network interface device structure + * @qopt_off: etf queue option offload from the skb to set + * + * Return: 0 on success, negative value on failure. 
+ */ +static int ice_offload_txtime(struct net_device *netdev, + void *qopt_off) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + struct tc_etf_qopt_offload *qopt; + struct ice_vsi *vsi = np->vsi; + struct ice_tx_ring *tx_ring; + int ret = 0; + + if (!ice_is_feature_supported(pf, ICE_F_TXTIME)) + return -EOPNOTSUPP; + + qopt = qopt_off; + if (!qopt_off || qopt->queue < 0 || qopt->queue >= vsi->num_txq) + return -EINVAL; + + if (qopt->enable) + set_bit(qopt->queue, pf->txtime_txqs); + else + clear_bit(qopt->queue, pf->txtime_txqs); + + if (netif_running(vsi->netdev)) { + tx_ring = vsi->tx_rings[qopt->queue]; + ret = ice_cfg_txtime(tx_ring); + if (ret) + goto err; + } + + netdev_info(netdev, "%s TxTime on queue: %i\n", + str_enable_disable(qopt->enable), qopt->queue); + return 0; + +err: + netdev_err(netdev, "Failed to %s TxTime on queue: %i\n", + str_enable_disable(qopt->enable), qopt->queue); + + if (qopt->enable) + clear_bit(qopt->queue, pf->txtime_txqs); + return ret; +} + static LIST_HEAD(ice_block_cb_list); static int @@ -9365,6 +9441,8 @@ adev_unlock: mutex_unlock(&pf->adev_mutex); } return err; + case TC_SETUP_QDISC_ETF: + return ice_offload_txtime(netdev, type_data); default: return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index e358eb1d719f..fb0f6365a6d6 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -2701,16 +2701,19 @@ irqreturn_t ice_ptp_ts_irq(struct ice_pf *pf) */ if (hw->dev_caps.ts_dev_info.ts_ll_int_read) { struct ice_ptp_tx *tx = &pf->ptp.port.tx; - u8 idx; + u8 idx, last; if (!ice_pf_state_is_nominal(pf)) return IRQ_HANDLED; spin_lock(&tx->lock); - idx = find_next_bit_wrap(tx->in_use, tx->len, - tx->last_ll_ts_idx_read + 1); - if (idx != tx->len) - ice_ptp_req_tx_single_tstamp(tx, idx); + if (tx->init) { + last = tx->last_ll_ts_idx_read + 1; + idx = find_next_bit_wrap(tx->in_use, tx->len, + last); + if (idx != tx->len) + ice_ptp_req_tx_single_tstamp(tx, idx); + } spin_unlock(&tx->lock); return IRQ_HANDLED; diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 9ce4c4db400e..843e82fd3bf9 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -9,7 +9,7 @@ #include "ice_dcb_lib.h" #include "ice_flow.h" #include "ice_eswitch.h" -#include "ice_virtchnl_allowlist.h" +#include "virt/allowlist.h" #include "ice_flex_pipe.h" #include "ice_vf_vsi_vlan_ops.h" #include "ice_vlan.h" diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index d1a998a4bef6..6c4fad09a527 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -3,9 +3,9 @@ #ifndef _ICE_SRIOV_H_ #define _ICE_SRIOV_H_ -#include "ice_virtchnl_fdir.h" +#include "virt/fdir.h" #include "ice_vf_lib.h" -#include "ice_virtchnl.h" +#include "virt/virtchnl.h" /* Static VF transaction/status register def */ #define VF_DEVICE_STATUS 0xAA diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h index 07aab6e130cd..4f35ef8d6b29 100644 --- a/drivers/net/ethernet/intel/ice/ice_trace.h +++ b/drivers/net/ethernet/intel/ice/ice_trace.h @@ -130,7 +130,7 @@ DECLARE_EVENT_CLASS(ice_tx_template, __entry->buf = buf; __assign_str(devname);), - TP_printk("netdev: %s ring: %pK desc: %pK buf %pK", __get_str(devname), + TP_printk("netdev: 
%s ring: %p desc: %p buf %p", __get_str(devname), __entry->ring, __entry->desc, __entry->buf) ); @@ -158,7 +158,7 @@ DECLARE_EVENT_CLASS(ice_rx_template, __entry->desc = desc; __assign_str(devname);), - TP_printk("netdev: %s ring: %pK desc: %pK", __get_str(devname), + TP_printk("netdev: %s ring: %p desc: %p", __get_str(devname), __entry->ring, __entry->desc) ); DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq, @@ -182,7 +182,7 @@ DECLARE_EVENT_CLASS(ice_rx_indicate_template, __entry->skb = skb; __assign_str(devname);), - TP_printk("netdev: %s ring: %pK desc: %pK skb %pK", __get_str(devname), + TP_printk("netdev: %s ring: %p desc: %p skb %p", __get_str(devname), __entry->ring, __entry->desc, __entry->skb) ); @@ -205,7 +205,7 @@ DECLARE_EVENT_CLASS(ice_xmit_template, __entry->skb = skb; __assign_str(devname);), - TP_printk("netdev: %s skb: %pK ring: %pK", __get_str(devname), + TP_printk("netdev: %s skb: %p ring: %p", __get_str(devname), __entry->skb, __entry->ring) ); @@ -228,7 +228,7 @@ DECLARE_EVENT_CLASS(ice_tx_tstamp_template, TP_fast_assign(__entry->skb = skb; __entry->idx = idx;), - TP_printk("skb %pK idx %d", + TP_printk("skb %p idx %d", __entry->skb, __entry->idx) ); #define DEFINE_TX_TSTAMP_OP_EVENT(name) \ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 29e0088ab6b2..73f08d02f9c7 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -144,6 +144,56 @@ static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring) } /** + * ice_clean_tstamp_ring - clean time stamp ring + * @tx_ring: Tx ring to clean the Time Stamp ring for + */ +static void ice_clean_tstamp_ring(struct ice_tx_ring *tx_ring) +{ + struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring; + u32 size; + + if (!tstamp_ring->desc) + return; + + size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc), + PAGE_SIZE); + memset(tstamp_ring->desc, 0, size); + tstamp_ring->next_to_use = 0; +} + +/** + * ice_free_tstamp_ring - free time stamp resources per queue + * @tx_ring: Tx ring to free the Time Stamp ring for + */ +void ice_free_tstamp_ring(struct ice_tx_ring *tx_ring) +{ + struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring; + u32 size; + + if (!tstamp_ring->desc) + return; + + ice_clean_tstamp_ring(tx_ring); + size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc), + PAGE_SIZE); + dmam_free_coherent(tx_ring->dev, size, tstamp_ring->desc, + tstamp_ring->dma); + tstamp_ring->desc = NULL; +} + +/** + * ice_free_tx_tstamp_ring - free time stamp resources per Tx ring + * @tx_ring: Tx ring to free the Time Stamp ring for + */ +void ice_free_tx_tstamp_ring(struct ice_tx_ring *tx_ring) +{ + ice_free_tstamp_ring(tx_ring); + kfree_rcu(tx_ring->tstamp_ring, rcu); + tx_ring->tstamp_ring = NULL; + tx_ring->flags &= ~ICE_TX_FLAGS_TXTIME; +} + +/** * ice_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned */ @@ -181,6 +231,9 @@ tx_skip_free: /* cleanup Tx queue statistics */ netdev_tx_reset_queue(txring_txq(tx_ring)); + + if (ice_is_txtime_cfg(tx_ring)) + ice_free_tx_tstamp_ring(tx_ring); } /** @@ -332,6 +385,84 @@ static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) } /** + * ice_alloc_tstamp_ring - allocate the Time Stamp ring + * @tx_ring: Tx ring to allocate the Time Stamp ring for + * + * Return: 0 on success, negative on error + */ +static int ice_alloc_tstamp_ring(struct ice_tx_ring *tx_ring) +{ + struct ice_tstamp_ring *tstamp_ring; + + /* allocate with 
kzalloc(), free with kfree_rcu() */ + tstamp_ring = kzalloc(sizeof(*tstamp_ring), GFP_KERNEL); + if (!tstamp_ring) + return -ENOMEM; + + tstamp_ring->tx_ring = tx_ring; + tx_ring->tstamp_ring = tstamp_ring; + tstamp_ring->desc = NULL; + tstamp_ring->count = ice_calc_ts_ring_count(tx_ring); + tx_ring->flags |= ICE_TX_FLAGS_TXTIME; + return 0; +} + +/** + * ice_setup_tstamp_ring - allocate the Time Stamp ring + * @tx_ring: Tx ring to set up the Time Stamp ring for + * + * Return: 0 on success, negative on error + */ +static int ice_setup_tstamp_ring(struct ice_tx_ring *tx_ring) +{ + struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring; + struct device *dev = tx_ring->dev; + u32 size; + + /* round up to nearest page */ + size = ALIGN(tstamp_ring->count * sizeof(struct ice_ts_desc), + PAGE_SIZE); + tstamp_ring->desc = dmam_alloc_coherent(dev, size, &tstamp_ring->dma, + GFP_KERNEL); + if (!tstamp_ring->desc) { + dev_err(dev, "Unable to allocate memory for Time stamp Ring, size=%d\n", + size); + return -ENOMEM; + } + + tstamp_ring->next_to_use = 0; + return 0; +} + +/** + * ice_alloc_setup_tstamp_ring - Allocate and setup the Time Stamp ring + * @tx_ring: Tx ring to allocate and setup the Time Stamp ring for + * + * Return: 0 on success, negative on error + */ +int ice_alloc_setup_tstamp_ring(struct ice_tx_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int err; + + err = ice_alloc_tstamp_ring(tx_ring); + if (err) { + dev_err(dev, "Unable to allocate Time stamp ring for Tx ring %d\n", + tx_ring->q_index); + return err; + } + + err = ice_setup_tstamp_ring(tx_ring); + if (err) { + dev_err(dev, "Unable to setup Time stamp ring for Tx ring %d\n", + tx_ring->q_index); + ice_free_tx_tstamp_ring(tx_ring); + return err; + } + return 0; +} + +/** * ice_setup_tx_ring - Allocate the Tx descriptors * @tx_ring: the Tx ring to set up * @@ -894,10 +1025,6 @@ ice_add_xdp_frag(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, __skb_fill_page_desc_noacc(sinfo, sinfo->nr_frags++, rx_buf->page, rx_buf->page_offset, size); sinfo->xdp_frags_size += size; - /* remember frag count before XDP prog execution; bpf_xdp_adjust_tail() - * can pop off frags but driver has to handle it on its own - */ - rx_ring->nr_frags = sinfo->nr_frags; if (page_is_pfmemalloc(rx_buf->page)) xdp_buff_set_frag_pfmemalloc(xdp); @@ -968,20 +1095,20 @@ ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, /** * ice_get_pgcnts - grab page_count() for gathered fragments * @rx_ring: Rx descriptor ring to store the page counts on + * @ntc: the next to clean element (not included in this frame!) 
* * This function is intended to be called right before running XDP * program so that the page recycling mechanism will be able to take * a correct decision regarding underlying pages; this is done in such * way as XDP program can change the refcount of page */ -static void ice_get_pgcnts(struct ice_rx_ring *rx_ring) +static void ice_get_pgcnts(struct ice_rx_ring *rx_ring, unsigned int ntc) { - u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; struct ice_rx_buf *rx_buf; u32 cnt = rx_ring->count; - for (int i = 0; i < nr_frags; i++) { + while (idx != ntc) { rx_buf = &rx_ring->rx_buf[idx]; rx_buf->pgcnt = page_count(rx_buf->page); @@ -1035,10 +1162,9 @@ ice_build_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) skb_metadata_set(skb, metasize); if (unlikely(xdp_buff_has_frags(xdp))) - xdp_update_skb_shared_info(skb, nr_frags, - sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_is_frag_pfmemalloc(xdp)); + xdp_update_skb_frags_info(skb, nr_frags, sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_get_skb_flags(xdp)); return skb; } @@ -1115,10 +1241,10 @@ ice_construct_skb(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp) memcpy(&skinfo->frags[skinfo->nr_frags], &sinfo->frags[0], sizeof(skb_frag_t) * nr_frags); - xdp_update_skb_shared_info(skb, skinfo->nr_frags + nr_frags, - sinfo->xdp_frags_size, - nr_frags * xdp->frame_sz, - xdp_buff_is_frag_pfmemalloc(xdp)); + xdp_update_skb_frags_info(skb, skinfo->nr_frags + nr_frags, + sinfo->xdp_frags_size, + nr_frags * xdp->frame_sz, + xdp_buff_get_skb_flags(xdp)); } return skb; @@ -1154,62 +1280,51 @@ ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf) } /** - * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all frame frags + * ice_put_rx_mbuf - ice_put_rx_buf() caller, for all buffers in frame * @rx_ring: Rx ring with all the auxiliary data * @xdp: XDP buffer carrying linear + frags part - * @xdp_xmit: XDP_TX/XDP_REDIRECT verdict storage - * @ntc: a current next_to_clean value to be stored at rx_ring + * @ntc: the next to clean element (not included in this frame!) * @verdict: return code from XDP program execution * - * Walk through gathered fragments and satisfy internal page - * recycle mechanism; we take here an action related to verdict - * returned by XDP program; + * Called after XDP program is completed, or on error with verdict set to + * ICE_XDP_CONSUMED. + * + * Walk through buffers from first_desc to the end of the frame, releasing + * buffers and satisfying internal page recycle mechanism. The action depends + * on verdict from XDP program. */ static void ice_put_rx_mbuf(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - u32 *xdp_xmit, u32 ntc, u32 verdict) + u32 ntc, u32 verdict) { - u32 nr_frags = rx_ring->nr_frags + 1; u32 idx = rx_ring->first_desc; u32 cnt = rx_ring->count; - u32 post_xdp_frags = 1; struct ice_rx_buf *buf; - int i; + u32 xdp_frags = 0; + int i = 0; if (unlikely(xdp_buff_has_frags(xdp))) - post_xdp_frags += xdp_get_shared_info_from_buff(xdp)->nr_frags; + xdp_frags = xdp_get_shared_info_from_buff(xdp)->nr_frags; - for (i = 0; i < post_xdp_frags; i++) { + while (idx != ntc) { buf = &rx_ring->rx_buf[idx]; + if (++idx == cnt) + idx = 0; - if (verdict & (ICE_XDP_TX | ICE_XDP_REDIR)) { + /* An XDP program could release fragments from the end of the + * buffer. For these, we need to keep the pagecnt_bias as-is. + * To do this, only adjust pagecnt_bias for fragments up to + * the total remaining after the XDP program has run. 
+ */ + if (verdict != ICE_XDP_CONSUMED) ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - *xdp_xmit |= verdict; - } else if (verdict & ICE_XDP_CONSUMED) { + else if (i++ <= xdp_frags) buf->pagecnt_bias++; - } else if (verdict == ICE_XDP_PASS) { - ice_rx_buf_adjust_pg_offset(buf, xdp->frame_sz); - } ice_put_rx_buf(rx_ring, buf); - - if (++idx == cnt) - idx = 0; - } - /* handle buffers that represented frags released by XDP prog; - * for these we keep pagecnt_bias as-is; refcount from struct page - * has been decremented within XDP prog and we do not have to increase - * the biased refcnt - */ - for (; i < nr_frags; i++) { - buf = &rx_ring->rx_buf[idx]; - ice_put_rx_buf(rx_ring, buf); - if (++idx == cnt) - idx = 0; } xdp->data = NULL; rx_ring->first_desc = ntc; - rx_ring->nr_frags = 0; } /** @@ -1317,6 +1432,10 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) /* retrieve a buffer from the ring */ rx_buf = ice_get_rx_buf(rx_ring, size, ntc); + /* Increment ntc before calls to ice_put_rx_mbuf() */ + if (++ntc == cnt) + ntc = 0; + if (!xdp->data) { void *hard_start; @@ -1325,24 +1444,23 @@ static int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) xdp_prepare_buff(xdp, hard_start, offset, size, !!offset); xdp_buff_clear_frags_flag(xdp); } else if (ice_add_xdp_frag(rx_ring, xdp, rx_buf, size)) { - ice_put_rx_mbuf(rx_ring, xdp, NULL, ntc, ICE_XDP_CONSUMED); + ice_put_rx_mbuf(rx_ring, xdp, ntc, ICE_XDP_CONSUMED); break; } - if (++ntc == cnt) - ntc = 0; /* skip if it is NOP desc */ if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_get_pgcnts(rx_ring); + ice_get_pgcnts(rx_ring, ntc); xdp_verdict = ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_desc); if (xdp_verdict == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); total_rx_pkts++; - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); + xdp_xmit |= xdp_verdict & (ICE_XDP_TX | ICE_XDP_REDIR); continue; construct_skb: @@ -1352,10 +1470,10 @@ construct_skb: skb = ice_construct_skb(rx_ring, xdp); /* exit if we failed to retrieve a buffer */ if (!skb) { - rx_ring->ring_stats->rx_stats.alloc_page_failed++; + rx_ring->ring_stats->rx_stats.alloc_buf_failed++; xdp_verdict = ICE_XDP_CONSUMED; } - ice_put_rx_mbuf(rx_ring, xdp, &xdp_xmit, ntc, xdp_verdict); + ice_put_rx_mbuf(rx_ring, xdp, ntc, xdp_verdict); if (!skb) break; @@ -1835,10 +1953,46 @@ ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, /* notify HW of packet */ kick = __netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount, netdev_xmit_more()); - if (kick) - /* notify HW of packet */ - writel(i, tx_ring->tail); + if (!kick) + return; + if (ice_is_txtime_cfg(tx_ring)) { + struct ice_tstamp_ring *tstamp_ring = tx_ring->tstamp_ring; + u32 tstamp_count = tstamp_ring->count; + u32 j = tstamp_ring->next_to_use; + struct ice_ts_desc *ts_desc; + struct timespec64 ts; + u32 tstamp; + + ts = ktime_to_timespec64(first->skb->tstamp); + tstamp = ts.tv_nsec >> ICE_TXTIME_CTX_RESOLUTION_128NS; + + ts_desc = ICE_TS_DESC(tstamp_ring, j); + ts_desc->tx_desc_idx_tstamp = ice_build_tstamp_desc(i, tstamp); + + j++; + if (j == tstamp_count) { + u32 fetch = tstamp_count - tx_ring->count; + + j = 0; + + /* To prevent an MDD, when wrapping the tstamp ring + * create additional TS descriptors equal to the number + * of the fetch TS descriptors value. HW will merge the + * TS descriptors with the same timestamp value into a + * single descriptor. 
+ */ + for (; j < fetch; j++) { + ts_desc = ICE_TS_DESC(tstamp_ring, j); + ts_desc->tx_desc_idx_tstamp = + ice_build_tstamp_desc(i, tstamp); + } + } + tstamp_ring->next_to_use = j; + writel_relaxed(j, tstamp_ring->tail); + } else { + writel_relaxed(i, tx_ring->tail); + } return; dma_error: diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index fef750c5f288..841a07bfba54 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -310,6 +310,16 @@ enum ice_dynamic_itr { #define ICE_TX_LEGACY 1 /* descriptor ring, associated with a VSI */ +struct ice_tstamp_ring { + struct ice_tx_ring *tx_ring; /* Backreference to associated Tx ring */ + dma_addr_t dma; /* physical address of ring */ + struct rcu_head rcu; /* to avoid race on free */ + u8 __iomem *tail; + void *desc; + u16 next_to_use; + u16 count; +} ____cacheline_internodealigned_in_smp; + struct ice_rx_ring { /* CL1 - 1st cacheline starts here */ void *desc; /* Descriptor ring memory */ @@ -358,7 +368,6 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct ice_rx_ring *next; /* pointer to next ring in q_vector */ struct xsk_buff_pool *xsk_pool; - u32 nr_frags; u16 max_frame; u16 rx_buf_len; dma_addr_t dma; /* physical address of ring */ @@ -403,9 +412,11 @@ struct ice_tx_ring { spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ /* CL4 - 4th cacheline starts here */ + struct ice_tstamp_ring *tstamp_ring; #define ICE_TX_FLAGS_RING_XDP BIT(0) #define ICE_TX_FLAGS_RING_VLAN_L2TAG1 BIT(1) #define ICE_TX_FLAGS_RING_VLAN_L2TAG2 BIT(2) +#define ICE_TX_FLAGS_TXTIME BIT(3) u8 flags; u8 dcb_tc; /* Traffic class of ring */ u16 quanta_prof_id; @@ -501,6 +512,7 @@ void ice_clean_tx_ring(struct ice_tx_ring *tx_ring); void ice_clean_rx_ring(struct ice_rx_ring *rx_ring); int ice_setup_tx_ring(struct ice_tx_ring *tx_ring); int ice_setup_rx_ring(struct ice_rx_ring *rx_ring); +int ice_alloc_setup_tstamp_ring(struct ice_tx_ring *tx_ring); void ice_free_tx_ring(struct ice_tx_ring *tx_ring); void ice_free_rx_ring(struct ice_rx_ring *rx_ring); int ice_napi_poll(struct napi_struct *napi, int budget); @@ -509,4 +521,6 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, u8 *raw_packet); void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring); void ice_clean_ctrl_rx_irq(struct ice_rx_ring *rx_ring); +void ice_free_tx_tstamp_ring(struct ice_tx_ring *tx_ring); +void ice_free_tstamp_ring(struct ice_tx_ring *tx_ring); #endif /* _ICE_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 6cf32b404127..99717730f21a 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -54,6 +54,20 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) } /** + * ice_build_tstamp_desc - build Tx time stamp descriptor + * @tx_desc: Tx LAN descriptor index + * @tstamp: time stamp + * + * Return: Tx time stamp descriptor + */ +static inline __le32 +ice_build_tstamp_desc(u16 tx_desc, u32 tstamp) +{ + return cpu_to_le32(FIELD_PREP(ICE_TXTIME_TX_DESC_IDX_M, tx_desc) | + FIELD_PREP(ICE_TXTIME_STAMP_M, tstamp)); +} + +/** * ice_get_vlan_tci - get VLAN TCI from Rx flex descriptor * @rx_desc: Rx 32b flex descriptor with RXDID=2 * diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index 03c6c271865d..b0a1b67071c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ 
b/drivers/net/ethernet/intel/ice/ice_type.h @@ -17,7 +17,7 @@ #include "ice_protocol_type.h" #include "ice_sbq_cmd.h" #include "ice_vlan_mode.h" -#include "ice_fwlog.h" +#include <linux/net/intel/libie/fwlog.h> #include <linux/wait.h> #include <net/dscp.h> @@ -293,8 +293,10 @@ struct ice_hw_common_caps { u8 dcb; u8 ieee_1588; u8 rdma; - u8 roce_lag; - u8 sriov_lag; + + bool roce_lag; + bool sriov_lag; + bool sriov_aa_lag; bool nvm_update_pending_nvm; bool nvm_update_pending_orom; @@ -946,9 +948,7 @@ struct ice_hw { u8 fw_patch; /* firmware patch version */ u32 fw_build; /* firmware build number */ - struct ice_fwlog_cfg fwlog_cfg; - bool fwlog_supported; /* does hardware support FW logging? */ - struct ice_fwlog_ring fwlog_ring; + struct libie_fwlog fwlog; /* Device max aggregate bandwidths corresponding to the GL_PWR_MODE_CTL * register. Used for determining the ITR/INTRL granularity during diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.c b/drivers/net/ethernet/intel/ice/ice_vf_lib.c index 5ee74f3e82dc..de9e81ccee66 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.c @@ -5,7 +5,7 @@ #include "ice.h" #include "ice_lib.h" #include "ice_fltr.h" -#include "ice_virtchnl_allowlist.h" +#include "virt/allowlist.h" /* Public functions which may be accessed by all driver files */ diff --git a/drivers/net/ethernet/intel/ice/ice_vf_lib.h b/drivers/net/ethernet/intel/ice/ice_vf_lib.h index ffe1f9f830ea..b00708907176 100644 --- a/drivers/net/ethernet/intel/ice/ice_vf_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_vf_lib.h @@ -13,7 +13,7 @@ #include <linux/avf/virtchnl.h> #include "ice_type.h" #include "ice_flow.h" -#include "ice_virtchnl_fdir.h" +#include "virt/fdir.h" #include "ice_vsi_vlan_ops.h" #define ICE_MAX_SRIOV_VFS 256 diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a3a4eaa17739..575fd48f485f 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -19,52 +19,12 @@ static struct xdp_buff **ice_xdp_buf(struct ice_rx_ring *rx_ring, u32 idx) } /** - * ice_qp_reset_stats - Resets all stats for rings of given index - * @vsi: VSI that contains rings of interest - * @q_idx: ring index in array - */ -static void ice_qp_reset_stats(struct ice_vsi *vsi, u16 q_idx) -{ - struct ice_vsi_stats *vsi_stat; - struct ice_pf *pf; - - pf = vsi->back; - if (!pf->vsi_stats) - return; - - vsi_stat = pf->vsi_stats[vsi->idx]; - if (!vsi_stat) - return; - - memset(&vsi_stat->rx_ring_stats[q_idx]->rx_stats, 0, - sizeof(vsi_stat->rx_ring_stats[q_idx]->rx_stats)); - memset(&vsi_stat->tx_ring_stats[q_idx]->stats, 0, - sizeof(vsi_stat->tx_ring_stats[q_idx]->stats)); - if (vsi->xdp_rings) - memset(&vsi->xdp_rings[q_idx]->ring_stats->stats, 0, - sizeof(vsi->xdp_rings[q_idx]->ring_stats->stats)); -} - -/** - * ice_qp_clean_rings - Cleans all the rings of a given index - * @vsi: VSI that contains rings of interest - * @q_idx: ring index in array - */ -static void ice_qp_clean_rings(struct ice_vsi *vsi, u16 q_idx) -{ - ice_clean_tx_ring(vsi->tx_rings[q_idx]); - if (vsi->xdp_rings) - ice_clean_tx_ring(vsi->xdp_rings[q_idx]); - ice_clean_rx_ring(vsi->rx_rings[q_idx]); -} - -/** * ice_qvec_toggle_napi - Enables/disables NAPI for a given q_vector * @vsi: VSI that has netdev * @q_vector: q_vector that has NAPI context * @enable: true for enable, false for disable */ -static void +void ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, bool enable) { 
@@ -83,7 +43,7 @@ ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, * @rx_ring: Rx ring that will have its IRQ disabled * @q_vector: queue vector */ -static void +void ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, struct ice_q_vector *q_vector) { @@ -113,7 +73,7 @@ ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, * @q_vector: queue vector * @qid: queue index */ -static void +void ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid) { u16 reg_idx = q_vector->reg_idx; @@ -143,7 +103,7 @@ ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, u16 qid) * @vsi: the VSI that contains queue vector * @q_vector: queue vector */ -static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) +void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) { struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; @@ -154,111 +114,6 @@ static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) } /** - * ice_qp_dis - Disables a queue pair - * @vsi: VSI of interest - * @q_idx: ring index in array - * - * Returns 0 on success, negative on failure. - */ -static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) -{ - struct ice_txq_meta txq_meta = { }; - struct ice_q_vector *q_vector; - struct ice_tx_ring *tx_ring; - struct ice_rx_ring *rx_ring; - int fail = 0; - int err; - - if (q_idx >= vsi->num_rxq || q_idx >= vsi->num_txq) - return -EINVAL; - - tx_ring = vsi->tx_rings[q_idx]; - rx_ring = vsi->rx_rings[q_idx]; - q_vector = rx_ring->q_vector; - - synchronize_net(); - netif_carrier_off(vsi->netdev); - netif_tx_stop_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - - ice_qvec_dis_irq(vsi, rx_ring, q_vector); - ice_qvec_toggle_napi(vsi, q_vector, false); - - ice_fill_txq_meta(vsi, tx_ring, &txq_meta); - err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, tx_ring, &txq_meta); - if (!fail) - fail = err; - if (vsi->xdp_rings) { - struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; - - memset(&txq_meta, 0, sizeof(txq_meta)); - ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); - err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, 0, xdp_ring, - &txq_meta); - if (!fail) - fail = err; - } - - ice_vsi_ctrl_one_rx_ring(vsi, false, q_idx, false); - ice_qp_clean_rings(vsi, q_idx); - ice_qp_reset_stats(vsi, q_idx); - - return fail; -} - -/** - * ice_qp_ena - Enables a queue pair - * @vsi: VSI of interest - * @q_idx: ring index in array - * - * Returns 0 on success, negative on failure. 
- */ -static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) -{ - struct ice_q_vector *q_vector; - int fail = 0; - bool link_up; - int err; - - err = ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx); - if (!fail) - fail = err; - - if (ice_is_xdp_ena_vsi(vsi)) { - struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; - - err = ice_vsi_cfg_single_txq(vsi, vsi->xdp_rings, q_idx); - if (!fail) - fail = err; - ice_set_ring_xdp(xdp_ring); - ice_tx_xsk_pool(vsi, q_idx); - } - - err = ice_vsi_cfg_single_rxq(vsi, q_idx); - if (!fail) - fail = err; - - q_vector = vsi->rx_rings[q_idx]->q_vector; - ice_qvec_cfg_msix(vsi, q_vector, q_idx); - - err = ice_vsi_ctrl_one_rx_ring(vsi, true, q_idx, true); - if (!fail) - fail = err; - - ice_qvec_toggle_napi(vsi, q_vector, true); - ice_qvec_ena_irq(vsi, q_vector); - - /* make sure NAPI sees updated ice_{t,x}_ring::xsk_pool */ - synchronize_net(); - ice_get_link_status(vsi->port_info, &link_up); - if (link_up) { - netif_tx_start_queue(netdev_get_tx_queue(vsi->netdev, q_idx)); - netif_carrier_on(vsi->netdev); - } - - return fail; -} - -/** * ice_xsk_pool_disable - disable a buffer pool region * @vsi: Current VSI * @qid: queue ID diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index 8dc5d55e26c5..600cbeeaa203 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -23,6 +23,13 @@ void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, struct xsk_buff_pool *xsk_pool); int ice_realloc_zc_buf(struct ice_vsi *vsi, bool zc); +void ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, + u16 qid); +void ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, + bool enable); +void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector); +void ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, + struct ice_q_vector *q_vector); #else static inline bool ice_xmit_zc(struct ice_tx_ring __always_unused *xdp_ring, struct xsk_buff_pool __always_unused *xsk_pool) @@ -75,5 +82,20 @@ ice_realloc_zc_buf(struct ice_vsi __always_unused *vsi, { return 0; } + +static inline void +ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector, + u16 qid) { } + +static inline void +ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, + bool enable) { } + +static inline void +ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) { } + +static inline void +ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, + struct ice_q_vector *q_vector) { } #endif /* CONFIG_XDP_SOCKETS */ #endif /* !_ICE_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c b/drivers/net/ethernet/intel/ice/virt/allowlist.c index 4c2ec2337b38..a07efec19c45 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.c +++ b/drivers/net/ethernet/intel/ice/virt/allowlist.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2021, Intel Corporation. */ -#include "ice_virtchnl_allowlist.h" +#include "allowlist.h" /* Purpose of this file is to share functionality to allowlist or denylist * opcodes used in PF <-> VF communication. 
Group of opcodes: diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h b/drivers/net/ethernet/intel/ice/virt/allowlist.h index d3ae86ded219..d3ae86ded219 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_allowlist.h +++ b/drivers/net/ethernet/intel/ice/virt/allowlist.h diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c b/drivers/net/ethernet/intel/ice/virt/fdir.c index ae83c3914e29..ae83c3914e29 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.c +++ b/drivers/net/ethernet/intel/ice/virt/fdir.c diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h b/drivers/net/ethernet/intel/ice/virt/fdir.h index ac6dcab454b4..ac6dcab454b4 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_fdir.h +++ b/drivers/net/ethernet/intel/ice/virt/fdir.h diff --git a/drivers/net/ethernet/intel/ice/virt/queues.c b/drivers/net/ethernet/intel/ice/virt/queues.c new file mode 100644 index 000000000000..370f6ec2a374 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/queues.c @@ -0,0 +1,973 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022, Intel Corporation. */ + +#include "virtchnl.h" +#include "queues.h" +#include "ice_vf_lib_private.h" +#include "ice.h" +#include "ice_base.h" +#include "ice_lib.h" + +/** + * ice_vc_get_max_frame_size - get max frame size allowed for VF + * @vf: VF used to determine max frame size + * + * Max frame size is determined based on the current port's max frame size and + * whether a port VLAN is configured on this VF. The VF is not aware whether + * it's in a port VLAN so the PF needs to account for this in max frame size + * checks and sending the max frame size to the VF. + */ +u16 ice_vc_get_max_frame_size(struct ice_vf *vf) +{ + struct ice_port_info *pi = ice_vf_get_port_info(vf); + u16 max_frame_size; + + max_frame_size = pi->phy.link_info.max_frame_size; + + if (ice_vf_is_port_vlan_ena(vf)) + max_frame_size -= VLAN_HLEN; + + return max_frame_size; +} + +/** + * ice_vc_isvalid_q_id + * @vsi: VSI to check queue ID against + * @qid: VSI relative queue ID + * + * check for the valid queue ID + */ +static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u16 qid) +{ + /* allocated Tx and Rx queues should be always equal for VF VSI */ + return qid < vsi->alloc_txq; +} + +/** + * ice_vc_isvalid_ring_len + * @ring_len: length of ring + * + * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE + * or zero + */ +static bool ice_vc_isvalid_ring_len(u16 ring_len) +{ + return ring_len == 0 || + (ring_len >= ICE_MIN_NUM_DESC && + ring_len <= ICE_MAX_NUM_DESC_E810 && + !(ring_len % ICE_REQ_DESC_MULTIPLE)); +} + +/** + * ice_vf_cfg_qs_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @num_queues: number of queues to be configured + * + * Configure per queue bandwidth. + * + * Return: 0 on success or negative error value. 
+ */ +static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues) +{ + struct ice_hw *hw = &vf->pf->hw; + struct ice_vsi *vsi; + int ret; + u16 i; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + return -EINVAL; + + for (i = 0; i < num_queues; i++) { + u32 p_rate, min_rate; + u8 tc; + + p_rate = vf->qs_bw[i].peak; + min_rate = vf->qs_bw[i].committed; + tc = vf->qs_bw[i].tc; + if (p_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW, p_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MAX_BW); + if (ret) + return ret; + + if (min_rate) + ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW, min_rate); + else + ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, + vf->qs_bw[i].queue_id, + ICE_MIN_BW); + + if (ret) + return ret; + } + + return 0; +} + +/** + * ice_vf_cfg_q_quanta_profile - Configure quanta profile + * @vf: pointer to the VF info + * @quanta_prof_idx: pointer to the quanta profile index + * @quanta_size: quanta size to be set + * + * This function chooses available quanta profile and configures the register. + * The quanta profile is evenly divided by the number of device ports, and then + * available to the specific PF and VFs. The first profile for each PF is a + * reserved default profile. Only quanta size of the rest unused profile can be + * modified. + * + * Return: 0 on success or negative error value. + */ +static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size, + u16 *quanta_prof_idx) +{ + const u16 n_desc = calc_quanta_desc(quanta_size); + struct ice_hw *hw = &vf->pf->hw; + const u16 n_cmd = 2 * n_desc; + struct ice_pf *pf = vf->pf; + u16 per_pf, begin_id; + u8 n_used; + u32 reg; + + begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs * + hw->logical_pf_id; + + if (quanta_size == ICE_DFLT_QUANTA) { + *quanta_prof_idx = begin_id; + } else { + per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / + hw->dev_caps.num_funcs; + n_used = pf->num_quanta_prof_used; + if (n_used < per_pf) { + *quanta_prof_idx = begin_id + 1 + n_used; + pf->num_quanta_prof_used++; + } else { + return -EINVAL; + } + } + + reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) | + FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc); + wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg); + + return 0; +} + +/** + * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL + * @vqs: virtchnl_queue_select structure containing bitmaps to validate + * + * Return true on successful validation, else false + */ +static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs) +{ + if ((!vqs->rx_queues && !vqs->tx_queues) || + vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) || + vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF)) + return false; + + return true; +} + +/** + * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL + * @vsi: VSI of the VF to configure + * @q_idx: VF queue index used to determine the queue in the PF's space + */ +void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx) +{ + struct ice_hw *hw = &vsi->back->hw; + u32 pfq = vsi->txq_map[q_idx]; + u32 reg; + + reg = rd32(hw, QINT_TQCTL(pfq)); + + /* MSI-X index 0 in the VF's space is always for the OICR, which means + * this is most likely a poll mode VF driver, so don't enable an + * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP + */ + if (!(reg & 
QINT_TQCTL_MSIX_INDX_M)) + return; + + wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M); +} + +/** + * ice_vf_ena_rxq_interrupt - enable Tx queue interrupt via QINT_RQCTL + * @vsi: VSI of the VF to configure + * @q_idx: VF queue index used to determine the queue in the PF's space + */ +void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx) +{ + struct ice_hw *hw = &vsi->back->hw; + u32 pfq = vsi->rxq_map[q_idx]; + u32 reg; + + reg = rd32(hw, QINT_RQCTL(pfq)); + + /* MSI-X index 0 in the VF's space is always for the OICR, which means + * this is most likely a poll mode VF driver, so don't enable an + * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP + */ + if (!(reg & QINT_RQCTL_MSIX_INDX_M)) + return; + + wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M); +} + +/** + * ice_vc_ena_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to enable all or specific queue(s) + */ +int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queue_select *vqs = + (struct virtchnl_queue_select *)msg; + struct ice_vsi *vsi; + unsigned long q_map; + u16 vf_q_id; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_validate_vqs_bitmaps(vqs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Enable only Rx rings, Tx rings were enabled by the FW when the + * Tx queue group list was configured and the context bits were + * programmed using ice_vsi_cfg_txqs + */ + q_map = vqs->rx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if enabled */ + if (test_bit(vf_q_id, vf->rxq_ena)) + continue; + + if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n", + vf_q_id, vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + ice_vf_ena_rxq_interrupt(vsi, vf_q_id); + set_bit(vf_q_id, vf->rxq_ena); + } + + q_map = vqs->tx_queues; + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if enabled */ + if (test_bit(vf_q_id, vf->txq_ena)) + continue; + + ice_vf_ena_txq_interrupt(vsi, vf_q_id); + set_bit(vf_q_id, vf->txq_ena); + } + + /* Set flag to indicate that queues are enabled */ + if (v_ret == VIRTCHNL_STATUS_SUCCESS) + set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret, + NULL, 0); +} + +/** + * ice_vf_vsi_dis_single_txq - disable a single Tx queue + * @vf: VF to disable queue for + * @vsi: VSI for the VF + * @q_id: VF relative (0-based) queue ID + * + * Attempt to disable the Tx queue passed in. If the Tx queue was successfully + * disabled then clear q_id bit in the enabled queues bitmap and return + * success. Otherwise return error. 
+ */ +int ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id) +{ + struct ice_txq_meta txq_meta = { 0 }; + struct ice_tx_ring *ring; + int err; + + if (!test_bit(q_id, vf->txq_ena)) + dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", + q_id, vsi->vsi_num); + + ring = vsi->tx_rings[q_id]; + if (!ring) + return -EINVAL; + + ice_fill_txq_meta(vsi, ring, &txq_meta); + + err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta); + if (err) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", + q_id, vsi->vsi_num); + return err; + } + + /* Clear enabled queues flag */ + clear_bit(q_id, vf->txq_ena); + + return 0; +} + +/** + * ice_vc_dis_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to disable all or specific queue(s) + */ +int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queue_select *vqs = + (struct virtchnl_queue_select *)msg; + struct ice_vsi *vsi; + unsigned long q_map; + u16 vf_q_id; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) && + !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_validate_vqs_bitmaps(vqs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vqs->tx_queues) { + q_map = vqs->tx_queues; + + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + } + } + + q_map = vqs->rx_queues; + /* speed up Rx queue disable by batching them if possible */ + if (q_map && + bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) { + if (ice_vsi_stop_all_rx_rings(vsi)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n", + vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); + } else if (q_map) { + for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { + if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Skip queue if not enabled */ + if (!test_bit(vf_q_id, vf->rxq_ena)) + continue; + + if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id, + true)) { + dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n", + vf_q_id, vsi->vsi_num); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* Clear enabled queues flag */ + clear_bit(vf_q_id, vf->rxq_ena); + } + } + + /* Clear enabled queues flag */ + if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf)) + clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret, + NULL, 0); +} + +/** + * ice_cfg_interrupt + * @vf: pointer to the VF info + * @vsi: the VSI being configured + * @map: vector map for mapping vectors to queues + * @q_vector: structure for interrupt vector + * configure the IRQ to queue map + */ +static enum virtchnl_status_code 
+ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, + struct virtchnl_vector_map *map, + struct ice_q_vector *q_vector) +{ + u16 vsi_q_id, vsi_q_id_idx; + unsigned long qmap; + + q_vector->num_ring_rx = 0; + q_vector->num_ring_tx = 0; + + qmap = map->rxq_map; + for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { + vsi_q_id = vsi_q_id_idx; + + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) + return VIRTCHNL_STATUS_ERR_PARAM; + + q_vector->num_ring_rx++; + q_vector->rx.itr_idx = map->rxitr_idx; + vsi->rx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_rxq_interrupt(vsi, vsi_q_id, + q_vector->vf_reg_idx, + q_vector->rx.itr_idx); + } + + qmap = map->txq_map; + for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { + vsi_q_id = vsi_q_id_idx; + + if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) + return VIRTCHNL_STATUS_ERR_PARAM; + + q_vector->num_ring_tx++; + q_vector->tx.itr_idx = map->txitr_idx; + vsi->tx_rings[vsi_q_id]->q_vector = q_vector; + ice_cfg_txq_interrupt(vsi, vsi_q_id, + q_vector->vf_reg_idx, + q_vector->tx.itr_idx); + } + + return VIRTCHNL_STATUS_SUCCESS; +} + +/** + * ice_vc_cfg_irq_map_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to configure the IRQ to queue map + */ +int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + u16 num_q_vectors_mapped, vsi_id, vector_id; + struct virtchnl_irq_map_info *irqmap_info; + struct virtchnl_vector_map *map; + struct ice_vsi *vsi; + int i; + + irqmap_info = (struct virtchnl_irq_map_info *)msg; + num_q_vectors_mapped = irqmap_info->num_vectors; + + /* Check to make sure number of VF vectors mapped is not greater than + * number of VF vectors originally allocated, and check that + * there is actually at least a single VF queue vector mapped + */ + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || + vf->num_msix < num_q_vectors_mapped || + !num_q_vectors_mapped) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + for (i = 0; i < num_q_vectors_mapped; i++) { + struct ice_q_vector *q_vector; + + map = &irqmap_info->vecmap[i]; + + vector_id = map->vector_id; + vsi_id = map->vsi_id; + /* vector_id is always 0-based for each VF, and can never be + * larger than or equal to the max allowed interrupts per VF + */ + if (!(vector_id < vf->num_msix) || + !ice_vc_isvalid_vsi_id(vf, vsi_id) || + (!vector_id && (map->rxq_map || map->txq_map))) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* No need to map VF miscellaneous or rogue vector */ + if (!vector_id) + continue; + + /* Subtract non queue vector from vector_id passed by VF + * to get actual number of VSI queue vector array index + */ + q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF]; + if (!q_vector) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + /* lookout for the invalid queue index */ + v_ret = ice_cfg_interrupt(vf, vsi, map, q_vector); + if (v_ret) + goto error_param; + } + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret, + NULL, 0); +} + +/** + * ice_vc_cfg_q_bw - Configure per queue bandwidth + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues bandwidth. + * + * Return: 0 on success or negative error value. 
+ */ +int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_queues_bw_cfg *qbw = + (struct virtchnl_queues_bw_cfg *)msg; + struct ice_vsi *vsi; + u16 i; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || + !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF || + qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + for (i = 0; i < qbw->num_queues; i++) { + if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 && + qbw->cfg[i].shaper.peak > vf->max_tx_rate) { + dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->max_tx_rate); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 && + qbw->cfg[i].shaper.committed < vf->min_tx_rate) { + dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n", + qbw->cfg[i].queue_id, vf->vf_id, + vf->min_tx_rate); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].queue_id > vf->num_vf_qs) { + dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure invalid queue_id\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + if (qbw->cfg[i].tc >= ICE_MAX_TRAFFIC_CLASS) { + dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure a traffic class higher than allowed\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + } + + for (i = 0; i < qbw->num_queues; i++) { + vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id; + vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak; + vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed; + vf->qs_bw[i].tc = qbw->cfg[i].tc; + } + + if (ice_vf_cfg_qs_bw(vf, qbw->num_queues)) + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW, + v_ret, NULL, 0); +} + +/** + * ice_vc_cfg_q_quanta - Configure per queue quanta + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer which holds the command descriptor + * + * Configure VF queues quanta. + * + * Return: 0 on success or negative error value. 
+ */ +int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg) +{ + u16 quanta_prof_id, quanta_size, start_qid, num_queues, end_qid, i; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_quanta_cfg *qquanta = + (struct virtchnl_quanta_cfg *)msg; + struct ice_vsi *vsi; + int ret; + + start_qid = qquanta->queue_select.start_queue_id; + num_queues = qquanta->queue_select.num_queues; + + if (check_add_overflow(start_qid, num_queues, &end_qid)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (end_qid > ICE_MAX_RSS_QS_PER_VF || + end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + quanta_size = qquanta->quanta_size; + if (quanta_size > ICE_MAX_QUANTA_SIZE || + quanta_size < ICE_MIN_QUANTA_SIZE) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (quanta_size % 64) { + dev_err(ice_pf_to_dev(vf->pf), "quanta size should be the product of 64\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size, + &quanta_prof_id); + if (ret) { + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto err; + } + + for (i = start_qid; i < end_qid; i++) + vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id; + +err: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA, + v_ret, NULL, 0); +} + +/** + * ice_vc_cfg_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * called from the VF to configure the Rx/Tx queues + */ +int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_vsi_queue_config_info *qci = + (struct virtchnl_vsi_queue_config_info *)msg; + struct virtchnl_queue_pair_info *qpi; + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + int i = -1, q_idx; + bool ena_ts; + u8 act_prt; + + mutex_lock(&pf->lag_mutex); + act_prt = ice_lag_prepare_vf_reset(pf->lag); + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) + goto error_param; + + if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) + goto error_param; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) + goto error_param; + + if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF || + qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { + dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n", + vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); + goto error_param; + } + + for (i = 0; i < qci->num_queue_pairs; i++) { + if (!qci->qpair[i].rxq.crc_disable) + continue; + + if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) || + vf->vlan_strip_ena) + goto error_param; + } + + for (i = 0; i < qci->num_queue_pairs; i++) { + qpi = &qci->qpair[i]; + if (qpi->txq.vsi_id != qci->vsi_id || + qpi->rxq.vsi_id != qci->vsi_id || + qpi->rxq.queue_id != qpi->txq.queue_id || + qpi->txq.headwb_enabled || + !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || + !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || + !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) { + goto error_param; + } + + q_idx = qpi->rxq.queue_id; + + /* make sure selected "q_idx" is in valid range of queues + * for selected "vsi" + */ + if (q_idx >= vsi->alloc_txq || q_idx >= 
vsi->alloc_rxq) { + goto error_param; + } + + /* copy Tx queue info from VF into VSI */ + if (qpi->txq.ring_len > 0) { + vsi->tx_rings[q_idx]->dma = qpi->txq.dma_ring_addr; + vsi->tx_rings[q_idx]->count = qpi->txq.ring_len; + + /* Disable any existing queue first */ + if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) + goto error_param; + + /* Configure a queue with the requested settings */ + if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { + dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n", + vf->vf_id, q_idx); + goto error_param; + } + } + + /* copy Rx queue info from VF into VSI */ + if (qpi->rxq.ring_len > 0) { + u16 max_frame_size = ice_vc_get_max_frame_size(vf); + struct ice_rx_ring *ring = vsi->rx_rings[q_idx]; + u32 rxdid; + + ring->dma = qpi->rxq.dma_ring_addr; + ring->count = qpi->rxq.ring_len; + + if (qpi->rxq.crc_disable) + ring->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS; + else + ring->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS; + + if (qpi->rxq.databuffer_size != 0 && + (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || + qpi->rxq.databuffer_size < 1024)) + goto error_param; + ring->rx_buf_len = qpi->rxq.databuffer_size; + if (qpi->rxq.max_pkt_size > max_frame_size || + qpi->rxq.max_pkt_size < 64) + goto error_param; + + ring->max_frame = qpi->rxq.max_pkt_size; + /* add space for the port VLAN since the VF driver is + * not expected to account for it in the MTU + * calculation + */ + if (ice_vf_is_port_vlan_ena(vf)) + ring->max_frame += VLAN_HLEN; + + if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { + dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", + vf->vf_id, q_idx); + goto error_param; + } + + /* If Rx flex desc is supported, select RXDID for Rx + * queues. Otherwise, use legacy 32byte descriptor + * format. Legacy 16byte descriptor is not supported. + * If this RXDID is selected, return error. + */ + if (vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { + rxdid = qpi->rxq.rxdid; + if (!(BIT(rxdid) & pf->supported_rxdids)) + goto error_param; + } else { + rxdid = ICE_RXDID_LEGACY_1; + } + + ena_ts = ((vf->driver_caps & + VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) && + (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) && + (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP)); + + ice_write_qrxflxp_cntxt(&vsi->back->hw, + vsi->rxq_map[q_idx], rxdid, + ICE_RXDID_PRIO, ena_ts); + } + } + + ice_lag_complete_vf_reset(pf->lag, act_prt); + mutex_unlock(&pf->lag_mutex); + + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, + VIRTCHNL_STATUS_SUCCESS, NULL, 0); +error_param: + /* disable whatever we can */ + for (; i >= 0; i--) { + if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true)) + dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n", + vf->vf_id, i); + if (ice_vf_vsi_dis_single_txq(vf, vsi, i)) + dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n", + vf->vf_id, i); + } + + ice_lag_complete_vf_reset(pf->lag, act_prt); + mutex_unlock(&pf->lag_mutex); + + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, + VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); +} + +/** + * ice_vc_request_qs_msg + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * VFs get a default number of queues but can use this message to request a + * different number. If the request is successful, PF will reset the VF and + * return 0. If unsuccessful, PF will send message informing VF of number of + * available queue pairs via virtchnl message response to VF. 
+ */ +int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_vf_res_request *vfres = + (struct virtchnl_vf_res_request *)msg; + u16 req_queues = vfres->num_queue_pairs; + struct ice_pf *pf = vf->pf; + u16 max_allowed_vf_queues; + u16 tx_rx_queue_left; + struct device *dev; + u16 cur_queues; + + dev = ice_pf_to_dev(pf); + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + cur_queues = vf->num_vf_qs; + tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf), + ice_get_avail_rxq_count(pf)); + max_allowed_vf_queues = tx_rx_queue_left + cur_queues; + if (!req_queues) { + dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n", + vf->vf_id); + } else if (req_queues > ICE_MAX_RSS_QS_PER_VF) { + dev_err(dev, "VF %d tried to request more than %d queues.\n", + vf->vf_id, ICE_MAX_RSS_QS_PER_VF); + vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF; + } else if (req_queues > cur_queues && + req_queues - cur_queues > tx_rx_queue_left) { + dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n", + vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); + vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues, + ICE_MAX_RSS_QS_PER_VF); + } else { + /* request is successful, then reset VF */ + vf->num_req_qs = req_queues; + ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); + dev_info(dev, "VF %d granted request of %u queues.\n", + vf->vf_id, req_queues); + return 0; + } + +error_param: + /* send the response to the VF */ + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, + v_ret, (u8 *)vfres, sizeof(*vfres)); +} + diff --git a/drivers/net/ethernet/intel/ice/virt/queues.h b/drivers/net/ethernet/intel/ice/virt/queues.h new file mode 100644 index 000000000000..c4a792cecea1 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/queues.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation. */ + +#ifndef _ICE_VIRT_QUEUES_H_ +#define _ICE_VIRT_QUEUES_H_ + +#include <linux/types.h> + +struct ice_vf; + +u16 ice_vc_get_max_frame_size(struct ice_vf *vf); +int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg); +int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg); +int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg); + +#endif /* _ICE_VIRT_QUEUES_H_ */ diff --git a/drivers/net/ethernet/intel/ice/virt/rss.c b/drivers/net/ethernet/intel/ice/virt/rss.c new file mode 100644 index 000000000000..cbdbb32d512b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/rss.c @@ -0,0 +1,719 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2022, Intel Corporation. 
*/ + +#include "rss.h" +#include "ice_vf_lib_private.h" +#include "ice.h" + +#define FIELD_SELECTOR(proto_hdr_field) \ + BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK) + +struct ice_vc_hdr_match_type { + u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */ + u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */ +}; + +static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = { + {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE}, + {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH}, + {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN}, + {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN}, + {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 | + ICE_FLOW_SEG_HDR_IPV_OTHER}, + {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP}, + {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP}, + {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP}, + {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE}, + {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP}, + {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH}, + {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, + ICE_FLOW_SEG_HDR_GTPU_DWN}, + {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, + ICE_FLOW_SEG_HDR_GTPU_UP}, + {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3}, + {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP}, + {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH}, + {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION}, +}; + +struct ice_vc_hash_field_match_type { + u32 vc_hdr; /* virtchnl headers + * (VIRTCHNL_PROTO_HDR_XXX) + */ + u32 vc_hash_field; /* virtchnl hash fields selector + * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX)) + */ + u64 ice_hash_field; /* ice hash fields + * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX)) + */ +}; + +static const struct +ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)}, + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)}, + {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), + ICE_FLOW_HASH_ETH}, + {VIRTCHNL_PROTO_HDR_ETH, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE), + BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)}, + {VIRTCHNL_PROTO_HDR_S_VLAN, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)}, + {VIRTCHNL_PROTO_HDR_C_VLAN, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), + ICE_FLOW_HASH_IPV4}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, 
+ {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), + ICE_FLOW_HASH_IPV6}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) | + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), + BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_TCP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), + ICE_FLOW_HASH_TCP_PORT}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_UDP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), + ICE_FLOW_HASH_UDP_PORT}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)}, + {VIRTCHNL_PROTO_HDR_SCTP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), + ICE_FLOW_HASH_SCTP_PORT}, + {VIRTCHNL_PROTO_HDR_PPPOE, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_GTPU_IP, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)}, + {VIRTCHNL_PROTO_HDR_L2TPV3, + FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID), + BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)}, + {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI), + BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)}, + {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI), + BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)}, + {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID), + BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)}, +}; + +/** + * ice_vc_validate_pattern + * @vf: pointer to the VF info + * @proto: virtchnl protocol headers + * + * validate the pattern is supported or not. + * + * Return: true on success, false on error. 
+ */ +bool +ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto) +{ + bool is_ipv4 = false; + bool is_ipv6 = false; + bool is_udp = false; + u16 ptype = -1; + int i = 0; + + while (i < proto->count && + proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) { + switch (proto->proto_hdr[i].type) { + case VIRTCHNL_PROTO_HDR_ETH: + ptype = ICE_PTYPE_MAC_PAY; + break; + case VIRTCHNL_PROTO_HDR_IPV4: + ptype = ICE_PTYPE_IPV4_PAY; + is_ipv4 = true; + break; + case VIRTCHNL_PROTO_HDR_IPV6: + ptype = ICE_PTYPE_IPV6_PAY; + is_ipv6 = true; + break; + case VIRTCHNL_PROTO_HDR_UDP: + if (is_ipv4) + ptype = ICE_PTYPE_IPV4_UDP_PAY; + else if (is_ipv6) + ptype = ICE_PTYPE_IPV6_UDP_PAY; + is_udp = true; + break; + case VIRTCHNL_PROTO_HDR_TCP: + if (is_ipv4) + ptype = ICE_PTYPE_IPV4_TCP_PAY; + else if (is_ipv6) + ptype = ICE_PTYPE_IPV6_TCP_PAY; + break; + case VIRTCHNL_PROTO_HDR_SCTP: + if (is_ipv4) + ptype = ICE_PTYPE_IPV4_SCTP_PAY; + else if (is_ipv6) + ptype = ICE_PTYPE_IPV6_SCTP_PAY; + break; + case VIRTCHNL_PROTO_HDR_GTPU_IP: + case VIRTCHNL_PROTO_HDR_GTPU_EH: + if (is_ipv4) + ptype = ICE_MAC_IPV4_GTPU; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_GTPU; + goto out; + case VIRTCHNL_PROTO_HDR_L2TPV3: + if (is_ipv4) + ptype = ICE_MAC_IPV4_L2TPV3; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_L2TPV3; + goto out; + case VIRTCHNL_PROTO_HDR_ESP: + if (is_ipv4) + ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP : + ICE_MAC_IPV4_ESP; + else if (is_ipv6) + ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP : + ICE_MAC_IPV6_ESP; + goto out; + case VIRTCHNL_PROTO_HDR_AH: + if (is_ipv4) + ptype = ICE_MAC_IPV4_AH; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_AH; + goto out; + case VIRTCHNL_PROTO_HDR_PFCP: + if (is_ipv4) + ptype = ICE_MAC_IPV4_PFCP_SESSION; + else if (is_ipv6) + ptype = ICE_MAC_IPV6_PFCP_SESSION; + goto out; + default: + break; + } + i++; + } + +out: + return ice_hw_ptype_ena(&vf->pf->hw, ptype); +} + +/** + * ice_vc_parse_rss_cfg - parses hash fields and headers from + * a specific virtchnl RSS cfg + * @hw: pointer to the hardware + * @rss_cfg: pointer to the virtchnl RSS cfg + * @hash_cfg: pointer to the HW hash configuration + * + * Return true if all the protocol header and hash fields in the RSS cfg could + * be parsed, else return false + * + * This function parses the virtchnl RSS cfg to be the intended + * hash fields and the intended header for RSS configuration + */ +static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, + struct virtchnl_rss_cfg *rss_cfg, + struct ice_rss_hash_cfg *hash_cfg) +{ + const struct ice_vc_hash_field_match_type *hf_list; + const struct ice_vc_hdr_match_type *hdr_list; + int i, hf_list_len, hdr_list_len; + u32 *addl_hdrs = &hash_cfg->addl_hdrs; + u64 *hash_flds = &hash_cfg->hash_flds; + + /* set outer layer RSS as default */ + hash_cfg->hdr_type = ICE_RSS_OUTER_HEADERS; + + if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) + hash_cfg->symm = true; + else + hash_cfg->symm = false; + + hf_list = ice_vc_hash_field_list; + hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list); + hdr_list = ice_vc_hdr_list; + hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list); + + for (i = 0; i < rss_cfg->proto_hdrs.count; i++) { + struct virtchnl_proto_hdr *proto_hdr = + &rss_cfg->proto_hdrs.proto_hdr[i]; + bool hdr_found = false; + int j; + + /* Find matched ice headers according to virtchnl headers. 
*/ + for (j = 0; j < hdr_list_len; j++) { + struct ice_vc_hdr_match_type hdr_map = hdr_list[j]; + + if (proto_hdr->type == hdr_map.vc_hdr) { + *addl_hdrs |= hdr_map.ice_hdr; + hdr_found = true; + } + } + + if (!hdr_found) + return false; + + /* Find matched ice hash fields according to + * virtchnl hash fields. + */ + for (j = 0; j < hf_list_len; j++) { + struct ice_vc_hash_field_match_type hf_map = hf_list[j]; + + if (proto_hdr->type == hf_map.vc_hdr && + proto_hdr->field_selector == hf_map.vc_hash_field) { + *hash_flds |= hf_map.ice_hash_field; + break; + } + } + } + + return true; +} + +/** + * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced + * RSS offloads + * @caps: VF driver negotiated capabilities + * + * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set, + * else return false + */ +static bool ice_vf_adv_rss_offload_ena(u32 caps) +{ + return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF); +} + +/** + * ice_vc_handle_rss_cfg + * @vf: pointer to the VF info + * @msg: pointer to the message buffer + * @add: add a RSS config if true, otherwise delete a RSS config + * + * This function adds/deletes a RSS config + */ +int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) +{ + u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG; + struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_hw *hw = &vf->pf->hw; + struct ice_vsi *vsi; + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto error_param; + } + + if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) { + dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS || + rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC || + rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) { + dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n", + vf->vf_id); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { + struct ice_vsi_ctx *ctx; + u8 lut_type, hash_type; + int status; + + lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; + hash_type = add ? 
ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : + ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + goto error_param; + } + + ctx->info.q_opt_rss = + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | + FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); + + /* Preserve existing queueing option setting */ + ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & + ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M); + ctx->info.q_opt_tc = vsi->info.q_opt_tc; + ctx->info.q_opt_flags = vsi->info.q_opt_rss; + + ctx->info.valid_sections = + cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); + + status = ice_update_vsi(hw, vsi->idx, ctx, NULL); + if (status) { + dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n", + status, libie_aq_str(hw->adminq.sq_last_status)); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + } else { + vsi->info.q_opt_rss = ctx->info.q_opt_rss; + } + + kfree(ctx); + } else { + struct ice_rss_hash_cfg cfg; + + /* Only check for none raw pattern case */ + if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + cfg.addl_hdrs = ICE_FLOW_SEG_HDR_NONE; + cfg.hash_flds = ICE_HASH_INVALID; + cfg.hdr_type = ICE_RSS_ANY_HEADERS; + + if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &cfg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (add) { + if (ice_add_rss_cfg(hw, vsi, &cfg)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n", + vsi->vsi_num, v_ret); + } + } else { + int status; + + status = ice_rem_rss_cfg(hw, vsi->idx, &cfg); + /* We just ignore -ENOENT, because if two configurations + * share the same profile remove one of them actually + * removes both, since the profile is deleted. 
+ */ + if (status && status != -ENOENT) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n", + vf->vf_id, status); + } + } + } + +error_param: + return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0); +} + +/** + * ice_vc_config_rss_key + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Configure the VF's RSS key + */ +int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_rss_key *vrk = + (struct virtchnl_rss_key *)msg; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (ice_set_rss_key(vsi, vrk->key)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; +error_param: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret, + NULL, 0); +} + +/** + * ice_vc_config_rss_lut + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Configure the VF's RSS LUT + */ +int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vrl->lut_entries != ICE_LUT_VSI_SIZE) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (ice_set_rss_lut(vsi, vrl->lut, ICE_LUT_VSI_SIZE)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; +error_param: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret, + NULL, 0); +} + +/** + * ice_vc_config_rss_hfunc + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + * + * Configure the VF's RSS Hash function + */ +int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_hfunc *vrh = (struct virtchnl_rss_hfunc *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + u8 hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; + struct ice_vsi *vsi; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!ice_vc_isvalid_vsi_id(vf, vrh->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto error_param; + } + + if (vrh->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) + hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ; + + if (ice_set_rss_hfunc(vsi, hfunc)) + v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; +error_param: + 
return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_HFUNC, v_ret, + NULL, 0); +} + +/** + * ice_vc_get_rss_hashcfg - return the RSS Hash configuration + * @vf: pointer to the VF info + */ +int ice_vc_get_rss_hashcfg(struct ice_vf *vf) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_rss_hashcfg *vrh = NULL; + int len = 0, ret; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { + dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + len = sizeof(struct virtchnl_rss_hashcfg); + vrh = kzalloc(len, GFP_KERNEL); + if (!vrh) { + v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; + len = 0; + goto err; + } + + vrh->hashcfg = ICE_DEFAULT_RSS_HASHCFG; +err: + /* send the response back to the VF */ + ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS, v_ret, + (u8 *)vrh, len); + kfree(vrh); + return ret; +} + +/** + * ice_vc_set_rss_hashcfg - set RSS Hash configuration bits for the VF + * @vf: pointer to the VF info + * @msg: pointer to the msg buffer + */ +int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg) +{ + struct virtchnl_rss_hashcfg *vrh = (struct virtchnl_rss_hashcfg *)msg; + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct ice_pf *pf = vf->pf; + struct ice_vsi *vsi; + struct device *dev; + int status; + + dev = ice_pf_to_dev(pf); + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + dev_err(dev, "RSS not supported by PF\n"); + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto err; + } + + /* clear all previously programmed RSS configuration to allow VF drivers + * the ability to customize the RSS configuration and/or completely + * disable RSS + */ + status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); + if (status && !vrh->hashcfg) { + /* only report failure to clear the current RSS configuration if + * that was clearly the VF's intention (i.e. vrh->hashcfg = 0) + */ + v_ret = ice_err_to_virt_err(status); + goto err; + } else if (status) { + /* allow the VF to update the RSS configuration even on failure + * to clear the current RSS confguration in an attempt to keep + * RSS in a working state + */ + dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n", + vf->vf_id); + } + + if (vrh->hashcfg) { + status = ice_add_avf_rss_cfg(&pf->hw, vsi, vrh->hashcfg); + v_ret = ice_err_to_virt_err(status); + } + + /* save the requested VF configuration */ + if (!v_ret) + vf->rss_hashcfg = vrh->hashcfg; + + /* send the response to the VF */ +err: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HASHCFG, v_ret, + NULL, 0); +} + diff --git a/drivers/net/ethernet/intel/ice/virt/rss.h b/drivers/net/ethernet/intel/ice/virt/rss.h new file mode 100644 index 000000000000..784d4c43ce8b --- /dev/null +++ b/drivers/net/ethernet/intel/ice/virt/rss.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2022, Intel Corporation. 
*/ + +#ifndef _ICE_VIRT_RSS_H_ +#define _ICE_VIRT_RSS_H_ + +#include <linux/types.h> + +struct ice_vf; + +int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add); +int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg); +int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg); +int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg); +int ice_vc_get_rss_hashcfg(struct ice_vf *vf); +int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg); + +#endif /* _ICE_VIRT_RSS_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.c b/drivers/net/ethernet/intel/ice/virt/virtchnl.c index 257967273079..f3f921134379 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.c +++ b/drivers/net/ethernet/intel/ice/virt/virtchnl.c @@ -1,170 +1,20 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (C) 2022, Intel Corporation. */ -#include "ice_virtchnl.h" +#include "virtchnl.h" +#include "queues.h" +#include "rss.h" #include "ice_vf_lib_private.h" #include "ice.h" #include "ice_base.h" #include "ice_lib.h" #include "ice_fltr.h" -#include "ice_virtchnl_allowlist.h" +#include "allowlist.h" #include "ice_vf_vsi_vlan_ops.h" #include "ice_vlan.h" #include "ice_flex_pipe.h" #include "ice_dcb_lib.h" -#define FIELD_SELECTOR(proto_hdr_field) \ - BIT((proto_hdr_field) & PROTO_HDR_FIELD_MASK) - -struct ice_vc_hdr_match_type { - u32 vc_hdr; /* virtchnl headers (VIRTCHNL_PROTO_HDR_XXX) */ - u32 ice_hdr; /* ice headers (ICE_FLOW_SEG_HDR_XXX) */ -}; - -static const struct ice_vc_hdr_match_type ice_vc_hdr_list[] = { - {VIRTCHNL_PROTO_HDR_NONE, ICE_FLOW_SEG_HDR_NONE}, - {VIRTCHNL_PROTO_HDR_ETH, ICE_FLOW_SEG_HDR_ETH}, - {VIRTCHNL_PROTO_HDR_S_VLAN, ICE_FLOW_SEG_HDR_VLAN}, - {VIRTCHNL_PROTO_HDR_C_VLAN, ICE_FLOW_SEG_HDR_VLAN}, - {VIRTCHNL_PROTO_HDR_IPV4, ICE_FLOW_SEG_HDR_IPV4 | - ICE_FLOW_SEG_HDR_IPV_OTHER}, - {VIRTCHNL_PROTO_HDR_IPV6, ICE_FLOW_SEG_HDR_IPV6 | - ICE_FLOW_SEG_HDR_IPV_OTHER}, - {VIRTCHNL_PROTO_HDR_TCP, ICE_FLOW_SEG_HDR_TCP}, - {VIRTCHNL_PROTO_HDR_UDP, ICE_FLOW_SEG_HDR_UDP}, - {VIRTCHNL_PROTO_HDR_SCTP, ICE_FLOW_SEG_HDR_SCTP}, - {VIRTCHNL_PROTO_HDR_PPPOE, ICE_FLOW_SEG_HDR_PPPOE}, - {VIRTCHNL_PROTO_HDR_GTPU_IP, ICE_FLOW_SEG_HDR_GTPU_IP}, - {VIRTCHNL_PROTO_HDR_GTPU_EH, ICE_FLOW_SEG_HDR_GTPU_EH}, - {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_DWN, - ICE_FLOW_SEG_HDR_GTPU_DWN}, - {VIRTCHNL_PROTO_HDR_GTPU_EH_PDU_UP, - ICE_FLOW_SEG_HDR_GTPU_UP}, - {VIRTCHNL_PROTO_HDR_L2TPV3, ICE_FLOW_SEG_HDR_L2TPV3}, - {VIRTCHNL_PROTO_HDR_ESP, ICE_FLOW_SEG_HDR_ESP}, - {VIRTCHNL_PROTO_HDR_AH, ICE_FLOW_SEG_HDR_AH}, - {VIRTCHNL_PROTO_HDR_PFCP, ICE_FLOW_SEG_HDR_PFCP_SESSION}, -}; - -struct ice_vc_hash_field_match_type { - u32 vc_hdr; /* virtchnl headers - * (VIRTCHNL_PROTO_HDR_XXX) - */ - u32 vc_hash_field; /* virtchnl hash fields selector - * FIELD_SELECTOR((VIRTCHNL_PROTO_HDR_ETH_XXX)) - */ - u64 ice_hash_field; /* ice hash fields - * (BIT_ULL(ICE_FLOW_FIELD_IDX_XXX)) - */ -}; - -static const struct -ice_vc_hash_field_match_type ice_vc_hash_field_list[] = { - {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC), - BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_SA)}, - {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), - BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_DA)}, - {VIRTCHNL_PROTO_HDR_ETH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_DST), - ICE_FLOW_HASH_ETH}, - {VIRTCHNL_PROTO_HDR_ETH, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ETH_ETHERTYPE), - BIT_ULL(ICE_FLOW_FIELD_IDX_ETH_TYPE)}, - {VIRTCHNL_PROTO_HDR_S_VLAN, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_S_VLAN_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_S_VLAN)}, - 
{VIRTCHNL_PROTO_HDR_C_VLAN, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_C_VLAN_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_C_VLAN)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST), - ICE_FLOW_HASH_IPV4}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_SA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_DA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - ICE_FLOW_HASH_IPV4 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV4, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV4_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV4_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST), - ICE_FLOW_HASH_IPV6}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_SA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_DA) | - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_SRC) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_DST) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - ICE_FLOW_HASH_IPV6 | BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_IPV6, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_IPV6_PROT), - BIT_ULL(ICE_FLOW_FIELD_IDX_IPV6_PROT)}, - {VIRTCHNL_PROTO_HDR_TCP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_SRC_PORT)}, - {VIRTCHNL_PROTO_HDR_TCP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_TCP_DST_PORT)}, - {VIRTCHNL_PROTO_HDR_TCP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_SRC_PORT) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_TCP_DST_PORT), - ICE_FLOW_HASH_TCP_PORT}, - {VIRTCHNL_PROTO_HDR_UDP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_SRC_PORT)}, - {VIRTCHNL_PROTO_HDR_UDP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_UDP_DST_PORT)}, - {VIRTCHNL_PROTO_HDR_UDP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_SRC_PORT) | - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_UDP_DST_PORT), - ICE_FLOW_HASH_UDP_PORT}, - {VIRTCHNL_PROTO_HDR_SCTP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT)}, - {VIRTCHNL_PROTO_HDR_SCTP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), - BIT_ULL(ICE_FLOW_FIELD_IDX_SCTP_DST_PORT)}, - {VIRTCHNL_PROTO_HDR_SCTP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_SRC_PORT) | - 
FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_SCTP_DST_PORT), - ICE_FLOW_HASH_SCTP_PORT}, - {VIRTCHNL_PROTO_HDR_PPPOE, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PPPOE_SESS_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_PPPOE_SESS_ID)}, - {VIRTCHNL_PROTO_HDR_GTPU_IP, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_GTPU_IP_TEID), - BIT_ULL(ICE_FLOW_FIELD_IDX_GTPU_IP_TEID)}, - {VIRTCHNL_PROTO_HDR_L2TPV3, - FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_L2TPV3_SESS_ID), - BIT_ULL(ICE_FLOW_FIELD_IDX_L2TPV3_SESS_ID)}, - {VIRTCHNL_PROTO_HDR_ESP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_ESP_SPI), - BIT_ULL(ICE_FLOW_FIELD_IDX_ESP_SPI)}, - {VIRTCHNL_PROTO_HDR_AH, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_AH_SPI), - BIT_ULL(ICE_FLOW_FIELD_IDX_AH_SPI)}, - {VIRTCHNL_PROTO_HDR_PFCP, FIELD_SELECTOR(VIRTCHNL_PROTO_HDR_PFCP_SEID), - BIT_ULL(ICE_FLOW_FIELD_IDX_PFCP_SEID)}, -}; - /** * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF * @pf: pointer to the PF structure @@ -338,28 +188,6 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg) } /** - * ice_vc_get_max_frame_size - get max frame size allowed for VF - * @vf: VF used to determine max frame size - * - * Max frame size is determined based on the current port's max frame size and - * whether a port VLAN is configured on this VF. The VF is not aware whether - * it's in a port VLAN so the PF needs to account for this in max frame size - * checks and sending the max frame size to the VF. - */ -static u16 ice_vc_get_max_frame_size(struct ice_vf *vf) -{ - struct ice_port_info *pi = ice_vf_get_port_info(vf); - u16 max_frame_size; - - max_frame_size = pi->phy.link_info.max_frame_size; - - if (ice_vf_is_port_vlan_ena(vf)) - max_frame_size -= VLAN_HLEN; - - return max_frame_size; -} - -/** * ice_vc_get_vlan_caps * @hw: pointer to the hw * @vf: pointer to the VF info @@ -559,488 +387,6 @@ bool ice_vc_isvalid_vsi_id(struct ice_vf *vf, u16 vsi_id) } /** - * ice_vc_isvalid_q_id - * @vsi: VSI to check queue ID against - * @qid: VSI relative queue ID - * - * check for the valid queue ID - */ -static bool ice_vc_isvalid_q_id(struct ice_vsi *vsi, u16 qid) -{ - /* allocated Tx and Rx queues should be always equal for VF VSI */ - return qid < vsi->alloc_txq; -} - -/** - * ice_vc_isvalid_ring_len - * @ring_len: length of ring - * - * check for the valid ring count, should be multiple of ICE_REQ_DESC_MULTIPLE - * or zero - */ -static bool ice_vc_isvalid_ring_len(u16 ring_len) -{ - return ring_len == 0 || - (ring_len >= ICE_MIN_NUM_DESC && - ring_len <= ICE_MAX_NUM_DESC && - !(ring_len % ICE_REQ_DESC_MULTIPLE)); -} - -/** - * ice_vc_validate_pattern - * @vf: pointer to the VF info - * @proto: virtchnl protocol headers - * - * validate the pattern is supported or not. - * - * Return: true on success, false on error. 
- */ -bool -ice_vc_validate_pattern(struct ice_vf *vf, struct virtchnl_proto_hdrs *proto) -{ - bool is_ipv4 = false; - bool is_ipv6 = false; - bool is_udp = false; - u16 ptype = -1; - int i = 0; - - while (i < proto->count && - proto->proto_hdr[i].type != VIRTCHNL_PROTO_HDR_NONE) { - switch (proto->proto_hdr[i].type) { - case VIRTCHNL_PROTO_HDR_ETH: - ptype = ICE_PTYPE_MAC_PAY; - break; - case VIRTCHNL_PROTO_HDR_IPV4: - ptype = ICE_PTYPE_IPV4_PAY; - is_ipv4 = true; - break; - case VIRTCHNL_PROTO_HDR_IPV6: - ptype = ICE_PTYPE_IPV6_PAY; - is_ipv6 = true; - break; - case VIRTCHNL_PROTO_HDR_UDP: - if (is_ipv4) - ptype = ICE_PTYPE_IPV4_UDP_PAY; - else if (is_ipv6) - ptype = ICE_PTYPE_IPV6_UDP_PAY; - is_udp = true; - break; - case VIRTCHNL_PROTO_HDR_TCP: - if (is_ipv4) - ptype = ICE_PTYPE_IPV4_TCP_PAY; - else if (is_ipv6) - ptype = ICE_PTYPE_IPV6_TCP_PAY; - break; - case VIRTCHNL_PROTO_HDR_SCTP: - if (is_ipv4) - ptype = ICE_PTYPE_IPV4_SCTP_PAY; - else if (is_ipv6) - ptype = ICE_PTYPE_IPV6_SCTP_PAY; - break; - case VIRTCHNL_PROTO_HDR_GTPU_IP: - case VIRTCHNL_PROTO_HDR_GTPU_EH: - if (is_ipv4) - ptype = ICE_MAC_IPV4_GTPU; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_GTPU; - goto out; - case VIRTCHNL_PROTO_HDR_L2TPV3: - if (is_ipv4) - ptype = ICE_MAC_IPV4_L2TPV3; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_L2TPV3; - goto out; - case VIRTCHNL_PROTO_HDR_ESP: - if (is_ipv4) - ptype = is_udp ? ICE_MAC_IPV4_NAT_T_ESP : - ICE_MAC_IPV4_ESP; - else if (is_ipv6) - ptype = is_udp ? ICE_MAC_IPV6_NAT_T_ESP : - ICE_MAC_IPV6_ESP; - goto out; - case VIRTCHNL_PROTO_HDR_AH: - if (is_ipv4) - ptype = ICE_MAC_IPV4_AH; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_AH; - goto out; - case VIRTCHNL_PROTO_HDR_PFCP: - if (is_ipv4) - ptype = ICE_MAC_IPV4_PFCP_SESSION; - else if (is_ipv6) - ptype = ICE_MAC_IPV6_PFCP_SESSION; - goto out; - default: - break; - } - i++; - } - -out: - return ice_hw_ptype_ena(&vf->pf->hw, ptype); -} - -/** - * ice_vc_parse_rss_cfg - parses hash fields and headers from - * a specific virtchnl RSS cfg - * @hw: pointer to the hardware - * @rss_cfg: pointer to the virtchnl RSS cfg - * @hash_cfg: pointer to the HW hash configuration - * - * Return true if all the protocol header and hash fields in the RSS cfg could - * be parsed, else return false - * - * This function parses the virtchnl RSS cfg to be the intended - * hash fields and the intended header for RSS configuration - */ -static bool ice_vc_parse_rss_cfg(struct ice_hw *hw, - struct virtchnl_rss_cfg *rss_cfg, - struct ice_rss_hash_cfg *hash_cfg) -{ - const struct ice_vc_hash_field_match_type *hf_list; - const struct ice_vc_hdr_match_type *hdr_list; - int i, hf_list_len, hdr_list_len; - u32 *addl_hdrs = &hash_cfg->addl_hdrs; - u64 *hash_flds = &hash_cfg->hash_flds; - - /* set outer layer RSS as default */ - hash_cfg->hdr_type = ICE_RSS_OUTER_HEADERS; - - if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) - hash_cfg->symm = true; - else - hash_cfg->symm = false; - - hf_list = ice_vc_hash_field_list; - hf_list_len = ARRAY_SIZE(ice_vc_hash_field_list); - hdr_list = ice_vc_hdr_list; - hdr_list_len = ARRAY_SIZE(ice_vc_hdr_list); - - for (i = 0; i < rss_cfg->proto_hdrs.count; i++) { - struct virtchnl_proto_hdr *proto_hdr = - &rss_cfg->proto_hdrs.proto_hdr[i]; - bool hdr_found = false; - int j; - - /* Find matched ice headers according to virtchnl headers. 
*/ - for (j = 0; j < hdr_list_len; j++) { - struct ice_vc_hdr_match_type hdr_map = hdr_list[j]; - - if (proto_hdr->type == hdr_map.vc_hdr) { - *addl_hdrs |= hdr_map.ice_hdr; - hdr_found = true; - } - } - - if (!hdr_found) - return false; - - /* Find matched ice hash fields according to - * virtchnl hash fields. - */ - for (j = 0; j < hf_list_len; j++) { - struct ice_vc_hash_field_match_type hf_map = hf_list[j]; - - if (proto_hdr->type == hf_map.vc_hdr && - proto_hdr->field_selector == hf_map.vc_hash_field) { - *hash_flds |= hf_map.ice_hash_field; - break; - } - } - } - - return true; -} - -/** - * ice_vf_adv_rss_offload_ena - determine if capabilities support advanced - * RSS offloads - * @caps: VF driver negotiated capabilities - * - * Return true if VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF capability is set, - * else return false - */ -static bool ice_vf_adv_rss_offload_ena(u32 caps) -{ - return !!(caps & VIRTCHNL_VF_OFFLOAD_ADV_RSS_PF); -} - -/** - * ice_vc_handle_rss_cfg - * @vf: pointer to the VF info - * @msg: pointer to the message buffer - * @add: add a RSS config if true, otherwise delete a RSS config - * - * This function adds/deletes a RSS config - */ -static int ice_vc_handle_rss_cfg(struct ice_vf *vf, u8 *msg, bool add) -{ - u32 v_opcode = add ? VIRTCHNL_OP_ADD_RSS_CFG : VIRTCHNL_OP_DEL_RSS_CFG; - struct virtchnl_rss_cfg *rss_cfg = (struct virtchnl_rss_cfg *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct device *dev = ice_pf_to_dev(vf->pf); - struct ice_hw *hw = &vf->pf->hw; - struct ice_vsi *vsi; - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - dev_dbg(dev, "VF %d attempting to configure RSS, but RSS is not supported by the PF\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; - goto error_param; - } - - if (!ice_vf_adv_rss_offload_ena(vf->driver_caps)) { - dev_dbg(dev, "VF %d attempting to configure RSS, but Advanced RSS offload is not supported\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (rss_cfg->proto_hdrs.count > VIRTCHNL_MAX_NUM_PROTO_HDRS || - rss_cfg->rss_algorithm < VIRTCHNL_RSS_ALG_TOEPLITZ_ASYMMETRIC || - rss_cfg->rss_algorithm > VIRTCHNL_RSS_ALG_XOR_SYMMETRIC) { - dev_dbg(dev, "VF %d attempting to configure RSS, but RSS configuration is not valid\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (rss_cfg->rss_algorithm == VIRTCHNL_RSS_ALG_R_ASYMMETRIC) { - struct ice_vsi_ctx *ctx; - u8 lut_type, hash_type; - int status; - - lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI; - hash_type = add ? 
ICE_AQ_VSI_Q_OPT_RSS_HASH_XOR : - ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; - - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; - goto error_param; - } - - ctx->info.q_opt_rss = - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_LUT_M, lut_type) | - FIELD_PREP(ICE_AQ_VSI_Q_OPT_RSS_HASH_M, hash_type); - - /* Preserve existing queueing option setting */ - ctx->info.q_opt_rss |= (vsi->info.q_opt_rss & - ICE_AQ_VSI_Q_OPT_RSS_GBL_LUT_M); - ctx->info.q_opt_tc = vsi->info.q_opt_tc; - ctx->info.q_opt_flags = vsi->info.q_opt_rss; - - ctx->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_Q_OPT_VALID); - - status = ice_update_vsi(hw, vsi->idx, ctx, NULL); - if (status) { - dev_err(dev, "update VSI for RSS failed, err %d aq_err %s\n", - status, libie_aq_str(hw->adminq.sq_last_status)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - } else { - vsi->info.q_opt_rss = ctx->info.q_opt_rss; - } - - kfree(ctx); - } else { - struct ice_rss_hash_cfg cfg; - - /* Only check for none raw pattern case */ - if (!ice_vc_validate_pattern(vf, &rss_cfg->proto_hdrs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - cfg.addl_hdrs = ICE_FLOW_SEG_HDR_NONE; - cfg.hash_flds = ICE_HASH_INVALID; - cfg.hdr_type = ICE_RSS_ANY_HEADERS; - - if (!ice_vc_parse_rss_cfg(hw, rss_cfg, &cfg)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (add) { - if (ice_add_rss_cfg(hw, vsi, &cfg)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, "ice_add_rss_cfg failed for vsi = %d, v_ret = %d\n", - vsi->vsi_num, v_ret); - } - } else { - int status; - - status = ice_rem_rss_cfg(hw, vsi->idx, &cfg); - /* We just ignore -ENOENT, because if two configurations - * share the same profile remove one of them actually - * removes both, since the profile is deleted. 
- */ - if (status && status != -ENOENT) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - dev_err(dev, "ice_rem_rss_cfg failed for VF ID:%d, error:%d\n", - vf->vf_id, status); - } - } - } - -error_param: - return ice_vc_send_msg_to_vf(vf, v_opcode, v_ret, NULL, 0); -} - -/** - * ice_vc_config_rss_key - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * Configure the VF's RSS key - */ -static int ice_vc_config_rss_key(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_rss_key *vrk = - (struct virtchnl_rss_key *)msg; - struct ice_vsi *vsi; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vrk->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vrk->key_len != ICE_VSIQF_HKEY_ARRAY_SIZE) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (ice_set_rss_key(vsi, vrk->key)) - v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; -error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_KEY, v_ret, - NULL, 0); -} - -/** - * ice_vc_config_rss_lut - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * Configure the VF's RSS LUT - */ -static int ice_vc_config_rss_lut(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_rss_lut *vrl = (struct virtchnl_rss_lut *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct ice_vsi *vsi; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vrl->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vrl->lut_entries != ICE_LUT_VSI_SIZE) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (ice_set_rss_lut(vsi, vrl->lut, ICE_LUT_VSI_SIZE)) - v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; -error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_LUT, v_ret, - NULL, 0); -} - -/** - * ice_vc_config_rss_hfunc - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * Configure the VF's RSS Hash function - */ -static int ice_vc_config_rss_hfunc(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_rss_hfunc *vrh = (struct virtchnl_rss_hfunc *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - u8 hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_TPLZ; - struct ice_vsi *vsi; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vrh->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vrh->rss_algorithm == VIRTCHNL_RSS_ALG_TOEPLITZ_SYMMETRIC) - hfunc = ICE_AQ_VSI_Q_OPT_RSS_HASH_SYM_TPLZ; - - if (ice_set_rss_hfunc(vsi, hfunc)) - v_ret = 
VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR; -error_param: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_RSS_HFUNC, v_ret, - NULL, 0); -} - -/** * ice_vc_get_qos_caps - Get current QoS caps from PF * @vf: pointer to the VF info * @@ -1122,110 +468,6 @@ err: } /** - * ice_vf_cfg_qs_bw - Configure per queue bandwidth - * @vf: pointer to the VF info - * @num_queues: number of queues to be configured - * - * Configure per queue bandwidth. - * - * Return: 0 on success or negative error value. - */ -static int ice_vf_cfg_qs_bw(struct ice_vf *vf, u16 num_queues) -{ - struct ice_hw *hw = &vf->pf->hw; - struct ice_vsi *vsi; - int ret; - u16 i; - - vsi = ice_get_vf_vsi(vf); - if (!vsi) - return -EINVAL; - - for (i = 0; i < num_queues; i++) { - u32 p_rate, min_rate; - u8 tc; - - p_rate = vf->qs_bw[i].peak; - min_rate = vf->qs_bw[i].committed; - tc = vf->qs_bw[i].tc; - if (p_rate) - ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MAX_BW, p_rate); - else - ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MAX_BW); - if (ret) - return ret; - - if (min_rate) - ret = ice_cfg_q_bw_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MIN_BW, min_rate); - else - ret = ice_cfg_q_bw_dflt_lmt(hw->port_info, vsi->idx, tc, - vf->qs_bw[i].queue_id, - ICE_MIN_BW); - - if (ret) - return ret; - } - - return 0; -} - -/** - * ice_vf_cfg_q_quanta_profile - Configure quanta profile - * @vf: pointer to the VF info - * @quanta_prof_idx: pointer to the quanta profile index - * @quanta_size: quanta size to be set - * - * This function chooses available quanta profile and configures the register. - * The quanta profile is evenly divided by the number of device ports, and then - * available to the specific PF and VFs. The first profile for each PF is a - * reserved default profile. Only quanta size of the rest unused profile can be - * modified. - * - * Return: 0 on success or negative error value. 
- */ -static int ice_vf_cfg_q_quanta_profile(struct ice_vf *vf, u16 quanta_size, - u16 *quanta_prof_idx) -{ - const u16 n_desc = calc_quanta_desc(quanta_size); - struct ice_hw *hw = &vf->pf->hw; - const u16 n_cmd = 2 * n_desc; - struct ice_pf *pf = vf->pf; - u16 per_pf, begin_id; - u8 n_used; - u32 reg; - - begin_id = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / hw->dev_caps.num_funcs * - hw->logical_pf_id; - - if (quanta_size == ICE_DFLT_QUANTA) { - *quanta_prof_idx = begin_id; - } else { - per_pf = (GLCOMM_QUANTA_PROF_MAX_INDEX + 1) / - hw->dev_caps.num_funcs; - n_used = pf->num_quanta_prof_used; - if (n_used < per_pf) { - *quanta_prof_idx = begin_id + 1 + n_used; - pf->num_quanta_prof_used++; - } else { - return -EINVAL; - } - } - - reg = FIELD_PREP(GLCOMM_QUANTA_PROF_QUANTA_SIZE_M, quanta_size) | - FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_CMD_M, n_cmd) | - FIELD_PREP(GLCOMM_QUANTA_PROF_MAX_DESC_M, n_desc); - wr32(hw, GLCOMM_QUANTA_PROF(*quanta_prof_idx), reg); - - return 0; -} - -/** * ice_vc_cfg_promiscuous_mode_msg * @vf: pointer to the VF info * @msg: pointer to the msg buffer @@ -1407,757 +649,6 @@ error_param: } /** - * ice_vc_validate_vqs_bitmaps - validate Rx/Tx queue bitmaps from VIRTCHNL - * @vqs: virtchnl_queue_select structure containing bitmaps to validate - * - * Return true on successful validation, else false - */ -static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs) -{ - if ((!vqs->rx_queues && !vqs->tx_queues) || - vqs->rx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF) || - vqs->tx_queues >= BIT(ICE_MAX_RSS_QS_PER_VF)) - return false; - - return true; -} - -/** - * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL - * @vsi: VSI of the VF to configure - * @q_idx: VF queue index used to determine the queue in the PF's space - */ -void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx) -{ - struct ice_hw *hw = &vsi->back->hw; - u32 pfq = vsi->txq_map[q_idx]; - u32 reg; - - reg = rd32(hw, QINT_TQCTL(pfq)); - - /* MSI-X index 0 in the VF's space is always for the OICR, which means - * this is most likely a poll mode VF driver, so don't enable an - * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP - */ - if (!(reg & QINT_TQCTL_MSIX_INDX_M)) - return; - - wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M); -} - -/** - * ice_vf_ena_rxq_interrupt - enable Tx queue interrupt via QINT_RQCTL - * @vsi: VSI of the VF to configure - * @q_idx: VF queue index used to determine the queue in the PF's space - */ -void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx) -{ - struct ice_hw *hw = &vsi->back->hw; - u32 pfq = vsi->rxq_map[q_idx]; - u32 reg; - - reg = rd32(hw, QINT_RQCTL(pfq)); - - /* MSI-X index 0 in the VF's space is always for the OICR, which means - * this is most likely a poll mode VF driver, so don't enable an - * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP - */ - if (!(reg & QINT_RQCTL_MSIX_INDX_M)) - return; - - wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M); -} - -/** - * ice_vc_ena_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to enable all or specific queue(s) - */ -static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_queue_select *vqs = - (struct virtchnl_queue_select *)msg; - struct ice_vsi *vsi; - unsigned long q_map; - u16 vf_q_id; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; 
- } - - if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_validate_vqs_bitmaps(vqs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Enable only Rx rings, Tx rings were enabled by the FW when the - * Tx queue group list was configured and the context bits were - * programmed using ice_vsi_cfg_txqs - */ - q_map = vqs->rx_queues; - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Skip queue if enabled */ - if (test_bit(vf_q_id, vf->rxq_ena)) - continue; - - if (ice_vsi_ctrl_one_rx_ring(vsi, true, vf_q_id, true)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to enable Rx ring %d on VSI %d\n", - vf_q_id, vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - ice_vf_ena_rxq_interrupt(vsi, vf_q_id); - set_bit(vf_q_id, vf->rxq_ena); - } - - q_map = vqs->tx_queues; - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Skip queue if enabled */ - if (test_bit(vf_q_id, vf->txq_ena)) - continue; - - ice_vf_ena_txq_interrupt(vsi, vf_q_id); - set_bit(vf_q_id, vf->txq_ena); - } - - /* Set flag to indicate that queues are enabled */ - if (v_ret == VIRTCHNL_STATUS_SUCCESS) - set_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_QUEUES, v_ret, - NULL, 0); -} - -/** - * ice_vf_vsi_dis_single_txq - disable a single Tx queue - * @vf: VF to disable queue for - * @vsi: VSI for the VF - * @q_id: VF relative (0-based) queue ID - * - * Attempt to disable the Tx queue passed in. If the Tx queue was successfully - * disabled then clear q_id bit in the enabled queues bitmap and return - * success. Otherwise return error. 
- */ -int ice_vf_vsi_dis_single_txq(struct ice_vf *vf, struct ice_vsi *vsi, u16 q_id) -{ - struct ice_txq_meta txq_meta = { 0 }; - struct ice_tx_ring *ring; - int err; - - if (!test_bit(q_id, vf->txq_ena)) - dev_dbg(ice_pf_to_dev(vsi->back), "Queue %u on VSI %u is not enabled, but stopping it anyway\n", - q_id, vsi->vsi_num); - - ring = vsi->tx_rings[q_id]; - if (!ring) - return -EINVAL; - - ice_fill_txq_meta(vsi, ring, &txq_meta); - - err = ice_vsi_stop_tx_ring(vsi, ICE_NO_RESET, vf->vf_id, ring, &txq_meta); - if (err) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Tx ring %d on VSI %d\n", - q_id, vsi->vsi_num); - return err; - } - - /* Clear enabled queues flag */ - clear_bit(q_id, vf->txq_ena); - - return 0; -} - -/** - * ice_vc_dis_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to disable all or specific queue(s) - */ -static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_queue_select *vqs = - (struct virtchnl_queue_select *)msg; - struct ice_vsi *vsi; - unsigned long q_map; - u16 vf_q_id; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) && - !test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_isvalid_vsi_id(vf, vqs->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (!ice_vc_validate_vqs_bitmaps(vqs)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (vqs->tx_queues) { - q_map = vqs->tx_queues; - - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - if (ice_vf_vsi_dis_single_txq(vf, vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - } - } - - q_map = vqs->rx_queues; - /* speed up Rx queue disable by batching them if possible */ - if (q_map && - bitmap_equal(&q_map, vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF)) { - if (ice_vsi_stop_all_rx_rings(vsi)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop all Rx rings on VSI %d\n", - vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - bitmap_zero(vf->rxq_ena, ICE_MAX_RSS_QS_PER_VF); - } else if (q_map) { - for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - if (!ice_vc_isvalid_q_id(vsi, vf_q_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Skip queue if not enabled */ - if (!test_bit(vf_q_id, vf->rxq_ena)) - continue; - - if (ice_vsi_ctrl_one_rx_ring(vsi, false, vf_q_id, - true)) { - dev_err(ice_pf_to_dev(vsi->back), "Failed to stop Rx ring %d on VSI %d\n", - vf_q_id, vsi->vsi_num); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* Clear enabled queues flag */ - clear_bit(vf_q_id, vf->rxq_ena); - } - } - - /* Clear enabled queues flag */ - if (v_ret == VIRTCHNL_STATUS_SUCCESS && ice_vf_has_no_qs_ena(vf)) - clear_bit(ICE_VF_STATE_QS_ENA, vf->vf_states); - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_QUEUES, v_ret, - NULL, 0); -} - -/** - * ice_cfg_interrupt - * @vf: pointer to the VF info - * @vsi: the VSI being configured - * @map: vector map for mapping vectors to queues - * @q_vector: structure for interrupt vector - * configure the IRQ to queue map - */ -static enum virtchnl_status_code 
-ice_cfg_interrupt(struct ice_vf *vf, struct ice_vsi *vsi, - struct virtchnl_vector_map *map, - struct ice_q_vector *q_vector) -{ - u16 vsi_q_id, vsi_q_id_idx; - unsigned long qmap; - - q_vector->num_ring_rx = 0; - q_vector->num_ring_tx = 0; - - qmap = map->rxq_map; - for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { - vsi_q_id = vsi_q_id_idx; - - if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) - return VIRTCHNL_STATUS_ERR_PARAM; - - q_vector->num_ring_rx++; - q_vector->rx.itr_idx = map->rxitr_idx; - vsi->rx_rings[vsi_q_id]->q_vector = q_vector; - ice_cfg_rxq_interrupt(vsi, vsi_q_id, - q_vector->vf_reg_idx, - q_vector->rx.itr_idx); - } - - qmap = map->txq_map; - for_each_set_bit(vsi_q_id_idx, &qmap, ICE_MAX_RSS_QS_PER_VF) { - vsi_q_id = vsi_q_id_idx; - - if (!ice_vc_isvalid_q_id(vsi, vsi_q_id)) - return VIRTCHNL_STATUS_ERR_PARAM; - - q_vector->num_ring_tx++; - q_vector->tx.itr_idx = map->txitr_idx; - vsi->tx_rings[vsi_q_id]->q_vector = q_vector; - ice_cfg_txq_interrupt(vsi, vsi_q_id, - q_vector->vf_reg_idx, - q_vector->tx.itr_idx); - } - - return VIRTCHNL_STATUS_SUCCESS; -} - -/** - * ice_vc_cfg_irq_map_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to configure the IRQ to queue map - */ -static int ice_vc_cfg_irq_map_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - u16 num_q_vectors_mapped, vsi_id, vector_id; - struct virtchnl_irq_map_info *irqmap_info; - struct virtchnl_vector_map *map; - struct ice_vsi *vsi; - int i; - - irqmap_info = (struct virtchnl_irq_map_info *)msg; - num_q_vectors_mapped = irqmap_info->num_vectors; - - /* Check to make sure number of VF vectors mapped is not greater than - * number of VF vectors originally allocated, and check that - * there is actually at least a single VF queue vector mapped - */ - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - vf->num_msix < num_q_vectors_mapped || - !num_q_vectors_mapped) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - for (i = 0; i < num_q_vectors_mapped; i++) { - struct ice_q_vector *q_vector; - - map = &irqmap_info->vecmap[i]; - - vector_id = map->vector_id; - vsi_id = map->vsi_id; - /* vector_id is always 0-based for each VF, and can never be - * larger than or equal to the max allowed interrupts per VF - */ - if (!(vector_id < vf->num_msix) || - !ice_vc_isvalid_vsi_id(vf, vsi_id) || - (!vector_id && (map->rxq_map || map->txq_map))) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* No need to map VF miscellaneous or rogue vector */ - if (!vector_id) - continue; - - /* Subtract non queue vector from vector_id passed by VF - * to get actual number of VSI queue vector array index - */ - q_vector = vsi->q_vectors[vector_id - ICE_NONQ_VECS_VF]; - if (!q_vector) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - /* lookout for the invalid queue index */ - v_ret = ice_cfg_interrupt(vf, vsi, map, q_vector); - if (v_ret) - goto error_param; - } - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_IRQ_MAP, v_ret, - NULL, 0); -} - -/** - * ice_vc_cfg_q_bw - Configure per queue bandwidth - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer which holds the command descriptor - * - * Configure VF queues bandwidth. - * - * Return: 0 on success or negative error value. 
- */ -static int ice_vc_cfg_q_bw(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_queues_bw_cfg *qbw = - (struct virtchnl_queues_bw_cfg *)msg; - struct ice_vsi *vsi; - u16 i; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || - !ice_vc_isvalid_vsi_id(vf, qbw->vsi_id)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (qbw->num_queues > ICE_MAX_RSS_QS_PER_VF || - qbw->num_queues > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", - vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - for (i = 0; i < qbw->num_queues; i++) { - if (qbw->cfg[i].shaper.peak != 0 && vf->max_tx_rate != 0 && - qbw->cfg[i].shaper.peak > vf->max_tx_rate) { - dev_warn(ice_pf_to_dev(vf->pf), "The maximum queue %d rate limit configuration may not take effect because the maximum TX rate for VF-%d is %d\n", - qbw->cfg[i].queue_id, vf->vf_id, - vf->max_tx_rate); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - if (qbw->cfg[i].shaper.committed != 0 && vf->min_tx_rate != 0 && - qbw->cfg[i].shaper.committed < vf->min_tx_rate) { - dev_warn(ice_pf_to_dev(vf->pf), "The minimum queue %d rate limit configuration may not take effect because the minimum TX rate for VF-%d is %d\n", - qbw->cfg[i].queue_id, vf->vf_id, - vf->min_tx_rate); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - if (qbw->cfg[i].queue_id > vf->num_vf_qs) { - dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure invalid queue_id\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - if (qbw->cfg[i].tc >= ICE_MAX_TRAFFIC_CLASS) { - dev_warn(ice_pf_to_dev(vf->pf), "VF-%d trying to configure a traffic class higher than allowed\n", - vf->vf_id); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - } - - for (i = 0; i < qbw->num_queues; i++) { - vf->qs_bw[i].queue_id = qbw->cfg[i].queue_id; - vf->qs_bw[i].peak = qbw->cfg[i].shaper.peak; - vf->qs_bw[i].committed = qbw->cfg[i].shaper.committed; - vf->qs_bw[i].tc = qbw->cfg[i].tc; - } - - if (ice_vf_cfg_qs_bw(vf, qbw->num_queues)) - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - -err: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUEUE_BW, - v_ret, NULL, 0); -} - -/** - * ice_vc_cfg_q_quanta - Configure per queue quanta - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer which holds the command descriptor - * - * Configure VF queues quanta. - * - * Return: 0 on success or negative error value. 
- */ -static int ice_vc_cfg_q_quanta(struct ice_vf *vf, u8 *msg) -{ - u16 quanta_prof_id, quanta_size, start_qid, num_queues, end_qid, i; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_quanta_cfg *qquanta = - (struct virtchnl_quanta_cfg *)msg; - struct ice_vsi *vsi; - int ret; - - start_qid = qquanta->queue_select.start_queue_id; - num_queues = qquanta->queue_select.num_queues; - - if (check_add_overflow(start_qid, num_queues, &end_qid)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (end_qid > ICE_MAX_RSS_QS_PER_VF || - end_qid > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(ice_pf_to_dev(vf->pf), "VF-%d trying to configure more than allocated number of queues: %d\n", - vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - quanta_size = qquanta->quanta_size; - if (quanta_size > ICE_MAX_QUANTA_SIZE || - quanta_size < ICE_MIN_QUANTA_SIZE) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (quanta_size % 64) { - dev_err(ice_pf_to_dev(vf->pf), "quanta size should be the product of 64\n"); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - ret = ice_vf_cfg_q_quanta_profile(vf, quanta_size, - &quanta_prof_id); - if (ret) { - v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; - goto err; - } - - for (i = start_qid; i < end_qid; i++) - vsi->tx_rings[i]->quanta_prof_id = quanta_prof_id; - -err: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_QUANTA, - v_ret, NULL, 0); -} - -/** - * ice_vc_cfg_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * called from the VF to configure the Rx/Tx queues - */ -static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_vsi_queue_config_info *qci = - (struct virtchnl_vsi_queue_config_info *)msg; - struct virtchnl_queue_pair_info *qpi; - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - int i = -1, q_idx; - bool ena_ts; - u8 act_prt; - - mutex_lock(&pf->lag_mutex); - act_prt = ice_lag_prepare_vf_reset(pf->lag); - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) - goto error_param; - - if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) - goto error_param; - - vsi = ice_get_vf_vsi(vf); - if (!vsi) - goto error_param; - - if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF || - qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) { - dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n", - vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)); - goto error_param; - } - - for (i = 0; i < qci->num_queue_pairs; i++) { - if (!qci->qpair[i].rxq.crc_disable) - continue; - - if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_CRC) || - vf->vlan_strip_ena) - goto error_param; - } - - for (i = 0; i < qci->num_queue_pairs; i++) { - qpi = &qci->qpair[i]; - if (qpi->txq.vsi_id != qci->vsi_id || - qpi->rxq.vsi_id != qci->vsi_id || - qpi->rxq.queue_id != qpi->txq.queue_id || - qpi->txq.headwb_enabled || - !ice_vc_isvalid_ring_len(qpi->txq.ring_len) || - !ice_vc_isvalid_ring_len(qpi->rxq.ring_len) || - !ice_vc_isvalid_q_id(vsi, qpi->txq.queue_id)) { - goto error_param; - } - - q_idx = qpi->rxq.queue_id; - - /* make sure selected "q_idx" is in valid range of queues - * for selected "vsi" - */ - if (q_idx >= vsi->alloc_txq || 
q_idx >= vsi->alloc_rxq) { - goto error_param; - } - - /* copy Tx queue info from VF into VSI */ - if (qpi->txq.ring_len > 0) { - vsi->tx_rings[q_idx]->dma = qpi->txq.dma_ring_addr; - vsi->tx_rings[q_idx]->count = qpi->txq.ring_len; - - /* Disable any existing queue first */ - if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) - goto error_param; - - /* Configure a queue with the requested settings */ - if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) { - dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n", - vf->vf_id, q_idx); - goto error_param; - } - } - - /* copy Rx queue info from VF into VSI */ - if (qpi->rxq.ring_len > 0) { - u16 max_frame_size = ice_vc_get_max_frame_size(vf); - struct ice_rx_ring *ring = vsi->rx_rings[q_idx]; - u32 rxdid; - - ring->dma = qpi->rxq.dma_ring_addr; - ring->count = qpi->rxq.ring_len; - - if (qpi->rxq.crc_disable) - ring->flags |= ICE_RX_FLAGS_CRC_STRIP_DIS; - else - ring->flags &= ~ICE_RX_FLAGS_CRC_STRIP_DIS; - - if (qpi->rxq.databuffer_size != 0 && - (qpi->rxq.databuffer_size > ((16 * 1024) - 128) || - qpi->rxq.databuffer_size < 1024)) - goto error_param; - ring->rx_buf_len = qpi->rxq.databuffer_size; - if (qpi->rxq.max_pkt_size > max_frame_size || - qpi->rxq.max_pkt_size < 64) - goto error_param; - - ring->max_frame = qpi->rxq.max_pkt_size; - /* add space for the port VLAN since the VF driver is - * not expected to account for it in the MTU - * calculation - */ - if (ice_vf_is_port_vlan_ena(vf)) - ring->max_frame += VLAN_HLEN; - - if (ice_vsi_cfg_single_rxq(vsi, q_idx)) { - dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n", - vf->vf_id, q_idx); - goto error_param; - } - - /* If Rx flex desc is supported, select RXDID for Rx - * queues. Otherwise, use legacy 32byte descriptor - * format. Legacy 16byte descriptor is not supported. - * If this RXDID is selected, return error. - */ - if (vf->driver_caps & - VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) { - rxdid = qpi->rxq.rxdid; - if (!(BIT(rxdid) & pf->supported_rxdids)) - goto error_param; - } else { - rxdid = ICE_RXDID_LEGACY_1; - } - - ena_ts = ((vf->driver_caps & - VIRTCHNL_VF_OFFLOAD_RX_FLEX_DESC) && - (vf->driver_caps & VIRTCHNL_VF_CAP_PTP) && - (qpi->rxq.flags & VIRTCHNL_PTP_RX_TSTAMP)); - - ice_write_qrxflxp_cntxt(&vsi->back->hw, - vsi->rxq_map[q_idx], rxdid, - ICE_RXDID_PRIO, ena_ts); - } - } - - ice_lag_complete_vf_reset(pf->lag, act_prt); - mutex_unlock(&pf->lag_mutex); - - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, - VIRTCHNL_STATUS_SUCCESS, NULL, 0); -error_param: - /* disable whatever we can */ - for (; i >= 0; i--) { - if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true)) - dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n", - vf->vf_id, i); - if (ice_vf_vsi_dis_single_txq(vf, vsi, i)) - dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n", - vf->vf_id, i); - } - - ice_lag_complete_vf_reset(pf->lag, act_prt); - mutex_unlock(&pf->lag_mutex); - - ice_lag_move_new_vf_nodes(vf); - - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, - VIRTCHNL_STATUS_ERR_PARAM, NULL, 0); -} - -/** * ice_can_vf_change_mac * @vf: pointer to the VF info * @@ -2531,66 +1022,6 @@ static int ice_vc_del_mac_addr_msg(struct ice_vf *vf, u8 *msg) } /** - * ice_vc_request_qs_msg - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - * - * VFs get a default number of queues but can use this message to request a - * different number. 
If the request is successful, PF will reset the VF and - * return 0. If unsuccessful, PF will send message informing VF of number of - * available queue pairs via virtchnl message response to VF. - */ -static int ice_vc_request_qs_msg(struct ice_vf *vf, u8 *msg) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_vf_res_request *vfres = - (struct virtchnl_vf_res_request *)msg; - u16 req_queues = vfres->num_queue_pairs; - struct ice_pf *pf = vf->pf; - u16 max_allowed_vf_queues; - u16 tx_rx_queue_left; - struct device *dev; - u16 cur_queues; - - dev = ice_pf_to_dev(pf); - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto error_param; - } - - cur_queues = vf->num_vf_qs; - tx_rx_queue_left = min_t(u16, ice_get_avail_txq_count(pf), - ice_get_avail_rxq_count(pf)); - max_allowed_vf_queues = tx_rx_queue_left + cur_queues; - if (!req_queues) { - dev_err(dev, "VF %d tried to request 0 queues. Ignoring.\n", - vf->vf_id); - } else if (req_queues > ICE_MAX_RSS_QS_PER_VF) { - dev_err(dev, "VF %d tried to request more than %d queues.\n", - vf->vf_id, ICE_MAX_RSS_QS_PER_VF); - vfres->num_queue_pairs = ICE_MAX_RSS_QS_PER_VF; - } else if (req_queues > cur_queues && - req_queues - cur_queues > tx_rx_queue_left) { - dev_warn(dev, "VF %d requested %u more queues, but only %u left.\n", - vf->vf_id, req_queues - cur_queues, tx_rx_queue_left); - vfres->num_queue_pairs = min_t(u16, max_allowed_vf_queues, - ICE_MAX_RSS_QS_PER_VF); - } else { - /* request is successful, then reset VF */ - vf->num_req_qs = req_queues; - ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); - dev_info(dev, "VF %d granted request of %u queues.\n", - vf->vf_id, req_queues); - return 0; - } - -error_param: - /* send the response to the VF */ - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_REQUEST_QUEUES, - v_ret, (u8 *)vfres, sizeof(*vfres)); -} - -/** * ice_vf_vlan_offload_ena - determine if capabilities support VLAN offloads * @caps: VF driver negotiated capabilities * @@ -2983,112 +1414,6 @@ error_param: } /** - * ice_vc_get_rss_hashcfg - return the RSS Hash configuration - * @vf: pointer to the VF info - */ -static int ice_vc_get_rss_hashcfg(struct ice_vf *vf) -{ - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct virtchnl_rss_hashcfg *vrh = NULL; - int len = 0, ret; - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, vf->pf->flags)) { - dev_err(ice_pf_to_dev(vf->pf), "RSS not supported by PF\n"); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - len = sizeof(struct virtchnl_rss_hashcfg); - vrh = kzalloc(len, GFP_KERNEL); - if (!vrh) { - v_ret = VIRTCHNL_STATUS_ERR_NO_MEMORY; - len = 0; - goto err; - } - - vrh->hashcfg = ICE_DEFAULT_RSS_HASHCFG; -err: - /* send the response back to the VF */ - ret = ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_GET_RSS_HASHCFG_CAPS, v_ret, - (u8 *)vrh, len); - kfree(vrh); - return ret; -} - -/** - * ice_vc_set_rss_hashcfg - set RSS Hash configuration bits for the VF - * @vf: pointer to the VF info - * @msg: pointer to the msg buffer - */ -static int ice_vc_set_rss_hashcfg(struct ice_vf *vf, u8 *msg) -{ - struct virtchnl_rss_hashcfg *vrh = (struct virtchnl_rss_hashcfg *)msg; - enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; - struct ice_pf *pf = vf->pf; - struct ice_vsi *vsi; - struct device *dev; - int status; - - dev = ice_pf_to_dev(pf); - - if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) { - v_ret = 
VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { - dev_err(dev, "RSS not supported by PF\n"); - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - vsi = ice_get_vf_vsi(vf); - if (!vsi) { - v_ret = VIRTCHNL_STATUS_ERR_PARAM; - goto err; - } - - /* clear all previously programmed RSS configuration to allow VF drivers - * the ability to customize the RSS configuration and/or completely - * disable RSS - */ - status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx); - if (status && !vrh->hashcfg) { - /* only report failure to clear the current RSS configuration if - * that was clearly the VF's intention (i.e. vrh->hashcfg = 0) - */ - v_ret = ice_err_to_virt_err(status); - goto err; - } else if (status) { - /* allow the VF to update the RSS configuration even on failure - * to clear the current RSS confguration in an attempt to keep - * RSS in a working state - */ - dev_warn(dev, "Failed to clear the RSS configuration for VF %u\n", - vf->vf_id); - } - - if (vrh->hashcfg) { - status = ice_add_avf_rss_cfg(&pf->hw, vsi, vrh->hashcfg); - v_ret = ice_err_to_virt_err(status); - } - - /* save the requested VF configuration */ - if (!v_ret) - vf->rss_hashcfg = vrh->hashcfg; - - /* send the response to the VF */ -err: - return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_SET_RSS_HASHCFG, v_ret, - NULL, 0); -} - -/** * ice_vc_query_rxdid - query RXDID supported by DDP package * @vf: pointer to VF info * diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl.h b/drivers/net/ethernet/intel/ice/virt/virtchnl.h index 71bb456e2d71..71bb456e2d71 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl.h +++ b/drivers/net/ethernet/intel/ice/virt/virtchnl.h diff --git a/drivers/net/ethernet/intel/idpf/Kconfig b/drivers/net/ethernet/intel/idpf/Kconfig index 2c359a8551c7..adab2154125b 100644 --- a/drivers/net/ethernet/intel/idpf/Kconfig +++ b/drivers/net/ethernet/intel/idpf/Kconfig @@ -6,7 +6,7 @@ config IDPF depends on PCI_MSI depends on PTP_1588_CLOCK_OPTIONAL select DIMLIB - select LIBETH + select LIBETH_XDP help This driver supports Intel(R) Infrastructure Data Path Function devices. 
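[Editor's note, not part of the patch] The idpf Kconfig hunk above switches the driver from "select LIBETH" to "select LIBETH_XDP", and the Makefile/netdev changes that follow wire the new xdp.o/xsk.o objects and the .ndo_bpf/.ndo_xdp_xmit/.ndo_xsk_wakeup callbacks into the driver. As a minimal sketch of the general pattern only (the real idpf handlers live in the xdp.c/xsk.c files added by this series; the "example_" identifiers below are hypothetical and reference handling is deliberately elided), a driver's XDP setup entry point typically looks like this:

/* Illustrative sketch only -- not taken from this patch. */
#include <linux/netdevice.h>
#include <linux/bpf.h>

struct example_priv {
	struct bpf_prog *xdp_prog;	/* currently installed XDP program, if any */
};

static int example_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf)
{
	struct example_priv *priv = netdev_priv(dev);

	switch (bpf->command) {
	case XDP_SETUP_PROG:
		/* Remember the new program (or NULL on removal). A real driver
		 * would also manage program references and rebuild its queues
		 * here if the data path layout changes.
		 */
		priv->xdp_prog = bpf->prog;
		return 0;
	default:
		return -EOPNOTSUPP;
	}
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_bpf	= example_xdp_setup,
};

In the actual patch the equivalent hook is idpf_xdp(), registered in idpf_netdev_ops further down, and the XDP helper symbols are pulled in via MODULE_IMPORT_NS("LIBETH_XDP").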
diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile index 4ef4b2b5e37a..651ddee942bd 100644 --- a/drivers/net/ethernet/intel/idpf/Makefile +++ b/drivers/net/ethernet/intel/idpf/Makefile @@ -21,3 +21,6 @@ idpf-$(CONFIG_IDPF_SINGLEQ) += idpf_singleq_txrx.o idpf-$(CONFIG_PTP_1588_CLOCK) += idpf_ptp.o idpf-$(CONFIG_PTP_1588_CLOCK) += idpf_virtchnl_ptp.o + +idpf-y += xdp.o +idpf-y += xsk.o diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index f4c0eaf9bde3..ca4da0c89979 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -40,6 +40,7 @@ struct idpf_vport_max_q; #define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \ ((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz)) +#define IDPF_WAIT_FOR_MARKER_TIMEO 500 #define IDPF_MAX_WAIT 500 /* available message levels */ @@ -148,6 +149,7 @@ enum idpf_vport_state { * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index * @max_tx_hdr_size: Max header length hardware can support + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -159,6 +161,7 @@ struct idpf_netdev_priv { u32 link_speed_mbps; u16 vport_idx; u16 max_tx_hdr_size; + u16 tx_max_bufs; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; @@ -246,16 +249,28 @@ enum idpf_vport_reset_cause { /** * enum idpf_vport_flags - Vport flags * @IDPF_VPORT_DEL_QUEUES: To send delete queues message - * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets - * processing is done * @IDPF_VPORT_FLAGS_NBITS: Must be last */ enum idpf_vport_flags { IDPF_VPORT_DEL_QUEUES, - IDPF_VPORT_SW_MARKER, IDPF_VPORT_FLAGS_NBITS, }; +/** + * struct idpf_tstamp_stats - Tx timestamp statistics + * @stats_sync: See struct u64_stats_sync + * @packets: Number of packets successfully timestamped by the hardware + * @discarded: Number of Tx skbs discarded due to cached PHC + * being too old to correctly extend timestamp + * @flushed: Number of Tx skbs flushed due to interface closed + */ +struct idpf_tstamp_stats { + struct u64_stats_sync stats_sync; + u64_stats_t packets; + u64_stats_t discarded; + u64_stats_t flushed; +}; + struct idpf_port_stats { struct u64_stats_sync stats_sync; u64_stats_t rx_hw_csum_err; @@ -287,6 +302,10 @@ struct idpf_fsteer_fltr { * @txq_model: Split queue or single queue queuing model * @txqs: Used only in hotpath to get to the right queue very fast * @crc_enable: Enable CRC insertion offload + * @xdpsq_share: whether XDPSQ sharing is enabled + * @num_xdp_txq: number of XDPSQs + * @xdp_txq_offset: index of the first XDPSQ (== number of regular SQs) + * @xdp_prog: installed XDP program * @num_rxq: Number of allocated RX queues * @num_bufq: Number of allocated buffer queues * @rxq_desc_count: RX queue descriptor count. 
*MUST* have enough descriptors @@ -312,16 +331,19 @@ struct idpf_fsteer_fltr { * @num_q_vectors: Number of IRQ vectors allocated * @q_vectors: Array of queue vectors * @q_vector_idxs: Starting index of queue vectors + * @noirq_dyn_ctl: register to enable/disable the vector for NOIRQ queues + * @noirq_dyn_ctl_ena: value to write to the above to enable it + * @noirq_v_idx: ID of the NOIRQ vector * @max_mtu: device given max possible MTU * @default_mac_addr: device will give a default MAC to use * @rx_itr_profile: RX profiles for Dynamic Interrupt Moderation * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation * @port_stats: per port csum, header split, and other offload stats * @link_up: True if link is up - * @sw_marker_wq: workqueue for marker packets * @tx_tstamp_caps: Capabilities negotiated for Tx timestamping * @tstamp_config: The Tx tstamp config * @tstamp_task: Tx timestamping task + * @tstamp_stats: Tx timestamping statistics */ struct idpf_vport { u16 num_txq; @@ -335,6 +357,11 @@ struct idpf_vport { struct idpf_tx_queue **txqs; bool crc_enable; + bool xdpsq_share; + u16 num_xdp_txq; + u16 xdp_txq_offset; + struct bpf_prog *xdp_prog; + u16 num_rxq; u16 num_bufq; u32 rxq_desc_count; @@ -359,6 +386,11 @@ struct idpf_vport { u16 num_q_vectors; struct idpf_q_vector *q_vectors; u16 *q_vector_idxs; + + void __iomem *noirq_dyn_ctl; + u32 noirq_dyn_ctl_ena; + u16 noirq_v_idx; + u16 max_mtu; u8 default_mac_addr[ETH_ALEN]; u16 rx_itr_profile[IDPF_DIM_PROFILE_SLOTS]; @@ -367,11 +399,10 @@ struct idpf_vport { bool link_up; - wait_queue_head_t sw_marker_wq; - struct idpf_ptp_vport_tx_tstamp_caps *tx_tstamp_caps; struct kernel_hwtstamp_config tstamp_config; struct work_struct tstamp_task; + struct idpf_tstamp_stats tstamp_stats; }; /** @@ -433,6 +464,7 @@ struct idpf_q_coalesce { * ethtool * @num_req_rxq_desc: Number of user requested RX queue descriptors through * ethtool + * @xdp_prog: requested XDP program to install * @user_flags: User toggled config flags * @mac_filter_list: List of MAC filters * @num_fsteer_fltrs: number of flow steering filters @@ -447,6 +479,7 @@ struct idpf_vport_user_config_data { u16 num_req_rx_qs; u32 num_req_txq_desc; u32 num_req_rxq_desc; + struct bpf_prog *xdp_prog; DECLARE_BITMAP(user_flags, __IDPF_USER_FLAGS_NBITS); struct list_head mac_filter_list; u32 num_fsteer_fltrs; @@ -676,6 +709,11 @@ static inline int idpf_is_queue_model_split(u16 q_model) q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; } +static inline bool idpf_xdp_enabled(const struct idpf_vport *vport) +{ + return vport->adapter && vport->xdp_prog; +} + #define idpf_is_cap_ena(adapter, field, flag) \ idpf_is_capability_ena(adapter, false, field, flag) #define idpf_is_cap_ena_all(adapter, field, flag) \ @@ -957,6 +995,13 @@ static inline void idpf_vport_ctrl_unlock(struct net_device *netdev) mutex_unlock(&np->adapter->vport_ctrl_lock); } +static inline bool idpf_vport_ctrl_is_locked(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return mutex_is_locked(&np->adapter->vport_ctrl_lock); +} + void idpf_statistics_task(struct work_struct *work); void idpf_init_task(struct work_struct *work); void idpf_service_task(struct work_struct *work); diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c index bfa60f7d43de..3a04a6bd0d7c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -77,7 +77,7 @@ static int idpf_intr_reg_init(struct idpf_vport *vport) int num_vecs = 
vport->num_q_vectors; struct idpf_vec_regs *reg_vals; int num_regs, i, err = 0; - u32 rx_itr, tx_itr; + u32 rx_itr, tx_itr, val; u16 total_vecs; total_vecs = idpf_get_reserved_vecs(vport->adapter); @@ -121,6 +121,15 @@ static int idpf_intr_reg_init(struct idpf_vport *vport) intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); } + /* Data vector for NOIRQ queues */ + + val = reg_vals[vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC].dyn_ctl_reg; + vport->noirq_dyn_ctl = idpf_get_reg_addr(adapter, val); + + val = PF_GLINT_DYN_CTL_WB_ON_ITR_M | PF_GLINT_DYN_CTL_INTENA_MSK_M | + FIELD_PREP(PF_GLINT_DYN_CTL_ITR_INDX_M, IDPF_NO_ITR_UPDATE_IDX); + vport->noirq_dyn_ctl_ena = val; + free_reg_vals: kfree(reg_vals); diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 0eb812ac19c2..a5a1eec9ade8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -1245,8 +1245,8 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, * * returns pointer to rx vector */ -static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, - int q_num) +struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, + u32 q_num) { int q_grp, q_idx; @@ -1266,8 +1266,8 @@ static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, * * returns pointer to tx vector */ -static struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, - int q_num) +struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, + u32 q_num) { int q_grp; @@ -1685,6 +1685,61 @@ unlock: return err; } +/** + * idpf_get_ts_stats - Collect HW tstamping statistics + * @netdev: network interface device structure + * @ts_stats: HW timestamping stats structure + * + * Collect HW timestamping statistics including successfully timestamped + * packets, discarded due to illegal values, flushed during releasing PTP and + * skipped due to lack of the free index. 
+ */ +static void idpf_get_ts_stats(struct net_device *netdev, + struct ethtool_ts_stats *ts_stats) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + unsigned int start; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + do { + start = u64_stats_fetch_begin(&vport->tstamp_stats.stats_sync); + ts_stats->pkts = u64_stats_read(&vport->tstamp_stats.packets); + ts_stats->lost = u64_stats_read(&vport->tstamp_stats.flushed); + ts_stats->err = u64_stats_read(&vport->tstamp_stats.discarded); + } while (u64_stats_fetch_retry(&vport->tstamp_stats.stats_sync, start)); + + if (np->state != __IDPF_VPORT_UP) + goto exit; + + for (u16 i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (u16 j = 0; j < txq_grp->num_txq; j++) { + struct idpf_tx_queue *txq = txq_grp->txqs[j]; + struct idpf_tx_queue_stats *stats; + u64 ts; + + if (!txq) + continue; + + stats = &txq->q_stats; + do { + start = u64_stats_fetch_begin(&txq->stats_sync); + + ts = u64_stats_read(&stats->tstamp_skipped); + } while (u64_stats_fetch_retry(&txq->stats_sync, + start)); + + ts_stats->lost += ts; + } + } + +exit: + idpf_vport_ctrl_unlock(netdev); +} + static const struct ethtool_ops idpf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE, @@ -1711,6 +1766,7 @@ static const struct ethtool_ops idpf_ethtool_ops = { .set_ringparam = idpf_set_ringparam, .get_link_ksettings = idpf_get_link_ksettings, .get_ts_info = idpf_get_ts_info, + .get_ts_stats = idpf_get_ts_stats, }; /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 4d2905103215..7e20a07e98e5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -247,10 +247,10 @@ static void idpf_unplug_aux_dev(struct auxiliary_device *adev) if (!adev) return; + ida_free(&idpf_idc_ida, adev->id); + auxiliary_device_delete(adev); auxiliary_device_uninit(adev); - - ida_free(&idpf_idc_ida, adev->id); } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h index 7492d1713243..20d5af64e750 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h @@ -186,13 +186,17 @@ struct idpf_base_tx_desc { __le64 qw1; /* type_cmd_offset_bsz_l2tag1 */ }; /* read used with buffer queues */ -struct idpf_splitq_tx_compl_desc { +struct idpf_splitq_4b_tx_compl_desc { /* qid=[10:0] comptype=[13:11] rsvd=[14] gen=[15] */ __le16 qid_comptype_gen; union { __le16 q_head; /* Queue head */ __le16 compl_tag; /* Completion tag */ } q_head_compl_tag; +}; /* writeback used with completion queues */ + +struct idpf_splitq_tx_compl_desc { + struct idpf_splitq_4b_tx_compl_desc common; u8 ts[3]; u8 rsvd; /* Reserved */ }; /* writeback used with completion queues */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 2c2a3e85d693..8a941f0fb048 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -4,6 +4,8 @@ #include "idpf.h" #include "idpf_virtchnl.h" #include "idpf_ptp.h" +#include "xdp.h" +#include "xsk.h" static const struct net_device_ops idpf_netdev_ops; @@ -776,6 +778,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport_idx = vport->idx; np->vport_id = vport->vport_id; np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); + 
np->tx_max_bufs = idpf_get_max_tx_bufs(adapter); spin_lock_init(&np->stats_lock); @@ -834,6 +837,8 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) netdev->hw_features |= netdev->features | other_offloads; netdev->vlan_features |= netdev->features | other_offloads; netdev->hw_enc_features |= dflt_features | other_offloads; + idpf_xdp_set_features(vport); + idpf_set_ethtool_ops(netdev); netif_set_affinity_auto(netdev); SET_NETDEV_DEV(netdev, &adapter->pdev->dev); @@ -883,14 +888,18 @@ static void idpf_remove_features(struct idpf_vport *vport) /** * idpf_vport_stop - Disable a vport * @vport: vport to disable + * @rtnl: whether to take RTNL lock */ -static void idpf_vport_stop(struct idpf_vport *vport) +static void idpf_vport_stop(struct idpf_vport *vport, bool rtnl) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); if (np->state <= __IDPF_VPORT_DOWN) return; + if (rtnl) + rtnl_lock(); + netif_carrier_off(vport->netdev); netif_tx_disable(vport->netdev); @@ -909,9 +918,13 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); + idpf_xdp_rxq_info_deinit_all(vport); idpf_vport_queues_rel(vport); idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; + + if (rtnl) + rtnl_unlock(); } /** @@ -935,7 +948,7 @@ static int idpf_stop(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - idpf_vport_stop(vport); + idpf_vport_stop(vport, false); idpf_vport_ctrl_unlock(netdev); @@ -1028,7 +1041,7 @@ static void idpf_vport_dealloc(struct idpf_vport *vport) idpf_idc_deinit_vport_aux_device(vport->vdev_info); idpf_deinit_mac_addr(vport); - idpf_vport_stop(vport); + idpf_vport_stop(vport, true); if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) idpf_decfg_netdev(vport); @@ -1134,7 +1147,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, if (!vport) return vport; - num_max_q = max(max_q->max_txq, max_q->max_rxq); + num_max_q = max(max_q->max_txq, max_q->max_rxq) + IDPF_RESERVED_VECS; if (!adapter->vport_config[idx]) { struct idpf_vport_config *vport_config; struct idpf_q_coalesce *q_coal; @@ -1308,13 +1321,13 @@ static void idpf_restore_features(struct idpf_vport *vport) */ static int idpf_set_real_num_queues(struct idpf_vport *vport) { - int err; + int err, txq = vport->num_txq - vport->num_xdp_txq; err = netif_set_real_num_rx_queues(vport->netdev, vport->num_rxq); if (err) return err; - return netif_set_real_num_tx_queues(vport->netdev, vport->num_txq); + return netif_set_real_num_tx_queues(vport->netdev, txq); } /** @@ -1369,8 +1382,9 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up + * @rtnl: whether to take RTNL lock */ -static int idpf_vport_open(struct idpf_vport *vport) +static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1380,6 +1394,9 @@ static int idpf_vport_open(struct idpf_vport *vport) if (np->state != __IDPF_VPORT_DOWN) return -EBUSY; + if (rtnl) + rtnl_lock(); + /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); @@ -1387,7 +1404,7 @@ static int idpf_vport_open(struct idpf_vport *vport) if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - return err; + goto err_rtnl_unlock; } err = idpf_vport_queues_alloc(vport); @@ -1408,35 +1425,44 @@ static int 
idpf_vport_open(struct idpf_vport *vport) goto queues_rel; } - err = idpf_rx_bufs_init_all(vport); + err = idpf_queue_reg_init(vport); if (err) { - dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", + dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); goto queues_rel; } - err = idpf_queue_reg_init(vport); + err = idpf_rx_bufs_init_all(vport); if (err) { - dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", + dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); goto queues_rel; } idpf_rx_init_buf_tail(vport); + + err = idpf_xdp_rxq_info_init_all(vport); + if (err) { + netdev_err(vport->netdev, + "Failed to initialize XDP RxQ info for vport %u: %pe\n", + vport->vport_id, ERR_PTR(err)); + goto intr_deinit; + } + idpf_vport_intr_ena(vport); err = idpf_send_config_queues_msg(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to configure queues for vport %u, %d\n", vport->vport_id, err); - goto intr_deinit; + goto rxq_deinit; } err = idpf_send_map_unmap_queue_vector_msg(vport, true); if (err) { dev_err(&adapter->pdev->dev, "Failed to map queue vectors for vport %u: %d\n", vport->vport_id, err); - goto intr_deinit; + goto rxq_deinit; } err = idpf_send_enable_queues_msg(vport); @@ -1474,6 +1500,9 @@ static int idpf_vport_open(struct idpf_vport *vport) goto deinit_rss; } + if (rtnl) + rtnl_unlock(); + return 0; deinit_rss: @@ -1484,6 +1513,8 @@ disable_queues: idpf_send_disable_queues_msg(vport); unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); +rxq_deinit: + idpf_xdp_rxq_info_deinit_all(vport); intr_deinit: idpf_vport_intr_deinit(vport); queues_rel: @@ -1491,6 +1522,10 @@ queues_rel: intr_rel: idpf_vport_intr_rel(vport); +err_rtnl_unlock: + if (rtnl) + rtnl_unlock(); + return err; } @@ -1547,8 +1582,6 @@ void idpf_init_task(struct work_struct *work) index = vport->idx; vport_config = adapter->vport_config[index]; - init_waitqueue_head(&vport->sw_marker_wq); - spin_lock_init(&vport_config->mac_filter_list_lock); INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list); @@ -1571,7 +1604,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport); + idpf_vport_open(vport, true); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1961,7 +1994,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, idpf_send_delete_queues_msg(vport); } else { set_bit(IDPF_VPORT_DEL_QUEUES, vport->flags); - idpf_vport_stop(vport); + idpf_vport_stop(vport, false); } idpf_deinit_rss(vport); @@ -1991,7 +2024,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport); + err = idpf_vport_open(vport, false); goto free_vport; @@ -2001,7 +2034,7 @@ err_reset: err_open: if (current_state == __IDPF_VPORT_UP) - idpf_vport_open(vport); + idpf_vport_open(vport, false); free_vport: kfree(new_vport); @@ -2239,7 +2272,7 @@ static int idpf_open(struct net_device *netdev) if (err) goto unlock; - err = idpf_vport_open(vport); + err = idpf_vport_open(vport, false); unlock: idpf_vport_ctrl_unlock(netdev); @@ -2272,6 +2305,92 @@ static int idpf_change_mtu(struct net_device *netdev, int new_mtu) } /** + * idpf_chk_tso_segment - Check skb is not using too 
many buffers + * @skb: send buffer + * @max_bufs: maximum number of buffers + * + * For TSO we need to count the TSO header and segment payload separately. As + * such we need to check cases where we have max_bufs-1 fragments or more as we + * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 + * for the segment payload in the first descriptor, and another max_buf-1 for + * the fragments. + * + * Returns true if the packet needs to be software segmented by core stack. + */ +static bool idpf_chk_tso_segment(const struct sk_buff *skb, + unsigned int max_bufs) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const skb_frag_t *frag, *stale; + int nr_frags, sum; + + /* no need to check if number of frags is less than max_bufs - 1 */ + nr_frags = shinfo->nr_frags; + if (nr_frags < (max_bufs - 1)) + return false; + + /* We need to walk through the list and validate that each group + * of max_bufs-2 fragments totals at least gso_size. + */ + nr_frags -= max_bufs - 2; + frag = &shinfo->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We use + * this as the worst case scenario in which the frag ahead of us only + * provides one byte which is why we are limited to max_bufs-2 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - shinfo->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + for (stale = &shinfo->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + + sum += skb_frag_size(frag++); + + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > IDPF_TX_MAX_DESC_DATA) { + int align_pad = -(skb_frag_off(stale)) & + (IDPF_TX_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + } while (stale_size > IDPF_TX_MAX_DESC_DATA); + } + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= stale_size; + } + + return false; +} + +/** * idpf_features_check - Validate packet conforms to limits * @skb: skb buffer * @netdev: This port's netdev @@ -2292,12 +2411,15 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) return features; - /* We cannot support GSO if the MSS is going to be less than - * 88 bytes. If it is then we need to drop support for GSO. - */ - if (skb_is_gso(skb) && - (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)) - features &= ~NETIF_F_GSO_MASK; + if (skb_is_gso(skb)) { + /* We cannot support GSO if the MSS is going to be less than + * 88 bytes. If it is then we need to drop support for GSO. 
+ */ + if (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS) + features &= ~NETIF_F_GSO_MASK; + else if (idpf_chk_tso_segment(skb, np->tx_max_bufs)) + features &= ~NETIF_F_GSO_MASK; + } /* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */ len = skb_network_offset(skb); @@ -2344,6 +2466,7 @@ static int idpf_set_mac(struct net_device *netdev, void *p) struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_config *vport_config; struct sockaddr *addr = p; + u8 old_mac_addr[ETH_ALEN]; struct idpf_vport *vport; int err = 0; @@ -2367,17 +2490,19 @@ static int idpf_set_mac(struct net_device *netdev, void *p) if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) goto unlock_mutex; + ether_addr_copy(old_mac_addr, vport->default_mac_addr); + ether_addr_copy(vport->default_mac_addr, addr->sa_data); vport_config = vport->adapter->vport_config[vport->idx]; err = idpf_add_mac_filter(vport, np, addr->sa_data, false); if (err) { __idpf_del_mac_filter(vport_config, addr->sa_data); + ether_addr_copy(vport->default_mac_addr, netdev->dev_addr); goto unlock_mutex; } - if (is_valid_ether_addr(vport->default_mac_addr)) - idpf_del_mac_filter(vport, np, vport->default_mac_addr, false); + if (is_valid_ether_addr(old_mac_addr)) + __idpf_del_mac_filter(vport_config, old_mac_addr); - ether_addr_copy(vport->default_mac_addr, addr->sa_data); eth_hw_addr_set(netdev, addr->sa_data); unlock_mutex: @@ -2492,4 +2617,7 @@ static const struct net_device_ops idpf_netdev_ops = { .ndo_tx_timeout = idpf_tx_timeout, .ndo_hwtstamp_get = idpf_hwtstamp_get, .ndo_hwtstamp_set = idpf_hwtstamp_set, + .ndo_bpf = idpf_xdp, + .ndo_xdp_xmit = idpf_xdp_xmit, + .ndo_xsk_wakeup = idpf_xsk_wakeup, }; diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index dfe9126f1f4a..8c46481d2e1f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -9,6 +9,7 @@ MODULE_DESCRIPTION(DRV_SUMMARY); MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL"); /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index ee21f2ff0cad..142823af1f9e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -618,8 +618,13 @@ u64 idpf_ptp_extend_ts(struct idpf_vport *vport, u64 in_tstamp) discard_time = ptp->cached_phc_jiffies + 2 * HZ; - if (time_is_before_jiffies(discard_time)) + if (time_is_before_jiffies(discard_time)) { + u64_stats_update_begin(&vport->tstamp_stats.stats_sync); + u64_stats_inc(&vport->tstamp_stats.discarded); + u64_stats_update_end(&vport->tstamp_stats.stats_sync); + return 0; + } return idpf_ptp_tstamp_extend_32b_to_64b(ptp->cached_phc_time, lower_32_bits(in_tstamp)); @@ -853,10 +858,14 @@ static void idpf_ptp_release_vport_tstamp(struct idpf_vport *vport) /* Remove list with latches in use */ head = &vport->tx_tstamp_caps->latches_in_use; + u64_stats_update_begin(&vport->tstamp_stats.stats_sync); list_for_each_entry_safe(ptp_tx_tstamp, tmp, head, list_member) { + u64_stats_inc(&vport->tstamp_stats.flushed); + list_del(&ptp_tx_tstamp->list_member); kfree(ptp_tx_tstamp); } + u64_stats_update_end(&vport->tstamp_stats.stats_sync); spin_unlock_bh(&vport->tx_tstamp_caps->latches_lock); diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 555879b1248d..61e613066140 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c 
+++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2023 Intel Corporation */ -#include <net/libeth/rx.h> -#include <net/libeth/tx.h> +#include <net/libeth/xdp.h> #include "idpf.h" @@ -180,6 +179,58 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb, } /** + * idpf_tx_singleq_dma_map_error - handle TX DMA map errors + * @txq: queue to send buffer on + * @skb: send buffer + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq, + struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) +{ + struct libeth_sq_napi_stats ss = { }; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ + for (;;) { + struct idpf_tx_buf *tx_buf; + + tx_buf = &txq->tx_buf[idx]; + libeth_tx_complete(tx_buf, &cp); + if (tx_buf == first) + break; + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + if (skb_is_gso(skb)) { + union idpf_tx_flex_desc *tx_desc; + + /* If we failed a DMA mapping for a TSO packet, we will have + * used one additional descriptor for a context + * descriptor. Reset that here. + */ + tx_desc = &txq->flex_tx[idx]; + memset(tx_desc, 0, sizeof(*tx_desc)); + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + /* Update tail in case netdev_xmit_more was previously true */ + idpf_tx_buf_hw_update(txq, idx, false); +} + +/** * idpf_tx_singleq_map - Build the Tx base descriptor * @tx_q: queue to send buffer on * @first: first buffer info buffer to use @@ -219,8 +270,9 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) + return idpf_tx_singleq_dma_map_error(tx_q, skb, + first, i); /* record length, and DMA address */ dma_unmap_len_set(tx_buf, len, size); @@ -362,11 +414,11 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; + u32 count, buf_count = 1; int csum, tso, needed; - unsigned int count; __be16 protocol; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); @@ -602,7 +654,7 @@ static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, bool ipv4, ipv6; /* check if Rx checksum is enabled */ - if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded)) + if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded)) return; /* check if HW has decoded the packet and checksum */ @@ -741,7 +793,7 @@ static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q, { u64 mask, qw1; - if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded)) + if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded)) return; mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; @@ -769,7 +821,7 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, const union virtchnl2_rx_desc *rx_desc, struct libeth_rx_pt decoded) { - if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded)) + if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded)) return; if 
(FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, @@ -781,7 +833,7 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, } /** - * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx + * __idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx * descriptor * @rx_q: Rx ring being processed * @skb: pointer to current skb being populated @@ -793,17 +845,14 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, * other fields within the skb. */ static void -idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, - struct sk_buff *skb, - const union virtchnl2_rx_desc *rx_desc, - u16 ptype) +__idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, + const union virtchnl2_rx_desc *rx_desc, + u16 ptype) { struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype]; struct libeth_rx_csum csum_bits; - /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_q->netdev); - /* Check if we're using base mode descriptor IDs */ if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded); @@ -814,7 +863,6 @@ idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, } idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded); - skb_record_rx_queue(skb, rx_q->idx); } /** @@ -950,6 +998,32 @@ idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, idpf_rx_singleq_extract_flex_fields(rx_desc, fields); } +static bool +idpf_rx_singleq_process_skb_fields(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs) +{ + struct libeth_rqe_info fields; + struct idpf_rx_queue *rxq; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); + + idpf_rx_singleq_extract_fields(rxq, xdp->desc, &fields); + __idpf_rx_singleq_process_skb_fields(rxq, skb, xdp->desc, + fields.ptype); + + return true; +} + +static void idpf_xdp_run_pass(struct libeth_xdp_buff *xdp, + struct napi_struct *napi, + struct libeth_rq_napi_stats *rs, + const union virtchnl2_rx_desc *desc) +{ + libeth_xdp_run_pass(xdp, NULL, napi, rs, desc, NULL, + idpf_rx_singleq_process_skb_fields); +} + /** * idpf_rx_singleq_clean - Reclaim resources after receive completes * @rx_q: rx queue to clean @@ -959,14 +1033,15 @@ idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, */ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) { - unsigned int total_rx_bytes = 0, total_rx_pkts = 0; - struct sk_buff *skb = rx_q->skb; + struct libeth_rq_napi_stats rs = { }; u16 ntc = rx_q->next_to_clean; + LIBETH_XDP_ONSTACK_BUFF(xdp); u16 cleaned_count = 0; - bool failure = false; + + libeth_xdp_init_buff(xdp, &rx_q->xdp, &rx_q->xdp_rxq); /* Process Rx packets bounded by budget */ - while (likely(total_rx_pkts < (unsigned int)budget)) { + while (likely(rs.packets < budget)) { struct libeth_rqe_info fields = { }; union virtchnl2_rx_desc *rx_desc; struct idpf_rx_buf *rx_buf; @@ -993,73 +1068,41 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); rx_buf = &rx_q->rx_buf[ntc]; - if (!libeth_rx_sync_for_cpu(rx_buf, fields.len)) - goto skip_data; - - if (skb) - idpf_rx_add_frag(rx_buf, skb, fields.len); - else - skb = idpf_rx_build_skb(rx_buf, fields.len); - - /* exit if we failed to retrieve a buffer */ - if (!skb) - break; - -skip_data: + libeth_xdp_process_buff(xdp, rx_buf, fields.len); rx_buf->netmem = 0; IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc); 
cleaned_count++; /* skip if it is non EOP desc */ - if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb)) + if (idpf_rx_singleq_is_non_eop(rx_desc) || + unlikely(!xdp->data)) continue; #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M) if (unlikely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_ERR_S))) { - dev_kfree_skb_any(skb); - skb = NULL; + libeth_xdp_return_buff_slow(xdp); continue; } - /* pad skb if needed (to make valid ethernet frame) */ - if (eth_skb_pad(skb)) { - skb = NULL; - continue; - } - - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - - /* protocol */ - idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc, - fields.ptype); - - /* send completed skb up the stack */ - napi_gro_receive(rx_q->pp->p.napi, skb); - skb = NULL; - - /* update budget accounting */ - total_rx_pkts++; + idpf_xdp_run_pass(xdp, rx_q->pp->p.napi, &rs, rx_desc); } - rx_q->skb = skb; - rx_q->next_to_clean = ntc; + libeth_xdp_save_buff(&rx_q->xdp, xdp); page_pool_nid_changed(rx_q->pp, numa_mem_id()); if (cleaned_count) - failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); + idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); u64_stats_update_begin(&rx_q->stats_sync); - u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts); - u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes); + u64_stats_add(&rx_q->q_stats.packets, rs.packets); + u64_stats_add(&rx_q->q_stats.bytes, rs.bytes); u64_stats_update_end(&rx_q->stats_sync); - /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : (int)total_rx_pkts; + return rs.packets; } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 66a1b040639d..828f7c444d30 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1,52 +1,36 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2023 Intel Corporation */ -#include <net/libeth/rx.h> -#include <net/libeth/tx.h> - #include "idpf.h" #include "idpf_ptp.h" #include "idpf_virtchnl.h" +#include "xdp.h" +#include "xsk.h" -struct idpf_tx_stash { - struct hlist_node hlist; - struct libeth_sqe buf; -}; - -#define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) +#define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) LIBETH_SQE_CHECK_PRIV(u32); -static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count); - /** - * idpf_buf_lifo_push - push a buffer pointer onto stack - * @stack: pointer to stack struct - * @buf: pointer to buf to push + * idpf_chk_linearize - Check if skb exceeds max descriptors per packet + * @skb: send buffer + * @max_bufs: maximum scatter gather buffers for single packet + * @count: number of buffers this packet needs * - * Returns 0 on success, negative on failure - **/ -static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack, - struct idpf_tx_stash *buf) + * Make sure we don't exceed maximum scatter gather buffers for a single + * packet. + * TSO case has been handled earlier from idpf_features_check(). 
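/* idpf_tx_buf_next() reuses the libeth SQE priv field as a u32 "next
 * buffer ID", so the buffers of one packet form a chain through the flat
 * buffer pool instead of occupying consecutive ring slots. A standalone
 * model of walking such a chain, as the reworked cleaning path does;
 * TXBUF_NULL, struct tx_buf and clean_chain() are illustrative names.
 */
#include <stdint.h>
#include <stdio.h>

#define TXBUF_NULL UINT32_MAX		/* stand-in for IDPF_TXBUF_NULL */

struct tx_buf {
	uint32_t next;			/* models idpf_tx_buf_next(buf) */
};

static void clean_chain(const struct tx_buf *bufs, uint32_t buf_id)
{
	for (;;) {
		printf("clean buf %u\n", (unsigned int)buf_id);
		if (bufs[buf_id].next == TXBUF_NULL)
			break;
		buf_id = bufs[buf_id].next;
	}
}

int main(void)
{
	struct tx_buf pool[8];

	pool[5].next = 2;		/* one packet spanning IDs 5 -> 2 -> 7 */
	pool[2].next = 7;
	pool[7].next = TXBUF_NULL;

	clean_chain(pool, 5);
	return 0;
}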
+ */ +static bool idpf_chk_linearize(const struct sk_buff *skb, + unsigned int max_bufs, + unsigned int count) { - if (unlikely(stack->top == stack->size)) - return -ENOSPC; - - stack->bufs[stack->top++] = buf; - - return 0; -} + if (likely(count <= max_bufs)) + return false; -/** - * idpf_buf_lifo_pop - pop a buffer pointer from stack - * @stack: pointer to stack struct - **/ -static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack) -{ - if (unlikely(!stack->top)) - return NULL; + if (skb_is_gso(skb)) + return false; - return stack->bufs[--stack->top]; + return true; } /** @@ -70,59 +54,42 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) } } -/** - * idpf_tx_buf_rel_all - Free any empty Tx buffers - * @txq: queue to be cleaned - */ -static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) +static void idpf_tx_buf_clean(struct idpf_tx_queue *txq) { struct libeth_sq_napi_stats ss = { }; - struct idpf_buf_lifo *buf_stack; - struct idpf_tx_stash *stash; + struct xdp_frame_bulk bq; struct libeth_cq_pp cp = { .dev = txq->dev, + .bq = &bq, .ss = &ss, }; - struct hlist_node *tmp; - u32 i, tag; - /* Buffers already cleared, nothing to do */ - if (!txq->tx_buf) - return; + xdp_frame_bulk_init(&bq); /* Free all the Tx buffer sk_buffs */ - for (i = 0; i < txq->desc_count; i++) - libeth_tx_complete(&txq->tx_buf[i], &cp); - - kfree(txq->tx_buf); - txq->tx_buf = NULL; + for (u32 i = 0; i < txq->buf_pool_size; i++) + libeth_tx_complete_any(&txq->tx_buf[i], &cp); - if (!idpf_queue_has(FLOW_SCH_EN, txq)) - return; + xdp_flush_frame_bulk(&bq); +} - buf_stack = &txq->stash->buf_stack; - if (!buf_stack->bufs) +/** + * idpf_tx_buf_rel_all - Free any empty Tx buffers + * @txq: queue to be cleaned + */ +static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) +{ + /* Buffers already cleared, nothing to do */ + if (!txq->tx_buf) return; - /* - * If a Tx timeout occurred, there are potentially still bufs in the - * hash table, free them here. 
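/* idpf_tx_buf_clean() now batches frame frees through struct
 * xdp_frame_bulk (xdp_frame_bulk_init() / xdp_flush_frame_bulk()). A
 * standalone model of that accumulate-then-flush pattern; BULK_SIZE,
 * struct bulk, bulk_add() and bulk_flush() are illustrative and do not
 * reflect the kernel API's internals.
 */
#include <stdio.h>

#define BULK_SIZE 4u

struct bulk {
	void *q[BULK_SIZE];
	unsigned int count;
};

static void bulk_flush(struct bulk *b)
{
	for (unsigned int i = 0; i < b->count; i++)
		printf("free %p\n", b->q[i]);	/* batched return to the pool */
	b->count = 0;
}

static void bulk_add(struct bulk *b, void *obj)
{
	if (b->count == BULK_SIZE)
		bulk_flush(b);
	b->q[b->count++] = obj;
}

int main(void)
{
	struct bulk b = { .count = 0 };
	int objs[6];

	for (int i = 0; i < 6; i++)
		bulk_add(&b, &objs[i]);
	bulk_flush(&b);			/* final flush, like xdp_flush_frame_bulk() */
	return 0;
}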
- */ - hash_for_each_safe(txq->stash->sched_buf_hash, tag, tmp, stash, - hlist) { - if (!stash) - continue; - - libeth_tx_complete(&stash->buf, &cp); - hash_del(&stash->hlist); - idpf_buf_lifo_push(buf_stack, stash); - } - - for (i = 0; i < buf_stack->size; i++) - kfree(buf_stack->bufs[i]); + if (idpf_queue_has(XSK, txq)) + idpf_xsksq_clean(txq); + else + idpf_tx_buf_clean(txq); - kfree(buf_stack->bufs); - buf_stack->bufs = NULL; + kfree(txq->tx_buf); + txq->tx_buf = NULL; } /** @@ -133,12 +100,24 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) */ static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) { + bool xdp = idpf_queue_has(XDP, txq); + + if (xdp) + libeth_xdpsq_deinit_timer(txq->timer); + idpf_tx_buf_rel_all(txq); - netdev_tx_reset_subqueue(txq->netdev, txq->idx); + + if (!xdp) + netdev_tx_reset_subqueue(txq->netdev, txq->idx); + + idpf_xsk_clear_queue(txq, VIRTCHNL2_QUEUE_TYPE_TX); if (!txq->desc_ring) return; + if (!xdp && txq->refillq) + kfree(txq->refillq->ring); + dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); txq->desc_ring = NULL; txq->next_to_use = 0; @@ -153,12 +132,14 @@ static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) */ static void idpf_compl_desc_rel(struct idpf_compl_queue *complq) { + idpf_xsk_clear_queue(complq, VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + if (!complq->comp) return; dma_free_coherent(complq->netdev->dev.parent, complq->size, - complq->comp, complq->dma); - complq->comp = NULL; + complq->desc_ring, complq->dma); + complq->desc_ring = NULL; complq->next_to_use = 0; complq->next_to_clean = 0; } @@ -195,41 +176,18 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) */ static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) { - struct idpf_buf_lifo *buf_stack; - int buf_size; - int i; - /* Allocate book keeping buffers only. 
Buffers to be supplied to HW * are allocated by kernel network stack and received as part of skb */ - buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count; - tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL); + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) + tx_q->buf_pool_size = U16_MAX; + else + tx_q->buf_pool_size = tx_q->desc_count; + tx_q->tx_buf = kcalloc(tx_q->buf_pool_size, sizeof(*tx_q->tx_buf), + GFP_KERNEL); if (!tx_q->tx_buf) return -ENOMEM; - if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) - return 0; - - buf_stack = &tx_q->stash->buf_stack; - - /* Initialize tx buf stack for out-of-order completions if - * flow scheduling offload is enabled - */ - buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs), - GFP_KERNEL); - if (!buf_stack->bufs) - return -ENOMEM; - - buf_stack->size = tx_q->desc_count; - buf_stack->top = tx_q->desc_count; - - for (i = 0; i < tx_q->desc_count; i++) { - buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]), - GFP_KERNEL); - if (!buf_stack->bufs[i]) - return -ENOMEM; - } - return 0; } @@ -244,6 +202,7 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, struct idpf_tx_queue *tx_q) { struct device *dev = tx_q->dev; + struct idpf_sw_queue *refillq; int err; err = idpf_tx_buf_alloc_all(tx_q); @@ -267,6 +226,33 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, tx_q->next_to_clean = 0; idpf_queue_set(GEN_CHK, tx_q); + idpf_xsk_setup_queue(vport, tx_q, VIRTCHNL2_QUEUE_TYPE_TX); + + if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) + return 0; + + refillq = tx_q->refillq; + refillq->desc_count = tx_q->buf_pool_size; + refillq->ring = kcalloc(refillq->desc_count, sizeof(u32), + GFP_KERNEL); + if (!refillq->ring) { + err = -ENOMEM; + goto err_alloc; + } + + for (unsigned int i = 0; i < refillq->desc_count; i++) + refillq->ring[i] = + FIELD_PREP(IDPF_RFL_BI_BUFID_M, i) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, + idpf_queue_has(GEN_CHK, refillq)); + + /* Go ahead and flip the GEN bit since this counts as filling + * up the ring, i.e. we already ring wrapped. + */ + idpf_queue_change(GEN_CHK, refillq); + + tx_q->last_re = tx_q->desc_count - IDPF_TX_SPLITQ_RE_MIN_GAP; + return 0; err_alloc: @@ -285,18 +271,25 @@ err_alloc: static int idpf_compl_desc_alloc(const struct idpf_vport *vport, struct idpf_compl_queue *complq) { - complq->size = array_size(complq->desc_count, sizeof(*complq->comp)); + u32 desc_size; - complq->comp = dma_alloc_coherent(complq->netdev->dev.parent, - complq->size, &complq->dma, - GFP_KERNEL); - if (!complq->comp) + desc_size = idpf_queue_has(FLOW_SCH_EN, complq) ? 
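/* The Tx refill queue is pre-filled with one entry per buffer ID, each
 * carrying the current software generation flag, and the flag is then
 * flipped because a full ring counts as already wrapped; the cleaning
 * side keeps its own flag unchanged, so every pre-filled ID reads as
 * available. A standalone model of that pre-fill; the bit layout
 * (bit 15 = generation, bits 0-14 = ID) and all names here are
 * illustrative, not the real IDPF_RFL_BI_* definitions.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RFL_GEN_BIT	0x8000u
#define RFL_ID_MASK	0x7fffu
#define RING_SZ		4u

int main(void)
{
	uint16_t ring[RING_SZ];
	bool gen = true;		/* producer-side generation flag */

	for (uint16_t i = 0; i < RING_SZ; i++)
		ring[i] = (i & RFL_ID_MASK) | (gen ? RFL_GEN_BIT : 0);

	gen = !gen;			/* ring is "full", i.e. already wrapped once */

	for (unsigned int i = 0; i < RING_SZ; i++)
		printf("slot %u: id=%u gen=%u\n", i,
		       (unsigned int)(ring[i] & RFL_ID_MASK),
		       (unsigned int)!!(ring[i] & RFL_GEN_BIT));
	return 0;
}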
+ sizeof(*complq->comp) : sizeof(*complq->comp_4b); + complq->size = array_size(complq->desc_count, desc_size); + + complq->desc_ring = dma_alloc_coherent(complq->netdev->dev.parent, + complq->size, &complq->dma, + GFP_KERNEL); + if (!complq->desc_ring) return -ENOMEM; complq->next_to_use = 0; complq->next_to_clean = 0; idpf_queue_set(GEN_CHK, complq); + idpf_xsk_setup_queue(vport, complq, + VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + return 0; } @@ -317,8 +310,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) for (i = 0; i < vport->num_txq_grp; i++) { for (j = 0; j < vport->txq_grps[i].num_txq; j++) { struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j]; - u8 gen_bits = 0; - u16 bufidx_mask; err = idpf_tx_desc_alloc(vport, txq); if (err) { @@ -327,34 +318,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) i); goto err_out; } - - if (!idpf_is_queue_model_split(vport->txq_model)) - continue; - - txq->compl_tag_cur_gen = 0; - - /* Determine the number of bits in the bufid - * mask and add one to get the start of the - * generation bits - */ - bufidx_mask = txq->desc_count - 1; - while (bufidx_mask >> 1) { - txq->compl_tag_gen_s++; - bufidx_mask = bufidx_mask >> 1; - } - txq->compl_tag_gen_s++; - - gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH - - txq->compl_tag_gen_s; - txq->compl_tag_gen_max = GETMAXVAL(gen_bits); - - /* Set bufid mask based on location of first - * gen bit; it cannot simply be the descriptor - * ring size-1 since we can have size values - * where not all of those bits are set. - */ - txq->compl_tag_bufid_m = - GETMAXVAL(txq->compl_tag_gen_s); } if (!idpf_is_queue_model_split(vport->txq_model)) @@ -426,6 +389,11 @@ static void idpf_rx_buf_rel_bufq(struct idpf_buf_queue *bufq) if (!bufq->buf) return; + if (idpf_queue_has(XSK, bufq)) { + idpf_xskfq_rel(bufq); + return; + } + /* Free all the bufs allocated and given to hw on Rx queue */ for (u32 i = 0; i < bufq->desc_count; i++) idpf_rx_page_rel(&bufq->buf[i]); @@ -474,14 +442,14 @@ static void idpf_rx_desc_rel(struct idpf_rx_queue *rxq, struct device *dev, if (!rxq) return; - if (rxq->skb) { - dev_kfree_skb_any(rxq->skb); - rxq->skb = NULL; - } + if (!idpf_queue_has(XSK, rxq)) + libeth_xdp_return_stash(&rxq->xdp); if (!idpf_is_queue_model_split(model)) idpf_rx_buf_rel_all(rxq); + idpf_xsk_clear_queue(rxq, VIRTCHNL2_QUEUE_TYPE_RX); + rxq->next_to_alloc = 0; rxq->next_to_clean = 0; rxq->next_to_use = 0; @@ -504,6 +472,7 @@ static void idpf_rx_desc_rel_bufq(struct idpf_buf_queue *bufq, return; idpf_rx_buf_rel_bufq(bufq); + idpf_xsk_clear_queue(bufq, VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); bufq->next_to_alloc = 0; bufq->next_to_clean = 0; @@ -586,6 +555,7 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) struct libeth_fq fq = { .count = bufq->desc_count, .type = LIBETH_FQE_HDR, + .xdp = idpf_xdp_enabled(bufq->q_vector->vport), .nid = idpf_q_vector_to_mem(bufq->q_vector), }; int ret; @@ -603,18 +573,18 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) } /** - * idpf_rx_post_buf_refill - Post buffer id to refill queue + * idpf_post_buf_refill - Post buffer id to refill queue * @refillq: refill queue to post to * @buf_id: buffer id to post */ -static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) +static void idpf_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) { u32 nta = refillq->next_to_use; /* store the buffer ID and the SW maintained GEN bit to the refillq */ refillq->ring[nta] = - FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | - 
FIELD_PREP(IDPF_RX_BI_GEN_M, + FIELD_PREP(IDPF_RFL_BI_BUFID_M, buf_id) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, idpf_queue_has(GEN_CHK, refillq)); if (unlikely(++nta == refillq->desc_count)) { @@ -785,10 +755,14 @@ static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq, .count = bufq->desc_count, .type = type, .hsplit = idpf_queue_has(HSPLIT_EN, bufq), + .xdp = idpf_xdp_enabled(bufq->q_vector->vport), .nid = idpf_q_vector_to_mem(bufq->q_vector), }; int ret; + if (idpf_queue_has(XSK, bufq)) + return idpf_xskfq_init(bufq); + ret = libeth_rx_fq_create(&fq, &bufq->q_vector->napi); if (ret) return ret; @@ -812,6 +786,8 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) bool split = idpf_is_queue_model_split(vport->rxq_model); int i, j, err; + idpf_xdp_copy_prog_to_rqs(vport, vport->xdp_prog); + for (i = 0; i < vport->num_rxq_grp; i++) { struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; u32 truesize = 0; @@ -882,6 +858,8 @@ static int idpf_rx_desc_alloc(const struct idpf_vport *vport, rxq->next_to_use = 0; idpf_queue_set(GEN_CHK, rxq); + idpf_xsk_setup_queue(vport, rxq, VIRTCHNL2_QUEUE_TYPE_RX); + return 0; } @@ -907,9 +885,10 @@ static int idpf_bufq_desc_alloc(const struct idpf_vport *vport, bufq->next_to_alloc = 0; bufq->next_to_clean = 0; bufq->next_to_use = 0; - idpf_queue_set(GEN_CHK, bufq); + idpf_xsk_setup_queue(vport, bufq, VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + return 0; } @@ -975,6 +954,341 @@ err_out: return err; } +static int idpf_init_queue_set(const struct idpf_queue_set *qs) +{ + const struct idpf_vport *vport = qs->vport; + bool splitq; + int err; + + splitq = idpf_is_queue_model_split(vport->rxq_model); + + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + struct idpf_buf_queue *bufq; + + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + err = idpf_rx_desc_alloc(vport, q->rxq); + if (err) + break; + + err = idpf_xdp_rxq_info_init(q->rxq); + if (err) + break; + + if (!splitq) + err = idpf_rx_bufs_init_singleq(q->rxq); + + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + bufq = q->bufq; + + err = idpf_bufq_desc_alloc(vport, bufq); + if (err) + break; + + for (u32 j = 0; j < bufq->q_vector->num_bufq; j++) { + struct idpf_buf_queue * const *bufqs; + enum libeth_fqe_type type; + u32 ts; + + bufqs = bufq->q_vector->bufq; + if (bufqs[j] != bufq) + continue; + + if (j) { + type = LIBETH_FQE_SHORT; + ts = bufqs[j - 1]->truesize >> 1; + } else { + type = LIBETH_FQE_MTU; + ts = 0; + } + + bufq->truesize = ts; + + err = idpf_rx_bufs_init(bufq, type); + break; + } + + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + err = idpf_tx_desc_alloc(vport, q->txq); + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + err = idpf_compl_desc_alloc(vport, q->complq); + break; + default: + continue; + } + + if (err) + return err; + } + + return 0; +} + +static void idpf_clean_queue_set(const struct idpf_queue_set *qs) +{ + const struct idpf_vport *vport = qs->vport; + struct device *dev = vport->netdev->dev.parent; + + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + idpf_xdp_rxq_info_deinit(q->rxq, vport->rxq_model); + idpf_rx_desc_rel(q->rxq, dev, vport->rxq_model); + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + idpf_rx_desc_rel_bufq(q->bufq, dev); + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + idpf_tx_desc_rel(q->txq); + + if (idpf_queue_has(XDP, q->txq)) { + q->txq->pending = 0; + q->txq->xdp_tx = 0; + } else { + q->txq->txq_grp->num_completions_pending = 0; + } + + 
writel(q->txq->next_to_use, q->txq->tail); + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + idpf_compl_desc_rel(q->complq); + q->complq->num_completions = 0; + break; + default: + break; + } + } +} + +static void idpf_qvec_ena_irq(struct idpf_q_vector *qv) +{ + if (qv->num_txq) { + u32 itr; + + if (IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode)) + itr = qv->vport->tx_itr_profile[qv->tx_dim.profile_ix]; + else + itr = qv->tx_itr_value; + + idpf_vport_intr_write_itr(qv, itr, true); + } + + if (qv->num_rxq) { + u32 itr; + + if (IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode)) + itr = qv->vport->rx_itr_profile[qv->rx_dim.profile_ix]; + else + itr = qv->rx_itr_value; + + idpf_vport_intr_write_itr(qv, itr, false); + } + + if (qv->num_txq || qv->num_rxq) + idpf_vport_intr_update_itr_ena_irq(qv); +} + +/** + * idpf_vector_to_queue_set - create a queue set associated with the given + * queue vector + * @qv: queue vector corresponding to the queue pair + * + * Returns a pointer to a dynamically allocated array of pointers to all + * queues associated with a given queue vector (@qv). + * Please note that the caller is responsible to free the memory allocated + * by this function using kfree(). + * + * Return: &idpf_queue_set on success, %NULL in case of error. + */ +static struct idpf_queue_set * +idpf_vector_to_queue_set(struct idpf_q_vector *qv) +{ + bool xdp = qv->vport->xdp_txq_offset && !qv->num_xsksq; + struct idpf_vport *vport = qv->vport; + struct idpf_queue_set *qs; + u32 num; + + num = qv->num_rxq + qv->num_bufq + qv->num_txq + qv->num_complq; + num += xdp ? qv->num_rxq * 2 : qv->num_xsksq * 2; + if (!num) + return NULL; + + qs = idpf_alloc_queue_set(vport, num); + if (!qs) + return NULL; + + num = 0; + + for (u32 i = 0; i < qv->num_bufq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + qs->qs[num++].bufq = qv->bufq[i]; + } + + for (u32 i = 0; i < qv->num_rxq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_RX; + qs->qs[num++].rxq = qv->rx[i]; + } + + for (u32 i = 0; i < qv->num_txq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[num++].txq = qv->tx[i]; + } + + for (u32 i = 0; i < qv->num_complq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[num++].complq = qv->complq[i]; + } + + if (!vport->xdp_txq_offset) + goto finalize; + + if (xdp) { + for (u32 i = 0; i < qv->num_rxq; i++) { + u32 idx = vport->xdp_txq_offset + qv->rx[i]->idx; + + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[num++].txq = vport->txqs[idx]; + + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[num++].complq = vport->txqs[idx]->complq; + } + } else { + for (u32 i = 0; i < qv->num_xsksq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[num++].txq = qv->xsksq[i]; + + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[num++].complq = qv->xsksq[i]->complq; + } + } + +finalize: + if (num != qs->num) { + kfree(qs); + return NULL; + } + + return qs; +} + +static int idpf_qp_enable(const struct idpf_queue_set *qs, u32 qid) +{ + struct idpf_vport *vport = qs->vport; + struct idpf_q_vector *q_vector; + int err; + + q_vector = idpf_find_rxq_vec(vport, qid); + + err = idpf_init_queue_set(qs); + if (err) { + netdev_err(vport->netdev, "Could not initialize queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + if (!vport->xdp_txq_offset) + goto config; + + q_vector->xsksq = kcalloc(DIV_ROUND_UP(vport->num_rxq_grp, + vport->num_q_vectors), + sizeof(*q_vector->xsksq), GFP_KERNEL); + if (!q_vector->xsksq) + return -ENOMEM; + + for (u32 
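/* idpf_vector_to_queue_set() builds one flat array describing every
 * queue a vector owns, with a type tag per entry so Rx, buffer, Tx and
 * completion queues can share a single list. A standalone sketch of
 * that tagged-pointer layout; enum q_type, struct q_ptr and the queue
 * structs are illustrative stand-ins, not the driver's types.
 */
#include <stdio.h>
#include <stdlib.h>

enum q_type { Q_RX, Q_TX };

struct rxq { int id; };
struct txq { int id; };

struct q_ptr {
	enum q_type type;
	union {
		struct rxq *rxq;
		struct txq *txq;
	};
};

int main(void)
{
	struct rxq r = { .id = 0 };
	struct txq t = { .id = 4 };
	struct q_ptr *qs = calloc(2, sizeof(*qs));

	if (!qs)
		return 1;

	qs[0] = (struct q_ptr){ .type = Q_RX, .rxq = &r };
	qs[1] = (struct q_ptr){ .type = Q_TX, .txq = &t };

	for (int i = 0; i < 2; i++) {
		if (qs[i].type == Q_RX)
			printf("rx queue %d\n", qs[i].rxq->id);
		else
			printf("tx queue %d\n", qs[i].txq->id);
	}

	free(qs);
	return 0;
}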
i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + + if (q->type != VIRTCHNL2_QUEUE_TYPE_TX) + continue; + + if (!idpf_queue_has(XSK, q->txq)) + continue; + + idpf_xsk_init_wakeup(q_vector); + + q->txq->q_vector = q_vector; + q_vector->xsksq[q_vector->num_xsksq++] = q->txq; + } + +config: + err = idpf_send_config_queue_set_msg(qs); + if (err) { + netdev_err(vport->netdev, "Could not configure queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + err = idpf_send_enable_queue_set_msg(qs); + if (err) { + netdev_err(vport->netdev, "Could not enable queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + napi_enable(&q_vector->napi); + idpf_qvec_ena_irq(q_vector); + + netif_start_subqueue(vport->netdev, qid); + + return 0; +} + +static int idpf_qp_disable(const struct idpf_queue_set *qs, u32 qid) +{ + struct idpf_vport *vport = qs->vport; + struct idpf_q_vector *q_vector; + int err; + + q_vector = idpf_find_rxq_vec(vport, qid); + netif_stop_subqueue(vport->netdev, qid); + + writel(0, q_vector->intr_reg.dyn_ctl); + napi_disable(&q_vector->napi); + + err = idpf_send_disable_queue_set_msg(qs); + if (err) { + netdev_err(vport->netdev, "Could not disable queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + idpf_clean_queue_set(qs); + + kfree(q_vector->xsksq); + q_vector->num_xsksq = 0; + + return 0; +} + +/** + * idpf_qp_switch - enable or disable queues associated with queue pair + * @vport: vport to switch the pair for + * @qid: index of the queue pair to switch + * @en: whether to enable or disable the pair + * + * Return: 0 on success, -errno on failure. + */ +int idpf_qp_switch(struct idpf_vport *vport, u32 qid, bool en) +{ + struct idpf_q_vector *q_vector = idpf_find_rxq_vec(vport, qid); + struct idpf_queue_set *qs __free(kfree) = NULL; + + if (idpf_find_txq_vec(vport, qid) != q_vector) + return -EINVAL; + + qs = idpf_vector_to_queue_set(q_vector); + if (!qs) + return -ENOMEM; + + return en ? 
idpf_qp_enable(qs, qid) : idpf_qp_disable(qs, qid); +} + /** * idpf_txq_group_rel - Release all resources for txq groups * @vport: vport to release txq groups on @@ -995,6 +1309,11 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; for (j = 0; j < txq_grp->num_txq; j++) { + if (flow_sch_en) { + kfree(txq_grp->txqs[j]->refillq); + txq_grp->txqs[j]->refillq = NULL; + } + kfree(txq_grp->txqs[j]); txq_grp->txqs[j] = NULL; } @@ -1004,9 +1323,6 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) kfree(txq_grp->complq); txq_grp->complq = NULL; - - if (flow_sch_en) - kfree(txq_grp->stashes); } kfree(vport->txq_grps); vport->txq_grps = NULL; @@ -1088,8 +1404,12 @@ static void idpf_vport_queue_grp_rel_all(struct idpf_vport *vport) */ void idpf_vport_queues_rel(struct idpf_vport *vport) { + idpf_xdp_copy_prog_to_rqs(vport, NULL); + idpf_tx_desc_rel_all(vport); idpf_rx_desc_rel_all(vport); + + idpf_xdpsqs_put(vport); idpf_vport_queue_grp_rel_all(vport); kfree(vport->txqs); @@ -1163,6 +1483,18 @@ void idpf_vport_init_num_qs(struct idpf_vport *vport, if (idpf_is_queue_model_split(vport->rxq_model)) vport->num_bufq = le16_to_cpu(vport_msg->num_rx_bufq); + vport->xdp_prog = config_data->xdp_prog; + if (idpf_xdp_enabled(vport)) { + vport->xdp_txq_offset = config_data->num_req_tx_qs; + vport->num_xdp_txq = le16_to_cpu(vport_msg->num_tx_q) - + vport->xdp_txq_offset; + vport->xdpsq_share = libeth_xdpsq_shared(vport->num_xdp_txq); + } else { + vport->xdp_txq_offset = 0; + vport->num_xdp_txq = 0; + vport->xdpsq_share = false; + } + /* Adjust number of buffer queues per Rx queue group. */ if (!idpf_is_queue_model_split(vport->rxq_model)) { vport->num_bufqs_per_qgrp = 0; @@ -1234,22 +1566,17 @@ int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx, int dflt_splitq_txq_grps = 0, dflt_singleq_txqs = 0; int dflt_splitq_rxq_grps = 0, dflt_singleq_rxqs = 0; u16 num_req_tx_qs = 0, num_req_rx_qs = 0; + struct idpf_vport_user_config_data *user; struct idpf_vport_config *vport_config; u16 num_txq_grps, num_rxq_grps; - u32 num_qs; + u32 num_qs, num_xdpsq; vport_config = adapter->vport_config[vport_idx]; if (vport_config) { num_req_tx_qs = vport_config->user_config.num_req_tx_qs; num_req_rx_qs = vport_config->user_config.num_req_rx_qs; } else { - int num_cpus; - - /* Restrict num of queues to cpus online as a default - * configuration to give best performance. User can always - * override to a max number of queues via ethtool. 
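/* idpf_qp_switch() declares the queue set with __free(kfree), so it is
 * released automatically on every return path. That helper is built on
 * the compiler's cleanup attribute; a userspace sketch of the same idea
 * follows. auto_free and free_ptr() are illustrative names, and the
 * attribute is a GCC/Clang extension.
 */
#include <stdio.h>
#include <stdlib.h>

static void free_ptr(void *p)
{
	free(*(void **)p);		/* handler gets a pointer to the variable */
}

#define auto_free __attribute__((cleanup(free_ptr)))

int main(void)
{
	auto_free char *buf = malloc(32);

	if (!buf)
		return 1;		/* cleanup still runs; free(NULL) is a no-op */

	snprintf(buf, 32, "freed on scope exit");
	puts(buf);
	return 0;			/* free(buf) runs automatically here */
}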
- */ - num_cpus = num_online_cpus(); + u32 num_cpus = netif_get_num_default_rss_queues(); dflt_splitq_txq_grps = min_t(int, max_q->max_txq, num_cpus); dflt_singleq_txqs = min_t(int, max_q->max_txq, num_cpus); @@ -1284,6 +1611,24 @@ int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx, vport_msg->num_rx_bufq = 0; } + if (!vport_config) + return 0; + + user = &vport_config->user_config; + user->num_req_rx_qs = le16_to_cpu(vport_msg->num_rx_q); + user->num_req_tx_qs = le16_to_cpu(vport_msg->num_tx_q); + + if (vport_config->user_config.xdp_prog) + num_xdpsq = libeth_xdpsq_num(user->num_req_rx_qs, + user->num_req_tx_qs, + vport_config->max_q.max_txq); + else + num_xdpsq = 0; + + vport_msg->num_tx_q = cpu_to_le16(user->num_req_tx_qs + num_xdpsq); + if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->txq_model))) + vport_msg->num_tx_complq = vport_msg->num_tx_q; + return 0; } @@ -1333,14 +1678,13 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, static void idpf_rxq_set_descids(const struct idpf_vport *vport, struct idpf_rx_queue *q) { - if (idpf_is_queue_model_split(vport->rxq_model)) { - q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; - } else { - if (vport->base_rxd) - q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M; - else - q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M; - } + if (idpf_is_queue_model_split(vport->rxq_model)) + return; + + if (vport->base_rxd) + q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M; + else + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M; } /** @@ -1367,7 +1711,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) for (i = 0; i < vport->num_txq_grp; i++) { struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; struct idpf_adapter *adapter = vport->adapter; - struct idpf_txq_stash *stashes; int j; tx_qgrp->vport = vport; @@ -1380,15 +1723,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) goto err_alloc; } - if (split && flow_sch_en) { - stashes = kcalloc(num_txq, sizeof(*stashes), - GFP_KERNEL); - if (!stashes) - goto err_alloc; - - tx_qgrp->stashes = stashes; - } - for (j = 0; j < tx_qgrp->num_txq; j++) { struct idpf_tx_queue *q = tx_qgrp->txqs[j]; @@ -1398,6 +1732,7 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter); q->netdev = vport->netdev; q->txq_grp = tx_qgrp; + q->rel_q_id = j; if (!split) { q->clean_budget = vport->compln_clean_budget; @@ -1408,12 +1743,14 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) if (!flow_sch_en) continue; - if (split) { - q->stash = &stashes[j]; - hash_init(q->stash->sched_buf_hash); - } - idpf_queue_set(FLOW_SCH_EN, q); + + q->refillq = kzalloc(sizeof(*q->refillq), GFP_KERNEL); + if (!q->refillq) + goto err_alloc; + + idpf_queue_set(GEN_CHK, q->refillq); + idpf_queue_set(RFL_GEN_CHK, q->refillq); } if (!split) @@ -1556,7 +1893,6 @@ skip_splitq_rx_init: setup_rxq: q->desc_count = vport->rxq_desc_count; q->rx_ptype_lkup = vport->rx_ptype_lkup; - q->netdev = vport->netdev; q->bufq_sets = rx_qgrp->splitq.bufq_sets; q->idx = (i * num_rxq) + j; q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; @@ -1617,15 +1953,19 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport) if (err) goto err_out; - err = idpf_tx_desc_alloc_all(vport); + err = idpf_vport_init_fast_path_txqs(vport); if (err) goto err_out; - err = idpf_rx_desc_alloc_all(vport); + err = idpf_xdpsqs_get(vport); if (err) goto err_out; - err = idpf_vport_init_fast_path_txqs(vport); + err = 
idpf_tx_desc_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_rx_desc_alloc_all(vport); if (err) goto err_out; @@ -1638,32 +1978,6 @@ err_out: } /** - * idpf_tx_handle_sw_marker - Handle queue marker packet - * @tx_q: tx queue to handle software marker - */ -static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q) -{ - struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev); - struct idpf_vport *vport = priv->vport; - int i; - - idpf_queue_clear(SW_MARKER, tx_q); - /* Hardware must write marker packets to all queues associated with - * completion queues. So check if all queues received marker packets - */ - for (i = 0; i < vport->num_txq; i++) - /* If we're still waiting on any other TXQ marker completions, - * just return now since we cannot wake up the marker_wq yet. - */ - if (idpf_queue_has(SW_MARKER, vport->txqs[i])) - return; - - /* Drain complete */ - set_bit(IDPF_VPORT_SW_MARKER, vport->flags); - wake_up(&vport->sw_marker_wq); -} - -/** * idpf_tx_read_tstamp - schedule a work to read Tx timestamp value * @txq: queue to read the timestamp from * @skb: socket buffer to provide Tx timestamp value @@ -1697,87 +2011,6 @@ static void idpf_tx_read_tstamp(struct idpf_tx_queue *txq, struct sk_buff *skb) spin_unlock_bh(&tx_tstamp_caps->status_lock); } -/** - * idpf_tx_clean_stashed_bufs - clean bufs that were stored for - * out of order completions - * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) - * @cleaned: pointer to stats struct to track cleaned packets/bytes - * @budget: Used to determine if we are in netpoll - */ -static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, - u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) -{ - struct idpf_tx_stash *stash; - struct hlist_node *tmp_buf; - struct libeth_cq_pp cp = { - .dev = txq->dev, - .ss = cleaned, - .napi = budget, - }; - - /* Buffer completion */ - hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, - hlist, compl_tag) { - if (unlikely(idpf_tx_buf_compl_tag(&stash->buf) != compl_tag)) - continue; - - hash_del(&stash->hlist); - - if (stash->buf.type == LIBETH_SQE_SKB && - (skb_shinfo(stash->buf.skb)->tx_flags & SKBTX_IN_PROGRESS)) - idpf_tx_read_tstamp(txq, stash->buf.skb); - - libeth_tx_complete(&stash->buf, &cp); - - /* Push shadow buf back onto stack */ - idpf_buf_lifo_push(&txq->stash->buf_stack, stash); - } -} - -/** - * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a - * later time (only relevant for flow scheduling mode) - * @txq: Tx queue to clean - * @tx_buf: buffer to store - */ -static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, - struct idpf_tx_buf *tx_buf) -{ - struct idpf_tx_stash *stash; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) - return 0; - - stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); - if (unlikely(!stash)) { - net_err_ratelimited("%s: No out-of-order TX buffers left!\n", - netdev_name(txq->netdev)); - - return -ENOMEM; - } - - /* Store buffer params in shadow buffer */ - stash->buf.skb = tx_buf->skb; - stash->buf.bytes = tx_buf->bytes; - stash->buf.packets = tx_buf->packets; - stash->buf.type = tx_buf->type; - stash->buf.nr_frags = tx_buf->nr_frags; - dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); - dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); - idpf_tx_buf_compl_tag(&stash->buf) = idpf_tx_buf_compl_tag(tx_buf); - - /* Add buffer to buf_hash table to be freed later */ - 
hash_add(txq->stash->sched_buf_hash, &stash->hlist, - idpf_tx_buf_compl_tag(&stash->buf)); - - tx_buf->type = LIBETH_SQE_EMPTY; - - return 0; -} - #define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ do { \ if (unlikely(++(ntc) == (txq)->desc_count)) { \ @@ -1805,14 +2038,8 @@ do { \ * Separate packet completion events will be reported on the completion queue, * and the buffers will be cleaned separately. The stats are not updated from * this function when using flow-based scheduling. - * - * Furthermore, in flow scheduling mode, check to make sure there are enough - * reserve buffers to stash the packet. If there are not, return early, which - * will leave next_to_clean pointing to the packet that failed to be stashed. - * - * Return: false in the scenario above, true otherwise. */ -static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, +static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, int napi_budget, struct libeth_sq_napi_stats *cleaned, bool descs_only) @@ -1826,7 +2053,12 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, .napi = napi_budget, }; struct idpf_tx_buf *tx_buf; - bool clean_complete = true; + + if (descs_only) { + /* Bump ring index to mark as cleaned. */ + tx_q->next_to_clean = end; + return; + } tx_desc = &tx_q->flex_tx[ntc]; next_pending_desc = &tx_q->flex_tx[end]; @@ -1846,136 +2078,61 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, break; eop_idx = tx_buf->rs_idx; + libeth_tx_complete(tx_buf, &cp); - if (descs_only) { - if (IDPF_TX_BUF_RSV_UNUSED(tx_q) < tx_buf->nr_frags) { - clean_complete = false; - goto tx_splitq_clean_out; - } - - idpf_stash_flow_sch_buffers(tx_q, tx_buf); + /* unmap remaining buffers */ + while (ntc != eop_idx) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - idpf_stash_flow_sch_buffers(tx_q, tx_buf); - } - } else { + /* unmap any remaining paged data */ libeth_tx_complete(tx_buf, &cp); - - /* unmap remaining buffers */ - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - - /* unmap any remaining paged data */ - libeth_tx_complete(tx_buf, &cp); - } } fetch_next_txq_desc: idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); } -tx_splitq_clean_out: tx_q->next_to_clean = ntc; - - return clean_complete; } -#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ -do { \ - (buf)++; \ - (ntc)++; \ - if (unlikely((ntc) == (txq)->desc_count)) { \ - buf = (txq)->tx_buf; \ - ntc = 0; \ - } \ -} while (0) - /** - * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers + * idpf_tx_clean_bufs - clean flow scheduling TX queue buffers * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @buf_id: packet's starting buffer ID, from completion descriptor * @cleaned: pointer to stats struct to track cleaned packets/bytes * @budget: Used to determine if we are in netpoll * - * Cleans all buffers associated with the input completion tag either from the - * TX buffer ring or from the hash table if the buffers were previously - * stashed. Returns the byte/segment count for the cleaned packet associated - * this completion tag. + * Clean all buffers associated with the packet starting at buf_id. Returns the + * byte/segment count for the cleaned packet. 
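/* idpf_tx_splitq_clean_bump_ntc() advances the clean index, the
 * descriptor pointer and the buffer pointer together, wrapping all of
 * them at the end of the ring; wrapping the multi-statement body in
 * do { } while (0) keeps it safe inside an un-braced if/else. A
 * standalone version of the idiom; RING_SIZE and ring_bump() are
 * illustrative.
 */
#include <stdio.h>

#define RING_SIZE 4u

#define ring_bump(ntc, ring, cur)		\
do {						\
	if (++(ntc) == RING_SIZE) {		\
		(ntc) = 0;			\
		(cur) = &(ring)[0];		\
	} else {				\
		(cur)++;			\
	}					\
} while (0)

int main(void)
{
	int ring[RING_SIZE] = { 10, 11, 12, 13 };
	unsigned int ntc = 2;
	int *cur = &ring[ntc];

	ring_bump(ntc, ring, cur);		/* ntc = 3 */
	ring_bump(ntc, ring, cur);		/* wraps: ntc = 0, cur = &ring[0] */
	printf("ntc=%u *cur=%d\n", ntc, *cur);	/* ntc=0 *cur=10 */
	return 0;
}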
*/ -static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) +static void idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, + struct libeth_sq_napi_stats *cleaned, + int budget) { - u16 idx = compl_tag & txq->compl_tag_bufid_m; struct idpf_tx_buf *tx_buf = NULL; struct libeth_cq_pp cp = { .dev = txq->dev, .ss = cleaned, .napi = budget, }; - u16 ntc, orig_idx = idx; - - tx_buf = &txq->tx_buf[idx]; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX || - idpf_tx_buf_compl_tag(tx_buf) != compl_tag)) - return false; + tx_buf = &txq->tx_buf[buf_id]; if (tx_buf->type == LIBETH_SQE_SKB) { if (skb_shinfo(tx_buf->skb)->tx_flags & SKBTX_IN_PROGRESS) idpf_tx_read_tstamp(txq, tx_buf->skb); libeth_tx_complete(tx_buf, &cp); + idpf_post_buf_refill(txq->refillq, buf_id); } - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + buf_id = idpf_tx_buf_next(tx_buf); - while (idpf_tx_buf_compl_tag(tx_buf) == compl_tag) { + tx_buf = &txq->tx_buf[buf_id]; libeth_tx_complete(tx_buf, &cp); - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + idpf_post_buf_refill(txq->refillq, buf_id); } - - /* - * It's possible the packet we just cleaned was an out of order - * completion, which means we can stash the buffers starting from - * the original next_to_clean and reuse the descriptors. We need - * to compare the descriptor ring next_to_clean packet's "first" buffer - * to the "first" buffer of the packet we just cleaned to determine if - * this is the case. Howevever, next_to_clean can point to either a - * reserved buffer that corresponds to a context descriptor used for the - * next_to_clean packet (TSO packet) or the "first" buffer (single - * packet). The orig_idx from the packet we just cleaned will always - * point to the "first" buffer. If next_to_clean points to a reserved - * buffer, let's bump ntc once and start the comparison from there. - */ - ntc = txq->next_to_clean; - tx_buf = &txq->tx_buf[ntc]; - - if (tx_buf->type == LIBETH_SQE_CTX) - idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); - - /* - * If ntc still points to a different "first" buffer, clean the - * descriptor ring and stash all of the buffers for later cleaning. If - * we cannot stash all of the buffers, next_to_clean will point to the - * "first" buffer of the packet that could not be stashed and cleaning - * will start there next time. - */ - if (unlikely(tx_buf != &txq->tx_buf[orig_idx] && - !idpf_tx_splitq_clean(txq, orig_idx, budget, cleaned, - true))) - return true; - - /* - * Otherwise, update next_to_clean to reflect the cleaning that was - * done above. - */ - txq->next_to_clean = idx; - - return true; } /** @@ -1994,22 +2151,17 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, struct libeth_sq_napi_stats *cleaned, int budget) { - u16 compl_tag; + /* RS completion contains queue head for queue based scheduling or + * completion tag for flow based scheduling. + */ + u16 rs_compl_val = le16_to_cpu(desc->common.q_head_compl_tag.q_head); if (!idpf_queue_has(FLOW_SCH_EN, txq)) { - u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); - - idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + idpf_tx_splitq_clean(txq, rs_compl_val, budget, cleaned, false); return; } - compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); - - /* If we didn't clean anything on the ring, this packet must be - * in the hash table. Go clean it there. 
- */ - if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget)) - idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); + idpf_tx_clean_bufs(txq, rs_compl_val, cleaned, budget); } /** @@ -2037,19 +2189,19 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, do { struct libeth_sq_napi_stats cleaned_stats = { }; struct idpf_tx_queue *tx_q; + __le16 hw_head; int rel_tx_qid; - u16 hw_head; u8 ctype; /* completion type */ u16 gen; /* if the descriptor isn't done, no work yet to do */ - gen = le16_get_bits(tx_desc->qid_comptype_gen, + gen = le16_get_bits(tx_desc->common.qid_comptype_gen, IDPF_TXD_COMPLQ_GEN_M); if (idpf_queue_has(GEN_CHK, complq) != gen) break; /* Find necessary info of TX queue to clean buffers */ - rel_tx_qid = le16_get_bits(tx_desc->qid_comptype_gen, + rel_tx_qid = le16_get_bits(tx_desc->common.qid_comptype_gen, IDPF_TXD_COMPLQ_QID_M); if (rel_tx_qid >= complq->txq_grp->num_txq || !complq->txq_grp->txqs[rel_tx_qid]) { @@ -2059,22 +2211,19 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, tx_q = complq->txq_grp->txqs[rel_tx_qid]; /* Determine completion type */ - ctype = le16_get_bits(tx_desc->qid_comptype_gen, + ctype = le16_get_bits(tx_desc->common.qid_comptype_gen, IDPF_TXD_COMPLQ_COMPL_TYPE_M); switch (ctype) { case IDPF_TXD_COMPLT_RE: - hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head); + hw_head = tx_desc->common.q_head_compl_tag.q_head; - idpf_tx_splitq_clean(tx_q, hw_head, budget, - &cleaned_stats, true); + idpf_tx_splitq_clean(tx_q, le16_to_cpu(hw_head), + budget, &cleaned_stats, true); break; case IDPF_TXD_COMPLT_RS: idpf_tx_handle_rs_completion(tx_q, tx_desc, &cleaned_stats, budget); break; - case IDPF_TXD_COMPLT_SW_MARKER: - idpf_tx_handle_sw_marker(tx_q); - break; default: netdev_err(tx_q->netdev, "Unknown TX completion type: %d\n", ctype); @@ -2126,8 +2275,7 @@ fetch_next_desc: /* Update BQL */ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || - np->state != __IDPF_VPORT_UP || + dont_wake = !complq_ok || np->state != __IDPF_VPORT_UP || !netif_carrier_ok(tx_q->netdev); /* Check if the TXQ needs to and can be restarted */ __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, @@ -2148,6 +2296,69 @@ fetch_next_desc: } /** + * idpf_wait_for_sw_marker_completion - wait for SW marker of disabled Tx queue + * @txq: disabled Tx queue + * + * When Tx queue is requested for disabling, the CP sends a special completion + * descriptor called "SW marker", meaning the queue is ready to be destroyed. + * If, for some reason, the marker is not received within 500 ms, break the + * polling to not hang the driver. + */ +void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq) +{ + struct idpf_compl_queue *complq; + unsigned long timeout; + bool flow, gen_flag; + u32 ntc; + + if (!idpf_queue_has(SW_MARKER, txq)) + return; + + complq = idpf_queue_has(XDP, txq) ? txq->complq : txq->txq_grp->complq; + ntc = complq->next_to_clean; + + flow = idpf_queue_has(FLOW_SCH_EN, complq); + gen_flag = idpf_queue_has(GEN_CHK, complq); + + timeout = jiffies + msecs_to_jiffies(IDPF_WAIT_FOR_MARKER_TIMEO); + + do { + struct idpf_splitq_4b_tx_compl_desc *tx_desc; + struct idpf_tx_queue *target; + u32 ctype_gen, id; + + tx_desc = flow ? 
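/* idpf_wait_for_sw_marker_completion() polls the completion ring for
 * the SW marker but gives up after a deadline so a missing marker
 * cannot hang the driver. A standalone model of bounded polling; the
 * retry count stands in for the jiffies deadline and the 500-1000 us
 * sleeps between attempts, and all names here are illustrative.
 */
#include <stdbool.h>
#include <stdio.h>

static bool poll_bounded(bool (*done)(void), unsigned int max_tries)
{
	for (unsigned int i = 0; i < max_tries; i++) {
		if (done())
			return true;
		/* the driver sleeps briefly here before re-reading the ring */
	}
	return false;			/* deadline hit: stop waiting */
}

static bool never_done(void)
{
	return false;
}

int main(void)
{
	printf("marker seen: %s\n",
	       poll_bounded(never_done, 1000) ? "yes" : "no");
	return 0;
}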
&complq->comp[ntc].common : + &complq->comp_4b[ntc]; + ctype_gen = le16_to_cpu(tx_desc->qid_comptype_gen); + + if (!!(ctype_gen & IDPF_TXD_COMPLQ_GEN_M) != gen_flag) { + usleep_range(500, 1000); + continue; + } + + if (FIELD_GET(IDPF_TXD_COMPLQ_COMPL_TYPE_M, ctype_gen) != + IDPF_TXD_COMPLT_SW_MARKER) + goto next; + + id = FIELD_GET(IDPF_TXD_COMPLQ_QID_M, ctype_gen); + target = complq->txq_grp->txqs[id]; + + idpf_queue_clear(SW_MARKER, target); + if (target == txq) + break; + +next: + if (unlikely(++ntc == complq->desc_count)) { + ntc = 0; + gen_flag = !gen_flag; + } + } while (time_before(jiffies, timeout)); + + idpf_queue_assign(GEN_CHK, complq, gen_flag); + complq->next_to_clean = ntc; +} + +/** * idpf_tx_splitq_build_ctb - populate command tag and size for queue * based scheduling descriptors * @desc: descriptor to populate @@ -2184,15 +2395,21 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } -/* Global conditions to tell whether the txq (and related resources) - * has room to allow the use of "size" descriptors. +/** + * idpf_tx_splitq_has_room - check if enough Tx splitq resources are available + * @tx_q: the queue to be checked + * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of Tx buffers required for this packet + * + * Return: 0 if no room available, 1 otherwise */ -static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) +static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 descs_needed, + u32 bufs_needed) { - if (IDPF_DESC_UNUSED(tx_q) < size || + if (IDPF_DESC_UNUSED(tx_q) < descs_needed || IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || - IDPF_TX_BUF_RSV_LOW(tx_q)) + idpf_tx_splitq_get_free_bufs(tx_q->refillq) < bufs_needed) return 0; return 1; } @@ -2201,14 +2418,21 @@ static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of buffers needed for this packet * - * Returns 0 if stop is not needed + * Return: 0 if stop is not needed */ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, - unsigned int descs_needed) + u32 descs_needed, + u32 bufs_needed) { + /* Since we have multiple resources to check for splitq, our + * start,stop_thrs becomes a boolean check instead of a count + * threshold. + */ if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, - idpf_txq_has_room(tx_q, descs_needed), + idpf_txq_has_room(tx_q, descs_needed, + bufs_needed), 1, 1)) return 0; @@ -2250,14 +2474,16 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, } /** - * idpf_tx_desc_count_required - calculate number of Tx descriptors needed + * idpf_tx_res_count_required - get number of Tx resources needed for this pkt * @txq: queue to send buffer on * @skb: send buffer + * @bufs_needed: (output) number of buffers needed for this skb. * - * Returns number of data descriptors needed for this skb. + * Return: number of data descriptors and buffers needed for this skb. 
*/ -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb) +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, + u32 *bufs_needed) { const struct skb_shared_info *shinfo; unsigned int count = 0, i; @@ -2268,6 +2494,7 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, return count; shinfo = skb_shinfo(skb); + *bufs_needed += shinfo->nr_frags; for (i = 0; i < shinfo->nr_frags; i++) { unsigned int size; @@ -2297,71 +2524,89 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, } /** - * idpf_tx_dma_map_error - handle TX DMA map errors - * @txq: queue to send buffer on - * @skb: send buffer - * @first: original first buffer info buffer for packet - * @idx: starting point on ring to unwind + * idpf_tx_splitq_bump_ntu - adjust NTU and generation + * @txq: the tx ring to wrap + * @ntu: ring index to bump */ -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, - struct idpf_tx_buf *first, u16 idx) +static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) { - struct libeth_sq_napi_stats ss = { }; - struct libeth_cq_pp cp = { - .dev = txq->dev, - .ss = &ss, - }; + ntu++; - u64_stats_update_begin(&txq->stats_sync); - u64_stats_inc(&txq->q_stats.dma_map_errs); - u64_stats_update_end(&txq->stats_sync); + if (ntu == txq->desc_count) + ntu = 0; - /* clear dma mappings for failed tx_buf map */ - for (;;) { - struct idpf_tx_buf *tx_buf; + return ntu; +} - tx_buf = &txq->tx_buf[idx]; - libeth_tx_complete(tx_buf, &cp); - if (tx_buf == first) - break; - if (idx == 0) - idx = txq->desc_count; - idx--; - } +/** + * idpf_tx_get_free_buf_id - get a free buffer ID from the refill queue + * @refillq: refill queue to get buffer ID from + * @buf_id: return buffer ID + * + * Return: true if a buffer ID was found, false if not + */ +static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, + u32 *buf_id) +{ + u32 ntc = refillq->next_to_clean; + u32 refill_desc; - if (skb_is_gso(skb)) { - union idpf_tx_flex_desc *tx_desc; + refill_desc = refillq->ring[ntc]; - /* If we failed a DMA mapping for a TSO packet, we will have - * used one additional descriptor for a context - * descriptor. Reset that here. 
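/* idpf_tx_get_free_buf_id() pops free buffer IDs from the refill queue
 * by comparing the entry's generation bit against the software
 * RFL_GEN_CHK flag, flipping that flag whenever the index wraps; the
 * posting side does the mirror operation. A self-contained toy
 * producer/consumer ring built on the same generation scheme; the bit
 * layout and every name here are illustrative, not the driver's.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define GEN_BIT		0x8000u
#define ID_MASK		0x7fffu
#define RING_SZ		4u

struct refillq {
	uint16_t ring[RING_SZ];
	unsigned int ntu, ntc;
	bool gen_post, gen_clean;
};

static void post_id(struct refillq *q, uint16_t id)
{
	q->ring[q->ntu] = (id & ID_MASK) | (q->gen_post ? GEN_BIT : 0);
	if (++q->ntu == RING_SZ) {
		q->ntu = 0;
		q->gen_post = !q->gen_post;	/* wrapped: flip producer gen */
	}
}

static bool get_id(struct refillq *q, uint16_t *id)
{
	uint16_t desc = q->ring[q->ntc];

	if (!!(desc & GEN_BIT) != q->gen_clean)
		return false;			/* old generation: nothing to pop */

	*id = desc & ID_MASK;
	if (++q->ntc == RING_SZ) {
		q->ntc = 0;
		q->gen_clean = !q->gen_clean;	/* wrapped: flip consumer gen */
	}
	return true;
}

int main(void)
{
	struct refillq q = { .gen_post = true, .gen_clean = true };
	uint16_t id = 0;
	bool ok;

	post_id(&q, 7);
	ok = get_id(&q, &id);
	printf("got=%d id=%u\n", ok, (unsigned int)id);	/* got=1 id=7 */
	printf("empty=%d\n", !get_id(&q, &id));		/* empty=1 */
	return 0;
}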
- */ - tx_desc = &txq->flex_tx[idx]; - memset(tx_desc, 0, sizeof(*tx_desc)); - if (idx == 0) - idx = txq->desc_count; - idx--; + if (unlikely(idpf_queue_has(RFL_GEN_CHK, refillq) != + !!(refill_desc & IDPF_RFL_BI_GEN_M))) + return false; + + *buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); + + if (unlikely(++ntc == refillq->desc_count)) { + idpf_queue_change(RFL_GEN_CHK, refillq); + ntc = 0; } - /* Update tail in case netdev_xmit_more was previously true */ - idpf_tx_buf_hw_update(txq, idx, false); + refillq->next_to_clean = ntc; + + return true; } /** - * idpf_tx_splitq_bump_ntu - adjust NTU and generation - * @txq: the tx ring to wrap - * @ntu: ring index to bump + * idpf_tx_splitq_pkt_err_unmap - Unmap buffers and bump tail in case of error + * @txq: Tx queue to unwind + * @params: pointer to splitq params struct + * @first: starting buffer for packet to unmap */ -static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) +static void idpf_tx_splitq_pkt_err_unmap(struct idpf_tx_queue *txq, + struct idpf_tx_splitq_params *params, + struct idpf_tx_buf *first) { - ntu++; + struct idpf_sw_queue *refillq = txq->refillq; + struct libeth_sq_napi_stats ss = { }; + struct idpf_tx_buf *tx_buf = first; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; - if (ntu == txq->desc_count) { - ntu = 0; - txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq); + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + libeth_tx_complete(tx_buf, &cp); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + tx_buf = &txq->tx_buf[idpf_tx_buf_next(tx_buf)]; + libeth_tx_complete(tx_buf, &cp); } - return ntu; + /* Update tail in case netdev_xmit_more was previously true. */ + idpf_tx_buf_hw_update(txq, params->prev_ntu, false); + + if (!refillq) + return; + + /* Restore refillq state to avoid leaking tags. */ + if (params->prev_refill_gen != idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = params->prev_refill_ntc; } /** @@ -2385,6 +2630,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, struct netdev_queue *nq; struct sk_buff *skb; skb_frag_t *frag; + u32 next_buf_id; u16 td_cmd = 0; dma_addr_t dma; @@ -2402,17 +2648,16 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, tx_buf = first; first->nr_frags = 0; - params->compl_tag = - (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; - for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); + } first->nr_frags++; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; tx_buf->type = LIBETH_SQE_FRAG; /* record length, and DMA address */ @@ -2468,29 +2713,12 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, max_data); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = - IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } - /* Since this packet has a buffer that is going to span - * multiple descriptors, it's going to leave holes in - * to the TX buffer ring. 
To ensure these holes do not - * cause issues in the cleaning routines, we will clear - * them of any stale data and assign them the same - * completion tag as the current packet. Then when the - * packet is being cleaned, the cleaning routines will - * simply pass over these holes and finish cleaning the - * rest of the packet. - */ - tx_buf->type = LIBETH_SQE_EMPTY; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; - /* Adjust the DMA offset and the remaining size of the * fragment. On the first iteration of this loop, * max_data will be >= 12K and <= 16K-1. On any @@ -2515,15 +2743,25 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &next_buf_id))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); + } + } else { + next_buf_id = i; + } + idpf_tx_buf_next(tx_buf) = next_buf_id; + tx_buf = &tx_q->tx_buf[next_buf_id]; + size = skb_frag_size(frag); data_len -= size; @@ -2538,6 +2776,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, /* write last descriptor with RS and EOP bits */ first->rs_idx = i; + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; td_cmd |= params->eop_cmd; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); i = idpf_tx_splitq_bump_ntu(tx_q, i); @@ -2627,111 +2866,6 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) return 1; } -/** - * __idpf_chk_linearize - Check skb is not using too many buffers - * @skb: send buffer - * @max_bufs: maximum number of buffers - * - * For TSO we need to count the TSO header and segment payload separately. As - * such we need to check cases where we have max_bufs-1 fragments or more as we - * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 - * for the segment payload in the first descriptor, and another max_buf-1 for - * the fragments. - */ -static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) -{ - const struct skb_shared_info *shinfo = skb_shinfo(skb); - const skb_frag_t *frag, *stale; - int nr_frags, sum; - - /* no need to check if number of frags is less than max_bufs - 1 */ - nr_frags = shinfo->nr_frags; - if (nr_frags < (max_bufs - 1)) - return false; - - /* We need to walk through the list and validate that each group - * of max_bufs-2 fragments totals at least gso_size. - */ - nr_frags -= max_bufs - 2; - frag = &shinfo->frags[0]; - - /* Initialize size to the negative value of gso_size minus 1. We use - * this as the worst case scenario in which the frag ahead of us only - * provides one byte which is why we are limited to max_bufs-2 - * descriptors for a single transmit as the header and previous - * fragment are already consuming 2 descriptors. - */ - sum = 1 - shinfo->gso_size; - - /* Add size of frags 0 through 4 to create our initial sum */ - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - - /* Walk through fragments adding latest fragment, testing it, and - * then removing stale fragments from the sum. 
- */ - for (stale = &shinfo->frags[0];; stale++) { - int stale_size = skb_frag_size(stale); - - sum += skb_frag_size(frag++); - - /* The stale fragment may present us with a smaller - * descriptor than the actual fragment size. To account - * for that we need to remove all the data on the front and - * figure out what the remainder would be in the last - * descriptor associated with the fragment. - */ - if (stale_size > IDPF_TX_MAX_DESC_DATA) { - int align_pad = -(skb_frag_off(stale)) & - (IDPF_TX_MAX_READ_REQ_SIZE - 1); - - sum -= align_pad; - stale_size -= align_pad; - - do { - sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; - stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; - } while (stale_size > IDPF_TX_MAX_DESC_DATA); - } - - /* if sum is negative we failed to make sufficient progress */ - if (sum < 0) - return true; - - if (!nr_frags--) - break; - - sum -= stale_size; - } - - return false; -} - -/** - * idpf_chk_linearize - Check if skb exceeds max descriptors per packet - * @skb: send buffer - * @max_bufs: maximum scatter gather buffers for single packet - * @count: number of buffers this packet needs - * - * Make sure we don't exceed maximum scatter gather buffers for a single - * packet. We have to do some special checking around the boundary (max_bufs-1) - * if TSO is on since we need count the TSO header and payload separately. - * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO - * header, 1 for segment payload, and then 7 for the fragments. - */ -static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count) -{ - if (likely(count < max_bufs)) - return false; - if (skb_is_gso(skb)) - return __idpf_chk_linearize(skb, max_bufs); - - return count > max_bufs; -} /** * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring @@ -2746,8 +2880,6 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq) union idpf_flex_tx_ctx_desc *desc; int i = txq->next_to_use; - txq->tx_buf[i].type = LIBETH_SQE_CTX; - /* grab the next descriptor */ desc = &txq->flex_ctx[i]; txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); @@ -2841,6 +2973,21 @@ static void idpf_tx_set_tstamp_desc(union idpf_flex_tx_ctx_desc *ctx_desc, #endif /* CONFIG_PTP_1588_CLOCK */ /** + * idpf_tx_splitq_need_re - check whether RE bit needs to be set + * @tx_q: pointer to Tx queue + * + * Return: true if RE bit needs to be set, false otherwise + */ +static bool idpf_tx_splitq_need_re(struct idpf_tx_queue *tx_q) +{ + int gap = tx_q->next_to_use - tx_q->last_re; + + gap += (gap < 0) ? 
tx_q->desc_count : 0; + + return gap >= IDPF_TX_SPLITQ_RE_MIN_GAP; +} + +/** * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors * @skb: send buffer * @tx_q: queue to send buffer on @@ -2850,13 +2997,16 @@ static void idpf_tx_set_tstamp_desc(union idpf_flex_tx_ctx_desc *ctx_desc, static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q) { - struct idpf_tx_splitq_params tx_params = { }; + struct idpf_tx_splitq_params tx_params = { + .prev_ntu = tx_q->next_to_use, + }; union idpf_flex_tx_ctx_desc *ctx_desc; struct idpf_tx_buf *first; - unsigned int count; + u32 count, buf_count = 1; int tso, idx; + u32 buf_id; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); @@ -2866,7 +3016,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, /* Check for splitq specific TX resources */ count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso); - if (idpf_tx_maybe_stop_splitq(tx_q, count)) { + if (idpf_tx_maybe_stop_splitq(tx_q, count, buf_count)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); return NETDEV_TX_BUSY; @@ -2898,36 +3048,47 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, idpf_tx_set_tstamp_desc(ctx_desc, idx); } - /* record the location of the first descriptor for this packet */ - first = &tx_q->tx_buf[tx_q->next_to_use]; - first->skb = skb; + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + struct idpf_sw_queue *refillq = tx_q->refillq; - if (tso) { - first->packets = tx_params.offload.tso_segs; - first->bytes = skb->len + - ((first->packets - 1) * tx_params.offload.tso_hdr_len); - } else { - first->packets = 1; - first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - } + /* Save refillq state in case of a packet rollback. Otherwise, + * the tags will be leaked since they will be popped from the + * refillq but never reposted during cleaning. + */ + tx_params.prev_refill_gen = + idpf_queue_has(RFL_GEN_CHK, refillq); + tx_params.prev_refill_ntc = refillq->next_to_clean; + + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &buf_id))) { + if (tx_params.prev_refill_gen != + idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = tx_params.prev_refill_ntc; + + tx_q->next_to_use = tx_params.prev_ntu; + return idpf_tx_drop_skb(tx_q, skb); + } + tx_params.compl_tag = buf_id; - if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; - /* Set the RE bit to catch any packets that may have not been - * stashed during RS completion cleaning. MIN_GAP is set to - * MIN_RING size to ensure it will be set at least once each - * time around the ring. + /* Set the RE bit to periodically "clean" the descriptor ring. + * MIN_GAP is set to MIN_RING size to ensure it will be set at + * least once each time around the ring. 
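
The new idpf_tx_splitq_need_re() above replaces the old next_to_use modulo check with an explicit wrap-aware distance from the last descriptor that carried an RE bit. A minimal standalone sketch of that arithmetic (illustrative names, not the driver code):

/*
 * Distance from 'last' to 'cur' on a ring of 'count' descriptors.
 * When 'cur' has wrapped past the end of the ring, the raw difference
 * goes negative and adding 'count' restores the true gap.
 */
static int ring_gap(int cur, int last, int count)
{
        int gap = cur - last;

        return gap < 0 ? gap + count : gap;
}

/* Request a completion report at least once per 64 descriptors. */
static bool need_re(int next_to_use, int last_re, int desc_count)
{
        return ring_gap(next_to_use, last_re, desc_count) >= 64;
}

The gap is measured in descriptors, so the RE bit is still guaranteed to be set at least once per trip around the ring, matching the comment in the hunk above.
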
*/ - if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) { + if (idpf_tx_splitq_need_re(tx_q)) { tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; tx_q->txq_grp->num_completions_pending++; + tx_q->last_re = tx_q->next_to_use; } if (skb->ip_summed == CHECKSUM_PARTIAL) tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; } else { + buf_id = tx_q->next_to_use; + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; @@ -2935,6 +3096,18 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; } + first = &tx_q->tx_buf[buf_id]; + first->skb = skb; + + if (tso) { + first->packets = tx_params.offload.tso_segs; + first->bytes = skb->len + + ((first->packets - 1) * tx_params.offload.tso_hdr_len); + } else { + first->packets = 1; + first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + } + idpf_tx_splitq_map(tx_q, &tx_params, first); return NETDEV_TX_OK; @@ -2949,10 +3122,11 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, */ netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + const struct idpf_vport *vport = idpf_netdev_to_vport(netdev); struct idpf_tx_queue *tx_q; - if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) { + if (unlikely(skb_get_queue_mapping(skb) >= + vport->num_txq - vport->num_xdp_txq)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -2989,7 +3163,7 @@ idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb, { u32 hash; - if (!libeth_rx_pt_has_hash(rxq->netdev, decoded)) + if (!libeth_rx_pt_has_hash(rxq->xdp_rxq.dev, decoded)) return; hash = le16_to_cpu(rx_desc->hash1) | @@ -3015,7 +3189,7 @@ static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, bool ipv4, ipv6; /* check if Rx checksum is enabled */ - if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded)) + if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded)) return; /* check if HW has decoded the packet and checksum */ @@ -3187,7 +3361,7 @@ idpf_rx_hwtstamp(const struct idpf_rx_queue *rxq, } /** - * idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor + * __idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor * @rxq: Rx descriptor ring packet is being transacted on * @skb: pointer to current skb being populated * @rx_desc: Receive descriptor @@ -3197,8 +3371,8 @@ idpf_rx_hwtstamp(const struct idpf_rx_queue *rxq, * other fields within the skb. 
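
Just above, idpf_tx_splitq_frame() snapshots the refill queue's next_to_clean and generation flag before popping the first buffer ID, so a failed send can rewind the queue and the popped tags are not leaked. The save/restore pattern in isolation (made-up types, a simplified stand-in for the refill queue):

#include <stdbool.h>

struct toy_refillq {
        unsigned int next_to_clean;
        bool gen;
};

struct toy_rollback {
        unsigned int ntc;
        bool gen;
};

static void refillq_save(const struct toy_refillq *q, struct toy_rollback *rb)
{
        rb->ntc = q->next_to_clean;
        rb->gen = q->gen;
}

/* Undo any buffer-ID pops performed since refillq_save(). */
static void refillq_restore(struct toy_refillq *q,
                            const struct toy_rollback *rb)
{
        q->next_to_clean = rb->ntc;
        q->gen = rb->gen;
}
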
*/ static int -idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, - const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +__idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) { struct libeth_rx_csum csum_bits; struct libeth_rx_pt decoded; @@ -3214,9 +3388,6 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, if (idpf_queue_has(PTP, rxq)) idpf_rx_hwtstamp(rxq, rx_desc, skb); - skb->protocol = eth_type_trans(skb, rxq->netdev); - skb_record_rx_queue(skb, rxq->idx); - if (le16_get_bits(rx_desc->hdrlen_flags, VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) return idpf_rx_rsc(rxq, skb, rx_desc, decoded); @@ -3227,25 +3398,24 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, return 0; } -/** - * idpf_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag - * @rx_buf: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: packet length from rx_desc - * - * This function will add the data contained in rx_buf->page to the skb. - * It will just attach the page as a frag to the skb. - * The function will then update the page offset. - */ -void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, - unsigned int size) +bool idpf_rx_process_skb_fields(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs) { - u32 hr = netmem_get_pp(rx_buf->netmem)->p.offset; + struct idpf_rx_queue *rxq; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); - skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, rx_buf->netmem, - rx_buf->offset + hr, size, rx_buf->truesize); + return !__idpf_rx_process_skb_fields(rxq, skb, xdp->desc); } +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_RUN(static idpf_xdp_run_pass, idpf_xdp_run_prog, + idpf_xdp_tx_flush_bulk, idpf_rx_process_skb_fields); +LIBETH_XDP_DEFINE_FINALIZE(static idpf_xdp_finalize_rx, idpf_xdp_tx_flush_bulk, + idpf_xdp_tx_finalize); +LIBETH_XDP_DEFINE_END(); + /** * idpf_rx_hsplit_wa - handle header buffer overflows and split errors * @hdr: Rx buffer for the headers @@ -3288,36 +3458,6 @@ static u32 idpf_rx_hsplit_wa(const struct libeth_fqe *hdr, } /** - * idpf_rx_build_skb - Allocate skb and populate it from header buffer - * @buf: Rx buffer to pull data from - * @size: the length of the packet - * - * This function allocates an skb. It then populates it with the page data from - * the current receive descriptor, taking care to set up the skb correctly. 
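
The new idpf_rx_process_skb_fields() no longer takes the Rx queue as an argument; it recovers it from the &xdp_rxq_info embedded in struct idpf_rx_queue via libeth_xdp_buff_to_rq(). That helper is presumably a container_of()-style conversion; the generic pattern, shown with toy types, looks like this:

#include <stddef.h>

#define toy_container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct toy_rxq_info { int id; };

struct toy_rx_queue {
        int idx;
        struct toy_rxq_info xdp_rxq;    /* embedded, as in idpf_rx_queue */
};

static struct toy_rx_queue *rxq_from_info(struct toy_rxq_info *info)
{
        /* Walk back from the embedded member to the enclosing queue. */
        return toy_container_of(info, struct toy_rx_queue, xdp_rxq);
}
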
- */ -struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size) -{ - struct page *buf_page = __netmem_to_page(buf->netmem); - u32 hr = pp_page_to_nmdesc(buf_page)->pp->p.offset; - struct sk_buff *skb; - void *va; - - va = page_address(buf_page) + buf->offset; - prefetch(va + hr); - - skb = napi_build_skb(va, buf->truesize); - if (unlikely(!skb)) - return NULL; - - skb_mark_for_recycle(skb); - - skb_reserve(skb, hr); - __skb_put(skb, size); - - return skb; -} - -/** * idpf_rx_splitq_test_staterr - tests bits in Rx descriptor * status and error fields * @stat_err_field: field from descriptor to test bits in @@ -3358,13 +3498,18 @@ static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_de */ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) { - int total_rx_bytes = 0, total_rx_pkts = 0; struct idpf_buf_queue *rx_bufq = NULL; - struct sk_buff *skb = rxq->skb; + struct libeth_rq_napi_stats rs = { }; u16 ntc = rxq->next_to_clean; + LIBETH_XDP_ONSTACK_BUFF(xdp); + LIBETH_XDP_ONSTACK_BULK(bq); + + libeth_xdp_tx_init_bulk(&bq, rxq->xdp_prog, rxq->xdp_rxq.dev, + rxq->xdpsqs, rxq->num_xdp_txq); + libeth_xdp_init_buff(xdp, &rxq->xdp, &rxq->xdp_rxq); /* Process Rx packets bounded by budget */ - while (likely(total_rx_pkts < budget)) { + while (likely(rs.packets < budget)) { struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; struct libeth_fqe *hdr, *rx_buf = NULL; struct idpf_sw_queue *refillq = NULL; @@ -3378,18 +3523,14 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) /* get the Rx desc from Rx queue based on 'next_to_clean' */ rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb; - /* This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc - */ - dma_rmb(); - /* if the descriptor isn't done, no work yet to do */ gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M); - if (idpf_queue_has(GEN_CHK, rxq) != gen_id) break; + dma_rmb(); + rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M, rx_desc->rxdid_ucast); if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) { @@ -3434,7 +3575,7 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) hdr = &rx_bufq->hdr_buf[buf_id]; - if (unlikely(!hdr_len && !skb)) { + if (unlikely(!hdr_len && !xdp->data)) { hdr_len = idpf_rx_hsplit_wa(hdr, rx_buf, pkt_len); /* If failed, drop both buffers by setting len to 0 */ pkt_len -= hdr_len ? 
: pkt_len; @@ -3444,75 +3585,37 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) u64_stats_update_end(&rxq->stats_sync); } - if (libeth_rx_sync_for_cpu(hdr, hdr_len)) { - skb = idpf_rx_build_skb(hdr, hdr_len); - if (!skb) - break; - - u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.hsplit_pkts); - u64_stats_update_end(&rxq->stats_sync); - } + if (libeth_xdp_process_buff(xdp, hdr, hdr_len)) + rs.hsplit++; hdr->netmem = 0; payload: - if (!libeth_rx_sync_for_cpu(rx_buf, pkt_len)) - goto skip_data; - - if (skb) - idpf_rx_add_frag(rx_buf, skb, pkt_len); - else - skb = idpf_rx_build_skb(rx_buf, pkt_len); - - /* exit if we failed to retrieve a buffer */ - if (!skb) - break; - -skip_data: + libeth_xdp_process_buff(xdp, rx_buf, pkt_len); rx_buf->netmem = 0; - idpf_rx_post_buf_refill(refillq, buf_id); + idpf_post_buf_refill(refillq, buf_id); IDPF_RX_BUMP_NTC(rxq, ntc); /* skip if it is non EOP desc */ - if (!idpf_rx_splitq_is_eop(rx_desc) || unlikely(!skb)) - continue; - - /* pad skb if needed (to make valid ethernet frame) */ - if (eth_skb_pad(skb)) { - skb = NULL; - continue; - } - - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - - /* protocol */ - if (unlikely(idpf_rx_process_skb_fields(rxq, skb, rx_desc))) { - dev_kfree_skb_any(skb); - skb = NULL; + if (!idpf_rx_splitq_is_eop(rx_desc) || unlikely(!xdp->data)) continue; - } - - /* send completed skb up the stack */ - napi_gro_receive(rxq->napi, skb); - skb = NULL; - /* update budget accounting */ - total_rx_pkts++; + idpf_xdp_run_pass(xdp, &bq, rxq->napi, &rs, rx_desc); } + idpf_xdp_finalize_rx(&bq); + rxq->next_to_clean = ntc; + libeth_xdp_save_buff(&rxq->xdp, xdp); - rxq->skb = skb; u64_stats_update_begin(&rxq->stats_sync); - u64_stats_add(&rxq->q_stats.packets, total_rx_pkts); - u64_stats_add(&rxq->q_stats.bytes, total_rx_bytes); + u64_stats_add(&rxq->q_stats.packets, rs.packets); + u64_stats_add(&rxq->q_stats.bytes, rs.bytes); + u64_stats_add(&rxq->q_stats.hsplit_pkts, rs.hsplit); u64_stats_update_end(&rxq->stats_sync); - /* guarantee a trip back through this routine if there was a failure */ - return total_rx_pkts; + return rs.packets; } /** @@ -3580,10 +3683,10 @@ static void idpf_rx_clean_refillq(struct idpf_buf_queue *bufq, bool failure; if (idpf_queue_has(RFL_GEN_CHK, refillq) != - !!(refill_desc & IDPF_RX_BI_GEN_M)) + !!(refill_desc & IDPF_RFL_BI_GEN_M)) break; - buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); + buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc); if (failure) break; @@ -3655,7 +3758,7 @@ static irqreturn_t idpf_vport_intr_clean_queues(int __always_unused irq, struct idpf_q_vector *q_vector = (struct idpf_q_vector *)data; q_vector->total_events++; - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -3696,6 +3799,8 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + kfree(q_vector->xsksq); + q_vector->xsksq = NULL; kfree(q_vector->complq); q_vector->complq = NULL; kfree(q_vector->bufq); @@ -3710,6 +3815,20 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) vport->q_vectors = NULL; } +static void idpf_q_vector_set_napi(struct idpf_q_vector *q_vector, bool link) +{ + struct napi_struct *napi = link ? 
&q_vector->napi : NULL; + struct net_device *dev = q_vector->vport->netdev; + + for (u32 i = 0; i < q_vector->num_rxq; i++) + netif_queue_set_napi(dev, q_vector->rx[i]->idx, + NETDEV_QUEUE_TYPE_RX, napi); + + for (u32 i = 0; i < q_vector->num_txq; i++) + netif_queue_set_napi(dev, q_vector->tx[i]->idx, + NETDEV_QUEUE_TYPE_TX, napi); +} + /** * idpf_vport_intr_rel_irq - Free the IRQ association with the OS * @vport: main vport structure @@ -3730,6 +3849,7 @@ static void idpf_vport_intr_rel_irq(struct idpf_vport *vport) vidx = vport->q_vector_idxs[vector]; irq_num = adapter->msix_entries[vidx].vector; + idpf_q_vector_set_napi(q_vector, false); kfree(free_irq(irq_num, q_vector)); } } @@ -3743,6 +3863,8 @@ static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport) struct idpf_q_vector *q_vector = vport->q_vectors; int q_idx; + writel(0, vport->noirq_dyn_ctl); + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) writel(0, q_vector[q_idx].intr_reg.dyn_ctl); } @@ -3917,6 +4039,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport) "Request_irq failed, error: %d\n", err); goto free_q_irqs; } + + idpf_q_vector_set_napi(q_vector, true); } return 0; @@ -3984,6 +4108,8 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport) if (qv->num_txq || qv->num_rxq) idpf_vport_intr_update_itr_ena_irq(qv); } + + writel(vport->noirq_dyn_ctl_ena, vport->noirq_dyn_ctl); } /** @@ -4133,7 +4259,9 @@ static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, struct idpf_rx_queue *rxq = q_vec->rx[i]; int pkts_cleaned_per_q; - pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q); + pkts_cleaned_per_q = idpf_queue_has(XSK, rxq) ? + idpf_xskrq_poll(rxq, budget_per_q) : + idpf_rx_splitq_clean(rxq, budget_per_q); /* if we clean as many as budgeted, we must not be done */ if (pkts_cleaned_per_q >= budget_per_q) clean_complete = false; @@ -4143,8 +4271,10 @@ static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, nid = numa_mem_id(); - for (i = 0; i < q_vec->num_bufq; i++) - idpf_rx_clean_refillq_all(q_vec->bufq[i], nid); + for (i = 0; i < q_vec->num_bufq; i++) { + if (!idpf_queue_has(XSK, q_vec->bufq[i])) + idpf_rx_clean_refillq_all(q_vec->bufq[i], nid); + } return clean_complete; } @@ -4158,7 +4288,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) { struct idpf_q_vector *q_vector = container_of(napi, struct idpf_q_vector, napi); - bool clean_complete; + bool clean_complete = true; int work_done = 0; /* Handle case where we are called by netpoll with a budget of 0 */ @@ -4168,8 +4298,13 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return 0; } - clean_complete = idpf_rx_splitq_clean_all(q_vector, budget, &work_done); - clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done); + for (u32 i = 0; i < q_vector->num_xsksq; i++) + clean_complete &= idpf_xsk_xmit(q_vector->xsksq[i]); + + clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, + &work_done); + clean_complete &= idpf_rx_splitq_clean_all(q_vector, budget, + &work_done); /* If work not completed, return budget and polling will return */ if (!clean_complete) { @@ -4177,20 +4312,12 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that. 
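
With the POLL_MODE early return gone, idpf_vport_splitq_napi_poll() simply ANDs the per-cleaner results (XSk xmit, Tx completions, Rx) and either keeps polling or completes NAPI and re-arms the interrupt. A stripped-down model of that control flow (hypothetical names, no kernel APIs, and it ignores the zero-budget netpoll case):

#include <stdbool.h>

struct toy_vec {
        int num_cleaners;
        bool (*clean[8])(int budget, int *work_done);
};

/* Returns the work reported to the scheduler: 'budget' means "poll again". */
static int toy_poll(struct toy_vec *v, int budget, bool *reenable_irq)
{
        bool complete = true;
        int work_done = 0;

        for (int i = 0; i < v->num_cleaners; i++)
                complete &= v->clean[i](budget, &work_done);

        if (!complete) {
                *reenable_irq = false;
                return budget;
        }

        *reenable_irq = true;
        return work_done < budget ? work_done : budget - 1;
}
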
- */ - if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, - q_vector->tx[0]))) - return budget; - work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) + if (napi_complete_done(napi, work_done)) idpf_vport_intr_update_itr_ena_irq(q_vector); else idpf_vport_intr_set_wb_on_itr(q_vector); @@ -4206,8 +4333,8 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) */ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) { + u16 num_txq_grp = vport->num_txq_grp - vport->num_xdp_txq; bool split = idpf_is_queue_model_split(vport->rxq_model); - u16 num_txq_grp = vport->num_txq_grp; struct idpf_rxq_group *rx_qgrp; struct idpf_txq_group *tx_qgrp; u32 i, qv_idx, q_index; @@ -4283,6 +4410,21 @@ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) qv_idx++; } + + for (i = 0; i < vport->num_xdp_txq; i++) { + struct idpf_tx_queue *xdpsq; + struct idpf_q_vector *qv; + + xdpsq = vport->txqs[vport->xdp_txq_offset + i]; + if (!idpf_queue_has(XSK, xdpsq)) + continue; + + qv = idpf_find_rxq_vec(vport, i); + idpf_xsk_init_wakeup(qv); + + xdpsq->q_vector = qv; + qv->xsksq[qv->num_xsksq++] = xdpsq; + } } /** @@ -4303,6 +4445,8 @@ static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport) for (i = 0; i < vport->num_q_vectors; i++) vport->q_vectors[i].v_idx = vport->q_vector_idxs[i]; + vport->noirq_v_idx = vport->q_vector_idxs[i]; + return 0; } @@ -4316,6 +4460,8 @@ static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport) for (i = 0; i < vport->num_q_vectors; i++) vport->q_vectors[i].v_idx = vecids[vport->q_vector_idxs[i]]; + vport->noirq_v_idx = vecids[vport->q_vector_idxs[i]]; + kfree(vecids); return 0; @@ -4416,6 +4562,15 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) GFP_KERNEL); if (!q_vector->complq) goto error; + + if (!vport->xdp_txq_offset) + continue; + + q_vector->xsksq = kcalloc(rxqs_per_vector, + sizeof(*q_vector->xsksq), + GFP_KERNEL); + if (!q_vector->xsksq) + goto error; } return 0; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 281de655a813..75b977094741 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -7,8 +7,10 @@ #include <linux/dim.h> #include <net/libeth/cache.h> -#include <net/tcp.h> +#include <net/libeth/types.h> #include <net/netdev_queues.h> +#include <net/tcp.h> +#include <net/xdp.h> #include "idpf_lan_txrx.h" #include "virtchnl2_lan_desc.h" @@ -58,6 +60,8 @@ #define IDPF_MBX_Q_VEC 1 #define IDPF_MIN_Q_VEC 1 #define IDPF_MIN_RDMA_VEC 2 +/* Data vector for NOIRQ queues */ +#define IDPF_RESERVED_VECS 1 #define IDPF_DFLT_TX_Q_DESC_COUNT 512 #define IDPF_DFLT_TX_COMPLQ_DESC_COUNT 512 @@ -108,8 +112,8 @@ do { \ */ #define IDPF_TX_SPLITQ_RE_MIN_GAP 64 -#define IDPF_RX_BI_GEN_M BIT(16) -#define IDPF_RX_BI_BUFID_M GENMASK(15, 0) +#define IDPF_RFL_BI_GEN_M BIT(16) +#define IDPF_RFL_BI_BUFID_M GENMASK(15, 0) #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M @@ -118,10 +122,6 @@ do { \ ((((txq)->next_to_clean > (txq)->next_to_use) ? 
0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) -#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top) -#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ - (txq)->desc_count >> 2) - #define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) /* Determine the absolute number of completions pending, i.e. the number of * completions that are expected to arrive on the TX completion queue. @@ -131,11 +131,7 @@ do { \ 0 : U32_MAX) + \ (txq)->num_completions_pending - (txq)->complq->num_completions) -#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 -/* Adjust the generation for the completion tag and wrap if necessary */ -#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ - ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \ - 0 : (txq)->compl_tag_cur_gen) +#define IDPF_TXBUF_NULL U32_MAX #define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) @@ -145,6 +141,8 @@ do { \ #define IDPF_TX_FLAGS_TUNNEL BIT(3) #define IDPF_TX_FLAGS_TSYN BIT(4) +struct libeth_rq_napi_stats; + union idpf_tx_flex_desc { struct idpf_flex_tx_desc q; /* queue based scheduling */ struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */ @@ -153,18 +151,6 @@ union idpf_tx_flex_desc { #define idpf_tx_buf libeth_sqe /** - * struct idpf_buf_lifo - LIFO for managing OOO completions - * @top: Used to know how many buffers are left - * @size: Total size of LIFO - * @bufs: Backing array - */ -struct idpf_buf_lifo { - u16 top; - u16 size; - struct idpf_tx_stash **bufs; -}; - -/** * struct idpf_tx_offload_params - Offload parameters for a given packet * @tx_flags: Feature flags enabled for this packet * @hdr_offsets: Offset parameter for single queue model @@ -196,6 +182,9 @@ struct idpf_tx_offload_params { * @compl_tag: Associated tag for completion * @td_tag: Descriptor tunneling tag * @offload: Offload parameters + * @prev_ntu: stored TxQ next_to_use in case of rollback + * @prev_refill_ntc: stored refillq next_to_clean in case of packet rollback + * @prev_refill_gen: stored refillq generation bit in case of packet rollback */ struct idpf_tx_splitq_params { enum idpf_tx_desc_dtype_value dtype; @@ -206,6 +195,10 @@ struct idpf_tx_splitq_params { }; struct idpf_tx_offload_params offload; + + u16 prev_ntu; + u16 prev_refill_ntc; + bool prev_refill_gen; }; enum idpf_tx_ctx_desc_eipt_offload { @@ -288,11 +281,13 @@ struct idpf_ptype_state { * bit and Q_RFL_GEN is the SW bit. 
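
The GEN/RFL_GEN flags documented above implement the usual generation-bit handshake: the producer stamps each descriptor with the current generation, and the consumer flips its own copy every time it wraps the ring, so "descriptor done" is a single bit comparison rather than a head/tail read. A minimal standalone model (illustrative names):

#include <stdbool.h>

struct toy_ring {
        unsigned int ntc;       /* next index to consume */
        unsigned int count;
        bool sw_gen;            /* consumer's current generation */
};

/* True when the producer has written this slot during the current lap. */
static bool slot_done(const struct toy_ring *r, bool desc_gen)
{
        return desc_gen == r->sw_gen;
}

static void advance(struct toy_ring *r)
{
        if (++r->ntc == r->count) {
                r->ntc = 0;
                r->sw_gen = !r->sw_gen;  /* new lap, new generation */
        }
}
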
* @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions - * @__IDPF_Q_POLL_MODE: Enable poll mode * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq) * @__IDPF_Q_PTP: indicates whether the Rx timestamping is enabled for the * queue + * @__IDPF_Q_NOIRQ: queue is polling-driven and has no interrupt + * @__IDPF_Q_XDP: this is an XDP queue + * @__IDPF_Q_XSK: the queue has an XSk pool installed * @__IDPF_Q_FLAGS_NBITS: Must be last */ enum idpf_queue_flags_t { @@ -300,10 +295,12 @@ enum idpf_queue_flags_t { __IDPF_Q_RFL_GEN_CHK, __IDPF_Q_FLOW_SCH_EN, __IDPF_Q_SW_MARKER, - __IDPF_Q_POLL_MODE, __IDPF_Q_CRC_EN, __IDPF_Q_HSPLIT_EN, __IDPF_Q_PTP, + __IDPF_Q_NOIRQ, + __IDPF_Q_XDP, + __IDPF_Q_XSK, __IDPF_Q_FLAGS_NBITS, }; @@ -370,14 +367,17 @@ struct idpf_intr_reg { * @num_txq: Number of TX queues * @num_bufq: Number of buffer queues * @num_complq: number of completion queues + * @num_xsksq: number of XSk send queues * @rx: Array of RX queues to service * @tx: Array of TX queues to service * @bufq: Array of buffer queues to service * @complq: array of completion queues + * @xsksq: array of XSk send queues * @intr_reg: See struct idpf_intr_reg - * @napi: napi handler + * @csd: XSk wakeup CSD * @total_events: Number of interrupts processed * @wb_on_itr: whether WB on ITR is enabled + * @napi: napi handler * @tx_dim: Data for TX net_dim algorithm * @tx_itr_value: TX interrupt throttling rate * @tx_intr_mode: Dynamic ITR or not @@ -396,19 +396,24 @@ struct idpf_q_vector { u16 num_txq; u16 num_bufq; u16 num_complq; + u16 num_xsksq; struct idpf_rx_queue **rx; struct idpf_tx_queue **tx; struct idpf_buf_queue **bufq; struct idpf_compl_queue **complq; + struct idpf_tx_queue **xsksq; struct idpf_intr_reg intr_reg; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); - struct napi_struct napi; + call_single_data_t csd; + u16 total_events; bool wb_on_itr; + struct napi_struct napi; + struct dim tx_dim; u16 tx_itr_value; bool tx_intr_mode; @@ -425,8 +430,8 @@ struct idpf_q_vector { __cacheline_group_end_aligned(cold); }; -libeth_cacheline_set_assert(struct idpf_q_vector, 120, - 24 + sizeof(struct napi_struct) + +libeth_cacheline_set_assert(struct idpf_q_vector, 136, + 56 + sizeof(struct napi_struct) + 2 * sizeof(struct dim), 8); @@ -468,38 +473,32 @@ struct idpf_tx_queue_stats { #define IDPF_DIM_DEFAULT_PROFILE_IX 1 /** - * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode - * @buf_stack: Stack of empty buffers to store buffer info for out of order - * buffer completions. See struct idpf_buf_lifo - * @sched_buf_hash: Hash table to store buffers - */ -struct idpf_txq_stash { - struct idpf_buf_lifo buf_stack; - DECLARE_HASHTABLE(sched_buf_hash, 12); -} ____cacheline_aligned; - -/** * struct idpf_rx_queue - software structure representing a receive queue * @rx: universal receive descriptor array * @single_buf: buffer descriptor array in singleq * @desc_ring: virtual descriptor ring address * @bufq_sets: Pointer to the array of buffer queues in splitq mode * @napi: NAPI instance corresponding to this queue (splitq) + * @xdp_prog: attached XDP program * @rx_buf: See struct &libeth_fqe * @pp: Page pool pointer in singleq mode - * @netdev: &net_device corresponding to this queue * @tail: Tail offset. Used for both queue models single and split. 
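
The updated libeth_cacheline_set_assert() figures pin the new sizes of the read_mostly/read_write/cold groups so that layout drift fails at build time. The same idea in plain C11, applied to a toy struct rather than the driver's:

#include <assert.h>
#include <stddef.h>

struct toy_queue {
        /* read-mostly */
        void *desc_ring;
        unsigned int desc_count;
        /* read-write */
        unsigned int next_to_use;
        unsigned int next_to_clean;
};

/* Fail the build if the read-mostly fields spill past the first cacheline. */
static_assert(offsetof(struct toy_queue, next_to_use) <= 64,
              "read-mostly group grew past one cacheline");
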
* @flags: See enum idpf_queue_flags_t * @idx: For RX queue, it is used to index to total RX queue across groups and * used for skb reporting. * @desc_count: Number of descriptors + * @num_xdp_txq: total number of XDP Tx queues + * @xdpsqs: shortcut for XDP Tx queues array * @rxdids: Supported RX descriptor ids + * @truesize: data buffer truesize in singleq * @rx_ptype_lkup: LUT of Rx ptypes + * @xdp_rxq: XDP queue info * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean * @next_to_alloc: RX buffer to allocate at - * @skb: Pointer to the skb - * @truesize: data buffer truesize in singleq + * @xdp: XDP buffer with the current frame + * @xsk: current XDP buffer in XSk mode + * @pool: XSk pool if installed * @cached_phc_time: Cached PHC time for the Rx queue * @stats_sync: See struct u64_stats_sync * @q_stats: See union idpf_rx_queue_stats @@ -524,30 +523,44 @@ struct idpf_rx_queue { struct { struct idpf_bufq_set *bufq_sets; struct napi_struct *napi; + struct bpf_prog __rcu *xdp_prog; }; struct { struct libeth_fqe *rx_buf; struct page_pool *pp; + void __iomem *tail; }; }; - struct net_device *netdev; - void __iomem *tail; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); u16 idx; u16 desc_count; - u32 rxdids; + u32 num_xdp_txq; + union { + struct idpf_tx_queue **xdpsqs; + struct { + u32 rxdids; + u32 truesize; + }; + }; const struct libeth_rx_pt *rx_ptype_lkup; + + struct xdp_rxq_info xdp_rxq; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); - u16 next_to_use; - u16 next_to_clean; - u16 next_to_alloc; + u32 next_to_use; + u32 next_to_clean; + u32 next_to_alloc; - struct sk_buff *skb; - u32 truesize; + union { + struct libeth_xdp_buff_stash xdp; + struct { + struct libeth_xdp_buff *xsk; + struct xsk_buff_pool *pool; + }; + }; u64 cached_phc_time; struct u64_stats_sync stats_sync; @@ -567,8 +580,11 @@ struct idpf_rx_queue { u16 rx_max_pkt_size; __cacheline_group_end_aligned(cold); }; -libeth_cacheline_set_assert(struct idpf_rx_queue, 64, - 88 + sizeof(struct u64_stats_sync), +libeth_cacheline_set_assert(struct idpf_rx_queue, + ALIGN(64, __alignof(struct xdp_rxq_info)) + + sizeof(struct xdp_rxq_info), + 96 + offsetof(struct idpf_rx_queue, q_stats) - + offsetofend(struct idpf_rx_queue, cached_phc_time), 32); /** @@ -580,36 +596,21 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @desc_ring: virtual descriptor ring address * @tx_buf: See struct idpf_tx_buf * @txq_grp: See struct idpf_txq_group + * @complq: corresponding completion queue in XDP mode * @dev: Device back pointer for DMA mapping + * @pool: corresponding XSk pool if installed * @tail: Tail offset. Used for both queue models single and split * @flags: See enum idpf_queue_flags_t * @idx: For TX queue, it is used as index to map between TX queue group and * hot path TX pointers stored in vport. Used in both singleq/splitq. * @desc_count: Number of descriptors * @tx_min_pkt_len: Min supported packet length - * @compl_tag_gen_s: Completion tag generation bit - * The format of the completion tag will change based on the TXQ - * descriptor ring size so that we can maintain roughly the same level - * of "uniqueness" across all descriptor sizes. For example, if the - * TXQ descriptor ring size is 64 (the minimum size supported), the - * completion tag will be formatted as below: - * 15 6 5 0 - * -------------------------------- - * | GEN=0-1023 |IDX = 0-63| - * -------------------------------- - * - * This gives us 64*1024 = 65536 possible unique values. 
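
The removed comment above described packing a generation counter above a ring index into the 16-bit completion tag; with the refill-queue scheme the tag is now simply the buffer ID. For reference, the packing it described (64-entry ring, 6 index bits, 10 generation bits) can be sketched as:

/*
 * Old-style 16-bit completion tag: generation in the high bits, ring
 * index in the low bits. With a 64-entry ring, idx uses 6 bits and gen
 * the remaining 10, giving 1024 * 64 = 65536 unique tags.
 */
#define TOY_IDX_BITS    6
#define TOY_IDX_MASK    ((1u << TOY_IDX_BITS) - 1)

static unsigned short tag_pack(unsigned int gen, unsigned int idx)
{
        return (unsigned short)((gen << TOY_IDX_BITS) | (idx & TOY_IDX_MASK));
}

static unsigned int tag_idx(unsigned short tag)
{
        return tag & TOY_IDX_MASK;
}

static unsigned int tag_gen(unsigned short tag)
{
        return tag >> TOY_IDX_BITS;
}
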
Similarly, if - * the TXQ descriptor ring size is 8160 (the maximum size supported), - * the completion tag will be formatted as below: - * 15 13 12 0 - * -------------------------------- - * |GEN | IDX = 0-8159 | - * -------------------------------- - * - * This gives us 8*8160 = 65280 possible unique values. + * @thresh: XDP queue cleaning threshold * @netdev: &net_device corresponding to this queue * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean + * @last_re: last descriptor index that RE bit was set + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on * the TX completion queue, it can be for any TXQ associated * with that completion queue. This means we can clean up to @@ -620,11 +621,11 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * only once at the end of the cleaning routine. * @clean_budget: singleq only, queue cleaning budget * @cleaned_pkts: Number of packets cleaned for the above said case - * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather - * @stash: Tx buffer stash for Flow-based scheduling mode - * @compl_tag_bufid_m: Completion tag buffer id mask - * @compl_tag_cur_gen: Used to keep track of current completion tag generation - * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset + * @refillq: Pointer to refill queue + * @pending: number of pending descriptors to send in QB + * @xdp_tx: number of pending &xdp_buff or &xdp_frame buffers + * @timer: timer for XDP Tx queue cleanup + * @xdp_lock: lock for XDP Tx queues sharing * @cached_tstamp_caps: Tx timestamp capabilities negotiated with the CP * @tstamp_task: Work that handles Tx timestamp read * @stats_sync: See struct u64_stats_sync @@ -633,6 +634,8 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @size: Length of descriptor ring in bytes * @dma: Physical address of ring * @q_vector: Backreference to associated vector + * @buf_pool_size: Total number of idpf_tx_buf + * @rel_q_id: relative virtchnl queue index */ struct idpf_tx_queue { __cacheline_group_begin_aligned(read_mostly); @@ -645,36 +648,53 @@ struct idpf_tx_queue { void *desc_ring; }; struct libeth_sqe *tx_buf; - struct idpf_txq_group *txq_grp; - struct device *dev; + union { + struct idpf_txq_group *txq_grp; + struct idpf_compl_queue *complq; + }; + union { + struct device *dev; + struct xsk_buff_pool *pool; + }; void __iomem *tail; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); u16 idx; u16 desc_count; - u16 tx_min_pkt_len; - u16 compl_tag_gen_s; + union { + u16 tx_min_pkt_len; + u32 thresh; + }; struct net_device *netdev; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); - u16 next_to_use; - u16 next_to_clean; + u32 next_to_use; + u32 next_to_clean; union { - u32 cleaned_bytes; - u32 clean_budget; - }; - u16 cleaned_pkts; + struct { + u16 last_re; + u16 tx_max_bufs; - u16 tx_max_bufs; - struct idpf_txq_stash *stash; + union { + u32 cleaned_bytes; + u32 clean_budget; + }; + u16 cleaned_pkts; - u16 compl_tag_bufid_m; - u16 compl_tag_cur_gen; - u16 compl_tag_gen_max; + struct idpf_sw_queue *refillq; + }; + struct { + u32 pending; + u32 xdp_tx; + + struct libeth_xdpsq_timer *timer; + struct libeth_xdpsq_lock xdp_lock; + }; + }; struct idpf_ptp_vport_tx_tstamp_caps *cached_tstamp_caps; struct work_struct *tstamp_task; @@ -689,25 +709,36 @@ struct idpf_tx_queue { dma_addr_t dma; struct idpf_q_vector *q_vector; + + u32 buf_pool_size; + 
u32 rel_q_id; __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, - 112 + sizeof(struct u64_stats_sync), - 24); + 104 + + offsetof(struct idpf_tx_queue, cached_tstamp_caps) - + offsetofend(struct idpf_tx_queue, timer) + + offsetof(struct idpf_tx_queue, q_stats) - + offsetofend(struct idpf_tx_queue, tstamp_task), + 32); /** * struct idpf_buf_queue - software structure representing a buffer queue * @split_buf: buffer descriptor array - * @hdr_buf: &libeth_fqe for header buffers - * @hdr_pp: &page_pool for header buffers * @buf: &libeth_fqe for data buffers * @pp: &page_pool for data buffers + * @xsk_buf: &xdp_buff for XSk Rx buffers + * @pool: &xsk_buff_pool on XSk queues + * @hdr_buf: &libeth_fqe for header buffers + * @hdr_pp: &page_pool for header buffers * @tail: Tail offset * @flags: See enum idpf_queue_flags_t * @desc_count: Number of descriptors + * @thresh: refill threshold in XSk * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean * @next_to_alloc: RX buffer to allocate at + * @pending: number of buffers to refill (Xsk) * @hdr_truesize: truesize for buffer headers * @truesize: truesize for data buffers * @q_id: Queue id @@ -721,14 +752,24 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64, struct idpf_buf_queue { __cacheline_group_begin_aligned(read_mostly); struct virtchnl2_splitq_rx_buf_desc *split_buf; + union { + struct { + struct libeth_fqe *buf; + struct page_pool *pp; + }; + struct { + struct libeth_xdp_buff **xsk_buf; + struct xsk_buff_pool *pool; + }; + }; struct libeth_fqe *hdr_buf; struct page_pool *hdr_pp; - struct libeth_fqe *buf; - struct page_pool *pp; void __iomem *tail; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); u32 desc_count; + + u32 thresh; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); @@ -736,6 +777,7 @@ struct idpf_buf_queue { u32 next_to_clean; u32 next_to_alloc; + u32 pending; u32 hdr_truesize; u32 truesize; __cacheline_group_end_aligned(read_write); @@ -756,7 +798,9 @@ libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32); /** * struct idpf_compl_queue - software structure representing a completion queue - * @comp: completion descriptor array + * @comp: 8-byte completion descriptor array + * @comp_4b: 4-byte completion descriptor array + * @desc_ring: virtual descriptor ring address * @txq_grp: See struct idpf_txq_group * @flags: See enum idpf_queue_flags_t * @desc_count: Number of descriptors @@ -776,7 +820,12 @@ libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32); */ struct idpf_compl_queue { __cacheline_group_begin_aligned(read_mostly); - struct idpf_splitq_tx_compl_desc *comp; + union { + struct idpf_splitq_tx_compl_desc *comp; + struct idpf_splitq_4b_tx_compl_desc *comp_4b; + + void *desc_ring; + }; struct idpf_txq_group *txq_grp; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); @@ -903,7 +952,6 @@ struct idpf_rxq_group { * @vport: Vport back pointer * @num_txq: Number of TX queues associated * @txqs: Array of TX queue pointers - * @stashes: array of OOO stashes for the queues * @complq: Associated completion queue pointer, split queue only * @num_completions_pending: Total number of completions pending for the * completion queue, acculumated for all TX queues @@ -918,7 +966,6 @@ struct idpf_txq_group { u16 num_txq; struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q]; - struct idpf_txq_stash *stashes; struct idpf_compl_queue *complq; @@ -1011,6 +1058,17 @@ static inline void idpf_vport_intr_set_wb_on_itr(struct idpf_q_vector 
*q_vector) reg->dyn_ctl); } +/** + * idpf_tx_splitq_get_free_bufs - get number of free buf_ids in refillq + * @refillq: pointer to refillq containing buf_ids + */ +static inline u32 idpf_tx_splitq_get_free_bufs(struct idpf_sw_queue *refillq) +{ + return (refillq->next_to_use > refillq->next_to_clean ? + 0 : refillq->desc_count) + + refillq->next_to_use - refillq->next_to_clean - 1; +} + int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); void idpf_vport_init_num_qs(struct idpf_vport *vport, struct virtchnl2_create_vport *vport_msg); @@ -1031,23 +1089,30 @@ int idpf_config_rss(struct idpf_vport *vport); int idpf_init_rss(struct idpf_vport *vport); void idpf_deinit_rss(struct idpf_vport *vport); int idpf_rx_bufs_init_all(struct idpf_vport *vport); -void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, - unsigned int size); -struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size); + +struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, + u32 q_num); +struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, + u32 q_num); +int idpf_qp_switch(struct idpf_vport *vport, u32 qid, bool en); + void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more); unsigned int idpf_size_to_txd_count(unsigned int size); netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, - struct idpf_tx_buf *first, u16 ring_idx); -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb); +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, u32 *buf_count); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q); netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev); bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); +bool idpf_rx_process_skb_fields(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); +void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq); + #endif /* !_IDPF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c index 259d50fded67..4cc58c83688c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c @@ -76,7 +76,7 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport) int num_vecs = vport->num_q_vectors; struct idpf_vec_regs *reg_vals; int num_regs, i, err = 0; - u32 rx_itr, tx_itr; + u32 rx_itr, tx_itr, val; u16 total_vecs; total_vecs = idpf_get_reserved_vecs(vport->adapter); @@ -120,6 +120,15 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport) intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); } + /* Data vector for NOIRQ queues */ + + val = reg_vals[vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC].dyn_ctl_reg; + vport->noirq_dyn_ctl = idpf_get_reg_addr(adapter, val); + + val = VF_INT_DYN_CTLN_WB_ON_ITR_M | VF_INT_DYN_CTLN_INTENA_MSK_M | + FIELD_PREP(VF_INT_DYN_CTLN_ITR_INDX_M, IDPF_NO_ITR_UPDATE_IDX); + vport->noirq_dyn_ctl_ena = val; + free_reg_vals: kfree(reg_vals); diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index a028c69f7fdc..cbb5fa30f5a0 100644 --- 
a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -702,9 +702,9 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter) /* If post failed clear the only buffer we supplied */ if (post_err) { if (dma_mem) - dmam_free_coherent(&adapter->pdev->dev, - dma_mem->size, dma_mem->va, - dma_mem->pa); + dma_free_coherent(&adapter->pdev->dev, + dma_mem->size, dma_mem->va, + dma_mem->pa); break; } @@ -716,34 +716,145 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter) return err; } +struct idpf_chunked_msg_params { + u32 (*prepare_msg)(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num); + + const void *chunks; + u32 num_chunks; + + u32 chunk_sz; + u32 config_sz; + + u32 vc_op; +}; + +struct idpf_queue_set *idpf_alloc_queue_set(struct idpf_vport *vport, u32 num) +{ + struct idpf_queue_set *qp; + + qp = kzalloc(struct_size(qp, qs, num), GFP_KERNEL); + if (!qp) + return NULL; + + qp->vport = vport; + qp->num = num; + + return qp; +} + /** - * idpf_wait_for_marker_event - wait for software marker response + * idpf_send_chunked_msg - send VC message consisting of chunks * @vport: virtual port data structure + * @params: message params * - * Returns 0 success, negative on failure. - **/ -static int idpf_wait_for_marker_event(struct idpf_vport *vport) + * Helper function for preparing a message describing queues to be enabled + * or disabled. + * + * Return: the total size of the prepared message. + */ +static int idpf_send_chunked_msg(struct idpf_vport *vport, + const struct idpf_chunked_msg_params *params) { - int event; - int i; + struct idpf_vc_xn_params xn_params = { + .vc_op = params->vc_op, + .timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC, + }; + const void *pos = params->chunks; + u32 num_chunks, num_msgs, buf_sz; + void *buf __free(kfree) = NULL; + u32 totqs = params->num_chunks; + + num_chunks = min(IDPF_NUM_CHUNKS_PER_MSG(params->config_sz, + params->chunk_sz), totqs); + num_msgs = DIV_ROUND_UP(totqs, num_chunks); - for (i = 0; i < vport->num_txq; i++) - idpf_queue_set(SW_MARKER, vport->txqs[i]); + buf_sz = params->config_sz + num_chunks * params->chunk_sz; + buf = kzalloc(buf_sz, GFP_KERNEL); + if (!buf) + return -ENOMEM; - event = wait_event_timeout(vport->sw_marker_wq, - test_and_clear_bit(IDPF_VPORT_SW_MARKER, - vport->flags), - msecs_to_jiffies(500)); + xn_params.send_buf.iov_base = buf; - for (i = 0; i < vport->num_txq; i++) - idpf_queue_clear(POLL_MODE, vport->txqs[i]); + for (u32 i = 0; i < num_msgs; i++) { + ssize_t reply_sz; - if (event) - return 0; + memset(buf, 0, buf_sz); + xn_params.send_buf.iov_len = buf_sz; - dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n"); + if (params->prepare_msg(vport, buf, pos, num_chunks) != buf_sz) + return -EINVAL; + + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + pos += num_chunks * params->chunk_sz; + totqs -= num_chunks; + + num_chunks = min(num_chunks, totqs); + buf_sz = params->config_sz + num_chunks * params->chunk_sz; + } - return -ETIMEDOUT; + return 0; +} + +/** + * idpf_wait_for_marker_event_set - wait for software marker response for + * selected Tx queues + * @qs: set of the Tx queues + * + * Return: 0 success, -errno on failure. 
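
idpf_send_chunked_msg() above caps how many per-queue chunks fit into one mailbox message and loops until all of them are sent, shrinking the final message to the remainder. The size bookkeeping, modelled in isolation (TOY_MAX_MSG_SZ is a made-up mailbox limit, not the driver's constant):

#include <stdio.h>

#define TOY_MAX_MSG_SZ  4096u

/* How many fixed-size chunks fit after the per-message header. */
static unsigned int chunks_per_msg(unsigned int hdr_sz, unsigned int chunk_sz)
{
        return (TOY_MAX_MSG_SZ - hdr_sz) / chunk_sz;
}

static void plan_messages(unsigned int total, unsigned int hdr_sz,
                          unsigned int chunk_sz)
{
        unsigned int per_msg = chunks_per_msg(hdr_sz, chunk_sz);

        while (total) {
                unsigned int now = total < per_msg ? total : per_msg;

                printf("send %u bytes carrying %u chunks\n",
                       hdr_sz + now * chunk_sz, now);
                total -= now;
        }
}
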
+ */ +static int idpf_wait_for_marker_event_set(const struct idpf_queue_set *qs) +{ + struct idpf_tx_queue *txq; + bool markers_rcvd = true; + + for (u32 i = 0; i < qs->num; i++) { + switch (qs->qs[i].type) { + case VIRTCHNL2_QUEUE_TYPE_TX: + txq = qs->qs[i].txq; + + idpf_queue_set(SW_MARKER, txq); + idpf_wait_for_sw_marker_completion(txq); + markers_rcvd &= !idpf_queue_has(SW_MARKER, txq); + break; + default: + break; + } + } + + if (!markers_rcvd) { + netdev_warn(qs->vport->netdev, + "Failed to receive marker packets\n"); + return -ETIMEDOUT; + } + + return 0; +} + +/** + * idpf_wait_for_marker_event - wait for software marker response + * @vport: virtual port data structure + * + * Return: 0 success, negative on failure. + **/ +static int idpf_wait_for_marker_event(struct idpf_vport *vport) +{ + struct idpf_queue_set *qs __free(kfree) = NULL; + + qs = idpf_alloc_queue_set(vport, vport->num_txq); + if (!qs) + return -ENOMEM; + + for (u32 i = 0; i < qs->num; i++) { + qs->qs[i].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[i].txq = vport->txqs[i]; + } + + return idpf_wait_for_marker_event_set(qs); } /** @@ -1061,21 +1172,35 @@ int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues; struct virtchnl2_get_capabilities *caps = &adapter->caps; u16 default_vports = idpf_get_default_vports(adapter); - int max_rx_q, max_tx_q; + u32 max_rx_q, max_tx_q, max_buf_q, max_compl_q; mutex_lock(&adapter->queue_lock); + /* Caps are device-wide. Give each vport an equal piece */ max_rx_q = le16_to_cpu(caps->max_rx_q) / default_vports; max_tx_q = le16_to_cpu(caps->max_tx_q) / default_vports; - if (adapter->num_alloc_vports < default_vports) { - max_q->max_rxq = min_t(u16, max_rx_q, IDPF_MAX_Q); - max_q->max_txq = min_t(u16, max_tx_q, IDPF_MAX_Q); - } else { - max_q->max_rxq = IDPF_MIN_Q; - max_q->max_txq = IDPF_MIN_Q; + max_buf_q = le16_to_cpu(caps->max_rx_bufq) / default_vports; + max_compl_q = le16_to_cpu(caps->max_tx_complq) / default_vports; + + if (adapter->num_alloc_vports >= default_vports) { + max_rx_q = IDPF_MIN_Q; + max_tx_q = IDPF_MIN_Q; } - max_q->max_bufq = max_q->max_rxq * IDPF_MAX_BUFQS_PER_RXQ_GRP; - max_q->max_complq = max_q->max_txq; + + /* + * Harmonize the numbers. The current implementation always creates + * `IDPF_MAX_BUFQS_PER_RXQ_GRP` buffer queues for each Rx queue and + * one completion queue for each Tx queue for best performance. + * If less buffer or completion queues is available, cap the number + * of the corresponding Rx/Tx queues. + */ + max_rx_q = min(max_rx_q, max_buf_q / IDPF_MAX_BUFQS_PER_RXQ_GRP); + max_tx_q = min(max_tx_q, max_compl_q); + + max_q->max_rxq = max_rx_q; + max_q->max_txq = max_tx_q; + max_q->max_bufq = max_rx_q * IDPF_MAX_BUFQS_PER_RXQ_GRP; + max_q->max_complq = max_tx_q; if (avail_queues->avail_rxq < max_q->max_rxq || avail_queues->avail_txq < max_q->max_txq || @@ -1506,7 +1631,7 @@ int idpf_send_destroy_vport_msg(struct idpf_vport *vport) xn_params.vc_op = VIRTCHNL2_OP_DESTROY_VPORT; xn_params.send_buf.iov_base = &v_id; xn_params.send_buf.iov_len = sizeof(v_id); - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); return reply_sz < 0 ? 
reply_sz : 0; @@ -1554,236 +1679,368 @@ int idpf_send_disable_vport_msg(struct idpf_vport *vport) xn_params.vc_op = VIRTCHNL2_OP_DISABLE_VPORT; xn_params.send_buf.iov_base = &v_id; xn_params.send_buf.iov_len = sizeof(v_id); - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); return reply_sz < 0 ? reply_sz : 0; } /** - * idpf_send_config_tx_queues_msg - Send virtchnl config tx queues message + * idpf_fill_txq_config_chunk - fill chunk describing the Tx queue + * @vport: virtual port data structure + * @q: Tx queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_txq_config_chunk(const struct idpf_vport *vport, + const struct idpf_tx_queue *q, + struct virtchnl2_txq_info *qi) +{ + u32 val; + + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->txq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + qi->relative_queue_id = cpu_to_le16(q->rel_q_id); + + if (!idpf_is_queue_model_split(vport->txq_model)) { + qi->sched_mode = cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); + return; + } + + if (idpf_queue_has(XDP, q)) + val = q->complq->q_id; + else + val = q->txq_grp->complq->q_id; + + qi->tx_compl_queue_id = cpu_to_le16(val); + + if (idpf_queue_has(FLOW_SCH_EN, q)) + val = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; + else + val = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; + + qi->sched_mode = cpu_to_le16(val); +} + +/** + * idpf_fill_complq_config_chunk - fill chunk describing the completion queue + * @vport: virtual port data structure + * @q: completion queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_complq_config_chunk(const struct idpf_vport *vport, + const struct idpf_compl_queue *q, + struct virtchnl2_txq_info *qi) +{ + u32 val; + + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->txq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + + if (idpf_queue_has(FLOW_SCH_EN, q)) + val = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; + else + val = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; + + qi->sched_mode = cpu_to_le16(val); +} + +/** + * idpf_prepare_cfg_txqs_msg - prepare message to configure selected Tx queues * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the tx queue + * @num_chunks: number of chunks in the message * - * Send config tx queues virtchnl message. Returns 0 on success, negative on - * failure. + * Helper function for preparing the message describing configuration of + * Tx queues. + * + * Return: the total size of the prepared message. 
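
idpf_prepare_cfg_txqs_msg() copies the prepared chunks behind the fixed header and returns struct_size(ctq, qinfo, num_chunks) so idpf_send_chunked_msg() can verify the buffer was filled exactly. The equivalent flexible-array-member arithmetic in plain C (toy message layout, not the virtchnl one):

#include <stddef.h>
#include <string.h>

struct toy_chunk { unsigned int queue_id; unsigned int ring_len; };

struct toy_cfg_msg {
        unsigned int vport_id;
        unsigned short num_chunks;
        struct toy_chunk chunks[];      /* flexible array member */
};

/* Modulo overflow checks, struct_size(msg, chunks, n) computes this. */
static size_t cfg_msg_size(const struct toy_cfg_msg *msg, size_t n)
{
        return sizeof(*msg) + n * sizeof(*msg->chunks);
}

static size_t fill_cfg_msg(struct toy_cfg_msg *msg, unsigned int vport_id,
                           const struct toy_chunk *src, size_t n)
{
        msg->vport_id = vport_id;
        msg->num_chunks = (unsigned short)n;
        memcpy(msg->chunks, src, n * sizeof(*src));

        return cfg_msg_size(msg, n);
}
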
*/ -static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) +static u32 idpf_prepare_cfg_txqs_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_config_tx_queues *ctq = buf; + + ctq->vport_id = cpu_to_le32(vport->vport_id); + ctq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(ctq->qinfo, pos, num_chunks * sizeof(*ctq->qinfo)); + + return struct_size(ctq, qinfo, num_chunks); +} + +/** + * idpf_send_config_tx_queue_set_msg - send virtchnl config Tx queues + * message for selected queues + * @qs: set of the Tx queues to configure + * + * Send config queues virtchnl message for queues contained in the @qs array. + * The @qs array can contain Tx queues (or completion queues) only. + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_config_tx_queue_set_msg(const struct idpf_queue_set *qs) { - struct virtchnl2_config_tx_queues *ctq __free(kfree) = NULL; struct virtchnl2_txq_info *qi __free(kfree) = NULL; - struct idpf_vc_xn_params xn_params = {}; - u32 config_sz, chunk_sz, buf_sz; - int totqs, num_msgs, num_chunks; - ssize_t reply_sz; - int i, k = 0; + struct idpf_chunked_msg_params params = { + .vc_op = VIRTCHNL2_OP_CONFIG_TX_QUEUES, + .prepare_msg = idpf_prepare_cfg_txqs_msg, + .config_sz = sizeof(struct virtchnl2_config_tx_queues), + .chunk_sz = sizeof(*qi), + }; - totqs = vport->num_txq + vport->num_complq; - qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL); + qi = kcalloc(qs->num, sizeof(*qi), GFP_KERNEL); if (!qi) return -ENOMEM; - /* Populate the queue info buffer with all queue context info */ - for (i = 0; i < vport->num_txq_grp; i++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - int j, sched_mode; - - for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - qi[k].queue_id = - cpu_to_le32(tx_qgrp->txqs[j]->q_id); - qi[k].model = - cpu_to_le16(vport->txq_model); - qi[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); - qi[k].ring_len = - cpu_to_le16(tx_qgrp->txqs[j]->desc_count); - qi[k].dma_ring_addr = - cpu_to_le64(tx_qgrp->txqs[j]->dma); - if (idpf_is_queue_model_split(vport->txq_model)) { - struct idpf_tx_queue *q = tx_qgrp->txqs[j]; - - qi[k].tx_compl_queue_id = - cpu_to_le16(tx_qgrp->complq->q_id); - qi[k].relative_queue_id = cpu_to_le16(j); - - if (idpf_queue_has(FLOW_SCH_EN, q)) - qi[k].sched_mode = - cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW); - else - qi[k].sched_mode = - cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); - } else { - qi[k].sched_mode = - cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); - } - } + params.chunks = qi; - if (!idpf_is_queue_model_split(vport->txq_model)) - continue; + for (u32 i = 0; i < qs->num; i++) { + if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_TX) + idpf_fill_txq_config_chunk(qs->vport, qs->qs[i].txq, + &qi[params.num_chunks++]); + else if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION) + idpf_fill_complq_config_chunk(qs->vport, + qs->qs[i].complq, + &qi[params.num_chunks++]); + } - qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id); - qi[k].model = cpu_to_le16(vport->txq_model); - qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); - qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count); - qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma); + return idpf_send_chunked_msg(qs->vport, ¶ms); +} - if (idpf_queue_has(FLOW_SCH_EN, tx_qgrp->complq)) - sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; - else - sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; - qi[k].sched_mode = cpu_to_le16(sched_mode); +/** + * 
idpf_send_config_tx_queues_msg - send virtchnl config Tx queues message + * @vport: virtual port data structure + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) +{ + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 totqs = vport->num_txq + vport->num_complq; + u32 k = 0; + + qs = idpf_alloc_queue_set(vport, totqs); + if (!qs) + return -ENOMEM; - k++; + /* Populate the queue info buffer with all queue context info */ + for (u32 i = 0; i < vport->num_txq_grp; i++) { + const struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (u32 j = 0; j < tx_qgrp->num_txq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[k++].txq = tx_qgrp->txqs[j]; + } + + if (idpf_is_queue_model_split(vport->txq_model)) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[k++].complq = tx_qgrp->complq; + } } /* Make sure accounting agrees */ if (k != totqs) return -EINVAL; - /* Chunk up the queue contexts into multiple messages to avoid - * sending a control queue message buffer that is too large - */ - config_sz = sizeof(struct virtchnl2_config_tx_queues); - chunk_sz = sizeof(struct virtchnl2_txq_info); + return idpf_send_config_tx_queue_set_msg(qs); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - totqs); - num_msgs = DIV_ROUND_UP(totqs, num_chunks); +/** + * idpf_fill_rxq_config_chunk - fill chunk describing the Rx queue + * @vport: virtual port data structure + * @q: Rx queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_rxq_config_chunk(const struct idpf_vport *vport, + struct idpf_rx_queue *q, + struct virtchnl2_rxq_info *qi) +{ + const struct idpf_bufq_set *sets; + + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->rxq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + qi->max_pkt_size = cpu_to_le32(q->rx_max_pkt_size); + qi->rx_buffer_low_watermark = cpu_to_le16(q->rx_buffer_low_watermark); + qi->qflags = cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi->qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + qi->data_buffer_size = cpu_to_le32(q->rx_buf_size); + qi->desc_ids = cpu_to_le64(q->rxdids); - buf_sz = struct_size(ctq, qinfo, num_chunks); - ctq = kzalloc(buf_sz, GFP_KERNEL); - if (!ctq) - return -ENOMEM; + return; + } - xn_params.vc_op = VIRTCHNL2_OP_CONFIG_TX_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + sets = q->bufq_sets; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(ctq, 0, buf_sz); - ctq->vport_id = cpu_to_le32(vport->vport_id); - ctq->num_qinfo = cpu_to_le16(num_chunks); - memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks); + /* + * In splitq mode, RxQ buffer size should be set to that of the first + * buffer queue associated with this RxQ. 
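
The rewritten config path first collects queues into an idpf_queue_set, where each entry carries a queue type plus the matching queue pointer, and the per-type fill helpers are then chosen by switching on that type. A tiny standalone illustration of the pattern (the union layout here is an assumption about idpf_queue_set; all types are made up):

enum toy_q_type { TOY_Q_TX, TOY_Q_COMPL };

struct toy_txq { int id; };
struct toy_complq { int id; };

struct toy_q_ptr {
        enum toy_q_type type;
        union {
                struct toy_txq *txq;
                struct toy_complq *complq;
        };
};

static int queue_id(const struct toy_q_ptr *q)
{
        switch (q->type) {
        case TOY_Q_TX:
                return q->txq->id;
        case TOY_Q_COMPL:
                return q->complq->id;
        }

        return -1;
}
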
+ */ + q->rx_buf_size = sets[0].bufq.rx_buf_size; + qi->data_buffer_size = cpu_to_le32(q->rx_buf_size); - xn_params.send_buf.iov_base = ctq; - xn_params.send_buf.iov_len = buf_sz; - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; + qi->rx_bufq1_id = cpu_to_le16(sets[0].bufq.q_id); + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { + qi->bufq2_ena = IDPF_BUFQ2_ENA; + qi->rx_bufq2_id = cpu_to_le16(sets[1].bufq.q_id); + } - k += num_chunks; - totqs -= num_chunks; - num_chunks = min(num_chunks, totqs); - /* Recalculate buffer size */ - buf_sz = struct_size(ctq, qinfo, num_chunks); + q->rx_hbuf_size = sets[0].bufq.rx_hbuf_size; + + if (idpf_queue_has(HSPLIT_EN, q)) { + qi->qflags |= cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi->hdr_buffer_size = cpu_to_le16(q->rx_hbuf_size); } - return 0; + qi->desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); +} + +/** + * idpf_fill_bufq_config_chunk - fill chunk describing the buffer queue + * @vport: virtual port data structure + * @q: buffer queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_bufq_config_chunk(const struct idpf_vport *vport, + const struct idpf_buf_queue *q, + struct virtchnl2_rxq_info *qi) +{ + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->rxq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + qi->data_buffer_size = cpu_to_le32(q->rx_buf_size); + qi->rx_buffer_low_watermark = cpu_to_le16(q->rx_buffer_low_watermark); + qi->desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + qi->buffer_notif_stride = IDPF_RX_BUF_STRIDE; + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi->qflags = cpu_to_le16(VIRTCHNL2_RXQ_RSC); + + if (idpf_queue_has(HSPLIT_EN, q)) { + qi->qflags |= cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi->hdr_buffer_size = cpu_to_le16(q->rx_hbuf_size); + } } /** - * idpf_send_config_rx_queues_msg - Send virtchnl config rx queues message + * idpf_prepare_cfg_rxqs_msg - prepare message to configure selected Rx queues * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the rx queue + * @num_chunks: number of chunks in the message * - * Send config rx queues virtchnl message. Returns 0 on success, negative on - * failure. + * Helper function for preparing the message describing configuration of + * Rx queues. + * + * Return: the total size of the prepared message. */ -static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) +static u32 idpf_prepare_cfg_rxqs_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_config_rx_queues *crq = buf; + + crq->vport_id = cpu_to_le32(vport->vport_id); + crq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(crq->qinfo, pos, num_chunks * sizeof(*crq->qinfo)); + + return struct_size(crq, qinfo, num_chunks); +} + +/** + * idpf_send_config_rx_queue_set_msg - send virtchnl config Rx queues message + * for selected queues. + * @qs: set of the Rx queues to configure + * + * Send config queues virtchnl message for queues contained in the @qs array. + * The @qs array can contain Rx queues (or buffer queues) only. + * + * Return: 0 on success, -errno on failure. 
+ */ +static int idpf_send_config_rx_queue_set_msg(const struct idpf_queue_set *qs) { - struct virtchnl2_config_rx_queues *crq __free(kfree) = NULL; struct virtchnl2_rxq_info *qi __free(kfree) = NULL; - struct idpf_vc_xn_params xn_params = {}; - u32 config_sz, chunk_sz, buf_sz; - int totqs, num_msgs, num_chunks; - ssize_t reply_sz; - int i, k = 0; + struct idpf_chunked_msg_params params = { + .vc_op = VIRTCHNL2_OP_CONFIG_RX_QUEUES, + .prepare_msg = idpf_prepare_cfg_rxqs_msg, + .config_sz = sizeof(struct virtchnl2_config_rx_queues), + .chunk_sz = sizeof(*qi), + }; - totqs = vport->num_rxq + vport->num_bufq; - qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL); + qi = kcalloc(qs->num, sizeof(*qi), GFP_KERNEL); if (!qi) return -ENOMEM; - /* Populate the queue info buffer with all queue context info */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - u16 num_rxq; - int j; - - if (!idpf_is_queue_model_split(vport->rxq_model)) - goto setup_rxqs; - - for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { - struct idpf_buf_queue *bufq = - &rx_qgrp->splitq.bufq_sets[j].bufq; - - qi[k].queue_id = cpu_to_le32(bufq->q_id); - qi[k].model = cpu_to_le16(vport->rxq_model); - qi[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); - qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); - qi[k].ring_len = cpu_to_le16(bufq->desc_count); - qi[k].dma_ring_addr = cpu_to_le64(bufq->dma); - qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size); - qi[k].buffer_notif_stride = IDPF_RX_BUF_STRIDE; - qi[k].rx_buffer_low_watermark = - cpu_to_le16(bufq->rx_buffer_low_watermark); - if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) - qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); - } + params.chunks = qi; -setup_rxqs: - if (idpf_is_queue_model_split(vport->rxq_model)) - num_rxq = rx_qgrp->splitq.num_rxq_sets; - else - num_rxq = rx_qgrp->singleq.num_rxq; + for (u32 i = 0; i < qs->num; i++) { + if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_RX) + idpf_fill_rxq_config_chunk(qs->vport, qs->qs[i].rxq, + &qi[params.num_chunks++]); + else if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_RX_BUFFER) + idpf_fill_bufq_config_chunk(qs->vport, qs->qs[i].bufq, + &qi[params.num_chunks++]); + } - for (j = 0; j < num_rxq; j++, k++) { - const struct idpf_bufq_set *sets; - struct idpf_rx_queue *rxq; + return idpf_send_chunked_msg(qs->vport, ¶ms); +} - if (!idpf_is_queue_model_split(vport->rxq_model)) { - rxq = rx_qgrp->singleq.rxqs[j]; - goto common_qi_fields; - } +/** + * idpf_send_config_rx_queues_msg - send virtchnl config Rx queues message + * @vport: virtual port data structure + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) +{ + bool splitq = idpf_is_queue_model_split(vport->rxq_model); + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 totqs = vport->num_rxq + vport->num_bufq; + u32 k = 0; - rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; - sets = rxq->bufq_sets; + qs = idpf_alloc_queue_set(vport, totqs); + if (!qs) + return -ENOMEM; - /* In splitq mode, RXQ buffer size should be - * set to that of the first buffer queue - * associated with this RXQ. 
- */ - rxq->rx_buf_size = sets[0].bufq.rx_buf_size; + /* Populate the queue info buffer with all queue context info */ + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; - qi[k].rx_bufq1_id = cpu_to_le16(sets[0].bufq.q_id); - if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { - qi[k].bufq2_ena = IDPF_BUFQ2_ENA; - qi[k].rx_bufq2_id = - cpu_to_le16(sets[1].bufq.q_id); - } - qi[k].rx_buffer_low_watermark = - cpu_to_le16(rxq->rx_buffer_low_watermark); - if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) - qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); - - rxq->rx_hbuf_size = sets[0].bufq.rx_hbuf_size; - - if (idpf_queue_has(HSPLIT_EN, rxq)) { - qi[k].qflags |= - cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); - qi[k].hdr_buffer_size = - cpu_to_le16(rxq->rx_hbuf_size); - } + if (!splitq) { + num_rxq = rx_qgrp->singleq.num_rxq; + goto rxq; + } -common_qi_fields: - qi[k].queue_id = cpu_to_le32(rxq->q_id); - qi[k].model = cpu_to_le16(vport->rxq_model); - qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - qi[k].ring_len = cpu_to_le16(rxq->desc_count); - qi[k].dma_ring_addr = cpu_to_le64(rxq->dma); - qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size); - qi[k].data_buffer_size = cpu_to_le32(rxq->rx_buf_size); - qi[k].qflags |= - cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE); - qi[k].desc_ids = cpu_to_le64(rxq->rxdids); + for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + qs->qs[k++].bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; + } + + num_rxq = rx_qgrp->splitq.num_rxq_sets; + +rxq: + for (u32 j = 0; j < num_rxq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX; + + if (splitq) + qs->qs[k++].rxq = + &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + qs->qs[k++].rxq = rx_qgrp->singleq.rxqs[j]; } } @@ -1791,317 +2048,395 @@ common_qi_fields: if (k != totqs) return -EINVAL; - /* Chunk up the queue contexts into multiple messages to avoid - * sending a control queue message buffer that is too large - */ - config_sz = sizeof(struct virtchnl2_config_rx_queues); - chunk_sz = sizeof(struct virtchnl2_rxq_info); + return idpf_send_config_rx_queue_set_msg(qs); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - totqs); - num_msgs = DIV_ROUND_UP(totqs, num_chunks); +/** + * idpf_prepare_ena_dis_qs_msg - prepare message to enable/disable selected + * queues + * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the queue + * @num_chunks: number of chunks in the message + * + * Helper function for preparing the message describing queues to be enabled + * or disabled. + * + * Return: the total size of the prepared message. + */ +static u32 idpf_prepare_ena_dis_qs_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_del_ena_dis_queues *eq = buf; + + eq->vport_id = cpu_to_le32(vport->vport_id); + eq->chunks.num_chunks = cpu_to_le16(num_chunks); + memcpy(eq->chunks.chunks, pos, + num_chunks * sizeof(*eq->chunks.chunks)); + + return struct_size(eq, chunks.chunks, num_chunks); +} + +/** + * idpf_send_ena_dis_queue_set_msg - send virtchnl enable or disable queues + * message for selected queues + * @qs: set of the queues to enable or disable + * @en: whether to enable or disable queues + * + * Send enable or disable queues virtchnl message for queues contained + * in the @qs array. + * The @qs array can contain pointers to both Rx and Tx queues. 
+ * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_ena_dis_queue_set_msg(const struct idpf_queue_set *qs, + bool en) +{ + struct virtchnl2_queue_chunk *qc __free(kfree) = NULL; + struct idpf_chunked_msg_params params = { + .vc_op = en ? VIRTCHNL2_OP_ENABLE_QUEUES : + VIRTCHNL2_OP_DISABLE_QUEUES, + .prepare_msg = idpf_prepare_ena_dis_qs_msg, + .config_sz = sizeof(struct virtchnl2_del_ena_dis_queues), + .chunk_sz = sizeof(*qc), + .num_chunks = qs->num, + }; - buf_sz = struct_size(crq, qinfo, num_chunks); - crq = kzalloc(buf_sz, GFP_KERNEL); - if (!crq) + qc = kcalloc(qs->num, sizeof(*qc), GFP_KERNEL); + if (!qc) return -ENOMEM; - xn_params.vc_op = VIRTCHNL2_OP_CONFIG_RX_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + params.chunks = qc; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(crq, 0, buf_sz); - crq->vport_id = cpu_to_le32(vport->vport_id); - crq->num_qinfo = cpu_to_le16(num_chunks); - memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks); + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + u32 qid; - xn_params.send_buf.iov_base = crq; - xn_params.send_buf.iov_len = buf_sz; - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; + qc[i].type = cpu_to_le32(q->type); + qc[i].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); - k += num_chunks; - totqs -= num_chunks; - num_chunks = min(num_chunks, totqs); - /* Recalculate buffer size */ - buf_sz = struct_size(crq, qinfo, num_chunks); + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + qid = q->rxq->q_id; + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + qid = q->txq->q_id; + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + qid = q->bufq->q_id; + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + qid = q->complq->q_id; + break; + default: + return -EINVAL; + } + + qc[i].start_queue_id = cpu_to_le32(qid); } - return 0; + return idpf_send_chunked_msg(qs->vport, ¶ms); } /** - * idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable - * queues message + * idpf_send_ena_dis_queues_msg - send virtchnl enable or disable queues + * message * @vport: virtual port data structure - * @ena: if true enable, false disable + * @en: whether to enable or disable queues * - * Send enable or disable queues virtchnl message. Returns 0 on success, - * negative on failure. + * Return: 0 on success, -errno on failure. 
*/ -static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) +static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool en) { - struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL; - struct virtchnl2_queue_chunk *qc __free(kfree) = NULL; - u32 num_msgs, num_chunks, num_txq, num_rxq, num_q; - struct idpf_vc_xn_params xn_params = {}; - struct virtchnl2_queue_chunks *qcs; - u32 config_sz, chunk_sz, buf_sz; - ssize_t reply_sz; - int i, j, k = 0; + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 num_txq, num_q, k = 0; + bool split; num_txq = vport->num_txq + vport->num_complq; - num_rxq = vport->num_rxq + vport->num_bufq; - num_q = num_txq + num_rxq; - buf_sz = sizeof(struct virtchnl2_queue_chunk) * num_q; - qc = kzalloc(buf_sz, GFP_KERNEL); - if (!qc) + num_q = num_txq + vport->num_rxq + vport->num_bufq; + + qs = idpf_alloc_queue_set(vport, num_q); + if (!qs) return -ENOMEM; - for (i = 0; i < vport->num_txq_grp; i++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + split = idpf_is_queue_model_split(vport->txq_model); - for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); - qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); - } - } - if (vport->num_txq != k) - return -EINVAL; + for (u32 i = 0; i < vport->num_txq_grp; i++) { + const struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - if (!idpf_is_queue_model_split(vport->txq_model)) - goto setup_rx; + for (u32 j = 0; j < tx_qgrp->num_txq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[k++].txq = tx_qgrp->txqs[j]; + } - for (i = 0; i < vport->num_txq_grp; i++, k++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + if (!split) + continue; - qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); - qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[k++].complq = tx_qgrp->complq; } - if (vport->num_complq != (k - vport->num_txq)) + + if (k != num_txq) return -EINVAL; -setup_rx: - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + split = idpf_is_queue_model_split(vport->rxq_model); - if (idpf_is_queue_model_split(vport->rxq_model)) + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; + + if (split) num_rxq = rx_qgrp->splitq.num_rxq_sets; else num_rxq = rx_qgrp->singleq.num_rxq; - for (j = 0; j < num_rxq; j++, k++) { - if (idpf_is_queue_model_split(vport->rxq_model)) { - qc[k].start_queue_id = - cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id); - qc[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - } else { - qc[k].start_queue_id = - cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id); - qc[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - } - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); - } - } - if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) - return -EINVAL; - - if (!idpf_is_queue_model_split(vport->rxq_model)) - goto send_msg; + for (u32 j = 0; j < num_rxq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX; - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + if (split) + qs->qs[k++].rxq = + &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + qs->qs[k++].rxq = rx_qgrp->singleq.rxqs[j]; + } - for (j = 0; j < vport->num_bufqs_per_qgrp; j++, 
k++) { - const struct idpf_buf_queue *q; + if (!split) + continue; - q = &rx_qgrp->splitq.bufq_sets[j].bufq; - qc[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); - qc[k].start_queue_id = cpu_to_le32(q->q_id); - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + qs->qs[k++].bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; } } - if (vport->num_bufq != k - (vport->num_txq + - vport->num_complq + - vport->num_rxq)) + + if (k != num_q) return -EINVAL; -send_msg: - /* Chunk up the queue info into multiple messages */ - config_sz = sizeof(struct virtchnl2_del_ena_dis_queues); - chunk_sz = sizeof(struct virtchnl2_queue_chunk); + return idpf_send_ena_dis_queue_set_msg(qs, en); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - num_q); - num_msgs = DIV_ROUND_UP(num_q, num_chunks); +/** + * idpf_prep_map_unmap_queue_set_vector_msg - prepare message to map or unmap + * queue set to the interrupt vector + * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the vector mapping + * @num_chunks: number of chunks in the message + * + * Helper function for preparing the message describing mapping queues to + * q_vectors. + * + * Return: the total size of the prepared message. + */ +static u32 +idpf_prep_map_unmap_queue_set_vector_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_queue_vector_maps *vqvm = buf; - buf_sz = struct_size(eq, chunks.chunks, num_chunks); - eq = kzalloc(buf_sz, GFP_KERNEL); - if (!eq) + vqvm->vport_id = cpu_to_le32(vport->vport_id); + vqvm->num_qv_maps = cpu_to_le16(num_chunks); + memcpy(vqvm->qv_maps, pos, num_chunks * sizeof(*vqvm->qv_maps)); + + return struct_size(vqvm, qv_maps, num_chunks); +} + +/** + * idpf_send_map_unmap_queue_set_vector_msg - send virtchnl map or unmap + * queue set vector message + * @qs: set of the queues to map or unmap + * @map: true for map and false for unmap + * + * Return: 0 on success, -errno on failure. + */ +static int +idpf_send_map_unmap_queue_set_vector_msg(const struct idpf_queue_set *qs, + bool map) +{ + struct virtchnl2_queue_vector *vqv __free(kfree) = NULL; + struct idpf_chunked_msg_params params = { + .vc_op = map ? 
VIRTCHNL2_OP_MAP_QUEUE_VECTOR : + VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR, + .prepare_msg = idpf_prep_map_unmap_queue_set_vector_msg, + .config_sz = sizeof(struct virtchnl2_queue_vector_maps), + .chunk_sz = sizeof(*vqv), + .num_chunks = qs->num, + }; + bool split; + + vqv = kcalloc(qs->num, sizeof(*vqv), GFP_KERNEL); + if (!vqv) return -ENOMEM; - if (ena) { - xn_params.vc_op = VIRTCHNL2_OP_ENABLE_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; - } else { - xn_params.vc_op = VIRTCHNL2_OP_DISABLE_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; - } + params.chunks = vqv; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(eq, 0, buf_sz); - eq->vport_id = cpu_to_le32(vport->vport_id); - eq->chunks.num_chunks = cpu_to_le16(num_chunks); - qcs = &eq->chunks; - memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks); + split = idpf_is_queue_model_split(qs->vport->txq_model); - xn_params.send_buf.iov_base = eq; - xn_params.send_buf.iov_len = buf_sz; - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + const struct idpf_q_vector *vec; + u32 qid, v_idx, itr_idx; + + vqv[i].queue_type = cpu_to_le32(q->type); + + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + qid = q->rxq->q_id; + + if (idpf_queue_has(NOIRQ, q->rxq)) + vec = NULL; + else + vec = q->rxq->q_vector; + + if (vec) { + v_idx = vec->v_idx; + itr_idx = vec->rx_itr_idx; + } else { + v_idx = qs->vport->noirq_v_idx; + itr_idx = VIRTCHNL2_ITR_IDX_0; + } + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + qid = q->txq->q_id; + + if (idpf_queue_has(NOIRQ, q->txq)) + vec = NULL; + else if (idpf_queue_has(XDP, q->txq)) + vec = q->txq->complq->q_vector; + else if (split) + vec = q->txq->txq_grp->complq->q_vector; + else + vec = q->txq->q_vector; + + if (vec) { + v_idx = vec->v_idx; + itr_idx = vec->tx_itr_idx; + } else { + v_idx = qs->vport->noirq_v_idx; + itr_idx = VIRTCHNL2_ITR_IDX_1; + } + break; + default: + return -EINVAL; + } - k += num_chunks; - num_q -= num_chunks; - num_chunks = min(num_chunks, num_q); - /* Recalculate buffer size */ - buf_sz = struct_size(eq, chunks.chunks, num_chunks); + vqv[i].queue_id = cpu_to_le32(qid); + vqv[i].vector_id = cpu_to_le16(v_idx); + vqv[i].itr_idx = cpu_to_le32(itr_idx); } - return 0; + return idpf_send_chunked_msg(qs->vport, ¶ms); } /** - * idpf_send_map_unmap_queue_vector_msg - Send virtchnl map or unmap queue - * vector message + * idpf_send_map_unmap_queue_vector_msg - send virtchnl map or unmap queue + * vector message * @vport: virtual port data structure * @map: true for map and false for unmap * - * Send map or unmap queue vector virtchnl message. Returns 0 on success, - * negative on failure. + * Return: 0 on success, -errno on failure. 
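Reading the switch above: a queue flagged NOIRQ is bound to the vport's reserved noirq_v_idx (ITR index 0 for Rx, 1 for Tx), an XDP Tx queue borrows its completion queue's vector, a regular splitq Tx queue uses its group completion queue's vector, and everything else uses its own q_vector with the matching rx_itr_idx/tx_itr_idx. For example, an XDP SQ (which has NOIRQ set by idpf_xdpsqs_get()) effectively ends up with:

	vqv[i].vector_id = cpu_to_le16(qs->vport->noirq_v_idx);
	vqv[i].itr_idx = cpu_to_le32(VIRTCHNL2_ITR_IDX_1);

i.e. it is parked on the reserved no-IRQ vector rather than on a NAPI vector. This is only a summary of the code above, not additional driver behaviour.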
*/ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) { - struct virtchnl2_queue_vector_maps *vqvm __free(kfree) = NULL; - struct virtchnl2_queue_vector *vqv __free(kfree) = NULL; - struct idpf_vc_xn_params xn_params = {}; - u32 config_sz, chunk_sz, buf_sz; - u32 num_msgs, num_chunks, num_q; - ssize_t reply_sz; - int i, j, k = 0; + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 num_q = vport->num_txq + vport->num_rxq; + u32 k = 0; - num_q = vport->num_txq + vport->num_rxq; - - buf_sz = sizeof(struct virtchnl2_queue_vector) * num_q; - vqv = kzalloc(buf_sz, GFP_KERNEL); - if (!vqv) + qs = idpf_alloc_queue_set(vport, num_q); + if (!qs) return -ENOMEM; - for (i = 0; i < vport->num_txq_grp; i++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - - for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - vqv[k].queue_type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); - vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + for (u32 i = 0; i < vport->num_txq_grp; i++) { + const struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - if (idpf_is_queue_model_split(vport->txq_model)) { - vqv[k].vector_id = - cpu_to_le16(tx_qgrp->complq->q_vector->v_idx); - vqv[k].itr_idx = - cpu_to_le32(tx_qgrp->complq->q_vector->tx_itr_idx); - } else { - vqv[k].vector_id = - cpu_to_le16(tx_qgrp->txqs[j]->q_vector->v_idx); - vqv[k].itr_idx = - cpu_to_le32(tx_qgrp->txqs[j]->q_vector->tx_itr_idx); - } + for (u32 j = 0; j < tx_qgrp->num_txq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[k++].txq = tx_qgrp->txqs[j]; } } - if (vport->num_txq != k) + if (k != vport->num_txq) return -EINVAL; - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - u16 num_rxq; + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; if (idpf_is_queue_model_split(vport->rxq_model)) num_rxq = rx_qgrp->splitq.num_rxq_sets; else num_rxq = rx_qgrp->singleq.num_rxq; - for (j = 0; j < num_rxq; j++, k++) { - struct idpf_rx_queue *rxq; + for (u32 j = 0; j < num_rxq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX; if (idpf_is_queue_model_split(vport->rxq_model)) - rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + qs->qs[k++].rxq = + &rx_qgrp->splitq.rxq_sets[j]->rxq; else - rxq = rx_qgrp->singleq.rxqs[j]; - - vqv[k].queue_type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - vqv[k].queue_id = cpu_to_le32(rxq->q_id); - vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx); - vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx); + qs->qs[k++].rxq = rx_qgrp->singleq.rxqs[j]; } } - if (idpf_is_queue_model_split(vport->txq_model)) { - if (vport->num_rxq != k - vport->num_complq) - return -EINVAL; - } else { - if (vport->num_rxq != k - vport->num_txq) - return -EINVAL; - } + if (k != num_q) + return -EINVAL; - /* Chunk up the vector info into multiple messages */ - config_sz = sizeof(struct virtchnl2_queue_vector_maps); - chunk_sz = sizeof(struct virtchnl2_queue_vector); + return idpf_send_map_unmap_queue_set_vector_msg(qs, map); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - num_q); - num_msgs = DIV_ROUND_UP(num_q, num_chunks); +/** + * idpf_send_enable_queue_set_msg - send enable queues virtchnl message for + * selected queues + * @qs: set of the queues + * + * Send enable queues virtchnl message for queues contained in the @qs array. + * + * Return: 0 on success, -errno on failure. 
+ */ +int idpf_send_enable_queue_set_msg(const struct idpf_queue_set *qs) +{ + return idpf_send_ena_dis_queue_set_msg(qs, true); +} - buf_sz = struct_size(vqvm, qv_maps, num_chunks); - vqvm = kzalloc(buf_sz, GFP_KERNEL); - if (!vqvm) - return -ENOMEM; +/** + * idpf_send_disable_queue_set_msg - send disable queues virtchnl message for + * selected queues + * @qs: set of the queues + * + * Return: 0 on success, -errno on failure. + */ +int idpf_send_disable_queue_set_msg(const struct idpf_queue_set *qs) +{ + int err; - if (map) { - xn_params.vc_op = VIRTCHNL2_OP_MAP_QUEUE_VECTOR; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; - } else { - xn_params.vc_op = VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; - } + err = idpf_send_ena_dis_queue_set_msg(qs, false); + if (err) + return err; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(vqvm, 0, buf_sz); - xn_params.send_buf.iov_base = vqvm; - xn_params.send_buf.iov_len = buf_sz; - vqvm->vport_id = cpu_to_le32(vport->vport_id); - vqvm->num_qv_maps = cpu_to_le16(num_chunks); - memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks); + return idpf_wait_for_marker_event_set(qs); +} - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; +/** + * idpf_send_config_queue_set_msg - send virtchnl config queues message for + * selected queues + * @qs: set of the queues + * + * Send config queues virtchnl message for queues contained in the @qs array. + * The @qs array can contain both Rx or Tx queues. + * + * Return: 0 on success, -errno on failure. + */ +int idpf_send_config_queue_set_msg(const struct idpf_queue_set *qs) +{ + int err; - k += num_chunks; - num_q -= num_chunks; - num_chunks = min(num_chunks, num_q); - /* Recalculate buffer size */ - buf_sz = struct_size(vqvm, qv_maps, num_chunks); - } + err = idpf_send_config_tx_queue_set_msg(qs); + if (err) + return err; - return 0; + return idpf_send_config_rx_queue_set_msg(qs); } /** @@ -2125,24 +2460,12 @@ int idpf_send_enable_queues_msg(struct idpf_vport *vport) */ int idpf_send_disable_queues_msg(struct idpf_vport *vport) { - int err, i; + int err; err = idpf_send_ena_dis_queues_msg(vport, false); if (err) return err; - /* switch to poll mode as interrupts will be disabled after disable - * queues virtchnl message is sent - */ - for (i = 0; i < vport->num_txq; i++) - idpf_queue_set(POLL_MODE, vport->txqs[i]); - - /* schedule the napi to receive all the marker packets */ - local_bh_disable(); - for (i = 0; i < vport->num_q_vectors; i++) - napi_schedule(&vport->q_vectors[i].napi); - local_bh_enable(); - return idpf_wait_for_marker_event(vport); } @@ -2207,7 +2530,7 @@ int idpf_send_delete_queues_msg(struct idpf_vport *vport) num_chunks); xn_params.vc_op = VIRTCHNL2_OP_DEL_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; xn_params.send_buf.iov_base = eq; xn_params.send_buf.iov_len = buf_size; reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); @@ -2371,7 +2694,7 @@ int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter) xn_params.vc_op = VIRTCHNL2_OP_DEALLOC_VECTORS; xn_params.send_buf.iov_base = vcs; xn_params.send_buf.iov_len = buf_size; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; reply_sz = idpf_vc_xn_exec(adapter, &xn_params); if (reply_sz < 0) return reply_sz; @@ -3285,9 +3608,17 @@ int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport) { struct 
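The exported *_queue_set_msg() wrappers above exist so that a subset of queues, for instance a single Rx/Tx queue pair being switched in or out of XSK mode, can be quiesced and brought back without touching the whole vport. A hypothetical caller could look like the sketch below; the queue pointers are placeholders for whatever the real call site (e.g. idpf_qp_switch()) resolves them to, and the sequence shown is an assumption, not a copy of that function.

static int example_restart_queue_pair(struct idpf_vport *vport,
				      struct idpf_rx_queue *rxq,
				      struct idpf_buf_queue *bufq,
				      struct idpf_tx_queue *txq,
				      struct idpf_compl_queue *complq)
{
	struct idpf_queue_set *qs __free(kfree) = NULL;
	int err;

	qs = idpf_alloc_queue_set(vport, 4);
	if (!qs)
		return -ENOMEM;

	qs->qs[0].type = VIRTCHNL2_QUEUE_TYPE_RX;
	qs->qs[0].rxq = rxq;
	qs->qs[1].type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER;
	qs->qs[1].bufq = bufq;
	qs->qs[2].type = VIRTCHNL2_QUEUE_TYPE_TX;
	qs->qs[2].txq = txq;
	qs->qs[3].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION;
	qs->qs[3].complq = complq;

	/* Quiesce, reprogram and restart just these four queues */
	err = idpf_send_disable_queue_set_msg(qs);
	if (err)
		return err;

	err = idpf_send_config_queue_set_msg(qs);
	if (err)
		return err;

	return idpf_send_enable_queue_set_msg(qs);
}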
idpf_vector_info vec_info; int num_alloc_vecs; + u32 req; vec_info.num_curr_vecs = vport->num_q_vectors; - vec_info.num_req_vecs = max(vport->num_txq, vport->num_rxq); + if (vec_info.num_curr_vecs) + vec_info.num_curr_vecs += IDPF_RESERVED_VECS; + + /* XDPSQs are all bound to the NOIRQ vector from IDPF_RESERVED_VECS */ + req = max(vport->num_txq - vport->num_xdp_txq, vport->num_rxq) + + IDPF_RESERVED_VECS; + vec_info.num_req_vecs = req; + vec_info.default_vport = vport->default_vport; vec_info.index = vport->idx; @@ -3300,7 +3631,7 @@ int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport) return -EINVAL; } - vport->num_q_vectors = num_alloc_vecs; + vport->num_q_vectors = num_alloc_vecs - IDPF_RESERVED_VECS; return 0; } @@ -3765,6 +4096,16 @@ u32 idpf_get_vport_id(struct idpf_vport *vport) return le32_to_cpu(vport_msg->vport_id); } +static void idpf_set_mac_type(struct idpf_vport *vport, + struct virtchnl2_mac_addr *mac_addr) +{ + bool is_primary; + + is_primary = ether_addr_equal(vport->default_mac_addr, mac_addr->addr); + mac_addr->type = is_primary ? VIRTCHNL2_MAC_ADDR_PRIMARY : + VIRTCHNL2_MAC_ADDR_EXTRA; +} + /** * idpf_mac_filter_async_handler - Async callback for mac filters * @adapter: private data struct @@ -3894,6 +4235,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, list) { if (add && f->add) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->add = false; if (i == total_filters) @@ -3901,6 +4243,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, } if (!add && f->remove) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->remove = false; if (i == total_filters) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h index 86f30f0db07a..eac3d15daa42 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -4,7 +4,8 @@ #ifndef _IDPF_VIRTCHNL_H_ #define _IDPF_VIRTCHNL_H_ -#define IDPF_VC_XN_MIN_TIMEOUT_MSEC 2000 +#include "virtchnl2.h" + #define IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC (60 * 1000) #define IDPF_VC_XN_IDX_M GENMASK(7, 0) #define IDPF_VC_XN_SALT_M GENMASK(15, 8) @@ -115,6 +116,33 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter); int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, u16 msg_size, u8 *msg, u16 cookie); +struct idpf_queue_ptr { + enum virtchnl2_queue_type type; + union { + struct idpf_rx_queue *rxq; + struct idpf_tx_queue *txq; + struct idpf_buf_queue *bufq; + struct idpf_compl_queue *complq; + }; +}; + +struct idpf_queue_set { + struct idpf_vport *vport; + + u32 num; + struct idpf_queue_ptr qs[] __counted_by(num); +}; + +struct idpf_queue_set *idpf_alloc_queue_set(struct idpf_vport *vport, u32 num); + +int idpf_send_enable_queue_set_msg(const struct idpf_queue_set *qs); +int idpf_send_disable_queue_set_msg(const struct idpf_queue_set *qs); +int idpf_send_config_queue_set_msg(const struct idpf_queue_set *qs); + +int idpf_send_disable_queues_msg(struct idpf_vport *vport); +int idpf_send_config_queues_msg(struct idpf_vport *vport); +int idpf_send_enable_queues_msg(struct idpf_vport *vport); + void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q); u32 idpf_get_vport_id(struct idpf_vport *vport); int idpf_send_create_vport_msg(struct idpf_adapter *adapter, @@ -131,9 +159,6 @@ void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 
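The vector accounting above asks for IDPF_RESERVED_VECS extra vectors on top of the data-path ones and then hides them again from num_q_vectors, because the XDP SQs are serviced from the no-IRQ vector rather than from NAPI vectors. As a worked example, assuming IDPF_RESERVED_VECS is 1 (the value is defined outside this hunk): a vport with 12 Rx queues and 16 Tx queues, 4 of which are XDP SQs, requests max(16 - 4, 12) + 1 = 13 vectors, and if all 13 are granted it ends up with num_q_vectors = 13 - 1 = 12.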
num_tx_q, u16 num_complq, u16 num_rx_q, u16 num_rx_bufq); int idpf_send_delete_queues_msg(struct idpf_vport *vport); -int idpf_send_enable_queues_msg(struct idpf_vport *vport); -int idpf_send_disable_queues_msg(struct idpf_vport *vport); -int idpf_send_config_queues_msg(struct idpf_vport *vport); int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport); int idpf_get_vec_ids(struct idpf_adapter *adapter, diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c index 4f1fb0cefe51..8a2e0f8c5e36 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c @@ -521,6 +521,10 @@ idpf_ptp_get_tstamp_value(struct idpf_vport *vport, list_add(&ptp_tx_tstamp->list_member, &tx_tstamp_caps->latches_free); + u64_stats_update_begin(&vport->tstamp_stats.stats_sync); + u64_stats_inc(&vport->tstamp_stats.packets); + u64_stats_update_end(&vport->tstamp_stats.stats_sync); + return 0; } diff --git a/drivers/net/ethernet/intel/idpf/xdp.c b/drivers/net/ethernet/intel/idpf/xdp.c new file mode 100644 index 000000000000..21ce25b0567f --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xdp.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2025 Intel Corporation */ + +#include "idpf.h" +#include "idpf_virtchnl.h" +#include "xdp.h" +#include "xsk.h" + +static int idpf_rxq_for_each(const struct idpf_vport *vport, + int (*fn)(struct idpf_rx_queue *rxq, void *arg), + void *arg) +{ + bool splitq = idpf_is_queue_model_split(vport->rxq_model); + + if (!vport->rxq_grps) + return -ENETDOWN; + + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; + + if (splitq) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (u32 j = 0; j < num_rxq; j++) { + struct idpf_rx_queue *q; + int err; + + if (splitq) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + + err = fn(q, arg); + if (err) + return err; + } + } + + return 0; +} + +static int __idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq, void *arg) +{ + const struct idpf_vport *vport = rxq->q_vector->vport; + bool split = idpf_is_queue_model_split(vport->rxq_model); + int err; + + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx, + rxq->q_vector->napi.napi_id, + rxq->rx_buf_size); + if (err) + return err; + + if (idpf_queue_has(XSK, rxq)) { + err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + rxq->pool); + if (err) + goto unreg; + } else { + const struct page_pool *pp; + + pp = split ? 
rxq->bufq_sets[0].bufq.pp : rxq->pp; + xdp_rxq_info_attach_page_pool(&rxq->xdp_rxq, pp); + } + + if (!split) + return 0; + + rxq->xdpsqs = &vport->txqs[vport->xdp_txq_offset]; + rxq->num_xdp_txq = vport->num_xdp_txq; + + return 0; + +unreg: + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + return err; +} + +int idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq) +{ + return __idpf_xdp_rxq_info_init(rxq, NULL); +} + +int idpf_xdp_rxq_info_init_all(const struct idpf_vport *vport) +{ + return idpf_rxq_for_each(vport, __idpf_xdp_rxq_info_init, NULL); +} + +static int __idpf_xdp_rxq_info_deinit(struct idpf_rx_queue *rxq, void *arg) +{ + if (idpf_is_queue_model_split((size_t)arg)) { + rxq->xdpsqs = NULL; + rxq->num_xdp_txq = 0; + } + + if (!idpf_queue_has(XSK, rxq)) + xdp_rxq_info_detach_mem_model(&rxq->xdp_rxq); + + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + return 0; +} + +void idpf_xdp_rxq_info_deinit(struct idpf_rx_queue *rxq, u32 model) +{ + __idpf_xdp_rxq_info_deinit(rxq, (void *)(size_t)model); +} + +void idpf_xdp_rxq_info_deinit_all(const struct idpf_vport *vport) +{ + idpf_rxq_for_each(vport, __idpf_xdp_rxq_info_deinit, + (void *)(size_t)vport->rxq_model); +} + +static int idpf_xdp_rxq_assign_prog(struct idpf_rx_queue *rxq, void *arg) +{ + struct bpf_prog *prog = arg; + struct bpf_prog *old; + + if (prog) + bpf_prog_inc(prog); + + old = rcu_replace_pointer(rxq->xdp_prog, prog, lockdep_rtnl_is_held()); + if (old) + bpf_prog_put(old); + + return 0; +} + +void idpf_xdp_copy_prog_to_rqs(const struct idpf_vport *vport, + struct bpf_prog *xdp_prog) +{ + idpf_rxq_for_each(vport, idpf_xdp_rxq_assign_prog, xdp_prog); +} + +static void idpf_xdp_tx_timer(struct work_struct *work); + +int idpf_xdpsqs_get(const struct idpf_vport *vport) +{ + struct libeth_xdpsq_timer **timers __free(kvfree) = NULL; + struct net_device *dev; + u32 sqs; + + if (!idpf_xdp_enabled(vport)) + return 0; + + timers = kvcalloc(vport->num_xdp_txq, sizeof(*timers), GFP_KERNEL); + if (!timers) + return -ENOMEM; + + for (u32 i = 0; i < vport->num_xdp_txq; i++) { + timers[i] = kzalloc_node(sizeof(*timers[i]), GFP_KERNEL, + cpu_to_mem(i)); + if (!timers[i]) { + for (int j = i - 1; j >= 0; j--) + kfree(timers[j]); + + return -ENOMEM; + } + } + + dev = vport->netdev; + sqs = vport->xdp_txq_offset; + + for (u32 i = sqs; i < vport->num_txq; i++) { + struct idpf_tx_queue *xdpsq = vport->txqs[i]; + + xdpsq->complq = xdpsq->txq_grp->complq; + kfree(xdpsq->refillq); + xdpsq->refillq = NULL; + + idpf_queue_clear(FLOW_SCH_EN, xdpsq); + idpf_queue_clear(FLOW_SCH_EN, xdpsq->complq); + idpf_queue_set(NOIRQ, xdpsq); + idpf_queue_set(XDP, xdpsq); + idpf_queue_set(XDP, xdpsq->complq); + + xdpsq->timer = timers[i - sqs]; + libeth_xdpsq_get(&xdpsq->xdp_lock, dev, vport->xdpsq_share); + libeth_xdpsq_init_timer(xdpsq->timer, xdpsq, &xdpsq->xdp_lock, + idpf_xdp_tx_timer); + + xdpsq->pending = 0; + xdpsq->xdp_tx = 0; + xdpsq->thresh = libeth_xdp_queue_threshold(xdpsq->desc_count); + } + + return 0; +} + +void idpf_xdpsqs_put(const struct idpf_vport *vport) +{ + struct net_device *dev; + u32 sqs; + + if (!idpf_xdp_enabled(vport)) + return; + + dev = vport->netdev; + sqs = vport->xdp_txq_offset; + + for (u32 i = sqs; i < vport->num_txq; i++) { + struct idpf_tx_queue *xdpsq = vport->txqs[i]; + + if (!idpf_queue_has_clear(XDP, xdpsq)) + continue; + + libeth_xdpsq_deinit_timer(xdpsq->timer); + libeth_xdpsq_put(&xdpsq->xdp_lock, dev); + + kfree(xdpsq->timer); + xdpsq->refillq = NULL; + idpf_queue_clear(NOIRQ, xdpsq); + } +} + +static int idpf_xdp_parse_cqe(const struct 
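idpf_xdp_copy_prog_to_rqs() above is a typical user of the idpf_rxq_for_each() iterator: the per-queue work is expressed as a callback plus an opaque argument, and the iterator hides the splitq/singleq group layout. As an illustration only (not part of the patch), a callback that merely counts the vport's Rx queues would look like this:

static int example_count_rxq(struct idpf_rx_queue *rxq, void *arg)
{
	u32 *count = arg;

	(*count)++;

	return 0;
}

static u32 example_num_rx_queues(const struct idpf_vport *vport)
{
	u32 count = 0;

	/* idpf_rxq_for_each() returns -ENETDOWN if the groups are gone */
	if (idpf_rxq_for_each(vport, example_count_rxq, &count))
		return 0;

	return count;
}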
idpf_splitq_4b_tx_compl_desc *desc, + bool gen) +{ + u32 val; + +#ifdef __LIBETH_WORD_ACCESS + val = *(const u32 *)desc; +#else + val = ((u32)le16_to_cpu(desc->q_head_compl_tag.q_head) << 16) | + le16_to_cpu(desc->qid_comptype_gen); +#endif + if (!!(val & IDPF_TXD_COMPLQ_GEN_M) != gen) + return -ENODATA; + + if (unlikely((val & GENMASK(IDPF_TXD_COMPLQ_GEN_S - 1, 0)) != + FIELD_PREP(IDPF_TXD_COMPLQ_COMPL_TYPE_M, + IDPF_TXD_COMPLT_RS))) + return -EINVAL; + + return upper_16_bits(val); +} + +u32 idpf_xdpsq_poll(struct idpf_tx_queue *xdpsq, u32 budget) +{ + struct idpf_compl_queue *cq = xdpsq->complq; + u32 tx_ntc = xdpsq->next_to_clean; + u32 tx_cnt = xdpsq->desc_count; + u32 ntc = cq->next_to_clean; + u32 cnt = cq->desc_count; + u32 done_frames; + bool gen; + + gen = idpf_queue_has(GEN_CHK, cq); + + for (done_frames = 0; done_frames < budget; ) { + int ret; + + ret = idpf_xdp_parse_cqe(&cq->comp_4b[ntc], gen); + if (ret >= 0) { + done_frames = ret > tx_ntc ? ret - tx_ntc : + ret + tx_cnt - tx_ntc; + goto next; + } + + switch (ret) { + case -ENODATA: + goto out; + case -EINVAL: + break; + } + +next: + if (unlikely(++ntc == cnt)) { + ntc = 0; + gen = !gen; + idpf_queue_change(GEN_CHK, cq); + } + } + +out: + cq->next_to_clean = ntc; + + return done_frames; +} + +static u32 idpf_xdpsq_complete(void *_xdpsq, u32 budget) +{ + struct libeth_xdpsq_napi_stats ss = { }; + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 tx_ntc = xdpsq->next_to_clean; + u32 tx_cnt = xdpsq->desc_count; + struct xdp_frame_bulk bq; + struct libeth_cq_pp cp = { + .dev = xdpsq->dev, + .bq = &bq, + .xss = &ss, + .napi = true, + }; + u32 done_frames; + + done_frames = idpf_xdpsq_poll(xdpsq, budget); + if (unlikely(!done_frames)) + return 0; + + xdp_frame_bulk_init(&bq); + + for (u32 i = 0; likely(i < done_frames); i++) { + libeth_xdp_complete_tx(&xdpsq->tx_buf[tx_ntc], &cp); + + if (unlikely(++tx_ntc == tx_cnt)) + tx_ntc = 0; + } + + xdp_flush_frame_bulk(&bq); + + xdpsq->next_to_clean = tx_ntc; + xdpsq->pending -= done_frames; + xdpsq->xdp_tx -= cp.xdp_tx; + + return done_frames; +} + +static u32 idpf_xdp_tx_prep(void *_xdpsq, struct libeth_xdpsq *sq) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 free; + + libeth_xdpsq_lock(&xdpsq->xdp_lock); + + free = xdpsq->desc_count - xdpsq->pending; + if (free < xdpsq->thresh) + free += idpf_xdpsq_complete(xdpsq, xdpsq->thresh); + + *sq = (struct libeth_xdpsq){ + .sqes = xdpsq->tx_buf, + .descs = xdpsq->desc_ring, + .count = xdpsq->desc_count, + .lock = &xdpsq->xdp_lock, + .ntu = &xdpsq->next_to_use, + .pending = &xdpsq->pending, + .xdp_tx = &xdpsq->xdp_tx, + }; + + return free; +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_TIMER(static idpf_xdp_tx_timer, idpf_xdpsq_complete); +LIBETH_XDP_DEFINE_FLUSH_TX(idpf_xdp_tx_flush_bulk, idpf_xdp_tx_prep, + idpf_xdp_tx_xmit); +LIBETH_XDP_DEFINE_FLUSH_XMIT(static idpf_xdp_xmit_flush_bulk, idpf_xdp_tx_prep, + idpf_xdp_tx_xmit); +LIBETH_XDP_DEFINE_END(); + +int idpf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + const struct idpf_netdev_priv *np = netdev_priv(dev); + const struct idpf_vport *vport = np->vport; + + if (unlikely(!netif_carrier_ok(dev) || !vport->link_up)) + return -ENETDOWN; + + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, + &vport->txqs[vport->xdp_txq_offset], + vport->num_xdp_txq, + idpf_xdp_xmit_flush_bulk, + idpf_xdp_tx_finalize); +} + +static int idpf_xdpmo_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct libeth_xdp_buff *xdp = 
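The head-based completion math in idpf_xdpsq_poll() above handles ring wrap with "ret > tx_ntc ? ret - tx_ntc : ret + tx_cnt - tx_ntc". For example, on a 512-entry SQ with next_to_clean at 500 and a reported head of 10, the number of newly completed descriptors is 10 + 512 - 500 = 22.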
(typeof(xdp))ctx; + struct idpf_xdp_rx_desc desc __uninitialized; + const struct idpf_rx_queue *rxq; + struct libeth_rx_pt pt; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); + + idpf_xdp_get_qw0(&desc, xdp->desc); + + pt = rxq->rx_ptype_lkup[idpf_xdp_rx_pt(&desc)]; + if (!libeth_rx_pt_has_hash(rxq->xdp_rxq.dev, pt)) + return -ENODATA; + + idpf_xdp_get_qw2(&desc, xdp->desc); + + return libeth_xdpmo_rx_hash(hash, rss_type, idpf_xdp_rx_hash(&desc), + pt); +} + +static const struct xdp_metadata_ops idpf_xdpmo = { + .xmo_rx_hash = idpf_xdpmo_rx_hash, +}; + +void idpf_xdp_set_features(const struct idpf_vport *vport) +{ + if (!idpf_is_queue_model_split(vport->rxq_model)) + return; + + libeth_xdp_set_features_noredir(vport->netdev, &idpf_xdpmo, + idpf_get_max_tx_bufs(vport->adapter), + libeth_xsktmo); +} + +static int idpf_xdp_setup_prog(struct idpf_vport *vport, + const struct netdev_bpf *xdp) +{ + const struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + struct bpf_prog *old, *prog = xdp->prog; + struct idpf_vport_config *cfg; + int ret; + + cfg = vport->adapter->vport_config[vport->idx]; + + if (test_bit(IDPF_REMOVE_IN_PROG, vport->adapter->flags) || + !test_bit(IDPF_VPORT_REG_NETDEV, cfg->flags) || + !!vport->xdp_prog == !!prog) { + if (np->state == __IDPF_VPORT_UP) + idpf_xdp_copy_prog_to_rqs(vport, prog); + + old = xchg(&vport->xdp_prog, prog); + if (old) + bpf_prog_put(old); + + cfg->user_config.xdp_prog = prog; + + return 0; + } + + if (!vport->num_xdp_txq && vport->num_txq == cfg->max_q.max_txq) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "No Tx queues available for XDP, please decrease the number of regular SQs"); + return -ENOSPC; + } + + old = cfg->user_config.xdp_prog; + cfg->user_config.xdp_prog = prog; + + ret = idpf_initiate_soft_reset(vport, IDPF_SR_Q_CHANGE); + if (ret) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "Could not reopen the vport after XDP setup"); + + cfg->user_config.xdp_prog = old; + old = prog; + } + + if (old) + bpf_prog_put(old); + + libeth_xdp_set_redirect(vport->netdev, vport->xdp_prog); + + return ret; +} + +int idpf_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct idpf_vport *vport; + int ret; + + idpf_vport_ctrl_lock(dev); + vport = idpf_netdev_to_vport(dev); + + if (!idpf_is_queue_model_split(vport->txq_model)) + goto notsupp; + + switch (xdp->command) { + case XDP_SETUP_PROG: + ret = idpf_xdp_setup_prog(vport, xdp); + break; + case XDP_SETUP_XSK_POOL: + ret = idpf_xsk_pool_setup(vport, xdp); + break; + default: +notsupp: + ret = -EOPNOTSUPP; + break; + } + + idpf_vport_ctrl_unlock(dev); + + return ret; +} diff --git a/drivers/net/ethernet/intel/idpf/xdp.h b/drivers/net/ethernet/intel/idpf/xdp.h new file mode 100644 index 000000000000..479f5ef3c604 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xdp.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef _IDPF_XDP_H_ +#define _IDPF_XDP_H_ + +#include <net/libeth/xdp.h> + +#include "idpf_txrx.h" + +int idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq); +int idpf_xdp_rxq_info_init_all(const struct idpf_vport *vport); +void idpf_xdp_rxq_info_deinit(struct idpf_rx_queue *rxq, u32 model); +void idpf_xdp_rxq_info_deinit_all(const struct idpf_vport *vport); +void idpf_xdp_copy_prog_to_rqs(const struct idpf_vport *vport, + struct bpf_prog *xdp_prog); + +int idpf_xdpsqs_get(const struct idpf_vport *vport); +void idpf_xdpsqs_put(const struct idpf_vport *vport); + +u32 idpf_xdpsq_poll(struct idpf_tx_queue *xdpsq, u32 budget); 
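The entry points declared above and in xsk.h are the hooks a netdev needs for XDP and AF_XDP; the patch wires them into the idpf net_device_ops elsewhere (not in this hunk). The sketch below only illustrates which ndo each new function corresponds to, under the assumption that the rest of the ops structure is the driver's existing one.

static const struct net_device_ops example_idpf_netdev_ops = {
	/* ... existing idpf ndo callbacks ... */
	.ndo_bpf	= idpf_xdp,		/* XDP_SETUP_PROG / XDP_SETUP_XSK_POOL */
	.ndo_xdp_xmit	= idpf_xdp_xmit,	/* frames redirected from other interfaces */
	.ndo_xsk_wakeup	= idpf_xsk_wakeup,	/* AF_XDP need_wakeup kicks */
};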
+bool idpf_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags); + +/** + * idpf_xdp_tx_xmit - produce a single HW Tx descriptor out of XDP desc + * @desc: XDP descriptor to pull the DMA address and length from + * @i: descriptor index on the queue to fill + * @sq: XDP queue to produce the HW Tx descriptor on + * @priv: &xsk_tx_metadata_ops on XSk xmit or %NULL + */ +static inline void idpf_xdp_tx_xmit(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + struct idpf_flex_tx_desc *tx_desc = sq->descs; + u32 cmd; + + cmd = FIELD_PREP(IDPF_FLEX_TXD_QW1_DTYPE_M, + IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2); + if (desc.flags & LIBETH_XDP_TX_LAST) + cmd |= FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M, + IDPF_TX_DESC_CMD_EOP); + if (priv && (desc.flags & LIBETH_XDP_TX_CSUM)) + cmd |= FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M, + IDPF_TX_FLEX_DESC_CMD_CS_EN); + + tx_desc = &tx_desc[i]; + tx_desc->buf_addr = cpu_to_le64(desc.addr); +#ifdef __LIBETH_WORD_ACCESS + *(u64 *)&tx_desc->qw1 = ((u64)desc.len << 48) | cmd; +#else + tx_desc->qw1.buf_size = cpu_to_le16(desc.len); + tx_desc->qw1.cmd_dtype = cpu_to_le16(cmd); +#endif +} + +static inline void idpf_xdpsq_set_rs(const struct idpf_tx_queue *xdpsq) +{ + u32 ntu, cmd; + + ntu = xdpsq->next_to_use; + if (unlikely(!ntu)) + ntu = xdpsq->desc_count; + + cmd = FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M, IDPF_TX_DESC_CMD_RS); +#ifdef __LIBETH_WORD_ACCESS + *(u64 *)&xdpsq->flex_tx[ntu - 1].q.qw1 |= cmd; +#else + xdpsq->flex_tx[ntu - 1].q.qw1.cmd_dtype |= cpu_to_le16(cmd); +#endif +} + +static inline void idpf_xdpsq_update_tail(const struct idpf_tx_queue *xdpsq) +{ + dma_wmb(); + writel_relaxed(xdpsq->next_to_use, xdpsq->tail); +} + +/** + * idpf_xdp_tx_finalize - finalize sending over XDPSQ + * @_xdpsq: XDP Tx queue + * @sent: whether any frames were sent + * @flush: whether to update RS bit and the tail register + * + * Set the RS bit ("end of batch"), bump the tail, and queue the cleanup timer. + * To be called after a NAPI polling loop, at the end of .ndo_xdp_xmit() etc. 
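In the word-access path of idpf_xdp_tx_xmit() above, the single 64-bit store packs the buffer length into bits 63:48 of QW1 and the DTYPE/command word into the low 16 bits, which on little-endian builds (where __LIBETH_WORD_ACCESS is expected to be defined) matches the two le16 stores in the generic path. For a 1500-byte frame with EOP set, for instance, the value written is (1500ULL << 48) | cmd.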
+ */ +static inline void idpf_xdp_tx_finalize(void *_xdpsq, bool sent, bool flush) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + + if ((!flush || unlikely(!sent)) && + likely(xdpsq->desc_count - 1 != xdpsq->pending)) + return; + + libeth_xdpsq_lock(&xdpsq->xdp_lock); + + idpf_xdpsq_set_rs(xdpsq); + idpf_xdpsq_update_tail(xdpsq); + + libeth_xdpsq_queue_timer(xdpsq->timer); + + libeth_xdpsq_unlock(&xdpsq->xdp_lock); +} + +struct idpf_xdp_rx_desc { + aligned_u64 qw0; +#define IDPF_XDP_RX_BUFQ BIT_ULL(47) +#define IDPF_XDP_RX_GEN BIT_ULL(46) +#define IDPF_XDP_RX_LEN GENMASK_ULL(45, 32) +#define IDPF_XDP_RX_PT GENMASK_ULL(25, 16) + + aligned_u64 qw1; +#define IDPF_XDP_RX_BUF GENMASK_ULL(47, 32) +#define IDPF_XDP_RX_EOP BIT_ULL(1) + + aligned_u64 qw2; +#define IDPF_XDP_RX_HASH GENMASK_ULL(31, 0) + + aligned_u64 qw3; +} __aligned(4 * sizeof(u64)); +static_assert(sizeof(struct idpf_xdp_rx_desc) == + sizeof(struct virtchnl2_rx_flex_desc_adv_nic_3)); + +#define idpf_xdp_rx_bufq(desc) !!((desc)->qw0 & IDPF_XDP_RX_BUFQ) +#define idpf_xdp_rx_gen(desc) !!((desc)->qw0 & IDPF_XDP_RX_GEN) +#define idpf_xdp_rx_len(desc) FIELD_GET(IDPF_XDP_RX_LEN, (desc)->qw0) +#define idpf_xdp_rx_pt(desc) FIELD_GET(IDPF_XDP_RX_PT, (desc)->qw0) +#define idpf_xdp_rx_buf(desc) FIELD_GET(IDPF_XDP_RX_BUF, (desc)->qw1) +#define idpf_xdp_rx_eop(desc) !!((desc)->qw1 & IDPF_XDP_RX_EOP) +#define idpf_xdp_rx_hash(desc) FIELD_GET(IDPF_XDP_RX_HASH, (desc)->qw2) + +static inline void +idpf_xdp_get_qw0(struct idpf_xdp_rx_desc *desc, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) +{ +#ifdef __LIBETH_WORD_ACCESS + desc->qw0 = ((const typeof(desc))rxd)->qw0; +#else + desc->qw0 = ((u64)le16_to_cpu(rxd->pktlen_gen_bufq_id) << 32) | + ((u64)le16_to_cpu(rxd->ptype_err_fflags0) << 16); +#endif +} + +static inline void +idpf_xdp_get_qw1(struct idpf_xdp_rx_desc *desc, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) +{ +#ifdef __LIBETH_WORD_ACCESS + desc->qw1 = ((const typeof(desc))rxd)->qw1; +#else + desc->qw1 = ((u64)le16_to_cpu(rxd->buf_id) << 32) | + rxd->status_err0_qw1; +#endif +} + +static inline void +idpf_xdp_get_qw2(struct idpf_xdp_rx_desc *desc, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) +{ +#ifdef __LIBETH_WORD_ACCESS + desc->qw2 = ((const typeof(desc))rxd)->qw2; +#else + desc->qw2 = ((u64)rxd->hash3 << 24) | + ((u64)rxd->ff2_mirrid_hash2.hash2 << 16) | + le16_to_cpu(rxd->hash1); +#endif +} + +void idpf_xdp_set_features(const struct idpf_vport *vport); + +int idpf_xdp(struct net_device *dev, struct netdev_bpf *xdp); +int idpf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + +#endif /* _IDPF_XDP_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/xsk.c b/drivers/net/ethernet/intel/idpf/xsk.c new file mode 100644 index 000000000000..fd2cc43ab43c --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xsk.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2025 Intel Corporation */ + +#include <net/libeth/xsk.h> + +#include "idpf.h" +#include "xdp.h" +#include "xsk.h" + +static void idpf_xsk_tx_timer(struct work_struct *work); + +static void idpf_xsk_setup_rxq(const struct idpf_vport *vport, + struct idpf_rx_queue *rxq) +{ + struct xsk_buff_pool *pool; + + pool = xsk_get_pool_from_qid(vport->netdev, rxq->idx); + if (!pool || !pool->dev || !xsk_buff_can_alloc(pool, 1)) + return; + + rxq->pool = pool; + + idpf_queue_set(XSK, rxq); +} + +static void idpf_xsk_setup_bufq(const struct idpf_vport *vport, + struct idpf_buf_queue *bufq) +{ + struct xsk_buff_pool *pool; + 
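The idpf_xdp_rx_desc view above lets the hot path read only the qwords it needs from the 32-byte flex descriptor. A minimal, illustrative consumer that extracts the generation bit, packet length and packet type (mirroring what idpf_xskrq_poll() in xsk.c does) could look like the sketch below; it is not part of the patch.

static bool
example_peek_rx_desc(const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd,
		     bool gen_expected, u32 *len, u32 *ptype)
{
	struct idpf_xdp_rx_desc desc __uninitialized;

	idpf_xdp_get_qw0(&desc, rxd);
	if (idpf_xdp_rx_gen(&desc) != gen_expected)
		return false;	/* descriptor not written back yet */

	/* order the generation check before reading the payload fields */
	dma_rmb();

	*len = idpf_xdp_rx_len(&desc);
	*ptype = idpf_xdp_rx_pt(&desc);

	return true;
}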
u32 qid = U32_MAX; + + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *grp = &vport->rxq_grps[i]; + + for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) { + if (&grp->splitq.bufq_sets[j].bufq == bufq) { + qid = grp->splitq.rxq_sets[0]->rxq.idx; + goto setup; + } + } + } + +setup: + pool = xsk_get_pool_from_qid(vport->netdev, qid); + if (!pool || !pool->dev || !xsk_buff_can_alloc(pool, 1)) + return; + + bufq->pool = pool; + + idpf_queue_set(XSK, bufq); +} + +static void idpf_xsk_setup_txq(const struct idpf_vport *vport, + struct idpf_tx_queue *txq) +{ + struct xsk_buff_pool *pool; + u32 qid; + + idpf_queue_clear(XSK, txq); + + if (!idpf_queue_has(XDP, txq)) + return; + + qid = txq->idx - vport->xdp_txq_offset; + + pool = xsk_get_pool_from_qid(vport->netdev, qid); + if (!pool || !pool->dev) + return; + + txq->pool = pool; + libeth_xdpsq_init_timer(txq->timer, txq, &txq->xdp_lock, + idpf_xsk_tx_timer); + + idpf_queue_assign(NOIRQ, txq, xsk_uses_need_wakeup(pool)); + idpf_queue_set(XSK, txq); +} + +static void idpf_xsk_setup_complq(const struct idpf_vport *vport, + struct idpf_compl_queue *complq) +{ + const struct xsk_buff_pool *pool; + u32 qid; + + idpf_queue_clear(XSK, complq); + + if (!idpf_queue_has(XDP, complq)) + return; + + qid = complq->txq_grp->txqs[0]->idx - vport->xdp_txq_offset; + + pool = xsk_get_pool_from_qid(vport->netdev, qid); + if (!pool || !pool->dev) + return; + + idpf_queue_set(XSK, complq); +} + +void idpf_xsk_setup_queue(const struct idpf_vport *vport, void *q, + enum virtchnl2_queue_type type) +{ + if (!idpf_xdp_enabled(vport)) + return; + + switch (type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + idpf_xsk_setup_rxq(vport, q); + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + idpf_xsk_setup_bufq(vport, q); + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + idpf_xsk_setup_txq(vport, q); + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + idpf_xsk_setup_complq(vport, q); + break; + default: + break; + } +} + +void idpf_xsk_clear_queue(void *q, enum virtchnl2_queue_type type) +{ + struct idpf_compl_queue *complq; + struct idpf_buf_queue *bufq; + struct idpf_rx_queue *rxq; + struct idpf_tx_queue *txq; + + switch (type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + rxq = q; + if (!idpf_queue_has_clear(XSK, rxq)) + return; + + rxq->pool = NULL; + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + bufq = q; + if (!idpf_queue_has_clear(XSK, bufq)) + return; + + bufq->pool = NULL; + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + txq = q; + if (!idpf_queue_has_clear(XSK, txq)) + return; + + idpf_queue_set(NOIRQ, txq); + txq->dev = txq->netdev->dev.parent; + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + complq = q; + idpf_queue_clear(XSK, complq); + break; + default: + break; + } +} + +void idpf_xsk_init_wakeup(struct idpf_q_vector *qv) +{ + libeth_xsk_init_wakeup(&qv->csd, &qv->napi); +} + +void idpf_xsksq_clean(struct idpf_tx_queue *xdpsq) +{ + struct libeth_xdpsq_napi_stats ss = { }; + u32 ntc = xdpsq->next_to_clean; + struct xdp_frame_bulk bq; + struct libeth_cq_pp cp = { + .dev = xdpsq->pool->dev, + .bq = &bq, + .xss = &ss, + }; + u32 xsk_frames = 0; + + xdp_frame_bulk_init(&bq); + + while (ntc != xdpsq->next_to_use) { + struct libeth_sqe *sqe = &xdpsq->tx_buf[ntc]; + + if (sqe->type) + libeth_xdp_complete_tx(sqe, &cp); + else + xsk_frames++; + + if (unlikely(++ntc == xdpsq->desc_count)) + ntc = 0; + } + + xdp_flush_frame_bulk(&bq); + + if (xsk_frames) + xsk_tx_completed(xdpsq->pool, xsk_frames); +} + +static noinline u32 idpf_xsksq_complete_slow(struct idpf_tx_queue 
*xdpsq, + u32 done) +{ + struct libeth_xdpsq_napi_stats ss = { }; + u32 ntc = xdpsq->next_to_clean; + u32 cnt = xdpsq->desc_count; + struct xdp_frame_bulk bq; + struct libeth_cq_pp cp = { + .dev = xdpsq->pool->dev, + .bq = &bq, + .xss = &ss, + .napi = true, + }; + u32 xsk_frames = 0; + + xdp_frame_bulk_init(&bq); + + for (u32 i = 0; likely(i < done); i++) { + struct libeth_sqe *sqe = &xdpsq->tx_buf[ntc]; + + if (sqe->type) + libeth_xdp_complete_tx(sqe, &cp); + else + xsk_frames++; + + if (unlikely(++ntc == cnt)) + ntc = 0; + } + + xdp_flush_frame_bulk(&bq); + + xdpsq->next_to_clean = ntc; + xdpsq->xdp_tx -= cp.xdp_tx; + + return xsk_frames; +} + +static __always_inline u32 idpf_xsksq_complete(void *_xdpsq, u32 budget) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 tx_ntc = xdpsq->next_to_clean; + u32 tx_cnt = xdpsq->desc_count; + u32 done_frames; + u32 xsk_frames; + + done_frames = idpf_xdpsq_poll(xdpsq, budget); + if (unlikely(!done_frames)) + return 0; + + if (likely(!xdpsq->xdp_tx)) { + tx_ntc += done_frames; + if (tx_ntc >= tx_cnt) + tx_ntc -= tx_cnt; + + xdpsq->next_to_clean = tx_ntc; + xsk_frames = done_frames; + + goto finalize; + } + + xsk_frames = idpf_xsksq_complete_slow(xdpsq, done_frames); + if (xsk_frames) +finalize: + xsk_tx_completed(xdpsq->pool, xsk_frames); + + xdpsq->pending -= done_frames; + + return done_frames; +} + +static u32 idpf_xsk_tx_prep(void *_xdpsq, struct libeth_xdpsq *sq) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 free; + + libeth_xdpsq_lock(&xdpsq->xdp_lock); + + free = xdpsq->desc_count - xdpsq->pending; + if (free < xdpsq->thresh) + free += idpf_xsksq_complete(xdpsq, xdpsq->thresh); + + *sq = (struct libeth_xdpsq){ + .pool = xdpsq->pool, + .sqes = xdpsq->tx_buf, + .descs = xdpsq->desc_ring, + .count = xdpsq->desc_count, + .lock = &xdpsq->xdp_lock, + .ntu = &xdpsq->next_to_use, + .pending = &xdpsq->pending, + .xdp_tx = &xdpsq->xdp_tx, + }; + + return free; +} + +static u32 idpf_xsk_xmit_prep(void *_xdpsq, struct libeth_xdpsq *sq) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + + *sq = (struct libeth_xdpsq){ + .pool = xdpsq->pool, + .sqes = xdpsq->tx_buf, + .descs = xdpsq->desc_ring, + .count = xdpsq->desc_count, + .lock = &xdpsq->xdp_lock, + .ntu = &xdpsq->next_to_use, + .pending = &xdpsq->pending, + }; + + /* + * The queue is cleaned, the budget is already known, optimize out + * the second min() by passing the type limit. 
+ */ + return U32_MAX; +} + +bool idpf_xsk_xmit(struct idpf_tx_queue *xsksq) +{ + u32 free; + + libeth_xdpsq_lock(&xsksq->xdp_lock); + + free = xsksq->desc_count - xsksq->pending; + if (free < xsksq->thresh) + free += idpf_xsksq_complete(xsksq, xsksq->thresh); + + return libeth_xsk_xmit_do_bulk(xsksq->pool, xsksq, + min(free - 1, xsksq->thresh), + libeth_xsktmo, idpf_xsk_xmit_prep, + idpf_xdp_tx_xmit, idpf_xdp_tx_finalize); +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_TIMER(static idpf_xsk_tx_timer, idpf_xsksq_complete); +LIBETH_XSK_DEFINE_FLUSH_TX(static idpf_xsk_tx_flush_bulk, idpf_xsk_tx_prep, + idpf_xdp_tx_xmit); +LIBETH_XSK_DEFINE_RUN(static idpf_xsk_run_pass, idpf_xsk_run_prog, + idpf_xsk_tx_flush_bulk, idpf_rx_process_skb_fields); +LIBETH_XSK_DEFINE_FINALIZE(static idpf_xsk_finalize_rx, idpf_xsk_tx_flush_bulk, + idpf_xdp_tx_finalize); +LIBETH_XDP_DEFINE_END(); + +static void idpf_xskfqe_init(const struct libeth_xskfq_fp *fq, u32 i) +{ + struct virtchnl2_splitq_rx_buf_desc *desc = fq->descs; + + desc = &desc[i]; +#ifdef __LIBETH_WORD_ACCESS + *(u64 *)&desc->qword0 = i; +#else + desc->qword0.buf_id = cpu_to_le16(i); +#endif + desc->pkt_addr = cpu_to_le64(libeth_xsk_buff_xdp_get_dma(fq->fqes[i])); +} + +static bool idpf_xskfq_refill_thresh(struct idpf_buf_queue *bufq, u32 count) +{ + struct libeth_xskfq_fp fq = { + .pool = bufq->pool, + .fqes = bufq->xsk_buf, + .descs = bufq->split_buf, + .ntu = bufq->next_to_use, + .count = bufq->desc_count, + }; + u32 done; + + done = libeth_xskfqe_alloc(&fq, count, idpf_xskfqe_init); + writel(fq.ntu, bufq->tail); + + bufq->next_to_use = fq.ntu; + bufq->pending -= done; + + return done == count; +} + +static bool idpf_xskfq_refill(struct idpf_buf_queue *bufq) +{ + u32 count, rx_thresh = bufq->thresh; + + count = ALIGN_DOWN(bufq->pending - 1, rx_thresh); + + for (u32 i = 0; i < count; i += rx_thresh) { + if (unlikely(!idpf_xskfq_refill_thresh(bufq, rx_thresh))) + return false; + } + + return true; +} + +int idpf_xskfq_init(struct idpf_buf_queue *bufq) +{ + struct libeth_xskfq fq = { + .pool = bufq->pool, + .count = bufq->desc_count, + .nid = idpf_q_vector_to_mem(bufq->q_vector), + }; + int ret; + + ret = libeth_xskfq_create(&fq); + if (ret) + return ret; + + bufq->xsk_buf = fq.fqes; + bufq->pending = fq.pending; + bufq->thresh = fq.thresh; + bufq->rx_buf_size = fq.buf_len; + + if (!idpf_xskfq_refill(bufq)) + netdev_err(bufq->pool->netdev, + "failed to allocate XSk buffers for qid %d\n", + bufq->pool->queue_id); + + bufq->next_to_alloc = bufq->next_to_use; + + idpf_queue_clear(HSPLIT_EN, bufq); + bufq->rx_hbuf_size = 0; + + return 0; +} + +void idpf_xskfq_rel(struct idpf_buf_queue *bufq) +{ + struct libeth_xskfq fq = { + .fqes = bufq->xsk_buf, + }; + + libeth_xskfq_destroy(&fq); + + bufq->rx_buf_size = fq.buf_len; + bufq->thresh = fq.thresh; + bufq->pending = fq.pending; +} + +struct idpf_xskfq_refill_set { + struct { + struct idpf_buf_queue *q; + u32 buf_id; + u32 pending; + } bufqs[IDPF_MAX_BUFQS_PER_RXQ_GRP]; +}; + +static bool idpf_xskfq_refill_set(const struct idpf_xskfq_refill_set *set) +{ + bool ret = true; + + for (u32 i = 0; i < ARRAY_SIZE(set->bufqs); i++) { + struct idpf_buf_queue *bufq = set->bufqs[i].q; + u32 ntc; + + if (!bufq) + continue; + + ntc = set->bufqs[i].buf_id; + if (unlikely(++ntc == bufq->desc_count)) + ntc = 0; + + bufq->next_to_clean = ntc; + bufq->pending += set->bufqs[i].pending; + + if (bufq->pending > bufq->thresh) + ret &= idpf_xskfq_refill(bufq); + } + + return ret; +} + +int idpf_xskrq_poll(struct idpf_rx_queue 
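idpf_xskfq_refill() above only posts whole threshold-sized batches: with, say, 513 pending descriptors and a threshold of 64, count = ALIGN_DOWN(512, 64) = 512, so eight batches of 64 buffers are posted and one descriptor stays pending until a later pass.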
*rxq, u32 budget) +{ + struct idpf_xskfq_refill_set set = { }; + struct libeth_rq_napi_stats rs = { }; + bool wake, gen, fail = false; + u32 ntc = rxq->next_to_clean; + struct libeth_xdp_buff *xdp; + LIBETH_XDP_ONSTACK_BULK(bq); + u32 cnt = rxq->desc_count; + + wake = xsk_uses_need_wakeup(rxq->pool); + if (wake) + xsk_clear_rx_need_wakeup(rxq->pool); + + gen = idpf_queue_has(GEN_CHK, rxq); + + libeth_xsk_tx_init_bulk(&bq, rxq->xdp_prog, rxq->xdp_rxq.dev, + rxq->xdpsqs, rxq->num_xdp_txq); + xdp = rxq->xsk; + + while (likely(rs.packets < budget)) { + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; + struct idpf_xdp_rx_desc desc __uninitialized; + struct idpf_buf_queue *bufq; + u32 bufq_id, buf_id; + + rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb; + + idpf_xdp_get_qw0(&desc, rx_desc); + if (idpf_xdp_rx_gen(&desc) != gen) + break; + + dma_rmb(); + + bufq_id = idpf_xdp_rx_bufq(&desc); + bufq = set.bufqs[bufq_id].q; + if (!bufq) { + bufq = &rxq->bufq_sets[bufq_id].bufq; + set.bufqs[bufq_id].q = bufq; + } + + idpf_xdp_get_qw1(&desc, rx_desc); + buf_id = idpf_xdp_rx_buf(&desc); + + set.bufqs[bufq_id].buf_id = buf_id; + set.bufqs[bufq_id].pending++; + + xdp = libeth_xsk_process_buff(xdp, bufq->xsk_buf[buf_id], + idpf_xdp_rx_len(&desc)); + + if (unlikely(++ntc == cnt)) { + ntc = 0; + gen = !gen; + idpf_queue_change(GEN_CHK, rxq); + } + + if (!idpf_xdp_rx_eop(&desc) || unlikely(!xdp)) + continue; + + fail = !idpf_xsk_run_pass(xdp, &bq, rxq->napi, &rs, rx_desc); + xdp = NULL; + + if (fail) + break; + } + + idpf_xsk_finalize_rx(&bq); + + rxq->next_to_clean = ntc; + rxq->xsk = xdp; + + fail |= !idpf_xskfq_refill_set(&set); + + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_add(&rxq->q_stats.packets, rs.packets); + u64_stats_add(&rxq->q_stats.bytes, rs.bytes); + u64_stats_update_end(&rxq->stats_sync); + + if (!wake) + return unlikely(fail) ? 
budget : rs.packets; + + if (unlikely(fail)) + xsk_set_rx_need_wakeup(rxq->pool); + + return rs.packets; +} + +int idpf_xsk_pool_setup(struct idpf_vport *vport, struct netdev_bpf *bpf) +{ + struct xsk_buff_pool *pool = bpf->xsk.pool; + u32 qid = bpf->xsk.queue_id; + bool restart; + int ret; + + if (pool && !IS_ALIGNED(xsk_pool_get_rx_frame_size(pool), + LIBETH_RX_BUF_STRIDE)) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: HW doesn't support frames sizes not aligned to %u (qid %u: %u)", + netdev_name(vport->netdev), + LIBETH_RX_BUF_STRIDE, qid, + xsk_pool_get_rx_frame_size(pool)); + return -EINVAL; + } + + restart = idpf_xdp_enabled(vport) && netif_running(vport->netdev); + if (!restart) + goto pool; + + ret = idpf_qp_switch(vport, qid, false); + if (ret) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: failed to disable queue pair %u: %pe", + netdev_name(vport->netdev), qid, + ERR_PTR(ret)); + return ret; + } + +pool: + ret = libeth_xsk_setup_pool(vport->netdev, qid, pool); + if (ret) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: failed to configure XSk pool for pair %u: %pe", + netdev_name(vport->netdev), qid, + ERR_PTR(ret)); + return ret; + } + + if (!restart) + return 0; + + ret = idpf_qp_switch(vport, qid, true); + if (ret) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: failed to enable queue pair %u: %pe", + netdev_name(vport->netdev), qid, + ERR_PTR(ret)); + goto err_dis; + } + + return 0; + +err_dis: + libeth_xsk_setup_pool(vport->netdev, qid, false); + + return ret; +} + +int idpf_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) +{ + const struct idpf_netdev_priv *np = netdev_priv(dev); + const struct idpf_vport *vport = np->vport; + struct idpf_q_vector *q_vector; + + if (unlikely(idpf_vport_ctrl_is_locked(dev))) + return -EBUSY; + + if (unlikely(!vport->link_up)) + return -ENETDOWN; + + if (unlikely(!vport->num_xdp_txq)) + return -ENXIO; + + q_vector = idpf_find_rxq_vec(vport, qid); + if (unlikely(!q_vector->xsksq)) + return -ENXIO; + + libeth_xsk_wakeup(&q_vector->csd, qid); + + return 0; +} diff --git a/drivers/net/ethernet/intel/idpf/xsk.h b/drivers/net/ethernet/intel/idpf/xsk.h new file mode 100644 index 000000000000..b622d08c03e8 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xsk.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef _IDPF_XSK_H_ +#define _IDPF_XSK_H_ + +#include <linux/types.h> + +enum virtchnl2_queue_type; +struct idpf_buf_queue; +struct idpf_q_vector; +struct idpf_rx_queue; +struct idpf_tx_queue; +struct idpf_vport; +struct net_device; +struct netdev_bpf; + +void idpf_xsk_setup_queue(const struct idpf_vport *vport, void *q, + enum virtchnl2_queue_type type); +void idpf_xsk_clear_queue(void *q, enum virtchnl2_queue_type type); +void idpf_xsk_init_wakeup(struct idpf_q_vector *qv); + +int idpf_xskfq_init(struct idpf_buf_queue *bufq); +void idpf_xskfq_rel(struct idpf_buf_queue *bufq); +void idpf_xsksq_clean(struct idpf_tx_queue *xdpq); + +int idpf_xskrq_poll(struct idpf_rx_queue *rxq, u32 budget); +bool idpf_xsk_xmit(struct idpf_tx_queue *xsksq); + +int idpf_xsk_pool_setup(struct idpf_vport *vport, struct netdev_bpf *xdp); +int idpf_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); + +#endif /* !_IDPF_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.c b/drivers/net/ethernet/intel/igb/e1000_82575.c index 64dfc362d1dc..44a85ad749a4 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.c +++ b/drivers/net/ethernet/intel/igb/e1000_82575.c @@ -2372,7 +2372,7 @@ 
static s32 igb_validate_nvm_checksum_with_offset(struct e1000_hw *hw, checksum += nvm_data; } - if (checksum != (u16) NVM_SUM) { + if (checksum != NVM_SUM) { hw_dbg("NVM Checksum Invalid\n"); ret_val = -E1000_ERR_NVM; goto out; @@ -2406,7 +2406,7 @@ static s32 igb_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) } checksum += nvm_data; } - checksum = (u16) NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, &checksum); if (ret_val) diff --git a/drivers/net/ethernet/intel/igb/e1000_i210.c b/drivers/net/ethernet/intel/igb/e1000_i210.c index 503b239868e8..9db29b231d6a 100644 --- a/drivers/net/ethernet/intel/igb/e1000_i210.c +++ b/drivers/net/ethernet/intel/igb/e1000_i210.c @@ -602,7 +602,7 @@ static s32 igb_update_nvm_checksum_i210(struct e1000_hw *hw) } checksum += nvm_data; } - checksum = (u16) NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = igb_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) { diff --git a/drivers/net/ethernet/intel/igb/e1000_nvm.c b/drivers/net/ethernet/intel/igb/e1000_nvm.c index 2dcd64d6dec3..c8638502c2be 100644 --- a/drivers/net/ethernet/intel/igb/e1000_nvm.c +++ b/drivers/net/ethernet/intel/igb/e1000_nvm.c @@ -636,7 +636,7 @@ s32 igb_validate_nvm_checksum(struct e1000_hw *hw) checksum += nvm_data; } - if (checksum != (u16) NVM_SUM) { + if (checksum != NVM_SUM) { hw_dbg("NVM Checksum Invalid\n"); ret_val = -E1000_ERR_NVM; goto out; @@ -668,7 +668,7 @@ s32 igb_update_nvm_checksum(struct e1000_hw *hw) } checksum += nvm_data; } - checksum = (u16) NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) hw_dbg("NVM Write Error while updating checksum.\n"); diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index c3f4f7cd264e..0fff1df81b7b 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -217,7 +217,7 @@ static inline int igb_skb_pad(void) #define IGB_MASTER_SLAVE e1000_ms_hw_default #endif -#define IGB_MNG_VLAN_NONE -1 +#define IGB_MNG_VLAN_NONE 0xFFFF enum igb_tx_flags { /* cmd_type flags */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 92ef33459aec..f8a208c84f15 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -920,11 +920,11 @@ static int igb_set_ringparam(struct net_device *netdev, } if (adapter->num_tx_queues > adapter->num_rx_queues) - temp_ring = vmalloc(array_size(sizeof(struct igb_ring), - adapter->num_tx_queues)); + temp_ring = vmalloc_array(adapter->num_tx_queues, + sizeof(struct igb_ring)); else - temp_ring = vmalloc(array_size(sizeof(struct igb_ring), - adapter->num_rx_queues)); + temp_ring = vmalloc_array(adapter->num_rx_queues, + sizeof(struct igb_ring)); if (!temp_ring) { err = -ENOMEM; @@ -2081,11 +2081,8 @@ static void igb_diag_test(struct net_device *netdev, } else { dev_info(&adapter->pdev->dev, "online testing starting\n"); - /* PHY is powered down when interface is down */ - if (if_running && igb_link_test(adapter, &data[TEST_LINK])) + if (igb_link_test(adapter, &data[TEST_LINK])) eth_test->flags |= ETH_TEST_FL_FAILED; - else - data[TEST_LINK] = 0; /* Online tests aren't run; pass by default */ data[TEST_REG] = 0; diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index a9a7a94ae61e..85f9589cc568 100644 --- 
a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -1531,8 +1531,7 @@ static void igb_update_mng_vlan(struct igb_adapter *adapter) adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; } - if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && - (vid != old_vid) && + if (old_vid != IGB_MNG_VLAN_NONE && vid != old_vid && !test_bit(old_vid, adapter->active_vlans)) { /* remove VID from filter table */ igb_vfta_set(hw, vid, pf_id, false, true); @@ -4453,8 +4452,7 @@ int igb_setup_rx_resources(struct igb_ring *rx_ring) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) xdp_rxq_info_unreg(&rx_ring->xdp_rxq); res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev, - rx_ring->queue_index, - rx_ring->q_vector->napi.napi_id); + rx_ring->queue_index, 0); if (res < 0) { dev_err(dev, "Failed to register xdp_rxq index %u\n", rx_ring->queue_index); diff --git a/drivers/net/ethernet/intel/igbvf/ethtool.c b/drivers/net/ethernet/intel/igbvf/ethtool.c index 773895c663fd..9c08ebfad804 100644 --- a/drivers/net/ethernet/intel/igbvf/ethtool.c +++ b/drivers/net/ethernet/intel/igbvf/ethtool.c @@ -30,11 +30,12 @@ static const struct igbvf_stats igbvf_gstrings_stats[] = { { "rx_bytes", IGBVF_STAT(stats.gorc, stats.base_gorc) }, { "tx_bytes", IGBVF_STAT(stats.gotc, stats.base_gotc) }, { "multicast", IGBVF_STAT(stats.mprc, stats.base_mprc) }, - { "lbrx_bytes", IGBVF_STAT(stats.gorlbc, stats.base_gorlbc) }, { "lbrx_packets", IGBVF_STAT(stats.gprlbc, stats.base_gprlbc) }, + { "lbtx_packets", IGBVF_STAT(stats.gptlbc, stats.base_gptlbc) }, + { "lbrx_bytes", IGBVF_STAT(stats.gorlbc, stats.base_gorlbc) }, + { "lbtx_bytes", IGBVF_STAT(stats.gotlbc, stats.base_gotlbc) }, { "tx_restart_queue", IGBVF_STAT(restart_queue, zero_base) }, { "tx_timeout_count", IGBVF_STAT(tx_timeout_count, zero_base) }, - { "rx_long_byte_count", IGBVF_STAT(stats.gorc, stats.base_gorc) }, { "rx_csum_offload_good", IGBVF_STAT(hw_csum_good, zero_base) }, { "rx_csum_offload_errors", IGBVF_STAT(hw_csum_err, zero_base) }, { "rx_header_split", IGBVF_STAT(rx_hdr_split, zero_base) }, diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h index 266bfcf2a28f..a427f05814c1 100644 --- a/drivers/net/ethernet/intel/igc/igc.h +++ b/drivers/net/ethernet/intel/igc/igc.h @@ -345,6 +345,7 @@ struct igc_adapter { /* LEDs */ struct mutex led_mutex; struct igc_led_classdev *leds; + bool leds_available; }; void igc_up(struct igc_adapter *adapter); diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c index ecb35b693ce5..f3e7218ba6f3 100644 --- a/drivers/net/ethernet/intel/igc/igc_ethtool.c +++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c @@ -627,11 +627,11 @@ igc_ethtool_set_ringparam(struct net_device *netdev, } if (adapter->num_tx_queues > adapter->num_rx_queues) - temp_ring = vmalloc(array_size(sizeof(struct igc_ring), - adapter->num_tx_queues)); + temp_ring = vmalloc_array(adapter->num_tx_queues, + sizeof(struct igc_ring)); else - temp_ring = vmalloc(array_size(sizeof(struct igc_ring), - adapter->num_rx_queues)); + temp_ring = vmalloc_array(adapter->num_rx_queues, + sizeof(struct igc_ring)); if (!temp_ring) { err = -ENOMEM; diff --git a/drivers/net/ethernet/intel/igc/igc_i225.c b/drivers/net/ethernet/intel/igc/igc_i225.c index 0dd61719f1ed..5226d10cc95b 100644 --- a/drivers/net/ethernet/intel/igc/igc_i225.c +++ b/drivers/net/ethernet/intel/igc/igc_i225.c @@ -435,7 +435,7 @@ static s32 igc_update_nvm_checksum_i225(struct igc_hw *hw) } checksum += nvm_data; } - checksum 
= (u16)NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = igc_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) { diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index e79b14d50b24..728d7ca5338b 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -7335,8 +7335,14 @@ static int igc_probe(struct pci_dev *pdev, if (IS_ENABLED(CONFIG_IGC_LEDS)) { err = igc_led_setup(adapter); - if (err) - goto err_register; + if (err) { + netdev_warn_once(netdev, + "LED init failed (%d); continuing without LED support\n", + err); + adapter->leds_available = false; + } else { + adapter->leds_available = true; + } } return 0; @@ -7392,7 +7398,7 @@ static void igc_remove(struct pci_dev *pdev) cancel_work_sync(&adapter->watchdog_task); hrtimer_cancel(&adapter->hrtimer); - if (IS_ENABLED(CONFIG_IGC_LEDS)) + if (IS_ENABLED(CONFIG_IGC_LEDS) && adapter->leds_available) igc_led_free(adapter); /* Release control of h/w to f/w. If f/w is AMT enabled, this diff --git a/drivers/net/ethernet/intel/igc/igc_nvm.c b/drivers/net/ethernet/intel/igc/igc_nvm.c index efd121c03967..a47b8d39238c 100644 --- a/drivers/net/ethernet/intel/igc/igc_nvm.c +++ b/drivers/net/ethernet/intel/igc/igc_nvm.c @@ -123,7 +123,7 @@ s32 igc_validate_nvm_checksum(struct igc_hw *hw) checksum += nvm_data; } - if (checksum != (u16)NVM_SUM) { + if (checksum != NVM_SUM) { hw_dbg("NVM Checksum Invalid\n"); ret_val = -IGC_ERR_NVM; goto out; @@ -155,7 +155,7 @@ s32 igc_update_nvm_checksum(struct igc_hw *hw) } checksum += nvm_data; } - checksum = (u16)NVM_SUM - checksum; + checksum = NVM_SUM - checksum; ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); if (ret_val) hw_dbg("NVM Write Error while updating checksum.\n"); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index 4ff19426ab74..3ea6765f9c5d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -1739,9 +1739,9 @@ int ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) } } - checksum = (u16)IXGBE_EEPROM_SUM - checksum; + checksum = IXGBE_EEPROM_SUM - checksum; - return (int)checksum; + return checksum; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c index d74116441d1c..c2f8189a0738 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.c @@ -774,7 +774,7 @@ static void ixgbe_parse_vf_func_caps(struct ixgbe_hw *hw, * from parsing capabilities and use this to calculate the number of resources * per PF based on the max value passed in. * - * Return: the number of resources per PF or 0, if no PH are available. + * Return: the number of resources per PF or 0, if no PFs are available. 
*/ static u32 ixgbe_get_num_per_func(struct ixgbe_hw *hw, u32 max) { @@ -1953,6 +1953,16 @@ int ixgbe_identify_phy_e610(struct ixgbe_hw *hw) phy_type_low & IXGBE_PHY_TYPE_LOW_1G_SGMII || phy_type_high & IXGBE_PHY_TYPE_HIGH_1G_USXGMII) hw->phy.speeds_supported |= IXGBE_LINK_SPEED_1GB_FULL; + if (phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_T || + phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_X || + phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_KX || + phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_SGMII || + phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_2_5GB_FULL; + if (phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_T || + phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_KR || + phy_type_high & IXGBE_PHY_TYPE_HIGH_5G_USXGMII) + hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; if (phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_T || phy_type_low & IXGBE_PHY_TYPE_LOW_10G_SFI_DA || phy_type_low & IXGBE_PHY_TYPE_LOW_10GBASE_SR || @@ -1963,31 +1973,10 @@ int ixgbe_identify_phy_e610(struct ixgbe_hw *hw) phy_type_high & IXGBE_PHY_TYPE_HIGH_10G_USXGMII) hw->phy.speeds_supported |= IXGBE_LINK_SPEED_10GB_FULL; - /* 2.5 and 5 Gbps link speeds must be excluded from the - * auto-negotiation set used during driver initialization due to - * compatibility issues with certain switches. Those issues do not - * exist in case of E610 2.5G SKU device (0x57b1). - */ - if (!hw->phy.autoneg_advertised && - hw->device_id != IXGBE_DEV_ID_E610_2_5G_T) - hw->phy.autoneg_advertised = hw->phy.speeds_supported; - - if (phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_T || - phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_X || - phy_type_low & IXGBE_PHY_TYPE_LOW_2500BASE_KX || - phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_SGMII || - phy_type_high & IXGBE_PHY_TYPE_HIGH_2500M_USXGMII) - hw->phy.speeds_supported |= IXGBE_LINK_SPEED_2_5GB_FULL; - - if (!hw->phy.autoneg_advertised && - hw->device_id == IXGBE_DEV_ID_E610_2_5G_T) + /* Initialize autoneg speeds */ + if (!hw->phy.autoneg_advertised) hw->phy.autoneg_advertised = hw->phy.speeds_supported; - if (phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_T || - phy_type_low & IXGBE_PHY_TYPE_LOW_5GBASE_KR || - phy_type_high & IXGBE_PHY_TYPE_HIGH_5G_USXGMII) - hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL; - /* Set PHY ID */ memcpy(&hw->phy.id, pcaps.phy_id_oui, sizeof(u32)); @@ -3008,50 +2997,71 @@ static int ixgbe_get_nvm_srev(struct ixgbe_hw *hw, * Searches through the Option ROM flash contents to locate the CIVD data for * the image. * - * Return: the exit code of the operation. + * Return: -ENOMEM when cannot allocate memory, -EDOM for checksum violation, + * -ENODATA when cannot find proper data, -EIO for faulty read or + * 0 on success. + * + * On success @civd stores collected data. */ static int ixgbe_get_orom_civd_data(struct ixgbe_hw *hw, enum ixgbe_bank_select bank, struct ixgbe_orom_civd_info *civd) { - struct ixgbe_orom_civd_info tmp; + u32 orom_size = hw->flash.banks.orom_size; + u8 *orom_data; u32 offset; int err; + orom_data = kzalloc(orom_size, GFP_KERNEL); + if (!orom_data) + return -ENOMEM; + + err = ixgbe_read_flash_module(hw, bank, + IXGBE_E610_SR_1ST_OROM_BANK_PTR, 0, + orom_data, orom_size); + if (err) { + err = -EIO; + goto cleanup; + } + /* The CIVD section is located in the Option ROM aligned to 512 bytes. * The first 4 bytes must contain the ASCII characters "$CIV". * A simple modulo 256 sum of all of the bytes of the structure must * equal 0. 
*/ - for (offset = 0; (offset + SZ_512) <= hw->flash.banks.orom_size; - offset += SZ_512) { + for (offset = 0; offset + SZ_512 <= orom_size; offset += SZ_512) { + struct ixgbe_orom_civd_info *tmp; u8 sum = 0; u32 i; - err = ixgbe_read_flash_module(hw, bank, - IXGBE_E610_SR_1ST_OROM_BANK_PTR, - offset, - (u8 *)&tmp, sizeof(tmp)); - if (err) - return err; + BUILD_BUG_ON(sizeof(*tmp) > SZ_512); + + tmp = (struct ixgbe_orom_civd_info *)&orom_data[offset]; /* Skip forward until we find a matching signature */ - if (memcmp(IXGBE_OROM_CIV_SIGNATURE, tmp.signature, - sizeof(tmp.signature))) + if (memcmp(IXGBE_OROM_CIV_SIGNATURE, tmp->signature, + sizeof(tmp->signature))) continue; /* Verify that the simple checksum is zero */ - for (i = 0; i < sizeof(tmp); i++) - sum += ((u8 *)&tmp)[i]; + for (i = 0; i < sizeof(*tmp); i++) + sum += ((u8 *)tmp)[i]; + + if (sum) { + err = -EDOM; + goto cleanup; + } - if (sum) - return -EDOM; + *civd = *tmp; + err = 0; - *civd = tmp; - return 0; + goto cleanup; } - return -ENODATA; + err = -ENODATA; +cleanup: + kfree(orom_data); + return err; } /** @@ -3125,7 +3135,7 @@ static int ixgbe_get_orom_ver_info(struct ixgbe_hw *hw, if (err) return err; - combo_ver = le32_to_cpu(civd.combo_ver); + combo_ver = get_unaligned_le32(&civd.combo_ver); orom->major = (u8)FIELD_GET(IXGBE_OROM_VER_MASK, combo_ver); orom->patch = (u8)FIELD_GET(IXGBE_OROM_VER_PATCH_MASK, combo_ver); @@ -3911,6 +3921,38 @@ static int ixgbe_read_pba_string_e610(struct ixgbe_hw *hw, u8 *pba_num, return err; } +static int __fwlog_send_cmd(void *priv, struct libie_aq_desc *desc, void *buf, + u16 size) +{ + struct ixgbe_hw *hw = priv; + + return ixgbe_aci_send_cmd(hw, desc, buf, size); +} + +int ixgbe_fwlog_init(struct ixgbe_hw *hw) +{ + struct ixgbe_adapter *adapter = hw->back; + struct libie_fwlog_api api = { + .pdev = adapter->pdev, + .send_cmd = __fwlog_send_cmd, + .debugfs_root = adapter->ixgbe_dbg_adapter, + .priv = hw, + }; + + if (hw->mac.type != ixgbe_mac_e610) + return -EOPNOTSUPP; + + return libie_fwlog_init(&hw->fwlog, &api); +} + +void ixgbe_fwlog_deinit(struct ixgbe_hw *hw) +{ + if (hw->mac.type != ixgbe_mac_e610) + return; + + libie_fwlog_deinit(&hw->fwlog); +} + static const struct ixgbe_mac_operations mac_ops_e610 = { .init_hw = ixgbe_init_hw_generic, .start_hw = ixgbe_start_hw_e610, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h index 782c489b0fa7..11916b979d28 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_e610.h @@ -96,5 +96,7 @@ int ixgbe_aci_update_nvm(struct ixgbe_hw *hw, u16 module_typeid, bool last_command, u8 command_flags); int ixgbe_nvm_write_activate(struct ixgbe_hw *hw, u16 cmd_flags, u8 *response_flags); +int ixgbe_fwlog_init(struct ixgbe_hw *hw); +void ixgbe_fwlog_deinit(struct ixgbe_hw *hw); #endif /* _IXGBE_E610_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 25c3a09ad7f1..2d660e9edb80 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -1278,7 +1278,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev, /* allocate temporary buffer to store rings in */ i = max_t(int, adapter->num_tx_queues + adapter->num_xdp_queues, adapter->num_rx_queues); - temp_ring = vmalloc(array_size(i, sizeof(struct ixgbe_ring))); + temp_ring = vmalloc_array(i, sizeof(struct ixgbe_ring)); if (!temp_ring) { err = -ENOMEM; @@ -3571,13 
+3571,13 @@ ixgbe_get_eee_fw(struct ixgbe_adapter *adapter, struct ethtool_keee *edata) for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_supported & ixgbe_ls_map[i].mac_speed) - linkmode_set_bit(ixgbe_lp_map[i].link_mode, + linkmode_set_bit(ixgbe_ls_map[i].link_mode, edata->supported); } for (i = 0; i < ARRAY_SIZE(ixgbe_ls_map); ++i) { if (hw->phy.eee_speeds_advertised & ixgbe_ls_map[i].mac_speed) - linkmode_set_bit(ixgbe_lp_map[i].link_mode, + linkmode_set_bit(ixgbe_ls_map[i].link_mode, edata->advertised); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 80e6a2ef1350..90d4e57b1c93 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -172,6 +172,7 @@ static int debug = -1; module_param(debug, int, 0); MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); +MODULE_IMPORT_NS("LIBIE_FWLOG"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL v2"); @@ -3355,6 +3356,10 @@ static void ixgbe_handle_fw_event(struct ixgbe_adapter *adapter) e_crit(drv, "%s\n", ixgbe_overheat_msg); ixgbe_down(adapter); break; + case libie_aqc_opc_fw_logs_event: + libie_get_fwlog_data(&hw->fwlog, event.msg_buf, + le16_to_cpu(event.desc.datalen)); + break; default: e_warn(hw, "unknown FW async event captured\n"); break; @@ -6973,6 +6978,13 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, break; } + /* Make sure the SWFW semaphore is in a valid state */ + if (hw->mac.ops.init_swfw_sync) + hw->mac.ops.init_swfw_sync(hw); + + if (hw->mac.type == ixgbe_mac_e610) + mutex_init(&hw->aci.lock); + #ifdef IXGBE_FCOE /* FCoE support exists, always init the FCoE lock */ spin_lock_init(&adapter->fcoe.lock); @@ -11643,10 +11655,6 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; - /* Make sure the SWFW semaphore is in a valid state */ - if (hw->mac.ops.init_swfw_sync) - hw->mac.ops.init_swfw_sync(hw); - if (ixgbe_check_fw_error(adapter)) return ixgbe_recovery_probe(adapter); @@ -11850,8 +11858,6 @@ skip_sriov: ether_addr_copy(hw->mac.addr, hw->mac.perm_addr); ixgbe_mac_set_default_filter(adapter); - if (hw->mac.type == ixgbe_mac_e610) - mutex_init(&hw->aci.lock); timer_setup(&adapter->service_timer, ixgbe_service_timer, 0); if (ixgbe_removed(hw->hw_addr)) { @@ -11998,6 +12004,10 @@ skip_sriov: ixgbe_devlink_init_regions(adapter); devl_register(adapter->devlink); devl_unlock(adapter->devlink); + + if (ixgbe_fwlog_init(hw)) + e_dev_info("Firmware logging not supported\n"); + return 0; err_netdev: @@ -12007,9 +12017,9 @@ err_register: devl_unlock(adapter->devlink); ixgbe_release_hw_control(adapter); ixgbe_clear_interrupt_scheme(adapter); +err_sw_init: if (hw->mac.type == ixgbe_mac_e610) mutex_destroy(&adapter->hw.aci.lock); -err_sw_init: ixgbe_disable_sriov(adapter); adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; iounmap(adapter->io_addr); @@ -12055,15 +12065,14 @@ static void ixgbe_remove(struct pci_dev *pdev) devl_lock(adapter->devlink); devl_unregister(adapter->devlink); ixgbe_devlink_destroy_regions(adapter); + ixgbe_fwlog_deinit(&adapter->hw); ixgbe_dbg_adapter_exit(adapter); set_bit(__IXGBE_REMOVING, &adapter->state); cancel_work_sync(&adapter->service_task); - if (adapter->hw.mac.type == ixgbe_mac_e610) { + if (adapter->hw.mac.type == ixgbe_mac_e610) ixgbe_disable_link_status_events(adapter); - mutex_destroy(&adapter->hw.aci.lock); - } if (adapter->mii_bus) 
mdiobus_unregister(adapter->mii_bus); @@ -12123,6 +12132,9 @@ static void ixgbe_remove(struct pci_dev *pdev) disable_dev = !test_and_set_bit(__IXGBE_DISABLED, &adapter->state); free_netdev(netdev); + if (adapter->hw.mac.type == ixgbe_mac_e610) + mutex_destroy(&adapter->hw.aci.lock); + if (disable_dev) pci_disable_device(pdev); } diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 36577091cd9e..b1bfeb21537a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -7,6 +7,7 @@ #include <linux/types.h> #include <linux/mdio.h> #include <linux/netdevice.h> +#include <linux/net/intel/libie/fwlog.h> #include "ixgbe_type_e610.h" /* Device IDs */ @@ -3752,6 +3753,7 @@ struct ixgbe_hw { struct ixgbe_flash_info flash; struct ixgbe_hw_dev_caps dev_caps; struct ixgbe_hw_func_caps func_caps; + struct libie_fwlog fwlog; }; struct ixgbe_info { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h index d2f22d8558f8..ff8d640a50b1 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type_e610.h @@ -932,7 +932,7 @@ struct ixgbe_orom_civd_info { __le32 combo_ver; /* Combo Image Version number */ u8 combo_name_len; /* Length of the unicode combo image version string, max of 32 */ __le16 combo_name[32]; /* Unicode string representing the Combo Image version */ -}; +} __packed; /* Function specific capabilities */ struct ixgbe_hw_func_caps { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index c2353aed0120..e67e2feb045b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -373,9 +373,9 @@ static int ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) } } - checksum = (u16)IXGBE_EEPROM_SUM - checksum; + checksum = IXGBE_EEPROM_SUM - checksum; - return (int)checksum; + return checksum; } /** diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index bfa647086c70..76d2fa3ef518 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -1060,9 +1060,9 @@ static int ixgbe_calc_checksum_X550(struct ixgbe_hw *hw, u16 *buffer, return status; } - checksum = (u16)IXGBE_EEPROM_SUM - checksum; + checksum = IXGBE_EEPROM_SUM - checksum; - return (int)checksum; + return checksum; } /** ixgbe_calc_eeprom_checksum_X550 - Calculates and returns the checksum @@ -1163,7 +1163,7 @@ static int ixgbe_validate_eeprom_checksum_X550(struct ixgbe_hw *hw, return status; } -/** ixgbe_write_ee_hostif_X550 - Write EEPROM word using hostif +/** ixgbe_write_ee_hostif_data_X550 - Write EEPROM word using hostif * @hw: pointer to hardware structure * @offset: offset of word in the EEPROM to write * @data: word write to the EEPROM @@ -2318,7 +2318,7 @@ static int ixgbe_get_link_capabilities_X550em(struct ixgbe_hw *hw, } /** - * ixgbe_get_lasi_ext_t_x550em - Determime external Base T PHY interrupt cause + * ixgbe_get_lasi_ext_t_x550em - Determine external Base T PHY interrupt cause * @hw: pointer to hardware structure * @lsc: pointer to boolean flag which indicates whether external Base T * PHY interrupt is lsc @@ -2628,7 +2628,7 @@ static int ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up) } /** ixgbe_setup_internal_phy_t_x550em - Configure KR PHY to X557 link - * @hw: 
point to hardware structure + * @hw: pointer to hardware structure * * Configures the link between the integrated KR PHY and the external X557 PHY * The driver will call this function when it gets a link status change @@ -2745,7 +2745,7 @@ static int ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) if (led_idx >= IXGBE_X557_MAX_LED_INDEX) return -EINVAL; - /* To turn on the LED, set mode to ON. */ + /* To turn off the LED, set mode to OFF. */ hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, MDIO_MMD_VEND1, &phy_data); phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK; @@ -2812,7 +2812,7 @@ int ixgbe_set_fw_drv_ver_x550(struct ixgbe_hw *hw, u8 maj, u8 min, return ret_val; } -/** ixgbe_get_lcd_x550em - Determine lowest common denominator +/** ixgbe_get_lcd_t_x550em - Determine lowest common denominator * @hw: pointer to hardware structure * @lcd_speed: pointer to lowest common link speed * diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c index 7ac53171b041..bebad564188e 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c +++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c @@ -276,9 +276,9 @@ static int ixgbevf_set_ringparam(struct net_device *netdev, } if (new_tx_count != adapter->tx_ring_count) { - tx_ring = vmalloc(array_size(sizeof(*tx_ring), - adapter->num_tx_queues + - adapter->num_xdp_queues)); + tx_ring = vmalloc_array(adapter->num_tx_queues + + adapter->num_xdp_queues, + sizeof(*tx_ring)); if (!tx_ring) { err = -ENOMEM; goto clear_reset; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 535d0f71f521..28e25641b167 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -4323,7 +4323,7 @@ static int ixgbevf_resume(struct device *dev_d) struct pci_dev *pdev = to_pci_dev(dev_d); struct net_device *netdev = pci_get_drvdata(pdev); struct ixgbevf_adapter *adapter = netdev_priv(netdev); - u32 err; + int err; adapter->hw.hw_addr = adapter->io_addr; smp_mb__before_atomic(); diff --git a/drivers/net/ethernet/intel/libie/Kconfig b/drivers/net/ethernet/intel/libie/Kconfig index e6072758e3d8..70831c7e336e 100644 --- a/drivers/net/ethernet/intel/libie/Kconfig +++ b/drivers/net/ethernet/intel/libie/Kconfig @@ -14,3 +14,12 @@ config LIBIE_ADMINQ help Helper functions used by Intel Ethernet drivers for administration queue command interface (aka adminq). + +config LIBIE_FWLOG + tristate + select LIBIE_ADMINQ + help + Library to support firmware logging on device that have support + for it. Firmware logging is using admin queue interface to communicate + with the device. Debugfs is a user interface used to config logging + and dump all collected logs. 
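/*
 * Editor's illustration only -- not part of the patch.  A minimal sketch of
 * how a consumer driver is expected to wire up the new LIBIE_FWLOG library,
 * modeled on the ixgbe hunks elsewhere in this series.  The "my_*" types and
 * the admin-queue helper are hypothetical placeholders; only the
 * libie_fwlog_*() calls, the libie_fwlog_api fields, the fwlog.h header and
 * the libie_aqc_opc_fw_logs_event opcode come from this diff.  Assumes the
 * driver's hw struct embeds a struct libie_fwlog, as ixgbe_hw does here.
 */
#include <linux/net/intel/libie/fwlog.h>

MODULE_IMPORT_NS("LIBIE_FWLOG");

/* forward FW log admin queue commands through the driver's own AQ path */
static int my_fwlog_send_cmd(void *priv, struct libie_aq_desc *desc,
			     void *buf, u16 size)
{
	struct my_hw *hw = priv;			/* hypothetical */

	return my_aq_send_cmd(hw, desc, buf, size);	/* hypothetical */
}

static int my_fwlog_probe(struct my_adapter *adapter)	/* hypothetical */
{
	struct libie_fwlog_api api = {
		.pdev		= adapter->pdev,
		.send_cmd	= my_fwlog_send_cmd,
		.debugfs_root	= adapter->debugfs_root,
		.priv		= &adapter->hw,
	};

	/* creates <debugfs_root>/fwlog/{modules/,nr_messages,enable,log_size,data} */
	return libie_fwlog_init(&adapter->hw.fwlog, &api);
}

/* from the ARQ service loop: hand FW log events to the library's ring */
static void my_handle_fw_event(struct my_adapter *adapter,
			       struct my_arq_event *event)	/* hypothetical */
{
	if (le16_to_cpu(event->desc.opcode) == libie_aqc_opc_fw_logs_event)
		libie_get_fwlog_data(&adapter->hw.fwlog, event->msg_buf,
				     le16_to_cpu(event->desc.datalen));
}

static void my_fwlog_remove(struct my_adapter *adapter)
{
	/* disables FW logging in the device and tears down the debugfs files;
	 * libie_fwlog_reregister() is also exported for re-arming after reset
	 */
	libie_fwlog_deinit(&adapter->hw.fwlog);
}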
diff --git a/drivers/net/ethernet/intel/libie/Makefile b/drivers/net/ethernet/intel/libie/Makefile index e98f00b865d3..db57fc6780ea 100644 --- a/drivers/net/ethernet/intel/libie/Makefile +++ b/drivers/net/ethernet/intel/libie/Makefile @@ -8,3 +8,7 @@ libie-y := rx.o obj-$(CONFIG_LIBIE_ADMINQ) += libie_adminq.o libie_adminq-y := adminq.o + +obj-$(CONFIG_LIBIE_FWLOG) += libie_fwlog.o + +libie_fwlog-y := fwlog.o diff --git a/drivers/net/ethernet/intel/libie/adminq.c b/drivers/net/ethernet/intel/libie/adminq.c index 55356548e3f0..7b4ff479e7e5 100644 --- a/drivers/net/ethernet/intel/libie/adminq.c +++ b/drivers/net/ethernet/intel/libie/adminq.c @@ -6,7 +6,7 @@ static const char * const libie_aq_str_arr[] = { #define LIBIE_AQ_STR(x) \ - [LIBIE_AQ_RC_##x] = "LIBIE_AQ_RC" #x + [LIBIE_AQ_RC_##x] = "LIBIE_AQ_RC_" #x LIBIE_AQ_STR(OK), LIBIE_AQ_STR(EPERM), LIBIE_AQ_STR(ENOENT), diff --git a/drivers/net/ethernet/intel/libie/fwlog.c b/drivers/net/ethernet/intel/libie/fwlog.c new file mode 100644 index 000000000000..f39cc11cb7c5 --- /dev/null +++ b/drivers/net/ethernet/intel/libie/fwlog.c @@ -0,0 +1,1115 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2022, Intel Corporation. */ + +#include <linux/debugfs.h> +#include <linux/export.h> +#include <linux/fs.h> +#include <linux/net/intel/libie/fwlog.h> +#include <linux/pci.h> +#include <linux/random.h> +#include <linux/vmalloc.h> + +#define DEFAULT_SYMBOL_NAMESPACE "LIBIE_FWLOG" + +/* create a define that has an extra module that doesn't really exist. this + * is so we can add a module 'all' to easily enable/disable all the modules + */ +#define LIBIE_NR_FW_LOG_MODULES (LIBIE_AQC_FW_LOG_ID_MAX + 1) + +/* the ordering in this array is important. it matches the ordering of the + * values in the FW so the index is the same value as in + * libie_aqc_fw_logging_mod + */ +static const char * const libie_fwlog_module_string[] = { + "general", + "ctrl", + "link", + "link_topo", + "dnl", + "i2c", + "sdp", + "mdio", + "adminq", + "hdma", + "lldp", + "dcbx", + "dcb", + "xlr", + "nvm", + "auth", + "vpd", + "iosf", + "parser", + "sw", + "scheduler", + "txq", + "rsvd", + "post", + "watchdog", + "task_dispatch", + "mng", + "synce", + "health", + "tsdrv", + "pfreg", + "mdlver", + "all", +}; + +/* the ordering in this array is important. 
it matches the ordering of the + * values in the FW so the index is the same value as in libie_fwlog_level + */ +static const char * const libie_fwlog_level_string[] = { + "none", + "error", + "warning", + "normal", + "verbose", +}; + +static const char * const libie_fwlog_log_size[] = { + "128K", + "256K", + "512K", + "1M", + "2M", +}; + +static bool libie_fwlog_ring_empty(struct libie_fwlog_ring *rings) +{ + return rings->head == rings->tail; +} + +static void libie_fwlog_ring_increment(u16 *item, u16 size) +{ + *item = (*item + 1) & (size - 1); +} + +static int libie_fwlog_alloc_ring_buffs(struct libie_fwlog_ring *rings) +{ + int i, nr_bytes; + u8 *mem; + + nr_bytes = rings->size * LIBIE_AQ_MAX_BUF_LEN; + mem = vzalloc(nr_bytes); + if (!mem) + return -ENOMEM; + + for (i = 0; i < rings->size; i++) { + struct libie_fwlog_data *ring = &rings->rings[i]; + + ring->data_size = LIBIE_AQ_MAX_BUF_LEN; + ring->data = mem; + mem += LIBIE_AQ_MAX_BUF_LEN; + } + + return 0; +} + +static void libie_fwlog_free_ring_buffs(struct libie_fwlog_ring *rings) +{ + int i; + + for (i = 0; i < rings->size; i++) { + struct libie_fwlog_data *ring = &rings->rings[i]; + + /* the first ring is the base memory for the whole range so + * free it + */ + if (!i) + vfree(ring->data); + + ring->data = NULL; + ring->data_size = 0; + } +} + +#define LIBIE_FWLOG_INDEX_TO_BYTES(n) ((128 * 1024) << (n)) +/** + * libie_fwlog_realloc_rings - reallocate the FW log rings + * @fwlog: pointer to the fwlog structure + * @index: the new index to use to allocate memory for the log data + * + */ +static void libie_fwlog_realloc_rings(struct libie_fwlog *fwlog, int index) +{ + struct libie_fwlog_ring ring; + int status, ring_size; + + /* convert the number of bytes into a number of 4K buffers. externally + * the driver presents the interface to the FW log data as a number of + * bytes because that's easy for users to understand. internally the + * driver uses a ring of buffers because the driver doesn't know where + * the beginning and end of any line of log data is so the driver has + * to overwrite data as complete blocks. when the data is returned to + * the user the driver knows that the data is correct and the FW log + * can be correctly parsed by the tools + */ + ring_size = LIBIE_FWLOG_INDEX_TO_BYTES(index) / LIBIE_AQ_MAX_BUF_LEN; + if (ring_size == fwlog->ring.size) + return; + + /* allocate space for the new rings and buffers then release the + * old rings and buffers. that way if we don't have enough + * memory then we at least have what we had before + */ + ring.rings = kcalloc(ring_size, sizeof(*ring.rings), GFP_KERNEL); + if (!ring.rings) + return; + + ring.size = ring_size; + + status = libie_fwlog_alloc_ring_buffs(&ring); + if (status) { + dev_warn(&fwlog->pdev->dev, "Unable to allocate memory for FW log ring data buffers\n"); + libie_fwlog_free_ring_buffs(&ring); + kfree(ring.rings); + return; + } + + libie_fwlog_free_ring_buffs(&fwlog->ring); + kfree(fwlog->ring.rings); + + fwlog->ring.rings = ring.rings; + fwlog->ring.size = ring.size; + fwlog->ring.index = index; + fwlog->ring.head = 0; + fwlog->ring.tail = 0; +} + +/** + * libie_fwlog_supported - Cached for whether FW supports FW logging or not + * @fwlog: pointer to the fwlog structure + * + * This will always return false if called before libie_init_hw(), so it must be + * called after libie_init_hw(). 
+ */ +static bool libie_fwlog_supported(struct libie_fwlog *fwlog) +{ + return fwlog->supported; +} + +/** + * libie_aq_fwlog_set - Set FW logging configuration AQ command (0xFF30) + * @fwlog: pointer to the fwlog structure + * @entries: entries to configure + * @num_entries: number of @entries + * @options: options from libie_fwlog_cfg->options structure + * @log_resolution: logging resolution + */ +static int +libie_aq_fwlog_set(struct libie_fwlog *fwlog, + struct libie_fwlog_module_entry *entries, u16 num_entries, + u16 options, u16 log_resolution) +{ + struct libie_aqc_fw_log_cfg_resp *fw_modules; + struct libie_aq_desc desc = {0}; + struct libie_aqc_fw_log *cmd; + int status; + int i; + + fw_modules = kcalloc(num_entries, sizeof(*fw_modules), GFP_KERNEL); + if (!fw_modules) + return -ENOMEM; + + for (i = 0; i < num_entries; i++) { + fw_modules[i].module_identifier = + cpu_to_le16(entries[i].module_id); + fw_modules[i].log_level = entries[i].log_level; + } + + desc.opcode = cpu_to_le16(libie_aqc_opc_fw_logs_config); + desc.flags = cpu_to_le16(LIBIE_AQ_FLAG_SI) | + cpu_to_le16(LIBIE_AQ_FLAG_RD); + + cmd = libie_aq_raw(&desc); + + cmd->cmd_flags = LIBIE_AQC_FW_LOG_CONF_SET_VALID; + cmd->ops.cfg.log_resolution = cpu_to_le16(log_resolution); + cmd->ops.cfg.mdl_cnt = cpu_to_le16(num_entries); + + if (options & LIBIE_FWLOG_OPTION_ARQ_ENA) + cmd->cmd_flags |= LIBIE_AQC_FW_LOG_CONF_AQ_EN; + if (options & LIBIE_FWLOG_OPTION_UART_ENA) + cmd->cmd_flags |= LIBIE_AQC_FW_LOG_CONF_UART_EN; + + status = fwlog->send_cmd(fwlog->priv, &desc, fw_modules, + sizeof(*fw_modules) * num_entries); + + kfree(fw_modules); + + return status; +} + +/** + * libie_fwlog_set - Set the firmware logging settings + * @fwlog: pointer to the fwlog structure + * @cfg: config used to set firmware logging + * + * This function should be called whenever the driver needs to set the firmware + * logging configuration. It can be called on initialization, reset, or during + * runtime. + * + * If the PF wishes to receive FW logging then it must register via + * libie_fwlog_register. Note, that libie_fwlog_register does not need to be called + * for init. + */ +static int libie_fwlog_set(struct libie_fwlog *fwlog, + struct libie_fwlog_cfg *cfg) +{ + if (!libie_fwlog_supported(fwlog)) + return -EOPNOTSUPP; + + return libie_aq_fwlog_set(fwlog, cfg->module_entries, + LIBIE_AQC_FW_LOG_ID_MAX, cfg->options, + cfg->log_resolution); +} + +/** + * libie_aq_fwlog_register - Register PF for firmware logging events (0xFF31) + * @fwlog: pointer to the fwlog structure + * @reg: true to register and false to unregister + */ +static int libie_aq_fwlog_register(struct libie_fwlog *fwlog, bool reg) +{ + struct libie_aq_desc desc = {0}; + struct libie_aqc_fw_log *cmd; + + desc.opcode = cpu_to_le16(libie_aqc_opc_fw_logs_register); + desc.flags = cpu_to_le16(LIBIE_AQ_FLAG_SI); + cmd = libie_aq_raw(&desc); + + if (reg) + cmd->cmd_flags = LIBIE_AQC_FW_LOG_AQ_REGISTER; + + return fwlog->send_cmd(fwlog->priv, &desc, NULL, 0); +} + +/** + * libie_fwlog_register - Register the PF for firmware logging + * @fwlog: pointer to the fwlog structure + * + * After this call the PF will start to receive firmware logging based on the + * configuration set in libie_fwlog_set. 
+ */ +static int libie_fwlog_register(struct libie_fwlog *fwlog) +{ + int status; + + if (!libie_fwlog_supported(fwlog)) + return -EOPNOTSUPP; + + status = libie_aq_fwlog_register(fwlog, true); + if (status) + dev_dbg(&fwlog->pdev->dev, "Failed to register for firmware logging events over ARQ\n"); + else + fwlog->cfg.options |= LIBIE_FWLOG_OPTION_IS_REGISTERED; + + return status; +} + +/** + * libie_fwlog_unregister - Unregister the PF from firmware logging + * @fwlog: pointer to the fwlog structure + */ +static int libie_fwlog_unregister(struct libie_fwlog *fwlog) +{ + int status; + + if (!libie_fwlog_supported(fwlog)) + return -EOPNOTSUPP; + + status = libie_aq_fwlog_register(fwlog, false); + if (status) + dev_dbg(&fwlog->pdev->dev, "Failed to unregister from firmware logging events over ARQ\n"); + else + fwlog->cfg.options &= ~LIBIE_FWLOG_OPTION_IS_REGISTERED; + + return status; +} + +/** + * libie_fwlog_print_module_cfg - print current FW logging module configuration + * @cfg: pointer to the fwlog cfg structure + * @module: module to print + * @s: the seq file to put data into + */ +static void +libie_fwlog_print_module_cfg(struct libie_fwlog_cfg *cfg, int module, + struct seq_file *s) +{ + struct libie_fwlog_module_entry *entry; + + if (module != LIBIE_AQC_FW_LOG_ID_MAX) { + entry = &cfg->module_entries[module]; + + seq_printf(s, "\tModule: %s, Log Level: %s\n", + libie_fwlog_module_string[entry->module_id], + libie_fwlog_level_string[entry->log_level]); + } else { + int i; + + for (i = 0; i < LIBIE_AQC_FW_LOG_ID_MAX; i++) { + entry = &cfg->module_entries[i]; + + seq_printf(s, "\tModule: %s, Log Level: %s\n", + libie_fwlog_module_string[entry->module_id], + libie_fwlog_level_string[entry->log_level]); + } + } +} + +static int libie_find_module_by_dentry(struct dentry **modules, struct dentry *d) +{ + int i, module; + + module = -1; + /* find the module based on the dentry */ + for (i = 0; i < LIBIE_NR_FW_LOG_MODULES; i++) { + if (d == modules[i]) { + module = i; + break; + } + } + + return module; +} + +/** + * libie_debugfs_module_show - read from 'module' file + * @s: the opened file + * @v: pointer to the offset + */ +static int libie_debugfs_module_show(struct seq_file *s, void *v) +{ + struct libie_fwlog *fwlog = s->private; + const struct file *filp = s->file; + struct dentry *dentry; + int module; + + dentry = file_dentry(filp); + + module = libie_find_module_by_dentry(fwlog->debugfs_modules, dentry); + if (module < 0) { + dev_info(&fwlog->pdev->dev, "unknown module\n"); + return -EINVAL; + } + + libie_fwlog_print_module_cfg(&fwlog->cfg, module, s); + + return 0; +} + +static int libie_debugfs_module_open(struct inode *inode, struct file *filp) +{ + return single_open(filp, libie_debugfs_module_show, inode->i_private); +} + +/** + * libie_debugfs_module_write - write into 'module' file + * @filp: the opened file + * @buf: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + */ +static ssize_t +libie_debugfs_module_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct libie_fwlog *fwlog = file_inode(filp)->i_private; + struct dentry *dentry = file_dentry(filp); + struct device *dev = &fwlog->pdev->dev; + char user_val[16], *cmd_buf; + int module, log_level, cnt; + + /* don't allow partial writes or invalid input */ + if (*ppos != 0 || count > 8) + return -EINVAL; + + cmd_buf = memdup_user_nul(buf, count); + if (IS_ERR(cmd_buf)) + return PTR_ERR(cmd_buf); + + module = 
libie_find_module_by_dentry(fwlog->debugfs_modules, dentry); + if (module < 0) { + dev_info(dev, "unknown module\n"); + return -EINVAL; + } + + cnt = sscanf(cmd_buf, "%s", user_val); + if (cnt != 1) + return -EINVAL; + + log_level = sysfs_match_string(libie_fwlog_level_string, user_val); + if (log_level < 0) { + dev_info(dev, "unknown log level '%s'\n", user_val); + return -EINVAL; + } + + if (module != LIBIE_AQC_FW_LOG_ID_MAX) { + fwlog->cfg.module_entries[module].log_level = log_level; + } else { + /* the module 'all' is a shortcut so that we can set + * all of the modules to the same level quickly + */ + int i; + + for (i = 0; i < LIBIE_AQC_FW_LOG_ID_MAX; i++) + fwlog->cfg.module_entries[i].log_level = log_level; + } + + return count; +} + +static const struct file_operations libie_debugfs_module_fops = { + .owner = THIS_MODULE, + .open = libie_debugfs_module_open, + .read = seq_read, + .release = single_release, + .write = libie_debugfs_module_write, +}; + +/** + * libie_debugfs_nr_messages_read - read from 'nr_messages' file + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + */ +static ssize_t libie_debugfs_nr_messages_read(struct file *filp, + char __user *buffer, size_t count, + loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + char buff[32] = {}; + + snprintf(buff, sizeof(buff), "%d\n", + fwlog->cfg.log_resolution); + + return simple_read_from_buffer(buffer, count, ppos, buff, strlen(buff)); +} + +/** + * libie_debugfs_nr_messages_write - write into 'nr_messages' file + * @filp: the opened file + * @buf: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + */ +static ssize_t +libie_debugfs_nr_messages_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + struct device *dev = &fwlog->pdev->dev; + char user_val[8], *cmd_buf; + s16 nr_messages; + ssize_t ret; + + /* don't allow partial writes or invalid input */ + if (*ppos != 0 || count > 4) + return -EINVAL; + + cmd_buf = memdup_user_nul(buf, count); + if (IS_ERR(cmd_buf)) + return PTR_ERR(cmd_buf); + + ret = sscanf(cmd_buf, "%s", user_val); + if (ret != 1) + return -EINVAL; + + ret = kstrtos16(user_val, 0, &nr_messages); + if (ret) + return ret; + + if (nr_messages < LIBIE_AQC_FW_LOG_MIN_RESOLUTION || + nr_messages > LIBIE_AQC_FW_LOG_MAX_RESOLUTION) { + dev_err(dev, "Invalid FW log number of messages %d, value must be between %d - %d\n", + nr_messages, LIBIE_AQC_FW_LOG_MIN_RESOLUTION, + LIBIE_AQC_FW_LOG_MAX_RESOLUTION); + return -EINVAL; + } + + fwlog->cfg.log_resolution = nr_messages; + + return count; +} + +static const struct file_operations libie_debugfs_nr_messages_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = libie_debugfs_nr_messages_read, + .write = libie_debugfs_nr_messages_write, +}; + +/** + * libie_debugfs_enable_read - read from 'enable' file + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + */ +static ssize_t libie_debugfs_enable_read(struct file *filp, + char __user *buffer, size_t count, + loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + char buff[32] = {}; + + snprintf(buff, sizeof(buff), "%u\n", + (u16)(fwlog->cfg.options & + LIBIE_FWLOG_OPTION_IS_REGISTERED) >> 3); + + return simple_read_from_buffer(buffer, 
count, ppos, buff, strlen(buff)); +} + +/** + * libie_debugfs_enable_write - write into 'enable' file + * @filp: the opened file + * @buf: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + */ +static ssize_t +libie_debugfs_enable_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + char user_val[8], *cmd_buf; + bool enable; + ssize_t ret; + + /* don't allow partial writes or invalid input */ + if (*ppos != 0 || count > 2) + return -EINVAL; + + cmd_buf = memdup_user_nul(buf, count); + if (IS_ERR(cmd_buf)) + return PTR_ERR(cmd_buf); + + ret = sscanf(cmd_buf, "%s", user_val); + if (ret != 1) + return -EINVAL; + + ret = kstrtobool(user_val, &enable); + if (ret) + goto enable_write_error; + + if (enable) + fwlog->cfg.options |= LIBIE_FWLOG_OPTION_ARQ_ENA; + else + fwlog->cfg.options &= ~LIBIE_FWLOG_OPTION_ARQ_ENA; + + ret = libie_fwlog_set(fwlog, &fwlog->cfg); + if (ret) + goto enable_write_error; + + if (enable) + ret = libie_fwlog_register(fwlog); + else + ret = libie_fwlog_unregister(fwlog); + + if (ret) + goto enable_write_error; + + /* if we get here, nothing went wrong; return count since we didn't + * really write anything + */ + ret = (ssize_t)count; + +enable_write_error: + /* This function always consumes all of the written input, or produces + * an error. Check and enforce this. Otherwise, the write operation + * won't complete properly. + */ + if (WARN_ON(ret != (ssize_t)count && ret >= 0)) + ret = -EIO; + + return ret; +} + +static const struct file_operations libie_debugfs_enable_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = libie_debugfs_enable_read, + .write = libie_debugfs_enable_write, +}; + +/** + * libie_debugfs_log_size_read - read from 'log_size' file + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + */ +static ssize_t libie_debugfs_log_size_read(struct file *filp, + char __user *buffer, size_t count, + loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + char buff[32] = {}; + int index; + + index = fwlog->ring.index; + snprintf(buff, sizeof(buff), "%s\n", libie_fwlog_log_size[index]); + + return simple_read_from_buffer(buffer, count, ppos, buff, strlen(buff)); +} + +/** + * libie_debugfs_log_size_write - write into 'log_size' file + * @filp: the opened file + * @buf: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + */ +static ssize_t +libie_debugfs_log_size_write(struct file *filp, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + struct device *dev = &fwlog->pdev->dev; + char user_val[8], *cmd_buf; + ssize_t ret; + int index; + + /* don't allow partial writes or invalid input */ + if (*ppos != 0 || count > 5) + return -EINVAL; + + cmd_buf = memdup_user_nul(buf, count); + if (IS_ERR(cmd_buf)) + return PTR_ERR(cmd_buf); + + ret = sscanf(cmd_buf, "%s", user_val); + if (ret != 1) + return -EINVAL; + + index = sysfs_match_string(libie_fwlog_log_size, user_val); + if (index < 0) { + dev_info(dev, "Invalid log size '%s'. The value must be one of 128K, 256K, 512K, 1M, 2M\n", + user_val); + ret = -EINVAL; + goto log_size_write_error; + } else if (fwlog->cfg.options & LIBIE_FWLOG_OPTION_IS_REGISTERED) { + dev_info(dev, "FW logging is currently running. 
Please disable FW logging to change log_size\n"); + ret = -EINVAL; + goto log_size_write_error; + } + + /* free all the buffers and the tracking info and resize */ + libie_fwlog_realloc_rings(fwlog, index); + + /* if we get here, nothing went wrong; return count since we didn't + * really write anything + */ + ret = (ssize_t)count; + +log_size_write_error: + /* This function always consumes all of the written input, or produces + * an error. Check and enforce this. Otherwise, the write operation + * won't complete properly. + */ + if (WARN_ON(ret != (ssize_t)count && ret >= 0)) + ret = -EIO; + + return ret; +} + +static const struct file_operations libie_debugfs_log_size_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = libie_debugfs_log_size_read, + .write = libie_debugfs_log_size_write, +}; + +/** + * libie_debugfs_data_read - read from 'data' file + * @filp: the opened file + * @buffer: where to write the data for the user to read + * @count: the size of the user's buffer + * @ppos: file position offset + */ +static ssize_t libie_debugfs_data_read(struct file *filp, char __user *buffer, + size_t count, loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + int data_copied = 0; + bool done = false; + + if (libie_fwlog_ring_empty(&fwlog->ring)) + return 0; + + while (!libie_fwlog_ring_empty(&fwlog->ring) && !done) { + struct libie_fwlog_data *log; + u16 cur_buf_len; + + log = &fwlog->ring.rings[fwlog->ring.head]; + cur_buf_len = log->data_size; + if (cur_buf_len >= count) { + done = true; + continue; + } + + if (copy_to_user(buffer, log->data, cur_buf_len)) { + /* if there is an error then bail and return whatever + * the driver has copied so far + */ + done = true; + continue; + } + + data_copied += cur_buf_len; + buffer += cur_buf_len; + count -= cur_buf_len; + *ppos += cur_buf_len; + libie_fwlog_ring_increment(&fwlog->ring.head, fwlog->ring.size); + } + + return data_copied; +} + +/** + * libie_debugfs_data_write - write into 'data' file + * @filp: the opened file + * @buf: where to find the user's data + * @count: the length of the user's data + * @ppos: file position offset + */ +static ssize_t +libie_debugfs_data_write(struct file *filp, const char __user *buf, size_t count, + loff_t *ppos) +{ + struct libie_fwlog *fwlog = filp->private_data; + struct device *dev = &fwlog->pdev->dev; + ssize_t ret; + + /* don't allow partial writes */ + if (*ppos != 0) + return 0; + + /* any value is allowed to clear the buffer so no need to even look at + * what the value is + */ + if (!(fwlog->cfg.options & LIBIE_FWLOG_OPTION_IS_REGISTERED)) { + fwlog->ring.head = 0; + fwlog->ring.tail = 0; + } else { + dev_info(dev, "Can't clear FW log data while FW log running\n"); + ret = -EINVAL; + goto nr_buffs_write_error; + } + + /* if we get here, nothing went wrong; return count since we didn't + * really write anything + */ + ret = (ssize_t)count; + +nr_buffs_write_error: + /* This function always consumes all of the written input, or produces + * an error. Check and enforce this. Otherwise, the write operation + * won't complete properly. 
+ */ + if (WARN_ON(ret != (ssize_t)count && ret >= 0)) + ret = -EIO; + + return ret; +} + +static const struct file_operations libie_debugfs_data_fops = { + .owner = THIS_MODULE, + .open = simple_open, + .read = libie_debugfs_data_read, + .write = libie_debugfs_data_write, +}; + +/** + * libie_debugfs_fwlog_init - setup the debugfs directory + * @fwlog: pointer to the fwlog structure + * @root: debugfs root entry on which fwlog director will be registered + */ +static void libie_debugfs_fwlog_init(struct libie_fwlog *fwlog, + struct dentry *root) +{ + struct dentry *fw_modules_dir; + struct dentry **fw_modules; + int i; + + /* allocate space for this first because if it fails then we don't + * need to unwind + */ + fw_modules = kcalloc(LIBIE_NR_FW_LOG_MODULES, sizeof(*fw_modules), + GFP_KERNEL); + if (!fw_modules) + return; + + fwlog->debugfs = debugfs_create_dir("fwlog", root); + if (IS_ERR(fwlog->debugfs)) + goto err_create_module_files; + + fw_modules_dir = debugfs_create_dir("modules", fwlog->debugfs); + if (IS_ERR(fw_modules_dir)) + goto err_create_module_files; + + for (i = 0; i < LIBIE_NR_FW_LOG_MODULES; i++) { + fw_modules[i] = debugfs_create_file(libie_fwlog_module_string[i], + 0600, fw_modules_dir, fwlog, + &libie_debugfs_module_fops); + if (IS_ERR(fw_modules[i])) + goto err_create_module_files; + } + + debugfs_create_file("nr_messages", 0600, fwlog->debugfs, fwlog, + &libie_debugfs_nr_messages_fops); + + fwlog->debugfs_modules = fw_modules; + + debugfs_create_file("enable", 0600, fwlog->debugfs, fwlog, + &libie_debugfs_enable_fops); + + debugfs_create_file("log_size", 0600, fwlog->debugfs, fwlog, + &libie_debugfs_log_size_fops); + + debugfs_create_file("data", 0600, fwlog->debugfs, fwlog, + &libie_debugfs_data_fops); + + return; + +err_create_module_files: + debugfs_remove_recursive(fwlog->debugfs); + kfree(fw_modules); +} + +static bool libie_fwlog_ring_full(struct libie_fwlog_ring *rings) +{ + u16 head, tail; + + head = rings->head; + tail = rings->tail; + + if (head < tail && (tail - head == (rings->size - 1))) + return true; + else if (head > tail && (tail == (head - 1))) + return true; + + return false; +} + +/** + * libie_aq_fwlog_get - Get the current firmware logging configuration (0xFF32) + * @fwlog: pointer to the fwlog structure + * @cfg: firmware logging configuration to populate + */ +static int libie_aq_fwlog_get(struct libie_fwlog *fwlog, + struct libie_fwlog_cfg *cfg) +{ + struct libie_aqc_fw_log_cfg_resp *fw_modules; + struct libie_aq_desc desc = {0}; + struct libie_aqc_fw_log *cmd; + u16 module_id_cnt; + int status; + void *buf; + int i; + + memset(cfg, 0, sizeof(*cfg)); + + buf = kzalloc(LIBIE_AQ_MAX_BUF_LEN, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + desc.opcode = cpu_to_le16(libie_aqc_opc_fw_logs_query); + desc.flags = cpu_to_le16(LIBIE_AQ_FLAG_SI); + cmd = libie_aq_raw(&desc); + + cmd->cmd_flags = LIBIE_AQC_FW_LOG_AQ_QUERY; + + status = fwlog->send_cmd(fwlog->priv, &desc, buf, LIBIE_AQ_MAX_BUF_LEN); + if (status) { + dev_dbg(&fwlog->pdev->dev, "Failed to get FW log configuration\n"); + goto status_out; + } + + module_id_cnt = le16_to_cpu(cmd->ops.cfg.mdl_cnt); + if (module_id_cnt < LIBIE_AQC_FW_LOG_ID_MAX) { + dev_dbg(&fwlog->pdev->dev, "FW returned less than the expected number of FW log module IDs\n"); + } else if (module_id_cnt > LIBIE_AQC_FW_LOG_ID_MAX) { + dev_dbg(&fwlog->pdev->dev, "FW returned more than expected number of FW log module IDs, setting module_id_cnt to software expected max %u\n", + LIBIE_AQC_FW_LOG_ID_MAX); + module_id_cnt = 
LIBIE_AQC_FW_LOG_ID_MAX; + } + + cfg->log_resolution = le16_to_cpu(cmd->ops.cfg.log_resolution); + if (cmd->cmd_flags & LIBIE_AQC_FW_LOG_CONF_AQ_EN) + cfg->options |= LIBIE_FWLOG_OPTION_ARQ_ENA; + if (cmd->cmd_flags & LIBIE_AQC_FW_LOG_CONF_UART_EN) + cfg->options |= LIBIE_FWLOG_OPTION_UART_ENA; + if (cmd->cmd_flags & LIBIE_AQC_FW_LOG_QUERY_REGISTERED) + cfg->options |= LIBIE_FWLOG_OPTION_IS_REGISTERED; + + fw_modules = (struct libie_aqc_fw_log_cfg_resp *)buf; + + for (i = 0; i < module_id_cnt; i++) { + struct libie_aqc_fw_log_cfg_resp *fw_module = &fw_modules[i]; + + cfg->module_entries[i].module_id = + le16_to_cpu(fw_module->module_identifier); + cfg->module_entries[i].log_level = fw_module->log_level; + } + +status_out: + kfree(buf); + return status; +} + +/** + * libie_fwlog_set_supported - Set if FW logging is supported by FW + * @fwlog: pointer to the fwlog structure + * + * If FW returns success to the libie_aq_fwlog_get call then it supports FW + * logging, else it doesn't. Set the fwlog_supported flag accordingly. + * + * This function is only meant to be called during driver init to determine if + * the FW supports FW logging. + */ +static void libie_fwlog_set_supported(struct libie_fwlog *fwlog) +{ + struct libie_fwlog_cfg *cfg; + int status; + + fwlog->supported = false; + + cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); + if (!cfg) + return; + + status = libie_aq_fwlog_get(fwlog, cfg); + if (status) + dev_dbg(&fwlog->pdev->dev, "libie_aq_fwlog_get failed, FW logging is not supported on this version of FW, status %d\n", + status); + else + fwlog->supported = true; + + kfree(cfg); +} + +/** + * libie_fwlog_init - Initialize FW logging configuration + * @fwlog: pointer to the fwlog structure + * @api: api structure to init fwlog + * + * This function should be called on driver initialization during + * libie_init_hw(). + */ +int libie_fwlog_init(struct libie_fwlog *fwlog, struct libie_fwlog_api *api) +{ + fwlog->api = *api; + libie_fwlog_set_supported(fwlog); + + if (libie_fwlog_supported(fwlog)) { + int status; + + /* read the current config from the FW and store it */ + status = libie_aq_fwlog_get(fwlog, &fwlog->cfg); + if (status) + return status; + + fwlog->ring.rings = kcalloc(LIBIE_FWLOG_RING_SIZE_DFLT, + sizeof(*fwlog->ring.rings), + GFP_KERNEL); + if (!fwlog->ring.rings) { + dev_warn(&fwlog->pdev->dev, "Unable to allocate memory for FW log rings\n"); + return -ENOMEM; + } + + fwlog->ring.size = LIBIE_FWLOG_RING_SIZE_DFLT; + fwlog->ring.index = LIBIE_FWLOG_RING_SIZE_INDEX_DFLT; + + status = libie_fwlog_alloc_ring_buffs(&fwlog->ring); + if (status) { + dev_warn(&fwlog->pdev->dev, "Unable to allocate memory for FW log ring data buffers\n"); + libie_fwlog_free_ring_buffs(&fwlog->ring); + kfree(fwlog->ring.rings); + return status; + } + + libie_debugfs_fwlog_init(fwlog, api->debugfs_root); + } else { + dev_warn(&fwlog->pdev->dev, "FW logging is not supported in this NVM image. Please update the NVM to get FW log support\n"); + } + + return 0; +} +EXPORT_SYMBOL_GPL(libie_fwlog_init); + +/** + * libie_fwlog_deinit - unroll FW logging configuration + * @fwlog: pointer to the fwlog structure + * + * This function should be called in libie_deinit_hw().
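
Editorial aside on the ring handling used by the fwlog code above: the ring keeps a head index (oldest unread entry) and a tail index (next write slot), treats the ring as full when the tail sits one slot behind the head, and, when a new firmware event lands on a full ring, drops the oldest entry by bumping the head. The stand-alone sketch below (plain user-space C with illustrative names, not the libie API) reproduces the same index arithmetic so the wrap, empty, and full conditions can be exercised in isolation.

/* Minimal sketch of an overwrite-oldest ring with head/tail indices.
 * The "full" test (tail + 1 == head, modulo size) is equivalent to the
 * two-branch comparison in libie_fwlog_ring_full() above.
 * All names here are illustrative, not part of the libie API.
 */
#include <stdbool.h>
#include <stdio.h>

#define RING_SIZE 4 /* stand-in for LIBIE_FWLOG_RING_SIZE_DFLT */

struct demo_ring {
	unsigned short head;		/* oldest unread entry */
	unsigned short tail;		/* next slot to write */
	int data[RING_SIZE];
};

static void ring_increment(unsigned short *idx, unsigned short size)
{
	*idx = (*idx + 1) % size;
}

static bool ring_empty(const struct demo_ring *r)
{
	return r->head == r->tail;
}

/* Full means the next write would land on the oldest unread entry. */
static bool ring_full(const struct demo_ring *r)
{
	return (r->tail + 1) % RING_SIZE == r->head;
}

/* Store a new event; drop the oldest one when the ring is full. */
static void ring_push(struct demo_ring *r, int event)
{
	r->data[r->tail] = event;
	ring_increment(&r->tail, RING_SIZE);
	if (ring_full(r))
		ring_increment(&r->head, RING_SIZE);
}

int main(void)
{
	struct demo_ring r = { 0 };

	for (int i = 1; i <= 6; i++)	/* push more events than slots */
		ring_push(&r, i);

	while (!ring_empty(&r)) {	/* drain from the oldest entry */
		printf("%d\n", r.data[r.head]);
		ring_increment(&r.head, RING_SIZE);
	}
	return 0;
}

One slot is kept logically free so that head == tail can only mean "empty"; that is why the full check fires at size - 1 stored entries rather than size.
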
+ */ +void libie_fwlog_deinit(struct libie_fwlog *fwlog) +{ + int status; + + /* make sure FW logging is disabled to not put the FW in a weird state + * for the next driver load + */ + fwlog->cfg.options &= ~LIBIE_FWLOG_OPTION_ARQ_ENA; + status = libie_fwlog_set(fwlog, &fwlog->cfg); + if (status) + dev_warn(&fwlog->pdev->dev, "Unable to turn off FW logging, status: %d\n", + status); + + kfree(fwlog->debugfs_modules); + + fwlog->debugfs_modules = NULL; + + status = libie_fwlog_unregister(fwlog); + if (status) + dev_warn(&fwlog->pdev->dev, "Unable to unregister FW logging, status: %d\n", + status); + + if (fwlog->ring.rings) { + libie_fwlog_free_ring_buffs(&fwlog->ring); + kfree(fwlog->ring.rings); + } +} +EXPORT_SYMBOL_GPL(libie_fwlog_deinit); + +/** + * libie_get_fwlog_data - copy the FW log data from ARQ event + * @fwlog: fwlog that the FW log event is associated with + * @buf: event buffer pointer + * @len: len of event descriptor + */ +void libie_get_fwlog_data(struct libie_fwlog *fwlog, u8 *buf, u16 len) +{ + struct libie_fwlog_data *log; + + log = &fwlog->ring.rings[fwlog->ring.tail]; + + memset(log->data, 0, PAGE_SIZE); + log->data_size = len; + + memcpy(log->data, buf, log->data_size); + libie_fwlog_ring_increment(&fwlog->ring.tail, fwlog->ring.size); + + if (libie_fwlog_ring_full(&fwlog->ring)) { + /* the rings are full so bump the head to create room */ + libie_fwlog_ring_increment(&fwlog->ring.head, fwlog->ring.size); + } +} +EXPORT_SYMBOL_GPL(libie_get_fwlog_data); + +void libie_fwlog_reregister(struct libie_fwlog *fwlog) +{ + if (!(fwlog->cfg.options & LIBIE_FWLOG_OPTION_IS_REGISTERED)) + return; + + if (libie_fwlog_register(fwlog)) + fwlog->cfg.options &= ~LIBIE_FWLOG_OPTION_IS_REGISTERED; +} +EXPORT_SYMBOL_GPL(libie_fwlog_reregister); + +MODULE_DESCRIPTION("Intel(R) Ethernet common library"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index 476e73e502fe..89ccb8eb82c7 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -2416,10 +2416,9 @@ mvneta_swbm_build_skb(struct mvneta_port *pp, struct page_pool *pool, skb->ip_summed = mvneta_rx_csum(pp, desc_status); if (unlikely(xdp_buff_has_frags(xdp))) - xdp_update_skb_shared_info(skb, num_frags, - sinfo->xdp_frags_size, - num_frags * xdp->frame_sz, - xdp_buff_is_frag_pfmemalloc(xdp)); + xdp_update_skb_frags_info(skb, num_frags, sinfo->xdp_frags_size, + num_frags * xdp->frame_sz, + xdp_buff_get_skb_flags(xdp)); return skb; } @@ -2985,6 +2984,13 @@ out: if (txq->count >= txq->tx_stop_threshold) netif_tx_stop_queue(nq); + /* This is not really the true transmit point, since we batch + * up several before hitting the hardware, but is the best we + * can do without more complexity to walk the packets in the + * pending section of the transmit queue. 
+ */ + skb_tx_timestamp(skb); + if (!netdev_xmit_more() || netif_xmit_stopped(nq) || txq->pending + frags > MVNETA_TXQ_DEC_SENT_MASK) mvneta_txq_pend_desc_add(pp, txq, frags); @@ -5357,6 +5363,7 @@ static const struct ethtool_ops mvneta_eth_tool_ops = { .set_link_ksettings = mvneta_ethtool_set_link_ksettings, .get_wol = mvneta_ethtool_get_wol, .set_wol = mvneta_ethtool_set_wol, + .get_ts_info = ethtool_op_get_ts_info, .get_eee = mvneta_ethtool_get_eee, .set_eee = mvneta_ethtool_set_eee, }; diff --git a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c index 8ebb985d2573..ab0c99aa9f9a 100644 --- a/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c +++ b/drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c @@ -4439,6 +4439,8 @@ out: txq_pcpu->count += frags; aggr_txq->count += frags; + skb_tx_timestamp(skb); + /* Enable transmit */ wmb(); mvpp2_aggr_txq_pend_desc_add(port, frags); @@ -5252,14 +5254,14 @@ static int mvpp2_ethtool_get_ts_info(struct net_device *dev, { struct mvpp2_port *port = netdev_priv(dev); + ethtool_op_get_ts_info(dev, info); if (!port->hwtstamp) - return -EOPNOTSUPP; + return 0; info->phc_index = mvpp22_tai_ptp_clock_index(port->priv->tai); - info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE | - SOF_TIMESTAMPING_TX_HARDWARE | - SOF_TIMESTAMPING_RX_HARDWARE | - SOF_TIMESTAMPING_RAW_HARDWARE; + info->so_timestamping |= SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | @@ -6222,6 +6224,12 @@ static struct mvpp2_port *mvpp2_pcs_gmac_to_port(struct phylink_pcs *pcs) return container_of(pcs, struct mvpp2_port, pcs_gmac); } +static unsigned int mvpp2_xjg_pcs_inband_caps(struct phylink_pcs *pcs, + phy_interface_t interface) +{ + return LINK_INBAND_DISABLE; +} + static void mvpp2_xlg_pcs_get_state(struct phylink_pcs *pcs, unsigned int neg_mode, struct phylink_link_state *state) @@ -6256,6 +6264,7 @@ static int mvpp2_xlg_pcs_config(struct phylink_pcs *pcs, unsigned int neg_mode, } static const struct phylink_pcs_ops mvpp2_phylink_xlg_pcs_ops = { + .pcs_inband_caps = mvpp2_xjg_pcs_inband_caps, .pcs_get_state = mvpp2_xlg_pcs_get_state, .pcs_config = mvpp2_xlg_pcs_config, }; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c b/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c index a88c006ea65b..01c6c0a2f283 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_ethtool.c @@ -437,6 +437,15 @@ static int octep_set_link_ksettings(struct net_device *netdev, return 0; } +static void octep_get_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct octep_device *oct = netdev_priv(dev); + + channel->max_combined = CFG_GET_PORTS_MAX_IO_RINGS(oct->conf); + channel->combined_count = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); +} + static const struct ethtool_ops octep_ethtool_ops = { .get_drvinfo = octep_get_drvinfo, .get_link = ethtool_op_get_link, @@ -445,6 +454,7 @@ static const struct ethtool_ops octep_ethtool_ops = { .get_ethtool_stats = octep_get_ethtool_stats, .get_link_ksettings = octep_get_link_ksettings, .set_link_ksettings = octep_set_link_ksettings, + .get_channels = octep_get_channels, }; void octep_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index 
24499bb36c00..bcea3fc26a8c 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -1124,11 +1124,24 @@ static int octep_set_features(struct net_device *dev, netdev_features_t features return err; } +static bool octep_is_vf_valid(struct octep_device *oct, int vf) +{ + if (vf >= CFG_GET_ACTIVE_VFS(oct->conf)) { + netdev_err(oct->netdev, "Invalid VF ID %d\n", vf); + return false; + } + + return true; +} + static int octep_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi) { struct octep_device *oct = netdev_priv(dev); + if (!octep_is_vf_valid(oct, vf)) + return -EINVAL; + ivi->vf = vf; ether_addr_copy(ivi->mac, oct->vf_info[vf].mac_addr); ivi->spoofchk = true; @@ -1143,6 +1156,9 @@ static int octep_set_vf_mac(struct net_device *dev, int vf, u8 *mac) struct octep_device *oct = netdev_priv(dev); int err; + if (!octep_is_vf_valid(oct, vf)) + return -EINVAL; + if (!is_valid_ether_addr(mac)) { dev_err(&oct->pdev->dev, "Invalid MAC Address %pM\n", mac); return -EADDRNOTAVAIL; diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c index ebecdd29f3bd..0867fab61b19 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_pfvf_mbox.c @@ -196,6 +196,7 @@ static void octep_pfvf_get_mac_addr(struct octep_device *oct, u32 vf_id, vf_id); return; } + ether_addr_copy(oct->vf_info[vf_id].mac_addr, rsp->s_set_mac.mac_addr); rsp->s_set_mac.type = OCTEP_PFVF_MBOX_TYPE_RSP_ACK; } @@ -205,6 +206,8 @@ static void octep_pfvf_dev_remove(struct octep_device *oct, u32 vf_id, { int err; + /* Reset VF-specific information maintained by the PF */ + memset(&oct->vf_info[vf_id], 0, sizeof(struct octep_pfvf_info)); err = octep_ctrl_net_dev_remove(oct, vf_id); if (err) { rsp->s.type = OCTEP_PFVF_MBOX_TYPE_RSP_NACK; diff --git a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c index d60441928ba9..241a7e7c7ad2 100644 --- a/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c +++ b/drivers/net/ethernet/marvell/octeon_ep_vf/octep_vf_ethtool.c @@ -244,6 +244,15 @@ static int octep_vf_get_link_ksettings(struct net_device *netdev, return 0; } +static void octep_vf_get_channels(struct net_device *dev, + struct ethtool_channels *channel) +{ + struct octep_vf_device *oct = netdev_priv(dev); + + channel->max_combined = CFG_GET_PORTS_MAX_IO_RINGS(oct->conf); + channel->combined_count = CFG_GET_PORTS_ACTIVE_IO_RINGS(oct->conf); +} + static const struct ethtool_ops octep_vf_ethtool_ops = { .get_drvinfo = octep_vf_get_drvinfo, .get_link = ethtool_op_get_link, @@ -251,6 +260,7 @@ static const struct ethtool_ops octep_vf_ethtool_ops = { .get_sset_count = octep_vf_get_sset_count, .get_ethtool_stats = octep_vf_get_ethtool_stats, .get_link_ksettings = octep_vf_get_link_ksettings, + .get_channels = octep_vf_get_channels, }; void octep_vf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c index 4ff19a04b23e..d374a4454836 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.c @@ -21,8 +21,7 @@ #include "rvu.h" #include "lmac_common.h" -#define DRV_NAME "Marvell-CGX/RPM" -#define DRV_STRING "Marvell CGX/RPM Driver" +#define DRV_NAME "Marvell-CGX-RPM" #define CGX_RX_STAT_GLOBAL_INDEX 
9 @@ -1978,6 +1977,13 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_release_regions; } + if (!is_cn20k(pdev) && + !is_cgx_mapped_to_nix(pdev->subsystem_device, cgx->cgx_id)) { + dev_notice(dev, "CGX %d not mapped to NIX, skipping probe\n", + cgx->cgx_id); + goto err_release_regions; + } + cgx->lmac_count = cgx->mac_ops->get_nr_lmacs(cgx); if (!cgx->lmac_count) { dev_notice(dev, "CGX %d LMAC count is zero, skipping probe\n", cgx->cgx_id); @@ -1998,7 +2004,7 @@ static int cgx_probe(struct pci_dev *pdev, const struct pci_device_id *id) /* init wq for processing linkup requests */ INIT_WORK(&cgx->cgx_cmd_work, cgx_lmac_linkup_work); - cgx->cgx_cmd_workq = alloc_workqueue("cgx_cmd_workq", 0, 0); + cgx->cgx_cmd_workq = alloc_workqueue("cgx_cmd_workq", WQ_PERCPU, 0); if (!cgx->cgx_cmd_workq) { dev_err(dev, "alloc workqueue failed for cgx cmd"); err = -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h index 950231e7ea71..92ccf343dfe0 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/cgx.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/cgx.h @@ -161,10 +161,6 @@ int cgx_get_link_info(void *cgxd, int lmac_id, struct cgx_link_user_info *linfo); int cgx_lmac_linkup_start(void *cgxd); int cgx_get_fwdata_base(u64 *base); -int cgx_lmac_get_pause_frm(void *cgxd, int lmac_id, - u8 *tx_pause, u8 *rx_pause); -int cgx_lmac_set_pause_frm(void *cgxd, int lmac_id, - u8 tx_pause, u8 rx_pause); void cgx_lmac_ptp_config(void *cgxd, int lmac_id, bool enable); u8 cgx_lmac_get_p2x(int cgx_id, int lmac_id); int cgx_set_fec(u64 fec, int cgx_id, int lmac_id); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c index d7030dfa5dad..a80c8e7c94f2 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/mcs_rvu_if.c @@ -913,7 +913,7 @@ int rvu_mcs_init(struct rvu *rvu) /* Initialize the wq for handling mcs interrupts */ INIT_LIST_HEAD(&rvu->mcs_intrq_head); INIT_WORK(&rvu->mcs_intr_work, mcs_intr_handler_task); - rvu->mcs_intr_wq = alloc_workqueue("mcs_intr_wq", 0, 0); + rvu->mcs_intr_wq = alloc_workqueue("mcs_intr_wq", WQ_PERCPU, 0); if (!rvu->mcs_intr_wq) { dev_err(rvu->dev, "mcs alloc workqueue failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index c6bb3aaa8e0d..2d78e08f985f 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -1164,6 +1164,9 @@ cpt: rvu_program_channels(rvu); cgx_start_linkup(rvu); + rvu_block_bcast_xon(rvu, BLKADDR_NIX0); + rvu_block_bcast_xon(rvu, BLKADDR_NIX1); + err = rvu_mcs_init(rvu); if (err) { dev_err(rvu->dev, "%s: Failed to initialize mcs\n", __func__); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 7ee1fdeb5295..b58283341923 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -783,6 +783,20 @@ static inline bool is_cn10kb(struct rvu *rvu) return false; } +static inline bool is_cgx_mapped_to_nix(unsigned short id, u8 cgx_id) +{ + /* On CNF10KA and CNF10KB silicons only two CGX blocks are connected + * to NIX. 
+ */ + if (id == PCI_SUBSYS_DEVID_CNF10K_A || id == PCI_SUBSYS_DEVID_CNF10K_B) + return cgx_id <= 1; + + return !(cgx_id && !(id == PCI_SUBSYS_DEVID_96XX || + id == PCI_SUBSYS_DEVID_98XX || + id == PCI_SUBSYS_DEVID_CN10K_A || + id == PCI_SUBSYS_DEVID_CN10K_B)); +} + static inline bool is_rvu_npc_hash_extract_en(struct rvu *rvu) { u64 npc_const3; @@ -1017,6 +1031,7 @@ int rvu_nix_mcast_update_mcam_entry(struct rvu *rvu, u16 pcifunc, void rvu_nix_flr_free_bpids(struct rvu *rvu, u16 pcifunc); int rvu_alloc_cint_qint_mem(struct rvu *rvu, struct rvu_pfvf *pfvf, int blkaddr, int nixlf); +void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr); /* NPC APIs */ void rvu_npc_freemem(struct rvu *rvu); int rvu_npc_get_pkind(struct rvu *rvu, u16 pf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c index 3303c475414a..3abd750a4bd7 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c @@ -315,7 +315,7 @@ static int cgx_lmac_event_handler_init(struct rvu *rvu) spin_lock_init(&rvu->cgx_evq_lock); INIT_LIST_HEAD(&rvu->cgx_evq_head); INIT_WORK(&rvu->cgx_evh_work, cgx_evhandler_task); - rvu->cgx_evh_wq = alloc_workqueue("rvu_evh_wq", 0, 0); + rvu->cgx_evh_wq = alloc_workqueue("rvu_evh_wq", WQ_PERCPU, 0); if (!rvu->cgx_evh_wq) { dev_err(rvu->dev, "alloc workqueue failed"); return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c index 27c3a2daaaa9..3735372539bd 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_devlink.c @@ -505,7 +505,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) rvu_reporters->nix_event_ctx = nix_event_context; rvu_reporters->rvu_hw_nix_intr_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_intr_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_intr_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)) { dev_warn(rvu->dev, "Failed to create hw_nix_intr reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_intr_reporter)); @@ -513,7 +515,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_nix_gen_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_gen_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_gen_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)) { dev_warn(rvu->dev, "Failed to create hw_nix_gen reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_gen_reporter)); @@ -521,7 +525,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_nix_err_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_err_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_err_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_err_reporter)) { dev_warn(rvu->dev, "Failed to create hw_nix_err reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_err_reporter)); @@ -529,7 +535,9 @@ static int rvu_nix_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_nix_ras_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_nix_ras_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_nix_ras_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)) { 
dev_warn(rvu->dev, "Failed to create hw_nix_ras reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_nix_ras_reporter)); @@ -1051,7 +1059,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) rvu_reporters->npa_event_ctx = npa_event_context; rvu_reporters->rvu_hw_npa_intr_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_intr_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_intr_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_intr_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_intr reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_intr_reporter)); @@ -1059,7 +1069,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_npa_gen_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_gen_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_gen_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_gen_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_gen reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_gen_reporter)); @@ -1067,7 +1079,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_npa_err_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_err_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_err_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_err_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_err reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_err_reporter)); @@ -1075,7 +1089,9 @@ static int rvu_npa_register_reporters(struct rvu_devlink *rvu_dl) } rvu_reporters->rvu_hw_npa_ras_reporter = - devlink_health_reporter_create(rvu_dl->dl, &rvu_hw_npa_ras_reporter_ops, 0, rvu); + devlink_health_reporter_create(rvu_dl->dl, + &rvu_hw_npa_ras_reporter_ops, + rvu); if (IS_ERR(rvu_reporters->rvu_hw_npa_ras_reporter)) { dev_warn(rvu->dev, "Failed to create hw_npa_ras reporter, err=%ld\n", PTR_ERR(rvu_reporters->rvu_hw_npa_ras_reporter)); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 60db1f616cc8..828316211b24 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -6616,3 +6616,19 @@ unlock_grp: return ret; } + +/* On CN10k and older series of silicons, hardware may incorrectly + * assert XOFF on certain channels. Issue a write on NIX_AF_RX_CHANX_CFG + * to broadcast XON on the same.
+ */ +void rvu_block_bcast_xon(struct rvu *rvu, int blkaddr) +{ + struct rvu_block *block = &rvu->hw->block[blkaddr]; + u64 cfg; + + if (!block->implemented || is_cn20k(rvu->pdev)) + return; + + cfg = rvu_read64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(0)); + rvu_write64(rvu, blkaddr, NIX_AF_RX_CHANX_CFG(0), cfg); +} diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c index 03099bc570bd..4415d0ce9aef 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_rep.c @@ -376,7 +376,7 @@ int rvu_rep_install_mcam_rules(struct rvu *rvu) spin_lock_init(&rvu->rep_evtq_lock); INIT_LIST_HEAD(&rvu->rep_evtq_head); INIT_WORK(&rvu->rep_evt_work, rvu_rep_wq_handler); - rvu->rep_evt_wq = alloc_workqueue("rep_evt_wq", 0, 0); + rvu->rep_evt_wq = alloc_workqueue("rep_evt_wq", WQ_PERCPU, 0); if (!rvu->rep_evt_wq) { dev_err(rvu->dev, "REP workqueue allocation failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c index c691f0722154..77543d472345 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/cn10k_ipsec.c @@ -798,7 +798,8 @@ int cn10k_ipsec_init(struct net_device *netdev) pf->ipsec.sa_size = sa_size; INIT_WORK(&pf->ipsec.sa_work, cn10k_ipsec_sa_wq_handler); - pf->ipsec.sa_workq = alloc_workqueue("cn10k_ipsec_sa_workq", 0, 0); + pf->ipsec.sa_workq = alloc_workqueue("cn10k_ipsec_sa_workq", + WQ_PERCPU, 0); if (!pf->ipsec.sa_workq) { netdev_err(pf->netdev, "SA alloc workqueue failed\n"); return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c index f674729124e6..aff17c37ddde 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c @@ -124,7 +124,9 @@ void otx2_get_dev_stats(struct otx2_nic *pfvf) dev_stats->rx_ucast_frames; dev_stats->tx_bytes = OTX2_GET_TX_STATS(TX_OCTS); - dev_stats->tx_drops = OTX2_GET_TX_STATS(TX_DROP); + dev_stats->tx_drops = OTX2_GET_TX_STATS(TX_DROP) + + (unsigned long)atomic_long_read(&dev_stats->tx_discards); + dev_stats->tx_bcast_frames = OTX2_GET_TX_STATS(TX_BCAST); dev_stats->tx_mcast_frames = OTX2_GET_TX_STATS(TX_MCAST); dev_stats->tx_ucast_frames = OTX2_GET_TX_STATS(TX_UCAST); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h index e3765b73c434..1c8a3c078a64 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h @@ -153,6 +153,7 @@ struct otx2_dev_stats { u64 tx_bcast_frames; u64 tx_mcast_frames; u64 tx_drops; + atomic_long_t tx_discards; }; /* Driver counted stats */ diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c index 998c734ff839..b90e23dc49de 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ethtool.c @@ -1283,7 +1283,8 @@ end: } static void otx2_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct otx2_nic *pfvf = netdev_priv(netdev); struct cgx_fw_data *rsp; diff --git 
a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index b23585c5e5c2..e808995703cf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -2220,6 +2220,7 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) { struct otx2_nic *pf = netdev_priv(netdev); int qidx = skb_get_queue_mapping(skb); + struct otx2_dev_stats *dev_stats; struct otx2_snd_queue *sq; struct netdev_queue *txq; int sq_idx; @@ -2232,6 +2233,8 @@ static netdev_tx_t otx2_xmit(struct sk_buff *skb, struct net_device *netdev) /* Check for minimum and maximum packet length */ if (skb->len <= ETH_HLEN || (!skb_shinfo(skb)->gso_size && skb->len > pf->tx_max_pktlen)) { + dev_stats = &pf->hw.dev_stats; + atomic_long_inc(&dev_stats->tx_discards); dev_kfree_skb(skb); return NETDEV_TX_OK; } @@ -3539,6 +3542,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_disable_mbox_intr(pf); otx2_pfaf_mbox_destroy(pf); pci_free_irq_vectors(pf->pdev); + bitmap_free(pf->af_xdp_zc_qidx); pci_set_drvdata(pdev, NULL); free_netdev(netdev); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c index e52cc6b1a26c..dedd586ed310 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_ptp.c @@ -491,7 +491,7 @@ void otx2_ptp_destroy(struct otx2_nic *pfvf) if (!ptp) return; - cancel_delayed_work(&pfvf->ptp->synctstamp_work); + cancel_delayed_work_sync(&pfvf->ptp->synctstamp_work); ptp_clock_unregister(ptp->ptp_clock); kfree(ptp); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c index 5f80b23c5335..26a08d2cfbb1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_tc.c @@ -1326,7 +1326,6 @@ static int otx2_tc_add_flow(struct otx2_nic *nic, free_leaf: otx2_tc_del_from_flow_list(flow_cfg, new_node); - kfree_rcu(new_node, rcu); if (new_node->is_act_police) { mutex_lock(&nic->mbox.lock); @@ -1346,6 +1345,7 @@ free_leaf: mutex_unlock(&nic->mbox.lock); } + kfree_rcu(new_node, rcu); return rc; } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c index 5589fccd370b..25381f079b97 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_vf.c @@ -417,9 +417,19 @@ static netdev_tx_t otx2vf_xmit(struct sk_buff *skb, struct net_device *netdev) { struct otx2_nic *vf = netdev_priv(netdev); int qidx = skb_get_queue_mapping(skb); + struct otx2_dev_stats *dev_stats; struct otx2_snd_queue *sq; struct netdev_queue *txq; + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || + (!skb_shinfo(skb)->gso_size && skb->len > vf->tx_max_pktlen)) { + dev_stats = &vf->hw.dev_stats; + atomic_long_inc(&dev_stats->tx_discards); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } + sq = &vf->qset.sq[qidx]; txq = netdev_get_tx_queue(netdev, qidx); @@ -844,6 +854,7 @@ static void otx2vf_remove(struct pci_dev *pdev) qmem_free(vf->dev, vf->dync_lmt); otx2vf_vfaf_mbox_destroy(vf); pci_free_irq_vectors(vf->pdev); + bitmap_free(vf->af_xdp_zc_qidx); pci_set_drvdata(pdev, NULL); free_netdev(netdev); } diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c index 
25af98034e2e..b476733a0234 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.c @@ -371,7 +371,8 @@ static void rvu_rep_get_stats(struct work_struct *work) stats->rx_mcast_frames = rsp->rx.mcast; stats->tx_bytes = rsp->tx.octs; stats->tx_frames = rsp->tx.ucast + rsp->tx.bcast + rsp->tx.mcast; - stats->tx_drops = rsp->tx.drop; + stats->tx_drops = rsp->tx.drop + + (unsigned long)atomic_long_read(&stats->tx_discards); exit: mutex_unlock(&priv->mbox.lock); } @@ -418,6 +419,16 @@ static netdev_tx_t rvu_rep_xmit(struct sk_buff *skb, struct net_device *dev) struct otx2_nic *pf = rep->mdev; struct otx2_snd_queue *sq; struct netdev_queue *txq; + struct rep_stats *stats; + + /* Check for minimum and maximum packet length */ + if (skb->len <= ETH_HLEN || + (!skb_shinfo(skb)->gso_size && skb->len > pf->tx_max_pktlen)) { + stats = &rep->stats; + atomic_long_inc(&stats->tx_discards); + dev_kfree_skb(skb); + return NETDEV_TX_OK; + } sq = &pf->qset.sq[rep->rep_id]; txq = netdev_get_tx_queue(dev, 0); diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/rep.h b/drivers/net/ethernet/marvell/octeontx2/nic/rep.h index 38446b3e4f13..5bc9e2c7d800 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/rep.h +++ b/drivers/net/ethernet/marvell/octeontx2/nic/rep.h @@ -27,6 +27,7 @@ struct rep_stats { u64 tx_bytes; u64 tx_frames; u64 tx_drops; + atomic_long_t tx_discards; }; struct rep_dev { diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 71ffb55d1fc4..65e7ef033bde 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -1500,7 +1500,7 @@ EXPORT_SYMBOL(prestera_device_unregister); static int __init prestera_module_init(void) { - prestera_wq = alloc_workqueue("prestera", 0, 0); + prestera_wq = alloc_workqueue("prestera", WQ_PERCPU, 0); if (!prestera_wq) return -ENOMEM; diff --git a/drivers/net/ethernet/marvell/prestera/prestera_pci.c b/drivers/net/ethernet/marvell/prestera/prestera_pci.c index c45d108b2f6d..3e13322470da 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_pci.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_pci.c @@ -898,7 +898,7 @@ static int prestera_pci_probe(struct pci_dev *pdev, dev_info(fw->dev.dev, "Prestera FW is ready\n"); - fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI, 1); + fw->wq = alloc_workqueue("prestera_fw_wq", WQ_HIGHPRI | WQ_PERCPU, 1); if (!fw->wq) { err = -ENOMEM; goto err_wq_alloc; diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 5a5fcde76dc0..e68997a29191 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1761,6 +1761,13 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) bool gso = false; int tx_num; + if (skb_vlan_tag_present(skb) && + !eth_proto_is_802_3(eth_hdr(skb)->h_proto)) { + skb = __vlan_hwaccel_push_inside(skb); + if (!skb) + goto dropped; + } + /* normally we can rely on the stack not calling this more than once, * however we have 2 queues running on the same ring so we need to lock * the ring access @@ -1806,8 +1813,9 @@ static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) drop: spin_unlock(ð->page_lock); - stats->tx_dropped++; dev_kfree_skb_any(skb); +dropped: + stats->tx_dropped++; return NETDEV_TX_OK; } diff --git 
a/drivers/net/ethernet/mediatek/mtk_wed.c b/drivers/net/ethernet/mediatek/mtk_wed.c index 0a80d8f8cff7..3dbb113b792c 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.c +++ b/drivers/net/ethernet/mediatek/mtk_wed.c @@ -59,7 +59,9 @@ struct mtk_wed_flow_block_priv { static const struct mtk_wed_soc_data mt7622_data = { .regmap = { .tx_bm_tkid = 0x088, - .wpdma_rx_ring0 = 0x770, + .wpdma_rx_ring = { + 0x770, + }, .reset_idx_tx_mask = GENMASK(3, 0), .reset_idx_rx_mask = GENMASK(17, 16), }, @@ -70,7 +72,9 @@ static const struct mtk_wed_soc_data mt7622_data = { static const struct mtk_wed_soc_data mt7986_data = { .regmap = { .tx_bm_tkid = 0x0c8, - .wpdma_rx_ring0 = 0x770, + .wpdma_rx_ring = { + 0x770, + }, .reset_idx_tx_mask = GENMASK(1, 0), .reset_idx_rx_mask = GENMASK(7, 6), }, @@ -81,7 +85,10 @@ static const struct mtk_wed_soc_data mt7986_data = { static const struct mtk_wed_soc_data mt7988_data = { .regmap = { .tx_bm_tkid = 0x0c8, - .wpdma_rx_ring0 = 0x7d0, + .wpdma_rx_ring = { + 0x7d0, + 0x7d8, + }, .reset_idx_tx_mask = GENMASK(1, 0), .reset_idx_rx_mask = GENMASK(7, 6), }, @@ -621,8 +628,8 @@ mtk_wed_amsdu_init(struct mtk_wed_device *dev) return ret; } - /* eagle E1 PCIE1 tx ring 22 flow control issue */ - if (dev->wlan.id == 0x7991) + /* Kite and Eagle E1 PCIE1 tx ring 22 flow control issue */ + if (dev->wlan.id == 0x7991 || dev->wlan.id == 0x7992) wed_clr(dev, MTK_WED_AMSDU_FIFO, MTK_WED_AMSDU_IS_PRIOR0_RING); wed_set(dev, MTK_WED_CTRL, MTK_WED_CTRL_TX_AMSDU_EN); @@ -1239,7 +1246,11 @@ mtk_wed_set_wpdma(struct mtk_wed_device *dev) return; wed_w32(dev, MTK_WED_WPDMA_RX_GLO_CFG, dev->wlan.wpdma_rx_glo); - wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring0, dev->wlan.wpdma_rx); + wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[0], + dev->wlan.wpdma_rx[0]); + if (mtk_wed_is_v3_or_greater(dev->hw)) + wed_w32(dev, dev->hw->soc->regmap.wpdma_rx_ring[1], + dev->wlan.wpdma_rx[1]); if (!dev->wlan.hw_rro) return; @@ -2323,6 +2334,16 @@ mtk_wed_start(struct mtk_wed_device *dev, u32 irq_mask) if (!dev->rx_wdma[i].desc) mtk_wed_wdma_rx_ring_setup(dev, i, 16, false); + if (dev->wlan.hw_rro) { + for (i = 0; i < MTK_WED_RX_PAGE_QUEUES; i++) { + u32 addr = MTK_WED_RRO_MSDU_PG_CTRL0(i) + + MTK_WED_RING_OFS_COUNT; + + if (!wed_r32(dev, addr)) + wed_w32(dev, addr, 1); + } + } + mtk_wed_hw_init(dev); mtk_wed_configure_irq(dev, irq_mask); diff --git a/drivers/net/ethernet/mediatek/mtk_wed.h b/drivers/net/ethernet/mediatek/mtk_wed.h index c1f0479d7a71..b49aee9a8b65 100644 --- a/drivers/net/ethernet/mediatek/mtk_wed.h +++ b/drivers/net/ethernet/mediatek/mtk_wed.h @@ -17,7 +17,7 @@ struct mtk_wed_wo; struct mtk_wed_soc_data { struct { u32 tx_bm_tkid; - u32 wpdma_rx_ring0; + u32 wpdma_rx_ring[MTK_WED_RX_QUEUES]; u32 reset_idx_tx_mask; u32 reset_idx_rx_mask; } regmap; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 92a16ddb7d86..13666d50b90f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -267,8 +267,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, pp.dma_dir = priv->dma_dir; ring->pp = page_pool_create(&pp); - if (!ring->pp) + if (IS_ERR(ring->pp)) { + err = PTR_ERR(ring->pp); goto err_ring; + } if (xdp_rxq_info_reg(&ring->xdp_rxq, priv->dev, queue_index, 0) < 0) goto err_pp; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 6ec7d6e0181d..3c3e84100d5a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -8,7 +8,6 @@ config MLX5_CORE depends on PCI select AUXILIARY_BUS select NET_DEVLINK - depends on VXLAN || !VXLAN depends on MLXFW || !MLXFW depends on PTP_1588_CLOCK_OPTIONAL depends on PCI_HYPERV_INTERFACE || !PCI_HYPERV_INTERFACE @@ -208,3 +207,14 @@ config MLX5_DPLL help DPLL support in Mellanox Technologies ConnectX NICs. +config MLX5_EN_PSP + bool "Mellanox Technologies support for PSP cryptography-offload acceleration" + depends on INET_PSP + depends on MLX5_CORE_EN + default y + help + mlx5 device offload support for Google PSP Security Protocol offload. + Adds support for PSP encryption offload and for SPI and key generation + interfaces to PSP Stack which supports PSP crypto offload. + + If unsure, say Y. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index a253c73db9e5..8ffa286a18f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -17,7 +17,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ - fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o + fw_reset.o qos.o lib/tout.o lib/aso.o wc.o fs_pool.o lib/nv_param.o # # Netdev basic @@ -69,7 +69,7 @@ mlx5_core-$(CONFIG_MLX5_TC_SAMPLE) += en/tc/sample.o # Core extra # mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \ - ecpf.o rdma.o esw/legacy.o \ + ecpf.o rdma.o esw/legacy.o esw/adj_vport.o \ esw/devlink_port.o esw/vporttbl.o esw/qos.o esw/ipsec.o mlx5_core-$(CONFIG_MLX5_ESWITCH) += esw/acl/helper.o \ @@ -85,7 +85,9 @@ mlx5_core-$(CONFIG_MLX5_BRIDGE) += esw/bridge.o esw/bridge_mcast.o esw/bridge mlx5_core-$(CONFIG_HWMON) += hwmon.o mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o -mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o +ifneq ($(CONFIG_VXLAN),) + mlx5_core-y += lib/vxlan.o +endif mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += lib/hv.o lib/hv_vhca.o @@ -110,6 +112,8 @@ mlx5_core-$(CONFIG_MLX5_EN_TLS) += en_accel/ktls_stats.o \ en_accel/fs_tcp.o en_accel/ktls.o en_accel/ktls_txrx.o \ en_accel/ktls_tx.o en_accel/ktls_rx.o +mlx5_core-$(CONFIG_MLX5_EN_PSP) += en_accel/psp.o en_accel/psp_rxtx.o + # # SW Steering # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index e395ef5f356e..722282cebce9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -294,6 +294,10 @@ static void poll_timeout(struct mlx5_cmd_work_ent *ent) return; } cond_resched(); + if (mlx5_cmd_is_down(dev)) { + ent->ret = -ENXIO; + return; + } } while (time_before(jiffies, poll_end)); ent->ret = -ETIMEDOUT; @@ -1070,7 +1074,7 @@ static void cmd_work_handler(struct work_struct *work) poll_timeout(ent); /* make sure we read the descriptor after ownership is SW */ rmb(); - mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, (ent->ret == -ETIMEDOUT)); + mlx5_cmd_comp_handler(dev, 1ULL << ent->idx, !!ent->ret); } } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 1fd403713baf..e9f319a9bdd6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -145,7 +145,6 @@ int mlx5_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n", cq->cqn); - cq->uar = dev->priv.uar; cq->irqn = eq->core.irqn; return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c index 3ffa3fbacd16..fceea83abbd7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.c @@ -10,6 +10,7 @@ #include "esw/qos.h" #include "sf/dev/dev.h" #include "sf/sf.h" +#include "lib/nv_param.h" static int mlx5_devlink_flash_update(struct devlink *devlink, struct devlink_flash_update_params *params, @@ -160,7 +161,7 @@ static int mlx5_devlink_reload_fw_activate(struct devlink *devlink, struct netli if (err) return err; - mlx5_unload_one_devl_locked(dev, true); + mlx5_sync_reset_unload_flow(dev, true); err = mlx5_health_wait_pci_up(dev); if (err) NL_SET_ERR_MSG_MOD(extack, "FW activate aborted, PCI reads fail after reset"); @@ -203,11 +204,6 @@ static int mlx5_devlink_reload_down(struct devlink *devlink, bool netns_change, return 0; } - if (mlx5_lag_is_active(dev)) { - NL_SET_ERR_MSG_MOD(extack, "reload is unsupported in Lag mode"); - return -EOPNOTSUPP; - } - if (mlx5_core_is_mp_slave(dev)) { NL_SET_ERR_MSG_MOD(extack, "reload is unsupported for multi port slave"); return -EOPNOTSUPP; @@ -534,6 +530,25 @@ mlx5_devlink_hairpin_queue_size_validate(struct devlink *devlink, u32 id, return 0; } +static int mlx5_devlink_num_doorbells_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *mdev = devlink_priv(devlink); + u32 val32 = val.vu32; + u32 max_num_channels; + + max_num_channels = mlx5e_get_max_num_channels(mdev); + if (val32 > max_num_channels) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Requested num_doorbells (%u) exceeds maximum number of channels (%u)", + val32, max_num_channels); + return -EINVAL; + } + + return 0; +} + static void mlx5_devlink_hairpin_params_init_values(struct devlink *devlink) { struct mlx5_core_dev *dev = devlink_priv(devlink); @@ -613,6 +628,9 @@ static const struct devlink_param mlx5_devlink_eth_params[] = { "hairpin_queue_size", DEVLINK_PARAM_TYPE_U32, BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, mlx5_devlink_hairpin_queue_size_validate), + DEVLINK_PARAM_GENERIC(NUM_DOORBELLS, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_num_doorbells_validate), }; static int mlx5_devlink_eth_params_register(struct devlink *devlink) @@ -636,6 +654,10 @@ static int mlx5_devlink_eth_params_register(struct devlink *devlink) mlx5_devlink_hairpin_params_init_values(devlink); + value.vu32 = MLX5_DEFAULT_NUM_DOORBELLS; + devl_param_driverinit_value_set(devlink, + DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS, + value); return 0; } @@ -650,6 +672,105 @@ static void mlx5_devlink_eth_params_unregister(struct devlink *devlink) ARRAY_SIZE(mlx5_devlink_eth_params)); } +#define MLX5_PCIE_CONG_THRESH_MAX 10000 +#define MLX5_PCIE_CONG_THRESH_DEF_LOW 7500 +#define MLX5_PCIE_CONG_THRESH_DEF_HIGH 9000 + +static int +mlx5_devlink_pcie_cong_thresh_validate(struct devlink *devl, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + if (val.vu16 > MLX5_PCIE_CONG_THRESH_MAX) { + NL_SET_ERR_MSG_FMT_MOD(extack, "Value %u > max supported (%u)", + val.vu16, MLX5_PCIE_CONG_THRESH_MAX); + + return -EINVAL; + } + + switch (id) { + case 
MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW: + case MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH: + case MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW: + case MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH: + break; + default: + return -EOPNOTSUPP; + } + + return 0; +} + +static void mlx5_devlink_pcie_cong_init_values(struct devlink *devlink) +{ + union devlink_param_value value; + u32 id; + + value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_LOW; + id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW; + devl_param_driverinit_value_set(devlink, id, value); + + value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_HIGH; + id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH; + devl_param_driverinit_value_set(devlink, id, value); + + value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_LOW; + id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW; + devl_param_driverinit_value_set(devlink, id, value); + + value.vu16 = MLX5_PCIE_CONG_THRESH_DEF_HIGH; + id = MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH; + devl_param_driverinit_value_set(devlink, id, value); +} + +static const struct devlink_param mlx5_devlink_pcie_cong_params[] = { + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW, + "pcie_cong_inbound_low", DEVLINK_PARAM_TYPE_U16, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_pcie_cong_thresh_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH, + "pcie_cong_inbound_high", DEVLINK_PARAM_TYPE_U16, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_pcie_cong_thresh_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW, + "pcie_cong_outbound_low", DEVLINK_PARAM_TYPE_U16, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_pcie_cong_thresh_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH, + "pcie_cong_outbound_high", DEVLINK_PARAM_TYPE_U16, + BIT(DEVLINK_PARAM_CMODE_DRIVERINIT), NULL, NULL, + mlx5_devlink_pcie_cong_thresh_validate), +}; + +static int mlx5_devlink_pcie_cong_params_register(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + int err; + + if (!mlx5_pcie_cong_event_supported(dev)) + return 0; + + err = devl_params_register(devlink, mlx5_devlink_pcie_cong_params, + ARRAY_SIZE(mlx5_devlink_pcie_cong_params)); + if (err) + return err; + + mlx5_devlink_pcie_cong_init_values(devlink); + + return 0; +} + +static void mlx5_devlink_pcie_cong_params_unregister(struct devlink *devlink) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + + if (!mlx5_pcie_cong_event_supported(dev)) + return; + + devl_params_unregister(devlink, mlx5_devlink_pcie_cong_params, + ARRAY_SIZE(mlx5_devlink_pcie_cong_params)); +} + static int mlx5_devlink_enable_rdma_validate(struct devlink *devlink, u32 id, union devlink_param_value val, struct netlink_ext_ack *extack) @@ -895,8 +1016,20 @@ int mlx5_devlink_params_register(struct devlink *devlink) if (err) goto max_uc_list_err; + err = mlx5_devlink_pcie_cong_params_register(devlink); + if (err) + goto pcie_cong_err; + + err = mlx5_nv_param_register_dl_params(devlink); + if (err) + goto nv_param_err; + return 0; +nv_param_err: + mlx5_devlink_pcie_cong_params_unregister(devlink); +pcie_cong_err: + mlx5_devlink_max_uc_list_params_unregister(devlink); max_uc_list_err: mlx5_devlink_auxdev_params_unregister(devlink); auxdev_reg_err: @@ -907,6 +1040,8 @@ auxdev_reg_err: void mlx5_devlink_params_unregister(struct devlink *devlink) { + mlx5_nv_param_unregister_dl_params(devlink); + mlx5_devlink_pcie_cong_params_unregister(devlink); mlx5_devlink_max_uc_list_params_unregister(devlink); 
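
Editorial aside on the pcie_cong_* devlink parameters registered above: going only by what these hunks state, the thresholds are expressed in units of 0.01 % (so 10000 means 100 %), the defaults are 7500/9000 (75 %/90 %), and each low threshold must stay strictly below its high threshold so the congestion event has a hysteresis window. The short stand-alone check below is illustrative only; the names and helpers are hypothetical and not part of the mlx5 driver.

/* Sanity-check a low/high congestion threshold pair expressed in the
 * 0.01 % units used by the devlink parameters above. Illustrative code,
 * not driver code.
 */
#include <stdio.h>

#define CONG_THRESH_MAX 10000	/* 100.00 % in 0.01 % units */

/* Convert a human-readable percentage to the 0.01 % units. */
static unsigned int pct_to_units(double pct)
{
	return (unsigned int)(pct * 100.0 + 0.5);
}

static int check_thresh(unsigned int low, unsigned int high)
{
	if (low > CONG_THRESH_MAX || high > CONG_THRESH_MAX)
		return -1;	/* out of range */
	if (low >= high)
		return -1;	/* no hysteresis window */
	return 0;
}

int main(void)
{
	unsigned int low = pct_to_units(75.0);	/* default low: 7500 */
	unsigned int high = pct_to_units(90.0);	/* default high: 9000 */

	printf("low=%u high=%u valid=%s\n", low, high,
	       check_thresh(low, high) ? "no" : "yes");
	return 0;
}

The same low-versus-high constraint is what mlx5e_thresh_config_validate() enforces later in this series before the congestion event object is created.
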
mlx5_devlink_auxdev_params_unregister(devlink); devl_params_unregister(devlink, mlx5_devlink_params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h index 961f75da6227..c9555119a661 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/devlink.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/devlink.h @@ -22,6 +22,11 @@ enum mlx5_devlink_param_id { MLX5_DEVLINK_PARAM_ID_ESW_MULTIPORT, MLX5_DEVLINK_PARAM_ID_HAIRPIN_NUM_QUEUES, MLX5_DEVLINK_PARAM_ID_HAIRPIN_QUEUE_SIZE, + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW, + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH, + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW, + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH, + MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE }; struct mlx5_trap_ctx { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c index 86253a89c24c..7cae0c6e5e8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */ +#include <linux/mlx5/vport.h> + #include "reporter_vnic.h" #include "en_stats.h" #include "devlink.h" @@ -105,6 +107,15 @@ void mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, } if (MLX5_CAP_GEN(dev, nic_cap_reg)) mlx5_reporter_vnic_diagnose_counter_icm(dev, fmsg, vport_num, other_vport); + if (MLX5_CAP_GEN(dev, vnic_env_cnt_bar_uar_access)) + devlink_fmsg_u32_pair_put(fmsg, "bar_uar_access", + VNIC_ENV_GET(&vnic, bar_uar_access)); + if (MLX5_CAP_GEN(dev, vnic_env_cnt_odp_page_fault)) { + devlink_fmsg_u32_pair_put(fmsg, "odp_local_triggered_page_fault", + VNIC_ENV_GET(&vnic, odp_local_triggered_page_fault)); + devlink_fmsg_u32_pair_put(fmsg, "odp_remote_triggered_page_fault", + VNIC_ENV_GET(&vnic, odp_remote_triggered_page_fault)); + } devlink_fmsg_obj_nest_end(fmsg); devlink_fmsg_pair_nest_end(fmsg); @@ -133,11 +144,11 @@ void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev) health->vnic_reporter = devlink_health_reporter_create(devlink, &mlx5_reporter_vnic_ops, - 0, dev); + dev); if (IS_ERR(health->vnic_reporter)) mlx5_core_warn(dev, - "Failed to create vnic reporter, err = %ld\n", - PTR_ERR(health->vnic_reporter)); + "Failed to create vnic reporter, err = %pe\n", + health->vnic_reporter); } void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0dd3bc0f4caa..14e3207b14e7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -47,6 +47,7 @@ #include <linux/rhashtable.h> #include <net/udp_tunnel.h> #include <net/switchdev.h> +#include <net/psp/types.h> #include <net/xdp.h> #include <linux/dim.h> #include <linux/bits.h> @@ -68,7 +69,7 @@ struct page_pool; #define MLX5E_METADATA_ETHER_TYPE (0x8CE4) #define MLX5E_METADATA_ETHER_LEN 8 -#define MLX5E_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN) +#define MLX5E_ETH_HARD_MTU (ETH_HLEN + PSP_ENCAP_HLEN + PSP_TRL_SIZE + VLAN_HLEN + ETH_FCS_LEN) #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu)) #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu)) @@ -344,6 +345,7 @@ struct mlx5e_cq { /* data path - accessed per napi poll */ u16 event_ctr; struct napi_struct *napi; + struct mlx5_uars_page *uar; struct mlx5_core_cq mcq; 
struct mlx5e_ch_stats *ch_stats; @@ -788,6 +790,7 @@ struct mlx5e_channel { int vec_ix; int sd_ix; int cpu; + struct mlx5_sq_bfreg *bfreg; /* Sync between icosq recovery and XSK enable/disable. */ struct mutex icosq_recovery_lock; @@ -936,6 +939,9 @@ struct mlx5e_priv { #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5e_ipsec *ipsec; #endif +#ifdef CONFIG_MLX5_EN_PSP + struct mlx5e_psp *psp; +#endif #ifdef CONFIG_MLX5_EN_TLS struct mlx5e_tls *tls; #endif @@ -950,6 +956,7 @@ struct mlx5e_priv { struct mlx5e_mqprio_rl *mqprio_rl; struct dentry *dfs_root; struct mlx5_devcom_comp_dev *devcom; + struct ethtool_fec_hist_range *fec_ranges; }; struct mlx5e_dev { @@ -1060,6 +1067,7 @@ struct mlx5e_create_cq_param { struct mlx5e_ch_stats *ch_stats; int node; int ix; + struct mlx5_uars_page *uar; }; struct mlx5e_cq_param; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h index 9560fcba643f..c3408b3f7010 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/fs.h @@ -57,7 +57,7 @@ struct mlx5e_l2_table { bool promisc_enabled; }; -#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_TT - 1) +#define MLX5E_NUM_INDIR_TIRS (MLX5_NUM_INDIR_TIRS) #define MLX5_HASH_IP (MLX5_HASH_FIELD_SEL_SRC_IP |\ MLX5_HASH_FIELD_SEL_DST_IP) @@ -88,10 +88,11 @@ enum { #ifdef CONFIG_MLX5_EN_ARFS MLX5E_ARFS_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, #endif -#ifdef CONFIG_MLX5_EN_IPSEC +#if defined(CONFIG_MLX5_EN_IPSEC) || defined(CONFIG_MLX5_EN_PSP) MLX5E_ACCEL_FS_ESP_FT_LEVEL = MLX5E_INNER_TTC_FT_LEVEL + 1, MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL, MLX5E_ACCEL_FS_POL_FT_LEVEL, + MLX5E_ACCEL_FS_POL_MISS_FT_LEVEL, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, #endif }; @@ -131,7 +132,8 @@ struct mlx5e_ptp_fs; void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, struct mlx5e_rx_res *rx_res, - struct ttc_params *ttc_params, bool tunnel); + struct ttc_params *ttc_params, bool tunnel, + bool ipsec_rss); void mlx5e_destroy_ttc_table(struct mlx5e_flow_steering *fs); int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c index b4f3bd7d346e..195863b2c013 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/hv_vhca_stats.c @@ -138,8 +138,8 @@ void mlx5e_hv_vhca_stats_create(struct mlx5e_priv *priv) if (IS_ERR_OR_NULL(agent)) { if (IS_ERR(agent)) netdev_warn(priv->netdev, - "Failed to create hv vhca stats agent, err = %ld\n", - PTR_ERR(agent)); + "Failed to create hv vhca stats agent, err = %pe\n", + agent); kvfree(priv->stats_agent.buf); return; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c index 3cca06a74cf9..3692298e10f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c @@ -6,6 +6,7 @@ #include "en/port.h" #include "en_accel/en_accel.h" #include "en_accel/ipsec.h" +#include "en_accel/psp.h" #include <linux/dim.h> #include <net/page_pool/types.h> #include <net/xdp_sock_drv.h> @@ -611,6 +612,7 @@ void mlx5e_build_create_cq_param(struct mlx5e_create_cq_param *ccp, struct mlx5e .ch_stats = c->stats, .node = cpu_to_node(c->cpu), .ix = c->vec_ix, + .uar = c->bfreg->up, }; } @@ -810,7 +812,7 @@ static void mlx5e_build_common_cq_param(struct mlx5_core_dev *mdev, { void *cqc = param->cqc; - MLX5_SET(cqc, cqc, uar_page, 
mdev->priv.uar->index); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.bfreg.up->index); if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); } @@ -1003,7 +1005,8 @@ void mlx5e_build_sq_param(struct mlx5_core_dev *mdev, bool allow_swp; allow_swp = mlx5_geneve_tx_allowed(mdev) || - (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO); + (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO) || + mlx5_is_psp_device(mdev); mlx5e_build_sq_param_common(mdev, param); MLX5_SET(wq, wq, log_wq_sz, params->log_sq_size); MLX5_SET(sqc, sqc, allow_swp, allow_swp); @@ -1229,7 +1232,6 @@ static void mlx5e_build_async_icosq_param(struct mlx5_core_dev *mdev, void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct mlx5e_sq_param *param) { void *sqc = param->sqc; @@ -1256,7 +1258,7 @@ int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, async_icosq_log_wq_sz = mlx5e_build_async_icosq_log_wq_sz(mdev); mlx5e_build_sq_param(mdev, params, &cparam->txq_sq); - mlx5e_build_xdpsq_param(mdev, params, NULL, &cparam->xdp_sq); + mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); mlx5e_build_icosq_param(mdev, icosq_log_wq_sz, &cparam->icosq); mlx5e_build_async_icosq_param(mdev, async_icosq_log_wq_sz, &cparam->async_icosq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h index 488ccdbc1e2c..00617c65fe3c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.h @@ -51,6 +51,7 @@ struct mlx5e_create_sq_param { u32 tisn; u8 tis_lst_sz; u8 min_inline_mode; + u32 uar_page; }; /* Striding RQ dynamic parameters */ @@ -132,7 +133,6 @@ void mlx5e_build_tx_cq_param(struct mlx5_core_dev *mdev, struct mlx5e_cq_param *param); void mlx5e_build_xdpsq_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, - struct mlx5e_xsk_param *xsk, struct mlx5e_sq_param *param); int mlx5e_build_channel_param(struct mlx5_core_dev *mdev, struct mlx5e_params *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c index 0ed017569a19..2eb666a46f39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/pcie_cong_event.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB // Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. 
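
Editorial aside on the MLX5E_ETH_HARD_MTU change a few hunks above: the "hard" MTU is the fixed per-packet overhead the device must budget on top of the IP MTU, and with PSP offload it now also reserves room for the PSP encapsulation header and trailer; the HW2SW/SW2HW macros simply subtract or add that overhead. The sketch below redoes the arithmetic with placeholder PSP sizes, which are assumptions for illustration only (the real constants come from <net/psp/types.h>); the Ethernet, VLAN, and FCS lengths are the usual values.

/* Back-of-the-envelope hard-MTU arithmetic. PSP_ENCAP_HLEN and
 * PSP_TRL_SIZE below are assumed placeholder values, not the kernel's.
 */
#include <stdio.h>

#define ETH_HLEN	14	/* Ethernet header */
#define VLAN_HLEN	4	/* 802.1Q tag */
#define ETH_FCS_LEN	4	/* frame check sequence */
#define PSP_ENCAP_HLEN	32	/* assumed: UDP + PSP headers */
#define PSP_TRL_SIZE	16	/* assumed: PSP integrity trailer */

#define ETH_HARD_MTU	(ETH_HLEN + PSP_ENCAP_HLEN + PSP_TRL_SIZE + \
			 VLAN_HLEN + ETH_FCS_LEN)

#define HW2SW_MTU(hard, hwmtu)	((hwmtu) - (hard))
#define SW2HW_MTU(hard, swmtu)	((swmtu) + (hard))

int main(void)
{
	unsigned int sw_mtu = 1500;
	unsigned int hw_mtu = SW2HW_MTU(ETH_HARD_MTU, sw_mtu);

	printf("hard_mtu=%u hw_mtu=%u back_to_sw=%u\n",
	       ETH_HARD_MTU, hw_mtu, HW2SW_MTU(ETH_HARD_MTU, hw_mtu));
	return 0;
}
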
+#include "../devlink.h" #include "en.h" #include "pcie_cong_event.h" @@ -23,6 +24,7 @@ struct mlx5e_pcie_cong_stats { u32 pci_bw_inbound_low; u32 pci_bw_outbound_high; u32 pci_bw_outbound_low; + u32 pci_bw_stale_event; }; struct mlx5e_pcie_cong_event { @@ -41,13 +43,6 @@ struct mlx5e_pcie_cong_event { struct mlx5e_pcie_cong_stats stats; }; -/* In units of 0.01 % */ -static const struct mlx5e_pcie_cong_thresh default_thresh_config = { - .inbound_high = 9000, - .inbound_low = 7500, - .outbound_high = 9000, - .outbound_low = 7500, -}; static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, @@ -58,6 +53,8 @@ static const struct counter_desc mlx5e_pcie_cong_stats_desc[] = { pci_bw_outbound_high) }, { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, pci_bw_outbound_low) }, + { MLX5E_DECLARE_STAT(struct mlx5e_pcie_cong_stats, + pci_bw_stale_event) }, }; #define NUM_PCIE_CONG_COUNTERS ARRAY_SIZE(mlx5e_pcie_cong_stats_desc) @@ -218,8 +215,10 @@ static void mlx5e_pcie_cong_event_work(struct work_struct *work) } changes = cong_event->state ^ new_cong_state; - if (!changes) + if (!changes) { + cong_event->stats.pci_bw_stale_event++; return; + } cong_event->state = new_cong_state; @@ -249,8 +248,60 @@ static int mlx5e_pcie_cong_event_handler(struct notifier_block *nb, return NOTIFY_OK; } +static int +mlx5e_pcie_cong_get_thresh_config(struct mlx5_core_dev *dev, + struct mlx5e_pcie_cong_thresh *config) +{ + u32 ids[4] = { + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_LOW, + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_IN_HIGH, + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_LOW, + MLX5_DEVLINK_PARAM_ID_PCIE_CONG_OUT_HIGH, + }; + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val[4]; + + for (int i = 0; i < 4; i++) { + u32 id = ids[i]; + int err; + + err = devl_param_driverinit_value_get(devlink, id, &val[i]); + if (err) + return err; + } + + config->inbound_low = val[0].vu16; + config->inbound_high = val[1].vu16; + config->outbound_low = val[2].vu16; + config->outbound_high = val[3].vu16; + + return 0; +} + +static int +mlx5e_thresh_config_validate(struct mlx5_core_dev *mdev, + const struct mlx5e_pcie_cong_thresh *config) +{ + int err = 0; + + if (config->inbound_low >= config->inbound_high) { + err = -EINVAL; + mlx5_core_err(mdev, "PCIe inbound congestion threshold configuration invalid: low (%u) >= high (%u).\n", + config->inbound_low, config->inbound_high); + } + + if (config->outbound_low >= config->outbound_high) { + err = -EINVAL; + mlx5_core_err(mdev, "PCIe outbound congestion threshold configuration invalid: low (%u) >= high (%u).\n", + config->outbound_low, config->outbound_high); + } + + return err; +} + int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv) { + struct mlx5e_pcie_cong_thresh thresh_config = {}; struct mlx5e_pcie_cong_event *cong_event; struct mlx5_core_dev *mdev = priv->mdev; int err; @@ -258,6 +309,16 @@ int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv) if (!mlx5_pcie_cong_event_supported(mdev)) return 0; + err = mlx5e_pcie_cong_get_thresh_config(mdev, &thresh_config); + if (WARN_ON(err)) + return err; + + err = mlx5e_thresh_config_validate(mdev, &thresh_config); + if (err) { + mlx5_core_err(mdev, "PCIe congestion event feature disabled\n"); + return err; + } + cong_event = kvzalloc_node(sizeof(*cong_event), GFP_KERNEL, mdev->priv.numa_node); if (!cong_event) @@ -269,7 +330,7 @@ int mlx5e_pcie_cong_event_init(struct mlx5e_priv *priv) cong_event->priv = priv; - err = mlx5_cmd_pcie_cong_event_set(mdev, 
&default_thresh_config, + err = mlx5_cmd_pcie_cong_event_set(mdev, &thresh_config, &cong_event->obj_id); if (err) { mlx5_core_warn(mdev, "Error creating a PCIe congestion event object\n"); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c index 3efa8bf1d14e..4720523813b9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port_buffer.c @@ -575,7 +575,6 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, if (err) return err; } - priv->dcbx.xoff = xoff; /* Apply the settings */ if (update_buffer) { @@ -584,6 +583,8 @@ int mlx5e_port_manual_buffer_config(struct mlx5e_priv *priv, return err; } + priv->dcbx.xoff = xoff; + if (update_prio2buffer) err = mlx5e_port_set_priority2buffer(priv->mdev, prio2buffer); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c index 391b4e9c9dc4..c93ee969ea64 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.c @@ -334,7 +334,7 @@ static int mlx5e_ptp_alloc_txqsq(struct mlx5e_ptp *c, int txq_ix, sq->mdev = mdev; sq->ch_ix = MLX5E_PTP_CHANNEL_IX; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->uar_map = c->bfreg->map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->stats = &c->priv->ptp_stats.sq[tc]; @@ -486,6 +486,7 @@ static int mlx5e_ptp_open_txqsq(struct mlx5e_ptp *c, u32 tisn, csp.wq_ctrl = &txqsq->wq_ctrl; csp.min_inline_mode = txqsq->min_inline_mode; csp.ts_cqe_to_dest_cqn = ptpsq->ts_cq.mcq.cqn; + csp.uar_page = c->bfreg->index; err = mlx5e_create_sq_rdy(c->mdev, sqp, &csp, 0, &txqsq->sqn); if (err) @@ -577,6 +578,7 @@ static int mlx5e_ptp_open_tx_cqs(struct mlx5e_ptp *c, ccp.ch_stats = c->stats; ccp.napi = &c->napi; ccp.ix = MLX5E_PTP_CHANNEL_IX; + ccp.uar = c->bfreg->up; cq_param = &cparams->txq_sq_param.cqp; @@ -626,6 +628,7 @@ static int mlx5e_ptp_open_rx_cq(struct mlx5e_ptp *c, ccp.ch_stats = c->stats; ccp.napi = &c->napi; ccp.ix = MLX5E_PTP_CHANNEL_IX; + ccp.uar = c->bfreg->up; cq_param = &cparams->rq_param.cqp; @@ -900,6 +903,7 @@ int mlx5e_ptp_open(struct mlx5e_priv *priv, struct mlx5e_params *params, c->num_tc = mlx5e_get_dcb_num_tc(params); c->stats = &priv->ptp_stats.ch; c->lag_port = lag_port; + c->bfreg = &mdev->priv.bfreg; err = mlx5e_ptp_set_state(c, params); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h index 883c044852f1..1b3c9648220b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/ptp.h @@ -66,6 +66,7 @@ struct mlx5e_ptp { struct mlx5_core_dev *mdev; struct hwtstamp_config *tstamp; DECLARE_BITMAP(state, MLX5E_PTP_STATE_NUM_STATES); + struct mlx5_sq_bfreg *bfreg; }; static inline bool mlx5e_use_ptpsq(struct sk_buff *skb) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 0f5d7ea8956f..9d1c677814e0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -488,8 +488,8 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, fdb_info, br_offloads); if (IS_ERR(work)) { - WARN_ONCE(1, "Failed to init switchdev work, err=%ld", - PTR_ERR(work)); + WARN_ONCE(1, "Failed to 
init switchdev work, err=%pe", + work); return notifier_from_errno(PTR_ERR(work)); } @@ -527,7 +527,8 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) br_offloads = mlx5_esw_bridge_init(esw); rtnl_unlock(); if (IS_ERR(br_offloads)) { - esw_warn(mdev, "Failed to init esw bridge (err=%ld)\n", PTR_ERR(br_offloads)); + esw_warn(mdev, "Failed to init esw bridge (err=%pe)\n", + br_offloads); return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c index 16c44d628eda..b1415992ffa2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_rx.c @@ -651,25 +651,29 @@ void mlx5e_reporter_icosq_resume_recovery(struct mlx5e_channel *c) mutex_unlock(&c->icosq_recovery_lock); } +#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 +#define MLX5E_REPORTER_RX_BURST_PERIOD 500 + static const struct devlink_health_reporter_ops mlx5_rx_reporter_ops = { .name = "rx", .recover = mlx5e_rx_reporter_recover, .diagnose = mlx5e_rx_reporter_diagnose, .dump = mlx5e_rx_reporter_dump, + .default_graceful_period = MLX5E_REPORTER_RX_GRACEFUL_PERIOD, + .default_burst_period = MLX5E_REPORTER_RX_BURST_PERIOD, }; -#define MLX5E_REPORTER_RX_GRACEFUL_PERIOD 500 - void mlx5e_reporter_rx_create(struct mlx5e_priv *priv) { + struct devlink_port *port = priv->netdev->devlink_port; struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + reporter = devlink_port_health_reporter_create(port, &mlx5_rx_reporter_ops, - MLX5E_REPORTER_RX_GRACEFUL_PERIOD, priv); + priv); if (IS_ERR(reporter)) { - netdev_warn(priv->netdev, "Failed to create rx reporter, err = %ld\n", - PTR_ERR(reporter)); + netdev_warn(priv->netdev, "Failed to create rx reporter, err = %pe\n", + reporter); return; } priv->rx_reporter = reporter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c index 85d5cb39b107..9e2cf191ed30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/reporter_tx.c @@ -539,26 +539,30 @@ void mlx5e_reporter_tx_ptpsq_unhealthy(struct mlx5e_ptpsq *ptpsq) mlx5e_health_report(priv, priv->tx_reporter, err_str, &err_ctx); } +#define MLX5E_REPORTER_TX_GRACEFUL_PERIOD 500 +#define MLX5E_REPORTER_TX_BURST_PERIOD 500 + static const struct devlink_health_reporter_ops mlx5_tx_reporter_ops = { .name = "tx", .recover = mlx5e_tx_reporter_recover, .diagnose = mlx5e_tx_reporter_diagnose, .dump = mlx5e_tx_reporter_dump, + .default_graceful_period = MLX5E_REPORTER_TX_GRACEFUL_PERIOD, + .default_burst_period = MLX5E_REPORTER_TX_BURST_PERIOD, }; -#define MLX5_REPORTER_TX_GRACEFUL_PERIOD 500 - void mlx5e_reporter_tx_create(struct mlx5e_priv *priv) { + struct devlink_port *port = priv->netdev->devlink_port; struct devlink_health_reporter *reporter; - reporter = devlink_port_health_reporter_create(priv->netdev->devlink_port, + reporter = devlink_port_health_reporter_create(port, &mlx5_tx_reporter_ops, - MLX5_REPORTER_TX_GRACEFUL_PERIOD, priv); + priv); if (IS_ERR(reporter)) { netdev_warn(priv->netdev, - "Failed to create tx reporter, err = %ld\n", - PTR_ERR(reporter)); + "Failed to create tx reporter, err = %pe\n", + reporter); return; } priv->tx_reporter = reporter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c index c68ba0e58fa6..c96cbc4b0dbf 100644 
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.c @@ -75,15 +75,14 @@ struct mlx5e_rss { struct mlx5e_tir *inner_tir[MLX5E_NUM_INDIR_TIRS]; struct mlx5e_rqt rqt; struct mlx5_core_dev *mdev; /* primary */ - u32 drop_rqn; - bool inner_ft_support; + struct mlx5e_rss_params params; bool enabled; refcount_t refcnt; }; bool mlx5e_rss_get_inner_ft_support(struct mlx5e_rss *rss) { - return rss->inner_ft_support; + return rss->params.inner_ft_support; } void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_channels) @@ -91,7 +90,7 @@ void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_ch rss->indir.actual_table_size = mlx5e_rqt_size(rss->mdev, num_channels); } -int mlx5e_rss_params_indir_init(struct mlx5e_rss_params_indir *indir, struct mlx5_core_dev *mdev, +int mlx5e_rss_params_indir_init(struct mlx5e_rss_params_indir *indir, u32 actual_table_size, u32 max_table_size) { indir->table = kvmalloc_array(max_table_size, sizeof(*indir->table), GFP_KERNEL); @@ -139,7 +138,8 @@ static struct mlx5e_rss *mlx5e_rss_init_copy(const struct mlx5e_rss *from) if (!rss) return ERR_PTR(-ENOMEM); - err = mlx5e_rss_params_indir_init(&rss->indir, from->mdev, from->indir.actual_table_size, + err = mlx5e_rss_params_indir_init(&rss->indir, + from->indir.actual_table_size, from->indir.max_table_size); if (err) goto err_free_rss; @@ -192,11 +192,12 @@ mlx5e_rss_get_tt_config(struct mlx5e_rss *rss, enum mlx5_traffic_types tt) return rss_tt; } -static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, - enum mlx5_traffic_types tt, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, - bool inner) +static int +mlx5e_rss_create_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *pkt_merge_param, + bool inner) { + bool rss_inner = rss->params.inner_ft_support; struct mlx5e_rss_params_traffic_type rss_tt; struct mlx5e_tir_builder *builder; struct mlx5e_tir **tir_p; @@ -204,7 +205,7 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, u32 rqtn; int err; - if (inner && !rss->inner_ft_support) { + if (inner && !rss_inner) { mlx5e_rss_warn(rss->mdev, "Cannot create inner indirect TIR[%d], RSS inner FT is not supported.\n", tt); @@ -227,8 +228,8 @@ static int mlx5e_rss_create_tir(struct mlx5e_rss *rss, rqtn = mlx5e_rqt_get_rqtn(&rss->rqt); mlx5e_tir_builder_build_rqt(builder, rss->mdev->mlx5e_res.hw_objs.td.tdn, - rqtn, rss->inner_ft_support); - mlx5e_tir_builder_build_packet_merge(builder, init_pkt_merge_param); + rqtn, rss_inner); + mlx5e_tir_builder_build_packet_merge(builder, pkt_merge_param); rss_tt = mlx5e_rss_get_tt_config(rss, tt); mlx5e_tir_builder_build_rss(builder, &rss->hash, &rss_tt, inner); @@ -264,15 +265,16 @@ static void mlx5e_rss_destroy_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types *tir_p = NULL; } -static int mlx5e_rss_create_tirs(struct mlx5e_rss *rss, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, - bool inner) +static int +mlx5e_rss_create_tirs(struct mlx5e_rss *rss, + const struct mlx5e_packet_merge_param *pkt_merge_param, + bool inner) { enum mlx5_traffic_types tt, max_tt; int err; for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) { - err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); + err = mlx5e_rss_create_tir(rss, tt, pkt_merge_param, inner); if (err) goto err_destroy_tirs; } @@ -335,7 +337,7 @@ static int mlx5e_rss_update_tirs(struct mlx5e_rss *rss) tt, err); } - if (!rss->inner_ft_support) + if 
(!rss->params.inner_ft_support) continue; err = mlx5e_rss_update_tir(rss, tt, true); @@ -355,14 +357,16 @@ static int mlx5e_rss_init_no_tirs(struct mlx5e_rss *rss) refcount_set(&rss->refcnt, 1); return mlx5e_rqt_init_direct(&rss->rqt, rss->mdev, true, - rss->drop_rqn, rss->indir.max_table_size); + rss->params.drop_rqn, + rss->indir.max_table_size); } -struct mlx5e_rss *mlx5e_rss_init(struct mlx5_core_dev *mdev, bool inner_ft_support, u32 drop_rqn, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, - enum mlx5e_rss_init_type type, unsigned int nch, - unsigned int max_nch) +struct mlx5e_rss * +mlx5e_rss_init(struct mlx5_core_dev *mdev, + const struct mlx5e_rss_params *params, + const struct mlx5e_rss_init_params *init_params) { + u32 rqt_max_size, rqt_size; struct mlx5e_rss *rss; int err; @@ -370,29 +374,31 @@ struct mlx5e_rss *mlx5e_rss_init(struct mlx5_core_dev *mdev, bool inner_ft_suppo if (!rss) return ERR_PTR(-ENOMEM); - err = mlx5e_rss_params_indir_init(&rss->indir, mdev, - mlx5e_rqt_size(mdev, nch), - mlx5e_rqt_size(mdev, max_nch)); + rqt_size = mlx5e_rqt_size(mdev, init_params->nch); + rqt_max_size = mlx5e_rqt_size(mdev, init_params->max_nch); + err = mlx5e_rss_params_indir_init(&rss->indir, rqt_size, rqt_max_size); if (err) goto err_free_rss; rss->mdev = mdev; - rss->inner_ft_support = inner_ft_support; - rss->drop_rqn = drop_rqn; + rss->params = *params; err = mlx5e_rss_init_no_tirs(rss); if (err) goto err_free_indir; - if (type == MLX5E_RSS_INIT_NO_TIRS) + if (init_params->type == MLX5E_RSS_INIT_NO_TIRS) goto out; - err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, false); + err = mlx5e_rss_create_tirs(rss, init_params->pkt_merge_param, + false); if (err) goto err_destroy_rqt; - if (inner_ft_support) { - err = mlx5e_rss_create_tirs(rss, init_pkt_merge_param, true); + if (params->inner_ft_support) { + err = mlx5e_rss_create_tirs(rss, + init_params->pkt_merge_param, + true); if (err) goto err_destroy_tirs; } @@ -418,7 +424,7 @@ int mlx5e_rss_cleanup(struct mlx5e_rss *rss) mlx5e_rss_destroy_tirs(rss, false); - if (rss->inner_ft_support) + if (rss->params.inner_ft_support) mlx5e_rss_destroy_tirs(rss, true); mlx5e_rqt_destroy(&rss->rqt); @@ -448,7 +454,7 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, { struct mlx5e_tir *tir; - WARN_ON(inner && !rss->inner_ft_support); + WARN_ON(inner && !rss->params.inner_ft_support); tir = rss_get_tir(rss, tt, inner); WARN_ON(!tir); @@ -468,10 +474,10 @@ bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool /* Fill the "tirn" output parameter. * Create the requested TIR if it's its first usage. 
*/ -int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, - enum mlx5_traffic_types tt, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, - bool inner, u32 *tirn) +int +mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *pkt_merge_param, + bool inner, u32 *tirn) { struct mlx5e_tir *tir; @@ -479,7 +485,7 @@ int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, if (!tir) { /* TIR doesn't exist, create one */ int err; - err = mlx5e_rss_create_tir(rss, tt, init_pkt_merge_param, inner); + err = mlx5e_rss_create_tir(rss, tt, pkt_merge_param, inner); if (err) return err; tir = rss_get_tir(rss, tt, inner); @@ -512,10 +518,11 @@ void mlx5e_rss_disable(struct mlx5e_rss *rss) int err; rss->enabled = false; - err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->drop_rqn, NULL); + err = mlx5e_rqt_redirect_direct(&rss->rqt, rss->params.drop_rqn, NULL); if (err) mlx5e_rss_warn(rss->mdev, "Failed to redirect RQT %#x to drop RQ %#x: err = %d\n", - mlx5e_rqt_get_rqtn(&rss->rqt), rss->drop_rqn, err); + mlx5e_rqt_get_rqtn(&rss->rqt), + rss->params.drop_rqn, err); } int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, @@ -548,7 +555,7 @@ int mlx5e_rss_packet_merge_set_param(struct mlx5e_rss *rss, } inner_tir: - if (!rss->inner_ft_support) + if (!rss->params.inner_ft_support) continue; tir = rss_get_tir(rss, tt, true); @@ -681,7 +688,7 @@ int mlx5e_rss_set_hash_fields(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, return err; } - if (!(rss->inner_ft_support)) + if (!(rss->params.inner_ft_support)) return 0; err = mlx5e_rss_update_tir(rss, tt, true); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h index c6c1b2847cf5..5fb03cd0a411 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rss.h @@ -13,19 +13,31 @@ enum mlx5e_rss_init_type { MLX5E_RSS_INIT_TIRS }; +struct mlx5e_rss_init_params { + enum mlx5e_rss_init_type type; + const struct mlx5e_packet_merge_param *pkt_merge_param; + unsigned int nch; + unsigned int max_nch; +}; + +struct mlx5e_rss_params { + bool inner_ft_support; + u32 drop_rqn; +}; + struct mlx5e_rss_params_traffic_type mlx5e_rss_get_default_tt_config(enum mlx5_traffic_types tt); struct mlx5e_rss; -int mlx5e_rss_params_indir_init(struct mlx5e_rss_params_indir *indir, struct mlx5_core_dev *mdev, +int mlx5e_rss_params_indir_init(struct mlx5e_rss_params_indir *indir, u32 actual_table_size, u32 max_table_size); void mlx5e_rss_params_indir_cleanup(struct mlx5e_rss_params_indir *indir); void mlx5e_rss_params_indir_modify_actual_size(struct mlx5e_rss *rss, u32 num_channels); -struct mlx5e_rss *mlx5e_rss_init(struct mlx5_core_dev *mdev, bool inner_ft_support, u32 drop_rqn, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, - enum mlx5e_rss_init_type type, unsigned int nch, - unsigned int max_nch); +struct mlx5e_rss * +mlx5e_rss_init(struct mlx5_core_dev *mdev, + const struct mlx5e_rss_params *params, + const struct mlx5e_rss_init_params *init_params); int mlx5e_rss_cleanup(struct mlx5e_rss *rss); void mlx5e_rss_refcnt_inc(struct mlx5e_rss *rss); @@ -37,10 +49,10 @@ u32 mlx5e_rss_get_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); bool mlx5e_rss_valid_tir(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, bool inner); u32 mlx5e_rss_get_rqtn(struct mlx5e_rss *rss); -int mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, - enum mlx5_traffic_types tt, - const struct mlx5e_packet_merge_param 
*init_pkt_merge_param, - bool inner, u32 *tirn); +int +mlx5e_rss_obtain_tirn(struct mlx5e_rss *rss, enum mlx5_traffic_types tt, + const struct mlx5e_packet_merge_param *pkt_merge_param, + bool inner, u32 *tirn); void mlx5e_rss_enable(struct mlx5e_rss *rss, u32 *rqns, u32 *vhca_ids, unsigned int num_rqns); void mlx5e_rss_disable(struct mlx5e_rss *rss); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c index a2acbfee2b77..ac26a32845d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.c @@ -54,17 +54,30 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_rss_init_params init_params; + struct mlx5e_rss_params rss_params; struct mlx5e_rss *rss; if (WARN_ON(res->rss[0])) return -EINVAL; - rss = mlx5e_rss_init(res->mdev, inner_ft_support, res->drop_rqn, - &res->pkt_merge_param, MLX5E_RSS_INIT_TIRS, init_nch, res->max_nch); + init_params = (struct mlx5e_rss_init_params) { + .type = MLX5E_RSS_INIT_TIRS, + .pkt_merge_param = &res->pkt_merge_param, + .nch = init_nch, + .max_nch = res->max_nch, + }; + + rss_params = (struct mlx5e_rss_params) { + .inner_ft_support = inner_ft_support, + .drop_rqn = res->drop_rqn, + }; + + rss = mlx5e_rss_init(res->mdev, &rss_params, &init_params); if (IS_ERR(rss)) return PTR_ERR(rss); - mlx5e_rss_set_indir_uniform(rss, init_nch); + mlx5e_rss_set_indir_uniform(rss, init_params.nch); res->rss[0] = rss; @@ -74,18 +87,30 @@ static int mlx5e_rx_res_rss_init_def(struct mlx5e_rx_res *res, int mlx5e_rx_res_rss_init(struct mlx5e_rx_res *res, u32 rss_idx, unsigned int init_nch) { bool inner_ft_support = res->features & MLX5E_RX_RES_FEATURE_INNER_FT; + struct mlx5e_rss_init_params init_params; + struct mlx5e_rss_params rss_params; struct mlx5e_rss *rss; if (WARN_ON_ONCE(res->rss[rss_idx])) return -ENOSPC; - rss = mlx5e_rss_init(res->mdev, inner_ft_support, res->drop_rqn, - &res->pkt_merge_param, MLX5E_RSS_INIT_NO_TIRS, init_nch, - res->max_nch); + init_params = (struct mlx5e_rss_init_params) { + .type = MLX5E_RSS_INIT_NO_TIRS, + .pkt_merge_param = &res->pkt_merge_param, + .nch = init_nch, + .max_nch = res->max_nch, + }; + + rss_params = (struct mlx5e_rss_params) { + .inner_ft_support = inner_ft_support, + .drop_rqn = res->drop_rqn, + }; + + rss = mlx5e_rss_init(res->mdev, &rss_params, &init_params); if (IS_ERR(rss)) return PTR_ERR(rss); - mlx5e_rss_set_indir_uniform(rss, init_nch); + mlx5e_rss_set_indir_uniform(rss, init_params.nch); if (res->rss_active) { u32 *vhca_ids = get_vhca_ids(res, 0); @@ -438,7 +463,7 @@ static void mlx5e_rx_res_ptp_destroy(struct mlx5e_rx_res *res) struct mlx5e_rx_res * mlx5e_rx_res_create(struct mlx5_core_dev *mdev, enum mlx5e_rx_res_features features, unsigned int max_nch, u32 drop_rqn, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, + const struct mlx5e_packet_merge_param *pkt_merge_param, unsigned int init_nch) { bool multi_vhca = features & MLX5E_RX_RES_FEATURE_MULTI_VHCA; @@ -454,7 +479,7 @@ mlx5e_rx_res_create(struct mlx5_core_dev *mdev, enum mlx5e_rx_res_features featu res->max_nch = max_nch; res->drop_rqn = drop_rqn; - res->pkt_merge_param = *init_pkt_merge_param; + res->pkt_merge_param = *pkt_merge_param; init_rwsem(&res->pkt_merge_param_sem); err = mlx5e_rx_res_rss_init_def(res, init_nch); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h 
b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h index 1d049e2aa264..65a857c215e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rx_res.h @@ -27,7 +27,7 @@ enum mlx5e_rx_res_features { struct mlx5e_rx_res * mlx5e_rx_res_create(struct mlx5_core_dev *mdev, enum mlx5e_rx_res_features features, unsigned int max_nch, u32 drop_rqn, - const struct mlx5e_packet_merge_param *init_pkt_merge_param, + const struct mlx5e_packet_merge_param *pkt_merge_param, unsigned int init_nch); void mlx5e_rx_res_destroy(struct mlx5e_rx_res *res); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c index 01d522b02947..d3db6146fcad 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_hmfs.c @@ -136,8 +136,8 @@ mlx5_ct_fs_hmfs_matcher_get(struct mlx5_ct_fs *fs, struct mlx5_flow_spec *spec, hws_bwc_matcher = mlx5_ct_fs_hmfs_matcher_create(fs, tbl, spec, ipv4, tcp, gre); if (IS_ERR(hws_bwc_matcher)) { netdev_warn(fs->netdev, - "ct_fs_hmfs: failed to create bwc matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", - nat, ipv4, tcp, gre, PTR_ERR(hws_bwc_matcher)); + "ct_fs_hmfs: failed to create bwc matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %pe\n", + nat, ipv4, tcp, gre, hws_bwc_matcher); hmfs_matcher = ERR_CAST(hws_bwc_matcher); goto out_unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c index 0c97c5899904..4d6924b644c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/ct_fs_smfs.c @@ -148,8 +148,8 @@ mlx5_ct_fs_smfs_matcher_get(struct mlx5_ct_fs *fs, bool nat, bool ipv4, bool tcp dr_matcher = mlx5_ct_fs_smfs_matcher_create(fs, tbl, ipv4, tcp, gre, prio); if (IS_ERR(dr_matcher)) { netdev_warn(fs->netdev, - "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %ld\n", - nat, ipv4, tcp, gre, PTR_ERR(dr_matcher)); + "ct_fs_smfs: failed to create matcher (nat %d, ipv4 %d, tcp %d, gre %d), err: %pe\n", + nat, ipv4, tcp, gre, dr_matcher); smfs_matcher = ERR_CAST(dr_matcher); goto out_unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c index 8afcec0c5d3c..896f718483c3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/int_port.c @@ -93,8 +93,8 @@ mlx5e_int_port_create_rx_rule(struct mlx5_eswitch *esw, flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, &flow_act, dest, 1); if (IS_ERR(flow_rule)) - mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %ld\n", - PTR_ERR(flow_rule)); + mlx5_core_warn(esw->dev, "ft offloads: Failed to add internal vport rx rule err %pe\n", + flow_rule); kvfree(spec); @@ -322,8 +322,8 @@ mlx5e_tc_int_port_init(struct mlx5e_priv *priv) sizeof(u32) * 2, (1 << ESW_VPORT_BITS) - 1, true); if (IS_ERR(int_port_priv->metadata_mapping)) { - mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%ld\n", - PTR_ERR(int_port_priv->metadata_mapping)); + mlx5_core_warn(priv->mdev, "Can't allocate metadata mapping of int port offload, err=%pe\n", + int_port_priv->metadata_mapping); goto err_mapping; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c index 2162d776fe35..a14f216048cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c @@ -1,7 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ /* Copyright (c) 2018 Mellanox Technologies. */ -#include <net/inet_ecn.h> +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/vxlan.h> #include <net/gre.h> #include <net/geneve.h> @@ -233,7 +234,7 @@ int mlx5e_tc_tun_create_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.flowi4_dscp = inet_dsfield_to_dscp(tun_key->tos); attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; @@ -349,7 +350,7 @@ int mlx5e_tc_tun_update_header_ipv4(struct mlx5e_priv *priv, int err; /* add the IP fields */ - attr.fl.fl4.flowi4_tos = tun_key->tos & ~INET_ECN_MASK; + attr.fl.fl4.flowi4_dscp = inet_dsfield_to_dscp(tun_key->tos); attr.fl.fl4.daddr = tun_key->u.ipv4.dst; attr.fl.fl4.saddr = tun_key->u.ipv4.src; attr.ttl = tun_key->ttl; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index a0fc76a1bc08..0735d10f2bac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -172,8 +172,8 @@ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv, &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(e->pkt_reformat)) { - mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n", - PTR_ERR(e->pkt_reformat)); + mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %pe\n", + e->pkt_reformat); return; } e->flags |= MLX5_ENCAP_ENTRY_VALID; @@ -1845,8 +1845,8 @@ static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event queue_work(priv->wq, &fib_work->work); } else if (IS_ERR(fib_work)) { NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work"); - mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n", - PTR_ERR(fib_work)); + mlx5_core_warn(priv->mdev, "Failed to init fib work, %pe\n", + fib_work); } break; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c index b5c19396e096..996fcdb5a29d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/trap.c @@ -76,6 +76,7 @@ static int mlx5e_open_trap_rq(struct mlx5e_priv *priv, struct mlx5e_trap *t) ccp.ch_stats = t->stats; ccp.napi = &t->napi; ccp.ix = 0; + ccp.uar = mdev->priv.bfreg.up; err = mlx5e_open_cq(priv->mdev, trap_moder, &rq_param->cqp, &ccp, &rq->cq); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h index 5dc04bbfc71b..6760bb0336df 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h @@ -309,10 +309,7 @@ mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map, static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) { - struct mlx5_core_cq *mcq; - - mcq = &cq->mcq; - mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc); + mlx5_cq_arm(&cq->mcq, MLX5_CQ_DB_REQ_NOT, cq->uar->map, cq->wq.cc); } static inline struct mlx5e_sq_dma * diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c 
b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c index d743e823362a..dbd88eb5c082 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/setup.c @@ -54,7 +54,7 @@ static void mlx5e_build_xsk_cparam(struct mlx5_core_dev *mdev, struct mlx5e_channel_param *cparam) { mlx5e_build_rq_param(mdev, params, xsk, &cparam->rq); - mlx5e_build_xdpsq_param(mdev, params, xsk, &cparam->xdp_sq); + mlx5e_build_xdpsq_param(mdev, params, &cparam->xdp_sq); } static int mlx5e_init_xsk_rq(struct mlx5e_channel *c, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 33e32584b07f..8bef99e8367e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -42,6 +42,8 @@ #include <en_accel/macsec.h> #include "en.h" #include "en/txrx.h" +#include "en_accel/psp.h" +#include "en_accel/psp_rxtx.h" #if IS_ENABLED(CONFIG_GENEVE) #include <net/geneve.h> @@ -119,6 +121,9 @@ struct mlx5e_accel_tx_state { #ifdef CONFIG_MLX5_EN_IPSEC struct mlx5e_accel_tx_ipsec_state ipsec; #endif +#ifdef CONFIG_MLX5_EN_PSP + struct mlx5e_accel_tx_psp_state psp_st; +#endif }; static inline bool mlx5e_accel_tx_begin(struct net_device *dev, @@ -137,6 +142,13 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, return false; #endif +#ifdef CONFIG_MLX5_EN_PSP + if (mlx5e_psp_is_offload(skb, dev)) { + if (unlikely(!mlx5e_psp_handle_tx_skb(dev, skb, &state->psp_st))) + return false; + } +#endif + #ifdef CONFIG_MLX5_EN_IPSEC if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && xfrm_offload(skb)) { if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, skb, &state->ipsec))) @@ -157,8 +169,14 @@ static inline bool mlx5e_accel_tx_begin(struct net_device *dev, } static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, + struct sk_buff *skb, struct mlx5e_accel_tx_state *state) { +#ifdef CONFIG_MLX5_EN_PSP + if (mlx5e_psp_is_offload_state(&state->psp_st)) + return mlx5e_psp_tx_ids_len(&state->psp_st); +#endif + #ifdef CONFIG_MLX5_EN_IPSEC if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) return mlx5e_ipsec_tx_ids_len(&state->ipsec); @@ -172,8 +190,14 @@ static inline unsigned int mlx5e_accel_tx_ids_len(struct mlx5e_txqsq *sq, static inline void mlx5e_accel_tx_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { +#ifdef CONFIG_MLX5_EN_PSP + if (mlx5e_psp_is_offload_state(&accel->psp_st)) + mlx5e_psp_tx_build_eseg(priv, skb, &accel->psp_st, eseg); +#endif + #ifdef CONFIG_MLX5_EN_IPSEC if (xfrm_offload(skb)) mlx5e_ipsec_tx_build_eseg(priv, skb, eseg); @@ -199,6 +223,11 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, mlx5e_ktls_handle_tx_wqe(&wqe->ctrl, &state->tls); #endif +#ifdef CONFIG_MLX5_EN_PSP + if (mlx5e_psp_is_offload_state(&state->psp_st)) + mlx5e_psp_handle_tx_wqe(wqe, &state->psp_st, inlseg); +#endif + #ifdef CONFIG_MLX5_EN_IPSEC if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state) && state->ipsec.xo && state->ipsec.tailen) @@ -208,21 +237,40 @@ static inline void mlx5e_accel_tx_finish(struct mlx5e_txqsq *sq, static inline int mlx5e_accel_init_rx(struct mlx5e_priv *priv) { - return mlx5e_ktls_init_rx(priv); + int err; + + err = mlx5_accel_psp_fs_init_rx_tables(priv); + if (err) + goto out; + + err = mlx5e_ktls_init_rx(priv); + if (err) + mlx5_accel_psp_fs_cleanup_rx_tables(priv); + +out: + return err; } 
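A minimal, self-contained C sketch (not from the patch) of the ordering the mlx5e_accel_init_rx()/mlx5e_accel_cleanup_rx() hunk above establishes: bring up the PSP RX tables first, then kTLS RX, unwind the first step if the second fails, and tear down in reverse order. setup_psp_tables()/setup_ktls() and their teardown counterparts are hypothetical stand-ins, not mlx5 APIs.

#include <stdio.h>

/* Hypothetical stand-ins for the two RX acceleration init steps. */
static int setup_psp_tables(void)     { puts("psp rx tables up");   return 0; }
static void teardown_psp_tables(void) { puts("psp rx tables down"); }
static int setup_ktls(void)           { puts("ktls rx up");         return 0; }
static void teardown_ktls(void)       { puts("ktls rx down"); }

static int accel_init_rx(void)
{
	int err;

	err = setup_psp_tables();
	if (err)
		return err;

	err = setup_ktls();
	if (err)
		teardown_psp_tables();	/* unwind step one when step two fails */

	return err;
}

static void accel_cleanup_rx(void)
{
	/* Teardown mirrors init in reverse order. */
	teardown_ktls();
	teardown_psp_tables();
}

int main(void)
{
	if (accel_init_rx())
		return 1;
	accel_cleanup_rx();
	return 0;
}

The TX side of the same hunk follows the same pairing: the PSP TX tables are created before mlx5e_ktls_init_tx() and destroyed after mlx5e_ktls_cleanup_tx().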
static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv) { mlx5e_ktls_cleanup_rx(priv); + mlx5_accel_psp_fs_cleanup_rx_tables(priv); } static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv) { + int err; + + err = mlx5_accel_psp_fs_init_tx_tables(priv); + if (err) + return err; + return mlx5e_ktls_init_tx(priv); } static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv) { mlx5e_ktls_cleanup_tx(priv); + mlx5_accel_psp_fs_cleanup_tx_tables(priv); } #endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c index 4f83e3172767..1febdc5b81f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/fs_tcp.c @@ -138,7 +138,7 @@ struct mlx5_flow_handle *mlx5e_accel_fs_add_sk(struct mlx5e_flow_steering *fs, flow = mlx5_add_flow_rules(ft->t, spec, &flow_act, &dest, 1); if (IS_ERR(flow)) - fs_err(fs, "mlx5_add_flow_rules() failed, flow is %ld\n", PTR_ERR(flow)); + fs_err(fs, "mlx5_add_flow_rules() failed, flow is %pe\n", flow); out: kvfree(spec); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h index ffcd0cdeb775..23703f28386a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h @@ -185,6 +185,7 @@ struct mlx5e_ipsec_rx_create_attr { u32 family; int prio; int pol_level; + int pol_miss_level; int sa_level; int status_level; enum mlx5_flow_namespace_type chains_ns; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c index 98b6a3a623f9..6ccfc2af07b7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_fs.c @@ -61,6 +61,7 @@ struct mlx5e_ipsec_rx { struct mlx5_flow_table *pol_miss_ft; struct mlx5_flow_handle *pol_miss_rule; u8 allow_tunnel_mode : 1; + u8 ttc_rules_added : 1; }; /* IPsec RX flow steering */ @@ -585,6 +586,20 @@ out: return err; } +static struct mlx5_flow_destination +ipsec_rx_decrypted_pkt_def_dest(struct mlx5_ttc_table *ttc, u32 family) +{ + struct mlx5_flow_destination dest; + + if (!mlx5_ttc_has_esp_flow_group(ttc)) + return mlx5_ttc_get_default_dest(ttc, family2tt(family)); + + dest.ft = mlx5_get_ttc_flow_table(ttc); + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + + return dest; +} + static void ipsec_rx_update_default_dest(struct mlx5e_ipsec_rx *rx, struct mlx5_flow_destination *old_dest, struct mlx5_flow_destination *new_dest) @@ -598,10 +613,10 @@ static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) { struct mlx5e_ipsec_rx *rx = ipsec_rx(ipsec, family, XFRM_DEV_OFFLOAD_PACKET); struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(ipsec->fs, false); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); struct mlx5_flow_destination old_dest, new_dest; - old_dest = mlx5_ttc_get_default_dest(mlx5e_fs_get_ttc(ipsec->fs, false), - family2tt(family)); + old_dest = ipsec_rx_decrypted_pkt_def_dest(ttc, family); mlx5_ipsec_fs_roce_rx_create(ipsec->mdev, ipsec->roce, ns, &old_dest, family, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, MLX5E_NIC_PRIO); @@ -614,12 +629,12 @@ static void handle_ipsec_rx_bringup(struct mlx5e_ipsec *ipsec, u32 family) static void handle_ipsec_rx_cleanup(struct mlx5e_ipsec *ipsec, u32 family) { struct mlx5e_ipsec_rx *rx = 
ipsec_rx(ipsec, family, XFRM_DEV_OFFLOAD_PACKET); + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); struct mlx5_flow_destination old_dest, new_dest; old_dest.ft = mlx5_ipsec_fs_roce_ft_get(ipsec->roce, family); old_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; - new_dest = mlx5_ttc_get_default_dest(mlx5e_fs_get_ttc(ipsec->fs, false), - family2tt(family)); + new_dest = ipsec_rx_decrypted_pkt_def_dest(ttc, family); ipsec_rx_update_default_dest(rx, &old_dest, &new_dest); mlx5_ipsec_fs_roce_rx_destroy(ipsec->roce, family, ipsec->mdev); @@ -669,10 +684,13 @@ static void ipsec_mpv_work_handler(struct work_struct *_work) complete(&work->master_priv->ipsec->comp); } -static void ipsec_rx_ft_disconnect(struct mlx5e_ipsec *ipsec, u32 family) +static void ipsec_rx_ft_disconnect(struct mlx5e_ipsec *ipsec, + struct mlx5e_ipsec_rx *rx, u32 family) { struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(ipsec->fs, false); + if (rx->ttc_rules_added) + mlx5_ttc_destroy_ipsec_rules(ttc); mlx5_ttc_fwd_default_dest(ttc, family2tt(family)); } @@ -707,7 +725,7 @@ static void rx_destroy(struct mlx5_core_dev *mdev, struct mlx5e_ipsec *ipsec, { /* disconnect */ if (rx != ipsec->rx_esw) - ipsec_rx_ft_disconnect(ipsec, family); + ipsec_rx_ft_disconnect(ipsec, rx, family); mlx5_del_flow_rules(rx->sa.rule); mlx5_destroy_flow_group(rx->sa.group); @@ -747,6 +765,7 @@ static void ipsec_rx_create_attr_set(struct mlx5e_ipsec *ipsec, attr->family = family; attr->prio = MLX5E_NIC_PRIO; attr->pol_level = MLX5E_ACCEL_FS_POL_FT_LEVEL; + attr->pol_miss_level = MLX5E_ACCEL_FS_POL_MISS_FT_LEVEL; attr->sa_level = MLX5E_ACCEL_FS_ESP_FT_LEVEL; attr->status_level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL; attr->chains_ns = MLX5_FLOW_NAMESPACE_KERNEL; @@ -763,7 +782,7 @@ static int ipsec_rx_status_pass_dest_get(struct mlx5e_ipsec *ipsec, if (rx == ipsec->rx_esw) return mlx5_esw_ipsec_rx_status_pass_dest_get(ipsec, dest); - *dest = mlx5_ttc_get_default_dest(attr->ttc, family2tt(attr->family)); + *dest = ipsec_rx_decrypted_pkt_def_dest(attr->ttc, attr->family); err = mlx5_ipsec_fs_roce_rx_create(ipsec->mdev, ipsec->roce, attr->ns, dest, attr->family, MLX5E_ACCEL_FS_ESP_FT_ROCE_LEVEL, attr->prio); @@ -806,10 +825,16 @@ static void ipsec_rx_ft_connect(struct mlx5e_ipsec *ipsec, struct mlx5e_ipsec_rx_create_attr *attr) { struct mlx5_flow_destination dest = {}; + struct mlx5_ttc_table *ttc, *inner_ttc; dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest.ft = rx->ft.sa; - mlx5_ttc_fwd_dest(attr->ttc, family2tt(attr->family), &dest); + if (mlx5_ttc_fwd_dest(attr->ttc, family2tt(attr->family), &dest)) + return; + + ttc = mlx5e_fs_get_ttc(ipsec->fs, false); + inner_ttc = mlx5e_fs_get_ttc(ipsec->fs, true); + rx->ttc_rules_added = !mlx5_ttc_create_ipsec_rules(ttc, inner_ttc); } static int ipsec_rx_chains_create_miss(struct mlx5e_ipsec *ipsec, @@ -833,7 +858,7 @@ static int ipsec_rx_chains_create_miss(struct mlx5e_ipsec *ipsec, ft_attr.max_fte = 1; ft_attr.autogroup.max_num_groups = 1; - ft_attr.level = attr->pol_level; + ft_attr.level = attr->pol_miss_level; ft_attr.prio = attr->prio; ft = mlx5_create_auto_grouped_flow_table(attr->ns, &ft_attr); @@ -1704,8 +1729,8 @@ static int setup_modify_header(struct mlx5e_ipsec *ipsec, int type, u32 val, u8 modify_hdr = mlx5_modify_header_alloc(mdev, ns_type, num_of_actions, action); if (IS_ERR(modify_hdr)) { - mlx5_core_err(mdev, "Failed to allocate modify_header %ld\n", - PTR_ERR(modify_hdr)); + mlx5_core_err(mdev, "Failed to allocate modify_header %pe\n", + modify_hdr); return 
PTR_ERR(modify_hdr); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h index 3cc640669247..45b0d19e735c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h @@ -40,7 +40,7 @@ #include "en/txrx.h" /* Bit31: IPsec marker, Bit30: reserved, Bit29-24: IPsec syndrome, Bit23-0: IPsec obj id */ -#define MLX5_IPSEC_METADATA_MARKER(metadata) (((metadata) >> 31) & 0x1) +#define MLX5_IPSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x2) #define MLX5_IPSEC_METADATA_SYNDROM(metadata) (((metadata) >> 24) & GENMASK(5, 0)) #define MLX5_IPSEC_METADATA_HANDLE(metadata) ((metadata) & GENMASK(23, 0)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c index 65ccb33edafb..d7a11ff9bbdb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_rx.c @@ -498,9 +498,9 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) depth += sizeof(struct iphdr); th = (void *)iph + sizeof(struct iphdr); - sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - iph->saddr, th->source, iph->daddr, - th->dest, netdev->ifindex); + sk = inet_lookup_established(net, iph->saddr, th->source, + iph->daddr, th->dest, + netdev->ifindex); #if IS_ENABLED(CONFIG_IPV6) } else { struct ipv6hdr *ipv6h = (struct ipv6hdr *)iph; @@ -508,8 +508,7 @@ static void resync_update_sn(struct mlx5e_rq *rq, struct sk_buff *skb) depth += sizeof(struct ipv6hdr); th = (void *)ipv6h + sizeof(struct ipv6hdr); - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - &ipv6h->saddr, th->source, + sk = __inet6_lookup_established(net, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->dest), netdev->ifindex, 0); #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 6ab02f3fc291..528b04d4de41 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1676,7 +1676,7 @@ void mlx5e_macsec_tx_build_eseg(struct mlx5e_macsec *macsec, if (!fs_id) return; - eseg->flow_table_metadata = cpu_to_be32(MLX5_ETH_WQE_FT_META_MACSEC | fs_id << 2); + eseg->flow_table_metadata = cpu_to_be32(MLX5_MACSEC_TX_METADATA(fs_id)); } void mlx5e_macsec_offload_handle_rx_skb(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c new file mode 100644 index 000000000000..b4cb131c5f81 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.c @@ -0,0 +1,952 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ +#include <linux/mlx5/device.h> +#include <net/psp.h> +#include <linux/psp.h> +#include "mlx5_core.h" +#include "psp.h" +#include "lib/crypto.h" +#include "en_accel/psp.h" +#include "fs_core.h" + +enum accel_fs_psp_type { + ACCEL_FS_PSP4, + ACCEL_FS_PSP6, + ACCEL_FS_PSP_NUM_TYPES, +}; + +enum accel_psp_syndrome { + PSP_OK = 0, + PSP_ICV_FAIL, + PSP_BAD_TRAILER, +}; + +struct mlx5e_psp_tx { + struct mlx5_flow_namespace *ns; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + struct mlx5_flow_handle *rule; + struct mutex mutex; /* Protect PSP TX steering */ + u32 refcnt; +}; + +struct mlx5e_psp_rx_err { + struct mlx5_flow_table *ft; + struct mlx5_flow_handle *rule; + struct mlx5_flow_handle *drop_rule; + struct mlx5_modify_hdr *copy_modify_hdr; +}; + +struct mlx5e_accel_fs_psp_prot { + struct mlx5_flow_table *ft; + struct mlx5_flow_group *miss_group; + struct mlx5_flow_handle *miss_rule; + struct mlx5_flow_destination default_dest; + struct mlx5e_psp_rx_err rx_err; + u32 refcnt; + struct mutex prot_mutex; /* protect ESP4/ESP6 protocol */ + struct mlx5_flow_handle *def_rule; +}; + +struct mlx5e_accel_fs_psp { + struct mlx5e_accel_fs_psp_prot fs_prot[ACCEL_FS_PSP_NUM_TYPES]; +}; + +struct mlx5e_psp_fs { + struct mlx5_core_dev *mdev; + struct mlx5e_psp_tx *tx_fs; + /* Rx manage */ + struct mlx5e_flow_steering *fs; + struct mlx5e_accel_fs_psp *rx_fs; +}; + +/* PSP RX flow steering */ +static enum mlx5_traffic_types fs_psp2tt(enum accel_fs_psp_type i) +{ + if (i == ACCEL_FS_PSP4) + return MLX5_TT_IPV4_UDP; + + return MLX5_TT_IPV6_UDP; +} + +static void accel_psp_fs_rx_err_del_rules(struct mlx5e_psp_fs *fs, + struct mlx5e_psp_rx_err *rx_err) +{ + if (rx_err->drop_rule) { + mlx5_del_flow_rules(rx_err->drop_rule); + rx_err->drop_rule = NULL; + } + + if (rx_err->rule) { + mlx5_del_flow_rules(rx_err->rule); + rx_err->rule = NULL; + } + + if (rx_err->copy_modify_hdr) { + mlx5_modify_header_dealloc(fs->mdev, rx_err->copy_modify_hdr); + rx_err->copy_modify_hdr = NULL; + } +} + +static void accel_psp_fs_rx_err_destroy_ft(struct mlx5e_psp_fs *fs, + struct mlx5e_psp_rx_err *rx_err) +{ + accel_psp_fs_rx_err_del_rules(fs, rx_err); + + if (rx_err->ft) { + mlx5_destroy_flow_table(rx_err->ft); + rx_err->ft = NULL; + } +} + +static void accel_psp_setup_syndrome_match(struct mlx5_flow_spec *spec, + enum accel_psp_syndrome syndrome) +{ + void *misc_params_2; + + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; + misc_params_2 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters_2); + MLX5_SET_TO_ONES(fte_match_set_misc2, misc_params_2, psp_syndrome); + misc_params_2 = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters_2); + MLX5_SET(fte_match_set_misc2, misc_params_2, psp_syndrome, syndrome); +} + +static int accel_psp_fs_rx_err_add_rule(struct mlx5e_psp_fs *fs, + struct mlx5e_accel_fs_psp_prot *fs_prot, + struct mlx5e_psp_rx_err *rx_err) +{ + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {}; + struct mlx5_core_dev *mdev = fs->mdev; + struct mlx5_flow_act flow_act = {}; + struct mlx5_modify_hdr *modify_hdr; + struct mlx5_flow_handle *fte; + struct mlx5_flow_spec *spec; + int err = 0; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + + /* Action to copy 7 bit psp_syndrome to regB[23:29] */ + MLX5_SET(copy_action_in, action, action_type, MLX5_ACTION_TYPE_COPY); + MLX5_SET(copy_action_in, action, src_field, MLX5_ACTION_IN_FIELD_PSP_SYNDROME); + MLX5_SET(copy_action_in, action, src_offset, 0); + MLX5_SET(copy_action_in, 
action, length, 7); + MLX5_SET(copy_action_in, action, dst_field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(copy_action_in, action, dst_offset, 23); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, + 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, + "fail to alloc psp copy modify_header_id err=%d\n", err); + goto out_spec; + } + + accel_psp_setup_syndrome_match(spec, PSP_OK); + /* create fte */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | + MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + flow_act.modify_hdr = modify_hdr; + fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, + &fs_prot->default_dest, 1); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + mlx5_core_err(mdev, "fail to add psp rx err copy rule err=%d\n", err); + goto out; + } + rx_err->rule = fte; + + /* add default drop rule */ + memset(spec, 0, sizeof(*spec)); + memset(&flow_act, 0, sizeof(flow_act)); + /* create fte */ + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP; + fte = mlx5_add_flow_rules(rx_err->ft, spec, &flow_act, NULL, 0); + if (IS_ERR(fte)) { + err = PTR_ERR(fte); + mlx5_core_err(mdev, "fail to add psp rx err drop rule err=%d\n", err); + goto out_drop_rule; + } + rx_err->drop_rule = fte; + rx_err->copy_modify_hdr = modify_hdr; + + goto out_spec; + +out_drop_rule: + mlx5_del_flow_rules(rx_err->rule); + rx_err->rule = NULL; +out: + mlx5_modify_header_dealloc(mdev, modify_hdr); +out_spec: + kfree(spec); + return err; +} + +static int accel_psp_fs_rx_err_create_ft(struct mlx5e_psp_fs *fs, + struct mlx5e_accel_fs_psp_prot *fs_prot, + struct mlx5e_psp_rx_err *rx_err) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs->fs, false); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_table *ft; + int err; + + ft_attr.max_fte = 2; + ft_attr.autogroup.max_num_groups = 2; + ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_ERR_LEVEL; // MLX5E_ACCEL_FS_TCP_FT_LEVEL + ft_attr.prio = MLX5E_NIC_PRIO; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(fs->mdev, "fail to create psp rx inline ft err=%d\n", err); + return err; + } + + rx_err->ft = ft; + err = accel_psp_fs_rx_err_add_rule(fs, fs_prot, rx_err); + if (err) + goto out_err; + + return 0; + +out_err: + mlx5_destroy_flow_table(ft); + rx_err->ft = NULL; + return err; +} + +static void accel_psp_fs_rx_fs_destroy(struct mlx5e_accel_fs_psp_prot *fs_prot) +{ + if (fs_prot->def_rule) { + mlx5_del_flow_rules(fs_prot->def_rule); + fs_prot->def_rule = NULL; + } + + if (fs_prot->miss_rule) { + mlx5_del_flow_rules(fs_prot->miss_rule); + fs_prot->miss_rule = NULL; + } + + if (fs_prot->miss_group) { + mlx5_destroy_flow_group(fs_prot->miss_group); + fs_prot->miss_group = NULL; + } + + if (fs_prot->ft) { + mlx5_destroy_flow_table(fs_prot->ft); + fs_prot->ft = NULL; + } +} + +static void setup_fte_udp_psp(struct mlx5_flow_spec *spec, u16 udp_port) +{ + spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS; + MLX5_SET(fte_match_set_lyr_2_4, spec->match_criteria, udp_dport, 0xffff); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, udp_dport, udp_port); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, spec->match_criteria, ip_protocol); + MLX5_SET(fte_match_set_lyr_2_4, spec->match_value, ip_protocol, IPPROTO_UDP); +} + +static int accel_psp_fs_rx_create_ft(struct mlx5e_psp_fs *fs, + struct mlx5e_accel_fs_psp_prot *fs_prot) +{ + struct mlx5_flow_namespace *ns = mlx5e_fs_get_ns(fs->fs, false); + u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] 
= {}; + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_modify_hdr *modify_hdr = NULL; + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_flow_destination dest = {}; + struct mlx5_core_dev *mdev = fs->mdev; + struct mlx5_flow_group *miss_group; + MLX5_DECLARE_FLOW_ACT(flow_act); + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5_flow_table *ft; + u32 *flow_group_in; + int err = 0; + + flow_group_in = kvzalloc(inlen, GFP_KERNEL); + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!flow_group_in || !spec) { + err = -ENOMEM; + goto out; + } + + /* Create FT */ + ft_attr.max_fte = 2; + ft_attr.level = MLX5E_ACCEL_FS_ESP_FT_LEVEL; + ft_attr.prio = MLX5E_NIC_PRIO; + ft_attr.autogroup.num_reserved_entries = 1; + ft_attr.autogroup.max_num_groups = 1; + ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "fail to create psp rx ft err=%d\n", err); + goto out_err; + } + fs_prot->ft = ft; + + /* Create miss_group */ + MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, ft->max_fte - 1); + MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, ft->max_fte - 1); + miss_group = mlx5_create_flow_group(ft, flow_group_in); + if (IS_ERR(miss_group)) { + err = PTR_ERR(miss_group); + mlx5_core_err(mdev, "fail to create psp rx miss_group err=%d\n", err); + goto out_err; + } + fs_prot->miss_group = miss_group; + + /* Create miss rule */ + rule = mlx5_add_flow_rules(ft, spec, &flow_act, &fs_prot->default_dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "fail to create psp rx miss_rule err=%d\n", err); + goto out_err; + } + fs_prot->miss_rule = rule; + + /* Add default Rx psp rule */ + setup_fte_udp_psp(spec, PSP_DEFAULT_UDP_PORT); + flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_PSP; + /* Set bit[31, 30] PSP marker */ + /* Set bit[29-23] psp_syndrome is set in error FT */ +#define MLX5E_PSP_MARKER_BIT (BIT(30) | BIT(31)) + MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); + MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_B); + MLX5_SET(set_action_in, action, data, MLX5E_PSP_MARKER_BIT); + MLX5_SET(set_action_in, action, offset, 0); + MLX5_SET(set_action_in, action, length, 32); + + modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_KERNEL, 1, action); + if (IS_ERR(modify_hdr)) { + err = PTR_ERR(modify_hdr); + mlx5_core_err(mdev, "fail to alloc psp set modify_header_id err=%d\n", err); + modify_hdr = NULL; + goto out_err; + } + + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_DECRYPT | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; + flow_act.modify_hdr = modify_hdr; + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = fs_prot->rx_err.ft; + rule = mlx5_add_flow_rules(fs_prot->ft, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, + "fail to add psp rule Rx decryption, err=%d, flow_act.action = %#04X\n", + err, flow_act.action); + goto out_err; + } + + fs_prot->def_rule = rule; + goto out; + +out_err: + accel_psp_fs_rx_fs_destroy(fs_prot); +out: + kvfree(flow_group_in); + kvfree(spec); + return err; +} + +static int accel_psp_fs_rx_destroy(struct mlx5e_psp_fs *fs, enum accel_fs_psp_type type) +{ + struct mlx5e_accel_fs_psp_prot *fs_prot; + struct mlx5e_accel_fs_psp *accel_psp; + + accel_psp = fs->rx_fs; + + /* The netdev unreg already happened, so all offloaded rule are already removed */ + 
fs_prot = &accel_psp->fs_prot[type]; + + accel_psp_fs_rx_fs_destroy(fs_prot); + + accel_psp_fs_rx_err_destroy_ft(fs, &fs_prot->rx_err); + + return 0; +} + +static int accel_psp_fs_rx_create(struct mlx5e_psp_fs *fs, enum accel_fs_psp_type type) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs->fs, false); + struct mlx5e_accel_fs_psp_prot *fs_prot; + struct mlx5e_accel_fs_psp *accel_psp; + int err; + + accel_psp = fs->rx_fs; + fs_prot = &accel_psp->fs_prot[type]; + + fs_prot->default_dest = mlx5_ttc_get_default_dest(ttc, fs_psp2tt(type)); + + err = accel_psp_fs_rx_err_create_ft(fs, fs_prot, &fs_prot->rx_err); + if (err) + return err; + + err = accel_psp_fs_rx_create_ft(fs, fs_prot); + if (err) + accel_psp_fs_rx_err_destroy_ft(fs, &fs_prot->rx_err); + + return err; +} + +static int accel_psp_fs_rx_ft_get(struct mlx5e_psp_fs *fs, enum accel_fs_psp_type type) +{ + struct mlx5e_accel_fs_psp_prot *fs_prot; + struct mlx5_flow_destination dest = {}; + struct mlx5e_accel_fs_psp *accel_psp; + struct mlx5_ttc_table *ttc; + int err = 0; + + if (!fs || !fs->rx_fs) + return -EINVAL; + + ttc = mlx5e_fs_get_ttc(fs->fs, false); + accel_psp = fs->rx_fs; + fs_prot = &accel_psp->fs_prot[type]; + mutex_lock(&fs_prot->prot_mutex); + if (fs_prot->refcnt++) + goto out; + + /* create FT */ + err = accel_psp_fs_rx_create(fs, type); + if (err) { + fs_prot->refcnt--; + goto out; + } + + /* connect */ + dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; + dest.ft = fs_prot->ft; + mlx5_ttc_fwd_dest(ttc, fs_psp2tt(type), &dest); + +out: + mutex_unlock(&fs_prot->prot_mutex); + return err; +} + +static void accel_psp_fs_rx_ft_put(struct mlx5e_psp_fs *fs, enum accel_fs_psp_type type) +{ + struct mlx5_ttc_table *ttc = mlx5e_fs_get_ttc(fs->fs, false); + struct mlx5e_accel_fs_psp_prot *fs_prot; + struct mlx5e_accel_fs_psp *accel_psp; + + accel_psp = fs->rx_fs; + fs_prot = &accel_psp->fs_prot[type]; + mutex_lock(&fs_prot->prot_mutex); + if (--fs_prot->refcnt) + goto out; + + /* disconnect */ + mlx5_ttc_fwd_default_dest(ttc, fs_psp2tt(type)); + + /* remove FT */ + accel_psp_fs_rx_destroy(fs, type); + +out: + mutex_unlock(&fs_prot->prot_mutex); +} + +static void accel_psp_fs_cleanup_rx(struct mlx5e_psp_fs *fs) +{ + struct mlx5e_accel_fs_psp_prot *fs_prot; + struct mlx5e_accel_fs_psp *accel_psp; + enum accel_fs_psp_type i; + + if (!fs->rx_fs) + return; + + accel_psp = fs->rx_fs; + for (i = 0; i < ACCEL_FS_PSP_NUM_TYPES; i++) { + fs_prot = &accel_psp->fs_prot[i]; + mutex_destroy(&fs_prot->prot_mutex); + WARN_ON(fs_prot->refcnt); + } + kfree(fs->rx_fs); + fs->rx_fs = NULL; +} + +static int accel_psp_fs_init_rx(struct mlx5e_psp_fs *fs) +{ + struct mlx5e_accel_fs_psp_prot *fs_prot; + struct mlx5e_accel_fs_psp *accel_psp; + enum accel_fs_psp_type i; + + accel_psp = kzalloc(sizeof(*accel_psp), GFP_KERNEL); + if (!accel_psp) + return -ENOMEM; + + for (i = 0; i < ACCEL_FS_PSP_NUM_TYPES; i++) { + fs_prot = &accel_psp->fs_prot[i]; + mutex_init(&fs_prot->prot_mutex); + } + + fs->rx_fs = accel_psp; + + return 0; +} + +void mlx5_accel_psp_fs_cleanup_rx_tables(struct mlx5e_priv *priv) +{ + int i; + + if (!priv->psp) + return; + + for (i = 0; i < ACCEL_FS_PSP_NUM_TYPES; i++) + accel_psp_fs_rx_ft_put(priv->psp->fs, i); +} + +int mlx5_accel_psp_fs_init_rx_tables(struct mlx5e_priv *priv) +{ + struct mlx5e_psp_fs *fs; + int err, i; + + if (!priv->psp) + return 0; + + fs = priv->psp->fs; + for (i = 0; i < ACCEL_FS_PSP_NUM_TYPES; i++) { + err = accel_psp_fs_rx_ft_get(fs, i); + if (err) + goto out_err; + } + + return 0; + +out_err: + i--; + while 
(i >= 0) { + accel_psp_fs_rx_ft_put(fs, i); + --i; + } + + return err; +} + +static int accel_psp_fs_tx_create_ft_table(struct mlx5e_psp_fs *fs) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_table_attr ft_attr = {}; + struct mlx5_core_dev *mdev = fs->mdev; + struct mlx5_flow_act flow_act = {}; + u32 *in, *mc, *outer_headers_c; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + struct mlx5e_psp_tx *tx_fs; + struct mlx5_flow_table *ft; + struct mlx5_flow_group *fg; + int err = 0; + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + in = kvzalloc(inlen, GFP_KERNEL); + if (!spec || !in) { + err = -ENOMEM; + goto out; + } + + ft_attr.max_fte = 1; +#define MLX5E_PSP_PRIO 0 + ft_attr.prio = MLX5E_PSP_PRIO; +#define MLX5E_PSP_LEVEL 0 + ft_attr.level = MLX5E_PSP_LEVEL; + ft_attr.autogroup.max_num_groups = 1; + + tx_fs = fs->tx_fs; + ft = mlx5_create_flow_table(tx_fs->ns, &ft_attr); + if (IS_ERR(ft)) { + err = PTR_ERR(ft); + mlx5_core_err(mdev, "PSP: fail to add psp tx flow table, err = %d\n", err); + goto out; + } + + mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + outer_headers_c = MLX5_ADDR_OF(fte_match_param, mc, outer_headers); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol); + MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, outer_headers_c, udp_dport); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + fg = mlx5_create_flow_group(ft, in); + if (IS_ERR(fg)) { + err = PTR_ERR(fg); + mlx5_core_err(mdev, "PSP: fail to add psp tx flow group, err = %d\n", err); + goto err_create_fg; + } + + setup_fte_udp_psp(spec, PSP_DEFAULT_UDP_PORT); + flow_act.crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_PSP; + flow_act.flags |= FLOW_ACT_NO_APPEND; + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW | + MLX5_FLOW_CONTEXT_ACTION_CRYPTO_ENCRYPT; + rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(mdev, "PSP: fail to add psp tx flow rule, err = %d\n", err); + goto err_add_flow_rule; + } + + tx_fs->ft = ft; + tx_fs->fg = fg; + tx_fs->rule = rule; + goto out; + +err_add_flow_rule: + mlx5_destroy_flow_group(fg); +err_create_fg: + mlx5_destroy_flow_table(ft); +out: + kvfree(in); + kvfree(spec); + return err; +} + +static void accel_psp_fs_tx_destroy(struct mlx5e_psp_tx *tx_fs) +{ + if (!tx_fs->ft) + return; + + mlx5_del_flow_rules(tx_fs->rule); + mlx5_destroy_flow_group(tx_fs->fg); + mlx5_destroy_flow_table(tx_fs->ft); +} + +static int accel_psp_fs_tx_ft_get(struct mlx5e_psp_fs *fs) +{ + struct mlx5e_psp_tx *tx_fs = fs->tx_fs; + int err = 0; + + mutex_lock(&tx_fs->mutex); + if (tx_fs->refcnt++) + goto out; + + err = accel_psp_fs_tx_create_ft_table(fs); + if (err) + tx_fs->refcnt--; +out: + mutex_unlock(&tx_fs->mutex); + return err; +} + +static void accel_psp_fs_tx_ft_put(struct mlx5e_psp_fs *fs) +{ + struct mlx5e_psp_tx *tx_fs = fs->tx_fs; + + mutex_lock(&tx_fs->mutex); + if (--tx_fs->refcnt) + goto out; + + accel_psp_fs_tx_destroy(tx_fs); +out: + mutex_unlock(&tx_fs->mutex); +} + +static void accel_psp_fs_cleanup_tx(struct mlx5e_psp_fs *fs) +{ + struct mlx5e_psp_tx *tx_fs = fs->tx_fs; + + if (!tx_fs) + return; + + mutex_destroy(&tx_fs->mutex); + WARN_ON(tx_fs->refcnt); + kfree(tx_fs); + fs->tx_fs = NULL; +} + +static int accel_psp_fs_init_tx(struct mlx5e_psp_fs *fs) +{ + struct mlx5_flow_namespace *ns; + struct mlx5e_psp_tx *tx_fs; + + ns = mlx5_get_flow_namespace(fs->mdev, MLX5_FLOW_NAMESPACE_EGRESS_IPSEC); + if (!ns) + return -EOPNOTSUPP; + + tx_fs 
= kzalloc(sizeof(*tx_fs), GFP_KERNEL); + if (!tx_fs) + return -ENOMEM; + + mutex_init(&tx_fs->mutex); + tx_fs->ns = ns; + fs->tx_fs = tx_fs; + return 0; +} + +void mlx5_accel_psp_fs_cleanup_tx_tables(struct mlx5e_priv *priv) +{ + if (!priv->psp) + return; + + accel_psp_fs_tx_ft_put(priv->psp->fs); +} + +int mlx5_accel_psp_fs_init_tx_tables(struct mlx5e_priv *priv) +{ + if (!priv->psp) + return 0; + + return accel_psp_fs_tx_ft_get(priv->psp->fs); +} + +static void mlx5e_accel_psp_fs_cleanup(struct mlx5e_psp_fs *fs) +{ + accel_psp_fs_cleanup_rx(fs); + accel_psp_fs_cleanup_tx(fs); + kfree(fs); +} + +static struct mlx5e_psp_fs *mlx5e_accel_psp_fs_init(struct mlx5e_priv *priv) +{ + struct mlx5e_psp_fs *fs; + int err = 0; + + fs = kzalloc(sizeof(*fs), GFP_KERNEL); + if (!fs) + return ERR_PTR(-ENOMEM); + + fs->mdev = priv->mdev; + err = accel_psp_fs_init_tx(fs); + if (err) + goto err_tx; + + fs->fs = priv->fs; + err = accel_psp_fs_init_rx(fs); + if (err) + goto err_rx; + + return fs; + +err_rx: + accel_psp_fs_cleanup_tx(fs); +err_tx: + kfree(fs); + return ERR_PTR(err); +} + +static int +mlx5e_psp_set_config(struct psp_dev *psd, struct psp_dev_config *conf, + struct netlink_ext_ack *extack) +{ + return 0; /* TODO: this should actually do things to the device */ +} + +static int +mlx5e_psp_generate_key_spi(struct mlx5_core_dev *mdev, + enum mlx5_psp_gen_spi_in_key_size keysz, + unsigned int keysz_bytes, + struct psp_key_parsed *key) +{ + u32 out[MLX5_ST_SZ_DW(psp_gen_spi_out) + MLX5_ST_SZ_DW(key_spi)] = {}; + u32 in[MLX5_ST_SZ_DW(psp_gen_spi_in)] = {}; + void *outkey; + int err; + + WARN_ON_ONCE(keysz_bytes > PSP_MAX_KEY); + + MLX5_SET(psp_gen_spi_in, in, opcode, MLX5_CMD_OP_PSP_GEN_SPI); + MLX5_SET(psp_gen_spi_in, in, key_size, keysz); + MLX5_SET(psp_gen_spi_in, in, num_of_spi, 1); + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); + if (err) + return err; + + outkey = MLX5_ADDR_OF(psp_gen_spi_out, out, key_spi); + key->spi = cpu_to_be32(MLX5_GET(key_spi, outkey, spi)); + memcpy(key->key, MLX5_ADDR_OF(key_spi, outkey, key) + 32 - keysz_bytes, + keysz_bytes); + + return 0; +} + +static int +mlx5e_psp_rx_spi_alloc(struct psp_dev *psd, u32 version, + struct psp_key_parsed *assoc, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(psd->main_netdev); + enum mlx5_psp_gen_spi_in_key_size keysz; + u8 keysz_bytes; + + switch (version) { + case PSP_VERSION_HDR0_AES_GCM_128: + keysz = MLX5_PSP_GEN_SPI_IN_KEY_SIZE_128; + keysz_bytes = 16; + break; + case PSP_VERSION_HDR0_AES_GCM_256: + keysz = MLX5_PSP_GEN_SPI_IN_KEY_SIZE_256; + keysz_bytes = 32; + break; + default: + return -EINVAL; + } + + return mlx5e_psp_generate_key_spi(priv->mdev, keysz, keysz_bytes, assoc); +} + +struct psp_key { + u32 id; +}; + +static int mlx5e_psp_assoc_add(struct psp_dev *psd, struct psp_assoc *pas, + struct netlink_ext_ack *extack) +{ + struct mlx5e_priv *priv = netdev_priv(psd->main_netdev); + struct mlx5_core_dev *mdev = priv->mdev; + struct psp_key_parsed *tx = &pas->tx; + struct mlx5e_psp *psp = priv->psp; + struct psp_key *nkey; + int err; + + mdev = priv->mdev; + nkey = (struct psp_key *)pas->drv_data; + + err = mlx5_create_encryption_key(mdev, tx->key, + psp_key_size(pas->version), + MLX5_ACCEL_OBJ_PSP_KEY, + &nkey->id); + if (err) { + mlx5_core_err(mdev, "Failed to create encryption key (err = %d)\n", err); + return err; + } + + atomic_inc(&psp->tx_key_cnt); + return 0; +} + +static void mlx5e_psp_assoc_del(struct psp_dev *psd, struct psp_assoc *pas) +{ + struct mlx5e_priv *priv = 
netdev_priv(psd->main_netdev); + struct mlx5e_psp *psp = priv->psp; + struct psp_key *nkey; + + nkey = (struct psp_key *)pas->drv_data; + mlx5_destroy_encryption_key(priv->mdev, nkey->id); + atomic_dec(&psp->tx_key_cnt); +} + +static int mlx5e_psp_rotate_key(struct mlx5_core_dev *mdev) +{ + u32 in[MLX5_ST_SZ_DW(psp_rotate_key_in)] = {}; + u32 out[MLX5_ST_SZ_DW(psp_rotate_key_out)]; + + MLX5_SET(psp_rotate_key_in, in, opcode, + MLX5_CMD_OP_PSP_ROTATE_KEY); + + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); +} + +static int +mlx5e_psp_key_rotate(struct psp_dev *psd, struct netlink_ext_ack *exack) +{ + struct mlx5e_priv *priv = netdev_priv(psd->main_netdev); + + /* no support for protecting against external rotations */ + psd->generation = 0; + + return mlx5e_psp_rotate_key(priv->mdev); +} + +static struct psp_dev_ops mlx5_psp_ops = { + .set_config = mlx5e_psp_set_config, + .rx_spi_alloc = mlx5e_psp_rx_spi_alloc, + .tx_key_add = mlx5e_psp_assoc_add, + .tx_key_del = mlx5e_psp_assoc_del, + .key_rotate = mlx5e_psp_key_rotate, +}; + +void mlx5e_psp_unregister(struct mlx5e_priv *priv) +{ + if (!priv->psp || !priv->psp->psp) + return; + + psp_dev_unregister(priv->psp->psp); +} + +void mlx5e_psp_register(struct mlx5e_priv *priv) +{ + /* FW Caps missing */ + if (!priv->psp) + return; + + priv->psp->caps.assoc_drv_spc = sizeof(u32); + priv->psp->caps.versions = 1 << PSP_VERSION_HDR0_AES_GCM_128; + if (MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_encrypt) && + MLX5_CAP_PSP(priv->mdev, psp_crypto_esp_aes_gcm_256_decrypt)) + priv->psp->caps.versions |= 1 << PSP_VERSION_HDR0_AES_GCM_256; + + priv->psp->psp = psp_dev_create(priv->netdev, &mlx5_psp_ops, + &priv->psp->caps, NULL); + if (IS_ERR(priv->psp->psp)) + mlx5_core_err(priv->mdev, "PSP failed to register due to %pe\n", + priv->psp->psp); +} + +int mlx5e_psp_init(struct mlx5e_priv *priv) +{ + struct mlx5_core_dev *mdev = priv->mdev; + struct mlx5e_psp_fs *fs; + struct mlx5e_psp *psp; + int err; + + if (!mlx5_is_psp_device(mdev)) { + mlx5_core_dbg(mdev, "PSP offload not supported\n"); + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ETH(mdev, swp)) { + mlx5_core_dbg(mdev, "SWP not supported\n"); + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ETH(mdev, swp_csum)) { + mlx5_core_dbg(mdev, "SWP checksum not supported\n"); + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ETH(mdev, swp_csum_l4_partial)) { + mlx5_core_dbg(mdev, "SWP L4 partial checksum not supported\n"); + return -EOPNOTSUPP; + } + + if (!MLX5_CAP_ETH(mdev, swp_lso)) { + mlx5_core_dbg(mdev, "PSP LSO not supported\n"); + return -EOPNOTSUPP; + } + + psp = kzalloc(sizeof(*psp), GFP_KERNEL); + if (!psp) + return -ENOMEM; + + priv->psp = psp; + fs = mlx5e_accel_psp_fs_init(priv); + if (IS_ERR(fs)) { + err = PTR_ERR(fs); + goto out_err; + } + + psp->fs = fs; + + mlx5_core_dbg(priv->mdev, "PSP attached to netdevice\n"); + return 0; + +out_err: + priv->psp = NULL; + kfree(psp); + return err; +} + +void mlx5e_psp_cleanup(struct mlx5e_priv *priv) +{ + struct mlx5e_psp *psp = priv->psp; + + if (!psp) + return; + + WARN_ON(atomic_read(&psp->tx_key_cnt)); + mlx5e_accel_psp_fs_cleanup(psp->fs); + priv->psp = NULL; + kfree(psp); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.h new file mode 100644 index 000000000000..42bb671fb2cb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp.h @@ -0,0 +1,61 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & 
AFFILIATES. All rights reserved. */ + +#ifndef __MLX5E_ACCEL_PSP_H__ +#define __MLX5E_ACCEL_PSP_H__ +#if IS_ENABLED(CONFIG_MLX5_EN_PSP) +#include <net/psp/types.h> +#include "en.h" + +struct mlx5e_psp { + struct psp_dev *psp; + struct psp_dev_caps caps; + struct mlx5e_psp_fs *fs; + atomic_t tx_key_cnt; +}; + +static inline bool mlx5_is_psp_device(struct mlx5_core_dev *mdev) +{ + if (!MLX5_CAP_GEN(mdev, psp)) + return false; + + if (!MLX5_CAP_PSP(mdev, psp_crypto_offload) || + !MLX5_CAP_PSP(mdev, psp_crypto_esp_aes_gcm_128_encrypt) || + !MLX5_CAP_PSP(mdev, psp_crypto_esp_aes_gcm_128_decrypt)) + return false; + + return true; +} + +int mlx5_accel_psp_fs_init_rx_tables(struct mlx5e_priv *priv); +void mlx5_accel_psp_fs_cleanup_rx_tables(struct mlx5e_priv *priv); +int mlx5_accel_psp_fs_init_tx_tables(struct mlx5e_priv *priv); +void mlx5_accel_psp_fs_cleanup_tx_tables(struct mlx5e_priv *priv); +void mlx5e_psp_register(struct mlx5e_priv *priv); +void mlx5e_psp_unregister(struct mlx5e_priv *priv); +int mlx5e_psp_init(struct mlx5e_priv *priv); +void mlx5e_psp_cleanup(struct mlx5e_priv *priv); +#else +static inline int mlx5_accel_psp_fs_init_rx_tables(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5_accel_psp_fs_cleanup_rx_tables(struct mlx5e_priv *priv) { } +static inline int mlx5_accel_psp_fs_init_tx_tables(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5_accel_psp_fs_cleanup_tx_tables(struct mlx5e_priv *priv) { } +static inline bool mlx5_is_psp_device(struct mlx5_core_dev *mdev) +{ + return false; +} + +static inline void mlx5e_psp_register(struct mlx5e_priv *priv) { } +static inline void mlx5e_psp_unregister(struct mlx5e_priv *priv) { } +static inline int mlx5e_psp_init(struct mlx5e_priv *priv) { return 0; } +static inline void mlx5e_psp_cleanup(struct mlx5e_priv *priv) { } +#endif /* CONFIG_MLX5_EN_PSP */ +#endif /* __MLX5E_ACCEL_PSP_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.c new file mode 100644 index 000000000000..828bff1137af --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <net/protocol.h> +#include <net/udp.h> +#include <net/ip6_checksum.h> +#include <net/psp/types.h> + +#include "en.h" +#include "psp.h" +#include "en_accel/psp_rxtx.h" +#include "en_accel/psp.h" + +enum { + MLX5E_PSP_OFFLOAD_RX_SYNDROME_DECRYPTED, + MLX5E_PSP_OFFLOAD_RX_SYNDROME_AUTH_FAILED, + MLX5E_PSP_OFFLOAD_RX_SYNDROME_BAD_TRAILER, +}; + +static void mlx5e_psp_set_swp(struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st, + struct mlx5_wqe_eth_seg *eseg) +{ + /* Tunnel Mode: + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP IP L4 + * + * Transport Mode: + * SWP: OutL3 OutL4 + * Pkt: MAC IP ESP L4 + * + * Tunnel(VXLAN TCP/UDP) over Transport Mode + * SWP: OutL3 InL3 InL4 + * Pkt: MAC IP ESP UDP VXLAN IP L4 + */ + u8 inner_ipproto = 0; + struct ethhdr *eth; + + /* Shared settings */ + eseg->swp_outer_l3_offset = skb_network_offset(skb) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_OUTER_L3_IPV6; + + if (skb->inner_protocol_type == ENCAP_TYPE_IPPROTO) { + inner_ipproto = skb->inner_ipproto; + /* Set SWP additional flags for packet of type IP|UDP|PSP|[ TCP | UDP ] */ + switch (inner_ipproto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + eseg->swp_inner_l4_offset = skb_inner_transport_offset(skb) / 2; + break; + default: + break; + } + } else { + /* IP in IP tunneling like vxlan*/ + if (skb->inner_protocol_type != ENCAP_TYPE_ETHER) + return; + + eth = (struct ethhdr *)skb_inner_mac_header(skb); + switch (ntohs(eth->h_proto)) { + case ETH_P_IP: + inner_ipproto = ((struct iphdr *)((char *)skb->data + + skb_inner_network_offset(skb)))->protocol; + break; + case ETH_P_IPV6: + inner_ipproto = ((struct ipv6hdr *)((char *)skb->data + + skb_inner_network_offset(skb)))->nexthdr; + break; + default: + break; + } + + /* Tunnel(VXLAN TCP/UDP) over Transport Mode PSP i.e. PSP payload is vxlan tunnel */ + switch (inner_ipproto) { + case IPPROTO_UDP: + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L4_UDP; + fallthrough; + case IPPROTO_TCP: + eseg->swp_inner_l3_offset = skb_inner_network_offset(skb) / 2; + eseg->swp_inner_l4_offset = + (skb->csum_start + skb->head - skb->data) / 2; + if (skb->protocol == htons(ETH_P_IPV6)) + eseg->swp_flags |= MLX5_ETH_WQE_SWP_INNER_L3_IPV6; + break; + default: + break; + } + + psp_st->inner_ipproto = inner_ipproto; + } +} + +static bool mlx5e_psp_set_state(struct mlx5e_priv *priv, + struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st) +{ + struct psp_assoc *pas; + bool ret = false; + + rcu_read_lock(); + pas = psp_skb_get_assoc_rcu(skb); + if (!pas) + goto out; + + ret = true; + psp_st->tailen = PSP_TRL_SIZE; + psp_st->spi = pas->tx.spi; + psp_st->ver = pas->version; + psp_st->keyid = *(u32 *)pas->drv_data; + +out: + rcu_read_unlock(); + return ret; +} + +bool mlx5e_psp_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{ + u32 psp_meta_data = be32_to_cpu(cqe->ft_metadata); + struct mlx5e_priv *priv = netdev_priv(netdev); + u16 dev_id = priv->psp->psp->id; + bool strip_icv = true; + u8 generation = 0; + + /* TBD: report errors as SW counters to ethtool, any further handling ? 
*/ + if (MLX5_PSP_METADATA_SYNDROME(psp_meta_data) != MLX5E_PSP_OFFLOAD_RX_SYNDROME_DECRYPTED) + goto drop; + + if (psp_dev_rcv(skb, dev_id, generation, strip_icv)) + goto drop; + + skb->decrypted = 1; + return false; + +drop: + kfree_skb(skb); + return true; +} + +void mlx5e_psp_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st, + struct mlx5_wqe_eth_seg *eseg) +{ + if (!mlx5_is_psp_device(priv->mdev)) + return; + + if (unlikely(skb->protocol != htons(ETH_P_IP) && + skb->protocol != htons(ETH_P_IPV6))) + return; + + mlx5e_psp_set_swp(skb, psp_st, eseg); + /* Special WA for PSP LSO in ConnectX7 */ + eseg->swp_outer_l3_offset = 0; + eseg->swp_inner_l3_offset = 0; + + eseg->flow_table_metadata |= cpu_to_be32(psp_st->keyid); + eseg->trailer |= cpu_to_be32(MLX5_ETH_WQE_INSERT_TRAILER) | + cpu_to_be32(MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC); +} + +void mlx5e_psp_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_psp_state *psp_st, + struct mlx5_wqe_inline_seg *inlseg) +{ + inlseg->byte_count = cpu_to_be32(psp_st->tailen | MLX5_INLINE_SEG); +} + +bool mlx5e_psp_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct net *net = sock_net(skb->sk); + const struct ipv6hdr *ip6; + struct tcphdr *th; + + if (!mlx5e_psp_set_state(priv, skb, psp_st)) + return true; + + /* psp_encap of the packet */ + if (!psp_dev_encapsulate(net, skb, psp_st->spi, psp_st->ver, 0)) { + kfree_skb_reason(skb, SKB_DROP_REASON_PSP_OUTPUT); + return false; + } + if (skb_is_gso(skb)) { + ip6 = ipv6_hdr(skb); + th = inner_tcp_hdr(skb); + + th->check = ~tcp_v6_check(skb_shinfo(skb)->gso_size + inner_tcp_hdrlen(skb), &ip6->saddr, + &ip6->daddr, 0); + } + + return true; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.h new file mode 100644 index 000000000000..70289c921bd6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/psp_rxtx.h @@ -0,0 +1,121 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5E_PSP_RXTX_H__ +#define __MLX5E_PSP_RXTX_H__ + +#include <linux/skbuff.h> +#include <net/xfrm.h> +#include <net/psp.h> +#include "en.h" +#include "en/txrx.h" + +/* Bit30: PSP marker, Bit29-23: PSP syndrome, Bit22-0: PSP obj id */ +#define MLX5_PSP_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x3) +#define MLX5_PSP_METADATA_SYNDROME(metadata) (((metadata) >> 23) & GENMASK(6, 0)) +#define MLX5_PSP_METADATA_HANDLE(metadata) ((metadata) & GENMASK(22, 0)) + +struct mlx5e_accel_tx_psp_state { + u32 tailen; + u32 keyid; + __be32 spi; + u8 inner_ipproto; + u8 ver; +}; + +#ifdef CONFIG_MLX5_EN_PSP +static inline bool mlx5e_psp_is_offload_state(struct mlx5e_accel_tx_psp_state *psp_state) +{ + return (psp_state->tailen != 0); +} + +static inline bool mlx5e_psp_is_offload(struct sk_buff *skb, struct net_device *netdev) +{ + bool ret; + + rcu_read_lock(); + ret = !!psp_skb_get_assoc_rcu(skb); + rcu_read_unlock(); + return ret; +} + +bool mlx5e_psp_handle_tx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st); + +void mlx5e_psp_tx_build_eseg(struct mlx5e_priv *priv, struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st, + struct mlx5_wqe_eth_seg *eseg); + +void mlx5e_psp_handle_tx_wqe(struct mlx5e_tx_wqe *wqe, + struct mlx5e_accel_tx_psp_state *psp_st, + struct mlx5_wqe_inline_seg *inlseg); + +static inline bool mlx5e_psp_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st, + struct mlx5_wqe_eth_seg *eseg) +{ + u8 inner_ipproto; + + if (!mlx5e_psp_is_offload_state(psp_st)) + return false; + + inner_ipproto = psp_st->inner_ipproto; + eseg->cs_flags = MLX5_ETH_WQE_L3_CSUM; + if (inner_ipproto) { + eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM; + if (inner_ipproto == IPPROTO_TCP || inner_ipproto == IPPROTO_UDP) + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) + sq->stats->csum_partial_inner++; + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + eseg->cs_flags |= MLX5_ETH_WQE_L4_INNER_CSUM; + sq->stats->csum_partial_inner++; + } + + return true; +} + +static inline unsigned int mlx5e_psp_tx_ids_len(struct mlx5e_accel_tx_psp_state *psp_st) +{ + return psp_st->tailen; +} + +static inline bool mlx5e_psp_is_rx_flow(struct mlx5_cqe64 *cqe) +{ + return MLX5_PSP_METADATA_MARKER(be32_to_cpu(cqe->ft_metadata)); +} + +bool mlx5e_psp_offload_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb, + struct mlx5_cqe64 *cqe); +#else +static inline bool mlx5e_psp_is_offload_state(struct mlx5e_accel_tx_psp_state *psp_state) +{ + return false; +} + +static inline bool mlx5e_psp_is_offload(struct sk_buff *skb, struct net_device *netdev) +{ + return false; +} + +static inline bool mlx5e_psp_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, + struct mlx5e_accel_tx_psp_state *psp_st, + struct mlx5_wqe_eth_seg *eseg) +{ + return false; +} + +static inline bool mlx5e_psp_is_rx_flow(struct mlx5_cqe64 *cqe) +{ + return false; +} + +static inline bool mlx5e_psp_offload_handle_rx_skb(struct net_device *netdev, + struct sk_buff *skb, + struct mlx5_cqe64 *cqe) +{ + return false; +} +#endif /* CONFIG_MLX5_EN_PSP */ +#endif /* __MLX5E_PSP_RXTX_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 6ed3a32b7e22..30424ccad584 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -30,6 +30,7 @@ * SOFTWARE. */ +#include "devlink.h" #include "en.h" #include "lib/crypto.h" @@ -140,9 +141,22 @@ err_close_tises: return err; } +static unsigned int +mlx5e_get_devlink_param_num_doorbells(struct mlx5_core_dev *dev) +{ + const u32 param_id = DEVLINK_PARAM_GENERIC_ID_NUM_DOORBELLS; + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devl_param_driverinit_value_get(devlink, param_id, &val); + return err ? MLX5_DEFAULT_NUM_DOORBELLS : val.vu32; +} + int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises) { struct mlx5e_hw_objs *res = &mdev->mlx5e_res.hw_objs; + unsigned int num_doorbells, i; int err; err = mlx5_core_alloc_pd(mdev, &res->pdn); @@ -163,17 +177,30 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises) goto err_dealloc_transport_domain; } - err = mlx5_alloc_bfreg(mdev, &res->bfreg, false, false); - if (err) { - mlx5_core_err(mdev, "alloc bfreg failed, %d\n", err); + num_doorbells = min(mlx5e_get_devlink_param_num_doorbells(mdev), + mlx5e_get_max_num_channels(mdev)); + res->bfregs = kcalloc(num_doorbells, sizeof(*res->bfregs), GFP_KERNEL); + if (!res->bfregs) { + err = -ENOMEM; goto err_destroy_mkey; } + for (i = 0; i < num_doorbells; i++) { + err = mlx5_alloc_bfreg(mdev, res->bfregs + i, false, false); + if (err) { + mlx5_core_warn(mdev, + "could only allocate %d/%d doorbells, err %d.\n", + i, num_doorbells, err); + break; + } + } + res->num_bfregs = i; + if (create_tises) { err = mlx5e_create_tises(mdev, res->tisn); if (err) { mlx5_core_err(mdev, "alloc tises failed, %d\n", err); - goto err_destroy_bfreg; + goto err_destroy_bfregs; } res->tisn_valid = true; } @@ -183,15 +210,17 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev, bool create_tises) mdev->mlx5e_res.dek_priv = mlx5_crypto_dek_init(mdev); if (IS_ERR(mdev->mlx5e_res.dek_priv)) { - mlx5_core_err(mdev, "crypto dek init failed, %ld\n", - PTR_ERR(mdev->mlx5e_res.dek_priv)); + mlx5_core_err(mdev, "crypto dek init failed, %pe\n", + mdev->mlx5e_res.dek_priv); mdev->mlx5e_res.dek_priv = NULL; } return 0; -err_destroy_bfreg: - mlx5_free_bfreg(mdev, &res->bfreg); +err_destroy_bfregs: + for (i = 0; i < res->num_bfregs; i++) + mlx5_free_bfreg(mdev, res->bfregs + i); + kfree(res->bfregs); err_destroy_mkey: mlx5_core_destroy_mkey(mdev, res->mkey); err_dealloc_transport_domain: @@ -209,7 +238,9 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev) mdev->mlx5e_res.dek_priv = NULL; if (res->tisn_valid) mlx5e_destroy_tises(mdev, res->tisn); - mlx5_free_bfreg(mdev, &res->bfreg); + for (unsigned int i = 0; i < res->num_bfregs; i++) + mlx5_free_bfreg(mdev, res->bfregs + i); + kfree(res->bfregs); mlx5_core_destroy_mkey(mdev, res->mkey); mlx5_core_dealloc_transport_domain(mdev, res->td.tdn); mlx5_core_dealloc_pd(mdev, res->pdn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index d507366d773e..53e5ae252eac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -1494,7 +1494,8 @@ static int mlx5e_get_rxfh(struct net_device *netdev, struct ethtool_rxfh_param * } static int mlx5e_rxfh_hfunc_check(struct mlx5e_priv *priv, - const struct ethtool_rxfh_param *rxfh) + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { unsigned int count; @@ -1504,8 +1505,10 @@ static int 
mlx5e_rxfh_hfunc_check(struct mlx5e_priv *priv, unsigned int xor8_max_channels = mlx5e_rqt_max_num_channels_allowed_for_xor8(); if (count > xor8_max_channels) { - netdev_err(priv->netdev, "%s: Cannot set RSS hash function to XOR, current number of channels (%d) exceeds the maximum allowed for XOR8 RSS hfunc (%d)\n", - __func__, count, xor8_max_channels); + NL_SET_ERR_MSG_FMT_MOD( + extack, + "Number of channels (%u) exceeds the max for XOR8 RSS (%u)", + count, xor8_max_channels); return -EINVAL; } } @@ -1524,7 +1527,7 @@ static int mlx5e_set_rxfh(struct net_device *dev, mutex_lock(&priv->state_lock); - err = mlx5e_rxfh_hfunc_check(priv, rxfh); + err = mlx5e_rxfh_hfunc_check(priv, rxfh, extack); if (err) goto unlock; @@ -1550,7 +1553,7 @@ static int mlx5e_create_rxfh_context(struct net_device *dev, mutex_lock(&priv->state_lock); - err = mlx5e_rxfh_hfunc_check(priv, rxfh); + err = mlx5e_rxfh_hfunc_check(priv, rxfh, extack); if (err) goto unlock; @@ -1590,7 +1593,7 @@ static int mlx5e_modify_rxfh_context(struct net_device *dev, mutex_lock(&priv->state_lock); - err = mlx5e_rxfh_hfunc_check(priv, rxfh); + err = mlx5e_rxfh_hfunc_check(priv, rxfh, extack); if (err) goto unlock; @@ -1927,11 +1930,12 @@ static int mlx5e_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) } static void mlx5e_get_fec_stats(struct net_device *netdev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct mlx5e_priv *priv = netdev_priv(netdev); - mlx5e_stats_fec_get(priv, fec_stats); + mlx5e_stats_fec_get(priv, fec_stats, hist); } static int mlx5e_get_fecparam(struct net_device *netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 265c4ca85f7d..8928d2dcd43f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -905,6 +905,9 @@ static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs, ft_attr->prio = MLX5E_NIC_PRIO; for (tt = 0; tt < MLX5_NUM_TT; tt++) { + if (mlx5_ttc_is_decrypted_esp_tt(tt)) + continue; + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; ttc_params->dests[tt].tir_num = tt == MLX5_TT_ANY ? @@ -914,9 +917,17 @@ static void mlx5e_set_inner_ttc_params(struct mlx5e_flow_steering *fs, } } +static bool mlx5e_ipsec_rss_supported(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_NIC_RX_FT_FIELD_SUPPORT_2(mdev, ipsec_next_header) && + MLX5_CAP_NIC_RX_FT_FIELD_SUPPORT_2(mdev, outer_l4_type_ext) && + MLX5_CAP_NIC_RX_FT_FIELD_SUPPORT_2(mdev, inner_l4_type_ext); +} + void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, struct mlx5e_rx_res *rx_res, - struct ttc_params *ttc_params, bool tunnel) + struct ttc_params *ttc_params, bool tunnel, + bool ipsec_rss) { struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr; @@ -927,7 +938,13 @@ void mlx5e_set_ttc_params(struct mlx5e_flow_steering *fs, ft_attr->level = MLX5E_TTC_FT_LEVEL; ft_attr->prio = MLX5E_NIC_PRIO; + ttc_params->ipsec_rss = ipsec_rss && + mlx5e_ipsec_rss_supported(fs->mdev); + for (tt = 0; tt < MLX5_NUM_TT; tt++) { + if (mlx5_ttc_is_decrypted_esp_tt(tt)) + continue; + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; ttc_params->dests[tt].tir_num = tt == MLX5_TT_ANY ? 
@@ -1293,7 +1310,7 @@ int mlx5e_create_ttc_table(struct mlx5e_flow_steering *fs, { struct ttc_params ttc_params = {}; - mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true); + mlx5e_set_ttc_params(fs, rx_res, &ttc_params, true, true); fs->ttc = mlx5_create_ttc_table(fs->mdev, &ttc_params); return PTR_ERR_OR_ZERO(fs->ttc); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 21bb88c5d3dc..a56825921c23 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -52,6 +52,7 @@ #include "en_tc.h" #include "en_rep.h" #include "en_accel/ipsec.h" +#include "en_accel/psp.h" #include "en_accel/macsec.h" #include "en_accel/en_accel.h" #include "en_accel/ktls.h" @@ -232,9 +233,13 @@ static int mlx5e_devcom_event_mpv(int event, void *my_data, void *event_data) static int mlx5e_devcom_init_mpv(struct mlx5e_priv *priv, u64 *data) { + struct mlx5_devcom_match_attr attr = { + .key.val = *data, + }; + priv->devcom = mlx5_devcom_register_component(priv->mdev->priv.devc, MLX5_DEVCOM_MPV, - *data, + &attr, mlx5e_devcom_event_mpv, priv); if (IS_ERR(priv->devcom)) @@ -777,13 +782,6 @@ static void mlx5e_rq_shampo_hd_info_free(struct mlx5e_rq *rq) bitmap_free(rq->mpwqe.shampo->bitmap); } -static bool mlx5_rq_needs_separate_hd_pool(struct mlx5e_rq *rq) -{ - struct netdev_rx_queue *rxq = __netif_get_rx_queue(rq->netdev, rq->ix); - - return !!rxq->mp_params.mp_ops; -} - static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, struct mlx5e_params *params, struct mlx5e_rq_param *rqp, @@ -822,7 +820,7 @@ static int mlx5_rq_shampo_alloc(struct mlx5_core_dev *mdev, hd_pool_size = (rq->mpwqe.shampo->hd_per_wqe * wq_size) / MLX5E_SHAMPO_WQ_HEADER_PER_PAGE; - if (mlx5_rq_needs_separate_hd_pool(rq)) { + if (netif_rxq_has_unreadable_mp(rq->netdev, rq->ix)) { /* Separate page pool for shampo headers */ struct page_pool_params pp_params = { }; @@ -1536,7 +1534,7 @@ static int mlx5e_alloc_xdpsq(struct mlx5e_channel *c, sq->pdev = c->pdev; sq->mkey_be = c->mkey_be; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->uar_map = c->bfreg->map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu) - ETH_FCS_LEN; sq->xsk_pool = xsk_pool; @@ -1621,7 +1619,7 @@ static int mlx5e_alloc_icosq(struct mlx5e_channel *c, int err; sq->channel = c; - sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->uar_map = c->bfreg->map; sq->reserved_room = param->stop_room; param->wq.db_numa_node = cpu_to_node(c->cpu); @@ -1706,7 +1704,7 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c, sq->priv = c->priv; sq->ch_ix = c->ix; sq->txq_ix = txq_ix; - sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->uar_map = c->bfreg->map; sq->min_inline_mode = params->tx_min_inline_mode; sq->hw_mtu = MLX5E_SW2HW_MTU(params, params->sw_mtu); sq->max_sq_mpw_wqebbs = mlx5e_get_max_sq_aligned_wqebbs(mdev); @@ -1782,7 +1780,7 @@ static int mlx5e_create_sq(struct mlx5_core_dev *mdev, MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, uar_page, csp->uar_page); MLX5_SET(wq, wq, log_wq_pg_sz, csp->wq_ctrl->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, csp->wq_ctrl->db.dma); @@ -1886,6 +1884,7 @@ int mlx5e_open_txqsq(struct mlx5e_channel *c, u32 tisn, int txq_ix, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; 
csp.min_inline_mode = sq->min_inline_mode; + csp.uar_page = c->bfreg->index; err = mlx5e_create_sq_rdy(c->mdev, param, &csp, qos_queue_group_id, &sq->sqn); if (err) goto err_free_txqsq; @@ -2056,6 +2055,7 @@ static int mlx5e_open_icosq(struct mlx5e_channel *c, struct mlx5e_params *params csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = params->tx_min_inline_mode; + csp.uar_page = c->bfreg->index; err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); if (err) goto err_free_icosq; @@ -2116,6 +2116,7 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params, csp.cqn = sq->cq.mcq.cqn; csp.wq_ctrl = &sq->wq_ctrl; csp.min_inline_mode = sq->min_inline_mode; + csp.uar_page = c->bfreg->index; set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state); err = mlx5e_create_sq_rdy(c->mdev, param, &csp, 0, &sq->sqn); @@ -2186,6 +2187,7 @@ static void mlx5e_close_xdpredirect_sq(struct mlx5e_xdpsq *xdpsq) static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, struct net_device *netdev, struct workqueue_struct *workqueue, + struct mlx5_uars_page *uar, struct mlx5e_cq_param *param, struct mlx5e_cq *cq) { @@ -2217,6 +2219,7 @@ static int mlx5e_alloc_cq_common(struct mlx5_core_dev *mdev, cq->mdev = mdev; cq->netdev = netdev; cq->workqueue = workqueue; + cq->uar = uar; return 0; } @@ -2232,7 +2235,8 @@ static int mlx5e_alloc_cq(struct mlx5_core_dev *mdev, param->wq.db_numa_node = ccp->node; param->eq_ix = ccp->ix; - err = mlx5e_alloc_cq_common(mdev, ccp->netdev, ccp->wq, param, cq); + err = mlx5e_alloc_cq_common(mdev, ccp->netdev, ccp->wq, + ccp->uar, param, cq); cq->napi = ccp->napi; cq->ch_stats = ccp->ch_stats; @@ -2277,7 +2281,7 @@ static int mlx5e_create_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param) MLX5_SET(cqc, cqc, cq_period_mode, mlx5e_cq_period_mode(param->cq_period_mode)); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); - MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, uar_page, cq->uar->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); @@ -2744,6 +2748,20 @@ void mlx5e_trigger_napi_sched(struct napi_struct *napi) local_bh_enable(); } +static void mlx5e_channel_pick_doorbell(struct mlx5e_channel *c) +{ + struct mlx5e_hw_objs *hw_objs = &c->mdev->mlx5e_res.hw_objs; + + /* No dedicated Ethernet doorbells, use the global one. */ + if (hw_objs->num_bfregs == 0) { + c->bfreg = &c->mdev->priv.bfreg; + return; + } + + /* Round-robin between doorbells. 
*/ + c->bfreg = hw_objs->bfregs + c->vec_ix % hw_objs->num_bfregs; +} + static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, struct mlx5e_params *params, struct xsk_buff_pool *xsk_pool, @@ -2798,6 +2816,8 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, c->aff_mask = irq_get_effective_affinity_mask(irq); c->lag_port = mlx5e_enumerate_lag_port(mdev, ix); + mlx5e_channel_pick_doorbell(c); + netif_napi_add_config_locked(netdev, &c->napi, mlx5e_napi_poll, ix); netif_napi_set_irq_locked(&c->napi, irq); @@ -3569,7 +3589,8 @@ static int mlx5e_alloc_drop_cq(struct mlx5e_priv *priv, param->wq.buf_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); param->wq.db_numa_node = dev_to_node(mlx5_core_dma_dev(mdev)); - return mlx5e_alloc_cq_common(priv->mdev, priv->netdev, priv->wq, param, cq); + return mlx5e_alloc_cq_common(priv->mdev, priv->netdev, priv->wq, + mdev->priv.bfreg.up, param, cq); } int mlx5e_open_drop_rq(struct mlx5e_priv *priv, @@ -5625,12 +5646,36 @@ static int mlx5e_queue_start(struct net_device *dev, void *newq, return 0; } +static struct device *mlx5e_queue_get_dma_dev(struct net_device *dev, + int queue_index) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + struct mlx5e_channels *channels; + struct device *pdev = NULL; + struct mlx5e_channel *ch; + + channels = &priv->channels; + + mutex_lock(&priv->state_lock); + + if (queue_index >= channels->num) + goto out; + + ch = channels->c[queue_index]; + pdev = ch->pdev; +out: + mutex_unlock(&priv->state_lock); + + return pdev; +} + static const struct netdev_queue_mgmt_ops mlx5e_queue_mgmt_ops = { .ndo_queue_mem_size = sizeof(struct mlx5_qmgmt_data), .ndo_queue_mem_alloc = mlx5e_queue_mem_alloc, .ndo_queue_mem_free = mlx5e_queue_mem_free, .ndo_queue_start = mlx5e_queue_start, .ndo_queue_stop = mlx5e_queue_stop, + .ndo_queue_get_dma_dev = mlx5e_queue_get_dma_dev, }; static void mlx5e_build_nic_netdev(struct net_device *netdev) @@ -5858,6 +5903,10 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, } priv->fs = fs; + err = mlx5e_psp_init(priv); + if (err) + mlx5_core_err(mdev, "PSP initialization failed, %d\n", err); + err = mlx5e_ktls_init(priv); if (err) mlx5_core_err(mdev, "TLS initialization failed, %d\n", err); @@ -5870,6 +5919,7 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, if (take_rtnl) rtnl_lock(); + mlx5e_psp_register(priv); /* update XDP supported features */ mlx5e_set_xdp_feature(netdev); @@ -5882,7 +5932,9 @@ static int mlx5e_nic_init(struct mlx5_core_dev *mdev, static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_health_destroy_reporters(priv); + mlx5e_psp_unregister(priv); mlx5e_ktls_cleanup(priv); + mlx5e_psp_cleanup(priv); mlx5e_fs_cleanup(priv->fs); debugfs_remove_recursive(priv->dfs_root); priv->fs = NULL; @@ -6212,8 +6264,15 @@ int mlx5e_priv_init(struct mlx5e_priv *priv, if (!priv->channel_stats) goto err_free_tx_rates; + priv->fec_ranges = kcalloc(ETHTOOL_FEC_HIST_MAX, + sizeof(*priv->fec_ranges), GFP_KERNEL); + if (!priv->fec_ranges) + goto err_free_channel_stats; + return 0; +err_free_channel_stats: + kfree(priv->channel_stats); err_free_tx_rates: kfree(priv->tx_rates); err_free_txq2sq_stats: @@ -6237,6 +6296,7 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv) if (!priv->mdev) return; + kfree(priv->fec_ranges); for (i = 0; i < priv->stats_nch; i++) kvfree(priv->channel_stats[i]); kfree(priv->channel_stats); @@ -6730,6 +6790,7 @@ static void _mlx5e_remove(struct auxiliary_device *adev) * is already unregistered before changing to NIC profile. 
*/ if (priv->netdev->reg_state == NETREG_REGISTERED) { + mlx5e_psp_unregister(priv); unregister_netdev(priv->netdev); _mlx5e_suspend(adev, false); } else { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 63a7a788fb0d..0335ca8277ef 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -974,7 +974,7 @@ static int mlx5e_create_rep_ttc_table(struct mlx5e_priv *priv) MLX5_FLOW_NAMESPACE_KERNEL), false); /* The inner_ttc in the ttc params is intentionally not set */ - mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false); + mlx5e_set_ttc_params(priv->fs, priv->rx_res, &ttc_params, false, false); if (rep->vport != MLX5_VPORT_UPLINK) /* To give uplik rep TTC a lower level for chaining from root ft */ @@ -1447,11 +1447,11 @@ static void mlx5e_rep_vnic_reporter_create(struct mlx5e_priv *priv, reporter = devl_port_health_reporter_create(dl_port, &mlx5_rep_vnic_reporter_ops, - 0, rpriv); + rpriv); if (IS_ERR(reporter)) { mlx5_core_err(priv->mdev, - "Failed to create representor vnic reporter, err = %ld\n", - PTR_ERR(reporter)); + "Failed to create representor vnic reporter, err = %pe\n", + reporter); return; } @@ -1506,12 +1506,21 @@ static const struct mlx5e_profile mlx5e_uplink_rep_profile = { static int mlx5e_vport_uplink_rep_load(struct mlx5_core_dev *dev, struct mlx5_eswitch_rep *rep) { - struct mlx5e_priv *priv = netdev_priv(mlx5_uplink_netdev_get(dev)); struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); + struct net_device *netdev; + struct mlx5e_priv *priv; + int err; + + netdev = mlx5_uplink_netdev_get(dev); + if (!netdev) + return 0; + priv = netdev_priv(netdev); rpriv->netdev = priv->netdev; - return mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, - rpriv); + err = mlx5e_netdev_change_profile(priv, &mlx5e_uplink_rep_profile, + rpriv); + mlx5_uplink_netdev_put(dev, netdev); + return err; } static void @@ -1638,8 +1647,16 @@ mlx5e_vport_rep_unload(struct mlx5_eswitch_rep *rep) { struct mlx5e_rep_priv *rpriv = mlx5e_rep_to_rep_priv(rep); struct net_device *netdev = rpriv->netdev; - struct mlx5e_priv *priv = netdev_priv(netdev); - void *ppriv = priv->ppriv; + struct mlx5e_priv *priv; + void *ppriv; + + if (!netdev) { + ppriv = rpriv; + goto free_ppriv; + } + + priv = netdev_priv(netdev); + ppriv = priv->ppriv; if (rep->vport == MLX5_VPORT_UPLINK) { mlx5e_vport_uplink_rep_unload(rpriv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index b8c609d91d11..263d5628ee44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -51,6 +51,7 @@ #include "ipoib/ipoib.h" #include "en_accel/ipsec.h" #include "en_accel/macsec.h" +#include "en_accel/psp_rxtx.h" #include "en_accel/ipsec_rxtx.h" #include "en_accel/ktls_txrx.h" #include "en/xdp.h" @@ -1289,8 +1290,12 @@ static void mlx5e_shampo_update_ipv4_tcp_hdr(struct mlx5e_rq *rq, struct iphdr * tcp->check = ~tcp_v4_check(skb->len - tcp_off, ipv4->saddr, ipv4->daddr, 0); skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV4; - if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) - skb_shinfo(skb)->gso_type |= SKB_GSO_TCP_FIXEDID; + if (ntohs(ipv4->id) == rq->hw_gro_data->second_ip_id) { + bool encap = rq->hw_gro_data->fk.control.flags & FLOW_DIS_ENCAPSULATION; + + skb_shinfo(skb)->gso_type |= encap ? 
SKB_GSO_TCP_FIXEDID_INNER : + SKB_GSO_TCP_FIXEDID; + } skb->csum_start = (unsigned char *)tcp - skb->head; skb->csum_offset = offsetof(struct tcphdr, check); @@ -1521,6 +1526,11 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (unlikely(mlx5e_psp_is_rx_flow(cqe))) { + /* TBD: PSP csum complete corrections for now chose csum_unnecessary path */ + goto csum_unnecessary; + } + if (test_bit(MLX5E_RQ_STATE_CSUM_FULL, &rq->state)) return; /* CQE csum covers all received bytes */ @@ -1549,7 +1559,7 @@ csum_none: #define MLX5E_CE_BIT_MASK 0x80 -static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, +static inline bool mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, u32 cqe_bcnt, struct mlx5e_rq *rq, struct sk_buff *skb) @@ -1563,6 +1573,11 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (unlikely(get_cqe_tls_offload(cqe))) mlx5e_ktls_handle_rx_skb(rq, skb, cqe, &cqe_bcnt); + if (unlikely(mlx5e_psp_is_rx_flow(cqe))) { + if (mlx5e_psp_offload_handle_rx_skb(netdev, skb, cqe)) + return true; + } + if (unlikely(mlx5_ipsec_is_rx_flow(cqe))) mlx5e_ipsec_offload_handle_rx_skb(netdev, skb, be32_to_cpu(cqe->ft_metadata)); @@ -1608,9 +1623,11 @@ static inline void mlx5e_build_rx_skb(struct mlx5_cqe64 *cqe, if (unlikely(mlx5e_skb_is_multicast(skb))) stats->mcast_packets++; + + return false; } -static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq, +static bool mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u32 cqe_bcnt, struct sk_buff *skb) @@ -1620,16 +1637,20 @@ static void mlx5e_shampo_complete_rx_cqe(struct mlx5e_rq *rq, stats->packets++; stats->bytes += cqe_bcnt; if (NAPI_GRO_CB(skb)->count != 1) - return; - mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + return false; + + if (mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb)) + return true; + skb_reset_network_header(skb); if (!skb_flow_dissect_flow_keys(skb, &rq->hw_gro_data->fk, 0)) { napi_gro_receive(rq->cq.napi, skb); rq->hw_gro_data->skb = NULL; } + return false; } -static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, +static inline bool mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, u32 cqe_bcnt, struct sk_buff *skb) @@ -1638,7 +1659,7 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, stats->packets++; stats->bytes += cqe_bcnt; - mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); + return mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); } static inline @@ -1796,10 +1817,9 @@ mlx5e_skb_from_cqe_nonlinear(struct mlx5e_rq *rq, struct mlx5e_wqe_frag_info *wi if (xdp_buff_has_frags(&mxbuf->xdp)) { /* sinfo->nr_frags is reset by build_skb, calculate again. 
*/ - xdp_update_skb_shared_info(skb, wi - head_wi - 1, - sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc( - &mxbuf->xdp)); + xdp_update_skb_frags_info(skb, wi - head_wi - 1, + sinfo->xdp_frags_size, truesize, + xdp_buff_get_skb_flags(&mxbuf->xdp)); for (struct mlx5e_wqe_frag_info *pwi = head_wi + 1; pwi < wi; pwi++) pwi->frag_page->frags++; @@ -1855,7 +1875,8 @@ static void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_cyc_pop; } - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb)) + goto wq_cyc_pop; if (mlx5e_cqe_regb_chain(cqe)) if (!mlx5e_tc_update_skb_nic(cqe, skb)) { @@ -1902,7 +1923,8 @@ static void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) goto wq_cyc_pop; } - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb)) + goto wq_cyc_pop; if (rep->vlan && skb_vlan_tag_present(skb)) skb_vlan_pop(skb); @@ -1951,7 +1973,8 @@ static void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 if (!skb) goto mpwrq_cqe_out; - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb)) + goto mpwrq_cqe_out; mlx5e_rep_tc_receive(cqe, rq, skb); @@ -2105,10 +2128,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w struct mlx5e_frag_page *pagep; /* sinfo->nr_frags is reset by build_skb, calculate again. */ - xdp_update_skb_shared_info(skb, frag_page - head_page, - sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc( - &mxbuf->xdp)); + xdp_update_skb_frags_info(skb, frag_page - head_page, + sinfo->xdp_frags_size, + truesize, + xdp_buff_get_skb_flags(&mxbuf->xdp)); pagep = head_page; do @@ -2122,10 +2145,10 @@ mlx5e_skb_from_cqe_mpwrq_nonlinear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *w if (xdp_buff_has_frags(&mxbuf->xdp)) { struct mlx5e_frag_page *pagep; - xdp_update_skb_shared_info(skb, sinfo->nr_frags, - sinfo->xdp_frags_size, truesize, - xdp_buff_is_frag_pfmemalloc( - &mxbuf->xdp)); + xdp_update_skb_frags_info(skb, sinfo->nr_frags, + sinfo->xdp_frags_size, + truesize, + xdp_buff_get_skb_flags(&mxbuf->xdp)); pagep = frag_page - sinfo->nr_frags; do @@ -2388,7 +2411,10 @@ static void mlx5e_handle_rx_cqe_mpwrq_shampo(struct mlx5e_rq *rq, struct mlx5_cq stats->hds_nosplit_bytes += data_bcnt; } - mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb); + if (mlx5e_shampo_complete_rx_cqe(rq, cqe, cqe_bcnt, *skb)) { + *skb = NULL; + goto free_hd_entry; + } if (flush && rq->hw_gro_data->skb) mlx5e_shampo_flush_skb(rq, cqe, match); free_hd_entry: @@ -2446,7 +2472,8 @@ static void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cq if (!skb) goto mpwrq_cqe_out; - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb)) + goto mpwrq_cqe_out; if (mlx5e_cqe_regb_chain(cqe)) if (!mlx5e_tc_update_skb_nic(cqe, skb)) { @@ -2779,7 +2806,8 @@ static void mlx5e_trap_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe if (!skb) goto wq_cyc_pop; - mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb)) + goto wq_cyc_pop; skb_push(skb, ETH_HLEN); mlx5_devlink_trap_report(rq->mdev, trap_id, skb, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c index 87536f158d07..7c029a7d0fd7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c +++ 
b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c @@ -1446,16 +1446,13 @@ static void fec_set_rs_stats(struct ethtool_fec_stats *fec_stats, u32 *ppcnt) } static void fec_set_block_stats(struct mlx5e_priv *priv, + int mode, struct ethtool_fec_stats *fec_stats) { struct mlx5_core_dev *mdev = priv->mdev; u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {}; int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); - int mode = fec_active_mode(mdev); - - if (mode == MLX5E_FEC_NOFEC) - return; MLX5_SET(ppcnt_reg, in, local_port, 1); MLX5_SET(ppcnt_reg, in, grp, MLX5_PHYSICAL_LAYER_COUNTERS_GROUP); @@ -1466,6 +1463,7 @@ static void fec_set_block_stats(struct mlx5e_priv *priv, case MLX5E_FEC_RS_528_514: case MLX5E_FEC_RS_544_514: case MLX5E_FEC_LLRS_272_257_1: + case MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD: fec_set_rs_stats(fec_stats, out); return; case MLX5E_FEC_FIRECODE: @@ -1493,14 +1491,130 @@ static void fec_set_corrected_bits_total(struct mlx5e_priv *priv, phy_corrected_bits); } +#define MLX5_RS_HISTOGRAM_ENTRIES \ + (MLX5_FLD_SZ_BYTES(rs_histogram_cntrs, hist) / \ + MLX5_FLD_SZ_BYTES(rs_histogram_cntrs, hist[0])) + +enum { + MLX5E_HISTOGRAM_FEC_RS_544_514 = 1, + MLX5E_HISTOGRAM_FEC_LLRS = 2, + MLX5E_HISTOGRAM_FEC_RS_528_514 = 3, +}; + +static bool fec_rs_validate_hist_type(int mode, int hist_type) +{ + switch (mode) { + case MLX5E_FEC_RS_528_514: + return hist_type == MLX5E_HISTOGRAM_FEC_RS_528_514; + case MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD: + case MLX5E_FEC_RS_544_514: + return hist_type == MLX5E_HISTOGRAM_FEC_RS_544_514; + case MLX5E_FEC_LLRS_272_257_1: + return hist_type == MLX5E_HISTOGRAM_FEC_LLRS; + default: + break; + } + + return false; +} + +static u8 +fec_rs_histogram_fill_ranges(struct mlx5e_priv *priv, int mode, + const struct ethtool_fec_hist_range **ranges) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(pphcr_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(pphcr_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(pphcr_reg); + u8 hist_type, num_of_bins; + + memset(priv->fec_ranges, 0, + ETHTOOL_FEC_HIST_MAX * sizeof(*priv->fec_ranges)); + MLX5_SET(pphcr_reg, in, local_port, 1); + if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPHCR, 0, 0)) + return 0; + + hist_type = MLX5_GET(pphcr_reg, out, active_hist_type); + if (!fec_rs_validate_hist_type(mode, hist_type)) + return 0; + + num_of_bins = MLX5_GET(pphcr_reg, out, num_of_bins); + if (WARN_ON_ONCE(num_of_bins > MLX5_RS_HISTOGRAM_ENTRIES)) + return 0; + + for (int i = 0; i < num_of_bins; i++) { + void *bin_range = MLX5_ADDR_OF(pphcr_reg, out, bin_range[i]); + + priv->fec_ranges[i].high = MLX5_GET(bin_range_layout, bin_range, + high_val); + priv->fec_ranges[i].low = MLX5_GET(bin_range_layout, bin_range, + low_val); + } + *ranges = priv->fec_ranges; + + return num_of_bins; +} + +static void fec_rs_histogram_fill_stats(struct mlx5e_priv *priv, + u8 num_of_bins, + struct ethtool_fec_hist *hist) +{ + struct mlx5_core_dev *mdev = priv->mdev; + u32 out[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + u32 in[MLX5_ST_SZ_DW(ppcnt_reg)] = {0}; + int sz = MLX5_ST_SZ_BYTES(ppcnt_reg); + void *rs_histogram_cntrs; + + MLX5_SET(ppcnt_reg, in, local_port, 1); + MLX5_SET(ppcnt_reg, in, grp, MLX5_RS_FEC_HISTOGRAM_GROUP); + if (mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPCNT, 0, 0)) + return; + + rs_histogram_cntrs = MLX5_ADDR_OF(ppcnt_reg, out, + counter_set.rs_histogram_cntrs); + /* Guaranteed that num_of_bins is less than MLX5E_FEC_RS_HIST_MAX + * by fec_rs_histogram_fill_ranges(). 
+ */ + for (int i = 0; i < num_of_bins; i++) + hist->values[i].sum = MLX5_GET64(rs_histogram_cntrs, + rs_histogram_cntrs, + hist[i]); +} + +static void fec_set_histograms_stats(struct mlx5e_priv *priv, int mode, + struct ethtool_fec_hist *hist) +{ + u8 num_of_bins; + + switch (mode) { + case MLX5E_FEC_RS_528_514: + case MLX5E_FEC_RS_544_514: + case MLX5E_FEC_LLRS_272_257_1: + case MLX5E_FEC_RS_544_514_INTERLEAVED_QUAD: + num_of_bins = + fec_rs_histogram_fill_ranges(priv, mode, &hist->ranges); + if (num_of_bins) + return fec_rs_histogram_fill_stats(priv, num_of_bins, + hist); + break; + default: + return; + } +} + void mlx5e_stats_fec_get(struct mlx5e_priv *priv, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { - if (!MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) + int mode = fec_active_mode(priv->mdev); + + if (mode == MLX5E_FEC_NOFEC || + !MLX5_CAP_PCAM_FEATURE(priv->mdev, ppcnt_statistical_group)) return; fec_set_corrected_bits_total(priv, fec_stats); - fec_set_block_stats(priv, fec_stats); + fec_set_block_stats(priv, mode, fec_stats); + fec_set_histograms_stats(priv, mode, hist); } #define PPORT_ETH_EXT_OFF(c) \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 72dbcc1928ef..09f155acb461 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -117,7 +117,8 @@ void mlx5e_stats_update_ndo_stats(struct mlx5e_priv *priv); void mlx5e_stats_pause_get(struct mlx5e_priv *priv, struct ethtool_pause_stats *pause_stats); void mlx5e_stats_fec_get(struct mlx5e_priv *priv, - struct ethtool_fec_stats *fec_stats); + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist); void mlx5e_stats_eth_phy_get(struct mlx5e_priv *priv, struct ethtool_eth_phy_stats *phy_stats); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 32c07a8b03d1..00c2763e57ca 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -66,6 +66,7 @@ #include "lib/devcom.h" #include "lib/geneve.h" #include "lib/fs_chains.h" +#include "lib/mlx5.h" #include "diag/en_tc_tracepoint.h" #include <asm/div64.h> #include "lag/lag.h" @@ -757,11 +758,11 @@ static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp) struct mlx5e_priv *priv = hp->func_priv; struct mlx5_core_dev *mdev = priv->mdev; struct mlx5e_rss_params_indir indir; + u32 rqt_size; int err; - err = mlx5e_rss_params_indir_init(&indir, mdev, - mlx5e_rqt_size(mdev, hp->num_channels), - mlx5e_rqt_size(mdev, hp->num_channels)); + rqt_size = mlx5e_rqt_size(mdev, hp->num_channels); + err = mlx5e_rss_params_indir_init(&indir, rqt_size, rqt_size); if (err) return err; @@ -837,6 +838,9 @@ static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp, ttc_params->ns_type = MLX5_FLOW_NAMESPACE_KERNEL; for (tt = 0; tt < MLX5_NUM_TT; tt++) { + if (mlx5_ttc_is_decrypted_esp_tt(tt)) + continue; + ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR; ttc_params->dests[tt].tir_num = tt == MLX5_TT_ANY ? 
@@ -5387,12 +5391,13 @@ void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht) int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) { const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts); + struct mlx5_devcom_match_attr attr = {}; struct netdev_phys_item_id ppid; struct mlx5e_rep_priv *rpriv; struct mapping_ctx *mapping; struct mlx5_eswitch *esw; struct mlx5e_priv *priv; - u64 mapping_id, key; + u64 mapping_id; int err = 0; rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv); @@ -5448,8 +5453,10 @@ int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv) err = netif_get_port_parent_id(priv->netdev, &ppid, false); if (!err) { - memcpy(&key, &ppid.id, sizeof(key)); - mlx5_esw_offloads_devcom_init(esw, key); + memcpy(&attr.key.val, &ppid.id, sizeof(attr.key.val)); + attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS; + attr.net = mlx5_core_net(esw->dev); + mlx5_esw_offloads_devcom_init(esw, &attr); } return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 319061d31602..b7227afcb51d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -39,6 +39,7 @@ #include "ipoib/ipoib.h" #include "en_accel/en_accel.h" #include "en_accel/ipsec_rxtx.h" +#include "en_accel/psp_rxtx.h" #include "en_accel/macsec.h" #include "en/ptp.h" #include <net/ipv6.h> @@ -120,6 +121,11 @@ mlx5e_txwqe_build_eseg_csum(struct mlx5e_txqsq *sq, struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg) { +#ifdef CONFIG_MLX5_EN_PSP + if (unlikely(mlx5e_psp_txwqe_build_eseg_csum(sq, skb, &accel->psp_st, eseg))) + return; +#endif + if (unlikely(mlx5e_ipsec_txwqe_build_eseg_csum(sq, skb, eseg))) return; @@ -297,7 +303,7 @@ static void mlx5e_sq_xmit_prepare(struct mlx5e_txqsq *sq, struct sk_buff *skb, stats->packets++; } - attr->insz = mlx5e_accel_tx_ids_len(sq, accel); + attr->insz = mlx5e_accel_tx_ids_len(sq, skb, accel); stats->bytes += attr->num_bytes; } @@ -653,7 +659,7 @@ static void mlx5e_cqe_ts_id_eseg(struct mlx5e_ptpsq *ptpsq, struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg) { if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) - eseg->flow_table_metadata = + eseg->flow_table_metadata |= cpu_to_be32(mlx5e_ptp_metadata_fifo_peek(&ptpsq->metadata_freelist)); } @@ -661,7 +667,7 @@ static void mlx5e_txwqe_build_eseg(struct mlx5e_priv *priv, struct mlx5e_txqsq * struct sk_buff *skb, struct mlx5e_accel_tx_state *accel, struct mlx5_wqe_eth_seg *eseg, u16 ihs) { - mlx5e_accel_tx_eseg(priv, skb, eseg, ihs); + mlx5e_accel_tx_eseg(priv, skb, accel, eseg, ihs); mlx5e_txwqe_build_eseg_csum(sq, skb, accel, eseg); if (unlikely(sq->ptpsq)) mlx5e_cqe_ts_id_eseg(sq->ptpsq, skb, eseg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 1ab77159409d..25499da177bc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -32,9 +32,7 @@ enum { MLX5_EQ_STATE_ALWAYS_ARMED = 0xb, }; -enum { - MLX5_EQ_DOORBEL_OFFSET = 0x40, -}; +#define MLX5_EQ_DOORBELL_OFFSET 0x40 /* budget must be smaller than MLX5_NUM_SPARE_EQE to guarantee that we update * the ci before we polled all the entries in the EQ. 
MLX5_NUM_SPARE_EQE is @@ -309,7 +307,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); MLX5_SET(eqc, eqc, log_eq_size, eq->fbc.log_sz); - MLX5_SET(eqc, eqc, uar_page, priv->uar->index); + MLX5_SET(eqc, eqc, uar_page, priv->bfreg.up->index); MLX5_SET(eqc, eqc, intr, vecidx); MLX5_SET(eqc, eqc, log_page_size, eq->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); @@ -322,7 +320,7 @@ create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = pci_irq_vector(dev->pdev, vecidx); eq->dev = dev; - eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET; + eq->doorbell = priv->bfreg.up->map + MLX5_EQ_DOORBELL_OFFSET; err = mlx5_debug_eq_add(dev, eq); if (err) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c index 7dd1dc3f77c7..c9a1654d83a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c @@ -87,8 +87,8 @@ int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, drop_counter = mlx5_fc_create(esw->dev, false); if (IS_ERR(drop_counter)) { esw_warn(esw->dev, - "vport[%d] configure egress drop rule counter err(%ld)\n", - vport->vport, PTR_ERR(drop_counter)); + "vport[%d] configure egress drop rule counter err(%pe)\n", + vport->vport, drop_counter); drop_counter = NULL; } vport->egress.legacy.drop_counter = drop_counter; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c new file mode 100644 index 000000000000..0091ba697bae --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/adj_vport.c @@ -0,0 +1,209 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +// Copyright (c) 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+ +#include "fs_core.h" +#include "eswitch.h" + +enum { + MLX5_ADJ_VPORT_DISCONNECT = 0x0, + MLX5_ADJ_VPORT_CONNECT = 0x1, +}; + +static int mlx5_esw_adj_vport_modify(struct mlx5_core_dev *dev, + u16 vport, bool connect) +{ + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {}; + + MLX5_SET(modify_vport_state_in, in, opcode, + MLX5_CMD_OP_MODIFY_VPORT_STATE); + MLX5_SET(modify_vport_state_in, in, op_mod, + MLX5_VPORT_STATE_OP_MOD_ESW_VPORT); + MLX5_SET(modify_vport_state_in, in, other_vport, 1); + MLX5_SET(modify_vport_state_in, in, vport_number, vport); + MLX5_SET(modify_vport_state_in, in, ingress_connect_valid, 1); + MLX5_SET(modify_vport_state_in, in, egress_connect_valid, 1); + MLX5_SET(modify_vport_state_in, in, ingress_connect, connect); + MLX5_SET(modify_vport_state_in, in, egress_connect, connect); + + return mlx5_cmd_exec_in(dev, modify_vport_state, in); +} + +static void mlx5_esw_destroy_esw_vport(struct mlx5_core_dev *dev, u16 vport) +{ + u32 in[MLX5_ST_SZ_DW(destroy_esw_vport_in)] = {}; + + MLX5_SET(destroy_esw_vport_in, in, opcode, + MLX5_CMD_OPCODE_DESTROY_ESW_VPORT); + MLX5_SET(destroy_esw_vport_in, in, vport_num, vport); + + mlx5_cmd_exec_in(dev, destroy_esw_vport, in); +} + +static int mlx5_esw_create_esw_vport(struct mlx5_core_dev *dev, u16 vhca_id, + u16 *vport_num) +{ + u32 out[MLX5_ST_SZ_DW(create_esw_vport_out)] = {}; + u32 in[MLX5_ST_SZ_DW(create_esw_vport_in)] = {}; + int err; + + MLX5_SET(create_esw_vport_in, in, opcode, + MLX5_CMD_OPCODE_CREATE_ESW_VPORT); + MLX5_SET(create_esw_vport_in, in, managed_vhca_id, vhca_id); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *vport_num = MLX5_GET(create_esw_vport_out, out, vport_num); + + return err; +} + +static int mlx5_esw_adj_vport_create(struct mlx5_eswitch *esw, u16 vhca_id, + const void *rid_info_reg) +{ + struct mlx5_vport *vport; + u16 vport_num; + int err; + + err = mlx5_esw_create_esw_vport(esw->dev, vhca_id, &vport_num); + if (err) { + esw_warn(esw->dev, + "Failed to create adjacent vport for vhca_id %d, err %d\n", + vhca_id, err); + return err; + } + + esw_debug(esw->dev, "Created adjacent vport[%d] %d for vhca_id 0x%x\n", + esw->last_vport_idx, vport_num, vhca_id); + + err = mlx5_esw_vport_alloc(esw, esw->last_vport_idx++, vport_num); + if (err) + goto destroy_esw_vport; + + xa_set_mark(&esw->vports, vport_num, MLX5_ESW_VPT_VF); + vport = mlx5_eswitch_get_vport(esw, vport_num); + vport->adjacent = true; + vport->vhca_id = vhca_id; + + vport->adj_info.parent_pci_devfn = + MLX5_GET(function_vhca_rid_info_reg, rid_info_reg, + parent_pci_device_function); + vport->adj_info.function_id = + MLX5_GET(function_vhca_rid_info_reg, rid_info_reg, function_id); + + mlx5_fs_vport_egress_acl_ns_add(esw->dev->priv.steering, vport->index); + mlx5_fs_vport_ingress_acl_ns_add(esw->dev->priv.steering, vport->index); + err = mlx5_esw_offloads_rep_add(esw, vport); + if (err) + goto acl_ns_remove; + + mlx5_esw_adj_vport_modify(esw->dev, vport_num, MLX5_ADJ_VPORT_CONNECT); + return 0; + +acl_ns_remove: + mlx5_fs_vport_ingress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_esw_vport_free(esw, vport); +destroy_esw_vport: + mlx5_esw_destroy_esw_vport(esw->dev, vport_num); + return err; +} + +static void mlx5_esw_adj_vport_destroy(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + u16 vport_num = vport->vport; + + esw_debug(esw->dev, "Destroying adjacent vport %d for vhca_id 0x%x\n", + vport_num, 
vport->vhca_id); + mlx5_esw_adj_vport_modify(esw->dev, vport_num, + MLX5_ADJ_VPORT_DISCONNECT); + mlx5_esw_offloads_rep_remove(esw, vport); + mlx5_fs_vport_egress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_fs_vport_ingress_acl_ns_remove(esw->dev->priv.steering, + vport->index); + mlx5_esw_vport_free(esw, vport); + /* Reset the vport index back so new adj vports can use this index. + * When vport count can incrementally change, this needs to be modified. + */ + esw->last_vport_idx--; + mlx5_esw_destroy_esw_vport(esw->dev, vport_num); +} + +void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + if (!MLX5_CAP_GEN_2(esw->dev, delegated_vhca_max)) + return; + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + mlx5_esw_adj_vport_destroy(esw, vport); + } +} + +void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw) +{ + u32 delegated_vhca_max = MLX5_CAP_GEN_2(esw->dev, delegated_vhca_max); + u32 in[MLX5_ST_SZ_DW(query_delegated_vhca_in)] = {}; + int outlen, err, i = 0; + u8 *out; + u32 count; + + if (!delegated_vhca_max) + return; + + outlen = MLX5_ST_SZ_BYTES(query_delegated_vhca_out) + + delegated_vhca_max * + MLX5_ST_SZ_BYTES(delegated_function_vhca_rid_info); + + esw_debug(esw->dev, "delegated_vhca_max=%d\n", delegated_vhca_max); + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return; + + MLX5_SET(query_delegated_vhca_in, in, opcode, + MLX5_CMD_OPCODE_QUERY_DELEGATED_VHCA); + + err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen); + if (err) { + kvfree(out); + esw_warn(esw->dev, "Failed to query delegated vhca, err %d\n", + err); + return; + } + + count = MLX5_GET(query_delegated_vhca_out, out, functions_count); + esw_debug(esw->dev, "Delegated vhca functions count %d\n", count); + + for (i = 0; i < count; i++) { + const void *rid_info, *rid_info_reg; + u16 vhca_id; + + rid_info = MLX5_ADDR_OF(query_delegated_vhca_out, out, + delegated_function_vhca_rid_info[i]); + + rid_info_reg = MLX5_ADDR_OF(delegated_function_vhca_rid_info, + rid_info, function_vhca_rid_info); + + vhca_id = MLX5_GET(function_vhca_rid_info_reg, rid_info_reg, + vhca_id); + esw_debug(esw->dev, "Delegating vhca_id 0x%x\n", vhca_id); + + err = mlx5_esw_adj_vport_create(esw, vhca_id, rid_info_reg); + if (err) { + esw_warn(esw->dev, + "Failed to init adjacent vhca 0x%x, err %d\n", + vhca_id, err); + break; + } + } + + kvfree(out); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 76e35c827da0..60e10047770f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -81,7 +81,8 @@ mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) ft_attr.prio = FDB_BR_OFFLOAD; fdb = mlx5_create_flow_table(ns, &ft_attr); if (IS_ERR(fdb)) - esw_warn(dev, "Failed to create bridge FDB Table (err=%ld)\n", PTR_ERR(fdb)); + esw_warn(dev, "Failed to create bridge FDB Table (err=%pe)\n", + fdb); return fdb; } @@ -121,8 +122,8 @@ mlx5_esw_bridge_ingress_vlan_proto_fg_create(unsigned int from, unsigned int to, kvfree(in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%ld)\n", - vlan_proto, PTR_ERR(fg)); + "Failed to create VLAN(proto=%x) flow group for bridge ingress table (err=%pe)\n", + vlan_proto, fg); return fg; } @@ -180,8 +181,8 @@ 
mlx5_esw_bridge_ingress_vlan_proto_filter_fg_create(unsigned int from, unsigned fg = mlx5_create_flow_group(ingress_ft, in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create bridge ingress table VLAN filter flow group (err=%ld)\n", - PTR_ERR(fg)); + "Failed to create bridge ingress table VLAN filter flow group (err=%pe)\n", + fg); kvfree(in); return fg; } @@ -237,8 +238,8 @@ mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow fg = mlx5_create_flow_group(ingress_ft, in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create MAC flow group for bridge ingress table (err=%ld)\n", - PTR_ERR(fg)); + "Failed to create MAC flow group for bridge ingress table (err=%pe)\n", + fg); kvfree(in); return fg; @@ -274,8 +275,8 @@ mlx5_esw_bridge_egress_vlan_proto_fg_create(unsigned int from, unsigned int to, fg = mlx5_create_flow_group(egress_ft, in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create VLAN flow group for bridge egress table (err=%ld)\n", - PTR_ERR(fg)); + "Failed to create VLAN flow group for bridge egress table (err=%pe)\n", + fg); kvfree(in); return fg; } @@ -324,8 +325,8 @@ mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_ fg = mlx5_create_flow_group(egress_ft, in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create bridge egress table MAC flow group (err=%ld)\n", - PTR_ERR(fg)); + "Failed to create bridge egress table MAC flow group (err=%pe)\n", + fg); kvfree(in); return fg; } @@ -354,8 +355,8 @@ mlx5_esw_bridge_egress_miss_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow fg = mlx5_create_flow_group(egress_ft, in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create bridge egress table miss flow group (err=%ld)\n", - PTR_ERR(fg)); + "Failed to create bridge egress table miss flow group (err=%pe)\n", + fg); kvfree(in); return fg; } @@ -501,8 +502,8 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, if (mlx5_esw_bridge_pkt_reformat_vlan_pop_supported(esw)) { miss_fg = mlx5_esw_bridge_egress_miss_fg_create(esw, egress_ft); if (IS_ERR(miss_fg)) { - esw_warn(esw->dev, "Failed to create miss flow group (err=%ld)\n", - PTR_ERR(miss_fg)); + esw_warn(esw->dev, "Failed to create miss flow group (err=%pe)\n", + miss_fg); miss_fg = NULL; goto skip_miss_flow; } @@ -510,8 +511,8 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, miss_pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); if (IS_ERR(miss_pkt_reformat)) { esw_warn(esw->dev, - "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", - PTR_ERR(miss_pkt_reformat)); + "Failed to alloc packet reformat REMOVE_HEADER (err=%pe)\n", + miss_pkt_reformat); miss_pkt_reformat = NULL; mlx5_destroy_flow_group(miss_fg); miss_fg = NULL; @@ -522,8 +523,8 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, br_offloads->skip_ft, miss_pkt_reformat); if (IS_ERR(miss_handle)) { - esw_warn(esw->dev, "Failed to create miss flow (err=%ld)\n", - PTR_ERR(miss_handle)); + esw_warn(esw->dev, "Failed to create miss flow (err=%pe)\n", + miss_handle); miss_handle = NULL; mlx5_packet_reformat_dealloc(esw->dev, miss_pkt_reformat); miss_pkt_reformat = NULL; @@ -1048,8 +1049,8 @@ mlx5_esw_bridge_vlan_push_create(u16 vlan_proto, struct mlx5_esw_bridge_vlan *vl &reformat_params, MLX5_FLOW_NAMESPACE_FDB); if (IS_ERR(pkt_reformat)) { - esw_warn(esw->dev, "Failed to alloc packet reformat INSERT_HEADER (err=%ld)\n", - PTR_ERR(pkt_reformat)); + esw_warn(esw->dev, "Failed to alloc packet 
reformat INSERT_HEADER (err=%pe)\n", + pkt_reformat); return PTR_ERR(pkt_reformat); } @@ -1076,8 +1077,8 @@ mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_e pkt_reformat = mlx5_esw_bridge_pkt_reformat_vlan_pop_create(esw); if (IS_ERR(pkt_reformat)) { - esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", - PTR_ERR(pkt_reformat)); + esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%pe)\n", + pkt_reformat); return PTR_ERR(pkt_reformat); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index c33accadae0f..cf88a106d80d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -27,6 +27,7 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * { struct mlx5_core_dev *dev = esw->dev; struct netdev_phys_item_id ppid = {}; + struct mlx5_vport *vport; u32 controller_num = 0; bool external; u16 pfnum; @@ -42,10 +43,18 @@ static void mlx5_esw_offloads_pf_vf_devlink_port_attrs_set(struct mlx5_eswitch * dl_port->attrs.switch_id.id_len = ppid.id_len; devlink_port_attrs_pci_pf_set(dl_port, controller_num, pfnum, external); } else if (mlx5_eswitch_is_vf_vport(esw, vport_num)) { + u16 func_id = vport_num - 1; + + vport = mlx5_eswitch_get_vport(esw, vport_num); memcpy(dl_port->attrs.switch_id.id, ppid.id, ppid.id_len); dl_port->attrs.switch_id.id_len = ppid.id_len; + if (vport->adjacent) { + func_id = vport->adj_info.function_id; + pfnum = vport->adj_info.parent_pci_devfn; + } + devlink_port_attrs_pci_vf_set(dl_port, controller_num, pfnum, - vport_num - 1, external); + func_id, external); } else if (mlx5_core_is_ec_vf_vport(esw->dev, vport_num)) { u16 base_vport = mlx5_core_ec_vf_vport_base(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 8b4977650183..56e6f54b1e2e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -971,8 +971,9 @@ esw_qos_vport_tc_enable(struct mlx5_vport *vport, enum sched_node_type type, max_level = 1 << MLX5_CAP_QOS(vport_node->esw->dev, log_esw_max_sched_depth); if (new_level > max_level) { - NL_SET_ERR_MSG_MOD(extack, - "TC arbitration on leafs is not supported beyond max scheduling depth"); + NL_SET_ERR_MSG_FMT_MOD(extack, + "TC arbitration on leafs is not supported beyond max depth %d", + max_level); return -EOPNOTSUPP; } } @@ -1444,8 +1445,9 @@ static int esw_qos_node_enable_tc_arbitration(struct mlx5_esw_sched_node *node, new_level = node->level + 1; max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); if (new_level > max_level) { - NL_SET_ERR_MSG_MOD(extack, - "TC arbitration on nodes is not supported beyond max scheduling depth"); + NL_SET_ERR_MSG_FMT_MOD(extack, + "TC arbitration on nodes is not supported beyond max depth %d", + max_level); return -EOPNOTSUPP; } @@ -1515,6 +1517,7 @@ static u32 mlx5_esw_qos_lag_link_speed_get_locked(struct mlx5_core_dev *mdev) speed = lksettings.base.speed; out: + mlx5_uplink_netdev_put(mdev, slave); return speed; } @@ -1996,8 +1999,9 @@ mlx5_esw_qos_node_validate_set_parent(struct mlx5_esw_sched_node *node, max_level = 1 << MLX5_CAP_QOS(node->esw->dev, log_esw_max_sched_depth); if (new_level > max_level) { - NL_SET_ERR_MSG_MOD(extack, - "Node hierarchy depth exceeds the maximum supported level"); + 
NL_SET_ERR_MSG_FMT_MOD(extack, + "Node hierarchy depth %d exceeds the maximum supported level %d", + new_level, max_level); return -EOPNOTSUPP; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c index 749c3957a128..407062096a82 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/vporttbl.c @@ -45,8 +45,8 @@ esw_vport_tbl_create(struct mlx5_eswitch *esw, struct mlx5_flow_namespace *ns, ft_attr.flags = vport_ns->flags; fdb = mlx5_create_auto_grouped_flow_table(ns, &ft_attr); if (IS_ERR(fdb)) { - esw_warn(esw->dev, "Failed to create per vport FDB Table err %ld\n", - PTR_ERR(fdb)); + esw_warn(esw->dev, "Failed to create per vport FDB Table err %pe\n", + fdb); } return fdb; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 4917d185d0c3..e2ffb87b94cb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -257,8 +257,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u16 vport, bool rx_rule, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, - "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", - dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%pe)\n", + dmac_v, dmac_c, vport, flow_rule); flow_rule = NULL; } @@ -820,6 +820,7 @@ static int mlx5_esw_vport_caps_get(struct mlx5_eswitch *esw, struct mlx5_vport * hca_caps = MLX5_ADDR_OF(query_hca_cap_out, query_ctx, capability); vport->info.roce_enabled = MLX5_GET(cmd_hca_cap, hca_caps, roce); + vport->vhca_id = MLX5_GET(cmd_hca_cap, hca_caps, vhca_id); if (!MLX5_CAP_GEN_MAX(esw->dev, hca_cap_2)) goto out_free; @@ -839,6 +840,18 @@ out_free: return err; } +bool mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id) +{ + struct mlx5_vport *vport; + + vport = mlx5_eswitch_get_vport(esw, vportn); + if (IS_ERR(vport) || MLX5_VPORT_INVAL_VHCA_ID(vport)) + return false; + + *vhca_id = vport->vhca_id; + return true; +} + static int esw_vport_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { bool vst_mode_steering = esw_vst_mode_is_steering(esw); @@ -929,7 +942,7 @@ int mlx5_esw_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) { - ret = mlx5_esw_vport_vhca_id_set(esw, vport_num); + ret = mlx5_esw_vport_vhca_id_map(esw, vport); if (ret) goto err_vhca_mapping; } @@ -973,7 +986,7 @@ void mlx5_esw_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) if (!mlx5_esw_is_manager_vport(esw, vport_num) && MLX5_CAP_GEN(esw->dev, vhca_resource_manager)) - mlx5_esw_vport_vhca_id_clear(esw, vport_num); + mlx5_esw_vport_vhca_id_unmap(esw, vport); if (vport->vport != MLX5_VPORT_PF && (vport->info.ipsec_crypto_enabled || vport->info.ipsec_packet_enabled)) @@ -1038,6 +1051,25 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(err); } +static int mlx5_esw_host_functions_enabled_query(struct mlx5_eswitch *esw) +{ + const u32 *query_host_out; + + if (!mlx5_core_is_ecpf_esw_manager(esw->dev)) + return 0; + + query_host_out = mlx5_esw_query_functions(esw->dev); + if (IS_ERR(query_host_out)) + return PTR_ERR(query_host_out); + + esw->esw_funcs.host_funcs_disabled = + MLX5_GET(query_esw_functions_out, 
query_host_out, + host_params_context.host_pf_not_exist); + + kvfree(query_host_out); + return 0; +} + static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) { if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { @@ -1185,7 +1217,8 @@ void mlx5_eswitch_unload_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs) unsigned long i; mlx5_esw_for_each_vf_vport(esw, i, vport, num_vfs) { - if (!vport->enabled) + /* Adjacent VFs are unloaded separately */ + if (!vport->enabled || vport->adjacent) continue; mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); } @@ -1204,6 +1237,42 @@ static void mlx5_eswitch_unload_ec_vf_vports(struct mlx5_eswitch *esw, } } +static void mlx5_eswitch_unload_adj_vf_vports(struct mlx5_eswitch *esw) +{ + struct mlx5_vport *vport; + unsigned long i; + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->enabled || !vport->adjacent) + continue; + mlx5_eswitch_unload_pf_vf_vport(esw, vport->vport); + } +} + +static int +mlx5_eswitch_load_adj_vf_vports(struct mlx5_eswitch *esw, + enum mlx5_eswitch_vport_event enabled_events) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + mlx5_esw_for_each_vf_vport(esw, i, vport, U16_MAX) { + if (!vport->adjacent) + continue; + err = mlx5_eswitch_load_pf_vf_vport(esw, vport->vport, + enabled_events); + if (err) + goto unload_adj_vf_vport; + } + + return 0; + +unload_adj_vf_vport: + mlx5_eswitch_unload_adj_vf_vports(esw); + return err; +} + int mlx5_eswitch_load_vf_vports(struct mlx5_eswitch *esw, u16 num_vfs, enum mlx5_eswitch_vport_event enabled_events) { @@ -1278,17 +1347,19 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, esw->mode == MLX5_ESWITCH_LEGACY; /* Enable PF vport */ - if (pf_needed) { + if (pf_needed && mlx5_esw_host_functions_enabled(esw->dev)) { ret = mlx5_eswitch_load_pf_vf_vport(esw, MLX5_VPORT_PF, enabled_events); if (ret) return ret; } - /* Enable external host PF HCA */ - ret = host_pf_enable_hca(esw->dev); - if (ret) - goto pf_hca_err; + if (mlx5_esw_host_functions_enabled(esw->dev)) { + /* Enable external host PF HCA */ + ret = host_pf_enable_hca(esw->dev); + if (ret) + goto pf_hca_err; + } /* Enable ECPF vport */ if (mlx5_ecpf_vport_exists(esw->dev)) { @@ -1311,8 +1382,16 @@ mlx5_eswitch_enable_pf_vf_vports(struct mlx5_eswitch *esw, enabled_events); if (ret) goto vf_err; + + /* Enable adjacent VF vports */ + ret = mlx5_eswitch_load_adj_vf_vports(esw, enabled_events); + if (ret) + goto unload_vf_vports; + return 0; +unload_vf_vports: + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); vf_err: if (mlx5_core_ec_sriov_enabled(esw->dev)) mlx5_eswitch_unload_ec_vf_vports(esw, esw->esw_funcs.num_ec_vfs); @@ -1320,9 +1399,10 @@ ec_vf_err: if (mlx5_ecpf_vport_exists(esw->dev)) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); ecpf_err: - host_pf_disable_hca(esw->dev); + if (mlx5_esw_host_functions_enabled(esw->dev)) + host_pf_disable_hca(esw->dev); pf_hca_err: - if (pf_needed) + if (pf_needed && mlx5_esw_host_functions_enabled(esw->dev)) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); return ret; } @@ -1332,6 +1412,8 @@ pf_hca_err: */ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) { + mlx5_eswitch_unload_adj_vf_vports(esw); + mlx5_eswitch_unload_vf_vports(esw, esw->esw_funcs.num_vfs); if (mlx5_core_ec_sriov_enabled(esw->dev)) @@ -1342,10 +1424,12 @@ void mlx5_eswitch_disable_pf_vf_vports(struct mlx5_eswitch *esw) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_ECPF); } - 
host_pf_disable_hca(esw->dev); + if (mlx5_esw_host_functions_enabled(esw->dev)) + host_pf_disable_hca(esw->dev); - if (mlx5_core_is_ecpf_esw_manager(esw->dev) || - esw->mode == MLX5_ESWITCH_LEGACY) + if ((mlx5_core_is_ecpf_esw_manager(esw->dev) || + esw->mode == MLX5_ESWITCH_LEGACY) && + mlx5_esw_host_functions_enabled(esw->dev)) mlx5_eswitch_unload_pf_vf_vport(esw, MLX5_VPORT_PF); } @@ -1402,19 +1486,76 @@ static void mlx5_esw_mode_change_notify(struct mlx5_eswitch *esw, u16 mode) blocking_notifier_call_chain(&esw->n_head, 0, &info); } +static int mlx5_esw_egress_acls_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); + int err; + int i; + + for (i = 0; i < total_vports; i++) { + err = mlx5_fs_vport_egress_acl_ns_add(steering, i); + if (err) + goto acl_ns_remove; + } + return 0; + +acl_ns_remove: + while (i--) + mlx5_fs_vport_egress_acl_ns_remove(steering, i); + return err; +} + +static void mlx5_esw_egress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); + int i; + + for (i = total_vports - 1; i >= 0; i--) + mlx5_fs_vport_egress_acl_ns_remove(steering, i); +} + +static int mlx5_esw_ingress_acls_init(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); + int err; + int i; + + for (i = 0; i < total_vports; i++) { + err = mlx5_fs_vport_ingress_acl_ns_add(steering, i); + if (err) + goto acl_ns_remove; + } + return 0; + +acl_ns_remove: + while (i--) + mlx5_fs_vport_ingress_acl_ns_remove(steering, i); + return err; +} + +static void mlx5_esw_ingress_acls_cleanup(struct mlx5_core_dev *dev) +{ + struct mlx5_flow_steering *steering = dev->priv.steering; + int total_vports = mlx5_eswitch_get_total_vports(dev); + int i; + + for (i = total_vports - 1; i >= 0; i--) + mlx5_fs_vport_ingress_acl_ns_remove(steering, i); +} + static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) { struct mlx5_core_dev *dev = esw->dev; - int total_vports; int err; if (esw->flags & MLX5_ESWITCH_VPORT_ACL_NS_CREATED) return 0; - total_vports = mlx5_eswitch_get_total_vports(dev); - if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) { - err = mlx5_fs_egress_acls_init(dev, total_vports); + err = mlx5_esw_egress_acls_init(dev); if (err) return err; } else { @@ -1422,7 +1563,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) } if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) { - err = mlx5_fs_ingress_acls_init(dev, total_vports); + err = mlx5_esw_ingress_acls_init(dev); if (err) goto err; } else { @@ -1433,7 +1574,7 @@ static int mlx5_esw_acls_ns_init(struct mlx5_eswitch *esw) err: if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) - mlx5_fs_egress_acls_cleanup(dev); + mlx5_esw_egress_acls_cleanup(dev); return err; } @@ -1443,9 +1584,9 @@ static void mlx5_esw_acls_ns_cleanup(struct mlx5_eswitch *esw) esw->flags &= ~MLX5_ESWITCH_VPORT_ACL_NS_CREATED; if (MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support)) - mlx5_fs_ingress_acls_cleanup(dev); + mlx5_esw_ingress_acls_cleanup(dev); if (MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support)) - mlx5_fs_egress_acls_cleanup(dev); + mlx5_esw_egress_acls_cleanup(dev); } /** @@ -1674,7 +1815,8 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 * void *hca_caps; int err; - if (!mlx5_core_is_ecpf(dev)) { + if (!mlx5_core_is_ecpf(dev) || + !mlx5_esw_host_functions_enabled(dev)) { 
*max_sfs = 0; + return 0; } @@ -1696,8 +1838,7 @@ out_free: return err; } -static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, - int index, u16 vport_num) +int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num) { struct mlx5_vport *vport; int err; @@ -1710,6 +1851,7 @@ static int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, vport->vport = vport_num; vport->index = index; vport->info.link_state = MLX5_VPORT_ADMIN_STATE_AUTO; + vport->vhca_id = MLX5_VHCA_ID_INVALID; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); err = xa_insert(&esw->vports, vport_num, vport, GFP_KERNEL); if (err) @@ -1723,8 +1865,9 @@ insert_err: return err; } -static void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { + esw->total_vports--; xa_erase(&esw->vports, vport->vport); kfree(vport); } @@ -1750,21 +1893,23 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) xa_init(&esw->vports); - err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF); - if (err) - goto err; - if (esw->first_host_vport == MLX5_VPORT_PF) - xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); - idx++; - - for (i = 0; i < mlx5_core_max_vfs(dev); i++) { - err = mlx5_esw_vport_alloc(esw, idx, idx); + if (mlx5_esw_host_functions_enabled(dev)) { + err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_PF); if (err) goto err; - xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); - xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + if (esw->first_host_vport == MLX5_VPORT_PF) + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); idx++; + for (i = 0; i < mlx5_core_max_vfs(dev); i++) { + err = mlx5_esw_vport_alloc(esw, idx, idx); + if (err) + goto err; + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_VF); + xa_set_mark(&esw->vports, idx, MLX5_ESW_VPT_HOST_FN); + idx++; + } } + base_sf_num = mlx5_sf_start_function_id(dev); for (i = 0; i < mlx5_sf_max_functions(dev); i++) { err = mlx5_esw_vport_alloc(esw, idx, base_sf_num + i); @@ -1806,6 +1951,9 @@ static int mlx5_esw_vports_init(struct mlx5_eswitch *esw) err = mlx5_esw_vport_alloc(esw, idx, MLX5_VPORT_UPLINK); if (err) goto err; + + /* Adjacent vports or other dynamically created vports will use this */ + esw->last_vport_idx = ++idx; return 0; err: @@ -1864,6 +2012,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto free_esw; esw->dev = dev; + dev->priv.eswitch = esw; esw->manager_vport = mlx5_eswitch_manager_vport(dev); esw->first_host_vport = mlx5_eswitch_first_host_vport_num(dev); @@ -1874,11 +2023,14 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) goto abort; } + err = mlx5_esw_host_functions_enabled_query(esw); + if (err) + goto abort; + err = mlx5_esw_vports_init(esw); if (err) goto abort; - dev->priv.eswitch = esw; err = esw_offloads_init(esw); if (err) goto reps_err; @@ -2410,3 +2562,11 @@ void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) dev->num_ipsec_offloads--; mutex_unlock(&esw->state_lock); } + +bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev) +{ + if (!dev->priv.eswitch) + return true; + + return !dev->priv.eswitch->esw_funcs.host_funcs_disabled; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index 45506ad56847..df3756d7e52e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -197,6 +197,11 @@ static inline struct mlx5_vport *mlx5_devlink_port_vport_get(struct
devlink_port return mlx5_devlink_port_get(dl_port)->vport; } +#define MLX5_VHCA_ID_INVALID (-1) + +#define MLX5_VPORT_INVAL_VHCA_ID(vport) \ + ((vport)->vhca_id == MLX5_VHCA_ID_INVALID) + struct mlx5_vport { struct mlx5_core_dev *dev; struct hlist_head uc_list[MLX5_L2_ADDR_HASH_SIZE]; @@ -209,6 +214,13 @@ struct mlx5_vport { struct vport_egress egress; u32 default_metadata; u32 metadata; + int vhca_id; + + bool adjacent; /* delegated vhca from adjacent function */ + struct { + u16 parent_pci_devfn; /* Adjacent parent PCI device function */ + u16 function_id; /* Function ID of the delegated VPort */ + } adj_info; struct mlx5_vport_info info; @@ -323,6 +335,7 @@ struct mlx5_host_work { struct mlx5_esw_functions { struct mlx5_nb nb; + bool host_funcs_disabled; u16 num_vfs; u16 num_ec_vfs; }; @@ -377,6 +390,7 @@ struct mlx5_eswitch { struct mlx5_esw_bridge_offloads *br_offloads; struct mlx5_esw_offload offloads; + u32 last_vport_idx; int mode; u16 manager_vport; u16 first_host_vport; @@ -410,6 +424,8 @@ int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +int mlx5_esw_vport_alloc(struct mlx5_eswitch *esw, int index, u16 vport_num); +void mlx5_esw_vport_free(struct mlx5_eswitch *esw, struct mlx5_vport *vport); #define MLX5_ESWITCH_IGNORE_NUM_VFS (-1) int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs); @@ -417,7 +433,8 @@ int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf); void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw); void mlx5_eswitch_disable(struct mlx5_eswitch *esw); -void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key); +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, + const struct mlx5_devcom_match_attr *attr); void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw); bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw); int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, @@ -615,6 +632,9 @@ bool mlx5_esw_multipath_prereq(struct mlx5_core_dev *dev0, const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev); +void mlx5_esw_adjacent_vhcas_setup(struct mlx5_eswitch *esw); +void mlx5_esw_adjacent_vhcas_cleanup(struct mlx5_eswitch *esw); + #define MLX5_DEBUG_ESWITCH_MASK BIT(3) #define esw_info(__dev, format, ...) \ @@ -817,9 +837,17 @@ struct devlink_port *mlx5_esw_offloads_devlink_port(struct mlx5_eswitch *esw, u1 int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 *sf_base_id); -int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num); -void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num); +int mlx5_esw_vport_vhca_id_map(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); +void mlx5_esw_vport_vhca_id_unmap(struct mlx5_eswitch *esw, + struct mlx5_vport *vport); int mlx5_eswitch_vhca_id_to_vport(struct mlx5_eswitch *esw, u16 vhca_id, u16 *vport_num); +bool mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id); + +void mlx5_esw_offloads_rep_remove(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport); +int mlx5_esw_offloads_rep_add(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport); /** * struct mlx5_esw_event_info - Indicates eswitch mode changed/changing. 
@@ -893,6 +921,7 @@ int mlx5_esw_ipsec_vf_packet_offload_set(struct mlx5_eswitch *esw, struct mlx5_v bool enable); int mlx5_esw_ipsec_vf_packet_offload_supported(struct mlx5_core_dev *dev, u16 vport_num); +bool mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev); #else /* CONFIG_MLX5_ESWITCH */ /* eswitch API stubs */ static inline int mlx5_eswitch_init(struct mlx5_core_dev *dev) { return 0; } @@ -900,7 +929,9 @@ static inline void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) {} static inline int mlx5_eswitch_enable(struct mlx5_eswitch *esw, int num_vfs) { return 0; } static inline void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw, bool clear_vf) {} static inline void mlx5_eswitch_disable(struct mlx5_eswitch *esw) {} -static inline void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) {} +static inline void +mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, + const struct mlx5_devcom_match_attr *attr) {} static inline void mlx5_esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw) {} static inline bool mlx5_esw_offloads_devcom_is_ready(struct mlx5_eswitch *esw) { return false; } static inline bool mlx5_eswitch_is_funcs_handler(struct mlx5_core_dev *dev) { return false; } @@ -960,6 +991,19 @@ static inline bool mlx5_eswitch_block_ipsec(struct mlx5_core_dev *dev) } static inline void mlx5_eswitch_unblock_ipsec(struct mlx5_core_dev *dev) {} + +static inline bool +mlx5_esw_host_functions_enabled(const struct mlx5_core_dev *dev) +{ + return true; +} + +static inline bool +mlx5_esw_vport_vhca_id(struct mlx5_eswitch *esw, u16 vportn, u16 *vhca_id) +{ + return false; +} + #endif /* CONFIG_MLX5_ESWITCH */ #endif /* __MLX5_ESWITCH_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index bee906661282..52c3de24bea3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -1016,8 +1016,8 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *on_esw, flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(on_esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) - esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %ld\n", - PTR_ERR(flow_rule)); + esw_warn(on_esw->dev, "FDB: Failed to add send to vport rule err %pe\n", + flow_rule); out: kvfree(spec); return flow_rule; @@ -1065,8 +1065,8 @@ mlx5_eswitch_add_send_to_vport_meta_rule(struct mlx5_eswitch *esw, u16 vport_num flow_rule = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), spec, &flow_act, &dest, 1); if (IS_ERR(flow_rule)) - esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %ld\n", - vport_num, PTR_ERR(flow_rule)); + esw_warn(esw->dev, "FDB: Failed to add send to vport meta rule vport %d, err %pe\n", + vport_num, flow_rule); kvfree(spec); return flow_rule; @@ -1213,7 +1213,8 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + if (mlx5_core_is_ecpf_esw_manager(peer_dev) && + mlx5_esw_host_functions_enabled(peer_dev)) { peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); esw_set_peer_miss_rule_source_port(esw, peer_esw, spec, MLX5_VPORT_PF); @@ -1239,19 +1240,21 @@ static int esw_add_fdb_peer_miss_rules(struct mlx5_eswitch *esw, flows[peer_vport->index] = flow; } - mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, -
mlx5_core_max_vfs(peer_dev)) { - esw_set_peer_miss_rule_source_port(esw, - peer_esw, - spec, peer_vport->vport); + if (mlx5_esw_host_functions_enabled(esw->dev)) { + mlx5_esw_for_each_vf_vport(peer_esw, i, peer_vport, + mlx5_core_max_vfs(peer_dev)) { + esw_set_peer_miss_rule_source_port(esw, peer_esw, + spec, + peer_vport->vport); - flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), - spec, &flow_act, &dest, 1); - if (IS_ERR(flow)) { - err = PTR_ERR(flow); - goto add_vf_flow_err; + flow = mlx5_add_flow_rules(mlx5_eswitch_get_slow_fdb(esw), + spec, &flow_act, &dest, 1); + if (IS_ERR(flow)) { + err = PTR_ERR(flow); + goto add_vf_flow_err; + } + flows[peer_vport->index] = flow; } - flows[peer_vport->index] = flow; } if (mlx5_core_ec_sriov_enabled(peer_dev)) { @@ -1301,7 +1304,9 @@ add_vf_flow_err: mlx5_del_flow_rules(flows[peer_vport->index]); } add_ecpf_flow_err: - if (mlx5_core_is_ecpf_esw_manager(peer_dev)) { + + if (mlx5_core_is_ecpf_esw_manager(peer_dev) && + mlx5_esw_host_functions_enabled(peer_dev)) { peer_vport = mlx5_eswitch_get_vport(peer_esw, MLX5_VPORT_PF); mlx5_del_flow_rules(flows[peer_vport->index]); } @@ -2154,7 +2159,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, u16 vport, flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec, &flow_act, dest, 1); if (IS_ERR(flow_rule)) { - esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule)); + esw_warn(esw->dev, + "fs offloads: Failed to add vport rx rule err %pe\n", + flow_rule); goto out; } @@ -2173,8 +2180,8 @@ static int esw_create_vport_rx_drop_rule(struct mlx5_eswitch *esw) &flow_act, NULL, 0); if (IS_ERR(flow_rule)) { esw_warn(esw->dev, - "fs offloads: Failed to add vport rx drop rule err %ld\n", - PTR_ERR(flow_rule)); + "fs offloads: Failed to add vport rx drop rule err %pe\n", + flow_rule); return PTR_ERR(flow_rule); } @@ -2373,7 +2380,20 @@ static int esw_offloads_start(struct mlx5_eswitch *esw, return 0; } -static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx5_vport *vport) +void mlx5_esw_offloads_rep_remove(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) +{ + struct mlx5_eswitch_rep *rep = xa_load(&esw->offloads.vport_reps, + vport->vport); + + if (!rep) + return; + xa_erase(&esw->offloads.vport_reps, vport->vport); + kfree(rep); +} + +int mlx5_esw_offloads_rep_add(struct mlx5_eswitch *esw, + const struct mlx5_vport *vport) { struct mlx5_eswitch_rep *rep; int rep_type; @@ -2385,9 +2405,19 @@ static int mlx5_esw_offloads_rep_init(struct mlx5_eswitch *esw, const struct mlx rep->vport = vport->vport; rep->vport_index = vport->index; - for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) - atomic_set(&rep->rep_data[rep_type].state, REP_UNREGISTERED); - + for (rep_type = 0; rep_type < NUM_REP_TYPES; rep_type++) { + if (!esw->offloads.rep_ops[rep_type]) { + atomic_set(&rep->rep_data[rep_type].state, + REP_UNREGISTERED); + continue; + } + /* Dynamic/delegated vports add their representors after + * mlx5_eswitch_register_vport_reps, so mark them as registered + * for them to be loaded later with the others. 
+ */ + rep->esw = esw; + atomic_set(&rep->rep_data[rep_type].state, REP_REGISTERED); + } err = xa_insert(&esw->offloads.vport_reps, rep->vport, rep, GFP_KERNEL); if (err) goto insert_err; @@ -2425,7 +2455,7 @@ static int esw_offloads_init_reps(struct mlx5_eswitch *esw) xa_init(&esw->offloads.vport_reps); mlx5_esw_for_each_vport(esw, i, vport) { - err = mlx5_esw_offloads_rep_init(esw, vport); + err = mlx5_esw_offloads_rep_add(esw, vport); if (err) goto err; } @@ -3076,7 +3106,8 @@ err_out: return err; } -void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) +void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, + const struct mlx5_devcom_match_attr *attr) { int i; @@ -3095,7 +3126,7 @@ void mlx5_esw_offloads_devcom_init(struct mlx5_eswitch *esw, u64 key) esw->num_peers = 0; esw->devcom = mlx5_devcom_register_component(esw->dev->priv.devc, MLX5_DEVCOM_ESW_OFFLOADS, - key, + attr, mlx5_esw_offloads_devcom_event, esw); if (IS_ERR(esw->devcom)) @@ -3533,6 +3564,8 @@ int esw_offloads_enable(struct mlx5_eswitch *esw) int err; mutex_init(&esw->offloads.termtbl_mutex); + mlx5_esw_adjacent_vhcas_setup(esw); + err = mlx5_rdma_enable_roce(esw->dev); if (err) goto err_roce; @@ -3597,6 +3630,7 @@ err_vport_metadata: err_metadata: mlx5_rdma_disable_roce(esw->dev); err_roce: + mlx5_esw_adjacent_vhcas_cleanup(esw); mutex_destroy(&esw->offloads.termtbl_mutex); return err; } @@ -3630,6 +3664,7 @@ void esw_offloads_disable(struct mlx5_eswitch *esw) mapping_destroy(esw->offloads.reg_c0_obj_pool); esw_offloads_metadata_uninit(esw); mlx5_rdma_disable_roce(esw->dev); + mlx5_esw_adjacent_vhcas_cleanup(esw); mutex_destroy(&esw->offloads.termtbl_mutex); } @@ -3739,6 +3774,29 @@ void mlx5_eswitch_unblock_mode(struct mlx5_core_dev *dev) up_write(&esw->mode_lock); } +/* Returns false only when uplink netdev exists and its netns is different from + * devlink's netns. True for all others so entering switchdev mode is allowed. + */ +static bool mlx5_devlink_netdev_netns_immutable_set(struct devlink *devlink, + bool immutable) +{ + struct mlx5_core_dev *mdev = devlink_priv(devlink); + struct net_device *netdev; + bool ret; + + netdev = mlx5_uplink_netdev_get(mdev); + if (!netdev) + return true; + + rtnl_lock(); + netdev->netns_immutable = immutable; + ret = net_eq(dev_net(netdev), devlink_net(devlink)); + rtnl_unlock(); + + mlx5_uplink_netdev_put(mdev, netdev); + return ret; +} + int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, struct netlink_ext_ack *extack) { @@ -3781,6 +3839,14 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, esw->eswitch_operation_in_progress = true; up_write(&esw->mode_lock); + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && + !mlx5_devlink_netdev_netns_immutable_set(devlink, true)) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change E-Switch mode to switchdev when netdev net namespace has diverged from the devlink's."); + err = -EINVAL; + goto skip; + } + if (mode == DEVLINK_ESWITCH_MODE_LEGACY) esw->dev->priv.flags |= MLX5_PRIV_FLAGS_SWITCH_LEGACY; mlx5_eswitch_disable_locked(esw); @@ -3799,6 +3865,8 @@ int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode, } skip: + if (mode == DEVLINK_ESWITCH_MODE_SWITCHDEV && err) + mlx5_devlink_netdev_netns_immutable_set(devlink, false); down_write(&esw->mode_lock); esw->eswitch_operation_in_progress = false; unlock: @@ -4059,7 +4127,8 @@ mlx5_eswitch_vport_has_rep(const struct mlx5_eswitch *esw, u16 vport_num) { /* Currently, only ECPF based device has representor for host PF. 
*/ if (vport_num == MLX5_VPORT_PF && - !mlx5_core_is_ecpf_esw_manager(esw->dev)) + (!mlx5_core_is_ecpf_esw_manager(esw->dev) || + !mlx5_esw_host_functions_enabled(esw->dev))) return false; if (vport_num == MLX5_VPORT_ECPF && @@ -4161,23 +4230,28 @@ u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw, } EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match); -int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) +int mlx5_esw_vport_vhca_id_map(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { u16 *old_entry, *vhca_map_entry, vhca_id; - int err; - err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); - if (err) { - esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%u,err=%d)\n", - vport_num, err); - return err; + if (WARN_ONCE(MLX5_VPORT_INVAL_VHCA_ID(vport), + "vport %d vhca_id is not set", vport->vport)) { + int err; + + err = mlx5_vport_get_vhca_id(vport->dev, vport->vport, + &vhca_id); + if (err) + return err; + vport->vhca_id = vhca_id; } + vhca_id = vport->vhca_id; vhca_map_entry = kmalloc(sizeof(*vhca_map_entry), GFP_KERNEL); if (!vhca_map_entry) return -ENOMEM; - *vhca_map_entry = vport_num; + *vhca_map_entry = vport->vport; old_entry = xa_store(&esw->offloads.vhca_map, vhca_id, vhca_map_entry, GFP_KERNEL); if (xa_is_err(old_entry)) { kfree(vhca_map_entry); @@ -4187,17 +4261,12 @@ int mlx5_esw_vport_vhca_id_set(struct mlx5_eswitch *esw, u16 vport_num) return 0; } -void mlx5_esw_vport_vhca_id_clear(struct mlx5_eswitch *esw, u16 vport_num) +void mlx5_esw_vport_vhca_id_unmap(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) { - u16 *vhca_map_entry, vhca_id; - int err; - - err = mlx5_vport_get_vhca_id(esw->dev, vport_num, &vhca_id); - if (err) - esw_warn(esw->dev, "Getting vhca_id for vport failed (vport=%hu,err=%d)\n", - vport_num, err); + u16 *vhca_map_entry; - vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vhca_id); + vhca_map_entry = xa_erase(&esw->offloads.vhca_map, vport->vhca_id); kfree(vhca_map_entry); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index c4de6bf8d1b6..cb1319974f83 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -475,7 +475,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size) *conn->cq.mcq.arm_db = 0; conn->cq.mcq.vector = 0; conn->cq.mcq.comp = mlx5_fpga_conn_cq_complete; - conn->cq.mcq.uar = fdev->conn_res.uar; tasklet_setup(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet); mlx5_fpga_dbg(fdev, "Created CQ #0x%x\n", conn->cq.mcq.cqn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index d87392360dbd..2db3ffb0a2b2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -114,9 +114,9 @@ #define ETHTOOL_NUM_PRIOS 11 #define ETHTOOL_MIN_LEVEL (KERNEL_MIN_LEVEL + ETHTOOL_NUM_PRIOS) /* Vlan, mac, ttc, inner ttc, {UDP/ANY/aRFS/accel/{esp, esp_err}}, IPsec policy, - * {IPsec RoCE MPV,Alias table},IPsec RoCE policy + * IPsec policy miss, {IPsec RoCE MPV,Alias table},IPsec RoCE policy */ -#define KERNEL_NIC_PRIO_NUM_LEVELS 10 +#define KERNEL_NIC_PRIO_NUM_LEVELS 11 #define KERNEL_NIC_NUM_PRIOS 1 /* One more level for tc, and one more for promisc */ #define KERNEL_MIN_LEVEL (KERNEL_NIC_PRIO_NUM_LEVELS + 2) @@ -663,7 +663,7 @@ static void del_sw_hw_rule(struct fs_node *node) 
BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->act_dests.action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; - mlx5_fc_local_destroy(rule->dest_attr.counter); + mlx5_fc_local_put(rule->dest_attr.counter); goto out; } @@ -2793,30 +2793,32 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, } EXPORT_SYMBOL(mlx5_get_flow_namespace); +struct mlx5_vport_acl_root_ns { + u16 vport_idx; + struct mlx5_flow_root_namespace *root_ns; +}; + struct mlx5_flow_namespace * mlx5_get_flow_vport_namespace(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type, int vport_idx) { struct mlx5_flow_steering *steering = dev->priv.steering; + struct mlx5_vport_acl_root_ns *vport_ns; if (!steering) return NULL; switch (type) { case MLX5_FLOW_NAMESPACE_ESW_EGRESS: - if (vport_idx >= steering->esw_egress_acl_vports) - return NULL; - if (steering->esw_egress_root_ns && - steering->esw_egress_root_ns[vport_idx]) - return &steering->esw_egress_root_ns[vport_idx]->ns; + vport_ns = xa_load(&steering->esw_egress_root_ns, vport_idx); + if (vport_ns) + return &vport_ns->root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_ESW_INGRESS: - if (vport_idx >= steering->esw_ingress_acl_vports) - return NULL; - if (steering->esw_ingress_root_ns && - steering->esw_ingress_root_ns[vport_idx]) - return &steering->esw_ingress_root_ns[vport_idx]->ns; + vport_ns = xa_load(&steering->esw_ingress_root_ns, vport_idx); + if (vport_ns) + return &vport_ns->root_ns->ns; else return NULL; case MLX5_FLOW_NAMESPACE_RDMA_TRANSPORT_RX: @@ -3575,118 +3577,102 @@ out_err: return err; } -static int init_egress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +static void +mlx5_fs_remove_vport_acl_root_ns(struct xarray *esw_acl_root_ns, u16 vport_idx) { - struct fs_prio *prio; - - steering->esw_egress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_EGRESS_ACL); - if (!steering->esw_egress_root_ns[vport]) - return -ENOMEM; + struct mlx5_vport_acl_root_ns *vport_ns; - /* create 1 prio*/ - prio = fs_create_prio(&steering->esw_egress_root_ns[vport]->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); + vport_ns = xa_erase(esw_acl_root_ns, vport_idx); + if (vport_ns) { + cleanup_root_ns(vport_ns->root_ns); + kfree(vport_ns); + } } -static int init_ingress_acl_root_ns(struct mlx5_flow_steering *steering, int vport) +static int +mlx5_fs_add_vport_acl_root_ns(struct mlx5_flow_steering *steering, + struct xarray *esw_acl_root_ns, + enum fs_flow_table_type table_type, + u16 vport_idx) { + struct mlx5_vport_acl_root_ns *vport_ns; struct fs_prio *prio; + int err; - steering->esw_ingress_root_ns[vport] = create_root_ns(steering, FS_FT_ESW_INGRESS_ACL); - if (!steering->esw_ingress_root_ns[vport]) - return -ENOMEM; + /* sanity check, intended xarrays are used */ + if (WARN_ON(esw_acl_root_ns != &steering->esw_egress_root_ns && + esw_acl_root_ns != &steering->esw_ingress_root_ns)) + return -EINVAL; - /* create 1 prio*/ - prio = fs_create_prio(&steering->esw_ingress_root_ns[vport]->ns, 0, 1); - return PTR_ERR_OR_ZERO(prio); -} + if (table_type != FS_FT_ESW_EGRESS_ACL && + table_type != FS_FT_ESW_INGRESS_ACL) { + mlx5_core_err(steering->dev, + "Invalid table type %d for egress/ingress ACLs\n", + table_type); + return -EINVAL; + } -int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports) -{ - struct mlx5_flow_steering *steering = dev->priv.steering; - int err; - int i; + if (xa_load(esw_acl_root_ns, vport_idx)) + return -EEXIST; - steering->esw_egress_root_ns 
= - kcalloc(total_vports, - sizeof(*steering->esw_egress_root_ns), - GFP_KERNEL); - if (!steering->esw_egress_root_ns) + vport_ns = kzalloc(sizeof(*vport_ns), GFP_KERNEL); + if (!vport_ns) return -ENOMEM; - for (i = 0; i < total_vports; i++) { - err = init_egress_acl_root_ns(steering, i); - if (err) - goto cleanup_root_ns; + vport_ns->root_ns = create_root_ns(steering, table_type); + if (!vport_ns->root_ns) { + err = -ENOMEM; + goto kfree_vport_ns; } - steering->esw_egress_acl_vports = total_vports; + + /* create 1 prio*/ + prio = fs_create_prio(&vport_ns->root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + err = PTR_ERR(prio); + goto cleanup_root_ns; + } + + vport_ns->vport_idx = vport_idx; + err = xa_insert(esw_acl_root_ns, vport_idx, vport_ns, GFP_KERNEL); + if (err) + goto cleanup_root_ns; return 0; cleanup_root_ns: - for (i--; i >= 0; i--) - cleanup_root_ns(steering->esw_egress_root_ns[i]); - kfree(steering->esw_egress_root_ns); - steering->esw_egress_root_ns = NULL; + cleanup_root_ns(vport_ns->root_ns); +kfree_vport_ns: + kfree(vport_ns); return err; } -void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev) +int mlx5_fs_vport_egress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx) { - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_egress_root_ns) - return; - - for (i = 0; i < steering->esw_egress_acl_vports; i++) - cleanup_root_ns(steering->esw_egress_root_ns[i]); - - kfree(steering->esw_egress_root_ns); - steering->esw_egress_root_ns = NULL; + return mlx5_fs_add_vport_acl_root_ns(steering, + &steering->esw_egress_root_ns, + FS_FT_ESW_EGRESS_ACL, vport_idx); } -int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports) +int mlx5_fs_vport_ingress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx) { - struct mlx5_flow_steering *steering = dev->priv.steering; - int err; - int i; - - steering->esw_ingress_root_ns = - kcalloc(total_vports, - sizeof(*steering->esw_ingress_root_ns), - GFP_KERNEL); - if (!steering->esw_ingress_root_ns) - return -ENOMEM; - - for (i = 0; i < total_vports; i++) { - err = init_ingress_acl_root_ns(steering, i); - if (err) - goto cleanup_root_ns; - } - steering->esw_ingress_acl_vports = total_vports; - return 0; - -cleanup_root_ns: - for (i--; i >= 0; i--) - cleanup_root_ns(steering->esw_ingress_root_ns[i]); - kfree(steering->esw_ingress_root_ns); - steering->esw_ingress_root_ns = NULL; - return err; + return mlx5_fs_add_vport_acl_root_ns(steering, + &steering->esw_ingress_root_ns, + FS_FT_ESW_INGRESS_ACL, vport_idx); } -void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev) +void mlx5_fs_vport_egress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx) { - struct mlx5_flow_steering *steering = dev->priv.steering; - int i; - - if (!steering->esw_ingress_root_ns) - return; - - for (i = 0; i < steering->esw_ingress_acl_vports; i++) - cleanup_root_ns(steering->esw_ingress_root_ns[i]); + mlx5_fs_remove_vport_acl_root_ns(&steering->esw_egress_root_ns, + vport_idx); +} - kfree(steering->esw_ingress_root_ns); - steering->esw_ingress_root_ns = NULL; +void mlx5_fs_vport_ingress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx) +{ + mlx5_fs_remove_vport_acl_root_ns(&steering->esw_ingress_root_ns, + vport_idx); } u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type) @@ -3734,6 +3720,13 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, char *value = val.vstr; u8 eswitch_mode; + eswitch_mode = 
mlx5_eswitch_mode(dev); + if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Changing fs mode is not supported when eswitch offloads enabled."); + return -EOPNOTSUPP; + } + if (!strcmp(value, "dmfs")) return 0; @@ -3759,14 +3752,6 @@ static int mlx5_fs_mode_validate(struct devlink *devlink, u32 id, return -EINVAL; } - eswitch_mode = mlx5_eswitch_mode(dev); - if (eswitch_mode == MLX5_ESWITCH_OFFLOADS) { - NL_SET_ERR_MSG_FMT_MOD(extack, - "Moving to %s is not supported when eswitch offloads enabled.", - value); - return -EOPNOTSUPP; - } - return 0; } @@ -3819,6 +3804,11 @@ void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev) { struct mlx5_flow_steering *steering = dev->priv.steering; + WARN_ON(!xa_empty(&steering->esw_egress_root_ns)); + WARN_ON(!xa_empty(&steering->esw_ingress_root_ns)); + xa_destroy(&steering->esw_egress_root_ns); + xa_destroy(&steering->esw_ingress_root_ns); + cleanup_root_ns(steering->root_ns); cleanup_fdb_root_ns(steering); cleanup_root_ns(steering->port_sel_root_ns); @@ -3909,6 +3899,8 @@ int mlx5_fs_core_init(struct mlx5_core_dev *dev) goto err; } + xa_init(&steering->esw_egress_root_ns); + xa_init(&steering->esw_ingress_root_ns); return 0; err: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 500826229b0b..8458ce203dac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -151,16 +151,14 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *root_ns; struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_namespace **fdb_sub_ns; - struct mlx5_flow_root_namespace **esw_egress_root_ns; - struct mlx5_flow_root_namespace **esw_ingress_root_ns; + struct xarray esw_egress_root_ns; + struct xarray esw_ingress_root_ns; struct mlx5_flow_root_namespace *sniffer_tx_root_ns; struct mlx5_flow_root_namespace *sniffer_rx_root_ns; struct mlx5_flow_root_namespace *rdma_rx_root_ns; struct mlx5_flow_root_namespace *rdma_tx_root_ns; struct mlx5_flow_root_namespace *egress_root_ns; struct mlx5_flow_root_namespace *port_sel_root_ns; - int esw_egress_acl_vports; - int esw_ingress_acl_vports; struct mlx5_flow_root_namespace **rdma_transport_rx_root_ns; struct mlx5_flow_root_namespace **rdma_transport_tx_root_ns; int rdma_transport_rx_vports; @@ -343,6 +341,7 @@ struct mlx5_fc { enum mlx5_fc_type type; struct mlx5_fc_bulk *bulk; struct mlx5_fc_cache cache; + refcount_t fc_local_refcount; /* last{packets,bytes} are used for calculating deltas since last reading. 
*/ u64 lastpackets; u64 lastbytes; @@ -378,10 +377,14 @@ void mlx5_fs_core_free(struct mlx5_core_dev *dev); int mlx5_fs_core_init(struct mlx5_core_dev *dev); void mlx5_fs_core_cleanup(struct mlx5_core_dev *dev); -int mlx5_fs_egress_acls_init(struct mlx5_core_dev *dev, int total_vports); -void mlx5_fs_egress_acls_cleanup(struct mlx5_core_dev *dev); -int mlx5_fs_ingress_acls_init(struct mlx5_core_dev *dev, int total_vports); -void mlx5_fs_ingress_acls_cleanup(struct mlx5_core_dev *dev); +int mlx5_fs_vport_egress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx); +int mlx5_fs_vport_ingress_acl_ns_add(struct mlx5_flow_steering *steering, + u16 vport_idx); +void mlx5_fs_vport_egress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx); +void mlx5_fs_vport_ingress_acl_ns_remove(struct mlx5_flow_steering *steering, + int vport_idx); u32 mlx5_fs_get_capabilities(struct mlx5_core_dev *dev, enum mlx5_flow_namespace_type type); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c index 492775d3d193..83001eda3884 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_counters.c @@ -562,17 +562,36 @@ mlx5_fc_local_create(u32 counter_id, u32 offset, u32 bulk_size) counter->id = counter_id; fc_bulk->base_id = counter_id - offset; fc_bulk->fs_bulk.bulk_len = bulk_size; + refcount_set(&fc_bulk->hws_data.hws_action_refcount, 0); + mutex_init(&fc_bulk->hws_data.lock); counter->bulk = fc_bulk; + refcount_set(&counter->fc_local_refcount, 1); return counter; } EXPORT_SYMBOL(mlx5_fc_local_create); void mlx5_fc_local_destroy(struct mlx5_fc *counter) { - if (!counter || counter->type != MLX5_FC_TYPE_LOCAL) - return; - kfree(counter->bulk); kfree(counter); } EXPORT_SYMBOL(mlx5_fc_local_destroy); + +void mlx5_fc_local_get(struct mlx5_fc *counter) +{ + if (!counter || counter->type != MLX5_FC_TYPE_LOCAL) + return; + + refcount_inc(&counter->fc_local_refcount); +} + +void mlx5_fc_local_put(struct mlx5_fc *counter) +{ + if (!counter || counter->type != MLX5_FC_TYPE_LOCAL) + return; + + if (!refcount_dec_and_test(&counter->fc_local_refcount)) + return; + + mlx5_fc_local_destroy(counter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 57476487e31f..eeb4437975f2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -294,6 +294,12 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) return err; } + if (MLX5_CAP_GEN(dev, psp)) { + err = mlx5_core_get_caps(dev, MLX5_CAP_PSP); + if (err) + return err; + } + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c index 69933addd921..89e399606877 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.c @@ -6,13 +6,15 @@ #include "fw_reset.h" #include "diag/fw_tracer.h" #include "lib/tout.h" +#include "sf/sf.h" enum { MLX5_FW_RESET_FLAGS_RESET_REQUESTED, MLX5_FW_RESET_FLAGS_NACK_RESET_REQUEST, MLX5_FW_RESET_FLAGS_PENDING_COMP, MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, - MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED + MLX5_FW_RESET_FLAGS_RELOAD_REQUIRED, + MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, }; struct mlx5_fw_reset { @@ -25,6 +27,7 @@ struct mlx5_fw_reset { struct work_struct reset_reload_work; struct work_struct reset_now_work; struct work_struct reset_abort_work; + struct 
delayed_work reset_timeout_work; unsigned long reset_flags; u8 reset_method; struct timer_list timer; @@ -219,7 +222,7 @@ int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev) return mlx5_reg_mfrl_set(dev, MLX5_MFRL_REG_RESET_LEVEL0, 0, 0, false); } -static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unloaded) +static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev) { struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; struct devlink *devlink = priv_to_devlink(dev); @@ -228,8 +231,7 @@ static void mlx5_fw_reset_complete_reload(struct mlx5_core_dev *dev, bool unload if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) { complete(&fw_reset->done); } else { - if (!unloaded) - mlx5_unload_one(dev, false); + mlx5_sync_reset_unload_flow(dev, false); if (mlx5_health_wait_pci_up(dev)) mlx5_core_err(dev, "reset reload flow aborted, PCI reads still not working\n"); else @@ -258,6 +260,8 @@ static int mlx5_sync_reset_clear_reset_requested(struct mlx5_core_dev *dev, bool return -EALREADY; } + if (current_work() != &fw_reset->reset_timeout_work.work) + cancel_delayed_work(&fw_reset->reset_timeout_work); mlx5_stop_sync_reset_poll(dev); if (poll_health) mlx5_start_health_poll(dev); @@ -272,7 +276,7 @@ static void mlx5_sync_reset_reload_work(struct work_struct *work) mlx5_sync_reset_clear_reset_requested(dev, false); mlx5_enter_error_state(dev, true); - mlx5_fw_reset_complete_reload(dev, false); + mlx5_fw_reset_complete_reload(dev); } #define MLX5_RESET_POLL_INTERVAL (HZ / 10) @@ -329,6 +333,11 @@ static int mlx5_sync_reset_set_reset_requested(struct mlx5_core_dev *dev) } mlx5_stop_health_poll(dev, true); mlx5_start_sync_reset_poll(dev); + + if (!test_bit(MLX5_FW_RESET_FLAGS_DROP_NEW_REQUESTS, + &fw_reset->reset_flags)) + schedule_delayed_work(&fw_reset->reset_timeout_work, + msecs_to_jiffies(mlx5_tout_ms(dev, PCI_SYNC_UPDATE))); return 0; } @@ -428,6 +437,11 @@ static bool mlx5_is_reset_now_capable(struct mlx5_core_dev *dev, return false; } + if (!mlx5_core_is_ecpf(dev) && !mlx5_sf_table_empty(dev)) { + mlx5_core_warn(dev, "SFs should be removed before reset\n"); + return false; + } + #if IS_ENABLED(CONFIG_HOTPLUG_PCI_PCIE) if (reset_method != MLX5_MFRL_REG_PCI_RESET_METHOD_HOT_RESET) { err = mlx5_check_hotplug_interrupt(dev, bridge); @@ -586,6 +600,65 @@ static int mlx5_sync_pci_reset(struct mlx5_core_dev *dev, u8 reset_method) return err; } +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked) +{ + struct mlx5_fw_reset *fw_reset = dev->priv.fw_reset; + unsigned long timeout; + int poll_freq = 20; + bool reset_action; + u8 rst_state; + int err; + + if (locked) + mlx5_unload_one_devl_locked(dev, false); + else + mlx5_unload_one(dev, false); + + if (!test_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags)) + return; + + mlx5_set_fw_rst_ack(dev); + mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); + + reset_action = false; + timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, RESET_UNLOAD)); + do { + rst_state = mlx5_get_fw_rst_state(dev); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ || + rst_state == MLX5_FW_RST_STATE_IDLE) { + reset_action = true; + break; + } + if (rst_state == MLX5_FW_RST_STATE_DROP_MODE) { + mlx5_core_info(dev, "Sync Reset Drop mode ack\n"); + mlx5_set_fw_rst_ack(dev); + poll_freq = 1000; + } + msleep(poll_freq); + } while (!time_after(jiffies, timeout)); + + if (!reset_action) { + mlx5_core_err(dev, "Got timeout waiting for sync reset action, state = %u\n", + 
rst_state); + fw_reset->ret = -ETIMEDOUT; + goto done; + } + + mlx5_core_warn(dev, "Sync Reset, got reset action. rst_state = %u\n", + rst_state); + if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { + err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); + if (err) { + mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", + err); + fw_reset->ret = err; + } + } + +done: + clear_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); +} + static void mlx5_sync_reset_now_event(struct work_struct *work) { struct mlx5_fw_reset *fw_reset = container_of(work, struct mlx5_fw_reset, @@ -613,17 +686,13 @@ static void mlx5_sync_reset_now_event(struct work_struct *work) mlx5_enter_error_state(dev, true); done: fw_reset->ret = err; - mlx5_fw_reset_complete_reload(dev, false); + mlx5_fw_reset_complete_reload(dev); } static void mlx5_sync_reset_unload_event(struct work_struct *work) { struct mlx5_fw_reset *fw_reset; struct mlx5_core_dev *dev; - unsigned long timeout; - int poll_freq = 20; - bool reset_action; - u8 rst_state; int err; fw_reset = container_of(work, struct mlx5_fw_reset, reset_unload_work); @@ -632,6 +701,7 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) if (mlx5_sync_reset_clear_reset_requested(dev, false)) return; + set_bit(MLX5_FW_RESET_FLAGS_UNLOAD_EVENT, &fw_reset->reset_flags); mlx5_core_warn(dev, "Sync Reset Unload. Function is forced down.\n"); err = mlx5_cmd_fast_teardown_hca(dev); @@ -640,49 +710,7 @@ static void mlx5_sync_reset_unload_event(struct work_struct *work) else mlx5_enter_error_state(dev, true); - if (test_bit(MLX5_FW_RESET_FLAGS_PENDING_COMP, &fw_reset->reset_flags)) - mlx5_unload_one_devl_locked(dev, false); - else - mlx5_unload_one(dev, false); - - mlx5_set_fw_rst_ack(dev); - mlx5_core_warn(dev, "Sync Reset Unload done, device reset expected\n"); - - reset_action = false; - timeout = jiffies + msecs_to_jiffies(mlx5_tout_ms(dev, RESET_UNLOAD)); - do { - rst_state = mlx5_get_fw_rst_state(dev); - if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ || - rst_state == MLX5_FW_RST_STATE_IDLE) { - reset_action = true; - break; - } - if (rst_state == MLX5_FW_RST_STATE_DROP_MODE) { - mlx5_core_info(dev, "Sync Reset Drop mode ack\n"); - mlx5_set_fw_rst_ack(dev); - poll_freq = 1000; - } - msleep(poll_freq); - } while (!time_after(jiffies, timeout)); - - if (!reset_action) { - mlx5_core_err(dev, "Got timeout waiting for sync reset action, state = %u\n", - rst_state); - fw_reset->ret = -ETIMEDOUT; - goto done; - } - - mlx5_core_warn(dev, "Sync Reset, got reset action. 
rst_state = %u\n", rst_state); - if (rst_state == MLX5_FW_RST_STATE_TOGGLE_REQ) { - err = mlx5_sync_pci_reset(dev, fw_reset->reset_method); - if (err) { - mlx5_core_warn(dev, "mlx5_sync_pci_reset failed, err %d\n", err); - fw_reset->ret = err; - } - } - -done: - mlx5_fw_reset_complete_reload(dev, true); + mlx5_fw_reset_complete_reload(dev); } static void mlx5_sync_reset_abort_event(struct work_struct *work) @@ -719,6 +747,19 @@ static void mlx5_sync_reset_events_handle(struct mlx5_fw_reset *fw_reset, struct } } +static void mlx5_sync_reset_timeout_work(struct work_struct *work) +{ + struct delayed_work *dwork = container_of(work, struct delayed_work, + work); + struct mlx5_fw_reset *fw_reset = + container_of(dwork, struct mlx5_fw_reset, reset_timeout_work); + struct mlx5_core_dev *dev = fw_reset->dev; + + if (mlx5_sync_reset_clear_reset_requested(dev, true)) + return; + mlx5_core_warn(dev, "PCI Sync FW Update Reset Timeout.\n"); +} + static int fw_reset_event_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct mlx5_fw_reset *fw_reset = mlx5_nb_cof(nb, struct mlx5_fw_reset, nb); @@ -802,6 +843,7 @@ void mlx5_drain_fw_reset(struct mlx5_core_dev *dev) cancel_work_sync(&fw_reset->reset_reload_work); cancel_work_sync(&fw_reset->reset_now_work); cancel_work_sync(&fw_reset->reset_abort_work); + cancel_delayed_work(&fw_reset->reset_timeout_work); } static const struct devlink_param mlx5_fw_reset_devlink_params[] = { @@ -845,6 +887,8 @@ int mlx5_fw_reset_init(struct mlx5_core_dev *dev) INIT_WORK(&fw_reset->reset_reload_work, mlx5_sync_reset_reload_work); INIT_WORK(&fw_reset->reset_now_work, mlx5_sync_reset_now_event); INIT_WORK(&fw_reset->reset_abort_work, mlx5_sync_reset_abort_event); + INIT_DELAYED_WORK(&fw_reset->reset_timeout_work, + mlx5_sync_reset_timeout_work); init_completion(&fw_reset->done); return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h index ea527d06a85f..d5b28525c960 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw_reset.h @@ -12,6 +12,7 @@ int mlx5_fw_reset_set_reset_sync(struct mlx5_core_dev *dev, u8 reset_type_sel, int mlx5_fw_reset_set_live_patch(struct mlx5_core_dev *dev); int mlx5_fw_reset_wait_reset_done(struct mlx5_core_dev *dev); +void mlx5_sync_reset_unload_flow(struct mlx5_core_dev *dev, bool locked); int mlx5_fw_reset_verify_fw_complete(struct mlx5_core_dev *dev, struct netlink_ext_ack *extack); void mlx5_fw_reset_events_start(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index cf7a1edd0530..aeeb136f5ebc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -669,57 +669,64 @@ static void mlx5_fw_fatal_reporter_err_work(struct work_struct *work) } } +#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 +#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 +#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 +#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD \ + MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD + +static +const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ecpf_ops = { + .name = "fw_fatal", + .recover = mlx5_fw_fatal_reporter_recover, + .dump = mlx5_fw_fatal_reporter_dump, + .default_graceful_period = + MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD, +}; + static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_pf_ops = { .name = 
"fw_fatal", .recover = mlx5_fw_fatal_reporter_recover, .dump = mlx5_fw_fatal_reporter_dump, + .default_graceful_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD, }; static const struct devlink_health_reporter_ops mlx5_fw_fatal_reporter_ops = { .name = "fw_fatal", .recover = mlx5_fw_fatal_reporter_recover, + .default_graceful_period = + MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD, }; -#define MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD 180000 -#define MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD 60000 -#define MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD 30000 -#define MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD MLX5_FW_REPORTER_VF_GRACEFUL_PERIOD - void mlx5_fw_reporters_create(struct mlx5_core_dev *dev) { const struct devlink_health_reporter_ops *fw_fatal_ops; struct mlx5_core_health *health = &dev->priv.health; const struct devlink_health_reporter_ops *fw_ops; struct devlink *devlink = priv_to_devlink(dev); - u64 grace_period; - fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops; fw_ops = &mlx5_fw_reporter_pf_ops; if (mlx5_core_is_ecpf(dev)) { - grace_period = MLX5_FW_REPORTER_ECPF_GRACEFUL_PERIOD; + fw_fatal_ops = &mlx5_fw_fatal_reporter_ecpf_ops; } else if (mlx5_core_is_pf(dev)) { - grace_period = MLX5_FW_REPORTER_PF_GRACEFUL_PERIOD; + fw_fatal_ops = &mlx5_fw_fatal_reporter_pf_ops; } else { /* VF or SF */ - grace_period = MLX5_FW_REPORTER_DEFAULT_GRACEFUL_PERIOD; fw_fatal_ops = &mlx5_fw_fatal_reporter_ops; fw_ops = &mlx5_fw_reporter_ops; } - health->fw_reporter = - devl_health_reporter_create(devlink, fw_ops, 0, dev); + health->fw_reporter = devl_health_reporter_create(devlink, fw_ops, dev); if (IS_ERR(health->fw_reporter)) - mlx5_core_warn(dev, "Failed to create fw reporter, err = %ld\n", - PTR_ERR(health->fw_reporter)); - - health->fw_fatal_reporter = - devl_health_reporter_create(devlink, - fw_fatal_ops, - grace_period, - dev); + mlx5_core_warn(dev, "Failed to create fw reporter, err = %pe\n", + health->fw_reporter); + + health->fw_fatal_reporter = devl_health_reporter_create(devlink, + fw_fatal_ops, + dev); if (IS_ERR(health->fw_fatal_reporter)) - mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %ld\n", - PTR_ERR(health->fw_fatal_reporter)); + mlx5_core_warn(dev, "Failed to create fw fatal reporter, err = %pe\n", + health->fw_fatal_reporter); } static void mlx5_fw_reporters_destroy(struct mlx5_core_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c index 82d3c2568244..14d339eceb92 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/irq_affinity.c @@ -150,8 +150,8 @@ mlx5_irq_affinity_request(struct mlx5_core_dev *dev, struct mlx5_irq_pool *pool, if (IS_ERR(new_irq)) { if (!least_loaded_irq) { /* We failed to create an IRQ and we didn't find an IRQ */ - mlx5_core_err(pool->dev, "Didn't find a matching IRQ. err = %ld\n", - PTR_ERR(new_irq)); + mlx5_core_err(pool->dev, "Didn't find a matching IRQ. 
err = %pe\n", + new_irq); mutex_unlock(&pool->lock); return new_irq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c index d058cbb4a00c..59c00c911275 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -35,6 +35,7 @@ #include <linux/mlx5/driver.h> #include <linux/mlx5/eswitch.h> #include <linux/mlx5/vport.h> +#include "lib/mlx5.h" #include "lib/devcom.h" #include "mlx5_core.h" #include "eswitch.h" @@ -231,9 +232,13 @@ static void mlx5_do_bond_work(struct work_struct *work); static void mlx5_ldev_free(struct kref *ref) { struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); + struct net *net; + + if (ldev->nb.notifier_call) { + net = read_pnet(&ldev->net); + unregister_netdevice_notifier_net(net, &ldev->nb); + } - if (ldev->nb.notifier_call) - unregister_netdevice_notifier_net(&init_net, &ldev->nb); mlx5_lag_mp_cleanup(ldev); cancel_delayed_work_sync(&ldev->bond_work); destroy_workqueue(ldev->wq); @@ -271,7 +276,8 @@ static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); ldev->nb.notifier_call = mlx5_lag_netdev_event; - if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { + write_pnet(&ldev->net, mlx5_core_net(dev)); + if (register_netdevice_notifier_net(read_pnet(&ldev->net), &ldev->nb)) { ldev->nb.notifier_call = NULL; mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); } @@ -1404,6 +1410,36 @@ static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) return 0; } +static void mlx5_lag_unregister_hca_devcom_comp(struct mlx5_core_dev *dev) +{ + mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp); +} + +static int mlx5_lag_register_hca_devcom_comp(struct mlx5_core_dev *dev) +{ + struct mlx5_devcom_match_attr attr = { + .key.val = mlx5_query_nic_system_image_guid(dev), + .flags = MLX5_DEVCOM_MATCH_FLAGS_NS, + .net = mlx5_core_net(dev), + }; + + /* This component is use to sync adding core_dev to lag_dev and to sync + * changes of mlx5_adev_devices between LAG layer and other layers. 
+ */ + dev->priv.hca_devcom_comp = + mlx5_devcom_register_component(dev->priv.devc, + MLX5_DEVCOM_HCA_PORTS, + &attr, NULL, dev); + if (IS_ERR(dev->priv.hca_devcom_comp)) { + mlx5_core_err(dev, + "Failed to register devcom HCA component, err: %ld\n", + PTR_ERR(dev->priv.hca_devcom_comp)); + return PTR_ERR(dev->priv.hca_devcom_comp); + } + + return 0; +} + void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) { struct mlx5_lag *ldev; @@ -1425,6 +1461,7 @@ recheck: } mlx5_ldev_remove_mdev(ldev, dev); mutex_unlock(&ldev->lock); + mlx5_lag_unregister_hca_devcom_comp(dev); mlx5_ldev_put(ldev); } @@ -1435,7 +1472,7 @@ void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) if (!mlx5_lag_is_supported(dev)) return; - if (IS_ERR_OR_NULL(dev->priv.hca_devcom_comp)) + if (mlx5_lag_register_hca_devcom_comp(dev)) return; recheck: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h index c2f256bb2bc2..4918eee2b3da 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -67,6 +67,7 @@ struct mlx5_lag { struct workqueue_struct *wq; struct delayed_work bond_work; struct notifier_block nb; + possible_net_t net; struct lag_mp lag_mp; struct mlx5_lag_port_sel port_sel; /* Protect lag fields/state changes */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index 58bd749b5e4d..129725159a93 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -100,7 +100,7 @@ static int create_aso_cq(struct mlx5_aso_cq *cq, void *cqc_data) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); - MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.bfreg.up->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); @@ -129,7 +129,7 @@ static int mlx5_aso_create_cq(struct mlx5_core_dev *mdev, int numa_node, return -ENOMEM; MLX5_SET(cqc, cqc_data, log_cq_size, 1); - MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.bfreg.up->index); if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) MLX5_SET(cqc, cqc_data, cqe_sz, CQE_STRIDE_128_PAD); @@ -163,7 +163,7 @@ static int mlx5_aso_alloc_sq(struct mlx5_core_dev *mdev, int numa_node, struct mlx5_wq_param param; int err; - sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->uar_map = mdev->priv.bfreg.map; param.db_numa_node = numa_node; param.buf_numa_node = numa_node; @@ -203,7 +203,7 @@ static int create_aso_sq(struct mlx5_core_dev *mdev, int pdn, MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, uar_page, mdev->priv.bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index 214d732d18e9..d0ba83d77cd1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -247,27 +247,24 @@ static bool mlx5_is_ptm_source_time_available(struct mlx5_core_dev *dev) return 
!!MLX5_GET(mtptm_reg, out, psta); } -static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, - struct system_counterval_t *sys_counterval, - void *ctx) +static int mlx5_mtctr_read(struct mlx5_core_dev *mdev, + bool real_time_mode, + struct system_counterval_t *sys_counterval, + u64 *device) { u32 out[MLX5_ST_SZ_DW(mtctr_reg)] = {0}; u32 in[MLX5_ST_SZ_DW(mtctr_reg)] = {0}; - struct mlx5_core_dev *mdev = ctx; - bool real_time_mode; - u64 host, device; + u64 host; int err; - real_time_mode = mlx5_real_time_mode(mdev); - MLX5_SET(mtctr_reg, in, first_clock_timestamp_request, MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK); MLX5_SET(mtctr_reg, in, second_clock_timestamp_request, real_time_mode ? MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK : - MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER); + MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER); - err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_MTCTR, - 0, 0); + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), + MLX5_REG_MTCTR, 0, 0); if (err) return err; @@ -281,8 +278,26 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, .cs_id = CSID_X86_ART, .use_nsecs = true, }; + *device = MLX5_GET64(mtctr_reg, out, second_clock_timestamp); + + return 0; +} + +static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, + struct system_counterval_t *sys_counterval, + void *ctx) +{ + struct mlx5_core_dev *mdev = ctx; + bool real_time_mode; + u64 device; + int err; + + real_time_mode = mlx5_real_time_mode(mdev); + + err = mlx5_mtctr_read(mdev, real_time_mode, sys_counterval, &device); + if (err) + return err; - device = MLX5_GET64(mtctr_reg, out, second_clock_timestamp); if (real_time_mode) *device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, device & U32_MAX)); else @@ -291,6 +306,23 @@ static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, return 0; } +static int +mlx5_mtctr_syncdevicecyclestime(ktime_t *device_time, + struct system_counterval_t *sys_counterval, + void *ctx) +{ + struct mlx5_core_dev *mdev = ctx; + u64 device; + int err; + + err = mlx5_mtctr_read(mdev, false, sys_counterval, &device); + if (err) + return err; + *device_time = ns_to_ktime(device); + + return 0; +} + static int mlx5_ptp_getcrosststamp(struct ptp_clock_info *ptp, struct system_device_crosststamp *cts) { @@ -315,6 +347,32 @@ unlock: mlx5_clock_unlock(clock); return err; } + +static int mlx5_ptp_getcrosscycles(struct ptp_clock_info *ptp, + struct system_device_crosststamp *cts) +{ + struct mlx5_clock *clock = + container_of(ptp, struct mlx5_clock, ptp_info); + struct system_time_snapshot history_begin = {0}; + struct mlx5_core_dev *mdev; + int err; + + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + + if (!mlx5_is_ptm_source_time_available(mdev)) { + err = -EBUSY; + goto unlock; + } + + ktime_get_snapshot(&history_begin); + + err = get_device_system_crosststamp(mlx5_mtctr_syncdevicecyclestime, + mdev, &history_begin, cts); +unlock: + mlx5_clock_unlock(clock); + return err; +} #endif /* CONFIG_X86 */ static u64 mlx5_read_time(struct mlx5_core_dev *dev, @@ -513,6 +571,24 @@ out: return 0; } +static int mlx5_ptp_getcyclesx(struct ptp_clock_info *ptp, + struct timespec64 *ts, + struct ptp_system_timestamp *sts) +{ + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, + ptp_info); + struct mlx5_core_dev *mdev; + u64 cycles; + + mlx5_clock_lock(clock); + mdev = mlx5_clock_mdev_get(clock); + + cycles = mlx5_read_time(mdev, sts, false); + *ts = ns_to_timespec64(cycles); + mlx5_clock_unlock(clock); + return 0; +} + static int 
mlx5_ptp_adjtime_real_time(struct mlx5_core_dev *mdev, s64 delta) { u32 in[MLX5_ST_SZ_DW(mtutc_reg)] = {}; @@ -1229,6 +1305,7 @@ static void mlx5_init_timer_max_freq_adjustment(struct mlx5_core_dev *mdev) static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) { struct mlx5_clock *clock = mdev->clock; + bool expose_cycles; /* Configure the PHC */ clock->ptp_info = mlx5_ptp_clock_info; @@ -1236,12 +1313,22 @@ static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) if (MLX5_CAP_MCAM_REG(mdev, mtutc)) mlx5_init_timer_max_freq_adjustment(mdev); + expose_cycles = !MLX5_CAP_GEN(mdev, disciplined_fr_counter) || + !mlx5_real_time_mode(mdev); + #ifdef CONFIG_X86 if (MLX5_CAP_MCAM_REG3(mdev, mtptm) && - MLX5_CAP_MCAM_REG3(mdev, mtctr) && boot_cpu_has(X86_FEATURE_ART)) + MLX5_CAP_MCAM_REG3(mdev, mtctr) && boot_cpu_has(X86_FEATURE_ART)) { clock->ptp_info.getcrosststamp = mlx5_ptp_getcrosststamp; + if (expose_cycles) + clock->ptp_info.getcrosscycles = + mlx5_ptp_getcrosscycles; + } #endif /* CONFIG_X86 */ + if (expose_cycles) + clock->ptp_info.getcyclesx64 = mlx5_ptp_getcyclesx; + mlx5_timecounter_init(mdev); mlx5_init_clock_info(mdev); mlx5_init_overflow_period(mdev); @@ -1278,9 +1365,9 @@ static void mlx5_init_clock_dev(struct mlx5_core_dev *mdev) clock->ptp = ptp_clock_register(&clock->ptp_info, clock->shared ? NULL : &mdev->pdev->dev); if (IS_ERR(clock->ptp)) { - mlx5_core_warn(mdev, "%sptp_clock_register failed %ld\n", + mlx5_core_warn(mdev, "%sptp_clock_register failed %pe\n", clock->shared ? "shared clock " : "", - PTR_ERR(clock->ptp)); + clock->ptp); clock->ptp = NULL; } @@ -1348,14 +1435,20 @@ static int mlx5_clock_alloc(struct mlx5_core_dev *mdev, bool shared) static void mlx5_shared_clock_register(struct mlx5_core_dev *mdev, u64 key) { struct mlx5_core_dev *peer_dev, *next = NULL; + struct mlx5_devcom_match_attr attr = { + .key.val = key, + }; + struct mlx5_devcom_comp_dev *compd; struct mlx5_devcom_comp_dev *pos; - mdev->clock_state->compdev = mlx5_devcom_register_component(mdev->priv.devc, - MLX5_DEVCOM_SHARED_CLOCK, - key, NULL, mdev); - if (IS_ERR(mdev->clock_state->compdev)) + compd = mlx5_devcom_register_component(mdev->priv.devc, + MLX5_DEVCOM_SHARED_CLOCK, + &attr, NULL, mdev); + if (IS_ERR(compd)) return; + mdev->clock_state->compdev = compd; + mlx5_devcom_comp_lock(mdev->clock_state->compdev); mlx5_devcom_for_each_peer_entry(mdev->clock_state->compdev, peer_dev, pos) { if (peer_dev->clock) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h index c819c047bb9c..4821163a547f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/crypto.h @@ -8,6 +8,7 @@ enum { MLX5_ACCEL_OBJ_TLS_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_TLS, MLX5_ACCEL_OBJ_IPSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_IPSEC, MLX5_ACCEL_OBJ_MACSEC_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_MACSEC, + MLX5_ACCEL_OBJ_PSP_KEY = MLX5_GENERAL_OBJECT_TYPE_ENCRYPTION_KEY_PURPOSE_PSP, MLX5_ACCEL_OBJ_TYPE_KEY_NUM, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c index 7b0766c89f4c..faa2833602c8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.c @@ -4,6 +4,7 @@ #include <linux/mlx5/vport.h> #include <linux/list.h> #include "lib/devcom.h" +#include "lib/mlx5.h" #include "mlx5_core.h" static 
LIST_HEAD(devcom_dev_list); @@ -22,11 +23,17 @@ struct mlx5_devcom_dev { struct kref ref; }; +struct mlx5_devcom_key { + u32 flags; + union mlx5_devcom_match_key key; + possible_net_t net; +}; + struct mlx5_devcom_comp { struct list_head comp_list; enum mlx5_devcom_component id; - u64 key; struct list_head comp_dev_list_head; + struct mlx5_devcom_key key; mlx5_devcom_event_handler_t handler; struct kref ref; bool ready; @@ -108,7 +115,8 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc) } static struct mlx5_devcom_comp * -mlx5_devcom_comp_alloc(u64 id, u64 key, mlx5_devcom_event_handler_t handler) +mlx5_devcom_comp_alloc(u64 id, const struct mlx5_devcom_match_attr *attr, + mlx5_devcom_event_handler_t handler) { struct mlx5_devcom_comp *comp; @@ -117,7 +125,10 @@ mlx5_devcom_comp_alloc(u64 id, u64 key, mlx5_devcom_event_handler_t handler) return ERR_PTR(-ENOMEM); comp->id = id; - comp->key = key; + comp->key.key = attr->key; + comp->key.flags = attr->flags; + if (attr->flags & MLX5_DEVCOM_MATCH_FLAGS_NS) + write_pnet(&comp->key.net, attr->net); comp->handler = handler; init_rwsem(&comp->sem); lockdep_register_key(&comp->lock_key); @@ -180,21 +191,34 @@ devcom_free_comp_dev(struct mlx5_devcom_comp_dev *devcom) static bool devcom_component_equal(struct mlx5_devcom_comp *devcom, enum mlx5_devcom_component id, - u64 key) + const struct mlx5_devcom_match_attr *attr) { - return devcom->id == id && devcom->key == key; + if (devcom->id != id) + return false; + + if (devcom->key.flags != attr->flags) + return false; + + if (memcmp(&devcom->key.key, &attr->key, sizeof(devcom->key.key))) + return false; + + if (devcom->key.flags & MLX5_DEVCOM_MATCH_FLAGS_NS && + !net_eq(read_pnet(&devcom->key.net), attr->net)) + return false; + + return true; } static struct mlx5_devcom_comp * devcom_component_get(struct mlx5_devcom_dev *devc, enum mlx5_devcom_component id, - u64 key, + const struct mlx5_devcom_match_attr *attr, mlx5_devcom_event_handler_t handler) { struct mlx5_devcom_comp *comp; devcom_for_each_component(comp) { - if (devcom_component_equal(comp, id, key)) { + if (devcom_component_equal(comp, id, attr)) { if (handler == comp->handler) { kref_get(&comp->ref); return comp; @@ -212,7 +236,7 @@ devcom_component_get(struct mlx5_devcom_dev *devc, struct mlx5_devcom_comp_dev * mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, enum mlx5_devcom_component id, - u64 key, + const struct mlx5_devcom_match_attr *attr, mlx5_devcom_event_handler_t handler, void *data) { @@ -223,14 +247,14 @@ mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, return ERR_PTR(-EINVAL); mutex_lock(&comp_list_lock); - comp = devcom_component_get(devc, id, key, handler); + comp = devcom_component_get(devc, id, attr, handler); if (IS_ERR(comp)) { devcom = ERR_PTR(-EINVAL); goto out_unlock; } if (!comp) { - comp = mlx5_devcom_comp_alloc(id, key, handler); + comp = mlx5_devcom_comp_alloc(id, attr, handler); if (IS_ERR(comp)) { devcom = ERR_CAST(comp); goto out_unlock; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h index c79699b94a02..609c85f47917 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/devcom.h @@ -6,6 +6,20 @@ #include <linux/mlx5/driver.h> +enum mlx5_devom_match_flags { + MLX5_DEVCOM_MATCH_FLAGS_NS = BIT(0), +}; + +union mlx5_devcom_match_key { + u64 val; +}; + +struct mlx5_devcom_match_attr { + u32 flags; + union mlx5_devcom_match_key key; + struct net 
*net; +}; + enum mlx5_devcom_component { MLX5_DEVCOM_ESW_OFFLOADS, MLX5_DEVCOM_MPV, @@ -25,7 +39,7 @@ void mlx5_devcom_unregister_device(struct mlx5_devcom_dev *devc); struct mlx5_devcom_comp_dev * mlx5_devcom_register_component(struct mlx5_devcom_dev *devc, enum mlx5_devcom_component id, - u64 key, + const struct mlx5_devcom_match_attr *attr, mlx5_devcom_event_handler_t handler, void *data); void mlx5_devcom_unregister_component(struct mlx5_devcom_comp_dev *devcom); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c index ca9ecec358b2..7adad784ad46 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.c @@ -9,7 +9,7 @@ #include "mlx5_core.h" #include "lib/fs_ttc.h" -#define MLX5_TTC_MAX_NUM_GROUPS 4 +#define MLX5_TTC_MAX_NUM_GROUPS 7 #define MLX5_TTC_GROUP_TCPUDP_SIZE (MLX5_TT_IPV6_UDP + 1) struct mlx5_fs_ttc_groups { @@ -31,10 +31,14 @@ static int mlx5_fs_ttc_table_size(const struct mlx5_fs_ttc_groups *groups) /* L3/L4 traffic type classifier */ struct mlx5_ttc_table { int num_groups; + const struct mlx5_fs_ttc_groups *groups; + struct mlx5_core_dev *mdev; struct mlx5_flow_table *t; struct mlx5_flow_group **g; struct mlx5_ttc_rule rules[MLX5_NUM_TT]; struct mlx5_flow_handle *tunnel_rules[MLX5_NUM_TUNNEL_TT]; + u32 refcnt; + struct mutex mutex; /* Protect adding rules for ipsec crypto offload */ }; struct mlx5_flow_table *mlx5_get_ttc_flow_table(struct mlx5_ttc_table *ttc) @@ -163,6 +167,8 @@ static struct mlx5_etype_proto ttc_tunnel_rules[] = { enum TTC_GROUP_TYPE { TTC_GROUPS_DEFAULT = 0, TTC_GROUPS_USE_L4_TYPE = 1, + TTC_GROUPS_DEFAULT_ESP = 2, + TTC_GROUPS_USE_L4_TYPE_ESP = 3, }; static const struct mlx5_fs_ttc_groups ttc_groups[] = { @@ -184,6 +190,31 @@ static const struct mlx5_fs_ttc_groups ttc_groups[] = { BIT(0), }, }, + [TTC_GROUPS_DEFAULT_ESP] = { + .num_groups = 6, + .group_size = { + MLX5_TTC_GROUP_TCPUDP_SIZE + BIT(1) + + MLX5_NUM_TUNNEL_TT, + BIT(2), /* decrypted outer L4 */ + BIT(2), /* decrypted inner L4 */ + BIT(1), /* ESP */ + BIT(1), + BIT(0), + }, + }, + [TTC_GROUPS_USE_L4_TYPE_ESP] = { + .use_l4_type = true, + .num_groups = 7, + .group_size = { + MLX5_TTC_GROUP_TCPUDP_SIZE, + BIT(1) + MLX5_NUM_TUNNEL_TT, + BIT(2), /* decrypted outer L4 */ + BIT(2), /* decrypted inner L4 */ + BIT(1), /* ESP */ + BIT(1), + BIT(0), + }, + }, }; static const struct mlx5_fs_ttc_groups inner_ttc_groups[] = { @@ -207,6 +238,23 @@ static const struct mlx5_fs_ttc_groups inner_ttc_groups[] = { }, }; +static const struct mlx5_fs_ttc_groups * +mlx5_ttc_get_fs_groups(bool use_l4_type, bool ipsec_rss) +{ + if (!ipsec_rss) + return use_l4_type ? &ttc_groups[TTC_GROUPS_USE_L4_TYPE] : + &ttc_groups[TTC_GROUPS_DEFAULT]; + + return use_l4_type ? 
&ttc_groups[TTC_GROUPS_USE_L4_TYPE_ESP] : + &ttc_groups[TTC_GROUPS_DEFAULT_ESP]; +} + +bool mlx5_ttc_has_esp_flow_group(struct mlx5_ttc_table *ttc) +{ + return ttc->groups == &ttc_groups[TTC_GROUPS_DEFAULT_ESP] || + ttc->groups == &ttc_groups[TTC_GROUPS_USE_L4_TYPE_ESP]; +} + u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt) { return ttc_tunnel_rules[tt].proto; } @@ -257,6 +305,31 @@ static u8 mlx5_etype_to_ipv(u16 ethertype) return 0; } +static void mlx5_fs_ttc_set_match_ipv_outer(struct mlx5_core_dev *mdev, + struct mlx5_flow_spec *spec, + u16 etype) +{ + int match_ipv_outer = + MLX5_CAP_FLOWTABLE_NIC_RX(mdev, + ft_field_support.outer_ip_version); + u8 ipv; + + ipv = mlx5_etype_to_ipv(etype); + if (match_ipv_outer && ipv) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.ip_version, ipv); + } else { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.ethertype); + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.ethertype, etype); + } + + spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; +} + static void mlx5_fs_ttc_set_match_proto(void *headers_c, void *headers_v, u8 proto, bool use_l4_type) { @@ -279,16 +352,12 @@ static void mlx5_fs_ttc_set_match_proto(void *headers_c, void *headers_v, static struct mlx5_flow_handle * mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct mlx5_flow_destination *dest, u16 etype, u8 proto, - bool use_l4_type) + bool use_l4_type, bool ipsec_rss) { - int match_ipv_outer = - MLX5_CAP_FLOWTABLE_NIC_RX(dev, - ft_field_support.outer_ip_version); MLX5_DECLARE_FLOW_ACT(flow_act); struct mlx5_flow_handle *rule; struct mlx5_flow_spec *spec; int err = 0; - u8 ipv; spec = kvzalloc(sizeof(*spec), GFP_KERNEL); if (!spec) @@ -305,15 +374,15 @@ mlx5_generate_ttc_rule(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, proto, use_l4_type); } - ipv = mlx5_etype_to_ipv(etype); - if (match_ipv_outer && ipv) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ip_version); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ip_version, ipv); - } else if (etype) { - spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; - MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.ethertype); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype); + if (etype) + mlx5_fs_ttc_set_match_ipv_outer(dev, spec, etype); + + if (ipsec_rss && proto == IPPROTO_ESP) { + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + misc_parameters_2.ipsec_next_header); + MLX5_SET(fte_match_param, spec->match_value, + misc_parameters_2.ipsec_next_header, 0); + spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2; } rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1); @@ -342,12 +411,16 @@ static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, for (tt = 0; tt < MLX5_NUM_TT; tt++) { struct mlx5_ttc_rule *rule = &rules[tt]; + if (mlx5_ttc_is_decrypted_esp_tt(tt)) + continue; + if (test_bit(tt, params->ignore_dests)) continue; rule->rule = mlx5_generate_ttc_rule(dev, ft, &params->dests[tt], ttc_rules[tt].etype, ttc_rules[tt].proto, - use_l4_type); + use_l4_type, + params->ipsec_rss); if (IS_ERR(rule->rule)) { err = PTR_ERR(rule->rule); rule->rule = NULL; @@ -370,7 +443,7 @@ static int mlx5_generate_ttc_table_rules(struct mlx5_core_dev *dev, &params->tunnel_dests[tt], 
ttc_tunnel_rules[tt].etype, ttc_tunnel_rules[tt].proto, - use_l4_type); + use_l4_type, false); if (IS_ERR(trules[tt])) { err = PTR_ERR(trules[tt]); trules[tt] = NULL; @@ -385,10 +458,78 @@ del_rules: return err; } +static int mlx5_create_ttc_table_ipsec_groups(struct mlx5_ttc_table *ttc, + bool use_ipv, + u32 *in, int *next_ix) +{ + u8 *mc = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + const struct mlx5_fs_ttc_groups *groups = ttc->groups; + int ix = *next_ix; + + MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + + /* decrypted ESP outer group */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.l4_type_ext); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += groups->group_size[ttc->num_groups]; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + MLX5_SET(fte_match_param, mc, outer_headers.l4_type_ext, 0); + + /* decrypted ESP inner group */ + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_INNER_HEADERS); + if (use_ipv) + MLX5_SET(fte_match_param, mc, outer_headers.ip_version, 0); + else + MLX5_SET(fte_match_param, mc, outer_headers.ethertype, 0); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.ip_version); + MLX5_SET_TO_ONES(fte_match_param, mc, inner_headers.l4_type_ext); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += groups->group_size[ttc->num_groups]; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + MLX5_SET(fte_match_param, mc, inner_headers.ip_version, 0); + MLX5_SET(fte_match_param, mc, inner_headers.l4_type_ext, 0); + + /* undecrypted ESP group */ + MLX5_SET_CFG(in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + if (use_ipv) + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_version); + else + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ethertype); + MLX5_SET_TO_ONES(fte_match_param, mc, outer_headers.ip_protocol); + MLX5_SET_TO_ONES(fte_match_param, mc, + misc_parameters_2.ipsec_next_header); + MLX5_SET_CFG(in, start_flow_index, ix); + ix += groups->group_size[ttc->num_groups]; + MLX5_SET_CFG(in, end_flow_index, ix - 1); + ttc->g[ttc->num_groups] = mlx5_create_flow_group(ttc->t, in); + if (IS_ERR(ttc->g[ttc->num_groups])) + goto err; + ttc->num_groups++; + + *next_ix = ix; + + return 0; + +err: + return PTR_ERR(ttc->g[ttc->num_groups]); +} + static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, - bool use_ipv, - const struct mlx5_fs_ttc_groups *groups) + bool use_ipv) { + const struct mlx5_fs_ttc_groups *groups = ttc->groups; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); int ix = 0; u32 *in; @@ -436,8 +577,18 @@ static int mlx5_create_ttc_table_groups(struct mlx5_ttc_table *ttc, goto err; ttc->num_groups++; + if (mlx5_ttc_has_esp_flow_group(ttc)) { + err = mlx5_create_ttc_table_ipsec_groups(ttc, use_ipv, in, &ix); + if (err) + goto err; + + MLX5_SET(fte_match_param, mc, + misc_parameters_2.ipsec_next_header, 0); + } + /* L3 Group */ MLX5_SET(fte_match_param, mc, outer_headers.ip_protocol, 0); + MLX5_SET_CFG(in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); MLX5_SET_CFG(in, start_flow_index, ix); ix += groups->group_size[ttc->num_groups]; MLX5_SET_CFG(in, end_flow_index, ix - 1); @@ -527,6 +678,9 @@ static int 
mlx5_generate_inner_ttc_table_rules(struct mlx5_core_dev *dev, for (tt = 0; tt < MLX5_NUM_TT; tt++) { struct mlx5_ttc_rule *rule = &rules[tt]; + if (mlx5_ttc_is_decrypted_esp_tt(tt)) + continue; + if (test_bit(tt, params->ignore_dests)) continue; rule->rule = mlx5_generate_inner_ttc_rule(dev, ft, @@ -700,6 +854,7 @@ void mlx5_destroy_ttc_table(struct mlx5_ttc_table *ttc) kfree(ttc->g); mlx5_destroy_flow_table(ttc->t); + mutex_destroy(&ttc->mutex); kvfree(ttc); } @@ -709,7 +864,6 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, bool match_ipv_outer = MLX5_CAP_FLOWTABLE_NIC_RX(dev, ft_field_support.outer_ip_version); - const struct mlx5_fs_ttc_groups *groups; struct mlx5_flow_namespace *ns; struct mlx5_ttc_table *ttc; bool use_l4_type; @@ -738,11 +892,10 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(-EOPNOTSUPP); } - groups = use_l4_type ? &ttc_groups[TTC_GROUPS_USE_L4_TYPE] : - &ttc_groups[TTC_GROUPS_DEFAULT]; + ttc->groups = mlx5_ttc_get_fs_groups(use_l4_type, params->ipsec_rss); WARN_ON_ONCE(params->ft_attr.max_fte); - params->ft_attr.max_fte = mlx5_fs_ttc_table_size(groups); + params->ft_attr.max_fte = mlx5_fs_ttc_table_size(ttc->groups); ttc->t = mlx5_create_flow_table(ns, &params->ft_attr); if (IS_ERR(ttc->t)) { err = PTR_ERR(ttc->t); @@ -750,7 +903,7 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, return ERR_PTR(err); } - err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer, groups); + err = mlx5_create_ttc_table_groups(ttc, match_ipv_outer); if (err) goto destroy_ft; @@ -758,6 +911,9 @@ struct mlx5_ttc_table *mlx5_create_ttc_table(struct mlx5_core_dev *dev, if (err) goto destroy_ft; + ttc->mdev = dev; + mutex_init(&ttc->mutex); + return ttc; destroy_ft: @@ -791,3 +947,194 @@ int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, return mlx5_ttc_fwd_dest(ttc, type, &dest); } + +static void _mlx5_ttc_destroy_ipsec_rules(struct mlx5_ttc_table *ttc) +{ + enum mlx5_traffic_types i; + + for (i = MLX5_TT_DECRYPTED_ESP_OUTER_IPV4_TCP; + i <= MLX5_TT_DECRYPTED_ESP_INNER_IPV6_UDP; i++) { + if (!ttc->rules[i].rule) + continue; + + mlx5_del_flow_rules(ttc->rules[i].rule); + ttc->rules[i].rule = NULL; + } +} + +void mlx5_ttc_destroy_ipsec_rules(struct mlx5_ttc_table *ttc) +{ + if (!mlx5_ttc_has_esp_flow_group(ttc)) + return; + + mutex_lock(&ttc->mutex); + if (--ttc->refcnt) + goto unlock; + + _mlx5_ttc_destroy_ipsec_rules(ttc); +unlock: + mutex_unlock(&ttc->mutex); +} + +static int mlx5_ttc_get_tt_attrs(enum mlx5_traffic_types type, + u16 *etype, int *l4_type_ext, + enum mlx5_traffic_types *tir_tt) +{ + switch (type) { + case MLX5_TT_DECRYPTED_ESP_OUTER_IPV4_TCP: + case MLX5_TT_DECRYPTED_ESP_INNER_IPV4_TCP: + *etype = ETH_P_IP; + *l4_type_ext = MLX5_PACKET_L4_TYPE_EXT_TCP; + *tir_tt = MLX5_TT_IPV4_TCP; + break; + case MLX5_TT_DECRYPTED_ESP_OUTER_IPV6_TCP: + case MLX5_TT_DECRYPTED_ESP_INNER_IPV6_TCP: + *etype = ETH_P_IPV6; + *l4_type_ext = MLX5_PACKET_L4_TYPE_EXT_TCP; + *tir_tt = MLX5_TT_IPV6_TCP; + break; + case MLX5_TT_DECRYPTED_ESP_OUTER_IPV4_UDP: + case MLX5_TT_DECRYPTED_ESP_INNER_IPV4_UDP: + *etype = ETH_P_IP; + *l4_type_ext = MLX5_PACKET_L4_TYPE_EXT_UDP; + *tir_tt = MLX5_TT_IPV4_UDP; + break; + case MLX5_TT_DECRYPTED_ESP_OUTER_IPV6_UDP: + case MLX5_TT_DECRYPTED_ESP_INNER_IPV6_UDP: + *etype = ETH_P_IPV6; + *l4_type_ext = MLX5_PACKET_L4_TYPE_EXT_UDP; + *tir_tt = MLX5_TT_IPV6_UDP; + break; + default: + return -EINVAL; + } + + return 0; +} + +static struct mlx5_flow_handle * 
+mlx5_ttc_create_ipsec_outer_rule(struct mlx5_ttc_table *ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + enum mlx5_traffic_types tir_tt; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int l4_type_ext; + u16 etype; + int err; + + err = mlx5_ttc_get_tt_attrs(type, &etype, &l4_type_ext, &tir_tt); + if (err) + return ERR_PTR(err); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + mlx5_fs_ttc_set_match_ipv_outer(ttc->mdev, spec, etype); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + outer_headers.l4_type_ext); + MLX5_SET(fte_match_param, spec->match_value, + outer_headers.l4_type_ext, l4_type_ext); + + dest = mlx5_ttc_get_default_dest(ttc, tir_tt); + + rule = mlx5_add_flow_rules(ttc->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(ttc->mdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? ERR_PTR(err) : rule; +} + +static struct mlx5_flow_handle * +mlx5_ttc_create_ipsec_inner_rule(struct mlx5_ttc_table *ttc, + struct mlx5_ttc_table *inner_ttc, + enum mlx5_traffic_types type) +{ + struct mlx5_flow_destination dest; + MLX5_DECLARE_FLOW_ACT(flow_act); + enum mlx5_traffic_types tir_tt; + struct mlx5_flow_handle *rule; + struct mlx5_flow_spec *spec; + int l4_type_ext; + u16 etype; + int err; + + err = mlx5_ttc_get_tt_attrs(type, &etype, &l4_type_ext, &tir_tt); + if (err) + return ERR_PTR(err); + + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return ERR_PTR(-ENOMEM); + + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + inner_headers.ip_version); + MLX5_SET(fte_match_param, spec->match_value, + inner_headers.ip_version, mlx5_etype_to_ipv(etype)); + MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, + inner_headers.l4_type_ext); + MLX5_SET(fte_match_param, spec->match_value, + inner_headers.l4_type_ext, l4_type_ext); + + dest = mlx5_ttc_get_default_dest(inner_ttc, tir_tt); + + spec->match_criteria_enable = MLX5_MATCH_INNER_HEADERS; + + rule = mlx5_add_flow_rules(ttc->t, spec, &flow_act, &dest, 1); + if (IS_ERR(rule)) { + err = PTR_ERR(rule); + mlx5_core_err(ttc->mdev, "%s: add rule failed\n", __func__); + } + + kvfree(spec); + return err ? 
ERR_PTR(err) : rule; +} + +int mlx5_ttc_create_ipsec_rules(struct mlx5_ttc_table *ttc, + struct mlx5_ttc_table *inner_ttc) +{ + struct mlx5_flow_handle *rule; + enum mlx5_traffic_types i; + + if (!mlx5_ttc_has_esp_flow_group(ttc)) + return 0; + + mutex_lock(&ttc->mutex); + if (ttc->refcnt) + goto skip; + + for (i = MLX5_TT_DECRYPTED_ESP_OUTER_IPV4_TCP; + i <= MLX5_TT_DECRYPTED_ESP_OUTER_IPV6_UDP; i++) { + rule = mlx5_ttc_create_ipsec_outer_rule(ttc, i); + if (IS_ERR(rule)) + goto err_out; + + ttc->rules[i].rule = rule; + } + + for (i = MLX5_TT_DECRYPTED_ESP_INNER_IPV4_TCP; + i <= MLX5_TT_DECRYPTED_ESP_INNER_IPV6_UDP; i++) { + rule = mlx5_ttc_create_ipsec_inner_rule(ttc, inner_ttc, i); + if (IS_ERR(rule)) + goto err_out; + + ttc->rules[i].rule = rule; + } + +skip: + ttc->refcnt++; + mutex_unlock(&ttc->mutex); + return 0; + +err_out: + _mlx5_ttc_destroy_ipsec_rules(ttc); + mutex_unlock(&ttc->mutex); + return PTR_ERR(rule); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h index ab9434fe3ae6..95f6e56724a2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/fs_ttc.h @@ -18,6 +18,14 @@ enum mlx5_traffic_types { MLX5_TT_IPV4, MLX5_TT_IPV6, MLX5_TT_ANY, + MLX5_TT_DECRYPTED_ESP_OUTER_IPV4_TCP, + MLX5_TT_DECRYPTED_ESP_OUTER_IPV6_TCP, + MLX5_TT_DECRYPTED_ESP_OUTER_IPV4_UDP, + MLX5_TT_DECRYPTED_ESP_OUTER_IPV6_UDP, + MLX5_TT_DECRYPTED_ESP_INNER_IPV4_TCP, + MLX5_TT_DECRYPTED_ESP_INNER_IPV6_TCP, + MLX5_TT_DECRYPTED_ESP_INNER_IPV4_UDP, + MLX5_TT_DECRYPTED_ESP_INNER_IPV6_UDP, MLX5_NUM_TT, MLX5_NUM_INDIR_TIRS = MLX5_TT_ANY, }; @@ -47,6 +55,7 @@ struct ttc_params { bool inner_ttc; DECLARE_BITMAP(ignore_tunnel_dests, MLX5_NUM_TUNNEL_TT); struct mlx5_flow_destination tunnel_dests[MLX5_NUM_TUNNEL_TT]; + bool ipsec_rss; }; const char *mlx5_ttc_get_name(enum mlx5_traffic_types tt); @@ -70,4 +79,14 @@ int mlx5_ttc_fwd_default_dest(struct mlx5_ttc_table *ttc, bool mlx5_tunnel_inner_ft_supported(struct mlx5_core_dev *mdev); u8 mlx5_get_proto_by_tunnel_type(enum mlx5_tunnel_types tt); +bool mlx5_ttc_has_esp_flow_group(struct mlx5_ttc_table *ttc); +int mlx5_ttc_create_ipsec_rules(struct mlx5_ttc_table *ttc, + struct mlx5_ttc_table *inner_ttc); +void mlx5_ttc_destroy_ipsec_rules(struct mlx5_ttc_table *ttc); +static inline bool mlx5_ttc_is_decrypted_esp_tt(enum mlx5_traffic_types tt) +{ + return tt >= MLX5_TT_DECRYPTED_ESP_OUTER_IPV4_TCP && + tt <= MLX5_TT_DECRYPTED_ESP_INNER_IPV6_UDP; +} + #endif /* __MLX5_FS_TTC_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c index b7d4b1a2baf2..d524f0220513 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/ipsec_fs_roce.c @@ -164,6 +164,8 @@ ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, roce->rule = rule; memset(spec, 0, sizeof(*spec)); + if (default_dst->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL; rule = mlx5_add_flow_rules(roce->ft, spec, &flow_act, default_dst, 1); if (IS_ERR(rule)) { err = PTR_ERR(rule); @@ -178,6 +180,8 @@ ipsec_fs_roce_rx_rule_setup(struct mlx5_core_dev *mdev, goto out; flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + if (default_dst->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) + flow_act.flags &= ~FLOW_ACT_IGNORE_FLOW_LEVEL; dst.type = MLX5_FLOW_DESTINATION_TYPE_TABLE_TYPE; dst.ft = 
roce->ft_rdma; rule = mlx5_add_flow_rules(roce->nic_master_ft, NULL, &flow_act, &dst, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c index 762d55ba9e51..e6be2f01daf4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.c @@ -45,11 +45,7 @@ #define MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI 0x8 #define MLX5_SECTAG_HEADER_SIZE_WITH_SCI (MLX5_SECTAG_HEADER_SIZE_WITHOUT_SCI + MACSEC_SCI_LEN) -/* MACsec RX flow steering */ -#define MLX5_ETH_WQE_FT_META_MACSEC_MASK 0x3E - /* MACsec fs_id handling for steering */ -#define macsec_fs_set_tx_fs_id(fs_id) (MLX5_ETH_WQE_FT_META_MACSEC | (fs_id) << 2) #define macsec_fs_set_rx_fs_id(fs_id) ((fs_id) | BIT(30)) struct mlx5_sectag_header { @@ -597,7 +593,7 @@ static int macsec_fs_tx_setup_fte(struct mlx5_macsec_fs *macsec_fs, MLX5_SET(fte_match_param, spec->match_criteria, misc_parameters_2.metadata_reg_a, MLX5_ETH_WQE_FT_META_MACSEC_MASK); MLX5_SET(fte_match_param, spec->match_value, misc_parameters_2.metadata_reg_a, - macsec_fs_set_tx_fs_id(id)); + MLX5_MACSEC_TX_METADATA(id)); *fs_id = id; flow_act->crypto.type = MLX5_FLOW_CONTEXT_ENCRYPT_DECRYPT_TYPE_MACSEC; @@ -2219,9 +2215,11 @@ static int mlx5_macsec_fs_add_roce_rule_tx(struct mlx5_macsec_fs *macsec_fs, u32 MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET); MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_A); - MLX5_SET(set_action_in, action, data, macsec_fs_set_tx_fs_id(fs_id)); - MLX5_SET(set_action_in, action, offset, 0); - MLX5_SET(set_action_in, action, length, 32); + MLX5_SET(set_action_in, action, data, + mlx5_macsec_fs_set_tx_fs_id(fs_id)); + MLX5_SET(set_action_in, action, offset, + MLX5_ETH_WQE_FT_META_MACSEC_SHIFT); + MLX5_SET(set_action_in, action, length, 8); modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_RDMA_TX_MACSEC, 1, action); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h index 34b80c3ef6a5..15acaff43641 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/macsec_fs.h @@ -12,6 +12,21 @@ #define MLX5_MACSEC_METADATA_MARKER(metadata) ((((metadata) >> 30) & 0x3) == 0x1) #define MLX5_MACSEC_RX_METADAT_HANDLE(metadata) ((metadata) & MLX5_MACSEC_RX_FS_ID_MASK) +/* MACsec TX flow steering */ +#define MLX5_ETH_WQE_FT_META_MACSEC_MASK \ + (MLX5_ETH_WQE_FT_META_MACSEC | MLX5_ETH_WQE_FT_META_MACSEC_FS_ID_MASK) +#define MLX5_ETH_WQE_FT_META_MACSEC_SHIFT MLX5_ETH_WQE_FT_META_SHIFT + +/* MACsec fs_id handling for steering */ +#define mlx5_macsec_fs_set_tx_fs_id(fs_id) \ + (((MLX5_ETH_WQE_FT_META_MACSEC) >> MLX5_ETH_WQE_FT_META_MACSEC_SHIFT) \ + | ((fs_id) << 2)) + +#define MLX5_MACSEC_TX_METADATA(fs_id) \ + (mlx5_macsec_fs_set_tx_fs_id(fs_id) << \ + MLX5_ETH_WQE_FT_META_MACSEC_SHIFT) + +/* MACsec fs_id uses 4 bits, supports up to 16 interfaces */ #define MLX5_MACSEC_NUM_OF_SUPPORTED_INTERFACES 16 struct mlx5_macsec_fs; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h index b111ccd03b02..74ea5da58b7e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mlx5.h @@ -47,7 +47,20 @@ int mlx5_crdump_collect(struct mlx5_core_dev *dev, u32 *cr_data); static inline struct net_device *mlx5_uplink_netdev_get(struct mlx5_core_dev 
*mdev) { - return mdev->mlx5e_res.uplink_netdev; + struct mlx5e_resources *mlx5e_res = &mdev->mlx5e_res; + struct net_device *netdev; + + mutex_lock(&mlx5e_res->uplink_netdev_lock); + netdev = mlx5e_res->uplink_netdev; + netdev_hold(netdev, &mlx5e_res->tracker, GFP_KERNEL); + mutex_unlock(&mlx5e_res->uplink_netdev_lock); + return netdev; +} + +static inline void mlx5_uplink_netdev_put(struct mlx5_core_dev *mdev, + struct net_device *netdev) +{ + netdev_put(netdev, &mdev->mlx5e_res.tracker); } struct mlx5_sd; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c new file mode 100644 index 000000000000..459a0b4d08e6 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.c @@ -0,0 +1,567 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "nv_param.h" +#include "mlx5_core.h" + +enum { + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF = 0x80, + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP = 0x81, + MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG = 0x10a, + + MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF = 0x80, +}; + +struct mlx5_ifc_configuration_item_type_class_global_bits { + u8 type_class[0x8]; + u8 parameter_index[0x18]; +}; + +struct mlx5_ifc_configuration_item_type_class_per_host_pf_bits { + u8 type_class[0x8]; + u8 pf_index[0x6]; + u8 pci_bus_index[0x8]; + u8 parameter_index[0xa]; +}; + +union mlx5_ifc_config_item_type_auto_bits { + struct mlx5_ifc_configuration_item_type_class_global_bits + configuration_item_type_class_global; + struct mlx5_ifc_configuration_item_type_class_per_host_pf_bits + configuration_item_type_class_per_host_pf; + u8 reserved_at_0[0x20]; +}; + +struct mlx5_ifc_config_item_bits { + u8 valid[0x2]; + u8 priority[0x2]; + u8 header_type[0x2]; + u8 ovr_en[0x1]; + u8 rd_en[0x1]; + u8 access_mode[0x2]; + u8 reserved_at_a[0x1]; + u8 writer_id[0x5]; + u8 version[0x4]; + u8 reserved_at_14[0x2]; + u8 host_id_valid[0x1]; + u8 length[0x9]; + + union mlx5_ifc_config_item_type_auto_bits type; + + u8 reserved_at_40[0x10]; + u8 crc16[0x10]; +}; + +struct mlx5_ifc_mnvda_reg_bits { + struct mlx5_ifc_config_item_bits configuration_item_header; + + u8 configuration_item_data[64][0x20]; +}; + +struct mlx5_ifc_nv_global_pci_conf_bits { + u8 sriov_valid[0x1]; + u8 reserved_at_1[0x10]; + u8 per_pf_total_vf[0x1]; + u8 reserved_at_12[0xe]; + + u8 sriov_en[0x1]; + u8 reserved_at_21[0xf]; + u8 total_vfs[0x10]; + + u8 reserved_at_40[0x20]; +}; + +struct mlx5_ifc_nv_global_pci_cap_bits { + u8 max_vfs_per_pf_valid[0x1]; + u8 reserved_at_1[0x13]; + u8 per_pf_total_vf_supported[0x1]; + u8 reserved_at_15[0xb]; + + u8 sriov_support[0x1]; + u8 reserved_at_21[0xf]; + u8 max_vfs_per_pf[0x10]; + + u8 reserved_at_40[0x60]; +}; + +struct mlx5_ifc_nv_pf_pci_conf_bits { + u8 reserved_at_0[0x9]; + u8 pf_total_vf_en[0x1]; + u8 reserved_at_a[0x16]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x10]; + u8 total_vf[0x10]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_nv_sw_offload_conf_bits { + u8 ip_over_vxlan_port[0x10]; + u8 tunnel_ecn_copy_offload_disable[0x1]; + u8 pci_atomic_mode[0x3]; + u8 sr_enable[0x1]; + u8 ptp_cyc2realtime[0x1]; + u8 vector_calc_disable[0x1]; + u8 uctx_en[0x1]; + u8 prio_tag_required_en[0x1]; + u8 esw_fdb_ipv4_ttl_modify_enable[0x1]; + u8 mkey_by_name[0x1]; + u8 ip_over_vxlan_en[0x1]; + u8 one_qp_per_recovery[0x1]; + u8 cqe_compression[0x3]; + u8 tunnel_udp_entropy_proto_disable[0x1]; + u8 reserved_at_21[0x1]; + u8 
ar_enable[0x1]; + u8 log_max_outstanding_wqe[0x5]; + u8 vf_migration[0x2]; + u8 log_tx_psn_win[0x6]; + u8 lro_log_timeout3[0x4]; + u8 lro_log_timeout2[0x4]; + u8 lro_log_timeout1[0x4]; + u8 lro_log_timeout0[0x4]; +}; + +#define MNVDA_HDR_SZ \ + (MLX5_ST_SZ_BYTES(mnvda_reg) - \ + MLX5_BYTE_OFF(mnvda_reg, configuration_item_data)) + +#define MLX5_SET_CFG_ITEM_TYPE(_cls_name, _mnvda_ptr, _field, _val) \ + MLX5_SET(mnvda_reg, _mnvda_ptr, \ + configuration_item_header.type.configuration_item_type_class_##_cls_name._field, \ + _val) + +#define MLX5_SET_CFG_HDR_LEN(_mnvda_ptr, _cls_name) \ + MLX5_SET(mnvda_reg, _mnvda_ptr, configuration_item_header.length, \ + MLX5_ST_SZ_BYTES(_cls_name)) + +#define MLX5_GET_CFG_HDR_LEN(_mnvda_ptr) \ + MLX5_GET(mnvda_reg, _mnvda_ptr, configuration_item_header.length) + +static int mlx5_nv_param_read(struct mlx5_core_dev *dev, void *mnvda, + size_t len) +{ + u32 param_idx, type_class; + u32 header_len; + void *cls_ptr; + int err; + + if (WARN_ON(len > MLX5_ST_SZ_BYTES(mnvda_reg)) || len < MNVDA_HDR_SZ) + return -EINVAL; /* A caller bug */ + + err = mlx5_core_access_reg(dev, mnvda, len, mnvda, len, MLX5_REG_MNVDA, + 0, 0); + if (!err) + return 0; + + cls_ptr = MLX5_ADDR_OF(mnvda_reg, mnvda, + configuration_item_header.type.configuration_item_type_class_global); + + type_class = MLX5_GET(configuration_item_type_class_global, cls_ptr, + type_class); + param_idx = MLX5_GET(configuration_item_type_class_global, cls_ptr, + parameter_index); + header_len = MLX5_GET_CFG_HDR_LEN(mnvda); + + mlx5_core_warn(dev, "Failed to read mnvda reg: type_class 0x%x, param_idx 0x%x, header_len %u, err %d\n", + type_class, param_idx, header_len, err); + + return -EOPNOTSUPP; +} + +static int mlx5_nv_param_write(struct mlx5_core_dev *dev, void *mnvda, + size_t len) +{ + if (WARN_ON(len > MLX5_ST_SZ_BYTES(mnvda_reg)) || len < MNVDA_HDR_SZ) + return -EINVAL; + + if (WARN_ON(MLX5_GET_CFG_HDR_LEN(mnvda) == 0)) + return -EINVAL; + + return mlx5_core_access_reg(dev, mnvda, len, mnvda, len, MLX5_REG_MNVDA, + 0, 1); +} + +static int +mlx5_nv_param_read_sw_offload_conf(struct mlx5_core_dev *dev, void *mnvda, + size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_SW_OFFLOAD_CONFIG); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_sw_offload_conf); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static const char *const + cqe_compress_str[] = { "balanced", "aggressive" }; + +static int +mlx5_nv_param_devlink_cqe_compress_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + u8 value = U8_MAX; + void *data; + int err; + + err = mlx5_nv_param_read_sw_offload_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + value = MLX5_GET(nv_sw_offload_conf, data, cqe_compression); + + if (value >= ARRAY_SIZE(cqe_compress_str)) + return -EOPNOTSUPP; + + strscpy(ctx->val.vstr, cqe_compress_str[value], sizeof(ctx->val.vstr)); + return 0; +} + +static int +mlx5_nv_param_devlink_cqe_compress_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(cqe_compress_str); i++) { + if (!strcmp(val.vstr, cqe_compress_str[i])) + return 0; + } + + NL_SET_ERR_MSG_MOD(extack, + "Invalid value, supported values are balanced/aggressive"); + return 
-EOPNOTSUPP; +} + +static int +mlx5_nv_param_devlink_cqe_compress_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + int err = 0; + void *data; + u8 value; + + if (!strcmp(ctx->val.vstr, "aggressive")) + value = 1; + else /* balanced: can't be anything else already validated above */ + value = 0; + + err = mlx5_nv_param_read_sw_offload_conf(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to read sw_offload_conf mnvda reg"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + MLX5_SET(nv_sw_offload_conf, data, cqe_compression, value); + + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static int mlx5_nv_param_read_global_pci_conf(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CONF); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_global_pci_conf); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int mlx5_nv_param_read_global_pci_cap(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, type_class, 0); + MLX5_SET_CFG_ITEM_TYPE(global, mnvda, parameter_index, + MLX5_CLASS_0_CTRL_ID_NV_GLOBAL_PCI_CAP); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_global_pci_cap); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int mlx5_nv_param_read_per_host_pf_conf(struct mlx5_core_dev *dev, + void *mnvda, size_t len) +{ + MLX5_SET_CFG_ITEM_TYPE(per_host_pf, mnvda, type_class, 3); + MLX5_SET_CFG_ITEM_TYPE(per_host_pf, mnvda, parameter_index, + MLX5_CLASS_3_CTRL_ID_NV_PF_PCI_CONF); + MLX5_SET_CFG_HDR_LEN(mnvda, nv_pf_pci_conf); + + return mlx5_nv_param_read(dev, mnvda, len); +} + +static int mlx5_devlink_enable_sriov_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + bool sriov_en = false; + void *data; + int err; + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { + ctx->val.vbool = false; + return 0; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + sriov_en = MLX5_GET(nv_global_pci_conf, data, sriov_en); + if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) { + ctx->val.vbool = sriov_en; + return 0; + } + + /* SRIOV is per PF */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + ctx->val.vbool = sriov_en && + MLX5_GET(nv_pf_pci_conf, data, pf_total_vf_en); + return 0; +} + +static int mlx5_devlink_enable_sriov_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + bool per_pf_support; + void *cap, *data; + int err; + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) { + 
NL_SET_ERR_MSG_MOD(extack, + "Failed to read global PCI capability"); + return err; + } + + cap = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + per_pf_support = MLX5_GET(nv_global_pci_cap, cap, + per_pf_total_vf_supported); + + if (!MLX5_GET(nv_global_pci_cap, cap, sriov_support)) { + NL_SET_ERR_MSG_MOD(extack, + "SRIOV is not supported on this device"); + return -EOPNOTSUPP; + } + + if (!per_pf_support) { + /* We don't allow global SRIOV setting on per PF devlink */ + NL_SET_ERR_MSG_MOD(extack, + "SRIOV is not per PF on this device"); + return -EOPNOTSUPP; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to read global PCI configuration"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + /* setup per PF sriov mode */ + MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1); + MLX5_SET(nv_global_pci_conf, data, sriov_en, 1); + MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, 1); + + err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to write global PCI configuration"); + return err; + } + + /* enable/disable sriov on this PF */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Unable to read per host PF configuration"); + return err; + } + MLX5_SET(nv_pf_pci_conf, data, pf_total_vf_en, ctx->val.vbool); + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static int mlx5_devlink_total_vfs_get(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)] = {}; + void *data; + int err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { + ctx->val.vu32 = 0; + return 0; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + if (!MLX5_GET(nv_global_pci_conf, data, per_pf_total_vf)) { + ctx->val.vu32 = MLX5_GET(nv_global_pci_conf, data, total_vfs); + return 0; + } + + /* SRIOV is per PF */ + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + ctx->val.vu32 = MLX5_GET(nv_pf_pci_conf, data, total_vf); + + return 0; +} + +static int mlx5_devlink_total_vfs_set(struct devlink *devlink, u32 id, + struct devlink_param_gset_ctx *ctx, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 mnvda[MLX5_ST_SZ_DW(mnvda_reg)]; + void *data; + int err; + + err = mlx5_nv_param_read_global_pci_cap(dev, mnvda, sizeof(mnvda)); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to read global pci cap"); + return err; + } + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + if (!MLX5_GET(nv_global_pci_cap, data, sriov_support)) { + NL_SET_ERR_MSG_MOD(extack, "Not configurable on this device"); + return -EOPNOTSUPP; + } + + if (!MLX5_GET(nv_global_pci_cap, data, per_pf_total_vf_supported)) { + /* We don't allow global SRIOV setting on per PF devlink */ + NL_SET_ERR_MSG_MOD(extack, + "SRIOV is not per PF on this device"); + return -EOPNOTSUPP; + } + + memset(mnvda, 0, sizeof(mnvda)); + err = 
mlx5_nv_param_read_global_pci_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + MLX5_SET(nv_global_pci_conf, data, sriov_valid, 1); + MLX5_SET(nv_global_pci_conf, data, per_pf_total_vf, 1); + + err = mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + memset(mnvda, 0, sizeof(mnvda)); + err = mlx5_nv_param_read_per_host_pf_conf(dev, mnvda, sizeof(mnvda)); + if (err) + return err; + + data = MLX5_ADDR_OF(mnvda_reg, mnvda, configuration_item_data); + MLX5_SET(nv_pf_pci_conf, data, total_vf, ctx->val.vu32); + return mlx5_nv_param_write(dev, mnvda, sizeof(mnvda)); +} + +static int mlx5_devlink_total_vfs_validate(struct devlink *devlink, u32 id, + union devlink_param_value val, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(devlink); + u32 cap[MLX5_ST_SZ_DW(mnvda_reg)]; + void *data; + u16 max; + int err; + + data = MLX5_ADDR_OF(mnvda_reg, cap, configuration_item_data); + + err = mlx5_nv_param_read_global_pci_cap(dev, cap, sizeof(cap)); + if (err) + return err; + + if (!MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf_valid)) + return 0; /* optimistic, but set might fail later */ + + max = MLX5_GET(nv_global_pci_cap, data, max_vfs_per_pf); + if (val.vu16 > max) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Max allowed by device is %u", max); + return -EINVAL; + } + + return 0; +} + +static const struct devlink_param mlx5_nv_param_devlink_params[] = { + DEVLINK_PARAM_GENERIC(ENABLE_SRIOV, BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_devlink_enable_sriov_get, + mlx5_devlink_enable_sriov_set, NULL), + DEVLINK_PARAM_GENERIC(TOTAL_VFS, BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_devlink_total_vfs_get, + mlx5_devlink_total_vfs_set, + mlx5_devlink_total_vfs_validate), + DEVLINK_PARAM_DRIVER(MLX5_DEVLINK_PARAM_ID_CQE_COMPRESSION_TYPE, + "cqe_compress_type", DEVLINK_PARAM_TYPE_STRING, + BIT(DEVLINK_PARAM_CMODE_PERMANENT), + mlx5_nv_param_devlink_cqe_compress_get, + mlx5_nv_param_devlink_cqe_compress_set, + mlx5_nv_param_devlink_cqe_compress_validate), +}; + +int mlx5_nv_param_register_dl_params(struct devlink *devlink) +{ + if (!mlx5_core_is_pf(devlink_priv(devlink))) + return 0; + + return devl_params_register(devlink, mlx5_nv_param_devlink_params, + ARRAY_SIZE(mlx5_nv_param_devlink_params)); +} + +void mlx5_nv_param_unregister_dl_params(struct devlink *devlink) +{ + if (!mlx5_core_is_pf(devlink_priv(devlink))) + return; + + devl_params_unregister(devlink, mlx5_nv_param_devlink_params, + ARRAY_SIZE(mlx5_nv_param_devlink_params)); +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h new file mode 100644 index 000000000000..9f4922ff7745 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/nv_param.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#ifndef __MLX5_NV_PARAM_H +#define __MLX5_NV_PARAM_H + +#include <linux/mlx5/driver.h> +#include "devlink.h" + +int mlx5_nv_param_register_dl_params(struct devlink *devlink); +void mlx5_nv_param_unregister_dl_params(struct devlink *devlink); + +#endif + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c index eeb0b7ea05f1..f5c2701f6e87 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/sd.c @@ -210,13 +210,17 @@ static void sd_cleanup(struct mlx5_core_dev *dev) static int sd_register(struct mlx5_core_dev *dev) { struct mlx5_devcom_comp_dev *devcom, *pos; + struct mlx5_devcom_match_attr attr = {}; struct mlx5_core_dev *peer, *primary; struct mlx5_sd *sd, *primary_sd; int err, i; sd = mlx5_get_sd(dev); + attr.key.val = sd->group_id; + attr.flags = MLX5_DEVCOM_MATCH_FLAGS_NS; + attr.net = mlx5_core_net(dev); devcom = mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_SD_GROUP, - sd->group_id, NULL, dev); + &attr, NULL, dev); if (IS_ERR(devcom)) return PTR_ERR(devcom); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 8517d4e5d5ef..df93625c9dfa 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -973,36 +973,14 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev) mlx5_pci_disable_device(dev); } -static void mlx5_register_hca_devcom_comp(struct mlx5_core_dev *dev) -{ - /* This component is use to sync adding core_dev to lag_dev and to sync - * changes of mlx5_adev_devices between LAG layer and other layers. - */ - if (!mlx5_lag_is_supported(dev)) - return; - - dev->priv.hca_devcom_comp = - mlx5_devcom_register_component(dev->priv.devc, MLX5_DEVCOM_HCA_PORTS, - mlx5_query_nic_system_image_guid(dev), - NULL, dev); - if (IS_ERR(dev->priv.hca_devcom_comp)) - mlx5_core_err(dev, "Failed to register devcom HCA component\n"); -} - -static void mlx5_unregister_hca_devcom_comp(struct mlx5_core_dev *dev) -{ - mlx5_devcom_unregister_component(dev->priv.hca_devcom_comp); -} - static int mlx5_init_once(struct mlx5_core_dev *dev) { int err; dev->priv.devc = mlx5_devcom_register_device(dev); if (IS_ERR(dev->priv.devc)) - mlx5_core_warn(dev, "failed to register devcom device %ld\n", - PTR_ERR(dev->priv.devc)); - mlx5_register_hca_devcom_comp(dev); + mlx5_core_warn(dev, "failed to register devcom device %pe\n", + dev->priv.devc); err = mlx5_query_board_id(dev); if (err) { @@ -1140,7 +1118,6 @@ err_eq_cleanup: err_irq_cleanup: mlx5_irq_table_cleanup(dev); err_devcom: - mlx5_unregister_hca_devcom_comp(dev); mlx5_devcom_unregister_device(dev->priv.devc); return err; @@ -1171,7 +1148,6 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev) mlx5_events_cleanup(dev); mlx5_eq_table_cleanup(dev); mlx5_irq_table_cleanup(dev); - mlx5_unregister_hca_devcom_comp(dev); mlx5_devcom_unregister_device(dev->priv.devc); } @@ -1340,10 +1316,9 @@ static int mlx5_load(struct mlx5_core_dev *dev) { int err; - dev->priv.uar = mlx5_get_uars_page(dev); - if (IS_ERR(dev->priv.uar)) { - mlx5_core_err(dev, "Failed allocating uar, aborting\n"); - err = PTR_ERR(dev->priv.uar); + err = mlx5_alloc_bfreg(dev, &dev->priv.bfreg, false, false); + if (err) { + mlx5_core_err(dev, "Failed allocating bfreg, %d\n", err); return err; } @@ -1454,7 +1429,7 @@ err_eq_table: err_irq_table: mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); - mlx5_put_uars_page(dev, dev->priv.uar); + 
mlx5_free_bfreg(dev, &dev->priv.bfreg); return err; } @@ -1479,7 +1454,7 @@ static void mlx5_unload(struct mlx5_core_dev *dev) mlx5_irq_table_destroy(dev); mlx5_pagealloc_stop(dev); mlx5_events_stop(dev); - mlx5_put_uars_page(dev, dev->priv.uar); + mlx5_free_bfreg(dev, &dev->priv.bfreg); } int mlx5_init_one_devl_locked(struct mlx5_core_dev *dev) @@ -1798,6 +1773,7 @@ static const int types[] = { MLX5_CAP_VDPA_EMULATION, MLX5_CAP_IPSEC, MLX5_CAP_PORT_SELECTION, + MLX5_CAP_PSP, MLX5_CAP_MACSEC, MLX5_CAP_ADV_VIRTUALIZATION, MLX5_CAP_CRYPTO, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 9d3504f5abfa..082259b56816 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -449,8 +449,6 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap #define mlx5_vport_get_other_func_general_cap(dev, vport, out) \ mlx5_vport_get_other_func_cap(dev, vport, out, MLX5_CAP_GENERAL) -int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id); - static inline u32 mlx5_sriov_get_vf_total_msix(struct pci_dev *pdev) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 9bc9bd83c232..cd68c4b2c0bf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -489,9 +489,12 @@ static int reclaim_pages_cmd(struct mlx5_core_dev *dev, u32 func_id; u32 npages; u32 i = 0; + int err; - if (!mlx5_cmd_is_down(dev)) - return mlx5_cmd_do(dev, in, in_size, out, out_size); + err = mlx5_cmd_do(dev, in, in_size, out, out_size); + /* If FW is gone (-ENXIO), proceed to forceful reclaim */ + if (err != -ENXIO) + return err; /* No hard feelings, we want our pages back! */ npages = MLX5_GET(manage_pages_in, in, input_num_entries); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 692ef9c2f729..e18a850c615c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -54,7 +54,7 @@ static int mlx5_core_func_to_vport(const struct mlx5_core_dev *dev, /** * mlx5_get_default_msix_vec_count - Get the default number of MSI-X vectors - * to be ssigned to each VF. + * to be assigned to each VF. * @dev: PF to work on * @num_vfs: Number of enabled VFs */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 2d7adf7444ba..aa9f2b0a77d3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -1170,7 +1170,11 @@ const struct mlx5_link_info *mlx5_port_ptys2info(struct mlx5_core_dev *mdev, mlx5e_port_get_link_mode_info_arr(mdev, &table, &max_size, force_legacy); i = find_first_bit(&temp, max_size); - if (i < max_size) + + /* mlx5e_link_info has holes. Check speed + * is not zero as indication of one. 
+ */ + if (i < max_size && table[i].speed) return &table[i]; return NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h index 0537de86f981..9b0f44253f33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/diag/dev_tracepoint.h @@ -28,7 +28,7 @@ DECLARE_EVENT_CLASS(mlx5_sf_dev_template, __entry->hw_fn_id = sfdev->fn_id; __entry->sfnum = sfdev->sfnum; ), - TP_printk("(%s) sfdev=%pK aux_id=%d hw_id=0x%x sfnum=%u\n", + TP_printk("(%s) sfdev=%p aux_id=%d hw_id=0x%x sfnum=%u\n", __get_str(devname), __entry->sfdev, __entry->aux_id, __entry->hw_fn_id, __entry->sfnum) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c index 0864ba625c07..3304f25cc805 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/devlink.c @@ -518,3 +518,13 @@ void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) WARN_ON(!xa_empty(&table->function_ids)); kfree(table); } + +bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) +{ + struct mlx5_sf_table *table = dev->priv.sf_table; + + if (!table) + return true; + + return xa_empty(&table->function_ids); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h index 860f9ddb7107..89559a37997a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/sf.h @@ -17,6 +17,7 @@ void mlx5_sf_hw_table_destroy(struct mlx5_core_dev *dev); int mlx5_sf_table_init(struct mlx5_core_dev *dev); void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev); +bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev); int mlx5_devlink_sf_port_new(struct devlink *devlink, const struct devlink_port_new_attrs *add_attr, @@ -61,6 +62,11 @@ static inline void mlx5_sf_table_cleanup(struct mlx5_core_dev *dev) { } +static inline bool mlx5_sf_table_empty(const struct mlx5_core_dev *dev) +{ + return true; +} + #endif #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c index 396804369b00..fe56b59e24c5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/action.c @@ -117,7 +117,7 @@ static int hws_action_get_shared_stc_nic(struct mlx5hws_context *ctx, mlx5hws_err(ctx, "No such stc_type: %d\n", stc_type); pr_warn("HWS: Invalid stc_type: %d\n", stc_type); ret = -EINVAL; - goto unlock_and_out; + goto free_shared_stc; } ret = mlx5hws_action_alloc_single_stc(ctx, &stc_attr, tbl_type, @@ -1360,7 +1360,7 @@ free_action: struct mlx5hws_action * mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, u32 flags) + u32 flags) { struct mlx5hws_cmd_set_fte_dest *dest_list = NULL; struct mlx5hws_cmd_ft_create_attr ft_attr = {0}; @@ -1397,7 +1397,7 @@ mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dest_list[i].destination_id = dests[i].dest->dest_obj.obj_id; fte_attr.action_flags |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - fte_attr.ignore_flow_level = ignore_flow_level; + fte_attr.ignore_flow_level = 1; if (dests[i].is_wire_ft) last_dest_idx = i; break; diff --git 
a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c index adeccc588e5d..6ef0c4be27e1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.c @@ -51,9 +51,6 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, u8 size_log_rx, u8 size_log_tx, struct mlx5hws_matcher_attr *attr) { - struct mlx5hws_bwc_matcher *first_matcher = - bwc_matcher->complex_first_bwc_matcher; - memset(attr, 0, sizeof(*attr)); attr->priority = priority; @@ -66,9 +63,6 @@ static void hws_bwc_matcher_init_attr(struct mlx5hws_bwc_matcher *bwc_matcher, attr->size[MLX5HWS_MATCHER_SIZE_TYPE_TX].rule.num_log = size_log_tx; attr->resizable = true; attr->max_num_of_at_attach = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; - - attr->isolated_matcher_end_ft_id = - first_matcher ? first_matcher->matcher->end_ft_id : 0; } static int @@ -171,10 +165,16 @@ hws_bwc_matcher_move_all_simple(struct mlx5hws_bwc_matcher *bwc_matcher) static int hws_bwc_matcher_move_all(struct mlx5hws_bwc_matcher *bwc_matcher) { - if (!bwc_matcher->complex) + switch (bwc_matcher->matcher_type) { + case MLX5HWS_BWC_MATCHER_SIMPLE: return hws_bwc_matcher_move_all_simple(bwc_matcher); - - return mlx5hws_bwc_matcher_move_all_complex(bwc_matcher); + case MLX5HWS_BWC_MATCHER_COMPLEX_FIRST: + return mlx5hws_bwc_matcher_complex_move_first(bwc_matcher); + case MLX5HWS_BWC_MATCHER_COMPLEX_SUBMATCHER: + return mlx5hws_bwc_matcher_complex_move(bwc_matcher); + default: + return -EINVAL; + } } static int hws_bwc_matcher_move(struct mlx5hws_bwc_matcher *bwc_matcher) @@ -249,6 +249,7 @@ int mlx5hws_bwc_matcher_create_simple(struct mlx5hws_bwc_matcher *bwc_matcher, bwc_matcher->tx_size.size_log, &attr); + bwc_matcher->matcher_type = MLX5HWS_BWC_MATCHER_SIMPLE; bwc_matcher->priority = priority; bwc_matcher->size_of_at_array = MLX5HWS_BWC_MATCHER_ATTACH_AT_NUM; @@ -393,7 +394,7 @@ int mlx5hws_bwc_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) "BWC matcher destroy: matcher still has %u RX and %u TX rules\n", rx_rules, tx_rules); - if (bwc_matcher->complex) + if (bwc_matcher->matcher_type == MLX5HWS_BWC_MATCHER_COMPLEX_FIRST) mlx5hws_bwc_matcher_destroy_complex(bwc_matcher); else mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); @@ -651,7 +652,8 @@ int mlx5hws_bwc_rule_destroy_simple(struct mlx5hws_bwc_rule *bwc_rule) int mlx5hws_bwc_rule_destroy(struct mlx5hws_bwc_rule *bwc_rule) { - bool is_complex = !!bwc_rule->bwc_matcher->complex; + bool is_complex = bwc_rule->bwc_matcher->matcher_type == + MLX5HWS_BWC_MATCHER_COMPLEX_FIRST; int ret = 0; if (is_complex) @@ -1147,7 +1149,7 @@ mlx5hws_bwc_rule_create(struct mlx5hws_bwc_matcher *bwc_matcher, bwc_queue_idx = hws_bwc_gen_queue_idx(ctx); - if (bwc_matcher->complex) + if (bwc_matcher->matcher_type == MLX5HWS_BWC_MATCHER_COMPLEX_FIRST) ret = mlx5hws_bwc_rule_create_complex(bwc_rule, params, flow_source, @@ -1216,10 +1218,9 @@ int mlx5hws_bwc_rule_action_update(struct mlx5hws_bwc_rule *bwc_rule, return -EINVAL; } - /* For complex rule, the update should happen on the second matcher */ - if (bwc_rule->isolated_bwc_rule) - return hws_bwc_rule_action_update(bwc_rule->isolated_bwc_rule, - rule_actions); - else - return hws_bwc_rule_action_update(bwc_rule, rule_actions); + /* For complex rules, the update should happen on the last subrule. 
*/ + while (bwc_rule->next_subrule) + bwc_rule = bwc_rule->next_subrule; + + return hws_bwc_rule_action_update(bwc_rule, rule_actions); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h index af391d70c14f..b905511f5c53 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc.h @@ -18,6 +18,21 @@ #define MLX5HWS_BWC_POLLING_TIMEOUT 60 +enum mlx5hws_bwc_matcher_type { + /* Standalone bwc matcher. */ + MLX5HWS_BWC_MATCHER_SIMPLE, + /* The first matcher of a complex matcher. When rules are inserted into + * a matcher of this type, they are split into subrules and inserted + * into their corresponding submatchers. + */ + MLX5HWS_BWC_MATCHER_COMPLEX_FIRST, + /* A submatcher that is part of a complex matcher. For most purposes + * these are treated as simple matchers, except when it comes to moving + * rules during resize. + */ + MLX5HWS_BWC_MATCHER_COMPLEX_SUBMATCHER, +}; + struct mlx5hws_bwc_matcher_complex_data; struct mlx5hws_bwc_matcher_size { @@ -31,9 +46,9 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_match_template *mt; struct mlx5hws_action_template **at; struct mlx5hws_bwc_matcher_complex_data *complex; - struct mlx5hws_bwc_matcher *complex_first_bwc_matcher; u8 num_of_at; u8 size_of_at_array; + enum mlx5hws_bwc_matcher_type matcher_type; u32 priority; struct mlx5hws_bwc_matcher_size rx_size; struct mlx5hws_bwc_matcher_size tx_size; @@ -43,8 +58,8 @@ struct mlx5hws_bwc_matcher { struct mlx5hws_bwc_rule { struct mlx5hws_bwc_matcher *bwc_matcher; struct mlx5hws_rule *rule; - struct mlx5hws_bwc_rule *isolated_bwc_rule; - struct mlx5hws_bwc_complex_rule_hash_node *complex_hash_node; + struct mlx5hws_bwc_rule *next_subrule; + struct mlx5hws_bwc_complex_subrule_data *subrule_data; u32 flow_source; u16 bwc_queue_idx; bool skip_rx; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c index 14e79579c719..660630f18ce9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.c @@ -3,25 +3,27 @@ #include "internal.h" -#define HWS_CLEAR_MATCH_PARAM(mask, field) \ - MLX5_SET(fte_match_param, (mask)->match_buf, field, 0) - -#define HWS_SZ_MATCH_PARAM (MLX5_ST_SZ_DW_MATCH_PARAM * 4) - -static const struct rhashtable_params hws_refcount_hash = { - .key_len = sizeof_field(struct mlx5hws_bwc_complex_rule_hash_node, - match_buf), - .key_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, - match_buf), - .head_offset = offsetof(struct mlx5hws_bwc_complex_rule_hash_node, - hash_node), - .automatic_shrinking = true, - .min_size = 1, +/* We chain submatchers by applying three rules on a subrule: modify header (to + * set register C6), jump to table (to the next submatcher) and the mandatory + * last rule. 
+ */ +#define HWS_NUM_CHAIN_ACTIONS 3 + +static const struct rhashtable_params hws_rules_hash_params = { + .key_len = sizeof_field(struct mlx5hws_bwc_complex_subrule_data, + match_tag), + .key_offset = + offsetof(struct mlx5hws_bwc_complex_subrule_data, match_tag), + .head_offset = + offsetof(struct mlx5hws_bwc_complex_subrule_data, hash_node), + .automatic_shrinking = true, .min_size = 1, }; -bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, - u8 match_criteria_enable, - struct mlx5hws_match_parameters *mask) +static bool +hws_match_params_exceeds_definer(struct mlx5hws_context *ctx, + u8 match_criteria_enable, + struct mlx5hws_match_parameters *mask, + bool allow_jumbo) { struct mlx5hws_definer match_layout = {0}; struct mlx5hws_match_template *mt; @@ -36,11 +38,11 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, mask->match_sz, match_criteria_enable); if (!mt) { - mlx5hws_err(ctx, "BWC: failed creating match template\n"); + mlx5hws_err(ctx, "Complex matcher: failed creating match template\n"); return false; } - ret = mlx5hws_definer_calc_layout(ctx, mt, &match_layout); + ret = mlx5hws_definer_calc_layout(ctx, mt, &match_layout, allow_jumbo); if (ret) { /* The only case that we're interested in is E2BIG, * which means that the match parameters need to be @@ -64,825 +66,481 @@ bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, return is_complex; } -static void -hws_bwc_matcher_complex_params_clear_fld(struct mlx5hws_context *ctx, - enum mlx5hws_definer_fname fname, +bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, + u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) { - struct mlx5hws_cmd_query_caps *caps = ctx->caps; - - switch (fname) { - case MLX5HWS_DEFINER_FNAME_ETH_TYPE_O: - case MLX5HWS_DEFINER_FNAME_ETH_TYPE_I: - case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_O: - case MLX5HWS_DEFINER_FNAME_ETH_L3_TYPE_I: - case MLX5HWS_DEFINER_FNAME_IP_VERSION_O: - case MLX5HWS_DEFINER_FNAME_IP_VERSION_I: - /* Because of the strict requirements for IP address matching - * that require ethtype/ip_version matching as well, don't clear - * these fields - have them in both parts of the complex matcher - */ - break; - case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_47_16); - break; - case MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_47_16); - break; - case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.smac_15_0); - break; - case MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.smac_15_0); - break; - case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_47_16); - break; - case MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_47_16); - break; - case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.dmac_15_0); - break; - case MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.dmac_15_0); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.cvlan_tag); - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.svlan_tag); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_TYPE_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.cvlan_tag); - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.svlan_tag); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_O: - HWS_CLEAR_MATCH_PARAM(mask, 
outer_headers.first_prio); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_FIRST_PRIO_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_prio); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_CFI_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_cfi); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_CFI_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_cfi); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_ID_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.first_vid); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_ID_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.first_vid); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_O: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.outer_second_cvlan_tag); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.outer_second_svlan_tag); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_TYPE_I: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.inner_second_cvlan_tag); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.inner_second_svlan_tag); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_O: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_prio); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_PRIO_I: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_prio); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_O: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_cfi); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_CFI_I: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_cfi); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_O: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.outer_second_vid); - break; - case MLX5HWS_DEFINER_FNAME_VLAN_SECOND_ID_I: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.inner_second_vid); - break; - case MLX5HWS_DEFINER_FNAME_IPV4_IHL_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ipv4_ihl); - break; - case MLX5HWS_DEFINER_FNAME_IPV4_IHL_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ipv4_ihl); - break; - case MLX5HWS_DEFINER_FNAME_IP_DSCP_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_dscp); - break; - case MLX5HWS_DEFINER_FNAME_IP_DSCP_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_dscp); - break; - case MLX5HWS_DEFINER_FNAME_IP_ECN_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_ecn); - break; - case MLX5HWS_DEFINER_FNAME_IP_ECN_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_ecn); - break; - case MLX5HWS_DEFINER_FNAME_IP_TTL_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ttl_hoplimit); - break; - case MLX5HWS_DEFINER_FNAME_IP_TTL_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ttl_hoplimit); - break; - case MLX5HWS_DEFINER_FNAME_IPV4_DST_O: - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IPV4_SRC_O: - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IPV4_DST_I: - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IPV4_SRC_I: - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IP_FRAG_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.frag); - break; - case MLX5HWS_DEFINER_FNAME_IP_FRAG_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.frag); - break; - case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_O: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.outer_ipv6_flow_label); - break; - case MLX5HWS_DEFINER_FNAME_IPV6_FLOW_LABEL_I: - HWS_CLEAR_MATCH_PARAM(mask, - 
misc_parameters.inner_ipv6_flow_label); - break; - case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O: - case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O: - case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O: - case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O: - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O: - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O: - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O: - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O: - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); - HWS_CLEAR_MATCH_PARAM(mask, - outer_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I: - case MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I: - case MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I: - case MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I: - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_127_96); - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_95_64); - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_63_32); - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.dst_ipv4_dst_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I: - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I: - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I: - case MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I: - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_127_96); - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_95_64); - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_63_32); - HWS_CLEAR_MATCH_PARAM(mask, - inner_headers.src_ipv4_src_ipv6.ipv6_simple_layout.ipv6_31_0); - break; - case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.ip_protocol); - break; - case MLX5HWS_DEFINER_FNAME_IP_PROTOCOL_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.ip_protocol); - break; - case MLX5HWS_DEFINER_FNAME_L4_SPORT_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_sport); - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_sport); - break; - case MLX5HWS_DEFINER_FNAME_L4_SPORT_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_dport); - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_dport); - break; - case MLX5HWS_DEFINER_FNAME_L4_DPORT_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_dport); - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.udp_dport); - break; - case MLX5HWS_DEFINER_FNAME_L4_DPORT_I: - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.tcp_dport); - HWS_CLEAR_MATCH_PARAM(mask, inner_headers.udp_dport); - break; - case MLX5HWS_DEFINER_FNAME_TCP_FLAGS_O: - HWS_CLEAR_MATCH_PARAM(mask, outer_headers.tcp_flags); - break; - case MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM: - case MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.outer_tcp_seq_num); - HWS_CLEAR_MATCH_PARAM(mask, - 
misc_parameters_3.outer_tcp_ack_num); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.inner_tcp_seq_num); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.inner_tcp_ack_num); - break; - case MLX5HWS_DEFINER_FNAME_GTP_TEID: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_teid); - break; - case MLX5HWS_DEFINER_FNAME_GTP_MSG_TYPE: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_type); - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_msg_flags); - break; - case MLX5HWS_DEFINER_FNAME_GTPU_FIRST_EXT_DW0: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.gtpu_first_ext_dw_0); - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_0); - break; - case MLX5HWS_DEFINER_FNAME_GTPU_DW2: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.gtpu_dw_2); - break; - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_2.outer_first_mpls_over_gre); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_2.outer_first_mpls_over_udp); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.geneve_tlv_option_0_data); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_4.prog_sample_field_id_0); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_4.prog_sample_field_value_0); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_4.prog_sample_field_value_1); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_4.prog_sample_field_id_2); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_4.prog_sample_field_value_2); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_4.prog_sample_field_id_3); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_4.prog_sample_field_value_3); - break; - case MLX5HWS_DEFINER_FNAME_VXLAN_VNI: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.vxlan_vni); - break; - case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_FLAGS: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.outer_vxlan_gpe_flags); - break; - case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_RSVD0: - break; - case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_PROTO: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.outer_vxlan_gpe_next_protocol); - break; - case MLX5HWS_DEFINER_FNAME_VXLAN_GPE_VNI: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.outer_vxlan_gpe_vni); - break; - case MLX5HWS_DEFINER_FNAME_GENEVE_OPT_LEN: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_opt_len); - break; - case MLX5HWS_DEFINER_FNAME_GENEVE_OAM: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_oam); - break; - case MLX5HWS_DEFINER_FNAME_GENEVE_PROTO: - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.geneve_protocol_type); - break; - case MLX5HWS_DEFINER_FNAME_GENEVE_VNI: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.geneve_vni); - break; - case MLX5HWS_DEFINER_FNAME_SOURCE_QP: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_sqn); - break; - case MLX5HWS_DEFINER_FNAME_SOURCE_GVMI: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.source_port); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.source_eswitch_owner_vhca_id); - break; - case MLX5HWS_DEFINER_FNAME_REG_0: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_0); - break; - case MLX5HWS_DEFINER_FNAME_REG_1: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_1); - break; - case MLX5HWS_DEFINER_FNAME_REG_2: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_2); - break; - 
case MLX5HWS_DEFINER_FNAME_REG_3: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_3); - break; - case MLX5HWS_DEFINER_FNAME_REG_4: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_4); - break; - case MLX5HWS_DEFINER_FNAME_REG_5: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_5); - break; - case MLX5HWS_DEFINER_FNAME_REG_7: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_c_7); - break; - case MLX5HWS_DEFINER_FNAME_REG_A: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.metadata_reg_a); - break; - case MLX5HWS_DEFINER_FNAME_GRE_C: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_c_present); - break; - case MLX5HWS_DEFINER_FNAME_GRE_K: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_k_present); - break; - case MLX5HWS_DEFINER_FNAME_GRE_S: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_s_present); - break; - case MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_protocol); - break; - case MLX5HWS_DEFINER_FNAME_GRE_OPT_KEY: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters.gre_key.key); - break; - case MLX5HWS_DEFINER_FNAME_ICMP_DW1: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_header_data); - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_type); - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmp_code); - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters_3.icmpv6_header_data); - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_type); - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_3.icmpv6_code); - break; - case MLX5HWS_DEFINER_FNAME_MPLS0_O: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.outer_first_mpls); - break; - case MLX5HWS_DEFINER_FNAME_MPLS0_I: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_2.inner_first_mpls); - break; - case MLX5HWS_DEFINER_FNAME_TNL_HDR_0: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_0); - break; - case MLX5HWS_DEFINER_FNAME_TNL_HDR_1: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_1); - break; - case MLX5HWS_DEFINER_FNAME_TNL_HDR_2: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_2); - break; - case MLX5HWS_DEFINER_FNAME_TNL_HDR_3: - HWS_CLEAR_MATCH_PARAM(mask, misc_parameters_5.tunnel_header_3); - break; - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK: - case MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK: - /* assuming this is flex parser for geneve option */ - if ((fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER0_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 0) || - (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER1_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 1) || - (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER2_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 2) || - (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER3_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 3) || - (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER4_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 4) || - (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER5_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 5) || - (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER6_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 6) || - (fname == MLX5HWS_DEFINER_FNAME_FLEX_PARSER7_OK && - ctx->caps->flex_parser_id_geneve_tlv_option_0 != 7)) { - mlx5hws_err(ctx, - 
"Complex params: unsupported field %s (%d), flex parser ID for geneve is %d\n", - mlx5hws_definer_fname_to_str(fname), fname, - caps->flex_parser_id_geneve_tlv_option_0); - break; - } - HWS_CLEAR_MATCH_PARAM(mask, - misc_parameters.geneve_tlv_option_0_exist); - break; - case MLX5HWS_DEFINER_FNAME_REG_6: - default: - mlx5hws_err(ctx, "Complex params: unsupported field %s (%d)\n", - mlx5hws_definer_fname_to_str(fname), fname); - break; - } + return hws_match_params_exceeds_definer(ctx, match_criteria_enable, + mask, true); } -static bool -hws_bwc_matcher_complex_params_comb_is_valid(struct mlx5hws_definer_fc *fc, - int fc_sz, - u32 combination_num) +static int +hws_get_last_set_dword_idx(const struct mlx5hws_match_parameters *mask) { - bool m1[MLX5HWS_DEFINER_FNAME_MAX] = {0}; - bool m2[MLX5HWS_DEFINER_FNAME_MAX] = {0}; - bool is_first_matcher; int i; - for (i = 0; i < fc_sz; i++) { - is_first_matcher = !(combination_num & BIT(i)); - if (is_first_matcher) - m1[fc[i].fname] = true; - else - m2[fc[i].fname] = true; - } - - /* Not all the fields can be split into separate matchers. - * Some should be together on the same matcher. - * For example, IPv6 parts - the whole IPv6 address should be on the - * same matcher in order for us to deduce if it's IPv6 or IPv4 address. - */ - if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && - (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || - m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || - m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || - m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) - return false; - - if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_O] && - (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_O] || - m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_O] || - m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_O] || - m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_O])) - return false; + for (i = mask->match_sz / 4 - 1; i >= 0; i--) + if (mask->match_buf[i]) + return i; - if (m1[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && - (m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || - m2[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || - m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || - m2[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) - return false; + return -1; +} - if (m2[MLX5HWS_DEFINER_FNAME_IP_FRAG_I] && - (m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_47_16_I] || - m1[MLX5HWS_DEFINER_FNAME_ETH_SMAC_15_0_I] || - m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_47_16_I] || - m1[MLX5HWS_DEFINER_FNAME_ETH_DMAC_15_0_I])) - return false; +static bool hws_match_mask_is_empty(const struct mlx5hws_match_parameters *mask) +{ + return hws_get_last_set_dword_idx(mask) == -1; +} - /* Don't split outer IPv6 dest address. */ - if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O]) && - (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_O] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_O] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_O] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_O])) - return false; +static bool hws_dword_is_inner_ipaddr_off(int dword_off) +{ + /* IPv4 and IPv6 addresses share the same entry via a union, and the + * source and dest addresses are contiguous in the fte_match_param. So + * we need to check 8 words. + */ + static const int inner_ip_dword_off = + __mlx5_dw_off(fte_match_param, inner_headers.src_ipv4_src_ipv6); - /* Don't split outer IPv6 source address. 
*/ - if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O]) && - (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_O] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_O] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_O] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_O])) - return false; + return dword_off >= inner_ip_dword_off && + dword_off < inner_ip_dword_off + 8; +} - /* Don't split inner IPv6 dest address. */ - if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I]) && - (m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_127_96_I] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_95_64_I] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_63_32_I] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_DST_31_0_I])) - return false; +static bool hws_dword_is_outer_ipaddr_off(int dword_off) +{ + static const int outer_ip_dword_off = + __mlx5_dw_off(fte_match_param, outer_headers.src_ipv4_src_ipv6); - /* Don't split inner IPv6 source address. */ - if ((m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || - m1[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I]) && - (m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_127_96_I] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_95_64_I] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_63_32_I] || - m2[MLX5HWS_DEFINER_FNAME_IPV6_SRC_31_0_I])) - return false; + return dword_off >= outer_ip_dword_off && + dword_off < outer_ip_dword_off + 8; +} - /* Don't split GRE parameters. */ - if ((m1[MLX5HWS_DEFINER_FNAME_GRE_C] || - m1[MLX5HWS_DEFINER_FNAME_GRE_K] || - m1[MLX5HWS_DEFINER_FNAME_GRE_S] || - m1[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL]) && - (m2[MLX5HWS_DEFINER_FNAME_GRE_C] || - m2[MLX5HWS_DEFINER_FNAME_GRE_K] || - m2[MLX5HWS_DEFINER_FNAME_GRE_S] || - m2[MLX5HWS_DEFINER_FNAME_GRE_PROTOCOL])) - return false; +static void hws_add_dword_to_mask(struct mlx5hws_match_parameters *mask, + const struct mlx5hws_match_parameters *orig, + int dword_idx, bool *added_inner_ipv, + bool *added_outer_ipv) +{ + mask->match_buf[dword_idx] |= orig->match_buf[dword_idx]; - /* Don't split TCP ack/seq numbers. */ - if ((m1[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || - m1[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM]) && - (m2[MLX5HWS_DEFINER_FNAME_TCP_ACK_NUM] || - m2[MLX5HWS_DEFINER_FNAME_TCP_SEQ_NUM])) - return false; + *added_inner_ipv = false; + *added_outer_ipv = false; - /* Don't split flex parser. */ - if ((m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || - m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || - m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || - m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || - m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || - m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || - m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || - m1[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7]) && - (m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_0] || - m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_1] || - m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_2] || - m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_3] || - m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_4] || - m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_5] || - m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_6] || - m2[MLX5HWS_DEFINER_FNAME_FLEX_PARSER_7])) - return false; + /* Any IP address fragment must be accompanied by a match on IP version. 
+ * Use the `added_ipv` variables to keep track if we added IP versions + * specifically for this dword, so that we can roll them back if the + * match params become too large to fit into a definer. + */ + if (hws_dword_is_inner_ipaddr_off(dword_idx) && + !MLX5_GET(fte_match_param, mask->match_buf, + inner_headers.ip_version)) { + MLX5_SET(fte_match_param, mask->match_buf, + inner_headers.ip_version, 0xf); + *added_inner_ipv = true; + } + if (hws_dword_is_outer_ipaddr_off(dword_idx) && + !MLX5_GET(fte_match_param, mask->match_buf, + outer_headers.ip_version)) { + MLX5_SET(fte_match_param, mask->match_buf, + outer_headers.ip_version, 0xf); + *added_outer_ipv = true; + } +} - return true; +static void hws_remove_dword_from_mask(struct mlx5hws_match_parameters *mask, + int dword_idx, bool added_inner_ipv, + bool added_outer_ipv) +{ + mask->match_buf[dword_idx] = 0; + if (added_inner_ipv) + MLX5_SET(fte_match_param, mask->match_buf, + inner_headers.ip_version, 0); + if (added_outer_ipv) + MLX5_SET(fte_match_param, mask->match_buf, + outer_headers.ip_version, 0); } -static void -hws_bwc_matcher_complex_params_comb_create(struct mlx5hws_context *ctx, - struct mlx5hws_match_parameters *m, - struct mlx5hws_match_parameters *m1, - struct mlx5hws_match_parameters *m2, - struct mlx5hws_definer_fc *fc, - int fc_sz, - u32 combination_num) +/* Avoid leaving a single lower dword in `mask` if there are others present in + * `orig`. Splitting IPv6 addresses like this causes them to be interpreted as + * IPv4. + */ +static void hws_avoid_ipv6_split_of(struct mlx5hws_match_parameters *orig, + struct mlx5hws_match_parameters *mask, + int off) { - bool is_first_matcher; - int i; + /* Masks are allocated to a full fte_match_param, but it can't hurt to + * double check. + */ + if (orig->match_sz <= off + 3 || mask->match_sz <= off + 3) + return; - memcpy(m1->match_buf, m->match_buf, m->match_sz); - memcpy(m2->match_buf, m->match_buf, m->match_sz); + /* Lower dword is not set, nothing to do. */ + if (!mask->match_buf[off + 3]) + return; - for (i = 0; i < fc_sz; i++) { - is_first_matcher = !(combination_num & BIT(i)); - hws_bwc_matcher_complex_params_clear_fld(ctx, - fc[i].fname, - is_first_matcher ? - m2 : m1); - } + /* Higher dwords also present in `mask`, no ambiguity. */ + if (mask->match_buf[off] || mask->match_buf[off + 1] || + mask->match_buf[off + 2]) + return; + + /* There are no higher dwords in `orig`, i.e. we match on IPv4. */ + if (!orig->match_buf[off] && !orig->match_buf[off + 1] && + !orig->match_buf[off + 2]) + return; - MLX5_SET(fte_match_param, m2->match_buf, - misc_parameters_2.metadata_reg_c_6, -1); + /* Put the lower dword back in `orig`. It is always safe to do this, the + * dword will just be picked up in the next submask. 
+ */ + orig->match_buf[off + 3] = mask->match_buf[off + 3]; + mask->match_buf[off + 3] = 0; } -static void -hws_bwc_matcher_complex_params_destroy(struct mlx5hws_match_parameters *mask_1, - struct mlx5hws_match_parameters *mask_2) +static void hws_avoid_ipv6_split(struct mlx5hws_match_parameters *orig, + struct mlx5hws_match_parameters *mask) { - kfree(mask_1->match_buf); - kfree(mask_2->match_buf); + hws_avoid_ipv6_split_of(orig, mask, + __mlx5_dw_off(fte_match_param, + outer_headers.src_ipv4_src_ipv6)); + hws_avoid_ipv6_split_of(orig, mask, + __mlx5_dw_off(fte_match_param, + outer_headers.dst_ipv4_dst_ipv6)); + hws_avoid_ipv6_split_of(orig, mask, + __mlx5_dw_off(fte_match_param, + inner_headers.src_ipv4_src_ipv6)); + hws_avoid_ipv6_split_of(orig, mask, + __mlx5_dw_off(fte_match_param, + inner_headers.dst_ipv4_dst_ipv6)); } -static int -hws_bwc_matcher_complex_params_create(struct mlx5hws_context *ctx, - u8 match_criteria, - struct mlx5hws_match_parameters *mask, - struct mlx5hws_match_parameters *mask_1, - struct mlx5hws_match_parameters *mask_2) +/* Build a subset of the `orig` match parameters into `mask`. This subset is + * guaranteed to fit in a single definer an as such is a candidate for being a + * part of a complex matcher. Upon successful execution, the match params that + * go into `mask` are cleared from `orig`. + */ +static int hws_get_simple_params(struct mlx5hws_context *ctx, u8 match_criteria, + struct mlx5hws_match_parameters *orig, + struct mlx5hws_match_parameters *mask) { - struct mlx5hws_definer_fc *fc; - u32 num_of_combinations; - int fc_sz = 0; - int res = 0; - u32 i; - - if (MLX5_GET(fte_match_param, mask->match_buf, - misc_parameters_2.metadata_reg_c_6)) { - mlx5hws_err(ctx, "Complex matcher: REG_C_6 matching is reserved\n"); - res = -EINVAL; - goto out; - } + bool added_inner_ipv, added_outer_ipv; + int dword_idx; + u32 *backup; + int ret; - mask_1->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), - GFP_KERNEL); - mask_2->match_buf = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), - GFP_KERNEL); - if (!mask_1->match_buf || !mask_2->match_buf) { - mlx5hws_err(ctx, "Complex matcher: failed to allocate match_param\n"); - res = -ENOMEM; - goto free_params; - } + dword_idx = hws_get_last_set_dword_idx(orig); + /* Nothing to do, we consumed all of the match params before. */ + if (dword_idx == -1) + return 0; - mask_1->match_sz = mask->match_sz; - mask_2->match_sz = mask->match_sz; + backup = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!backup) + return -ENOMEM; - fc = mlx5hws_definer_conv_match_params_to_compressed_fc(ctx, - match_criteria, - mask->match_buf, - &fc_sz); - if (!fc) { - res = -ENOMEM; - goto free_params; - } + while (1) { + dword_idx = hws_get_last_set_dword_idx(orig); + /* Nothing to do, we consumed all of the original match params + * into this subset, which still fits into a single matcher. + */ + if (dword_idx == -1) { + ret = 0; + goto free_backup; + } - if (fc_sz >= sizeof(num_of_combinations) * BITS_PER_BYTE) { - mlx5hws_err(ctx, - "Complex matcher: too many match parameters (%d)\n", - fc_sz); - res = -EINVAL; - goto free_fc; + memcpy(backup, mask->match_buf, mask->match_sz); + + /* Try to add this dword to the current subset. */ + hws_add_dword_to_mask(mask, orig, dword_idx, &added_inner_ipv, + &added_outer_ipv); + + if (hws_match_params_exceeds_definer(ctx, match_criteria, mask, + false)) { + /* We just added a match param that makes the definer + * too large. Revert and return what we had before. 
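
hws_get_simple_params() above is a greedy loop: it keeps moving the highest set dword from the leftover mask into the current submask until the result no longer fits into a single definer, then restores the pre-overflow snapshot and stops, leaving the remainder for the next submatcher. A compact stand-alone sketch of that control flow (illustrative only), with a trivial toy_fits_in_definer() predicate standing in for the real definer-layout check:

#include <stdbool.h>
#include <stdint.h>
#include <string.h>

#define TOY_MASK_DWORDS 64

/* Hypothetical capacity check: pretend a definer can hold at most 8 dwords. */
static bool toy_fits_in_definer(const uint32_t *mask)
{
	int i, used = 0;

	for (i = 0; i < TOY_MASK_DWORDS; i++)
		used += !!mask[i];
	return used <= 8;
}

static int toy_last_set_dword(const uint32_t *mask)
{
	int i;

	for (i = TOY_MASK_DWORDS - 1; i >= 0; i--)
		if (mask[i])
			return i;
	return -1;
}

/* Move dwords from leftover[] into submask[] while the result still fits. */
static void toy_fill_submask(uint32_t *leftover, uint32_t *submask)
{
	uint32_t backup[TOY_MASK_DWORDS];
	int idx;

	while ((idx = toy_last_set_dword(leftover)) >= 0) {
		memcpy(backup, submask, sizeof(backup));
		submask[idx] |= leftover[idx];

		if (!toy_fits_in_definer(submask)) {
			/* Too big: restore the last good snapshot and stop;
			 * the remaining dwords go to the next submatcher.
			 */
			memcpy(submask, backup, sizeof(backup));
			return;
		}
		leftover[idx] = 0;
	}
}
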
+ * Note that we can't just zero out the affected fields, + * because it's possible that the dword we're looking at + * wasn't zero before (e.g. it included auto-added + * matches in IP version. This is why we employ the + * rather cumbersome memcpy for backing up. + */ + memcpy(mask->match_buf, backup, mask->match_sz); + /* Possible future improvement: We can't add any more + * dwords, but it may be possible to squeeze in + * individual bytes, as definers have special slots for + * those. + * + * For now, keep the code simple. This results in an + * extra submatcher in some cases, but it's good enough. + */ + ret = 0; + break; + } + + /* The current subset of match params still fits in a single + * definer. Remove the dword from the original mask. + * + * Also remove any explicit match on IP version if we just + * included one here. We will still automatically add it to + * accompany any IP address fragment, but do not need to + * consider it by itself. + */ + hws_remove_dword_from_mask(orig, dword_idx, added_inner_ipv, + added_outer_ipv); } - /* We have list of all the match fields from the match parameter. - * Now try all the possibilities of splitting them into two match - * buffers and look for the supported combination. + /* Make sure we have not picked up a single lower dword of an IPv6 + * address, as the firmware will erroneously treat it as an IPv4 + * address. */ - num_of_combinations = 1 << fc_sz; + hws_avoid_ipv6_split(orig, mask); - /* Start from combination at index 1 - we know that 0 is unsupported */ - for (i = 1; i < num_of_combinations; i++) { - if (!hws_bwc_matcher_complex_params_comb_is_valid(fc, fc_sz, i)) - continue; +free_backup: + kfree(backup); - hws_bwc_matcher_complex_params_comb_create(ctx, - mask, mask_1, mask_2, - fc, fc_sz, i); - /* We now have two separate sets of match params. - * Check if each of them can be used in its own matcher. + return ret; +} + +static int +hws_bwc_matcher_split_mask(struct mlx5hws_context *ctx, u8 match_criteria, + const struct mlx5hws_match_parameters *mask, + struct mlx5hws_match_parameters *submasks, + int *num_submasks) +{ + struct mlx5hws_match_parameters mask_copy; + int ret, i = 0; + + mask_copy.match_sz = MLX5_ST_SZ_BYTES(fte_match_param); + mask_copy.match_buf = kzalloc(mask_copy.match_sz, GFP_KERNEL); + if (!mask_copy.match_buf) + return -ENOMEM; + + memcpy(mask_copy.match_buf, mask->match_buf, mask->match_sz); + + while (!hws_match_mask_is_empty(&mask_copy)) { + if (i >= MLX5HWS_BWC_COMPLEX_MAX_SUBMATCHERS) { + mlx5hws_err(ctx, + "Complex matcher: mask too large for %d matchers\n", + MLX5HWS_BWC_COMPLEX_MAX_SUBMATCHERS); + ret = -E2BIG; + goto free_copy; + } + /* All but the first matcher need to match on register C6 to + * connect pieces of the complex rule together. 
*/ - if (!mlx5hws_bwc_match_params_is_complex(ctx, - match_criteria, - mask_1) && - !mlx5hws_bwc_match_params_is_complex(ctx, - match_criteria, - mask_2)) - break; + if (i > 0) { + MLX5_SET(fte_match_param, submasks[i].match_buf, + misc_parameters_2.metadata_reg_c_6, -1); + match_criteria |= MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2; + } + ret = hws_get_simple_params(ctx, match_criteria, &mask_copy, + &submasks[i]); + if (ret < 0) + goto free_copy; + i++; } - if (i == num_of_combinations) { - /* We've scanned all the combinations, but to no avail */ - mlx5hws_err(ctx, "Complex matcher: couldn't find match params combination\n"); - res = -EINVAL; - goto free_fc; - } + *num_submasks = i; + ret = 0; - kfree(fc); - return 0; +free_copy: + kfree(mask_copy.match_buf); -free_fc: - kfree(fc); -free_params: - hws_bwc_matcher_complex_params_destroy(mask_1, mask_2); -out: - return res; + return ret; } -static int -hws_bwc_isolated_table_create(struct mlx5hws_bwc_matcher *bwc_matcher, - struct mlx5hws_table *table) +static struct mlx5hws_table * +hws_isolated_table_create(const struct mlx5hws_bwc_matcher *cmatcher) { + struct mlx5hws_bwc_complex_submatcher *first_subm; struct mlx5hws_cmd_ft_modify_attr ft_attr = {0}; - struct mlx5hws_context *ctx = table->ctx; struct mlx5hws_table_attr tbl_attr = {0}; - struct mlx5hws_table *isolated_tbl; - int ret = 0; + struct mlx5hws_table *orig_tbl; + struct mlx5hws_context *ctx; + struct mlx5hws_table *tbl; + int ret; - tbl_attr.type = table->type; - tbl_attr.level = table->level; + first_subm = &cmatcher->complex->submatchers[0]; + orig_tbl = first_subm->tbl; + ctx = orig_tbl->ctx; - bwc_matcher->complex->isolated_tbl = - mlx5hws_table_create(ctx, &tbl_attr); - isolated_tbl = bwc_matcher->complex->isolated_tbl; - if (!isolated_tbl) - return -EINVAL; + tbl_attr.type = orig_tbl->type; + tbl_attr.level = orig_tbl->level; + tbl = mlx5hws_table_create(ctx, &tbl_attr); + if (!tbl) + return ERR_PTR(-EINVAL); - /* Set the default miss of the isolated table to - * point to the end anchor of the original matcher. + /* Set the default miss of the isolated table to point + * to the end anchor of the original matcher. */ - mlx5hws_cmd_set_attr_connect_miss_tbl(ctx, - isolated_tbl->fw_ft_type, - isolated_tbl->type, - &ft_attr); - ft_attr.table_miss_id = bwc_matcher->matcher->end_ft_id; - - ret = mlx5hws_cmd_flow_table_modify(ctx->mdev, - &ft_attr, - isolated_tbl->ft_id); + mlx5hws_cmd_set_attr_connect_miss_tbl(ctx, tbl->fw_ft_type, + tbl->type, &ft_attr); + ft_attr.table_miss_id = first_subm->bwc_matcher->matcher->end_ft_id; + + ret = mlx5hws_cmd_flow_table_modify(ctx->mdev, &ft_attr, tbl->ft_id); if (ret) { - mlx5hws_err(ctx, "Failed setting isolated tbl default miss\n"); + mlx5hws_err(ctx, "Complex matcher: failed to set isolated tbl default miss\n"); goto destroy_tbl; } - return 0; + return tbl; destroy_tbl: - mlx5hws_table_destroy(isolated_tbl); - return ret; + mlx5hws_table_destroy(tbl); + + return ERR_PTR(ret); } -static void hws_bwc_isolated_table_destroy(struct mlx5hws_table *isolated_tbl) +static int hws_submatcher_init_first(struct mlx5hws_bwc_matcher *cmatcher, + struct mlx5hws_table *table, u32 priority, + u8 match_criteria, + struct mlx5hws_match_parameters *mask) { - /* This table is isolated - no table is pointing to it, no need to - * disconnect it from anywhere, it won't affect any other table's miss. 
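
Taken together, the split masks and the per-submatcher isolated tables form a chain: the first submatcher lives in the original table, every non-last subrule writes a chain id into REG_C_6 and jumps to the next isolated table, the next submatcher matches on that chain id, and only the last subrule runs the user's real actions; misses fall through to the end anchor of the original matcher. A toy software walk of that control flow (the real lookup happens in hardware; types and values here are hypothetical):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_NUM_SUBMATCHERS 3

struct toy_subrule {
	uint32_t match_c6; /* expected REG_C_6 value (unused for index 0) */
	uint32_t set_c6;   /* chain id written for the next submatcher    */
	bool     is_last;  /* the last subrule runs the user's actions    */
};

static bool toy_walk(const struct toy_subrule *chain, int n)
{
	uint32_t reg_c6 = 0;
	int i;

	for (i = 0; i < n; i++) {
		if (i > 0 && chain[i].match_c6 != reg_c6)
			return false;     /* miss: fall through to end_ft   */
		if (chain[i].is_last)
			return true;      /* user-requested actions applied */
		reg_c6 = chain[i].set_c6; /* set C6, go to isolated table   */
	}
	return false;
}

int main(void)
{
	const struct toy_subrule chain[TOY_NUM_SUBMATCHERS] = {
		{ .set_c6 = 7 },
		{ .match_c6 = 7, .set_c6 = 9 },
		{ .match_c6 = 9, .is_last = true },
	};

	printf("complex rule hit: %d\n", toy_walk(chain, TOY_NUM_SUBMATCHERS));
	return 0;
}
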
+ enum mlx5hws_action_type action_types[HWS_NUM_CHAIN_ACTIONS]; + struct mlx5hws_bwc_complex_submatcher *subm; + int ret; + + subm = &cmatcher->complex->submatchers[0]; + + /* The first submatcher lives in the original table and does not have an + * associated jump to table action. It also points to the outer complex + * matcher. */ - mlx5hws_table_destroy(isolated_tbl); + subm->tbl = table; + subm->action_tbl = NULL; + subm->bwc_matcher = cmatcher; + + action_types[0] = MLX5HWS_ACTION_TYP_MODIFY_HDR; + action_types[1] = MLX5HWS_ACTION_TYP_TBL; + action_types[2] = MLX5HWS_ACTION_TYP_LAST; + + ret = mlx5hws_bwc_matcher_create_simple(subm->bwc_matcher, subm->tbl, + priority, match_criteria, mask, + action_types); + if (ret) + return ret; + + subm->bwc_matcher->matcher_type = MLX5HWS_BWC_MATCHER_COMPLEX_FIRST; + + ret = rhashtable_init(&subm->rules_hash, &hws_rules_hash_params); + if (ret) + goto destroy_matcher; + mutex_init(&subm->hash_lock); + ida_init(&subm->chain_ida); + + return 0; + +destroy_matcher: + mlx5hws_bwc_matcher_destroy_simple(subm->bwc_matcher); + + return ret; } -static int -hws_bwc_isolated_matcher_create(struct mlx5hws_bwc_matcher *bwc_matcher, - struct mlx5hws_table *table, - u8 match_criteria_enable, - struct mlx5hws_match_parameters *mask) +static int hws_submatcher_init(struct mlx5hws_bwc_matcher *cmatcher, int idx, + struct mlx5hws_table *table, u32 priority, + u8 match_criteria, + struct mlx5hws_match_parameters *mask) { - struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; - struct mlx5hws_bwc_matcher *isolated_bwc_matcher; - struct mlx5hws_context *ctx = table->ctx; + enum mlx5hws_action_type action_types[HWS_NUM_CHAIN_ACTIONS]; + struct mlx5hws_bwc_complex_submatcher *subm; + bool is_last; int ret; - isolated_bwc_matcher = kzalloc(sizeof(*bwc_matcher), GFP_KERNEL); - if (!isolated_bwc_matcher) - return -ENOMEM; + if (!idx) + return hws_submatcher_init_first(cmatcher, table, priority, + match_criteria, mask); + + subm = &cmatcher->complex->submatchers[idx]; + is_last = idx == cmatcher->complex->num_submatchers - 1; + + subm->tbl = hws_isolated_table_create(cmatcher); + if (IS_ERR(subm->tbl)) + return PTR_ERR(subm->tbl); + + subm->action_tbl = + mlx5hws_action_create_dest_table(subm->tbl->ctx, subm->tbl, + MLX5HWS_ACTION_FLAG_HWS_FDB); + if (!subm->action_tbl) { + ret = -EINVAL; + goto destroy_tbl; + } + + subm->bwc_matcher = kzalloc(sizeof(*subm->bwc_matcher), GFP_KERNEL); + if (!subm->bwc_matcher) { + ret = -ENOMEM; + goto destroy_action; + } - bwc_matcher->complex->isolated_bwc_matcher = isolated_bwc_matcher; + /* Every matcher other than the first also matches of register C6 to + * bind subrules together in the complex rule using the chain ids. + */ + match_criteria |= MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2; - /* Isolated BWC matcher needs access to the first BWC matcher */ - isolated_bwc_matcher->complex_first_bwc_matcher = bwc_matcher; + action_types[0] = MLX5HWS_ACTION_TYP_MODIFY_HDR; + action_types[1] = MLX5HWS_ACTION_TYP_TBL; + action_types[2] = MLX5HWS_ACTION_TYP_LAST; - /* Isolated matcher needs to match on REG_C_6, - * so make sure its criteria bit is on. + /* Every matcher other than the last sets register C6 and jumps to the + * next submatcher's table. The final submatcher will use the + * user-supplied actions and will attach an action template at rule + * insertion time. 
*/ - match_criteria_enable |= MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2; - - ret = mlx5hws_bwc_matcher_create_simple(isolated_bwc_matcher, - isolated_tbl, - 0, - match_criteria_enable, - mask, - NULL); - if (ret) { - mlx5hws_err(ctx, "Complex matcher: failed creating isolated BWC matcher\n"); + ret = mlx5hws_bwc_matcher_create_simple(subm->bwc_matcher, subm->tbl, + priority, match_criteria, mask, + is_last ? NULL : action_types); + if (ret) goto free_matcher; - } + + subm->bwc_matcher->matcher_type = + MLX5HWS_BWC_MATCHER_COMPLEX_SUBMATCHER; + + ret = rhashtable_init(&subm->rules_hash, &hws_rules_hash_params); + if (ret) + goto destroy_matcher; + mutex_init(&subm->hash_lock); + ida_init(&subm->chain_ida); return 0; +destroy_matcher: + mlx5hws_bwc_matcher_destroy_simple(subm->bwc_matcher); free_matcher: - kfree(bwc_matcher->complex->isolated_bwc_matcher); + kfree(subm->bwc_matcher); +destroy_action: + mlx5hws_action_destroy(subm->action_tbl); +destroy_tbl: + mlx5hws_table_destroy(subm->tbl); + return ret; } -static void -hws_bwc_isolated_matcher_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +static void hws_submatcher_destroy(struct mlx5hws_bwc_matcher *cmatcher, + int idx) { - mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); - kfree(bwc_matcher); + struct mlx5hws_bwc_complex_submatcher *subm; + + subm = &cmatcher->complex->submatchers[idx]; + + ida_destroy(&subm->chain_ida); + mutex_destroy(&subm->hash_lock); + rhashtable_destroy(&subm->rules_hash); + + if (subm->bwc_matcher) { + mlx5hws_bwc_matcher_destroy_simple(subm->bwc_matcher); + if (idx) + kfree(subm->bwc_matcher); + } + + /* We own all of the isolated tables, but not the original one. */ + if (idx) { + mlx5hws_action_destroy(subm->action_tbl); + mlx5hws_table_destroy(subm->tbl); + } } static int -hws_bwc_isolated_actions_create(struct mlx5hws_bwc_matcher *bwc_matcher, - struct mlx5hws_table *table) +hws_complex_data_actions_init(struct mlx5hws_bwc_matcher_complex_data *cdata) { - struct mlx5hws_table *isolated_tbl = bwc_matcher->complex->isolated_tbl; + struct mlx5hws_context *ctx = cdata->submatchers[0].tbl->ctx; u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0}; - struct mlx5hws_context *ctx = table->ctx; struct mlx5hws_action_mh_pattern ptrn; int ret = 0; - /* Create action to jump to isolated table */ - - bwc_matcher->complex->action_go_to_tbl = - mlx5hws_action_create_dest_table(ctx, - isolated_tbl, - MLX5HWS_ACTION_FLAG_HWS_FDB); - if (!bwc_matcher->complex->action_go_to_tbl) { - mlx5hws_err(ctx, "Complex matcher: failed to create go-to-tbl action\n"); - return -EINVAL; - } - /* Create modify header action to set REG_C_6 */ - MLX5_SET(set_action_in, modify_hdr_action, action_type, MLX5_MODIFICATION_TYPE_SET); MLX5_SET(set_action_in, modify_hdr_action, @@ -895,19 +553,18 @@ hws_bwc_isolated_actions_create(struct mlx5hws_bwc_matcher *bwc_matcher, ptrn.data = (void *)modify_hdr_action; ptrn.sz = MLX5HWS_ACTION_DOUBLE_SIZE; - bwc_matcher->complex->action_metadata = + cdata->action_metadata = mlx5hws_action_create_modify_header(ctx, 1, &ptrn, 0, MLX5HWS_ACTION_FLAG_HWS_FDB); - if (!bwc_matcher->complex->action_metadata) { - ret = -EINVAL; - goto destroy_action_go_to_tbl; + if (!cdata->action_metadata) { + mlx5hws_err(ctx, "Complex matcher: failed to create set reg C6 action\n"); + return -EINVAL; } /* Create last action */ - - bwc_matcher->complex->action_last = + cdata->action_last = mlx5hws_action_create_last(ctx, MLX5HWS_ACTION_FLAG_HWS_FDB); - if (!bwc_matcher->complex->action_last) { + if (!cdata->action_last) { 
mlx5hws_err(ctx, "Complex matcher: failed to create last action\n"); ret = -EINVAL; goto destroy_action_metadata; @@ -916,196 +573,130 @@ hws_bwc_isolated_actions_create(struct mlx5hws_bwc_matcher *bwc_matcher, return 0; destroy_action_metadata: - mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); -destroy_action_go_to_tbl: - mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); + mlx5hws_action_destroy(cdata->action_metadata); + return ret; } static void -hws_bwc_isolated_actions_destroy(struct mlx5hws_bwc_matcher *bwc_matcher) +hws_complex_data_actions_destroy(struct mlx5hws_bwc_matcher_complex_data *cdata) { - mlx5hws_action_destroy(bwc_matcher->complex->action_last); - mlx5hws_action_destroy(bwc_matcher->complex->action_metadata); - mlx5hws_action_destroy(bwc_matcher->complex->action_go_to_tbl); + mlx5hws_action_destroy(cdata->action_last); + mlx5hws_action_destroy(cdata->action_metadata); } int mlx5hws_bwc_matcher_create_complex(struct mlx5hws_bwc_matcher *bwc_matcher, struct mlx5hws_table *table, - u32 priority, - u8 match_criteria_enable, + u32 priority, u8 match_criteria_enable, struct mlx5hws_match_parameters *mask) { - enum mlx5hws_action_type complex_init_action_types[3]; - struct mlx5hws_bwc_matcher *isolated_bwc_matcher; - struct mlx5hws_match_parameters mask_1 = {0}; - struct mlx5hws_match_parameters mask_2 = {0}; + struct mlx5hws_match_parameters + submasks[MLX5HWS_BWC_COMPLEX_MAX_SUBMATCHERS] = {0}; + struct mlx5hws_bwc_matcher_complex_data *cdata; struct mlx5hws_context *ctx = table->ctx; - int ret; - - ret = hws_bwc_matcher_complex_params_create(table->ctx, - match_criteria_enable, - mask, &mask_1, &mask_2); - if (ret) - goto err; - - bwc_matcher->complex = - kzalloc(sizeof(*bwc_matcher->complex), GFP_KERNEL); - if (!bwc_matcher->complex) { - ret = -ENOMEM; - goto free_masks; - } + int num_submatchers; + int i, ret; - ret = rhashtable_init(&bwc_matcher->complex->refcount_hash, - &hws_refcount_hash); - if (ret) { - mlx5hws_err(ctx, "Complex matcher: failed to initialize rhashtable\n"); - goto free_complex; + for (i = 0; i < ARRAY_SIZE(submasks); i++) { + submasks[i].match_sz = MLX5_ST_SZ_BYTES(fte_match_param); + submasks[i].match_buf = kzalloc(submasks[i].match_sz, + GFP_KERNEL); + if (!submasks[i].match_buf) { + ret = -ENOMEM; + goto free_submasks; + } } - mutex_init(&bwc_matcher->complex->hash_lock); - ida_init(&bwc_matcher->complex->metadata_ida); - - /* Create initial action template for the first matcher. - * Usually the initial AT is just dummy, but in case of complex - * matcher we know exactly which actions should it have. 
- */ - - complex_init_action_types[0] = MLX5HWS_ACTION_TYP_MODIFY_HDR; - complex_init_action_types[1] = MLX5HWS_ACTION_TYP_TBL; - complex_init_action_types[2] = MLX5HWS_ACTION_TYP_LAST; - - /* Create the first matcher */ - - ret = mlx5hws_bwc_matcher_create_simple(bwc_matcher, - table, - priority, - match_criteria_enable, - &mask_1, - complex_init_action_types); + ret = hws_bwc_matcher_split_mask(ctx, match_criteria_enable, mask, + submasks, &num_submatchers); if (ret) - goto destroy_ida; - - /* Create isolated table to hold the second isolated matcher */ + goto free_submasks; - ret = hws_bwc_isolated_table_create(bwc_matcher, table); - if (ret) { - mlx5hws_err(ctx, "Complex matcher: failed creating isolated table\n"); - goto destroy_first_matcher; + cdata = kzalloc(sizeof(*cdata), GFP_KERNEL); + if (!cdata) { + ret = -ENOMEM; + goto free_submasks; } - /* Now create the second BWC matcher - the isolated one */ + bwc_matcher->complex = cdata; + cdata->num_submatchers = num_submatchers; - ret = hws_bwc_isolated_matcher_create(bwc_matcher, table, - match_criteria_enable, &mask_2); - if (ret) { - mlx5hws_err(ctx, "Complex matcher: failed creating isolated matcher\n"); - goto destroy_isolated_tbl; + for (i = 0; i < num_submatchers; i++) { + ret = hws_submatcher_init(bwc_matcher, i, table, priority, + match_criteria_enable, &submasks[i]); + if (ret) + goto destroy_submatchers; } - /* Create action for isolated matcher's rules */ - - ret = hws_bwc_isolated_actions_create(bwc_matcher, table); - if (ret) { - mlx5hws_err(ctx, "Complex matcher: failed creating isolated actions\n"); - goto destroy_isolated_matcher; - } + ret = hws_complex_data_actions_init(cdata); + if (ret) + goto destroy_submatchers; - hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); - return 0; + ret = 0; + goto free_submasks; -destroy_isolated_matcher: - isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher; - hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); -destroy_isolated_tbl: - hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); -destroy_first_matcher: - mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); -destroy_ida: - ida_destroy(&bwc_matcher->complex->metadata_ida); - mutex_destroy(&bwc_matcher->complex->hash_lock); - rhashtable_destroy(&bwc_matcher->complex->refcount_hash); -free_complex: - kfree(bwc_matcher->complex); +destroy_submatchers: + while (i--) + hws_submatcher_destroy(bwc_matcher, i); + kfree(cdata); bwc_matcher->complex = NULL; -free_masks: - hws_bwc_matcher_complex_params_destroy(&mask_1, &mask_2); -err: + +free_submasks: + for (i = 0; i < ARRAY_SIZE(submasks); i++) + kfree(submasks[i].match_buf); + return ret; } void mlx5hws_bwc_matcher_destroy_complex(struct mlx5hws_bwc_matcher *bwc_matcher) { - struct mlx5hws_bwc_matcher *isolated_bwc_matcher = - bwc_matcher->complex->isolated_bwc_matcher; - - hws_bwc_isolated_actions_destroy(bwc_matcher); - hws_bwc_isolated_matcher_destroy(isolated_bwc_matcher); - hws_bwc_isolated_table_destroy(bwc_matcher->complex->isolated_tbl); - mlx5hws_bwc_matcher_destroy_simple(bwc_matcher); - ida_destroy(&bwc_matcher->complex->metadata_ida); - mutex_destroy(&bwc_matcher->complex->hash_lock); - rhashtable_destroy(&bwc_matcher->complex->refcount_hash); + int i; + + hws_complex_data_actions_destroy(bwc_matcher->complex); + for (i = 0; i < bwc_matcher->complex->num_submatchers; i++) + hws_submatcher_destroy(bwc_matcher, i); kfree(bwc_matcher->complex); bwc_matcher->complex = NULL; } -static void -hws_bwc_matcher_complex_hash_lock(struct 
mlx5hws_bwc_matcher *bwc_matcher) -{ - mutex_lock(&bwc_matcher->complex->hash_lock); -} - -static void -hws_bwc_matcher_complex_hash_unlock(struct mlx5hws_bwc_matcher *bwc_matcher) -{ - mutex_unlock(&bwc_matcher->complex->hash_lock); -} - static int -hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, - struct mlx5hws_match_parameters *params) +hws_complex_get_subrule_data(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_bwc_complex_submatcher *subm, + u32 *match_params) +__must_hold(&subm->hash_lock) { - struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - struct mlx5hws_bwc_complex_rule_hash_node *node, *old_node; - struct rhashtable *refcount_hash; - int ret, i; - - bwc_rule->complex_hash_node = NULL; + struct mlx5hws_bwc_matcher *bwc_matcher = subm->bwc_matcher; + struct mlx5hws_bwc_complex_subrule_data *sr_data, *old_data; + struct mlx5hws_match_template *mt; + int ret; - node = kzalloc(sizeof(*node), GFP_KERNEL); - if (unlikely(!node)) + sr_data = kzalloc(sizeof(*sr_data), GFP_KERNEL); + if (!sr_data) return -ENOMEM; - ret = ida_alloc(&bwc_matcher->complex->metadata_ida, GFP_KERNEL); + ret = ida_alloc(&subm->chain_ida, GFP_KERNEL); if (ret < 0) - goto err_free_node; - node->tag = ret; + goto free_sr_data; + sr_data->chain_id = ret; - refcount_set(&node->refcount, 1); + refcount_set(&sr_data->refcount, 1); - /* Clear match buffer - turn off all the unrelated fields - * in accordance with the match params mask for the first - * matcher out of the two parts of the complex matcher. - * The resulting mask is the key for the hash. - */ - for (i = 0; i < MLX5_ST_SZ_DW_MATCH_PARAM; i++) - node->match_buf[i] = params->match_buf[i] & - bwc_matcher->mt->match_param[i]; - - refcount_hash = &bwc_matcher->complex->refcount_hash; - old_node = rhashtable_lookup_get_insert_fast(refcount_hash, - &node->hash_node, - hws_refcount_hash); - if (IS_ERR(old_node)) { - ret = PTR_ERR(old_node); - goto err_free_ida; + mt = bwc_matcher->matcher->mt; + mlx5hws_definer_create_tag(match_params, mt->fc, mt->fc_sz, + (u8 *)&sr_data->match_tag); + + old_data = rhashtable_lookup_get_insert_fast(&subm->rules_hash, + &sr_data->hash_node, + hws_rules_hash_params); + if (IS_ERR(old_data)) { + ret = PTR_ERR(old_data); + goto free_ida; } - if (old_node) { + if (old_data) { /* Rule with the same tag already exists - update refcount */ - refcount_inc(&old_node->refcount); + refcount_inc(&old_data->refcount); /* Let the new rule use the same tag as the existing rule. * Note that we don't have any indication for the rule creation * process that a rule with similar matching params already @@ -1114,247 +705,281 @@ hws_bwc_rule_complex_hash_node_get(struct mlx5hws_bwc_rule *bwc_rule, * There's some performance advantage in skipping such cases, * so this is left for future optimizations. 
*/ - ida_free(&bwc_matcher->complex->metadata_ida, node->tag); - kfree(node); - node = old_node; + bwc_rule->subrule_data = old_data; + ret = 0; + goto free_ida; } - bwc_rule->complex_hash_node = node; + bwc_rule->subrule_data = sr_data; return 0; -err_free_ida: - ida_free(&bwc_matcher->complex->metadata_ida, node->tag); -err_free_node: - kfree(node); +free_ida: + ida_free(&subm->chain_ida, sr_data->chain_id); +free_sr_data: + kfree(sr_data); + return ret; } static void -hws_bwc_rule_complex_hash_node_put(struct mlx5hws_bwc_rule *bwc_rule, - bool *is_last_rule) +hws_complex_put_subrule_data(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_bwc_complex_submatcher *subm, + bool *is_last_rule) +__must_hold(&subm->hash_lock) { - struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - struct mlx5hws_bwc_complex_rule_hash_node *node; + struct mlx5hws_bwc_complex_subrule_data *sr_data; if (is_last_rule) *is_last_rule = false; - node = bwc_rule->complex_hash_node; - if (refcount_dec_and_test(&node->refcount)) { - rhashtable_remove_fast(&bwc_matcher->complex->refcount_hash, - &node->hash_node, - hws_refcount_hash); - ida_free(&bwc_matcher->complex->metadata_ida, node->tag); - kfree(node); + sr_data = bwc_rule->subrule_data; + if (refcount_dec_and_test(&sr_data->refcount)) { + rhashtable_remove_fast(&subm->rules_hash, + &sr_data->hash_node, + hws_rules_hash_params); + ida_free(&subm->chain_ida, sr_data->chain_id); + kfree(sr_data); if (is_last_rule) *is_last_rule = true; } - bwc_rule->complex_hash_node = NULL; + bwc_rule->subrule_data = NULL; } -int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule, - struct mlx5hws_match_parameters *params, - u32 flow_source, - struct mlx5hws_rule_action rule_actions[], - u16 bwc_queue_idx) +static int hws_complex_subrule_create(struct mlx5hws_bwc_matcher *cmatcher, + struct mlx5hws_bwc_rule *subrule, + u32 *match_params, u32 flow_source, + int bwc_queue_idx, int subm_idx, + struct mlx5hws_rule_action *actions, + u32 *chain_id) { - struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; - struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_rule_action chain_actions[HWS_NUM_CHAIN_ACTIONS] = {0}; u8 modify_hdr_action[MLX5_ST_SZ_BYTES(set_action_in)] = {0}; - struct mlx5hws_rule_action rule_actions_1[3] = {0}; - struct mlx5hws_bwc_matcher *isolated_bwc_matcher; - u32 *match_buf_2; - u32 metadata_val; - int ret = 0; + struct mlx5hws_bwc_matcher_complex_data *cdata; + struct mlx5hws_bwc_complex_submatcher *subm; + int ret; - isolated_bwc_matcher = bwc_matcher->complex->isolated_bwc_matcher; - bwc_rule->isolated_bwc_rule = - mlx5hws_bwc_rule_alloc(isolated_bwc_matcher); - if (unlikely(!bwc_rule->isolated_bwc_rule)) - return -ENOMEM; + cdata = cmatcher->complex; + subm = &cdata->submatchers[subm_idx]; - hws_bwc_matcher_complex_hash_lock(bwc_matcher); + mutex_lock(&subm->hash_lock); - /* Get a new hash node for this complex rule. - * If this is a unique set of match params for the first matcher, - * we will get a new hash node with newly allocated IDA. - * Otherwise we will get an existing node with IDA and updated refcount. 
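
The subrule-data handling above exists because two different complex rules can produce identical subrules within a given submatcher; those collapse onto one physical rule whose lifetime is tracked by a refcount, and only the last reference actually deletes it. A stand-alone sketch of that lookup-or-insert-with-refcount pattern (illustrative only, with a fixed array standing in for the rhashtable and a plain integer for the match tag):

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

#define TOY_MAX_ENTRIES 16

struct toy_subrule_data {
	uint64_t tag;      /* stand-in for the rule match tag */
	unsigned refcount; /* 0 means the slot is free        */
	uint32_t chain_id;
};

static struct toy_subrule_data toy_table[TOY_MAX_ENTRIES];

/* Return the shared entry for @tag, creating it if needed; NULL if full. */
static struct toy_subrule_data *toy_get(uint64_t tag, uint32_t new_chain_id)
{
	struct toy_subrule_data *free_slot = NULL;
	size_t i;

	for (i = 0; i < TOY_MAX_ENTRIES; i++) {
		if (toy_table[i].refcount && toy_table[i].tag == tag) {
			toy_table[i].refcount++; /* share the existing rule */
			return &toy_table[i];
		}
		if (!toy_table[i].refcount && !free_slot)
			free_slot = &toy_table[i];
	}
	if (!free_slot)
		return NULL;
	free_slot->tag = tag;
	free_slot->refcount = 1;
	free_slot->chain_id = new_chain_id;
	return free_slot;
}

/* Drop one reference; report whether the caller held the last one and must
 * therefore delete the underlying physical rule.
 */
static bool toy_put(struct toy_subrule_data *e)
{
	return --e->refcount == 0;
}
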
- */ - ret = hws_bwc_rule_complex_hash_node_get(bwc_rule, params); - if (unlikely(ret)) { - mlx5hws_err(ctx, "Complex rule: failed getting RHT node for this rule\n"); - goto free_isolated_rule; + ret = hws_complex_get_subrule_data(subrule, subm, match_params); + if (ret) + goto unlock; + + *chain_id = subrule->subrule_data->chain_id; + + if (!actions) { + MLX5_SET(set_action_in, modify_hdr_action, data, *chain_id); + chain_actions[0].action = cdata->action_metadata; + chain_actions[0].modify_header.data = modify_hdr_action; + chain_actions[1].action = + cdata->submatchers[subm_idx + 1].action_tbl; + chain_actions[2].action = cdata->action_last; + actions = chain_actions; } - /* No need to clear match buffer's fields in accordance to what - * will actually be matched on first and second matchers. - * Both matchers were created with the appropriate masks - * and each of them holds the appropriate field copy array, - * so rule creation will use only the fields that will be copied - * in accordance with setters in field copy array. - * We do, however, need to temporary allocate match buffer - * for the second (isolated) rule in order to not modify - * user's match params buffer. - */ - - match_buf_2 = kmemdup(params->match_buf, - MLX5_ST_SZ_BYTES(fte_match_param), - GFP_KERNEL); - if (unlikely(!match_buf_2)) { - mlx5hws_err(ctx, "Complex rule: failed allocating match_buf\n"); - ret = -ENOMEM; - goto hash_node_put; - } + ret = mlx5hws_bwc_rule_create_simple(subrule, match_params, actions, + flow_source, bwc_queue_idx); + if (ret) + goto put_subrule_data; - /* On 2nd matcher, use unique 32-bit ID as a matching tag */ - metadata_val = bwc_rule->complex_hash_node->tag; - MLX5_SET(fte_match_param, match_buf_2, - misc_parameters_2.metadata_reg_c_6, metadata_val); - - /* Isolated rule's rule_actions contain all the original actions */ - ret = mlx5hws_bwc_rule_create_simple(bwc_rule->isolated_bwc_rule, - match_buf_2, - rule_actions, - flow_source, - bwc_queue_idx); - kfree(match_buf_2); - if (unlikely(ret)) { - mlx5hws_err(ctx, - "Complex rule: failed creating isolated BWC rule (%d)\n", - ret); - goto hash_node_put; - } + ret = 0; + goto unlock; - /* First rule's rule_actions contain setting metadata and - * jump to isolated table that contains the second matcher. - * Set metadata value to a unique value for this rule. 
- */ +put_subrule_data: + hws_complex_put_subrule_data(subrule, subm, NULL); +unlock: + mutex_unlock(&subm->hash_lock); - MLX5_SET(set_action_in, modify_hdr_action, - action_type, MLX5_MODIFICATION_TYPE_SET); - MLX5_SET(set_action_in, modify_hdr_action, - field, MLX5_MODI_META_REG_C_6); - MLX5_SET(set_action_in, modify_hdr_action, - length, 0); /* zero means length of 32 */ - MLX5_SET(set_action_in, modify_hdr_action, - offset, 0); - MLX5_SET(set_action_in, modify_hdr_action, - data, metadata_val); + return ret; +} - rule_actions_1[0].action = bwc_matcher->complex->action_metadata; - rule_actions_1[0].modify_header.offset = 0; - rule_actions_1[0].modify_header.data = modify_hdr_action; +static int hws_complex_subrule_destroy(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_bwc_matcher *cmatcher, + int subm_idx) +{ + struct mlx5hws_bwc_matcher_complex_data *cdata; + struct mlx5hws_bwc_complex_submatcher *subm; + struct mlx5hws_context *ctx; + bool is_last_rule; + int ret = 0; - rule_actions_1[1].action = bwc_matcher->complex->action_go_to_tbl; - rule_actions_1[2].action = bwc_matcher->complex->action_last; + cdata = cmatcher->complex; + subm = &cdata->submatchers[subm_idx]; + ctx = subm->tbl->ctx; - ret = mlx5hws_bwc_rule_create_simple(bwc_rule, - params->match_buf, - rule_actions_1, - flow_source, - bwc_queue_idx); + mutex_lock(&subm->hash_lock); - if (unlikely(ret)) { + hws_complex_put_subrule_data(bwc_rule, subm, &is_last_rule); + bwc_rule->rule->skip_delete = !is_last_rule; + ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); + if (unlikely(ret)) mlx5hws_err(ctx, - "Complex rule: failed creating first BWC rule (%d)\n", - ret); - goto destroy_isolated_rule; - } + "Complex rule: failed to delete subrule %d (%d)\n", + subm_idx, ret); - hws_bwc_matcher_complex_hash_unlock(bwc_matcher); + if (subm_idx) + mlx5hws_bwc_rule_free(bwc_rule); - return 0; + mutex_unlock(&subm->hash_lock); -destroy_isolated_rule: - mlx5hws_bwc_rule_destroy_simple(bwc_rule->isolated_bwc_rule); -hash_node_put: - hws_bwc_rule_complex_hash_node_put(bwc_rule, NULL); -free_isolated_rule: - hws_bwc_matcher_complex_hash_unlock(bwc_matcher); - mlx5hws_bwc_rule_free(bwc_rule->isolated_bwc_rule); return ret; } -int mlx5hws_bwc_rule_destroy_complex(struct mlx5hws_bwc_rule *bwc_rule) +int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule, + struct mlx5hws_match_parameters *params, + u32 flow_source, + struct mlx5hws_rule_action rule_actions[], + u16 bwc_queue_idx) { - struct mlx5hws_context *ctx = bwc_rule->bwc_matcher->matcher->tbl->ctx; - struct mlx5hws_bwc_rule *isolated_bwc_rule; - int ret_isolated, ret; - bool is_last_rule; + struct mlx5hws_bwc_rule + *subrules[MLX5HWS_BWC_COMPLEX_MAX_SUBMATCHERS] = {0}; + struct mlx5hws_bwc_matcher *cmatcher = bwc_rule->bwc_matcher; + struct mlx5hws_bwc_matcher_complex_data *cdata; + struct mlx5hws_rule_action *subrule_actions; + struct mlx5hws_bwc_complex_submatcher *subm; + struct mlx5hws_bwc_rule *subrule; + u32 *match_params; + u32 chain_id; + int i, ret; - hws_bwc_matcher_complex_hash_lock(bwc_rule->bwc_matcher); + cdata = cmatcher->complex; + if (!cdata) + return -EINVAL; - hws_bwc_rule_complex_hash_node_put(bwc_rule, &is_last_rule); - bwc_rule->rule->skip_delete = !is_last_rule; + /* Duplicate user data because we will modify it to set register C6 + * values. For the same reason, make sure that we allocate a full + * match_param even if the user gave us fewer bytes. We need to ensure + * there is space for the match on C6. 
+ */ + match_params = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL); + if (!match_params) + return -ENOMEM; - ret = mlx5hws_bwc_rule_destroy_simple(bwc_rule); - if (unlikely(ret)) - mlx5hws_err(ctx, "BWC complex rule: failed destroying first rule\n"); + memcpy(match_params, params->match_buf, params->match_sz); + + ret = hws_complex_subrule_create(cmatcher, bwc_rule, match_params, + flow_source, bwc_queue_idx, 0, + NULL, &chain_id); + if (ret) + goto free_match_params; + subrules[0] = bwc_rule; + + for (i = 1; i < cdata->num_submatchers; i++) { + subm = &cdata->submatchers[i]; + subrule = mlx5hws_bwc_rule_alloc(subm->bwc_matcher); + if (!subrule) { + ret = -ENOMEM; + goto destroy_subrules; + } + + /* Match on the previous subrule's chain_id. This is how + * subrules are connected in steering. + */ + MLX5_SET(fte_match_param, match_params, + misc_parameters_2.metadata_reg_c_6, chain_id); + + /* The last subrule uses the complex rule's user-specified + * actions. Everything else uses the chaining rules based on the + * next table and chain_id. + */ + subrule_actions = + i == cdata->num_submatchers - 1 ? rule_actions : NULL; + + ret = hws_complex_subrule_create(cmatcher, subrule, + match_params, flow_source, + bwc_queue_idx, i, + subrule_actions, &chain_id); + if (ret) { + mlx5hws_bwc_rule_free(subrule); + goto destroy_subrules; + } + + subrules[i] = subrule; + } + + for (i = 0; i < cdata->num_submatchers - 1; i++) + subrules[i]->next_subrule = subrules[i + 1]; - isolated_bwc_rule = bwc_rule->isolated_bwc_rule; - ret_isolated = mlx5hws_bwc_rule_destroy_simple(isolated_bwc_rule); - if (unlikely(ret_isolated)) - mlx5hws_err(ctx, "BWC complex rule: failed destroying second (isolated) rule\n"); + kfree(match_params); - hws_bwc_matcher_complex_hash_unlock(bwc_rule->bwc_matcher); + return 0; - mlx5hws_bwc_rule_free(isolated_bwc_rule); +destroy_subrules: + while (i--) + hws_complex_subrule_destroy(subrules[i], cmatcher, i); +free_match_params: + kfree(match_params); - return ret || ret_isolated; + return ret; } -static void -hws_bwc_matcher_clear_hash_rtcs(struct mlx5hws_bwc_matcher *bwc_matcher) +int mlx5hws_bwc_rule_destroy_complex(struct mlx5hws_bwc_rule *bwc_rule) { - struct mlx5hws_bwc_complex_rule_hash_node *node; - struct rhashtable_iter iter; + struct mlx5hws_bwc_matcher *bwc_matcher = bwc_rule->bwc_matcher; + struct mlx5hws_bwc_rule + *subrules[MLX5HWS_BWC_COMPLEX_MAX_SUBMATCHERS] = {0}; + struct mlx5hws_bwc_matcher_complex_data *cdata; + int i, err, ret_val; + + cdata = bwc_matcher->complex; + + /* Construct a list of all the subrules we need to destroy. */ + subrules[0] = bwc_rule; + for (i = 1; i < cdata->num_submatchers; i++) + subrules[i] = subrules[i - 1]->next_subrule; + + ret_val = 0; + for (i = 0; i < cdata->num_submatchers; i++) { + err = hws_complex_subrule_destroy(subrules[i], bwc_matcher, i); + /* If something goes wrong, plow along to destroy all of the + * subrules but return an error upstack. 
+ */ + if (unlikely(err)) + ret_val = err; + } - rhashtable_walk_enter(&bwc_matcher->complex->refcount_hash, &iter); - rhashtable_walk_start(&iter); + return ret_val; +} - while ((node = rhashtable_walk_next(&iter)) != NULL) { - if (IS_ERR(node)) +static void +hws_bwc_matcher_init_move(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + u16 bwc_queues = mlx5hws_bwc_queues(ctx); + struct mlx5hws_bwc_rule *bwc_rule; + struct list_head *rules_list; + int i; + + for (i = 0; i < bwc_queues; i++) { + rules_list = &bwc_matcher->rules[i]; + if (list_empty(rules_list)) continue; - node->rtc_valid = false; - } - rhashtable_walk_stop(&iter); - rhashtable_walk_exit(&iter); + list_for_each_entry(bwc_rule, rules_list, list_node) { + if (!bwc_rule->subrule_data) + continue; + bwc_rule->subrule_data->was_moved = false; + } + } } -int -mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) +int mlx5hws_bwc_matcher_complex_move(struct mlx5hws_bwc_matcher *bwc_matcher) { struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; struct mlx5hws_matcher *matcher = bwc_matcher->matcher; u16 bwc_queues = mlx5hws_bwc_queues(ctx); struct mlx5hws_bwc_rule *tmp_bwc_rule; struct mlx5hws_rule_attr rule_attr; - struct mlx5hws_table *isolated_tbl; int move_error = 0, poll_error = 0; struct mlx5hws_rule *tmp_rule; struct list_head *rules_list; u32 expected_completions = 1; - u32 end_ft_id; - int i, ret; + int i, ret = 0; - /* We are rehashing the matcher that is the first part of the complex - * matcher. Need to update the isolated matcher to point to the end_ft - * of this new matcher. This needs to be done before moving any rules - * to prevent possible steering loops. - */ - isolated_tbl = bwc_matcher->complex->isolated_tbl; - end_ft_id = bwc_matcher->matcher->resize_dst->end_ft_id; - ret = mlx5hws_matcher_update_end_ft_isolated(isolated_tbl, end_ft_id); - if (ret) { - mlx5hws_err(ctx, - "Failed updating end_ft of isolated matcher (%d)\n", - ret); - return ret; - } - - hws_bwc_matcher_clear_hash_rtcs(bwc_matcher); + hws_bwc_matcher_init_move(bwc_matcher); mlx5hws_bwc_rule_fill_attr(bwc_matcher, 0, 0, &rule_attr); @@ -1369,15 +994,15 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) /* Check if a rule with similar tag has already * been moved. */ - if (tmp_bwc_rule->complex_hash_node->rtc_valid) { - /* This rule is a duplicate of rule with similar - * tag that has already been moved earlier. - * Just update this rule's RTCs. + if (tmp_bwc_rule->subrule_data->was_moved) { + /* This rule is a duplicate of rule with + * identical tag that has already been moved + * earlier. Just update this rule's RTCs. */ tmp_bwc_rule->rule->rtc_0 = - tmp_bwc_rule->complex_hash_node->rtc_0; + tmp_bwc_rule->subrule_data->rtc_0; tmp_bwc_rule->rule->rtc_1 = - tmp_bwc_rule->complex_hash_node->rtc_1; + tmp_bwc_rule->subrule_data->rtc_1; tmp_bwc_rule->rule->matcher = tmp_bwc_rule->rule->matcher->resize_dst; continue; @@ -1425,12 +1050,12 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) /* Done moving the rule to the new matcher, * now update RTCs for all the duplicated rules. 
*/ - tmp_bwc_rule->complex_hash_node->rtc_0 = + tmp_bwc_rule->subrule_data->rtc_0 = tmp_bwc_rule->rule->rtc_0; - tmp_bwc_rule->complex_hash_node->rtc_1 = + tmp_bwc_rule->subrule_data->rtc_1 = tmp_bwc_rule->rule->rtc_1; - tmp_bwc_rule->complex_hash_node->rtc_valid = true; + tmp_bwc_rule->subrule_data->was_moved = true; } } @@ -1442,3 +1067,35 @@ mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher) return ret; } + +int +mlx5hws_bwc_matcher_complex_move_first(struct mlx5hws_bwc_matcher *bwc_matcher) +{ + struct mlx5hws_context *ctx = bwc_matcher->matcher->tbl->ctx; + struct mlx5hws_bwc_matcher_complex_data *cdata; + struct mlx5hws_table *isolated_tbl; + u32 end_ft_id; + int i, ret; + + cdata = bwc_matcher->complex; + + /* We are rehashing the first submatcher. We need to update the + * subsequent submatchers to point to the end_ft of this new matcher. + * This needs to be done before moving any rules to prevent possible + * steering loops. + */ + end_ft_id = bwc_matcher->matcher->resize_dst->end_ft_id; + for (i = 1; i < cdata->num_submatchers; i++) { + isolated_tbl = cdata->submatchers[i].tbl; + ret = mlx5hws_matcher_update_end_ft_isolated(isolated_tbl, + end_ft_id); + if (ret) { + mlx5hws_err(ctx, + "Complex matcher: failed updating end_ft of isolated matcher (%d)\n", + ret); + return ret; + } + } + + return mlx5hws_bwc_matcher_complex_move(bwc_matcher); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h index a6887c7e39d5..d07de631ce9f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/bwc_complex.h @@ -4,25 +4,60 @@ #ifndef HWS_BWC_COMPLEX_H_ #define HWS_BWC_COMPLEX_H_ -struct mlx5hws_bwc_complex_rule_hash_node { - u32 match_buf[MLX5_ST_SZ_DW_MATCH_PARAM]; - u32 tag; +#define MLX5HWS_BWC_COMPLEX_MAX_SUBMATCHERS 4 + +/* A matcher can't contain two rules with the same match tag, but it is possible + * that two different complex rules' subrules have the same match tag. In that + * case, those subrules correspond to a single rule, and we need to refcount. + */ +struct mlx5hws_bwc_complex_subrule_data { + struct mlx5hws_rule_match_tag match_tag; refcount_t refcount; - bool rtc_valid; + /* The chain_id is what glues individual subrules into larger complex + * rules. It is the value that this subrule writes to register C6, and + * that the next subrule matches against. + */ + u32 chain_id; u32 rtc_0; u32 rtc_1; + /* During rehash we iterate through all the subrules to move them. But + * two or more subrules can share the same physical rule in the + * submatcher, so we use `was_moved` to keep track if a given rule was + * already moved. + */ + bool was_moved; struct rhash_head hash_node; }; +struct mlx5hws_bwc_complex_submatcher { + /* Isolated table that the matcher lives in. Not set for the first + * matcher, which lives in the original table. + */ + struct mlx5hws_table *tbl; + /* Match a rule with this action to go to `tbl`. This is set in all + * submatchers but the first. + */ + struct mlx5hws_action *action_tbl; + /* This submatcher's simple matcher. The first submatcher points to the + * outer (complex) matcher. + */ + struct mlx5hws_bwc_matcher *bwc_matcher; + struct rhashtable rules_hash; + struct ida chain_ida; + struct mutex hash_lock; /* Protect the hash and ida. 
*/ +}; + struct mlx5hws_bwc_matcher_complex_data { - struct mlx5hws_table *isolated_tbl; - struct mlx5hws_bwc_matcher *isolated_bwc_matcher; + struct mlx5hws_bwc_complex_submatcher + submatchers[MLX5HWS_BWC_COMPLEX_MAX_SUBMATCHERS]; + int num_submatchers; + /* Actions used by all but the last submatcher to point to the next + * submatcher in the chain. The last submatcher uses the action template + * from the complex matcher, to perform the actions that the user + * originally requested. + */ struct mlx5hws_action *action_metadata; - struct mlx5hws_action *action_go_to_tbl; struct mlx5hws_action *action_last; - struct rhashtable refcount_hash; - struct mutex hash_lock; /* Protect the refcount rhashtable */ - struct ida metadata_ida; }; bool mlx5hws_bwc_match_params_is_complex(struct mlx5hws_context *ctx, @@ -37,7 +72,10 @@ int mlx5hws_bwc_matcher_create_complex(struct mlx5hws_bwc_matcher *bwc_matcher, void mlx5hws_bwc_matcher_destroy_complex(struct mlx5hws_bwc_matcher *bwc_matcher); -int mlx5hws_bwc_matcher_move_all_complex(struct mlx5hws_bwc_matcher *bwc_matcher); +int mlx5hws_bwc_matcher_complex_move(struct mlx5hws_bwc_matcher *bwc_matcher); + +int +mlx5hws_bwc_matcher_complex_move_first(struct mlx5hws_bwc_matcher *bwc_matcher); int mlx5hws_bwc_rule_create_complex(struct mlx5hws_bwc_rule *bwc_rule, struct mlx5hws_match_parameters *params, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c index 0bdcab2e5cf3..f22eaf506d28 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/cmd.c @@ -1200,34 +1200,20 @@ out: int mlx5hws_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_function, u16 vport_number, u16 *gvmi) { - bool ec_vf_func = other_function ? 
mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; - u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; - int out_size; - void *out; int err; - out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); - out = kzalloc(out_size, GFP_KERNEL); - if (!out) - return -ENOMEM; - - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, other_function); - MLX5_SET(query_hca_cap_in, in, function_id, - mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); - MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); - MLX5_SET(query_hca_cap_in, in, op_mod, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | HCA_CAP_OPMOD_GET_CUR); + if (!other_function) { + /* self vhca_id */ + *gvmi = MLX5_CAP_GEN(mdev, vhca_id); + return 0; + } - err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi); if (err) { - kfree(out); + mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n", + vport_number); return err; } - *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); - - kfree(out); - return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c index c6436c3a7a83..82fd122d4284 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.c @@ -1280,7 +1280,7 @@ hws_definer_conv_misc2(struct mlx5hws_definer_conv_data *cd, struct mlx5hws_definer_fc *fc = cd->fc; struct mlx5hws_definer_fc *curr_fc; - if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1a0, 0x8) || + if (HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.psp_syndrome, 0x8) || HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.ipsec_next_header, 0x8) || HWS_IS_FLD_SET_SZ(match_param, misc_parameters_2.reserved_at_1c0, 0x40) || @@ -1831,80 +1831,6 @@ err_free_fc: return ret; } -struct mlx5hws_definer_fc * -mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx, - u8 match_criteria_enable, - u32 *match_param, - int *fc_sz) -{ - struct mlx5hws_definer_fc *compressed_fc = NULL; - struct mlx5hws_definer_conv_data cd = {0}; - struct mlx5hws_definer_fc *fc; - int ret; - - fc = hws_definer_alloc_fc(ctx, MLX5HWS_DEFINER_FNAME_MAX); - if (!fc) - return NULL; - - cd.fc = fc; - cd.ctx = ctx; - - if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_OUTER) { - ret = hws_definer_conv_outer(&cd, match_param); - if (ret) - goto err_free_fc; - } - - if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_INNER) { - ret = hws_definer_conv_inner(&cd, match_param); - if (ret) - goto err_free_fc; - } - - if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC) { - ret = hws_definer_conv_misc(&cd, match_param); - if (ret) - goto err_free_fc; - } - - if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC2) { - ret = hws_definer_conv_misc2(&cd, match_param); - if (ret) - goto err_free_fc; - } - - if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC3) { - ret = hws_definer_conv_misc3(&cd, match_param); - if (ret) - goto err_free_fc; - } - - if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC4) { - ret = hws_definer_conv_misc4(&cd, match_param); - if (ret) - goto err_free_fc; - } - - if (match_criteria_enable & MLX5HWS_DEFINER_MATCH_CRITERIA_MISC5) { - ret = hws_definer_conv_misc5(&cd, match_param); - if (ret) - goto err_free_fc; - } - - /* Allocate fc array on mt */ - compressed_fc = 
hws_definer_alloc_compressed_fc(fc); - if (!compressed_fc) { - mlx5hws_err(ctx, - "Convert to compressed fc: failed to set field copy to match template\n"); - goto err_free_fc; - } - *fc_sz = hws_definer_get_fc_size(fc); - -err_free_fc: - kfree(fc); - return compressed_fc; -} - static int hws_definer_find_byte_in_tag(struct mlx5hws_definer *definer, u32 hl_byte_off, @@ -2067,7 +1993,7 @@ hws_definer_copy_sel_ctrl(struct mlx5hws_definer_sel_ctrl *ctrl, static int hws_definer_find_best_match_fit(struct mlx5hws_context *ctx, struct mlx5hws_definer *definer, - u8 *hl) + u8 *hl, bool allow_jumbo) { struct mlx5hws_definer_sel_ctrl ctrl = {0}; bool found; @@ -2084,6 +2010,9 @@ hws_definer_find_best_match_fit(struct mlx5hws_context *ctx, return 0; } + if (!allow_jumbo) + return -E2BIG; + /* Try to create a full/limited jumbo definer */ ctrl.allowed_full_dw = ctx->caps->full_dw_jumbo_support ? DW_SELECTORS : DW_SELECTORS_MATCH; @@ -2160,7 +2089,8 @@ int mlx5hws_definer_compare(struct mlx5hws_definer *definer_a, int mlx5hws_definer_calc_layout(struct mlx5hws_context *ctx, struct mlx5hws_match_template *mt, - struct mlx5hws_definer *match_definer) + struct mlx5hws_definer *match_definer, + bool allow_jumbo) { u8 *match_hl; int ret; @@ -2182,7 +2112,8 @@ mlx5hws_definer_calc_layout(struct mlx5hws_context *ctx, } /* Find the match definer layout for header layout match union */ - ret = hws_definer_find_best_match_fit(ctx, match_definer, match_hl); + ret = hws_definer_find_best_match_fit(ctx, match_definer, match_hl, + allow_jumbo); if (ret) { if (ret == -E2BIG) mlx5hws_dbg(ctx, @@ -2370,7 +2301,7 @@ int mlx5hws_definer_mt_init(struct mlx5hws_context *ctx, struct mlx5hws_definer match_layout = {0}; int ret; - ret = mlx5hws_definer_calc_layout(ctx, mt, &match_layout); + ret = mlx5hws_definer_calc_layout(ctx, mt, &match_layout, true); if (ret) { mlx5hws_err(ctx, "Failed to calculate matcher definer layout\n"); return ret; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h index 62da55389331..141f3eb2e307 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/definer.h @@ -823,13 +823,8 @@ void mlx5hws_definer_free(struct mlx5hws_context *ctx, int mlx5hws_definer_calc_layout(struct mlx5hws_context *ctx, struct mlx5hws_match_template *mt, - struct mlx5hws_definer *match_definer); - -struct mlx5hws_definer_fc * -mlx5hws_definer_conv_match_params_to_compressed_fc(struct mlx5hws_context *ctx, - u8 match_criteria_enable, - u32 *match_param, - int *fc_sz); + struct mlx5hws_definer *match_definer, + bool allow_jumbo); const char *mlx5hws_definer_fname_to_str(enum mlx5hws_definer_fname fname); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c index 131e74b2b774..6a4c4cccd643 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws.c @@ -572,12 +572,12 @@ static void mlx5_fs_put_dest_action_sampler(struct mlx5_fs_hws_context *fs_ctx, static struct mlx5hws_action * mlx5_fs_create_action_dest_array(struct mlx5hws_context *ctx, struct mlx5hws_action_dest_attr *dests, - u32 num_of_dests, bool ignore_flow_level) + u32 num_of_dests) { u32 flags = MLX5HWS_ACTION_FLAG_HWS_FDB | MLX5HWS_ACTION_FLAG_SHARED; return mlx5hws_action_create_dest_array(ctx, num_of_dests, dests, - 
ignore_flow_level, flags); + flags); } static struct mlx5hws_action * @@ -1014,19 +1014,14 @@ static int mlx5_fs_fte_get_hws_actions(struct mlx5_flow_root_namespace *ns, } (*ractions)[num_actions++].action = dest_actions->dest; } else if (num_dest_actions > 1) { - bool ignore_flow_level; - if (num_actions == MLX5_FLOW_CONTEXT_ACTION_MAX || num_fs_actions == MLX5_FLOW_CONTEXT_ACTION_MAX) { err = -EOPNOTSUPP; goto free_actions; } - ignore_flow_level = - !!(fte_action->flags & FLOW_ACT_IGNORE_FLOW_LEVEL); tmp_action = mlx5_fs_create_action_dest_array(ctx, dest_actions, - num_dest_actions, - ignore_flow_level); + num_dest_actions); if (!tmp_action) { err = -EOPNOTSUPP; goto free_actions; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c index f1ecdba74e1f..839d71bd4216 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/fs_hws_pools.c @@ -407,15 +407,21 @@ struct mlx5hws_action *mlx5_fc_get_hws_action(struct mlx5hws_context *ctx, { struct mlx5_fs_hws_create_action_ctx create_ctx; struct mlx5_fc_bulk *fc_bulk = counter->bulk; + struct mlx5hws_action *hws_action; create_ctx.hws_ctx = ctx; create_ctx.id = fc_bulk->base_id; create_ctx.actions_type = MLX5HWS_ACTION_TYP_CTR; - return mlx5_fs_get_hws_action(&fc_bulk->hws_data, &create_ctx); + mlx5_fc_local_get(counter); + hws_action = mlx5_fs_get_hws_action(&fc_bulk->hws_data, &create_ctx); + if (!hws_action) + mlx5_fc_local_put(counter); + return hws_action; } void mlx5_fc_put_hws_action(struct mlx5_fc *counter) { mlx5_fs_put_hws_action(&counter->bulk->hws_data); + mlx5_fc_local_put(counter); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h index 2498ceff2060..1ad7a50d938b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/mlx5hws.h @@ -735,7 +735,6 @@ mlx5hws_action_create_push_vlan(struct mlx5hws_context *ctx, u32 flags); * @num_dest: The number of dests attributes. * @dests: The destination array. Each contains a destination action and can * have additional actions. - * @ignore_flow_level: Whether to turn on 'ignore_flow_level' for this dest. * @flags: Action creation flags (enum mlx5hws_action_flags). * * Return: pointer to mlx5hws_action on success NULL otherwise. @@ -743,7 +742,7 @@ mlx5hws_action_create_push_vlan(struct mlx5hws_context *ctx, u32 flags); struct mlx5hws_action * mlx5hws_action_create_dest_array(struct mlx5hws_context *ctx, size_t num_dest, struct mlx5hws_action_dest_attr *dests, - bool ignore_flow_level, u32 flags); + u32 flags); /** * mlx5hws_action_create_insert_header - Create insert header action. 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c index 51e4c551e0ef..d56271a9e4f0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pat_arg.c @@ -279,7 +279,7 @@ int mlx5hws_pat_get_pattern(struct mlx5hws_context *ctx, return ret; clean_pattern: - mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, *pattern_id); + mlx5hws_cmd_header_modify_pattern_destroy(ctx->mdev, ptrn_id); out_unlock: mutex_unlock(&ctx->pattern_cache->lock); return ret; @@ -527,7 +527,6 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, u32 *nop_locations, __be64 *new_pat) { u16 prev_src_field = INVALID_FIELD, prev_dst_field = INVALID_FIELD; - u16 src_field, dst_field; u8 action_type; bool dependent; size_t i, j; @@ -539,6 +538,9 @@ int mlx5hws_pat_calc_nop(__be64 *pattern, size_t num_actions, return 0; for (i = 0, j = 0; i < num_actions; i++, j++) { + u16 src_field = INVALID_FIELD; + u16 dst_field = INVALID_FIELD; + if (j >= max_actions) return -EINVAL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c index 7e37d6e9eb83..7b5071c3df36 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/pool.c @@ -124,6 +124,7 @@ static int hws_pool_buddy_init(struct mlx5hws_pool *pool) mlx5hws_err(pool->ctx, "Failed to create resource type: %d size %zu\n", pool->type, pool->alloc_log_sz); mlx5hws_buddy_cleanup(buddy); + kfree(buddy); return -ENOMEM; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c index b0595c9b09e4..24ef7d66fa8a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/hws/send.c @@ -690,7 +690,7 @@ static int hws_send_ring_alloc_sq(struct mlx5_core_dev *mdev, size_t buf_sz; int err; - sq->uar_map = mdev->mlx5e_res.hw_objs.bfreg.map; + sq->uar_map = mdev->priv.bfreg.map; sq->mdev = mdev; param.db_numa_node = numa_node; @@ -764,7 +764,7 @@ static int hws_send_ring_create_sq(struct mlx5_core_dev *mdev, u32 pdn, MLX5_SET(sqc, sqc, ts_format, ts_format); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); - MLX5_SET(wq, wq, uar_page, mdev->mlx5e_res.hw_objs.bfreg.index); + MLX5_SET(wq, wq, uar_page, mdev->priv.bfreg.index); MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma); @@ -940,7 +940,7 @@ static int hws_send_ring_create_cq(struct mlx5_core_dev *mdev, (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas)); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); - MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.bfreg.up->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); @@ -963,7 +963,7 @@ static int hws_send_ring_open_cq(struct mlx5_core_dev *mdev, if (!cqc_data) return -ENOMEM; - MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc_data, uar_page, mdev->priv.bfreg.up->index); MLX5_SET(cqc, cqc_data, log_cq_size, ilog2(queue->num_entries)); err = hws_send_ring_alloc_cq(mdev, numa_node, queue, cqc_data, cq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c 
b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c index baefb9a3fa05..1ebb2b15c080 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_cmd.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019 Mellanox Technologies. */ #include "dr_types.h" +#include "eswitch.h" int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, bool other_vport, @@ -34,34 +35,21 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev, int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport, u16 vport_number, u16 *gvmi) { - bool ec_vf_func = other_vport ? mlx5_core_is_ec_vf_vport(mdev, vport_number) : false; - u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {}; - int out_size; - void *out; int err; - out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out); - out = kzalloc(out_size, GFP_KERNEL); - if (!out) - return -ENOMEM; - - MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); - MLX5_SET(query_hca_cap_in, in, other_function, other_vport); - MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(mdev, vport_number, ec_vf_func)); - MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); - MLX5_SET(query_hca_cap_in, in, op_mod, - MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 | - HCA_CAP_OPMOD_GET_CUR); + if (!other_vport) { + /* self vhca_id */ + *gvmi = MLX5_CAP_GEN(mdev, vhca_id); + return 0; + } - err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out); + err = mlx5_vport_get_vhca_id(mdev, vport_number, gvmi); if (err) { - kfree(out); + mlx5_core_err(mdev, "Failed to get vport vhca id for vport %d\n", + vport_number); return err; } - *gvmi = MLX5_GET(query_hca_cap_out, out, capability.cmd_hca_cap.vhca_id); - - kfree(out); return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c index 4fd4e8483382..077a77fde670 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/sws/dr_send.c @@ -1131,7 +1131,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev, *cq->mcq.arm_db = cpu_to_be32(2 << 28); cq->mcq.vector = 0; - cq->mcq.uar = uar; cq->mdev = mdev; return cq; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index da5c24fc7b30..2ed2e530b07d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,6 +36,7 @@ #include <linux/mlx5/vport.h> #include <linux/mlx5/eswitch.h> #include "mlx5_core.h" +#include "eswitch.h" #include "sf/sf.h" /* Mutex to hold while enabling or disabling RoCE */ @@ -1189,18 +1190,44 @@ u64 mlx5_query_nic_system_image_guid(struct mlx5_core_dev *mdev) } EXPORT_SYMBOL_GPL(mlx5_query_nic_system_image_guid); +static bool mlx5_vport_use_vhca_id_as_func_id(struct mlx5_core_dev *dev, + u16 vport_num, u16 *vhca_id) +{ + if (!MLX5_CAP_GEN_2(dev, function_id_type_vhca_id)) + return false; + + return mlx5_esw_vport_vhca_id(dev->priv.eswitch, vport_num, vhca_id); +} + int mlx5_vport_get_other_func_cap(struct mlx5_core_dev *dev, u16 vport, void *out, u16 opmod) { - bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); u8 in[MLX5_ST_SZ_BYTES(query_hca_cap_in)] = {}; + u16 vhca_id = 0, function_id = 0; + bool ec_vf_func = false; + + /* if this vport is referring to a vport on the ec PF (embedded cpu ) + * let the FW know which domain we are querying since vport numbers 
or + * function_ids are not unique across the different PF domains, + * unless we use vhca_id as the function_id below. + */ + ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); + function_id = mlx5_vport_to_func_id(dev, vport, ec_vf_func); + + if (mlx5_vport_use_vhca_id_as_func_id(dev, vport, &vhca_id)) { + MLX5_SET(query_hca_cap_in, in, function_id_type, 1); + function_id = vhca_id; + ec_vf_func = false; + mlx5_core_dbg(dev, "%s using vhca_id as function_id for vport %d vhca_id 0x%x\n", + __func__, vport, vhca_id); + } opmod = (opmod << 1) | (HCA_CAP_OPMOD_GET_MAX & 0x01); MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); - MLX5_SET(query_hca_cap_in, in, function_id, mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(query_hca_cap_in, in, other_function, true); MLX5_SET(query_hca_cap_in, in, ec_vf_function, ec_vf_func); + MLX5_SET(query_hca_cap_in, in, function_id, function_id); return mlx5_cmd_exec_inout(dev, query_hca_cap, in, out); } EXPORT_SYMBOL_GPL(mlx5_vport_get_other_func_cap); @@ -1212,7 +1239,9 @@ int mlx5_vport_get_vhca_id(struct mlx5_core_dev *dev, u16 vport, u16 *vhca_id) void *hca_caps; int err; - *vhca_id = 0; + /* try get vhca_id via eswitch */ + if (mlx5_esw_vport_vhca_id(dev->priv.eswitch, vport, vhca_id)) + return 0; query_ctx = kzalloc(query_out_sz, GFP_KERNEL); if (!query_ctx) @@ -1229,12 +1258,14 @@ out_free: kfree(query_ctx); return err; } +EXPORT_SYMBOL_GPL(mlx5_vport_get_vhca_id); int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap, u16 vport, u16 opmod) { - bool ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in); + u16 vhca_id = 0, function_id = 0; + bool ec_vf_func = false; void *set_hca_cap; void *set_ctx; int ret; @@ -1243,14 +1274,29 @@ int mlx5_vport_set_other_func_cap(struct mlx5_core_dev *dev, const void *hca_cap if (!set_ctx) return -ENOMEM; + /* if this vport is referring to a vport on the ec PF (embedded cpu ) + * let the FW know which domain we are querying since vport numbers or + * function_ids are not unique across the different PF domains, + * unless we use vhca_id as the function_id below. 
+ */ + ec_vf_func = mlx5_core_is_ec_vf_vport(dev, vport); + function_id = mlx5_vport_to_func_id(dev, vport, ec_vf_func); + + if (mlx5_vport_use_vhca_id_as_func_id(dev, vport, &vhca_id)) { + MLX5_SET(set_hca_cap_in, set_ctx, function_id_type, 1); + function_id = vhca_id; + ec_vf_func = false; + mlx5_core_dbg(dev, "%s using vhca_id as function_id for vport %d vhca_id 0x%x\n", + __func__, vport, vhca_id); + } + MLX5_SET(set_hca_cap_in, set_ctx, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, set_ctx, op_mod, opmod << 1); set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability); memcpy(set_hca_cap, hca_cap, MLX5_ST_SZ_BYTES(cmd_hca_cap)); - MLX5_SET(set_hca_cap_in, set_ctx, function_id, - mlx5_vport_to_func_id(dev, vport, ec_vf_func)); MLX5_SET(set_hca_cap_in, set_ctx, other_function, true); MLX5_SET(set_hca_cap_in, set_ctx, ec_vf_function, ec_vf_func); + MLX5_SET(set_hca_cap_in, set_ctx, function_id, function_id); ret = mlx5_cmd_exec_in(dev, set_hca_cap, set_ctx); kfree(set_ctx); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wc.c b/drivers/net/ethernet/mellanox/mlx5/core/wc.c index 2f0316616fa4..c281153bd411 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wc.c @@ -7,6 +7,10 @@ #include "mlx5_core.h" #include "wq.h" +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) +#include <asm/neon.h> +#endif + #define TEST_WC_NUM_WQES 255 #define TEST_WC_LOG_CQ_SZ (order_base_2(TEST_WC_NUM_WQES)) #define TEST_WC_SQ_LOG_WQ_SZ TEST_WC_LOG_CQ_SZ @@ -94,7 +98,7 @@ static int create_wc_cq(struct mlx5_wc_cq *cq, void *cqc_data) MLX5_SET(cqc, cqc, cq_period_mode, MLX5_CQ_PERIOD_MODE_START_FROM_EQE); MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn); - MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.bfreg.up->index); MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma); @@ -116,7 +120,7 @@ static int mlx5_wc_create_cq(struct mlx5_core_dev *mdev, struct mlx5_wc_cq *cq) return -ENOMEM; MLX5_SET(cqc, cqc, log_cq_size, TEST_WC_LOG_CQ_SZ); - MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index); + MLX5_SET(cqc, cqc, uar_page, mdev->priv.bfreg.up->index); if (MLX5_CAP_GEN(mdev, cqe_128_always) && cache_line_size() >= 128) MLX5_SET(cqc, cqc, cqe_sz, CQE_STRIDE_128_PAD); @@ -255,7 +259,29 @@ static void mlx5_wc_destroy_sq(struct mlx5_wc_sq *sq) mlx5_wq_destroy(&sq->wq_ctrl); } -static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, bool signaled) +static void mlx5_iowrite64_copy(struct mlx5_wc_sq *sq, __be32 mmio_wqe[16], + size_t mmio_wqe_size, unsigned int offset) +{ +#if IS_ENABLED(CONFIG_KERNEL_MODE_NEON) && IS_ENABLED(CONFIG_ARM64) + if (cpu_has_neon()) { + kernel_neon_begin(); + asm volatile + (".arch_extension simd;\n\t" + "ld1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%0]\n\t" + "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%1]" + : + : "r"(mmio_wqe), "r"(sq->bfreg.map + offset) + : "memory", "v0", "v1", "v2", "v3"); + kernel_neon_end(); + return; + } +#endif + __iowrite64_copy(sq->bfreg.map + offset, mmio_wqe, + mmio_wqe_size / 8); +} + +static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, unsigned int *offset, + bool signaled) { int buf_size = (1 << MLX5_CAP_GEN(sq->cq.mdev, log_bf_reg_size)) / 2; struct mlx5_wqe_ctrl_seg *ctrl; @@ -288,10 +314,9 @@ static void mlx5_wc_post_nop(struct mlx5_wc_sq *sq, bool signaled) */ wmb(); - __iowrite64_copy(sq->bfreg.map + sq->bfreg.offset, mmio_wqe, - sizeof(mmio_wqe) 
/ 8); + mlx5_iowrite64_copy(sq, mmio_wqe, sizeof(mmio_wqe), *offset); - sq->bfreg.offset ^= buf_size; + *offset ^= buf_size; } static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq) @@ -332,6 +357,7 @@ static int mlx5_wc_poll_cq(struct mlx5_wc_sq *sq) static void mlx5_core_test_wc(struct mlx5_core_dev *mdev) { + unsigned int offset = 0; unsigned long expires; struct mlx5_wc_sq *sq; int i, err; @@ -358,9 +384,9 @@ static void mlx5_core_test_wc(struct mlx5_core_dev *mdev) goto err_create_sq; for (i = 0; i < TEST_WC_NUM_WQES - 1; i++) - mlx5_wc_post_nop(sq, false); + mlx5_wc_post_nop(sq, &offset, false); - mlx5_wc_post_nop(sq, true); + mlx5_wc_post_nop(sq, &offset, true); expires = jiffies + TEST_WC_POLLING_MAX_TIME_JIFFIES; do { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 2bb2b77351bd..83c7cf3bbea3 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -886,7 +886,7 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core) if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX)) return 0; - emad_wq = alloc_workqueue("mlxsw_core_emad", 0, 0); + emad_wq = alloc_workqueue("mlxsw_core_emad", WQ_PERCPU, 0); if (!emad_wq) return -ENOMEM; mlxsw_core->emad_wq = emad_wq; @@ -2043,7 +2043,7 @@ static int mlxsw_core_health_init(struct mlxsw_core *mlxsw_core) return 0; fw_fatal = devl_health_reporter_create(devlink, &mlxsw_core_health_fw_fatal_ops, - 0, mlxsw_core); + mlxsw_core); if (IS_ERR(fw_fatal)) { dev_err(mlxsw_core->bus_info->dev, "Failed to create fw fatal reporter"); return PTR_ERR(fw_fatal); @@ -3381,7 +3381,7 @@ static int __init mlxsw_core_module_init(void) if (err) return err; - mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, 0, 0); + mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_PERCPU, 0); if (!mlxsw_wq) { err = -ENOMEM; goto err_alloc_workqueue; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c index 50e591420bd9..b1094aaffa5f 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_cnt.c @@ -170,8 +170,7 @@ void mlxsw_sp_counter_pool_fini(struct mlxsw_sp *mlxsw_sp) struct devlink *devlink = priv_to_devlink(mlxsw_sp->core); mlxsw_sp_counter_sub_pools_fini(mlxsw_sp); - WARN_ON(find_first_bit(pool->usage, pool->pool_size) != - pool->pool_size); + WARN_ON(!bitmap_empty(pool->usage, pool->pool_size)); WARN_ON(atomic_read(&pool->active_entries_count)); bitmap_free(pool->usage); devl_resource_occ_get_unregister(devlink, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic.h b/drivers/net/ethernet/meta/fbnic/fbnic.h index c376e06880c9..b03e5a3d5144 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic.h @@ -27,6 +27,8 @@ struct fbnic_dev { struct net_device *netdev; struct dentry *dbg_fbd; struct device *hwmon; + struct devlink_health_reporter *fw_reporter; + struct devlink_health_reporter *otp_reporter; u32 __iomem *uc_addr0; u32 __iomem *uc_addr4; @@ -84,8 +86,9 @@ struct fbnic_dev { /* Local copy of hardware statistics */ struct fbnic_hw_stats hw_stats; - /* Lock protecting access to hw_stats */ - spinlock_t hw_stats_lock; + /* Firmware time since boot in milliseconds */ + u64 firmware_time; + u64 prev_firmware_time; struct fbnic_fw_log fw_log; }; @@ -158,8 +161,13 @@ extern char fbnic_driver_name[]; void fbnic_devlink_free(struct fbnic_dev *fbd); struct fbnic_dev *fbnic_devlink_alloc(struct pci_dev *pdev); 
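The mlx5_wc_post_nop() change above passes the doorbell offset by pointer and toggles it with an XOR after each write, so consecutive WQEs land in alternating halves of the BlueFlame register. A tiny self-contained sketch of that toggling is below; the buf_size value is illustrative only, not the real log_bf_reg_size-derived size.

/* Sketch: XOR-ing with the half size flips the write offset between 0 and
 * buf_size on every post, alternating between the two BF register halves.
 */
#include <stdio.h>

int main(void)
{
	unsigned int buf_size = 256;	/* hypothetical half-register size */
	unsigned int offset = 0;
	int i;

	for (i = 0; i < 4; i++) {
		printf("post %d writes at offset %u\n", i, offset);
		offset ^= buf_size;	/* 0 -> 256 -> 0 -> 256 ... */
	}
	return 0;
}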
+int fbnic_devlink_health_create(struct fbnic_dev *fbd); +void fbnic_devlink_health_destroy(struct fbnic_dev *fbd); void fbnic_devlink_register(struct fbnic_dev *fbd); void fbnic_devlink_unregister(struct fbnic_dev *fbd); +void __printf(2, 3) +fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...); +void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg); int fbnic_fw_request_mbx(struct fbnic_dev *fbd); void fbnic_fw_free_mbx(struct fbnic_dev *fbd); @@ -190,6 +198,8 @@ void fbnic_dbg_fbd_exit(struct fbnic_dev *fbd); void fbnic_dbg_init(void); void fbnic_dbg_exit(void); +void fbnic_rpc_reset_valid_entries(struct fbnic_dev *fbd); + void fbnic_csr_get_regs(struct fbnic_dev *fbd, u32 *data, u32 *regs_version); int fbnic_csr_regs_len(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h index a81db842aa53..d3a7ad921f18 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_csr.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_csr.h @@ -790,6 +790,21 @@ enum { #define FBNIC_CSR_END_PCS 0x10668 /* CSR section delimiter */ #define FBNIC_CSR_START_RSFEC 0x10800 /* CSR section delimiter */ + +/* We have 4 RSFEC engines present in our part, however we are only using 1. + * As such only CCW(0) and NCCW(0) will never be non-zero and the other + * registers can be ignored. + */ +#define FBNIC_RSFEC_CCW_LO(n) (0x10802 + 8 * (n)) /* 0x42008 + 32*n */ +#define FBNIC_RSFEC_CCW_HI(n) (0x10803 + 8 * (n)) /* 0x4200c + 32*n */ +#define FBNIC_RSFEC_NCCW_LO(n) (0x10804 + 8 * (n)) /* 0x42010 + 32*n */ +#define FBNIC_RSFEC_NCCW_HI(n) (0x10805 + 8 * (n)) /* 0x42014 + 32*n */ + +#define FBNIC_PCS_MAX_LANES 4 +#define FBNIC_PCS_SYMBLERR_LO(n) \ + (0x10880 + 2 * (n)) /* 0x42200 + 8*n */ +#define FBNIC_PCS_SYMBLERR_HI(n) \ + (0x10881 + 2 * (n)) /* 0x42204 + 8*n */ #define FBNIC_CSR_END_RSFEC 0x108c8 /* CSR section delimiter */ /* MAC MAC registers (ASIC only) */ @@ -829,6 +844,10 @@ enum { #define FBNIC_CSR_END_SIG 0x1184e /* CSR section delimiter */ #define FBNIC_CSR_START_MAC_STAT 0x11a00 +#define FBNIC_MAC_STAT_RX_XOFF_STB_L 0x11a00 /* 0x46800 */ +#define FBNIC_MAC_STAT_RX_XOFF_STB_H 0x11a01 /* 0x46804 */ +#define FBNIC_MAC_STAT_TX_XOFF_STB_L 0x11a04 /* 0x46810 */ +#define FBNIC_MAC_STAT_TX_XOFF_STB_H 0x11a05 /* 0x46814 */ #define FBNIC_MAC_STAT_RX_BYTE_COUNT_L 0x11a08 /* 0x46820 */ #define FBNIC_MAC_STAT_RX_BYTE_COUNT_H 0x11a09 /* 0x46824 */ #define FBNIC_MAC_STAT_RX_ALIGN_ERROR_L 0x11a0a /* 0x46828 */ @@ -1159,4 +1178,22 @@ enum { #define FBNIC_IPC_MBX_DESC_FW_CMPL DESC_BIT(1) #define FBNIC_IPC_MBX_DESC_HOST_CMPL DESC_BIT(0) +/* OTP Registers + * These registers are accessible via bar4 offset and are written by CMRT + * on boot. For the write status, the register is broken up in half with OTP + * Write Data Status occupying the top 16 bits and the ECC status occupying the + * bottom 16 bits. 
+ */ +#define FBNIC_NS_OTP_STATUS 0x0021d +#define FBNIC_NS_OTP_WRITE_STATUS 0x0021e + +#define FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK CSR_GENMASK(31, 16) +#define FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK CSR_GENMASK(15, 0) + +#define FBNIC_REGS_VERSION CSR_GENMASK(31, 16) +#define FBNIC_REGS_HW_TYPE CSR_GENMASK(15, 8) +enum{ + FBNIC_CSR_VERSION_V1_0_ASIC = 1, +}; + #endif /* _FBNIC_CSR_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c index c5f81f139e7e..b62b1d5b1453 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_devlink.c @@ -8,6 +8,7 @@ #include <net/devlink.h> #include "fbnic.h" +#include "fbnic_fw.h" #include "fbnic_tlv.h" #define FBNIC_SN_STR_LEN 24 @@ -369,6 +370,254 @@ static const struct devlink_ops fbnic_devlink_ops = { .flash_update = fbnic_devlink_flash_update, }; +static int fbnic_fw_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter); + u32 offset, index, index_count, length, size; + struct fbnic_fw_completion *fw_cmpl; + u8 *dump_data, **data; + int err; + + fw_cmpl = fbnic_fw_alloc_cmpl(FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_RESP); + if (!fw_cmpl) + return -ENOMEM; + + err = fbnic_fw_xmit_coredump_info_msg(fbd, fw_cmpl, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to transmit core dump info msg"); + goto cmpl_free; + } + if (!wait_for_completion_timeout(&fw_cmpl->done, 2 * HZ)) { + NL_SET_ERR_MSG_MOD(extack, + "Timed out waiting on core dump info"); + err = -ETIMEDOUT; + goto cmpl_cleanup; + } + + size = fw_cmpl->u.coredump_info.size; + err = fw_cmpl->result; + + fbnic_mbx_clear_cmpl(fbd, fw_cmpl); + fbnic_fw_put_cmpl(fw_cmpl); + + /* Handle error returned by firmware */ + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Firmware core dump returned error"); + return err; + } + if (!size) { + NL_SET_ERR_MSG_MOD(extack, + "Firmware core dump returned size 0"); + return -EIO; + } + + /* Read the dump, we can only transfer TLV_MAX_DATA at a time */ + index_count = DIV_ROUND_UP(size, TLV_MAX_DATA); + + fw_cmpl = __fbnic_fw_alloc_cmpl(FBNIC_TLV_MSG_ID_COREDUMP_READ_RESP, + sizeof(void *) * index_count + size); + if (!fw_cmpl) + return -ENOMEM; + + /* Populate pointer table w/ pointer offsets */ + dump_data = (void *)&fw_cmpl->u.coredump.data[index_count]; + data = fw_cmpl->u.coredump.data; + fw_cmpl->u.coredump.size = size; + fw_cmpl->u.coredump.stride = TLV_MAX_DATA; + + for (index = 0; index < index_count; index++) { + /* First iteration installs completion */ + struct fbnic_fw_completion *cmpl_arg = index ? 
NULL : fw_cmpl; + + offset = index * TLV_MAX_DATA; + length = min(size - offset, TLV_MAX_DATA); + + data[index] = dump_data + offset; + err = fbnic_fw_xmit_coredump_read_msg(fbd, cmpl_arg, + offset, length); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to transmit core dump msg"); + if (cmpl_arg) + goto cmpl_free; + else + goto cmpl_cleanup; + } + + if (wait_for_completion_timeout(&fw_cmpl->done, 2 * HZ)) { + reinit_completion(&fw_cmpl->done); + } else { + NL_SET_ERR_MSG_FMT_MOD(extack, + "Timed out waiting on core dump (%d/%d)", + index + 1, index_count); + err = -ETIMEDOUT; + goto cmpl_cleanup; + } + + /* If we didn't see the reply record as incomplete */ + if (fw_cmpl->u.coredump.data[index]) { + NL_SET_ERR_MSG_FMT_MOD(extack, + "No data for core dump chunk (%d/%d)", + index + 1, index_count); + err = -EIO; + goto cmpl_cleanup; + } + } + + devlink_fmsg_binary_pair_nest_start(fmsg, "FW coredump"); + + for (offset = 0; offset < size; offset += length) { + length = min_t(u32, size - offset, TLV_MAX_DATA); + + devlink_fmsg_binary_put(fmsg, dump_data + offset, length); + } + + devlink_fmsg_binary_pair_nest_end(fmsg); + +cmpl_cleanup: + fbnic_mbx_clear_cmpl(fbd, fw_cmpl); +cmpl_free: + fbnic_fw_put_cmpl(fw_cmpl); + + return err; +} + +static int +fbnic_fw_reporter_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter); + u32 sec, msec; + + /* Device is most likely down, we're not exchanging heartbeats */ + if (!fbd->prev_firmware_time) + return 0; + + sec = div_u64_rem(fbd->firmware_time, MSEC_PER_SEC, &msec); + + devlink_fmsg_pair_nest_start(fmsg, "last_heartbeat"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_pair_nest_start(fmsg, "fw_uptime"); + devlink_fmsg_obj_nest_start(fmsg); + devlink_fmsg_u32_pair_put(fmsg, "sec", sec); + devlink_fmsg_u32_pair_put(fmsg, "msec", msec); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + + return 0; +} + +void __printf(2, 3) +fbnic_devlink_fw_report(struct fbnic_dev *fbd, const char *format, ...) 
+{ + char msg[FBNIC_FW_LOG_MAX_SIZE]; + va_list args; + + va_start(args, format); + vsnprintf(msg, FBNIC_FW_LOG_MAX_SIZE, format, args); + va_end(args); + + devlink_health_report(fbd->fw_reporter, msg, fbd); + if (fbnic_fw_log_ready(fbd)) + fbnic_fw_log_write(fbd, 0, fbd->firmware_time, msg); +} + +static const struct devlink_health_reporter_ops fbnic_fw_ops = { + .name = "fw", + .dump = fbnic_fw_reporter_dump, + .diagnose = fbnic_fw_reporter_diagnose, +}; + +static u32 fbnic_read_otp_status(struct fbnic_dev *fbd) +{ + return fbnic_fw_rd32(fbd, FBNIC_NS_OTP_STATUS); +} + +static int +fbnic_otp_reporter_dump(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, void *priv_ctx, + struct netlink_ext_ack *extack) +{ + struct fbnic_dev *fbd = devlink_health_reporter_priv(reporter); + u32 otp_status, otp_write_status, m; + + otp_status = fbnic_read_otp_status(fbd); + otp_write_status = fbnic_fw_rd32(fbd, FBNIC_NS_OTP_WRITE_STATUS); + + /* Dump OTP status */ + devlink_fmsg_pair_nest_start(fmsg, "OTP"); + devlink_fmsg_obj_nest_start(fmsg); + + devlink_fmsg_u32_pair_put(fmsg, "Status", otp_status); + + /* Extract OTP Write Data status */ + m = FBNIC_NS_OTP_WRITE_DATA_STATUS_MASK; + devlink_fmsg_u32_pair_put(fmsg, "Data", + FIELD_GET(m, otp_write_status)); + + /* Extract OTP Write ECC status */ + m = FBNIC_NS_OTP_WRITE_ECC_STATUS_MASK; + devlink_fmsg_u32_pair_put(fmsg, "ECC", + FIELD_GET(m, otp_write_status)); + + devlink_fmsg_obj_nest_end(fmsg); + devlink_fmsg_pair_nest_end(fmsg); + + return 0; +} + +void fbnic_devlink_otp_check(struct fbnic_dev *fbd, const char *msg) +{ + /* Check if there is anything to report */ + if (!fbnic_read_otp_status(fbd)) + return; + + devlink_health_report(fbd->otp_reporter, msg, fbd); + if (fbnic_fw_log_ready(fbd)) + fbnic_fw_log_write(fbd, 0, fbd->firmware_time, msg); +} + +static const struct devlink_health_reporter_ops fbnic_otp_ops = { + .name = "otp", + .dump = fbnic_otp_reporter_dump, +}; + +int fbnic_devlink_health_create(struct fbnic_dev *fbd) +{ + fbd->fw_reporter = devlink_health_reporter_create(priv_to_devlink(fbd), + &fbnic_fw_ops, fbd); + if (IS_ERR(fbd->fw_reporter)) { + dev_warn(fbd->dev, + "Failed to create FW fault reporter: %pe\n", + fbd->fw_reporter); + return PTR_ERR(fbd->fw_reporter); + } + + fbd->otp_reporter = devlink_health_reporter_create(priv_to_devlink(fbd), + &fbnic_otp_ops, fbd); + if (IS_ERR(fbd->otp_reporter)) { + devlink_health_reporter_destroy(fbd->fw_reporter); + dev_warn(fbd->dev, + "Failed to create OTP fault reporter: %pe\n", + fbd->otp_reporter); + return PTR_ERR(fbd->otp_reporter); + } + + return 0; +} + +void fbnic_devlink_health_destroy(struct fbnic_dev *fbd) +{ + devlink_health_reporter_destroy(fbd->otp_reporter); + devlink_health_reporter_destroy(fbd->fw_reporter); +} + void fbnic_devlink_free(struct fbnic_dev *fbd) { struct devlink *devlink = priv_to_devlink(fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c index dc7ba8d5fc43..a1c2db69b198 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_ethtool.c @@ -2,6 +2,7 @@ /* Copyright (c) Meta Platforms, Inc. and affiliates. 
*/ #include <linux/ethtool.h> +#include <linux/ethtool_netlink.h> #include <linux/netdevice.h> #include <linux/pci.h> #include <net/ipv6.h> @@ -111,6 +112,20 @@ static const struct fbnic_stat fbnic_gstrings_hw_q_stats[] = { FBNIC_HW_RXB_DEQUEUE_STATS_LEN * FBNIC_RXB_DEQUEUE_INDICES + \ FBNIC_HW_Q_STATS_LEN * FBNIC_MAX_QUEUES) +#define FBNIC_QUEUE_STAT(name, stat) \ + FBNIC_STAT_FIELDS(fbnic_ring, name, stat) + +static const struct fbnic_stat fbnic_gstrings_xdp_stats[] = { + FBNIC_QUEUE_STAT("xdp_tx_queue_%u_packets", stats.packets), + FBNIC_QUEUE_STAT("xdp_tx_queue_%u_bytes", stats.bytes), + FBNIC_QUEUE_STAT("xdp_tx_queue_%u_dropped", stats.dropped), +}; + +#define FBNIC_XDP_STATS_LEN ARRAY_SIZE(fbnic_gstrings_xdp_stats) + +#define FBNIC_STATS_LEN \ + (FBNIC_HW_STATS_LEN + FBNIC_XDP_STATS_LEN * FBNIC_MAX_XDPQS) + static void fbnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { @@ -160,6 +175,7 @@ static void fbnic_clone_swap_cfg(struct fbnic_net *orig, swap(clone->num_rx_queues, orig->num_rx_queues); swap(clone->num_tx_queues, orig->num_tx_queues); swap(clone->num_napi, orig->num_napi); + swap(clone->hds_thresh, orig->hds_thresh); } static void fbnic_aggregate_vector_counters(struct fbnic_net *fbn, @@ -277,15 +293,21 @@ fbnic_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, ring->rx_mini_pending = fbn->hpq_size; ring->rx_jumbo_pending = fbn->ppq_size; ring->tx_pending = fbn->txq_size; + + kernel_ring->tcp_data_split = ETHTOOL_TCP_DATA_SPLIT_ENABLED; + kernel_ring->hds_thresh_max = FBNIC_HDS_THRESH_MAX; + kernel_ring->hds_thresh = fbn->hds_thresh; } static void fbnic_set_rings(struct fbnic_net *fbn, - struct ethtool_ringparam *ring) + struct ethtool_ringparam *ring, + struct kernel_ethtool_ringparam *kernel_ring) { fbn->rcq_size = ring->rx_pending; fbn->hpq_size = ring->rx_mini_pending; fbn->ppq_size = ring->rx_jumbo_pending; fbn->txq_size = ring->tx_pending; + fbn->hds_thresh = kernel_ring->hds_thresh; } static int @@ -316,8 +338,24 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, return -EINVAL; } + if (kernel_ring->tcp_data_split == ETHTOOL_TCP_DATA_SPLIT_DISABLED) { + NL_SET_ERR_MSG_MOD(extack, "Cannot disable TCP data split"); + return -EINVAL; + } + + /* If an XDP program is attached, we should check for potential frame + * splitting. If the new HDS threshold can cause splitting, we should + * only allow if the attached XDP program can handle frags. 
+ */ + if (fbnic_check_split_frames(fbn->xdp_prog, netdev->mtu, + kernel_ring->hds_thresh)) { + NL_SET_ERR_MSG_MOD(extack, + "Use higher HDS threshold or multi-buf capable program"); + return -EINVAL; + } + if (!netif_running(netdev)) { - fbnic_set_rings(fbn, ring); + fbnic_set_rings(fbn, ring, kernel_ring); return 0; } @@ -325,7 +363,7 @@ fbnic_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, if (!clone) return -ENOMEM; - fbnic_set_rings(clone, ring); + fbnic_set_rings(clone, ring, kernel_ring); err = fbnic_alloc_napi_vectors(clone); if (err) @@ -398,6 +436,16 @@ static void fbnic_get_rxb_dequeue_strings(u8 **data, unsigned int idx) ethtool_sprintf(data, stat->string, idx); } +static void fbnic_get_xdp_queue_strings(u8 **data, unsigned int idx) +{ + const struct fbnic_stat *stat; + int i; + + stat = fbnic_gstrings_xdp_stats; + for (i = 0; i < FBNIC_XDP_STATS_LEN; i++, stat++) + ethtool_sprintf(data, stat->string, idx); +} + static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) { const struct fbnic_stat *stat; @@ -423,6 +471,9 @@ static void fbnic_get_strings(struct net_device *dev, u32 sset, u8 *data) for (i = 0; i < FBNIC_HW_Q_STATS_LEN; i++, stat++) ethtool_sprintf(&data, stat->string, idx); } + + for (i = 0; i < FBNIC_MAX_XDPQS; i++) + fbnic_get_xdp_queue_strings(&data, i); break; } } @@ -440,6 +491,24 @@ static void fbnic_report_hw_stats(const struct fbnic_stat *stat, } } +static void fbnic_get_xdp_queue_stats(struct fbnic_ring *ring, u64 **data) +{ + const struct fbnic_stat *stat; + int i; + + if (!ring) { + *data += FBNIC_XDP_STATS_LEN; + return; + } + + stat = fbnic_gstrings_xdp_stats; + for (i = 0; i < FBNIC_XDP_STATS_LEN; i++, stat++, (*data)++) { + u8 *p = (u8 *)ring + stat->offset; + + **data = *(u64 *)p; + } +} + static void fbnic_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { @@ -449,7 +518,7 @@ static void fbnic_get_ethtool_stats(struct net_device *dev, fbnic_get_hw_stats(fbn->fbd); - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); fbnic_report_hw_stats(fbnic_gstrings_hw_stats, &fbd->hw_stats, FBNIC_HW_FIXED_STATS_LEN, &data); @@ -486,14 +555,17 @@ static void fbnic_get_ethtool_stats(struct net_device *dev, fbnic_report_hw_stats(fbnic_gstrings_hw_q_stats, hw_q, FBNIC_HW_Q_STATS_LEN, &data); } - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); + + for (i = 0; i < FBNIC_MAX_XDPQS; i++) + fbnic_get_xdp_queue_stats(fbn->tx[i + FBNIC_MAX_TXQS], &data); } static int fbnic_get_sset_count(struct net_device *dev, int sset) { switch (sset) { case ETH_SS_STATS: - return FBNIC_HW_STATS_LEN; + return FBNIC_STATS_LEN; default: return -EOPNOTSUPP; } @@ -1310,7 +1382,7 @@ fbnic_get_rss_hash_opts(struct net_device *netdev, #define FBNIC_L2_HASH_OPTIONS \ (RXH_L2DA | RXH_DISCARD) #define FBNIC_L3_HASH_OPTIONS \ - (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST) + (FBNIC_L2_HASH_OPTIONS | RXH_IP_SRC | RXH_IP_DST | RXH_IP6_FL) #define FBNIC_L4_HASH_OPTIONS \ (FBNIC_L3_HASH_OPTIONS | RXH_L4_B_0_1 | RXH_L4_B_2_3) @@ -1563,6 +1635,65 @@ static void fbnic_get_ts_stats(struct net_device *netdev, } } +static int +fbnic_get_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page_data, + struct netlink_ext_ack *extack) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_fw_completion *fw_cmpl; + struct fbnic_dev *fbd = fbn->fbd; + int err; + + if (page_data->i2c_address != 0x50) { + NL_SET_ERR_MSG_MOD(extack, + "Invalid i2c address. 
Only 0x50 is supported"); + return -EINVAL; + } + + fw_cmpl = __fbnic_fw_alloc_cmpl(FBNIC_TLV_MSG_ID_QSFP_READ_RESP, + page_data->length); + if (!fw_cmpl) + return -ENOMEM; + + /* Initialize completion and queue it for FW to process */ + fw_cmpl->u.qsfp.length = page_data->length; + fw_cmpl->u.qsfp.offset = page_data->offset; + fw_cmpl->u.qsfp.page = page_data->page; + fw_cmpl->u.qsfp.bank = page_data->bank; + + err = fbnic_fw_xmit_qsfp_read_msg(fbd, fw_cmpl, page_data->page, + page_data->bank, page_data->offset, + page_data->length); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to transmit EEPROM read request"); + goto exit_free; + } + + if (!wait_for_completion_timeout(&fw_cmpl->done, 2 * HZ)) { + err = -ETIMEDOUT; + NL_SET_ERR_MSG_MOD(extack, + "Timed out waiting for firmware response"); + goto exit_cleanup; + } + + if (fw_cmpl->result) { + err = fw_cmpl->result; + NL_SET_ERR_MSG_MOD(extack, "Failed to read EEPROM"); + goto exit_cleanup; + } + + memcpy(page_data->data, fw_cmpl->u.qsfp.data, page_data->length); + +exit_cleanup: + fbnic_mbx_clear_cmpl(fbd, fw_cmpl); +exit_free: + fbnic_fw_put_cmpl(fw_cmpl); + + return err ? : page_data->length; +} + static void fbnic_set_counter(u64 *stat, struct fbnic_stat_counter *counter) { if (counter->reported) @@ -1570,6 +1701,63 @@ static void fbnic_set_counter(u64 *stat, struct fbnic_stat_counter *counter) } static void +fbnic_get_pause_stats(struct net_device *netdev, + struct ethtool_pause_stats *pause_stats) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_mac_stats *mac_stats; + struct fbnic_dev *fbd = fbn->fbd; + + mac_stats = &fbd->hw_stats.mac; + + fbd->mac->get_pause_stats(fbd, false, &mac_stats->pause); + + pause_stats->tx_pause_frames = mac_stats->pause.tx_pause_frames.value; + pause_stats->rx_pause_frames = mac_stats->pause.rx_pause_frames.value; +} + +static void +fbnic_get_fec_stats(struct net_device *netdev, + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_phy_stats *phy_stats; + struct fbnic_dev *fbd = fbn->fbd; + + fbnic_get_hw_stats32(fbd); + phy_stats = &fbd->hw_stats.phy; + + spin_lock(&fbd->hw_stats.lock); + fec_stats->corrected_blocks.total = + phy_stats->fec.corrected_blocks.value; + fec_stats->uncorrectable_blocks.total = + phy_stats->fec.uncorrectable_blocks.value; + spin_unlock(&fbd->hw_stats.lock); +} + +static void +fbnic_get_eth_phy_stats(struct net_device *netdev, + struct ethtool_eth_phy_stats *eth_phy_stats) +{ + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_phy_stats *phy_stats; + struct fbnic_dev *fbd = fbn->fbd; + u64 total = 0; + int i; + + fbnic_get_hw_stats32(fbd); + phy_stats = &fbd->hw_stats.phy; + + spin_lock(&fbd->hw_stats.lock); + for (i = 0; i < FBNIC_PCS_MAX_LANES; i++) + total += phy_stats->pcs.SymbolErrorDuringCarrier.lanes[i].value; + + eth_phy_stats->SymbolErrorDuringCarrier = total; + spin_unlock(&fbd->hw_stats.lock); +} + +static void fbnic_get_eth_mac_stats(struct net_device *netdev, struct ethtool_eth_mac_stats *eth_mac_stats) { @@ -1676,8 +1864,11 @@ fbnic_get_rmon_stats(struct net_device *netdev, } static const struct ethtool_ops fbnic_ethtool_ops = { + .cap_link_lanes_supported = true, .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_RX_MAX_FRAMES, + .supported_ring_params = ETHTOOL_RING_USE_TCP_DATA_SPLIT | + ETHTOOL_RING_USE_HDS_THRS, .rxfh_max_num_contexts = FBNIC_RPC_RSS_TBL_COUNT, .get_drvinfo = fbnic_get_drvinfo, .get_regs_len = 
fbnic_get_regs_len, @@ -1687,6 +1878,7 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .set_coalesce = fbnic_set_coalesce, .get_ringparam = fbnic_get_ringparam, .set_ringparam = fbnic_set_ringparam, + .get_pause_stats = fbnic_get_pause_stats, .get_pauseparam = fbnic_phylink_get_pauseparam, .set_pauseparam = fbnic_phylink_set_pauseparam, .get_strings = fbnic_get_strings, @@ -1708,7 +1900,10 @@ static const struct ethtool_ops fbnic_ethtool_ops = { .get_ts_info = fbnic_get_ts_info, .get_ts_stats = fbnic_get_ts_stats, .get_link_ksettings = fbnic_phylink_ethtool_ksettings_get, + .get_fec_stats = fbnic_get_fec_stats, .get_fecparam = fbnic_phylink_get_fecparam, + .get_module_eeprom_by_page = fbnic_get_module_eeprom_by_page, + .get_eth_phy_stats = fbnic_get_eth_phy_stats, .get_eth_mac_stats = fbnic_get_eth_mac_stats, .get_eth_ctrl_stats = fbnic_get_eth_ctrl_stats, .get_rmon_stats = fbnic_get_rmon_stats, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c index 0c55be7d2547..c87cb9ed09e7 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.c @@ -495,6 +495,11 @@ int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership) fbd->last_heartbeat_request = req_time; + /* Set prev_firmware_time to 0 to avoid triggering firmware crash + * detection until we receive the second uptime in a heartbeat resp. + */ + fbd->prev_firmware_time = 0; + /* Set heartbeat detection based on if we are taking ownership */ fbd->fw_heartbeat_enabled = take_ownership; @@ -653,10 +658,14 @@ static int fbnic_fw_parse_cap_resp(void *opaque, struct fbnic_tlv_msg **results) fbd->fw_cap.anti_rollback_version = fta_get_uint(results, FBNIC_FW_CAP_RESP_ANTI_ROLLBACK_VERSION); + /* Always assume we need a BMC reinit */ + fbd->fw_cap.need_bmc_tcam_reinit = true; + return 0; } static const struct fbnic_tlv_index fbnic_ownership_resp_index[] = { + FBNIC_TLV_ATTR_U64(FBNIC_FW_OWNERSHIP_TIME), FBNIC_TLV_ATTR_LAST }; @@ -668,10 +677,14 @@ static int fbnic_fw_parse_ownership_resp(void *opaque, /* Count the ownership response as a heartbeat reply */ fbd->last_heartbeat_response = jiffies; + /* Capture firmware time for logging and firmware crash check */ + fbd->firmware_time = fta_get_uint(results, FBNIC_FW_OWNERSHIP_TIME); + return 0; } static const struct fbnic_tlv_index fbnic_heartbeat_resp_index[] = { + FBNIC_TLV_ATTR_U64(FBNIC_FW_HEARTBEAT_UPTIME), FBNIC_TLV_ATTR_LAST }; @@ -682,6 +695,9 @@ static int fbnic_fw_parse_heartbeat_resp(void *opaque, fbd->last_heartbeat_response = jiffies; + /* Capture firmware time for logging and firmware crash check */ + fbd->firmware_time = fta_get_uint(results, FBNIC_FW_HEARTBEAT_UPTIME); + return 0; } @@ -703,6 +719,7 @@ static int fbnic_fw_xmit_heartbeat_message(struct fbnic_dev *fbd) goto free_message; fbd->last_heartbeat_request = req_time; + fbd->prev_firmware_time = fbd->firmware_time; return err; @@ -763,7 +780,8 @@ void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd) return; /* Was the last heartbeat response long time ago? 
*/ - if (!fbnic_fw_heartbeat_current(fbd)) { + if (!fbnic_fw_heartbeat_current(fbd) || + fbd->firmware_time < fbd->prev_firmware_time) { dev_warn(fbd->dev, "Firmware did not respond to heartbeat message\n"); fbd->fw_heartbeat_enabled = false; @@ -775,6 +793,215 @@ void fbnic_fw_check_heartbeat(struct fbnic_dev *fbd) dev_warn(fbd->dev, "Failed to send heartbeat message\n"); } +/** + * fbnic_fw_xmit_coredump_info_msg - Create and transmit a coredump info message + * @fbd: FBNIC device structure + * @cmpl_data: Structure to store info in + * @force: Force coredump event if one hasn't already occurred + * + * Return: zero on success, negative errno on failure + * + * Asks the FW for info related to coredump. If a coredump doesn't exist it + * can optionally force one if force is true. + */ +int fbnic_fw_xmit_coredump_info_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + bool force) +{ + struct fbnic_tlv_msg *msg; + int err = 0; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_REQ); + if (!msg) + return -ENOMEM; + + if (force) { + err = fbnic_tlv_attr_put_flag(msg, FBNIC_FW_COREDUMP_REQ_INFO_CREATE); + if (err) + goto free_msg; + } + + err = fbnic_mbx_map_req_w_cmpl(fbd, msg, cmpl_data); + if (err) + goto free_msg; + + return 0; + +free_msg: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_coredump_info_resp_index[] = { + FBNIC_TLV_ATTR_FLAG(FBNIC_FW_COREDUMP_INFO_AVAILABLE), + FBNIC_TLV_ATTR_U32(FBNIC_FW_COREDUMP_INFO_SIZE), + FBNIC_TLV_ATTR_S32(FBNIC_FW_COREDUMP_INFO_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int +fbnic_fw_parse_coredump_info_resp(void *opaque, struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer to provide with data */ + msg_type = FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_RESP; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + err = fta_get_sint(results, FBNIC_FW_COREDUMP_INFO_ERROR); + if (err) + goto msg_err; + + if (!results[FBNIC_FW_COREDUMP_INFO_AVAILABLE]) { + err = -ENOENT; + goto msg_err; + } + + cmpl_data->u.coredump_info.size = + fta_get_uint(results, FBNIC_FW_COREDUMP_INFO_SIZE); + +msg_err: + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return err; +} + +/** + * fbnic_fw_xmit_coredump_read_msg - Create and transmit a coredump read request + * @fbd: FBNIC device structure + * @cmpl_data: Completion struct to store coredump + * @offset: Offset into coredump requested + * @length: Length of section of cordeump to fetch + * + * Return: zero on success, negative errno on failure + * + * Asks the firmware to provide a section of the cordeump back in a message. + * The response will have an offset and size matching the values provided. 
+ */ +int fbnic_fw_xmit_coredump_read_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + u32 offset, u32 length) +{ + struct fbnic_tlv_msg *msg; + int err = 0; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_COREDUMP_READ_REQ); + if (!msg) + return -ENOMEM; + + if (offset) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_COREDUMP_READ_OFFSET, + offset); + if (err) + goto free_message; + } + + if (length) { + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_COREDUMP_READ_LENGTH, + length); + if (err) + goto free_message; + } + + err = fbnic_mbx_map_req_w_cmpl(fbd, msg, cmpl_data); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_coredump_resp_index[] = { + FBNIC_TLV_ATTR_U32(FBNIC_FW_COREDUMP_READ_OFFSET), + FBNIC_TLV_ATTR_U32(FBNIC_FW_COREDUMP_READ_LENGTH), + FBNIC_TLV_ATTR_RAW_DATA(FBNIC_FW_COREDUMP_READ_DATA), + FBNIC_TLV_ATTR_S32(FBNIC_FW_COREDUMP_READ_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_coredump_resp(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + u32 index, last_offset, last_length; + struct fbnic_dev *fbd = opaque; + struct fbnic_tlv_msg *data_hdr; + u32 length, offset; + u32 msg_type; + s32 err; + + /* Verify we have a completion pointer to provide with data */ + msg_type = FBNIC_TLV_MSG_ID_COREDUMP_READ_RESP; + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, msg_type); + if (!cmpl_data) + return -ENOSPC; + + err = fta_get_sint(results, FBNIC_FW_COREDUMP_READ_ERROR); + if (err) + goto msg_err; + + data_hdr = results[FBNIC_FW_COREDUMP_READ_DATA]; + if (!data_hdr) { + err = -ENODATA; + goto msg_err; + } + + offset = fta_get_uint(results, FBNIC_FW_COREDUMP_READ_OFFSET); + length = fta_get_uint(results, FBNIC_FW_COREDUMP_READ_LENGTH); + + if (length > le16_to_cpu(data_hdr->hdr.len) - sizeof(u32)) { + dev_err(fbd->dev, "length greater than size of message\n"); + err = -EINVAL; + goto msg_err; + } + + /* Only the last offset can have a length != stride */ + last_length = + (cmpl_data->u.coredump.size % cmpl_data->u.coredump.stride) ? : + cmpl_data->u.coredump.stride; + last_offset = cmpl_data->u.coredump.size - last_length; + + /* Verify offset and length */ + if (offset % cmpl_data->u.coredump.stride || offset > last_offset) { + dev_err(fbd->dev, "offset %d out of range\n", offset); + err = -EINVAL; + } else if (length != ((offset == last_offset) ? 
+ last_length : cmpl_data->u.coredump.stride)) { + dev_err(fbd->dev, "length %d out of range for offset %d\n", + length, offset); + err = -EINVAL; + } + if (err) + goto msg_err; + + /* If data pointer is NULL it is already filled, just skip the copy */ + index = offset / cmpl_data->u.coredump.stride; + if (!cmpl_data->u.coredump.data[index]) + goto msg_err; + + /* Copy data and mark index filled by setting pointer to NULL */ + memcpy(cmpl_data->u.coredump.data[index], + fbnic_tlv_attr_get_value_ptr(data_hdr), length); + cmpl_data->u.coredump.data[index] = NULL; + +msg_err: + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return err; +} + int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data, unsigned int id, unsigned int len) @@ -958,6 +1185,138 @@ static int fbnic_fw_parse_fw_finish_upgrade_req(void *opaque, } /** + * fbnic_fw_xmit_qsfp_read_msg - Transmit a QSFP read request + * @fbd: FBNIC device structure + * @cmpl_data: Structure to store EEPROM response in + * @page: Refers to page number on page enabled QSFP modules + * @bank: Refers to a collection of pages + * @offset: Offset into QSFP EEPROM requested + * @length: Length of section of QSFP EEPROM to fetch + * + * Return: zero on success, negative value on failure + * + * Asks the firmware to provide a section of the QSFP EEPROM back in a + * message. The response will have an offset and size matching the values + * provided. + */ +int fbnic_fw_xmit_qsfp_read_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + u32 page, u32 bank, u32 offset, u32 length) +{ + struct fbnic_tlv_msg *msg; + int err = 0; + + if (!length || length > TLV_MAX_DATA) + return -EINVAL; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_QSFP_READ_REQ); + if (!msg) + return -ENOMEM; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_QSFP_BANK, bank); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_QSFP_PAGE, page); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_QSFP_OFFSET, offset); + if (err) + goto free_message; + + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_QSFP_LENGTH, length); + if (err) + goto free_message; + + err = fbnic_mbx_map_req_w_cmpl(fbd, msg, cmpl_data); + if (err) + goto free_message; + + return 0; + +free_message: + free_page((unsigned long)msg); + return err; +} + +static const struct fbnic_tlv_index fbnic_qsfp_read_resp_index[] = { + FBNIC_TLV_ATTR_U32(FBNIC_FW_QSFP_BANK), + FBNIC_TLV_ATTR_U32(FBNIC_FW_QSFP_PAGE), + FBNIC_TLV_ATTR_U32(FBNIC_FW_QSFP_OFFSET), + FBNIC_TLV_ATTR_U32(FBNIC_FW_QSFP_LENGTH), + FBNIC_TLV_ATTR_RAW_DATA(FBNIC_FW_QSFP_DATA), + FBNIC_TLV_ATTR_S32(FBNIC_FW_QSFP_ERROR), + FBNIC_TLV_ATTR_LAST +}; + +static int fbnic_fw_parse_qsfp_read_resp(void *opaque, + struct fbnic_tlv_msg **results) +{ + struct fbnic_fw_completion *cmpl_data; + struct fbnic_dev *fbd = opaque; + struct fbnic_tlv_msg *data_hdr; + u32 length, offset, page, bank; + u8 *data; + s32 err; + + /* Verify we have a completion pointer to provide with data */ + cmpl_data = fbnic_fw_get_cmpl_by_type(fbd, + FBNIC_TLV_MSG_ID_QSFP_READ_RESP); + if (!cmpl_data) + return -ENOSPC; + + bank = fta_get_uint(results, FBNIC_FW_QSFP_BANK); + if (bank != cmpl_data->u.qsfp.bank) { + dev_warn(fbd->dev, "bank not equal to bank requested: %d vs %d\n", + bank, cmpl_data->u.qsfp.bank); + err = -EINVAL; + goto msg_err; + } + + page = fta_get_uint(results, FBNIC_FW_QSFP_PAGE); + if (page != cmpl_data->u.qsfp.page) { + 
dev_warn(fbd->dev, "page not equal to page requested: %d vs %d\n", + page, cmpl_data->u.qsfp.page); + err = -EINVAL; + goto msg_err; + } + + offset = fta_get_uint(results, FBNIC_FW_QSFP_OFFSET); + length = fta_get_uint(results, FBNIC_FW_QSFP_LENGTH); + + if (length != cmpl_data->u.qsfp.length || + offset != cmpl_data->u.qsfp.offset) { + dev_warn(fbd->dev, + "offset/length not equal to size requested: %d/%d vs %d/%d\n", + offset, length, + cmpl_data->u.qsfp.offset, cmpl_data->u.qsfp.length); + err = -EINVAL; + goto msg_err; + } + + err = fta_get_sint(results, FBNIC_FW_QSFP_ERROR); + if (err) + goto msg_err; + + data_hdr = results[FBNIC_FW_QSFP_DATA]; + if (!data_hdr) { + err = -ENODATA; + goto msg_err; + } + + /* Copy data */ + data = fbnic_tlv_attr_get_value_ptr(data_hdr); + memcpy(cmpl_data->u.qsfp.data, data, length); +msg_err: + cmpl_data->result = err; + complete(&cmpl_data->done); + fbnic_fw_put_cmpl(cmpl_data); + + return err; +} + +/** * fbnic_fw_xmit_tsene_read_msg - Create and transmit a sensor read request * @fbd: FBNIC device structure * @cmpl_data: Completion data structure to store sensor response @@ -1204,6 +1563,11 @@ static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { fbnic_fw_parse_ownership_resp), FBNIC_TLV_PARSER(HEARTBEAT_RESP, fbnic_heartbeat_resp_index, fbnic_fw_parse_heartbeat_resp), + FBNIC_TLV_PARSER(COREDUMP_GET_INFO_RESP, + fbnic_coredump_info_resp_index, + fbnic_fw_parse_coredump_info_resp), + FBNIC_TLV_PARSER(COREDUMP_READ_RESP, fbnic_coredump_resp_index, + fbnic_fw_parse_coredump_resp), FBNIC_TLV_PARSER(FW_START_UPGRADE_RESP, fbnic_fw_start_upgrade_resp_index, fbnic_fw_parse_fw_start_upgrade_resp), @@ -1213,6 +1577,9 @@ static const struct fbnic_tlv_parser fbnic_fw_tlv_parser[] = { FBNIC_TLV_PARSER(FW_FINISH_UPGRADE_REQ, fbnic_fw_finish_upgrade_req_index, fbnic_fw_parse_fw_finish_upgrade_req), + FBNIC_TLV_PARSER(QSFP_READ_RESP, + fbnic_qsfp_read_resp_index, + fbnic_fw_parse_qsfp_read_resp), FBNIC_TLV_PARSER(TSENE_READ_RESP, fbnic_tsene_read_resp_index, fbnic_fw_parse_tsene_read_resp), @@ -1410,6 +1777,109 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd) } while (time_is_after_jiffies(timeout)); } +int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd) +{ + struct fbnic_tlv_msg *mac_array; + int i, addr_count = 0, err; + struct fbnic_tlv_msg *msg; + u32 rx_flags = 0; + + /* Nothing to do if there is no FW to sync with */ + if (!fbd->mbx[FBNIC_IPC_MBX_TX_IDX].ready) + return 0; + + msg = fbnic_tlv_msg_alloc(FBNIC_TLV_MSG_ID_RPC_MAC_SYNC_REQ); + if (!msg) + return -ENOMEM; + + mac_array = fbnic_tlv_attr_nest_start(msg, + FBNIC_FW_RPC_MAC_SYNC_UC_ARRAY); + if (!mac_array) + goto free_message_nospc; + + /* Populate the unicast MAC addrs and capture PROMISC/ALLMULTI flags */ + for (addr_count = 0, i = FBNIC_RPC_TCAM_MACDA_PROMISC_IDX; + i >= fbd->mac_addr_boundary; i--) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i]; + + if (mac_addr->state != FBNIC_TCAM_S_VALID) + continue; + if (test_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam)) + rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI; + if (test_bit(FBNIC_MAC_ADDR_T_PROMISC, mac_addr->act_tcam)) + rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC; + if (!test_bit(FBNIC_MAC_ADDR_T_UNICAST, mac_addr->act_tcam)) + continue; + if (addr_count == FW_RPC_MAC_SYNC_UC_ARRAY_SIZE) { + rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC; + continue; + } + + err = fbnic_tlv_attr_put_value(mac_array, + FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR, + mac_addr->value.addr8, + ETH_ALEN); + if (err) + goto free_message; + addr_count++; 
+ } + + /* Close array */ + fbnic_tlv_attr_nest_stop(msg); + + mac_array = fbnic_tlv_attr_nest_start(msg, + FBNIC_FW_RPC_MAC_SYNC_MC_ARRAY); + if (!mac_array) + goto free_message_nospc; + + /* Repeat for multicast addrs, record BROADCAST/ALLMULTI flags */ + for (addr_count = 0, i = FBNIC_RPC_TCAM_MACDA_BROADCAST_IDX; + i < fbd->mac_addr_boundary; i++) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[i]; + + if (mac_addr->state != FBNIC_TCAM_S_VALID) + continue; + if (test_bit(FBNIC_MAC_ADDR_T_BROADCAST, mac_addr->act_tcam)) + rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_BROADCAST; + if (test_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam)) + rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI; + if (!test_bit(FBNIC_MAC_ADDR_T_MULTICAST, mac_addr->act_tcam)) + continue; + if (addr_count == FW_RPC_MAC_SYNC_MC_ARRAY_SIZE) { + rx_flags |= FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI; + continue; + } + + err = fbnic_tlv_attr_put_value(mac_array, + FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR, + mac_addr->value.addr8, + ETH_ALEN); + if (err) + goto free_message; + addr_count++; + } + + /* Close array */ + fbnic_tlv_attr_nest_stop(msg); + + /* Report flags at end of list */ + err = fbnic_tlv_attr_put_int(msg, FBNIC_FW_RPC_MAC_SYNC_RX_FLAGS, + rx_flags); + if (err) + goto free_message; + + /* Send message of to FW notifying it of current RPC config */ + err = fbnic_mbx_map_tlv_msg(fbd, msg); + if (err) + goto free_message; + return 0; +free_message_nospc: + err = -ENOSPC; +free_message: + free_page((unsigned long)msg); + return err; +} + void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, const size_t str_sz) { @@ -1423,11 +1893,12 @@ void fbnic_get_fw_ver_commit_str(struct fbnic_dev *fbd, char *fw_version, fw_version, str_sz); } -struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type) +struct fbnic_fw_completion *__fbnic_fw_alloc_cmpl(u32 msg_type, + size_t priv_size) { struct fbnic_fw_completion *cmpl; - cmpl = kzalloc(sizeof(*cmpl), GFP_KERNEL); + cmpl = kzalloc(sizeof(*cmpl) + priv_size, GFP_KERNEL); if (!cmpl) return NULL; @@ -1438,6 +1909,11 @@ struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type) return cmpl; } +struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type) +{ + return __fbnic_fw_alloc_cmpl(msg_type, 0); +} + void fbnic_fw_put_cmpl(struct fbnic_fw_completion *fw_cmpl) { kref_put(&fw_cmpl->ref_count, fbnic_fw_release_cmpl_data); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h index fde331696fdd..1ecd777aaada 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw.h @@ -51,8 +51,10 @@ struct fbnic_fw_cap { } stored; u8 active_slot; u8 bmc_mac_addr[4][ETH_ALEN]; - u8 bmc_present : 1; - u8 all_multi : 1; + u8 bmc_present : 1; + u8 need_bmc_tcam_reinit : 1; + u8 need_bmc_macda_sync : 1; + u8 all_multi : 1; u8 link_speed; u8 link_fec; u32 anti_rollback_version; @@ -65,10 +67,25 @@ struct fbnic_fw_completion { int result; union { struct { + u32 size; + } coredump_info; + struct { + u32 size; + u16 stride; + u8 *data[]; + } coredump; + struct { u32 offset; u32 length; } fw_update; struct { + u16 length; + u8 offset; + u8 page; + u8 bank; + u8 data[] __aligned(sizeof(u32)) __counted_by(length); + } qsfp; + struct { s32 millivolts; s32 millidegrees; } tsene; @@ -87,16 +104,28 @@ void fbnic_mbx_flush_tx(struct fbnic_dev *fbd); int fbnic_fw_xmit_ownership_msg(struct fbnic_dev *fbd, bool take_ownership); int fbnic_fw_init_heartbeat(struct fbnic_dev *fbd, bool poll); void 
fbnic_fw_check_heartbeat(struct fbnic_dev *fbd); +int fbnic_fw_xmit_coredump_info_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + bool force); +int fbnic_fw_xmit_coredump_read_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + u32 offset, u32 length); int fbnic_fw_xmit_fw_start_upgrade(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data, unsigned int id, unsigned int len); int fbnic_fw_xmit_fw_write_chunk(struct fbnic_dev *fbd, const u8 *data, u32 offset, u16 length, int cancel_error); +int fbnic_fw_xmit_qsfp_read_msg(struct fbnic_dev *fbd, + struct fbnic_fw_completion *cmpl_data, + u32 page, u32 bank, u32 offset, u32 length); int fbnic_fw_xmit_tsene_read_msg(struct fbnic_dev *fbd, struct fbnic_fw_completion *cmpl_data); int fbnic_fw_xmit_send_logs(struct fbnic_dev *fbd, bool enable, bool send_log_history); +int fbnic_fw_xmit_rpc_macda_sync(struct fbnic_dev *fbd); +struct fbnic_fw_completion *__fbnic_fw_alloc_cmpl(u32 msg_type, + size_t priv_size); struct fbnic_fw_completion *fbnic_fw_alloc_cmpl(u32 msg_type); void fbnic_fw_put_cmpl(struct fbnic_fw_completion *cmpl_data); @@ -132,17 +161,24 @@ enum { FBNIC_TLV_MSG_ID_OWNERSHIP_RESP = 0x13, FBNIC_TLV_MSG_ID_HEARTBEAT_REQ = 0x14, FBNIC_TLV_MSG_ID_HEARTBEAT_RESP = 0x15, + FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_REQ = 0x18, + FBNIC_TLV_MSG_ID_COREDUMP_GET_INFO_RESP = 0x19, + FBNIC_TLV_MSG_ID_COREDUMP_READ_REQ = 0x20, + FBNIC_TLV_MSG_ID_COREDUMP_READ_RESP = 0x21, FBNIC_TLV_MSG_ID_FW_START_UPGRADE_REQ = 0x22, FBNIC_TLV_MSG_ID_FW_START_UPGRADE_RESP = 0x23, FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_REQ = 0x24, FBNIC_TLV_MSG_ID_FW_WRITE_CHUNK_RESP = 0x25, FBNIC_TLV_MSG_ID_FW_FINISH_UPGRADE_REQ = 0x28, FBNIC_TLV_MSG_ID_FW_FINISH_UPGRADE_RESP = 0x29, + FBNIC_TLV_MSG_ID_QSFP_READ_REQ = 0x38, + FBNIC_TLV_MSG_ID_QSFP_READ_RESP = 0x39, FBNIC_TLV_MSG_ID_TSENE_READ_REQ = 0x3C, FBNIC_TLV_MSG_ID_TSENE_READ_RESP = 0x3D, FBNIC_TLV_MSG_ID_LOG_SEND_LOGS_REQ = 0x43, FBNIC_TLV_MSG_ID_LOG_MSG_REQ = 0x44, FBNIC_TLV_MSG_ID_LOG_MSG_RESP = 0x45, + FBNIC_TLV_MSG_ID_RPC_MAC_SYNC_REQ = 0x46, }; #define FBNIC_FW_CAP_RESP_VERSION_MAJOR CSR_GENMASK(31, 24) @@ -186,6 +222,16 @@ enum { }; enum { + FBNIC_FW_QSFP_BANK = 0x0, + FBNIC_FW_QSFP_PAGE = 0x1, + FBNIC_FW_QSFP_OFFSET = 0x2, + FBNIC_FW_QSFP_LENGTH = 0x3, + FBNIC_FW_QSFP_ERROR = 0x4, + FBNIC_FW_QSFP_DATA = 0x5, + FBNIC_FW_QSFP_MSG_MAX +}; + +enum { FBNIC_FW_TSENE_THERM = 0x0, FBNIC_FW_TSENE_VOLT = 0x1, FBNIC_FW_TSENE_ERROR = 0x2, @@ -194,10 +240,37 @@ enum { enum { FBNIC_FW_OWNERSHIP_FLAG = 0x0, + FBNIC_FW_OWNERSHIP_TIME = 0x1, FBNIC_FW_OWNERSHIP_MSG_MAX }; enum { + FBNIC_FW_HEARTBEAT_UPTIME = 0x0, + FBNIC_FW_HEARTBEAT_NUMBER_OF_MESSAGES = 0x1, + FBNIC_FW_HEARTBEAT_MSG_MAX +}; + +enum { + FBNIC_FW_COREDUMP_REQ_INFO_CREATE = 0x0, + FBNIC_FW_COREDUMP_REQ_INFO_MSG_MAX +}; + +enum { + FBNIC_FW_COREDUMP_INFO_AVAILABLE = 0x0, + FBNIC_FW_COREDUMP_INFO_SIZE = 0x1, + FBNIC_FW_COREDUMP_INFO_ERROR = 0x2, + FBNIC_FW_COREDUMP_INFO_MSG_MAX +}; + +enum { + FBNIC_FW_COREDUMP_READ_OFFSET = 0x0, + FBNIC_FW_COREDUMP_READ_LENGTH = 0x1, + FBNIC_FW_COREDUMP_READ_DATA = 0x2, + FBNIC_FW_COREDUMP_READ_ERROR = 0x3, + FBNIC_FW_COREDUMP_READ_MSG_MAX +}; + +enum { FBNIC_FW_START_UPGRADE_ERROR = 0x0, FBNIC_FW_START_UPGRADE_SECTION = 0x1, FBNIC_FW_START_UPGRADE_IMAGE_LENGTH = 0x2, @@ -235,4 +308,19 @@ enum { FBNIC_FW_LOG_MSG_MAX }; +enum { + FBNIC_FW_RPC_MAC_SYNC_RX_FLAGS = 0x0, + FBNIC_FW_RPC_MAC_SYNC_UC_ARRAY = 0x1, + FBNIC_FW_RPC_MAC_SYNC_MC_ARRAY = 0x2, + FBNIC_FW_RPC_MAC_SYNC_MAC_ADDR = 0x3, + 
FBNIC_FW_RPC_MAC_SYNC_MSG_MAX +}; + +#define FW_RPC_MAC_SYNC_RX_FLAGS_PROMISC 1 +#define FW_RPC_MAC_SYNC_RX_FLAGS_ALLMULTI 2 +#define FW_RPC_MAC_SYNC_RX_FLAGS_BROADCAST 4 + +#define FW_RPC_MAC_SYNC_UC_ARRAY_SIZE 8 +#define FW_RPC_MAC_SYNC_MC_ARRAY_SIZE 8 + #endif /* _FBNIC_FW_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c index c1663f042245..85a883dba385 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.c @@ -72,7 +72,7 @@ void fbnic_fw_log_free(struct fbnic_dev *fbd) } int fbnic_fw_log_write(struct fbnic_dev *fbd, u64 index, u32 timestamp, - char *msg) + const char *msg) { struct fbnic_fw_log_entry *entry, *head, *tail, *next; struct fbnic_fw_log *log = &fbd->fw_log; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h index cb6555f40a24..50ec79003108 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_fw_log.h @@ -41,5 +41,5 @@ void fbnic_fw_log_disable(struct fbnic_dev *fbd); int fbnic_fw_log_init(struct fbnic_dev *fbd); void fbnic_fw_log_free(struct fbnic_dev *fbd); int fbnic_fw_log_write(struct fbnic_dev *fbd, u64 index, u32 timestamp, - char *msg); + const char *msg); #endif /* _FBNIC_FW_LOG_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c index 4223d8100e64..8b9b2076beec 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.c @@ -1,6 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) Meta Platforms, Inc. and affiliates. */ +#include <linux/rtnetlink.h> + #include "fbnic.h" static void fbnic_hw_stat_rst32(struct fbnic_dev *fbd, u32 reg, @@ -421,9 +423,9 @@ static void fbnic_get_hw_rxq_stats32(struct fbnic_dev *fbd, void fbnic_get_hw_q_stats(struct fbnic_dev *fbd, struct fbnic_hw_q_stats *hw_q) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); fbnic_get_hw_rxq_stats32(fbd, hw_q); - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } static void fbnic_reset_pcie_stats_asic(struct fbnic_dev *fbd, @@ -510,20 +512,68 @@ static void fbnic_get_pcie_stats_asic64(struct fbnic_dev *fbd, &pcie->ob_rd_no_np_cred); } +static void fbnic_reset_phy_stats(struct fbnic_dev *fbd, + struct fbnic_phy_stats *phy_stats) +{ + const struct fbnic_mac *mac = fbd->mac; + + mac->get_fec_stats(fbd, true, &phy_stats->fec); + mac->get_pcs_stats(fbd, true, &phy_stats->pcs); +} + +static void fbnic_get_phy_stats32(struct fbnic_dev *fbd, + struct fbnic_phy_stats *phy_stats) +{ + const struct fbnic_mac *mac = fbd->mac; + + mac->get_fec_stats(fbd, false, &phy_stats->fec); + mac->get_pcs_stats(fbd, false, &phy_stats->pcs); +} + +static void fbnic_reset_hw_mac_stats(struct fbnic_dev *fbd, + struct fbnic_mac_stats *mac_stats) +{ + const struct fbnic_mac *mac = fbd->mac; + + mac->get_eth_mac_stats(fbd, true, &mac_stats->eth_mac); + mac->get_pause_stats(fbd, true, &mac_stats->pause); + mac->get_eth_ctrl_stats(fbd, true, &mac_stats->eth_ctrl); + mac->get_rmon_stats(fbd, true, &mac_stats->rmon); +} + void fbnic_reset_hw_stats(struct fbnic_dev *fbd) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); + fbnic_reset_phy_stats(fbd, &fbd->hw_stats.phy); fbnic_reset_tmi_stats(fbd, &fbd->hw_stats.tmi); fbnic_reset_tti_stats(fbd, &fbd->hw_stats.tti); fbnic_reset_rpc_stats(fbd, &fbd->hw_stats.rpc); 
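The fbnic_fw_xmit_rpc_macda_sync() logic earlier in this hunk series caps the unicast and multicast address arrays at eight entries and falls back to the PROMISC or ALLMULTI RX flags when a list overflows. A rough model of that fallback is sketched below; the macro names and address counts are simplified stand-ins for the FW_RPC_MAC_SYNC_* defines above, and the per-address TCAM bookkeeping is omitted.

/* Sketch: build the RX-flags word from list sizes, escalating to
 * promiscuous/all-multicast when an address array would overflow.
 */
#include <stdio.h>

#define RX_FLAG_PROMISC   1
#define RX_FLAG_ALLMULTI  2
#define RX_FLAG_BROADCAST 4

#define UC_ARRAY_SIZE 8
#define MC_ARRAY_SIZE 8

static unsigned int sync_rx_flags(int uc_addrs, int mc_addrs, int want_bcast)
{
	unsigned int flags = 0;

	if (uc_addrs > UC_ARRAY_SIZE)	/* unicast list overflowed */
		flags |= RX_FLAG_PROMISC;
	if (mc_addrs > MC_ARRAY_SIZE)	/* multicast list overflowed */
		flags |= RX_FLAG_ALLMULTI;
	if (want_bcast)
		flags |= RX_FLAG_BROADCAST;

	return flags;
}

int main(void)
{
	/* 12 unicast and 3 multicast addresses, broadcast enabled */
	printf("rx_flags = 0x%x\n", sync_rx_flags(12, 3, 1));	/* 0x5 */
	return 0;
}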
fbnic_reset_rxb_stats(fbd, &fbd->hw_stats.rxb); fbnic_reset_hw_rxq_stats(fbd, fbd->hw_stats.hw_q); fbnic_reset_pcie_stats_asic(fbd, &fbd->hw_stats.pcie); - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); + + /* Once registered, the only other access to MAC stats is via the + * ethtool API which is protected by the rtnl_lock. The call to + * fbnic_reset_hw_stats() during PCI recovery is also protected + * by the rtnl_lock hence, we don't need the spinlock to access + * the MAC stats. + */ + if (fbd->netdev) + ASSERT_RTNL(); + fbnic_reset_hw_mac_stats(fbd, &fbd->hw_stats.mac); +} + +void fbnic_init_hw_stats(struct fbnic_dev *fbd) +{ + spin_lock_init(&fbd->hw_stats.lock); + + fbnic_reset_hw_stats(fbd); } static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd) { + fbnic_get_phy_stats32(fbd, &fbd->hw_stats.phy); fbnic_get_tmi_stats32(fbd, &fbd->hw_stats.tmi); fbnic_get_tti_stats32(fbd, &fbd->hw_stats.tti); fbnic_get_rpc_stats32(fbd, &fbd->hw_stats.rpc); @@ -533,19 +583,19 @@ static void __fbnic_get_hw_stats32(struct fbnic_dev *fbd) void fbnic_get_hw_stats32(struct fbnic_dev *fbd) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); __fbnic_get_hw_stats32(fbd); - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } void fbnic_get_hw_stats(struct fbnic_dev *fbd) { - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); __fbnic_get_hw_stats32(fbd); fbnic_get_tmi_stats(fbd, &fbd->hw_stats.tmi); fbnic_get_tti_stats(fbd, &fbd->hw_stats.tti); fbnic_get_rxb_stats(fbd, &fbd->hw_stats.rxb); fbnic_get_pcie_stats_asic64(fbd, &fbd->hw_stats.pcie); - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h index 4fe239717497..aa3f429a9aed 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_hw_stats.h @@ -5,6 +5,7 @@ #define _FBNIC_HW_STATS_H_ #include <linux/ethtool.h> +#include <linux/spinlock.h> #include "fbnic_csr.h" @@ -22,6 +23,16 @@ struct fbnic_hw_stat { struct fbnic_stat_counter bytes; }; +struct fbnic_fec_stats { + struct fbnic_stat_counter corrected_blocks, uncorrectable_blocks; +}; + +struct fbnic_pcs_stats { + struct { + struct fbnic_stat_counter lanes[FBNIC_PCS_MAX_LANES]; + } SymbolErrorDuringCarrier; +}; + /* Note: not updated by fbnic_get_hw_stats() */ struct fbnic_eth_ctrl_stats { struct fbnic_stat_counter MACControlFramesTransmitted; @@ -39,6 +50,12 @@ struct fbnic_rmon_stats { struct fbnic_stat_counter hist_tx[ETHTOOL_RMON_HIST_MAX]; }; +/* Note: not updated by fbnic_get_hw_stats() */ +struct fbnic_pause_stats { + struct fbnic_stat_counter tx_pause_frames; + struct fbnic_stat_counter rx_pause_frames; +}; + struct fbnic_eth_mac_stats { struct fbnic_stat_counter FramesTransmittedOK; struct fbnic_stat_counter FramesReceivedOK; @@ -55,8 +72,14 @@ struct fbnic_eth_mac_stats { struct fbnic_stat_counter FrameTooLongErrors; }; +struct fbnic_phy_stats { + struct fbnic_fec_stats fec; + struct fbnic_pcs_stats pcs; +}; + struct fbnic_mac_stats { struct fbnic_eth_mac_stats eth_mac; + struct fbnic_pause_stats pause; struct fbnic_eth_ctrl_stats eth_ctrl; struct fbnic_rmon_stats rmon; }; @@ -115,6 +138,7 @@ struct fbnic_pcie_stats { }; struct fbnic_hw_stats { + struct fbnic_phy_stats phy; struct fbnic_mac_stats mac; struct fbnic_tmi_stats tmi; struct fbnic_tti_stats tti; @@ -122,11 +146,15 @@ struct fbnic_hw_stats { struct fbnic_rxb_stats rxb; struct 
fbnic_hw_q_stats hw_q[FBNIC_MAX_QUEUES]; struct fbnic_pcie_stats pcie; + + /* Lock protecting the access to hw stats */ + spinlock_t lock; }; u64 fbnic_stat_rd64(struct fbnic_dev *fbd, u32 reg, u32 offset); void fbnic_reset_hw_stats(struct fbnic_dev *fbd); +void fbnic_init_hw_stats(struct fbnic_dev *fbd); void fbnic_get_hw_q_stats(struct fbnic_dev *fbd, struct fbnic_hw_q_stats *hw_q); void fbnic_get_hw_stats32(struct fbnic_dev *fbd); diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c index fd8d67f9048e..8f998d26b9a3 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.c @@ -632,6 +632,50 @@ static void fbnic_mac_link_up_asic(struct fbnic_dev *fbd, } static void +fbnic_pcs_rsfec_stat_rd32(struct fbnic_dev *fbd, u32 reg, bool reset, + struct fbnic_stat_counter *stat) +{ + u32 pcs_rsfec_stat; + + /* The PCS/RSFEC registers are only 16b wide each. So what we will + * have after the 64b read is 0x0000xxxx0000xxxx. To make it usable + * as a full stat we will shift the upper bits into the lower set of + * 0s and then mask off the math at 32b. + * + * Read ordering must be lower reg followed by upper reg. + */ + pcs_rsfec_stat = rd32(fbd, reg) & 0xffff; + pcs_rsfec_stat |= rd32(fbd, reg + 1) << 16; + + /* RSFEC registers clear themselves upon being read so there is no + * need to store the old_reg_value. + */ + if (!reset) + stat->value += pcs_rsfec_stat; +} + +static void +fbnic_mac_get_fec_stats(struct fbnic_dev *fbd, bool reset, + struct fbnic_fec_stats *s) +{ + fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_RSFEC_CCW_LO(0), reset, + &s->corrected_blocks); + fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_RSFEC_NCCW_LO(0), reset, + &s->uncorrectable_blocks); +} + +static void +fbnic_mac_get_pcs_stats(struct fbnic_dev *fbd, bool reset, + struct fbnic_pcs_stats *s) +{ + int i; + + for (i = 0; i < FBNIC_PCS_MAX_LANES; i++) + fbnic_pcs_rsfec_stat_rd32(fbd, FBNIC_PCS_SYMBLERR_LO(i), reset, + &s->SymbolErrorDuringCarrier.lanes[i]); +} + +static void fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset, struct fbnic_eth_mac_stats *mac_stats) { @@ -666,6 +710,16 @@ fbnic_mac_get_eth_mac_stats(struct fbnic_dev *fbd, bool reset, } static void +fbnic_mac_get_pause_stats(struct fbnic_dev *fbd, bool reset, + struct fbnic_pause_stats *pause_stats) +{ + fbnic_mac_stat_rd64(fbd, reset, pause_stats->tx_pause_frames, + MAC_STAT_TX_XOFF_STB); + fbnic_mac_stat_rd64(fbd, reset, pause_stats->rx_pause_frames, + MAC_STAT_RX_XOFF_STB); +} + +static void fbnic_mac_get_eth_ctrl_stats(struct fbnic_dev *fbd, bool reset, struct fbnic_eth_ctrl_stats *ctrl_stats) { @@ -809,7 +863,10 @@ static const struct fbnic_mac fbnic_mac_asic = { .pcs_disable = fbnic_pcs_disable_asic, .pcs_get_link = fbnic_pcs_get_link_asic, .pcs_get_link_event = fbnic_pcs_get_link_event_asic, + .get_fec_stats = fbnic_mac_get_fec_stats, + .get_pcs_stats = fbnic_mac_get_pcs_stats, .get_eth_mac_stats = fbnic_mac_get_eth_mac_stats, + .get_pause_stats = fbnic_mac_get_pause_stats, .get_eth_ctrl_stats = fbnic_mac_get_eth_ctrl_stats, .get_rmon_stats = fbnic_mac_get_rmon_stats, .link_down = fbnic_mac_link_down_asic, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h index 86fa06da2b3e..ede5ff0dae22 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_mac.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_mac.h @@ -79,8 +79,14 @@ struct fbnic_mac { bool (*pcs_get_link)(struct fbnic_dev *fbd); int (*pcs_get_link_event)(struct 
fbnic_dev *fbd); + void (*get_fec_stats)(struct fbnic_dev *fbd, bool reset, + struct fbnic_fec_stats *fec_stats); + void (*get_pcs_stats)(struct fbnic_dev *fbd, bool reset, + struct fbnic_pcs_stats *pcs_stats); void (*get_eth_mac_stats)(struct fbnic_dev *fbd, bool reset, struct fbnic_eth_mac_stats *mac_stats); + void (*get_pause_stats)(struct fbnic_dev *fbd, bool reset, + struct fbnic_pause_stats *pause_stats); void (*get_eth_ctrl_stats)(struct fbnic_dev *fbd, bool reset, struct fbnic_eth_ctrl_stats *ctrl_stats); void (*get_rmon_stats)(struct fbnic_dev *fbd, bool reset, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c index e67e99487a27..d12b4cad84a5 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.c @@ -52,6 +52,8 @@ int __fbnic_open(struct fbnic_net *fbn) fbnic_bmc_rpc_init(fbd); fbnic_rss_reinit(fbd, fbn); + phylink_resume(fbn->phylink); + return 0; time_stop: fbnic_time_stop(fbn); @@ -84,6 +86,8 @@ static int fbnic_stop(struct net_device *netdev) { struct fbnic_net *fbn = netdev_priv(netdev); + phylink_suspend(fbn->phylink, fbnic_bmc_present(fbn->fbd)); + fbnic_down(fbn); fbnic_pcs_free_irq(fbn->fbd); @@ -179,11 +183,10 @@ static int fbnic_mc_unsync(struct net_device *netdev, const unsigned char *addr) return ret; } -void __fbnic_set_rx_mode(struct net_device *netdev) +void __fbnic_set_rx_mode(struct fbnic_dev *fbd) { - struct fbnic_net *fbn = netdev_priv(netdev); bool uc_promisc = false, mc_promisc = false; - struct fbnic_dev *fbd = fbn->fbd; + struct net_device *netdev = fbd->netdev; struct fbnic_mac_addr *mac_addr; int err; @@ -220,49 +223,8 @@ void __fbnic_set_rx_mode(struct net_device *netdev) uc_promisc |= !!(netdev->flags & IFF_PROMISC); mc_promisc |= !!(netdev->flags & IFF_ALLMULTI) || uc_promisc; - /* Populate last TCAM entry with promiscuous entry and 0/1 bit mask */ - mac_addr = &fbd->mac_addr[FBNIC_RPC_TCAM_MACDA_PROMISC_IDX]; - if (uc_promisc) { - if (!is_zero_ether_addr(mac_addr->value.addr8) || - mac_addr->state != FBNIC_TCAM_S_VALID) { - eth_zero_addr(mac_addr->value.addr8); - eth_broadcast_addr(mac_addr->mask.addr8); - clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, - mac_addr->act_tcam); - set_bit(FBNIC_MAC_ADDR_T_PROMISC, - mac_addr->act_tcam); - mac_addr->state = FBNIC_TCAM_S_ADD; - } - } else if (mc_promisc && - (!fbnic_bmc_present(fbd) || !fbd->fw_cap.all_multi)) { - /* We have to add a special handler for multicast as the - * BMC may have an all-multi rule already in place. As such - * adding a rule ourselves won't do any good so we will have - * to modify the rules for the ALL MULTI below if the BMC - * already has the rule in place. 
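/*
 * Worked example (editorial illustration, not part of the patch): the
 * promiscuous TCAM slot is value 00:00:00:00:00:00 with mask
 * ff:ff:ff:ff:ff:ff, i.e. every destination-address bit is a don't-care, so
 * it matches all traffic.  The all-multicast rule, removed here and re-added
 * in fbnic_promisc_sync() below, flips the low bit of the first octet in
 * both fields:
 *
 *	value.addr8[0] ^= 1;	->  01:00:00:00:00:00
 *	mask.addr8[0]  ^= 1;	->  fe:ff:ff:ff:ff:ff
 *
 * which leaves only the I/G (multicast) bit as a "must be 1" compare, so the
 * entry matches every multicast DA and nothing else.  The don't-care reading
 * of the mask bits is inferred from this code rather than from a datasheet.
 */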
- */ - if (!is_multicast_ether_addr(mac_addr->value.addr8) || - mac_addr->state != FBNIC_TCAM_S_VALID) { - eth_zero_addr(mac_addr->value.addr8); - eth_broadcast_addr(mac_addr->mask.addr8); - mac_addr->value.addr8[0] ^= 1; - mac_addr->mask.addr8[0] ^= 1; - set_bit(FBNIC_MAC_ADDR_T_ALLMULTI, - mac_addr->act_tcam); - clear_bit(FBNIC_MAC_ADDR_T_PROMISC, - mac_addr->act_tcam); - mac_addr->state = FBNIC_TCAM_S_ADD; - } - } else if (mac_addr->state == FBNIC_TCAM_S_VALID) { - if (test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam)) { - clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, - mac_addr->act_tcam); - clear_bit(FBNIC_MAC_ADDR_T_PROMISC, - mac_addr->act_tcam); - } else { - mac_addr->state = FBNIC_TCAM_S_DELETE; - } - } + /* Update the promiscuous rules */ + fbnic_promisc_sync(fbd, uc_promisc, mc_promisc); /* Add rules for BMC all multicast if it is enabled */ fbnic_bmc_rpc_all_multi_config(fbd, mc_promisc); @@ -278,9 +240,12 @@ void __fbnic_set_rx_mode(struct net_device *netdev) static void fbnic_set_rx_mode(struct net_device *netdev) { + struct fbnic_net *fbn = netdev_priv(netdev); + struct fbnic_dev *fbd = fbn->fbd; + /* No need to update the hardware if we are not running */ if (netif_running(netdev)) - __fbnic_set_rx_mode(netdev); + __fbnic_set_rx_mode(fbd); } static int fbnic_set_mac(struct net_device *netdev, void *p) @@ -297,10 +262,9 @@ static int fbnic_set_mac(struct net_device *netdev, void *p) return 0; } -void fbnic_clear_rx_mode(struct net_device *netdev) +void fbnic_clear_rx_mode(struct fbnic_dev *fbd) { - struct fbnic_net *fbn = netdev_priv(netdev); - struct fbnic_dev *fbd = fbn->fbd; + struct net_device *netdev = fbd->netdev; int idx; for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) { @@ -407,11 +371,12 @@ static void fbnic_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats64) { u64 rx_bytes, rx_packets, rx_dropped = 0, rx_errors = 0; + u64 rx_over = 0, rx_missed = 0, rx_length = 0; u64 tx_bytes, tx_packets, tx_dropped = 0; struct fbnic_net *fbn = netdev_priv(dev); struct fbnic_dev *fbd = fbn->fbd; struct fbnic_queue_stats *stats; - u64 rx_over = 0, rx_missed = 0; + unsigned int start, i; fbnic_get_hw_stats(fbd); @@ -423,12 +388,12 @@ static void fbnic_get_stats64(struct net_device *dev, tx_dropped = stats->dropped; /* Record drops from Tx HW Datapath */ - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); tx_dropped += fbd->hw_stats.tmi.drop.frames.value + fbd->hw_stats.tti.cm_drop.frames.value + fbd->hw_stats.tti.frame_drop.frames.value + fbd->hw_stats.tti.tbi_drop.frames.value; - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); stats64->tx_bytes = tx_bytes; stats64->tx_packets = tx_packets; @@ -459,7 +424,7 @@ static void fbnic_get_stats64(struct net_device *dev, rx_packets = stats->packets; rx_dropped = stats->dropped; - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); /* Record drops for the host FIFOs. 
* 4: network to Host, 6: BMC to Host * Exclude the BMC and MC FIFOs as those stats may contain drops @@ -479,7 +444,7 @@ static void fbnic_get_stats64(struct net_device *dev, /* Report packets with errors */ rx_errors += fbd->hw_stats.hw_q[i].rde_pkt_err.value; } - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); stats64->rx_bytes = rx_bytes; stats64->rx_packets = rx_packets; @@ -489,6 +454,7 @@ static void fbnic_get_stats64(struct net_device *dev, stats64->rx_missed_errors = rx_missed; for (i = 0; i < fbn->num_rx_queues; i++) { + struct fbnic_ring *xdpr = fbn->tx[FBNIC_MAX_TXQS + i]; struct fbnic_ring *rxr = fbn->rx[i]; if (!rxr) @@ -500,12 +466,64 @@ static void fbnic_get_stats64(struct net_device *dev, rx_bytes = stats->bytes; rx_packets = stats->packets; rx_dropped = stats->dropped; + rx_length = stats->rx.length_errors; } while (u64_stats_fetch_retry(&stats->syncp, start)); stats64->rx_bytes += rx_bytes; stats64->rx_packets += rx_packets; stats64->rx_dropped += rx_dropped; + stats64->rx_errors += rx_length; + stats64->rx_length_errors += rx_length; + + if (!xdpr) + continue; + + stats = &xdpr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + tx_bytes = stats->bytes; + tx_packets = stats->packets; + tx_dropped = stats->dropped; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + stats64->tx_bytes += tx_bytes; + stats64->tx_packets += tx_packets; + stats64->tx_dropped += tx_dropped; + } +} + +bool fbnic_check_split_frames(struct bpf_prog *prog, unsigned int mtu, + u32 hds_thresh) +{ + if (!prog) + return false; + + if (prog->aux->xdp_has_frags) + return false; + + return mtu + ETH_HLEN > hds_thresh; +} + +static int fbnic_bpf(struct net_device *netdev, struct netdev_bpf *bpf) +{ + struct bpf_prog *prog = bpf->prog, *prev_prog; + struct fbnic_net *fbn = netdev_priv(netdev); + + if (bpf->command != XDP_SETUP_PROG) + return -EINVAL; + + if (fbnic_check_split_frames(prog, netdev->mtu, + fbn->hds_thresh)) { + NL_SET_ERR_MSG_MOD(bpf->extack, + "MTU too high, or HDS threshold is too low for single buffer XDP"); + return -EOPNOTSUPP; } + + prev_prog = xchg(&fbn->xdp_prog, prog); + if (prev_prog) + bpf_prog_put(prev_prog); + + return 0; } static const struct net_device_ops fbnic_netdev_ops = { @@ -517,6 +535,7 @@ static const struct net_device_ops fbnic_netdev_ops = { .ndo_set_mac_address = fbnic_set_mac, .ndo_set_rx_mode = fbnic_set_rx_mode, .ndo_get_stats64 = fbnic_get_stats64, + .ndo_bpf = fbnic_bpf, .ndo_hwtstamp_get = fbnic_hwtstamp_get, .ndo_hwtstamp_set = fbnic_hwtstamp_set, }; @@ -553,12 +572,12 @@ static void fbnic_get_queue_stats_rx(struct net_device *dev, int idx, fbnic_get_hw_q_stats(fbd, fbd->hw_stats.hw_q); - spin_lock(&fbd->hw_stats_lock); + spin_lock(&fbd->hw_stats.lock); rx->hw_drop_overruns = fbd->hw_stats.hw_q[idx].rde_pkt_cq_drop.value + fbd->hw_stats.hw_q[idx].rde_pkt_bdq_drop.value; rx->hw_drops = fbd->hw_stats.hw_q[idx].rde_pkt_err.value + rx->hw_drop_overruns; - spin_unlock(&fbd->hw_stats_lock); + spin_unlock(&fbd->hw_stats.lock); } static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, @@ -568,6 +587,7 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, struct fbnic_ring *txr = fbn->tx[idx]; struct fbnic_queue_stats *stats; u64 stop, wake, csum, lso; + struct fbnic_ring *xdpr; unsigned int start; u64 bytes, packets; @@ -591,6 +611,19 @@ static void fbnic_get_queue_stats_tx(struct net_device *dev, int idx, tx->hw_gso_wire_packets = lso; tx->stop = stop; tx->wake = wake; + + xdpr = 
fbn->tx[FBNIC_MAX_TXQS + idx]; + if (xdpr) { + stats = &xdpr->stats; + do { + start = u64_stats_fetch_begin(&stats->syncp); + bytes = stats->bytes; + packets = stats->packets; + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + tx->bytes += bytes; + tx->packets += packets; + } } static void fbnic_get_base_stats(struct net_device *dev, @@ -678,6 +711,8 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) netdev->netdev_ops = &fbnic_netdev_ops; netdev->stat_ops = &fbnic_stat_ops; + netdev->queue_mgmt_ops = &fbnic_queue_mgmt_ops; + netdev->netmem_tx = true; fbnic_set_ethtool_ops(netdev); @@ -695,6 +730,10 @@ struct net_device *fbnic_netdev_alloc(struct fbnic_dev *fbd) fbn->rx_usecs = FBNIC_RX_USECS_DEFAULT; fbn->rx_max_frames = FBNIC_RX_FRAMES_DEFAULT; + /* Initialize the hds_thresh */ + netdev->cfg->hds_thresh = FBNIC_HDS_THRESH_DEFAULT; + fbn->hds_thresh = FBNIC_HDS_THRESH_DEFAULT; + default_queues = netif_get_num_default_rss_queues(); if (default_queues > fbd->max_num_queues) default_queues = fbd->max_num_queues; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h index 86576ae04262..e84e0527c3a9 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_netdev.h @@ -18,7 +18,9 @@ #define FBNIC_TUN_GSO_FEATURES NETIF_F_GSO_IPXIP6 struct fbnic_net { - struct fbnic_ring *tx[FBNIC_MAX_TXQS]; + struct bpf_prog *xdp_prog; + + struct fbnic_ring *tx[FBNIC_MAX_TXQS + FBNIC_MAX_XDPQS]; struct fbnic_ring *rx[FBNIC_MAX_RXQS]; struct fbnic_napi_vector *napi[FBNIC_MAX_NAPI_VECTORS]; @@ -31,6 +33,8 @@ struct fbnic_net { u32 ppq_size; u32 rcq_size; + u32 hds_thresh; + u16 rx_usecs; u16 tx_usecs; @@ -90,8 +94,8 @@ void fbnic_time_init(struct fbnic_net *fbn); int fbnic_time_start(struct fbnic_net *fbn); void fbnic_time_stop(struct fbnic_net *fbn); -void __fbnic_set_rx_mode(struct net_device *netdev); -void fbnic_clear_rx_mode(struct net_device *netdev); +void __fbnic_set_rx_mode(struct fbnic_dev *fbd); +void fbnic_clear_rx_mode(struct fbnic_dev *fbd); void fbnic_phylink_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause); @@ -102,4 +106,7 @@ int fbnic_phylink_ethtool_ksettings_get(struct net_device *netdev, int fbnic_phylink_get_fecparam(struct net_device *netdev, struct ethtool_fecparam *fecparam); int fbnic_phylink_init(struct net_device *netdev); + +bool fbnic_check_split_frames(struct bpf_prog *prog, + unsigned int mtu, u32 hds_threshold); #endif /* _FBNIC_NETDEV_H_ */ diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c index b70e4cadb37b..a7a6b4db8016 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_pci.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_pci.c @@ -118,14 +118,12 @@ static void fbnic_service_task_start(struct fbnic_net *fbn) struct fbnic_dev *fbd = fbn->fbd; schedule_delayed_work(&fbd->service_task, HZ); - phylink_resume(fbn->phylink); } static void fbnic_service_task_stop(struct fbnic_net *fbn) { struct fbnic_dev *fbd = fbn->fbd; - phylink_suspend(fbn->phylink, fbnic_bmc_present(fbd)); cancel_delayed_work(&fbd->service_task); } @@ -137,7 +135,7 @@ void fbnic_up(struct fbnic_net *fbn) fbnic_rss_reinit_hw(fbn->fbd, fbn); - __fbnic_set_rx_mode(fbn->netdev); + __fbnic_set_rx_mode(fbn->fbd); /* Enable Tx/Rx processing */ fbnic_napi_enable(fbn); @@ -154,7 +152,7 @@ void fbnic_down_noidle(struct fbnic_net *fbn) fbnic_napi_disable(fbn); netif_tx_disable(fbn->netdev); - fbnic_clear_rx_mode(fbn->netdev); 
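/*
 * Illustrative example, not taken from the driver: the
 * fbnic_check_split_frames() helper added above reduces the single-buffer
 * XDP rule to "a whole frame must fit under the header-data split
 * threshold".  The numbers below are assumptions for illustration only
 * (1500 byte MTU, ETH_HLEN == 14): such a setup needs hds_thresh >= 1514
 * before a program without xdp_has_frags can be attached.
 */
static bool example_xdp_needs_frag_support(unsigned int mtu, u32 hds_thresh)
{
	/* true -> received frames may be split across multiple buffers */
	return mtu + ETH_HLEN > hds_thresh;	/* 1500 + 14 > 1514 is false */
}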
+ fbnic_clear_rx_mode(fbn->fbd); fbnic_clear_rules(fbn->fbd); fbnic_rss_disable_hw(fbn->fbd); fbnic_disable(fbn); @@ -169,6 +167,20 @@ void fbnic_down(struct fbnic_net *fbn) fbnic_flush(fbn); } +static int fbnic_fw_config_after_crash(struct fbnic_dev *fbd) +{ + if (fbnic_fw_xmit_ownership_msg(fbd, true)) { + dev_err(fbd->dev, "NIC failed to take ownership\n"); + + return -1; + } + + fbnic_rpc_reset_valid_entries(fbd); + __fbnic_set_rx_mode(fbd); + + return 0; +} + static void fbnic_health_check(struct fbnic_dev *fbd) { struct fbnic_fw_mbx *tx_mbx = &fbd->mbx[FBNIC_IPC_MBX_TX_IDX]; @@ -184,13 +196,11 @@ static void fbnic_health_check(struct fbnic_dev *fbd) if (tx_mbx->head != tx_mbx->tail) return; - /* TBD: Need to add a more thorough recovery here. - * Specifically I need to verify what all the firmware will have - * changed since we had setup and it rebooted. May just need to - * perform a down/up. For now we will just reclaim ownership so - * the heartbeat can catch the next fault. - */ - fbnic_fw_xmit_ownership_msg(fbd, true); + fbnic_devlink_fw_report(fbd, "Firmware crash detected!"); + fbnic_devlink_otp_check(fbd, "error detected after firmware recovery"); + + if (fbnic_fw_config_after_crash(fbd)) + dev_err(fbd->dev, "Firmware recovery failed after crash\n"); } static void fbnic_service_task(struct work_struct *work) @@ -206,8 +216,13 @@ static void fbnic_service_task(struct work_struct *work) fbnic_health_check(fbd); - if (netif_carrier_ok(fbd->netdev)) + fbnic_bmc_rpc_check(fbd); + + if (netif_carrier_ok(fbd->netdev)) { + netdev_lock(fbd->netdev); fbnic_napi_depletion_check(fbd->netdev); + netdev_unlock(fbd->netdev); + } if (netif_running(fbd->netdev)) schedule_delayed_work(&fbd->service_task, HZ); @@ -266,6 +281,10 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return -ENOMEM; } + err = fbnic_devlink_health_create(fbd); + if (err) + goto free_fbd; + /* Populate driver with hardware-specific info and handlers */ fbd->max_num_queues = info->max_num_queues; @@ -276,7 +295,7 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = fbnic_alloc_irqs(fbd); if (err) - goto free_fbd; + goto err_destroy_health; err = fbnic_mac_init(fbd); if (err) { @@ -303,11 +322,11 @@ static int fbnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err); fbnic_devlink_register(fbd); + fbnic_devlink_otp_check(fbd, "error detected during probe"); fbnic_dbg_fbd_init(fbd); - spin_lock_init(&fbd->hw_stats_lock); /* Capture snapshot of hardware stats so netdev can calculate delta */ - fbnic_reset_hw_stats(fbd); + fbnic_init_hw_stats(fbd); fbnic_hwmon_register(fbd); @@ -346,6 +365,8 @@ init_failure_mode: return 0; free_irqs: fbnic_free_irqs(fbd); +err_destroy_health: + fbnic_devlink_health_destroy(fbd); free_fbd: fbnic_devlink_free(fbd); @@ -380,6 +401,7 @@ static void fbnic_remove(struct pci_dev *pdev) fbnic_fw_free_mbx(fbd); fbnic_free_irqs(fbd); + fbnic_devlink_health_destroy(fbd); fbnic_devlink_free(fbd); } @@ -392,12 +414,14 @@ static int fbnic_pm_suspend(struct device *dev) goto null_uc_addr; rtnl_lock(); + netdev_lock(netdev); netif_device_detach(netdev); if (netif_running(netdev)) netdev->netdev_ops->ndo_stop(netdev); + netdev_unlock(netdev); rtnl_unlock(); null_uc_addr: @@ -443,16 +467,18 @@ static int __fbnic_pm_resume(struct device *dev) /* Re-enable mailbox */ err = fbnic_fw_request_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); if (err) goto err_free_irqs; - devl_unlock(priv_to_devlink(fbd)); - /* Only send log history if 
log buffer is empty to prevent duplicate * log entries. */ fbnic_fw_log_enable(fbd, list_empty(&fbd->fw_log.entries)); + /* Since the FW should be up, check if it reported OTP errors */ + fbnic_devlink_otp_check(fbd, "error detected after PM resume"); + /* No netdev means there isn't a network interface to bring up */ if (fbnic_init_failure(fbd)) return 0; @@ -463,21 +489,23 @@ static int __fbnic_pm_resume(struct device *dev) fbnic_reset_queues(fbn, fbn->num_tx_queues, fbn->num_rx_queues); rtnl_lock(); + netdev_lock(netdev); - if (netif_running(netdev)) { + if (netif_running(netdev)) err = __fbnic_open(fbn); - if (err) - goto err_free_mbx; - } + netdev_unlock(netdev); rtnl_unlock(); + if (err) + goto err_free_mbx; return 0; err_free_mbx: fbnic_fw_log_disable(fbd); - rtnl_unlock(); + devl_lock(priv_to_devlink(fbd)); fbnic_fw_free_mbx(fbd); + devl_unlock(priv_to_devlink(fbd)); err_free_irqs: fbnic_free_irqs(fbd); err_invalidate_uc_addr: @@ -492,6 +520,10 @@ static void __fbnic_pm_attach(struct device *dev) struct net_device *netdev = fbd->netdev; struct fbnic_net *fbn; + rtnl_lock(); + fbnic_reset_hw_stats(fbd); + rtnl_unlock(); + if (fbnic_init_failure(fbd)) return; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c index 8ff07b5562e3..7f31e890031c 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.c @@ -6,6 +6,7 @@ #include <net/ipv6.h> #include "fbnic.h" +#include "fbnic_fw.h" #include "fbnic_netdev.h" #include "fbnic_rpc.h" @@ -71,6 +72,8 @@ u16 fbnic_flow_hash_2_rss_en_mask(struct fbnic_net *fbn, int flow_type) rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP_DST, IP_DST, flow_hash); rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_0_1, L4_SRC, flow_hash); rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(L4_B_2_3, L4_DST, flow_hash); + rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, OV6_FL_LBL, flow_hash); + rss_en_mask |= FBNIC_FH_2_RSSEM_BIT(IP6_FL, IV6_FL_LBL, flow_hash); return rss_en_mask; } @@ -129,12 +132,9 @@ void fbnic_bmc_rpc_all_multi_config(struct fbnic_dev *fbd, else clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam); - } else if (!test_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam) && - !is_zero_ether_addr(mac_addr->mask.addr8) && - mac_addr->state == FBNIC_TCAM_S_VALID) { - clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, mac_addr->act_tcam); - clear_bit(FBNIC_MAC_ADDR_T_BMC, mac_addr->act_tcam); - mac_addr->state = FBNIC_TCAM_S_DELETE; + } else { + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_BMC); + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_ALLMULTI); } /* We have to add a special handler for multicast as the @@ -236,8 +236,25 @@ void fbnic_bmc_rpc_init(struct fbnic_dev *fbd) act_tcam->mask.tcam[j] = 0xffff; act_tcam->state = FBNIC_TCAM_S_UPDATE; +} + +void fbnic_bmc_rpc_check(struct fbnic_dev *fbd) +{ + int err; + + if (fbd->fw_cap.need_bmc_tcam_reinit) { + fbnic_bmc_rpc_init(fbd); + __fbnic_set_rx_mode(fbd); + fbd->fw_cap.need_bmc_tcam_reinit = false; + } - fbnic_bmc_rpc_all_multi_config(fbd, false); + if (fbd->fw_cap.need_bmc_macda_sync) { + err = fbnic_fw_xmit_rpc_macda_sync(fbd); + if (err) + dev_warn(fbd->dev, + "Writing MACDA table to FW failed, err: %d\n", err); + fbd->fw_cap.need_bmc_macda_sync = false; + } } #define FBNIC_ACT1_INIT(_l4, _udp, _ip, _v6) \ @@ -452,6 +469,50 @@ int __fbnic_xc_unsync(struct fbnic_mac_addr *mac_addr, unsigned int tcam_idx) return 0; } +void fbnic_promisc_sync(struct fbnic_dev *fbd, + bool uc_promisc, bool mc_promisc) +{ + struct fbnic_mac_addr *mac_addr; + + /* 
Populate last TCAM entry with promiscuous entry and 0/1 bit mask */ + mac_addr = &fbd->mac_addr[FBNIC_RPC_TCAM_MACDA_PROMISC_IDX]; + if (uc_promisc) { + if (!is_zero_ether_addr(mac_addr->value.addr8) || + mac_addr->state != FBNIC_TCAM_S_VALID) { + eth_zero_addr(mac_addr->value.addr8); + eth_broadcast_addr(mac_addr->mask.addr8); + clear_bit(FBNIC_MAC_ADDR_T_ALLMULTI, + mac_addr->act_tcam); + set_bit(FBNIC_MAC_ADDR_T_PROMISC, + mac_addr->act_tcam); + mac_addr->state = FBNIC_TCAM_S_ADD; + } + } else if (mc_promisc && + (!fbnic_bmc_present(fbd) || !fbd->fw_cap.all_multi)) { + /* We have to add a special handler for multicast as the + * BMC may have an all-multi rule already in place. As such + * adding a rule ourselves won't do any good so we will have + * to modify the rules for the ALL MULTI below if the BMC + * already has the rule in place. + */ + if (!is_multicast_ether_addr(mac_addr->value.addr8) || + mac_addr->state != FBNIC_TCAM_S_VALID) { + eth_zero_addr(mac_addr->value.addr8); + eth_broadcast_addr(mac_addr->mask.addr8); + mac_addr->value.addr8[0] ^= 1; + mac_addr->mask.addr8[0] ^= 1; + set_bit(FBNIC_MAC_ADDR_T_ALLMULTI, + mac_addr->act_tcam); + clear_bit(FBNIC_MAC_ADDR_T_PROMISC, + mac_addr->act_tcam); + mac_addr->state = FBNIC_TCAM_S_ADD; + } + } else if (mac_addr->state == FBNIC_TCAM_S_VALID) { + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_ALLMULTI); + __fbnic_xc_unsync(mac_addr, FBNIC_MAC_ADDR_T_PROMISC); + } +} + void fbnic_sift_macda(struct fbnic_dev *fbd) { int dest, src; @@ -535,6 +596,21 @@ static void fbnic_clear_macda(struct fbnic_dev *fbd) } } +static void fbnic_clear_valid_macda(struct fbnic_dev *fbd) +{ + int idx; + + for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) { + struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[idx]; + + if (mac_addr->state == FBNIC_TCAM_S_VALID) { + fbnic_clear_macda_entry(fbd, idx); + + mac_addr->state = FBNIC_TCAM_S_UPDATE; + } + } +} + static void fbnic_write_macda_entry(struct fbnic_dev *fbd, unsigned int idx, struct fbnic_mac_addr *mac_addr) { @@ -556,7 +632,7 @@ static void fbnic_write_macda_entry(struct fbnic_dev *fbd, unsigned int idx, void fbnic_write_macda(struct fbnic_dev *fbd) { - int idx; + int idx, updates = 0; for (idx = ARRAY_SIZE(fbd->mac_addr); idx--;) { struct fbnic_mac_addr *mac_addr = &fbd->mac_addr[idx]; @@ -565,6 +641,9 @@ void fbnic_write_macda(struct fbnic_dev *fbd) if (!(mac_addr->state & FBNIC_TCAM_S_UPDATE)) continue; + /* Record update count */ + updates++; + /* Clear by writing 0s. 
*/ if (mac_addr->state == FBNIC_TCAM_S_DELETE) { /* Invalidate entry and clear addr state info */ @@ -578,6 +657,14 @@ void fbnic_write_macda(struct fbnic_dev *fbd) mac_addr->state = FBNIC_TCAM_S_VALID; } + + /* If reinitializing the BMC TCAM we are doing an initial update */ + if (fbd->fw_cap.need_bmc_tcam_reinit) + updates++; + + /* If needed notify firmware of changes to MACDA TCAM */ + if (updates != 0 && fbnic_bmc_present(fbd)) + fbd->fw_cap.need_bmc_macda_sync = true; } static void fbnic_clear_act_tcam(struct fbnic_dev *fbd, unsigned int idx) @@ -1052,13 +1139,25 @@ void fbnic_write_ip_addr(struct fbnic_dev *fbd) } } -void fbnic_clear_rules(struct fbnic_dev *fbd) +static void fbnic_clear_valid_act_tcam(struct fbnic_dev *fbd) { - u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, - FBNIC_RPC_ACT_TBL0_DEST_BMC); int i = FBNIC_RPC_TCAM_ACT_NUM_ENTRIES - 1; struct fbnic_act_tcam *act_tcam; + /* Work from the bottom up deleting all other rules from hardware */ + do { + act_tcam = &fbd->act_tcam[i]; + + if (act_tcam->state != FBNIC_TCAM_S_VALID) + continue; + + fbnic_clear_act_tcam(fbd, i); + act_tcam->state = FBNIC_TCAM_S_UPDATE; + } while (i--); +} + +void fbnic_clear_rules(struct fbnic_dev *fbd) +{ /* Clear MAC rules */ fbnic_clear_macda(fbd); @@ -1073,6 +1172,11 @@ void fbnic_clear_rules(struct fbnic_dev *fbd) * the interface back up. */ if (fbnic_bmc_present(fbd)) { + u32 dest = FIELD_PREP(FBNIC_RPC_ACT_TBL0_DEST_MASK, + FBNIC_RPC_ACT_TBL0_DEST_BMC); + int i = FBNIC_RPC_TCAM_ACT_NUM_ENTRIES - 1; + struct fbnic_act_tcam *act_tcam; + act_tcam = &fbd->act_tcam[i]; if (act_tcam->state == FBNIC_TCAM_S_VALID && @@ -1081,21 +1185,10 @@ void fbnic_clear_rules(struct fbnic_dev *fbd) wr32(fbd, FBNIC_RPC_ACT_TBL1(i), 0); act_tcam->state = FBNIC_TCAM_S_UPDATE; - - i--; } } - /* Work from the bottom up deleting all other rules from hardware */ - do { - act_tcam = &fbd->act_tcam[i]; - - if (act_tcam->state != FBNIC_TCAM_S_VALID) - continue; - - fbnic_clear_act_tcam(fbd, i); - act_tcam->state = FBNIC_TCAM_S_UPDATE; - } while (i--); + fbnic_clear_valid_act_tcam(fbd); } static void fbnic_delete_act_tcam(struct fbnic_dev *fbd, unsigned int idx) @@ -1145,3 +1238,9 @@ void fbnic_write_rules(struct fbnic_dev *fbd) fbnic_update_act_tcam(fbd, i); } } + +void fbnic_rpc_reset_valid_entries(struct fbnic_dev *fbd) +{ + fbnic_clear_valid_act_tcam(fbd); + fbnic_clear_valid_macda(fbd); +} diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h index 6892414195c3..3d4925b2ac75 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_rpc.h @@ -184,6 +184,7 @@ struct fbnic_net; void fbnic_bmc_rpc_init(struct fbnic_dev *fbd); void fbnic_bmc_rpc_all_multi_config(struct fbnic_dev *fbd, bool enable_host); +void fbnic_bmc_rpc_check(struct fbnic_dev *fbd); void fbnic_reset_indir_tbl(struct fbnic_net *fbn); void fbnic_rss_key_fill(u32 *buffer); @@ -201,6 +202,9 @@ struct fbnic_mac_addr *__fbnic_mc_sync(struct fbnic_dev *fbd, void fbnic_sift_macda(struct fbnic_dev *fbd); void fbnic_write_macda(struct fbnic_dev *fbd); +void fbnic_promisc_sync(struct fbnic_dev *fbd, + bool uc_promisc, bool mc_promisc); + struct fbnic_ip_addr *__fbnic_ip4_sync(struct fbnic_dev *fbd, struct fbnic_ip_addr *ip_addr, const struct in_addr *addr, diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c index f9543d03485f..cf773cc78e40 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c +++ 
b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.c @@ -2,11 +2,14 @@ /* Copyright (c) Meta Platforms, Inc. and affiliates. */ #include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/bpf_trace.h> #include <linux/iopoll.h> #include <linux/pci.h> #include <net/netdev_queues.h> #include <net/page_pool/helpers.h> #include <net/tcp.h> +#include <net/xdp.h> #include "fbnic.h" #include "fbnic_csr.h" @@ -14,6 +17,13 @@ #include "fbnic_txrx.h" enum { + FBNIC_XDP_PASS = 0, + FBNIC_XDP_CONSUME, + FBNIC_XDP_TX, + FBNIC_XDP_LEN_ERR, +}; + +enum { FBNIC_XMIT_CB_TS = 0x01, }; @@ -27,6 +37,8 @@ struct fbnic_xmit_cb { #define FBNIC_XMIT_CB(__skb) ((struct fbnic_xmit_cb *)((__skb)->cb)) +#define FBNIC_XMIT_NOUNMAP ((void *)1) + static u32 __iomem *fbnic_ring_csr_base(const struct fbnic_ring *ring) { unsigned long csr_base = (unsigned long)ring->doorbell; @@ -305,6 +317,7 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) unsigned int tail = ring->tail, first; unsigned int size, data_len; skb_frag_t *frag; + bool is_net_iov; dma_addr_t dma; __le64 *twd; @@ -320,6 +333,7 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) goto dma_error; + is_net_iov = false; dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE); for (frag = &skb_shinfo(skb)->frags[0];; frag++) { @@ -332,6 +346,8 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | FIELD_PREP(FBNIC_TWD_TYPE_MASK, FBNIC_TWD_TYPE_AL)); + if (is_net_iov) + ring->tx_buf[tail] = FBNIC_XMIT_NOUNMAP; tail++; tail &= ring->size_mask; @@ -345,6 +361,7 @@ fbnic_tx_map(struct fbnic_ring *ring, struct sk_buff *skb, __le64 *meta) if (size > FIELD_MAX(FBNIC_TWD_LEN_MASK)) goto dma_error; + is_net_iov = skb_frag_is_net_iov(frag); dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE); } @@ -380,6 +397,8 @@ dma_error: twd = &ring->desc[tail]; if (tail == first) fbnic_unmap_single_twd(dev, twd); + else if (ring->tx_buf[tail] == FBNIC_XMIT_NOUNMAP) + ring->tx_buf[tail] = NULL; else fbnic_unmap_page_twd(dev, twd); } @@ -564,7 +583,11 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, desc_cnt--; while (desc_cnt--) { - fbnic_unmap_page_twd(nv->dev, &ring->desc[head]); + if (ring->tx_buf[head] != FBNIC_XMIT_NOUNMAP) + fbnic_unmap_page_twd(nv->dev, + &ring->desc[head]); + else + ring->tx_buf[head] = NULL; head++; head &= ring->size_mask; } @@ -606,6 +629,54 @@ static void fbnic_clean_twq0(struct fbnic_napi_vector *nv, int napi_budget, } } +static void fbnic_clean_twq1(struct fbnic_napi_vector *nv, bool pp_allow_direct, + struct fbnic_ring *ring, bool discard, + unsigned int hw_head) +{ + u64 total_bytes = 0, total_packets = 0; + unsigned int head = ring->head; + + while (hw_head != head) { + struct page *page; + u64 twd; + + if (unlikely(!(ring->desc[head] & FBNIC_TWD_TYPE(AL)))) + goto next_desc; + + twd = le64_to_cpu(ring->desc[head]); + page = ring->tx_buf[head]; + + /* TYPE_AL is 2, TYPE_LAST_AL is 3. So this trick gives + * us one increment per packet, with no branches. 
+ */ + total_packets += FIELD_GET(FBNIC_TWD_TYPE_MASK, twd) - + FBNIC_TWD_TYPE_AL; + total_bytes += FIELD_GET(FBNIC_TWD_LEN_MASK, twd); + + page_pool_put_page(page->pp, page, -1, pp_allow_direct); +next_desc: + head++; + head &= ring->size_mask; + } + + if (!total_bytes) + return; + + ring->head = head; + + if (discard) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped += total_packets; + u64_stats_update_end(&ring->stats.syncp); + return; + } + + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.bytes += total_bytes; + ring->stats.packets += total_packets; + u64_stats_update_end(&ring->stats.syncp); +} + static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, struct fbnic_ring *ring, u64 tcd, int *ts_head, int *head0) @@ -657,44 +728,65 @@ static void fbnic_clean_tsq(struct fbnic_napi_vector *nv, } static void fbnic_page_pool_init(struct fbnic_ring *ring, unsigned int idx, - struct page *page) + netmem_ref netmem) { struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; - page_pool_fragment_page(page, FBNIC_PAGECNT_BIAS_MAX); + page_pool_fragment_netmem(netmem, FBNIC_PAGECNT_BIAS_MAX); rx_buf->pagecnt_bias = FBNIC_PAGECNT_BIAS_MAX; - rx_buf->page = page; + rx_buf->netmem = netmem; } -static struct page *fbnic_page_pool_get(struct fbnic_ring *ring, - unsigned int idx) +static struct page * +fbnic_page_pool_get_head(struct fbnic_q_triad *qt, unsigned int idx) { - struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; + struct fbnic_rx_buf *rx_buf = &qt->sub0.rx_buf[idx]; rx_buf->pagecnt_bias--; - return rx_buf->page; + /* sub0 is always fed system pages, from the NAPI-level page_pool */ + return netmem_to_page(rx_buf->netmem); +} + +static netmem_ref +fbnic_page_pool_get_data(struct fbnic_q_triad *qt, unsigned int idx) +{ + struct fbnic_rx_buf *rx_buf = &qt->sub1.rx_buf[idx]; + + rx_buf->pagecnt_bias--; + + return rx_buf->netmem; } static void fbnic_page_pool_drain(struct fbnic_ring *ring, unsigned int idx, - struct fbnic_napi_vector *nv, int budget) + int budget) { struct fbnic_rx_buf *rx_buf = &ring->rx_buf[idx]; - struct page *page = rx_buf->page; + netmem_ref netmem = rx_buf->netmem; - if (!page_pool_unref_page(page, rx_buf->pagecnt_bias)) - page_pool_put_unrefed_page(nv->page_pool, page, -1, !!budget); + if (!page_pool_unref_netmem(netmem, rx_buf->pagecnt_bias)) + page_pool_put_unrefed_netmem(ring->page_pool, netmem, -1, + !!budget); - rx_buf->page = NULL; + rx_buf->netmem = 0; } static void fbnic_clean_twq(struct fbnic_napi_vector *nv, int napi_budget, - struct fbnic_q_triad *qt, s32 ts_head, s32 head0) + struct fbnic_q_triad *qt, s32 ts_head, s32 head0, + s32 head1) { if (head0 >= 0) fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, head0); else if (ts_head >= 0) fbnic_clean_twq0(nv, napi_budget, &qt->sub0, false, ts_head); + + if (head1 >= 0) { + qt->cmpl.deferred_head = -1; + if (napi_budget) + fbnic_clean_twq1(nv, true, &qt->sub1, false, head1); + else + qt->cmpl.deferred_head = head1; + } } static void @@ -702,6 +794,7 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int napi_budget) { struct fbnic_ring *cmpl = &qt->cmpl; + s32 head1 = cmpl->deferred_head; s32 head0 = -1, ts_head = -1; __le64 *raw_tcd, done; u32 head = cmpl->head; @@ -719,7 +812,10 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, switch (FIELD_GET(FBNIC_TCD_TYPE_MASK, tcd)) { case FBNIC_TCD_TYPE_0: - if (!(tcd & FBNIC_TCD_TWQ1)) + if (tcd & FBNIC_TCD_TWQ1) + head1 = FIELD_GET(FBNIC_TCD_TYPE0_HEAD1_MASK, + tcd); + else head0 = 
FIELD_GET(FBNIC_TCD_TYPE0_HEAD0_MASK, tcd); /* Currently all err status bits are related to @@ -752,11 +848,11 @@ fbnic_clean_tcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, } /* Unmap and free processed buffers */ - fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0); + fbnic_clean_twq(nv, napi_budget, qt, ts_head, head0, head1); } -static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, - struct fbnic_ring *ring, unsigned int hw_head) +static void fbnic_clean_bdq(struct fbnic_ring *ring, unsigned int hw_head, + int napi_budget) { unsigned int head = ring->head; @@ -764,7 +860,7 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, return; do { - fbnic_page_pool_drain(ring, head, nv, napi_budget); + fbnic_page_pool_drain(ring, head, napi_budget); head++; head &= ring->size_mask; @@ -773,10 +869,10 @@ static void fbnic_clean_bdq(struct fbnic_napi_vector *nv, int napi_budget, ring->head = head; } -static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page) +static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, netmem_ref netmem) { __le64 *bdq_desc = &bdq->desc[id * FBNIC_BD_FRAG_COUNT]; - dma_addr_t dma = page_pool_get_dma_addr(page); + dma_addr_t dma = page_pool_get_dma_addr_netmem(netmem); u64 bd, i = FBNIC_BD_FRAG_COUNT; bd = (FBNIC_BD_PAGE_ADDR_MASK & dma) | @@ -794,7 +890,7 @@ static void fbnic_bd_prep(struct fbnic_ring *bdq, u16 id, struct page *page) } while (--i); } -static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) +static void fbnic_fill_bdq(struct fbnic_ring *bdq) { unsigned int count = fbnic_desc_unused(bdq); unsigned int i = bdq->tail; @@ -803,10 +899,10 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) return; do { - struct page *page; + netmem_ref netmem; - page = page_pool_dev_alloc_pages(nv->page_pool); - if (!page) { + netmem = page_pool_dev_alloc_netmems(bdq->page_pool); + if (!netmem) { u64_stats_update_begin(&bdq->stats.syncp); bdq->stats.rx.alloc_failed++; u64_stats_update_end(&bdq->stats.syncp); @@ -814,8 +910,8 @@ static void fbnic_fill_bdq(struct fbnic_napi_vector *nv, struct fbnic_ring *bdq) break; } - fbnic_page_pool_init(bdq, i, page); - fbnic_bd_prep(bdq, i, page); + fbnic_page_pool_init(bdq, i, netmem); + fbnic_bd_prep(bdq, i, netmem); i++; i &= bdq->size_mask; @@ -862,7 +958,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, { unsigned int hdr_pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); unsigned int hdr_pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); - struct page *page = fbnic_page_pool_get(&qt->sub0, hdr_pg_idx); + struct page *page = fbnic_page_pool_get_head(qt, hdr_pg_idx); unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); unsigned int frame_sz, hdr_pg_start, hdr_pg_end, headroom; unsigned char *hdr_start; @@ -877,7 +973,7 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, headroom = hdr_pg_off - hdr_pg_start + FBNIC_RX_PAD; frame_sz = hdr_pg_end - hdr_pg_start; - xdp_init_buff(&pkt->buff, frame_sz, NULL); + xdp_init_buff(&pkt->buff, frame_sz, &qt->xdp_rxq); hdr_pg_start += (FBNIC_RCD_AL_BUFF_FRAG_MASK & rcd) * FBNIC_BD_FRAG_SIZE; @@ -888,13 +984,12 @@ static void fbnic_pkt_prepare(struct fbnic_napi_vector *nv, u64 rcd, /* Build frame around buffer */ hdr_start = page_address(page) + hdr_pg_start; - + net_prefetch(pkt->buff.data); xdp_prepare_buff(&pkt->buff, hdr_start, headroom, len - FBNIC_RX_PAD, true); - pkt->data_truesize = 0; - 
pkt->data_len = 0; - pkt->nr_frags = 0; + pkt->hwtstamp = 0; + pkt->add_frag_failed = false; } static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, @@ -904,9 +999,9 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, unsigned int pg_idx = FIELD_GET(FBNIC_RCD_AL_BUFF_PAGE_MASK, rcd); unsigned int pg_off = FIELD_GET(FBNIC_RCD_AL_BUFF_OFF_MASK, rcd); unsigned int len = FIELD_GET(FBNIC_RCD_AL_BUFF_LEN_MASK, rcd); - struct page *page = fbnic_page_pool_get(&qt->sub1, pg_idx); - struct skb_shared_info *shinfo; + netmem_ref netmem = fbnic_page_pool_get_data(qt, pg_idx); unsigned int truesize; + bool added; truesize = FIELD_GET(FBNIC_RCD_AL_PAGE_FIN, rcd) ? FBNIC_BD_FRAG_SIZE - pg_off : ALIGN(len, 128); @@ -915,88 +1010,171 @@ static void fbnic_add_rx_frag(struct fbnic_napi_vector *nv, u64 rcd, FBNIC_BD_FRAG_SIZE; /* Sync DMA buffer */ - dma_sync_single_range_for_cpu(nv->dev, page_pool_get_dma_addr(page), - pg_off, truesize, DMA_BIDIRECTIONAL); - - /* Add page to xdp shared info */ - shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - - /* We use gso_segs to store truesize */ - pkt->data_truesize += truesize; - - __skb_fill_page_desc_noacc(shinfo, pkt->nr_frags++, page, pg_off, len); - - /* Store data_len in gso_size */ - pkt->data_len += len; + page_pool_dma_sync_netmem_for_cpu(qt->sub1.page_pool, netmem, + pg_off, truesize); + + added = xdp_buff_add_frag(&pkt->buff, netmem, pg_off, len, truesize); + if (unlikely(!added)) { + pkt->add_frag_failed = true; + netdev_err_once(nv->napi.dev, + "Failed to add fragment to xdp_buff\n"); + } } -static void fbnic_put_pkt_buff(struct fbnic_napi_vector *nv, +static void fbnic_put_pkt_buff(struct fbnic_q_triad *qt, struct fbnic_pkt_buff *pkt, int budget) { - struct skb_shared_info *shinfo; struct page *page; - int nr_frags; if (!pkt->buff.data_hard_start) return; - shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - nr_frags = pkt->nr_frags; + if (xdp_buff_has_frags(&pkt->buff)) { + struct skb_shared_info *shinfo; + netmem_ref netmem; + int nr_frags; + + shinfo = xdp_get_shared_info_from_buff(&pkt->buff); + nr_frags = shinfo->nr_frags; - while (nr_frags--) { - page = skb_frag_page(&shinfo->frags[nr_frags]); - page_pool_put_full_page(nv->page_pool, page, !!budget); + while (nr_frags--) { + netmem = skb_frag_netmem(&shinfo->frags[nr_frags]); + page_pool_put_full_netmem(qt->sub1.page_pool, netmem, + !!budget); + } } page = virt_to_page(pkt->buff.data_hard_start); - page_pool_put_full_page(nv->page_pool, page, !!budget); + page_pool_put_full_page(qt->sub0.page_pool, page, !!budget); } static struct sk_buff *fbnic_build_skb(struct fbnic_napi_vector *nv, struct fbnic_pkt_buff *pkt) { - unsigned int nr_frags = pkt->nr_frags; - struct skb_shared_info *shinfo; - unsigned int truesize; struct sk_buff *skb; - truesize = xdp_data_hard_end(&pkt->buff) + FBNIC_RX_TROOM - - pkt->buff.data_hard_start; - - /* Build frame around buffer */ - skb = napi_build_skb(pkt->buff.data_hard_start, truesize); - if (unlikely(!skb)) + skb = xdp_build_skb_from_buff(&pkt->buff); + if (!skb) return NULL; - /* Push data pointer to start of data, put tail to end of data */ - skb_reserve(skb, pkt->buff.data - pkt->buff.data_hard_start); - __skb_put(skb, pkt->buff.data_end - pkt->buff.data); + /* Add timestamp if present */ + if (pkt->hwtstamp) + skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; + + return skb; +} - /* Add tracking for metadata at the start of the frame */ - skb_metadata_set(skb, pkt->buff.data - pkt->buff.data_meta); +static long 
fbnic_pkt_tx(struct fbnic_napi_vector *nv, + struct fbnic_pkt_buff *pkt) +{ + struct fbnic_ring *ring = &nv->qt[0].sub1; + int size, offset, nsegs = 1, data_len = 0; + unsigned int tail = ring->tail; + struct skb_shared_info *shinfo; + skb_frag_t *frag = NULL; + struct page *page; + dma_addr_t dma; + __le64 *twd; - /* Add Rx frags */ - if (nr_frags) { - /* Verify that shared info didn't move */ + if (unlikely(xdp_buff_has_frags(&pkt->buff))) { shinfo = xdp_get_shared_info_from_buff(&pkt->buff); - WARN_ON(skb_shinfo(skb) != shinfo); + nsegs += shinfo->nr_frags; + data_len = shinfo->xdp_frags_size; + frag = &shinfo->frags[0]; + } - skb->truesize += pkt->data_truesize; - skb->data_len += pkt->data_len; - shinfo->nr_frags = nr_frags; - skb->len += pkt->data_len; + if (fbnic_desc_unused(ring) < nsegs) { + u64_stats_update_begin(&ring->stats.syncp); + ring->stats.dropped++; + u64_stats_update_end(&ring->stats.syncp); + return -FBNIC_XDP_CONSUME; } - skb_mark_for_recycle(skb); + page = virt_to_page(pkt->buff.data_hard_start); + offset = offset_in_page(pkt->buff.data); + dma = page_pool_get_dma_addr(page); - /* Set MAC header specific fields */ - skb->protocol = eth_type_trans(skb, nv->napi.dev); + size = pkt->buff.data_end - pkt->buff.data; - /* Add timestamp if present */ - if (pkt->hwtstamp) - skb_hwtstamps(skb)->hwtstamp = pkt->hwtstamp; + while (nsegs--) { + dma_sync_single_range_for_device(nv->dev, dma, offset, size, + DMA_BIDIRECTIONAL); + dma += offset; - return skb; + ring->tx_buf[tail] = page; + + twd = &ring->desc[tail]; + *twd = cpu_to_le64(FIELD_PREP(FBNIC_TWD_ADDR_MASK, dma) | + FIELD_PREP(FBNIC_TWD_LEN_MASK, size) | + FIELD_PREP(FBNIC_TWD_TYPE_MASK, + FBNIC_TWD_TYPE_AL)); + + tail++; + tail &= ring->size_mask; + + if (!data_len) + break; + + offset = skb_frag_off(frag); + page = skb_frag_page(frag); + dma = page_pool_get_dma_addr(page); + + size = skb_frag_size(frag); + data_len -= size; + frag++; + } + + *twd |= FBNIC_TWD_TYPE(LAST_AL); + + ring->tail = tail; + + return -FBNIC_XDP_TX; +} + +static void fbnic_pkt_commit_tail(struct fbnic_napi_vector *nv, + unsigned int pkt_tail) +{ + struct fbnic_ring *ring = &nv->qt[0].sub1; + + /* Force DMA writes to flush before writing to tail */ + dma_wmb(); + + writel(pkt_tail, ring->doorbell); +} + +static struct sk_buff *fbnic_run_xdp(struct fbnic_napi_vector *nv, + struct fbnic_pkt_buff *pkt) +{ + struct fbnic_net *fbn = netdev_priv(nv->napi.dev); + struct bpf_prog *xdp_prog; + int act; + + xdp_prog = READ_ONCE(fbn->xdp_prog); + if (!xdp_prog) + goto xdp_pass; + + /* Should never happen, config paths enforce HDS threshold > MTU */ + if (xdp_buff_has_frags(&pkt->buff) && !xdp_prog->aux->xdp_has_frags) + return ERR_PTR(-FBNIC_XDP_LEN_ERR); + + act = bpf_prog_run_xdp(xdp_prog, &pkt->buff); + switch (act) { + case XDP_PASS: +xdp_pass: + return fbnic_build_skb(nv, pkt); + case XDP_TX: + return ERR_PTR(fbnic_pkt_tx(nv, pkt)); + default: + bpf_warn_invalid_xdp_action(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_ABORTED: + trace_xdp_exception(nv->napi.dev, xdp_prog, act); + fallthrough; + case XDP_DROP: + break; + } + + return ERR_PTR(-FBNIC_XDP_CONSUME); } static enum pkt_hash_types fbnic_skb_hash_type(u64 rcd) @@ -1050,10 +1228,10 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt, int budget) { unsigned int packets = 0, bytes = 0, dropped = 0, alloc_failed = 0; - u64 csum_complete = 0, csum_none = 0; + u64 csum_complete = 0, csum_none = 0, length_errors = 0; + s32 head0 = -1, head1 = -1, pkt_tail = 
-1; struct fbnic_ring *rcq = &qt->cmpl; struct fbnic_pkt_buff *pkt; - s32 head0 = -1, head1 = -1; __le64 *raw_rcd, done; u32 head = rcq->head; @@ -1094,8 +1272,10 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, /* We currently ignore the action table index */ break; case FBNIC_RCD_TYPE_META: - if (likely(!fbnic_rcd_metadata_err(rcd))) - skb = fbnic_build_skb(nv, pkt); + if (unlikely(pkt->add_frag_failed)) + skb = NULL; + else if (likely(!fbnic_rcd_metadata_err(rcd))) + skb = fbnic_run_xdp(nv, pkt); /* Populate skb and invalidate XDP */ if (!IS_ERR_OR_NULL(skb)) { @@ -1107,15 +1287,20 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, bytes += skb->len; napi_gro_receive(&nv->napi, skb); + } else if (skb == ERR_PTR(-FBNIC_XDP_TX)) { + pkt_tail = nv->qt[0].sub1.tail; + bytes += xdp_get_buff_len(&pkt->buff); } else { if (!skb) { alloc_failed++; dropped++; + } else if (skb == ERR_PTR(-FBNIC_XDP_LEN_ERR)) { + length_errors++; } else { dropped++; } - fbnic_put_pkt_buff(nv, pkt, 1); + fbnic_put_pkt_buff(qt, pkt, 1); } pkt->buff.data_hard_start = NULL; @@ -1140,16 +1325,20 @@ static int fbnic_clean_rcq(struct fbnic_napi_vector *nv, rcq->stats.rx.alloc_failed += alloc_failed; rcq->stats.rx.csum_complete += csum_complete; rcq->stats.rx.csum_none += csum_none; + rcq->stats.rx.length_errors += length_errors; u64_stats_update_end(&rcq->stats.syncp); + if (pkt_tail >= 0) + fbnic_pkt_commit_tail(nv, pkt_tail); + /* Unmap and free processed buffers */ if (head0 >= 0) - fbnic_clean_bdq(nv, budget, &qt->sub0, head0); - fbnic_fill_bdq(nv, &qt->sub0); + fbnic_clean_bdq(&qt->sub0, head0, budget); + fbnic_fill_bdq(&qt->sub0); if (head1 >= 0) - fbnic_clean_bdq(nv, budget, &qt->sub1, head1); - fbnic_fill_bdq(nv, &qt->sub1); + fbnic_clean_bdq(&qt->sub1, head1, budget); + fbnic_fill_bdq(&qt->sub1); /* Record the current head/tail of the queue */ if (rcq->head != head) { @@ -1220,8 +1409,9 @@ void fbnic_aggregate_ring_rx_counters(struct fbnic_net *fbn, fbn->rx_stats.rx.alloc_failed += stats->rx.alloc_failed; fbn->rx_stats.rx.csum_complete += stats->rx.csum_complete; fbn->rx_stats.rx.csum_none += stats->rx.csum_none; + fbn->rx_stats.rx.length_errors += stats->rx.length_errors; /* Remember to add new stats here */ - BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 3); + BUILD_BUG_ON(sizeof(fbn->rx_stats.rx) / 8 != 4); } void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, @@ -1243,6 +1433,22 @@ void fbnic_aggregate_ring_tx_counters(struct fbnic_net *fbn, BUILD_BUG_ON(sizeof(fbn->tx_stats.twq) / 8 != 6); } +static void fbnic_aggregate_ring_xdp_counters(struct fbnic_net *fbn, + struct fbnic_ring *xdpr) +{ + struct fbnic_queue_stats *stats = &xdpr->stats; + + if (!(xdpr->flags & FBNIC_RING_F_STATS)) + return; + + /* Capture stats from queues before disassociating them */ + fbn->rx_stats.bytes += stats->bytes; + fbn->rx_stats.packets += stats->packets; + fbn->rx_stats.dropped += stats->dropped; + fbn->tx_stats.bytes += stats->bytes; + fbn->tx_stats.packets += stats->packets; +} + static void fbnic_remove_tx_ring(struct fbnic_net *fbn, struct fbnic_ring *txr) { @@ -1256,6 +1462,19 @@ static void fbnic_remove_tx_ring(struct fbnic_net *fbn, fbn->tx[txr->q_idx] = NULL; } +static void fbnic_remove_xdp_ring(struct fbnic_net *fbn, + struct fbnic_ring *xdpr) +{ + if (!(xdpr->flags & FBNIC_RING_F_STATS)) + return; + + fbnic_aggregate_ring_xdp_counters(fbn, xdpr); + + /* Remove pointer to the Tx ring */ + WARN_ON(fbn->tx[xdpr->q_idx] && fbn->tx[xdpr->q_idx] != xdpr); + fbn->tx[xdpr->q_idx] = NULL; +} + 
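/*
 * Illustrative sketch (hypothetical helper, not part of the patch):
 * fbnic_run_xdp() folds the XDP verdict into the returned pointer -- a real
 * skb for XDP_PASS, ERR_PTR(-FBNIC_XDP_TX) when the frame was queued on the
 * XDP Tx ring, and ERR_PTR(-FBNIC_XDP_CONSUME) / ERR_PTR(-FBNIC_XDP_LEN_ERR)
 * for drops -- so the completion loop above can branch on a single value.
 */
static bool example_xdp_verdict_is_tx(struct sk_buff *skb)
{
	/* A delivered skb is neither NULL nor an ERR_PTR() value */
	return IS_ERR(skb) && PTR_ERR(skb) == -FBNIC_XDP_TX;
}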
static void fbnic_remove_rx_ring(struct fbnic_net *fbn, struct fbnic_ring *rxr) { @@ -1269,6 +1488,12 @@ static void fbnic_remove_rx_ring(struct fbnic_net *fbn, fbn->rx[rxr->q_idx] = NULL; } +static void fbnic_free_qt_page_pools(struct fbnic_q_triad *qt) +{ + page_pool_destroy(qt->sub0.page_pool); + page_pool_destroy(qt->sub1.page_pool); +} + static void fbnic_free_napi_vector(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) { @@ -1277,6 +1502,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, for (i = 0; i < nv->txt_count; i++) { fbnic_remove_tx_ring(fbn, &nv->qt[i].sub0); + fbnic_remove_xdp_ring(fbn, &nv->qt[i].sub1); fbnic_remove_tx_ring(fbn, &nv->qt[i].cmpl); } @@ -1287,8 +1513,7 @@ static void fbnic_free_napi_vector(struct fbnic_net *fbn, } fbnic_napi_free_irq(fbd, nv); - page_pool_destroy(nv->page_pool); - netif_napi_del(&nv->napi); + netif_napi_del_locked(&nv->napi); fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); } @@ -1302,23 +1527,22 @@ void fbnic_free_napi_vectors(struct fbnic_net *fbn) fbnic_free_napi_vector(fbn, fbn->napi[i]); } -#define FBNIC_PAGE_POOL_FLAGS \ - (PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV) - -static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, - struct fbnic_napi_vector *nv) +static int +fbnic_alloc_qt_page_pools(struct fbnic_net *fbn, struct fbnic_q_triad *qt, + unsigned int rxq_idx) { struct page_pool_params pp_params = { .order = 0, - .flags = FBNIC_PAGE_POOL_FLAGS, - .pool_size = (fbn->hpq_size + fbn->ppq_size) * nv->rxt_count, + .flags = PP_FLAG_DMA_MAP | + PP_FLAG_DMA_SYNC_DEV, + .pool_size = fbn->hpq_size + fbn->ppq_size, .nid = NUMA_NO_NODE, - .dev = nv->dev, + .dev = fbn->netdev->dev.parent, .dma_dir = DMA_BIDIRECTIONAL, .offset = 0, .max_len = PAGE_SIZE, - .napi = &nv->napi, .netdev = fbn->netdev, + .queue_idx = rxq_idx, }; struct page_pool *pp; @@ -1338,9 +1562,24 @@ static int fbnic_alloc_nv_page_pool(struct fbnic_net *fbn, if (IS_ERR(pp)) return PTR_ERR(pp); - nv->page_pool = pp; + qt->sub0.page_pool = pp; + if (netif_rxq_has_unreadable_mp(fbn->netdev, rxq_idx)) { + pp_params.flags |= PP_FLAG_ALLOW_UNREADABLE_NETMEM; + pp_params.dma_dir = DMA_FROM_DEVICE; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) + goto err_destroy_sub0; + } else { + page_pool_get(pp); + } + qt->sub1.page_pool = pp; return 0; + +err_destroy_sub0: + page_pool_destroy(pp); + return PTR_ERR(pp); } static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, @@ -1350,6 +1589,7 @@ static void fbnic_ring_init(struct fbnic_ring *ring, u32 __iomem *doorbell, ring->doorbell = doorbell; ring->q_idx = q_idx; ring->flags = flags; + ring->deferred_head = -1; } static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, @@ -1359,11 +1599,18 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, { int txt_count = txq_count, rxt_count = rxq_count; u32 __iomem *uc_addr = fbd->uc_addr0; + int xdp_count = 0, qt_count, err; struct fbnic_napi_vector *nv; struct fbnic_q_triad *qt; - int qt_count, err; u32 __iomem *db; + /* We need to reserve at least one Tx Queue Triad for an XDP ring */ + if (rxq_count) { + xdp_count = 1; + if (!txt_count) + txt_count = 1; + } + qt_count = txt_count + rxq_count; if (!qt_count) return -EINVAL; @@ -1387,37 +1634,33 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, /* Tie napi to netdev */ fbn->napi[fbnic_napi_idx(nv)] = nv; - netif_napi_add(fbn->netdev, &nv->napi, fbnic_poll); + netif_napi_add_config_locked(fbn->netdev, &nv->napi, 
fbnic_poll, + fbnic_napi_idx(nv)); /* Record IRQ to NAPI struct */ - netif_napi_set_irq(&nv->napi, - pci_irq_vector(to_pci_dev(fbd->dev), nv->v_idx)); + netif_napi_set_irq_locked(&nv->napi, + pci_irq_vector(to_pci_dev(fbd->dev), + nv->v_idx)); /* Tie nv back to PCIe dev */ nv->dev = fbd->dev; - /* Allocate page pool */ - if (rxq_count) { - err = fbnic_alloc_nv_page_pool(fbn, nv); - if (err) - goto napi_del; - } - /* Request the IRQ for napi vector */ err = fbnic_napi_request_irq(fbd, nv); if (err) - goto pp_destroy; + goto napi_del; /* Initialize queue triads */ qt = nv->qt; while (txt_count) { + u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS; + /* Configure Tx queue */ db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ0_TAIL]; /* Assign Tx queue to netdev if applicable */ if (txq_count > 0) { - u8 flags = FBNIC_RING_F_CTX | FBNIC_RING_F_STATS; fbnic_ring_init(&qt->sub0, db, txq_idx, flags); fbn->tx[txq_idx] = &qt->sub0; @@ -1427,6 +1670,28 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, FBNIC_RING_F_DISABLED); } + /* Configure XDP queue */ + db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TWQ1_TAIL]; + + /* Assign XDP queue to netdev if applicable + * + * The setup for this is in itself a bit different. + * 1. We only need one XDP Tx queue per NAPI vector. + * 2. We associate it to the first Rx queue index. + * 3. The hardware side is associated based on the Tx Queue. + * 4. The netdev queue is offset by FBNIC_MAX_TXQs. + */ + if (xdp_count > 0) { + unsigned int xdp_idx = FBNIC_MAX_TXQS + rxq_idx; + + fbnic_ring_init(&qt->sub1, db, xdp_idx, flags); + fbn->tx[xdp_idx] = &qt->sub1; + xdp_count--; + } else { + fbnic_ring_init(&qt->sub1, db, 0, + FBNIC_RING_F_DISABLED); + } + /* Configure Tx completion queue */ db = &uc_addr[FBNIC_QUEUE(txq_idx) + FBNIC_QUEUE_TCQ_HEAD]; fbnic_ring_init(&qt->cmpl, db, 0, 0); @@ -1463,10 +1728,8 @@ static int fbnic_alloc_napi_vector(struct fbnic_dev *fbd, struct fbnic_net *fbn, return 0; -pp_destroy: - page_pool_destroy(nv->page_pool); napi_del: - netif_napi_del(&nv->napi); + netif_napi_del_locked(&nv->napi); fbn->napi[fbnic_napi_idx(nv)] = NULL; kfree(nv); return err; @@ -1680,6 +1943,12 @@ static void fbnic_free_qt_resources(struct fbnic_net *fbn, fbnic_free_ring_resources(dev, &qt->cmpl); fbnic_free_ring_resources(dev, &qt->sub1); fbnic_free_ring_resources(dev, &qt->sub0); + + if (xdp_rxq_info_is_reg(&qt->xdp_rxq)) { + xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); + xdp_rxq_info_unreg(&qt->xdp_rxq); + fbnic_free_qt_page_pools(qt); + } } static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, @@ -1692,6 +1961,10 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, if (err) return err; + err = fbnic_alloc_tx_ring_resources(fbn, &qt->sub1); + if (err) + goto free_sub0; + err = fbnic_alloc_tx_ring_resources(fbn, &qt->cmpl); if (err) goto free_sub1; @@ -1699,20 +1972,37 @@ static int fbnic_alloc_tx_qt_resources(struct fbnic_net *fbn, return 0; free_sub1: + fbnic_free_ring_resources(dev, &qt->sub1); +free_sub0: fbnic_free_ring_resources(dev, &qt->sub0); return err; } static int fbnic_alloc_rx_qt_resources(struct fbnic_net *fbn, + struct fbnic_napi_vector *nv, struct fbnic_q_triad *qt) { struct device *dev = fbn->netdev->dev.parent; int err; - err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); + err = fbnic_alloc_qt_page_pools(fbn, qt, qt->cmpl.q_idx); if (err) return err; + err = xdp_rxq_info_reg(&qt->xdp_rxq, fbn->netdev, qt->sub0.q_idx, + nv->napi.napi_id); + if (err) + goto free_page_pools; + + err = 
xdp_rxq_info_reg_mem_model(&qt->xdp_rxq, MEM_TYPE_PAGE_POOL, + qt->sub0.page_pool); + if (err) + goto unreg_rxq; + + err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub0); + if (err) + goto unreg_mm; + err = fbnic_alloc_rx_ring_resources(fbn, &qt->sub1); if (err) goto free_sub0; @@ -1727,19 +2017,21 @@ free_sub1: fbnic_free_ring_resources(dev, &qt->sub1); free_sub0: fbnic_free_ring_resources(dev, &qt->sub0); +unreg_mm: + xdp_rxq_info_unreg_mem_model(&qt->xdp_rxq); +unreg_rxq: + xdp_rxq_info_unreg(&qt->xdp_rxq); +free_page_pools: + fbnic_free_qt_page_pools(qt); return err; } static void fbnic_free_nv_resources(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) { - int i, j; - - /* Free Tx Resources */ - for (i = 0; i < nv->txt_count; i++) - fbnic_free_qt_resources(fbn, &nv->qt[i]); + int i; - for (j = 0; j < nv->rxt_count; j++, i++) + for (i = 0; i < nv->txt_count + nv->rxt_count; i++) fbnic_free_qt_resources(fbn, &nv->qt[i]); } @@ -1752,19 +2044,19 @@ static int fbnic_alloc_nv_resources(struct fbnic_net *fbn, for (i = 0; i < nv->txt_count; i++) { err = fbnic_alloc_tx_qt_resources(fbn, &nv->qt[i]); if (err) - goto free_resources; + goto free_qt_resources; } /* Allocate Rx Resources */ for (j = 0; j < nv->rxt_count; j++, i++) { - err = fbnic_alloc_rx_qt_resources(fbn, &nv->qt[i]); + err = fbnic_alloc_rx_qt_resources(fbn, nv, &nv->qt[i]); if (err) - goto free_resources; + goto free_qt_resources; } return 0; -free_resources: +free_qt_resources: while (i--) fbnic_free_qt_resources(fbn, &nv->qt[i]); return err; @@ -1871,6 +2163,15 @@ static void fbnic_disable_twq0(struct fbnic_ring *txr) fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ0_CTL, twq_ctl); } +static void fbnic_disable_twq1(struct fbnic_ring *txr) +{ + u32 twq_ctl = fbnic_ring_rd32(txr, FBNIC_QUEUE_TWQ1_CTL); + + twq_ctl &= ~FBNIC_QUEUE_TWQ_CTL_ENABLE; + + fbnic_ring_wr32(txr, FBNIC_QUEUE_TWQ1_CTL, twq_ctl); +} + static void fbnic_disable_tcq(struct fbnic_ring *txr) { fbnic_ring_wr32(txr, FBNIC_QUEUE_TCQ_CTL, 0); @@ -1897,36 +2198,48 @@ void fbnic_napi_disable(struct fbnic_net *fbn) int i; for (i = 0; i < fbn->num_napi; i++) { - napi_disable(&fbn->napi[i]->napi); + napi_disable_locked(&fbn->napi[i]->napi); fbnic_nv_irq_disable(fbn->napi[i]); } } -void fbnic_disable(struct fbnic_net *fbn) +static void __fbnic_nv_disable(struct fbnic_napi_vector *nv) { - struct fbnic_dev *fbd = fbn->fbd; - int i, j, t; - - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; + int i, t; - /* Disable Tx queue triads */ - for (t = 0; t < nv->txt_count; t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + /* Disable Tx queue triads */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_disable_twq0(&qt->sub0); - fbnic_disable_tcq(&qt->cmpl); - } + fbnic_disable_twq0(&qt->sub0); + fbnic_disable_twq1(&qt->sub1); + fbnic_disable_tcq(&qt->cmpl); + } - /* Disable Rx queue triads */ - for (j = 0; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + /* Disable Rx queue triads */ + for (i = 0; i < nv->rxt_count; i++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - fbnic_disable_bdq(&qt->sub0, &qt->sub1); - fbnic_disable_rcq(&qt->cmpl); - } + fbnic_disable_bdq(&qt->sub0, &qt->sub1); + fbnic_disable_rcq(&qt->cmpl); } +} + +static void +fbnic_nv_disable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) +{ + __fbnic_nv_disable(nv); + fbnic_wrfl(fbn->fbd); +} + +void fbnic_disable(struct fbnic_net *fbn) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i; + + for (i = 0; i < fbn->num_napi; i++) 
+ __fbnic_nv_disable(fbn->napi[i]); fbnic_wrfl(fbd); } @@ -2015,73 +2328,119 @@ int fbnic_wait_all_queues_idle(struct fbnic_dev *fbd, bool may_fail) return err; } -void fbnic_flush(struct fbnic_net *fbn) +static int +fbnic_wait_queue_idle(struct fbnic_net *fbn, bool rx, unsigned int idx) { - int i; + static const unsigned int tx_regs[] = { + FBNIC_QM_TWQ_IDLE(0), FBNIC_QM_TQS_IDLE(0), + FBNIC_QM_TDE_IDLE(0), FBNIC_QM_TCQ_IDLE(0), + }, rx_regs[] = { + FBNIC_QM_HPQ_IDLE(0), FBNIC_QM_PPQ_IDLE(0), + FBNIC_QM_RCQ_IDLE(0), + }; + struct fbnic_dev *fbd = fbn->fbd; + unsigned int val, mask, off; + const unsigned int *regs; + unsigned int reg_cnt; + int i, err; - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; - int j, t; + regs = rx ? rx_regs : tx_regs; + reg_cnt = rx ? ARRAY_SIZE(rx_regs) : ARRAY_SIZE(tx_regs); - /* Flush any processed Tx Queue Triads and drop the rest */ - for (t = 0; t < nv->txt_count; t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; - struct netdev_queue *tx_queue; + off = idx / 32; + mask = BIT(idx % 32); - /* Clean the work queues of unprocessed work */ - fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); + for (i = 0; i < reg_cnt; i++) { + err = read_poll_timeout_atomic(fbnic_rd32, val, val & mask, + 2, 500000, false, + fbd, regs[i] + off); + if (err) { + netdev_err(fbd->netdev, + "wait for queue %s%d idle failed 0x%04x(%d): %08x (mask: %08x)\n", + rx ? "Rx" : "Tx", idx, regs[i] + off, i, + val, mask); + return err; + } + } - /* Reset completion queue descriptor ring */ - memset(qt->cmpl.desc, 0, qt->cmpl.size); + return 0; +} - /* Nothing else to do if Tx queue is disabled */ - if (qt->sub0.flags & FBNIC_RING_F_DISABLED) - continue; +static void fbnic_nv_flush(struct fbnic_napi_vector *nv) +{ + int j, t; - /* Reset BQL associated with Tx queue */ - tx_queue = netdev_get_tx_queue(nv->napi.dev, - qt->sub0.q_idx); - netdev_tx_reset_queue(tx_queue); - } + /* Flush any processed Tx Queue Triads and drop the rest */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; + struct netdev_queue *tx_queue; - /* Flush any processed Rx Queue Triads and drop the rest */ - for (j = 0; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + /* Clean the work queues of unprocessed work */ + fbnic_clean_twq0(nv, 0, &qt->sub0, true, qt->sub0.tail); + fbnic_clean_twq1(nv, false, &qt->sub1, true, + qt->sub1.tail); - /* Clean the work queues of unprocessed work */ - fbnic_clean_bdq(nv, 0, &qt->sub0, qt->sub0.tail); - fbnic_clean_bdq(nv, 0, &qt->sub1, qt->sub1.tail); + /* Reset completion queue descriptor ring */ + memset(qt->cmpl.desc, 0, qt->cmpl.size); - /* Reset completion queue descriptor ring */ - memset(qt->cmpl.desc, 0, qt->cmpl.size); + /* Nothing else to do if Tx queue is disabled */ + if (qt->sub0.flags & FBNIC_RING_F_DISABLED) + continue; - fbnic_put_pkt_buff(nv, qt->cmpl.pkt, 0); - qt->cmpl.pkt->buff.data_hard_start = NULL; - } + /* Reset BQL associated with Tx queue */ + tx_queue = netdev_get_tx_queue(nv->napi.dev, + qt->sub0.q_idx); + netdev_tx_reset_queue(tx_queue); + } + + /* Flush any processed Rx Queue Triads and drop the rest */ + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; + + /* Clean the work queues of unprocessed work */ + fbnic_clean_bdq(&qt->sub0, qt->sub0.tail, 0); + fbnic_clean_bdq(&qt->sub1, qt->sub1.tail, 0); + + /* Reset completion queue descriptor ring */ + memset(qt->cmpl.desc, 0, qt->cmpl.size); + + fbnic_put_pkt_buff(qt, qt->cmpl.pkt, 
0); + memset(qt->cmpl.pkt, 0, sizeof(struct fbnic_pkt_buff)); } } -void fbnic_fill(struct fbnic_net *fbn) +void fbnic_flush(struct fbnic_net *fbn) { int i; - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; - int j, t; + for (i = 0; i < fbn->num_napi; i++) + fbnic_nv_flush(fbn->napi[i]); +} - /* Configure NAPI mapping and populate pages - * in the BDQ rings to use for Rx - */ - for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; +static void fbnic_nv_fill(struct fbnic_napi_vector *nv) +{ + int j, t; - /* Populate the header and payload BDQs */ - fbnic_fill_bdq(nv, &qt->sub0); - fbnic_fill_bdq(nv, &qt->sub1); - } + /* Configure NAPI mapping and populate pages + * in the BDQ rings to use for Rx + */ + for (j = 0, t = nv->txt_count; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; + + /* Populate the header and payload BDQs */ + fbnic_fill_bdq(&qt->sub0); + fbnic_fill_bdq(&qt->sub1); } } +void fbnic_fill(struct fbnic_net *fbn) +{ + int i; + + for (i = 0; i < fbn->num_napi; i++) + fbnic_nv_fill(fbn->napi[i]); +} + static void fbnic_enable_twq0(struct fbnic_ring *twq) { u32 log_size = fls(twq->size_mask); @@ -2104,6 +2463,28 @@ static void fbnic_enable_twq0(struct fbnic_ring *twq) fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ0_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE); } +static void fbnic_enable_twq1(struct fbnic_ring *twq) +{ + u32 log_size = fls(twq->size_mask); + + if (!twq->size_mask) + return; + + /* Reset head/tail */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_RESET); + twq->tail = 0; + twq->head = 0; + + /* Store descriptor ring address and size */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAL, lower_32_bits(twq->dma)); + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_BAH, upper_32_bits(twq->dma)); + + /* Write lower 4 bits of log size as 64K ring size is 0 */ + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_SIZE, log_size & 0xf); + + fbnic_ring_wr32(twq, FBNIC_QUEUE_TWQ1_CTL, FBNIC_QUEUE_TWQ_CTL_ENABLE); +} + static void fbnic_enable_tcq(struct fbnic_napi_vector *nv, struct fbnic_ring *tcq) { @@ -2232,13 +2613,22 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, { struct fbnic_net *fbn = netdev_priv(nv->napi.dev); u32 log_size = fls(rcq->size_mask); - u32 rcq_ctl; + u32 hds_thresh = fbn->hds_thresh; + u32 rcq_ctl = 0; fbnic_config_drop_mode_rcq(nv, rcq); - rcq_ctl = FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | - FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, - FBNIC_RX_MAX_HDR) | + /* Force lower bound on MAX_HEADER_BYTES. Below this, all frames should + * be split at L4. It would also result in the frames being split at + * L2/L3 depending on the frame size. 
+ */ + if (fbn->hds_thresh < FBNIC_HDR_BYTES_MIN) { + rcq_ctl = FBNIC_QUEUE_RDE_CTL0_EN_HDR_SPLIT; + hds_thresh = FBNIC_HDR_BYTES_MIN; + } + + rcq_ctl |= FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PADLEN_MASK, FBNIC_RX_PAD) | + FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_MAX_HDR_MASK, hds_thresh) | FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_OFF_MASK, FBNIC_RX_PAYLD_OFFSET) | FIELD_PREP(FBNIC_QUEUE_RDE_CTL1_PAYLD_PG_CL_MASK, @@ -2266,32 +2656,47 @@ static void fbnic_enable_rcq(struct fbnic_napi_vector *nv, fbnic_ring_wr32(rcq, FBNIC_QUEUE_RCQ_CTL, FBNIC_QUEUE_RCQ_CTL_ENABLE); } -void fbnic_enable(struct fbnic_net *fbn) +static void __fbnic_nv_enable(struct fbnic_napi_vector *nv) { - struct fbnic_dev *fbd = fbn->fbd; - int i; + int j, t; - for (i = 0; i < fbn->num_napi; i++) { - struct fbnic_napi_vector *nv = fbn->napi[i]; - int j, t; + /* Setup Tx Queue Triads */ + for (t = 0; t < nv->txt_count; t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - /* Setup Tx Queue Triads */ - for (t = 0; t < nv->txt_count; t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + fbnic_enable_twq0(&qt->sub0); + fbnic_enable_twq1(&qt->sub1); + fbnic_enable_tcq(nv, &qt->cmpl); + } - fbnic_enable_twq0(&qt->sub0); - fbnic_enable_tcq(nv, &qt->cmpl); - } + /* Setup Rx Queue Triads */ + for (j = 0; j < nv->rxt_count; j++, t++) { + struct fbnic_q_triad *qt = &nv->qt[t]; - /* Setup Rx Queue Triads */ - for (j = 0; j < nv->rxt_count; j++, t++) { - struct fbnic_q_triad *qt = &nv->qt[t]; + page_pool_enable_direct_recycling(qt->sub0.page_pool, + &nv->napi); + page_pool_enable_direct_recycling(qt->sub1.page_pool, + &nv->napi); - fbnic_enable_bdq(&qt->sub0, &qt->sub1); - fbnic_config_drop_mode_rcq(nv, &qt->cmpl); - fbnic_enable_rcq(nv, &qt->cmpl); - } + fbnic_enable_bdq(&qt->sub0, &qt->sub1); + fbnic_config_drop_mode_rcq(nv, &qt->cmpl); + fbnic_enable_rcq(nv, &qt->cmpl); } +} + +static void fbnic_nv_enable(struct fbnic_net *fbn, struct fbnic_napi_vector *nv) +{ + __fbnic_nv_enable(nv); + fbnic_wrfl(fbn->fbd); +} + +void fbnic_enable(struct fbnic_net *fbn) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i; + + for (i = 0; i < fbn->num_napi; i++) + __fbnic_nv_enable(fbn->napi[i]); fbnic_wrfl(fbd); } @@ -2310,7 +2715,7 @@ void fbnic_napi_enable(struct fbnic_net *fbn) for (i = 0; i < fbn->num_napi; i++) { struct fbnic_napi_vector *nv = fbn->napi[i]; - napi_enable(&nv->napi); + napi_enable_locked(&nv->napi); fbnic_nv_irq_enable(nv); @@ -2363,3 +2768,123 @@ void fbnic_napi_depletion_check(struct net_device *netdev) fbnic_wrfl(fbd); } + +static int fbnic_queue_mem_alloc(struct net_device *dev, void *qmem, int idx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + const struct fbnic_q_triad *real; + struct fbnic_q_triad *qt = qmem; + struct fbnic_napi_vector *nv; + + if (!netif_running(dev)) + return fbnic_alloc_qt_page_pools(fbn, qt, idx); + + real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl); + nv = fbn->napi[idx % fbn->num_napi]; + + fbnic_ring_init(&qt->sub0, real->sub0.doorbell, real->sub0.q_idx, + real->sub0.flags); + fbnic_ring_init(&qt->sub1, real->sub1.doorbell, real->sub1.q_idx, + real->sub1.flags); + fbnic_ring_init(&qt->cmpl, real->cmpl.doorbell, real->cmpl.q_idx, + real->cmpl.flags); + + return fbnic_alloc_rx_qt_resources(fbn, nv, qt); +} + +static void fbnic_queue_mem_free(struct net_device *dev, void *qmem) +{ + struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_q_triad *qt = qmem; + + if (!netif_running(dev)) + fbnic_free_qt_page_pools(qt); + else + fbnic_free_qt_resources(fbn, qt); +} + +static void __fbnic_nv_restart(struct fbnic_net 
*fbn, + struct fbnic_napi_vector *nv) +{ + struct fbnic_dev *fbd = fbn->fbd; + int i; + + fbnic_nv_enable(fbn, nv); + fbnic_nv_fill(nv); + + napi_enable_locked(&nv->napi); + fbnic_nv_irq_enable(nv); + fbnic_wr32(fbd, FBNIC_INTR_SET(nv->v_idx / 32), BIT(nv->v_idx % 32)); + fbnic_wrfl(fbd); + + for (i = 0; i < nv->txt_count; i++) + netif_wake_subqueue(fbn->netdev, nv->qt[i].sub0.q_idx); +} + +static int fbnic_queue_start(struct net_device *dev, void *qmem, int idx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + struct fbnic_napi_vector *nv; + struct fbnic_q_triad *real; + + real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl); + nv = fbn->napi[idx % fbn->num_napi]; + + fbnic_aggregate_ring_rx_counters(fbn, &real->sub0); + fbnic_aggregate_ring_rx_counters(fbn, &real->sub1); + fbnic_aggregate_ring_rx_counters(fbn, &real->cmpl); + + memcpy(real, qmem, sizeof(*real)); + + __fbnic_nv_restart(fbn, nv); + + return 0; +} + +static int fbnic_queue_stop(struct net_device *dev, void *qmem, int idx) +{ + struct fbnic_net *fbn = netdev_priv(dev); + const struct fbnic_q_triad *real; + struct fbnic_napi_vector *nv; + int i, t; + int err; + + real = container_of(fbn->rx[idx], struct fbnic_q_triad, cmpl); + nv = fbn->napi[idx % fbn->num_napi]; + + napi_disable_locked(&nv->napi); + fbnic_nv_irq_disable(nv); + + for (i = 0; i < nv->txt_count; i++) + netif_stop_subqueue(dev, nv->qt[i].sub0.q_idx); + fbnic_nv_disable(fbn, nv); + + for (t = 0; t < nv->txt_count + nv->rxt_count; t++) { + err = fbnic_wait_queue_idle(fbn, t >= nv->txt_count, + nv->qt[t].sub0.q_idx); + if (err) + goto err_restart; + } + + fbnic_synchronize_irq(fbn->fbd, nv->v_idx); + fbnic_nv_flush(nv); + + page_pool_disable_direct_recycling(real->sub0.page_pool); + page_pool_disable_direct_recycling(real->sub1.page_pool); + + memcpy(qmem, real, sizeof(*real)); + + return 0; + +err_restart: + __fbnic_nv_restart(fbn, nv); + return err; +} + +const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops = { + .ndo_queue_mem_size = sizeof(struct fbnic_q_triad), + .ndo_queue_mem_alloc = fbnic_queue_mem_alloc, + .ndo_queue_mem_free = fbnic_queue_mem_free, + .ndo_queue_start = fbnic_queue_start, + .ndo_queue_stop = fbnic_queue_stop, +}; diff --git a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h index 34693596e5eb..31fac0ba0902 100644 --- a/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h +++ b/drivers/net/ethernet/meta/fbnic/fbnic_txrx.h @@ -35,6 +35,7 @@ struct fbnic_net; #define FBNIC_MAX_TXQS 128u #define FBNIC_MAX_RXQS 128u +#define FBNIC_MAX_XDPQS 128u /* These apply to TWQs, TCQ, RCQ */ #define FBNIC_QUEUE_SIZE_MIN 16u @@ -50,10 +51,10 @@ struct fbnic_net; #define FBNIC_RX_TROOM \ SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +#define FBNIC_RX_HROOM_PAD 128 #define FBNIC_RX_HROOM \ - (ALIGN(FBNIC_RX_TROOM + NET_SKB_PAD, 128) - FBNIC_RX_TROOM) + (ALIGN(FBNIC_RX_TROOM + FBNIC_RX_HROOM_PAD, 128) - FBNIC_RX_TROOM) #define FBNIC_RX_PAD 0 -#define FBNIC_RX_MAX_HDR (1536 - FBNIC_RX_PAD) #define FBNIC_RX_PAYLD_OFFSET 0 #define FBNIC_RX_PAYLD_PG_CL 0 @@ -61,12 +62,16 @@ struct fbnic_net; #define FBNIC_RING_F_CTX BIT(1) #define FBNIC_RING_F_STATS BIT(2) /* Ring's stats may be used */ +#define FBNIC_HDS_THRESH_MAX \ + (4096 - FBNIC_RX_HROOM - FBNIC_RX_TROOM - FBNIC_RX_PAD) +#define FBNIC_HDS_THRESH_DEFAULT \ + (1536 - FBNIC_RX_PAD) +#define FBNIC_HDR_BYTES_MIN 128 + struct fbnic_pkt_buff { struct xdp_buff buff; ktime_t hwtstamp; - u32 data_truesize; - u16 data_len; - u16 nr_frags; + bool add_frag_failed; }; struct 
fbnic_queue_stats { @@ -85,6 +90,7 @@ struct fbnic_queue_stats { u64 alloc_failed; u64 csum_complete; u64 csum_none; + u64 length_errors; } rx; }; u64 dropped; @@ -94,7 +100,7 @@ struct fbnic_queue_stats { #define FBNIC_PAGECNT_BIAS_MAX PAGE_SIZE struct fbnic_rx_buf { - struct page *page; + netmem_ref netmem; long pagecnt_bias; }; @@ -115,6 +121,17 @@ struct fbnic_ring { u32 head, tail; /* Head/Tail of ring */ + union { + /* Rx BDQs only */ + struct page_pool *page_pool; + + /* Deferred_head is used to cache the head for TWQ1 if + * an attempt is made to clean TWQ1 with zero napi_budget. + * We do not use it for any other ring. + */ + s32 deferred_head; + }; + struct fbnic_queue_stats stats; /* Slow path fields follow */ @@ -124,12 +141,12 @@ struct fbnic_ring { struct fbnic_q_triad { struct fbnic_ring sub0, sub1, cmpl; + struct xdp_rxq_info xdp_rxq; }; struct fbnic_napi_vector { struct napi_struct napi; struct device *dev; /* Device for DMA unmapping */ - struct page_pool *page_pool; struct fbnic_dev *fbd; u16 v_idx; @@ -139,6 +156,8 @@ struct fbnic_napi_vector { struct fbnic_q_triad qt[]; }; +extern const struct netdev_queue_mgmt_ops fbnic_queue_mgmt_ops; + netdev_tx_t fbnic_xmit_frame(struct sk_buff *skb, struct net_device *dev); netdev_features_t fbnic_features_check(struct sk_buff *skb, struct net_device *dev, diff --git a/drivers/net/ethernet/microchip/lan865x/lan865x.c b/drivers/net/ethernet/microchip/lan865x/lan865x.c index 84c41f193561..0277d9737369 100644 --- a/drivers/net/ethernet/microchip/lan865x/lan865x.c +++ b/drivers/net/ethernet/microchip/lan865x/lan865x.c @@ -326,6 +326,8 @@ static const struct net_device_ops lan865x_netdev_ops = { .ndo_start_xmit = lan865x_send_packet, .ndo_set_rx_mode = lan865x_set_multicast_list, .ndo_set_mac_address = lan865x_set_mac_address, + .ndo_validate_addr = eth_validate_addr, + .ndo_eth_ioctl = phy_do_ioctl_running, }; static int lan865x_probe(struct spi_device *spi) @@ -423,13 +425,16 @@ static void lan865x_remove(struct spi_device *spi) free_netdev(priv->netdev); } -static const struct spi_device_id spidev_spi_ids[] = { +static const struct spi_device_id lan865x_ids[] = { { .name = "lan8650" }, + { .name = "lan8651" }, {}, }; +MODULE_DEVICE_TABLE(spi, lan865x_ids); static const struct of_device_id lan865x_dt_ids[] = { { .compatible = "microchip,lan8650" }, + { .compatible = "microchip,lan8651" }, { /* Sentinel */ } }; MODULE_DEVICE_TABLE(of, lan865x_dt_ids); @@ -441,7 +446,7 @@ static struct spi_driver lan865x_driver = { }, .probe = lan865x_probe, .remove = lan865x_remove, - .id_table = spidev_spi_ids, + .id_table = lan865x_ids, }; module_spi_driver(lan865x_driver); diff --git a/drivers/net/ethernet/microchip/sparx5/Kconfig b/drivers/net/ethernet/microchip/sparx5/Kconfig index 35e1c0cf345e..a4d6706590d2 100644 --- a/drivers/net/ethernet/microchip/sparx5/Kconfig +++ b/drivers/net/ethernet/microchip/sparx5/Kconfig @@ -3,7 +3,7 @@ config SPARX5_SWITCH depends on NET_SWITCHDEV depends on HAS_IOMEM depends on OF - depends on ARCH_SPARX5 || COMPILE_TEST + depends on ARCH_SPARX5 || ARCH_LAN969X || COMPILE_TEST depends on PTP_1588_CLOCK_OPTIONAL depends on BRIDGE || BRIDGE=n select PHYLINK diff --git a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c index 832f4ae57c83..049541eeaae0 100644 --- a/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c +++ b/drivers/net/ethernet/microchip/sparx5/sparx5_ethtool.c @@ -1212,6 +1212,22 @@ static int sparx5_get_ts_info(struct net_device *dev, 
 return 0; } +static void sparx5_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct sparx5_port *port = netdev_priv(dev); + + phylink_ethtool_get_pauseparam(port->phylink, pause); +} + +static int sparx5_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct sparx5_port *port = netdev_priv(dev); + + return phylink_ethtool_set_pauseparam(port->phylink, pause); +} + const struct ethtool_ops sparx5_ethtool_ops = { .get_sset_count = sparx5_get_sset_count, .get_strings = sparx5_get_sset_strings, @@ -1224,6 +1240,8 @@ const struct ethtool_ops sparx5_ethtool_ops = { .get_eth_ctrl_stats = sparx5_get_eth_mac_ctrl_stats, .get_rmon_stats = sparx5_get_eth_rmon_stats, .get_ts_info = sparx5_get_ts_info, + .get_pauseparam = sparx5_get_pauseparam, + .set_pauseparam = sparx5_set_pauseparam, }; int sparx_stats_init(struct sparx5 *sparx5) diff --git a/drivers/net/ethernet/microsoft/mana/hw_channel.c b/drivers/net/ethernet/microsoft/mana/hw_channel.c index ef072e24c46d..ada6c78a2bef 100644 --- a/drivers/net/ethernet/microsoft/mana/hw_channel.c +++ b/drivers/net/ethernet/microsoft/mana/hw_channel.c @@ -881,7 +881,12 @@ int mana_hwc_send_request(struct hw_channel_context *hwc, u32 req_len, if (!wait_for_completion_timeout(&ctx->comp_event, (msecs_to_jiffies(hwc->hwc_timeout)))) { if (hwc->hwc_timeout != 0) - dev_err(hwc->dev, "HWC: Request timed out!\n"); + dev_err(hwc->dev, "HWC: Request timed out: %u ms\n", + hwc->hwc_timeout); + + /* Reduce further waiting if the HWC does not respond */ + if (hwc->hwc_timeout > 1) + hwc->hwc_timeout = 1; err = -ETIMEDOUT; goto out; diff --git a/drivers/net/ethernet/microsoft/mana/mana_bpf.c b/drivers/net/ethernet/microsoft/mana/mana_bpf.c index d30721d4516f..7697c9b52ed3 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_bpf.c +++ b/drivers/net/ethernet/microsoft/mana/mana_bpf.c @@ -174,6 +174,7 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, struct mana_port_context *apc = netdev_priv(ndev); struct bpf_prog *old_prog; struct gdma_context *gc; + int err; gc = apc->ac->gdma_dev->gdma_context; @@ -195,11 +196,45 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, */ apc->bpf_prog = prog; - if (old_prog) - bpf_prog_put(old_prog); + if (apc->port_is_up) { + /* Re-create rxq's after xdp prog was loaded or unloaded. + * Ex: re-create rxq's to switch from full pages to smaller + * size page fragments when xdp prog is unloaded and + * vice-versa. 
+ */ + + /* Pre-allocate buffers to prevent failure in mana_attach */ + err = mana_pre_alloc_rxbufs(apc, ndev->mtu, apc->num_queues); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "XDP: Insufficient memory for tx/rx re-config"); + return err; + } + + err = mana_detach(ndev, false); + if (err) { + netdev_err(ndev, + "mana_detach failed at xdp set: %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "XDP: Re-config failed at detach"); + goto err_dealloc_rxbuffs; + } + + err = mana_attach(ndev); + if (err) { + netdev_err(ndev, + "mana_attach failed at xdp set: %d\n", err); + NL_SET_ERR_MSG_MOD(extack, + "XDP: Re-config failed at attach"); + goto err_dealloc_rxbuffs; + } - if (apc->port_is_up) mana_chn_setxdp(apc, prog); + mana_pre_dealloc_rxbufs(apc); + } + + if (old_prog) + bpf_prog_put(old_prog); if (prog) ndev->max_mtu = MANA_XDP_MTU_MAX; @@ -207,6 +242,11 @@ static int mana_xdp_set(struct net_device *ndev, struct bpf_prog *prog, ndev->max_mtu = gc->adapter_mtu - ETH_HLEN; return 0; + +err_dealloc_rxbuffs: + apc->bpf_prog = old_prog; + mana_pre_dealloc_rxbufs(apc); + return err; } int mana_bpf(struct net_device *ndev, struct netdev_bpf *bpf) diff --git a/drivers/net/ethernet/microsoft/mana/mana_en.c b/drivers/net/ethernet/microsoft/mana/mana_en.c index 550843e2164b..0142fd98392c 100644 --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -57,6 +57,15 @@ static bool mana_en_need_log(struct mana_port_context *apc, int err) return true; } +static void mana_put_rx_page(struct mana_rxq *rxq, struct page *page, + bool from_pool) +{ + if (from_pool) + page_pool_put_full_page(rxq->page_pool, page, false); + else + put_page(page); +} + /* Microsoft Azure Network Adapter (MANA) functions */ static int mana_open(struct net_device *ndev) @@ -630,21 +639,40 @@ static void *mana_get_rxbuf_pre(struct mana_rxq *rxq, dma_addr_t *da) } /* Get RX buffer's data size, alloc size, XDP headroom based on MTU */ -static void mana_get_rxbuf_cfg(int mtu, u32 *datasize, u32 *alloc_size, - u32 *headroom) +static void mana_get_rxbuf_cfg(struct mana_port_context *apc, + int mtu, u32 *datasize, u32 *alloc_size, + u32 *headroom, u32 *frag_count) { - if (mtu > MANA_XDP_MTU_MAX) - *headroom = 0; /* no support for XDP */ - else - *headroom = XDP_PACKET_HEADROOM; + u32 len, buf_size; - *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + /* Calculate datasize first (consistent across all cases) */ + *datasize = mtu + ETH_HLEN; - /* Using page pool in this case, so alloc_size is PAGE_SIZE */ - if (*alloc_size < PAGE_SIZE) - *alloc_size = PAGE_SIZE; + /* For xdp and jumbo frames make sure only one packet fits per page */ + if (mtu + MANA_RXBUF_PAD > PAGE_SIZE / 2 || mana_xdp_get(apc)) { + if (mana_xdp_get(apc)) { + *headroom = XDP_PACKET_HEADROOM; + *alloc_size = PAGE_SIZE; + } else { + *headroom = 0; /* no support for XDP */ + *alloc_size = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + + *headroom); + } - *datasize = mtu + ETH_HLEN; + *frag_count = 1; + return; + } + + /* Standard MTU case - optimize for multiple packets per page */ + *headroom = 0; + + /* Calculate base buffer size needed */ + len = SKB_DATA_ALIGN(mtu + MANA_RXBUF_PAD + *headroom); + buf_size = ALIGN(len, MANA_RX_FRAG_ALIGNMENT); + + /* Calculate how many packets can fit in a page */ + *frag_count = PAGE_SIZE / buf_size; + *alloc_size = buf_size; } int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, int new_mtu, int num_queues) @@ -656,8 +684,9 @@ int mana_pre_alloc_rxbufs(struct mana_port_context *mpc, 
int new_mtu, int num_qu void *va; int i; - mana_get_rxbuf_cfg(new_mtu, &mpc->rxbpre_datasize, - &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom); + mana_get_rxbuf_cfg(mpc, new_mtu, &mpc->rxbpre_datasize, + &mpc->rxbpre_alloc_size, &mpc->rxbpre_headroom, + &mpc->rxbpre_frag_count); dev = mpc->ac->gdma_dev->gdma_context->dev; @@ -1842,8 +1871,11 @@ drop_xdp: drop: if (from_pool) { - page_pool_recycle_direct(rxq->page_pool, - virt_to_head_page(buf_va)); + if (rxq->frag_count == 1) + page_pool_recycle_direct(rxq->page_pool, + virt_to_head_page(buf_va)); + else + page_pool_free_va(rxq->page_pool, buf_va, true); } else { WARN_ON_ONCE(rxq->xdp_save_va); /* Save for reuse */ @@ -1859,33 +1891,46 @@ static void *mana_get_rxfrag(struct mana_rxq *rxq, struct device *dev, dma_addr_t *da, bool *from_pool) { struct page *page; + u32 offset; void *va; - *from_pool = false; - /* Reuse XDP dropped page if available */ - if (rxq->xdp_save_va) { - va = rxq->xdp_save_va; - rxq->xdp_save_va = NULL; - } else { - page = page_pool_dev_alloc_pages(rxq->page_pool); - if (!page) + /* Don't use fragments for jumbo frames or XDP where it's 1 fragment + * per page. + */ + if (rxq->frag_count == 1) { + /* Reuse XDP dropped page if available */ + if (rxq->xdp_save_va) { + va = rxq->xdp_save_va; + page = virt_to_head_page(va); + rxq->xdp_save_va = NULL; + } else { + page = page_pool_dev_alloc_pages(rxq->page_pool); + if (!page) + return NULL; + + *from_pool = true; + va = page_to_virt(page); + } + + *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, + DMA_FROM_DEVICE); + if (dma_mapping_error(dev, *da)) { + mana_put_rx_page(rxq, page, *from_pool); return NULL; + } - *from_pool = true; - va = page_to_virt(page); + return va; } - *da = dma_map_single(dev, va + rxq->headroom, rxq->datasize, - DMA_FROM_DEVICE); - if (dma_mapping_error(dev, *da)) { - if (*from_pool) - page_pool_put_full_page(rxq->page_pool, page, false); - else - put_page(virt_to_head_page(va)); - + page = page_pool_dev_alloc_frag(rxq->page_pool, &offset, + rxq->alloc_size); + if (!page) return NULL; - } + + va = page_to_virt(page) + offset; + *da = page_pool_get_dma_addr(page) + offset + rxq->headroom; + *from_pool = true; return va; } @@ -1902,9 +1947,9 @@ static void mana_refill_rx_oob(struct device *dev, struct mana_rxq *rxq, va = mana_get_rxfrag(rxq, dev, &da, &from_pool); if (!va) return; - - dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, - DMA_FROM_DEVICE); + if (!rxoob->from_pool || rxq->frag_count == 1) + dma_unmap_single(dev, rxoob->sgl[0].address, rxq->datasize, + DMA_FROM_DEVICE); *old_buf = rxoob->buf_va; *old_fp = rxoob->from_pool; @@ -2100,10 +2145,8 @@ static void mana_destroy_txq(struct mana_port_context *apc) napi = &apc->tx_qp[i].tx_cq.napi; if (apc->tx_qp[i].txq.napi_initialized) { napi_synchronize(napi); - netdev_lock_ops_to_full(napi->dev); napi_disable_locked(napi); netif_napi_del_locked(napi); - netdev_unlock_full_to_ops(napi->dev); apc->tx_qp[i].txq.napi_initialized = false; } mana_destroy_wq_obj(apc, GDMA_SQ, apc->tx_qp[i].tx_object); @@ -2256,10 +2299,8 @@ static int mana_create_txq(struct mana_port_context *apc, mana_create_txq_debugfs(apc, i); set_bit(NAPI_STATE_NO_BUSY_POLL, &cq->napi.state); - netdev_lock_ops_to_full(net); netif_napi_add_locked(net, &cq->napi, mana_poll); napi_enable_locked(&cq->napi); - netdev_unlock_full_to_ops(net); txq->napi_initialized = true; mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); @@ -2295,10 +2336,8 @@ static void mana_destroy_rxq(struct mana_port_context *apc, if 
(napi_initialized) { napi_synchronize(napi); - netdev_lock_ops_to_full(napi->dev); napi_disable_locked(napi); netif_napi_del_locked(napi); - netdev_unlock_full_to_ops(napi->dev); } xdp_rxq_info_unreg(&rxq->xdp_rxq); @@ -2315,15 +2354,15 @@ static void mana_destroy_rxq(struct mana_port_context *apc, if (!rx_oob->buf_va) continue; - dma_unmap_single(dev, rx_oob->sgl[0].address, - rx_oob->sgl[0].size, DMA_FROM_DEVICE); - page = virt_to_head_page(rx_oob->buf_va); - if (rx_oob->from_pool) - page_pool_put_full_page(rxq->page_pool, page, false); - else - put_page(page); + if (rxq->frag_count == 1 || !rx_oob->from_pool) { + dma_unmap_single(dev, rx_oob->sgl[0].address, + rx_oob->sgl[0].size, DMA_FROM_DEVICE); + mana_put_rx_page(rxq, page, rx_oob->from_pool); + } else { + page_pool_free_va(rxq->page_pool, rx_oob->buf_va, true); + } rx_oob->buf_va = NULL; } @@ -2429,11 +2468,22 @@ static int mana_create_page_pool(struct mana_rxq *rxq, struct gdma_context *gc) struct page_pool_params pprm = {}; int ret; - pprm.pool_size = mpc->rx_queue_size; + pprm.pool_size = mpc->rx_queue_size / rxq->frag_count + 1; pprm.nid = gc->numa_node; pprm.napi = &rxq->rx_cq.napi; pprm.netdev = rxq->ndev; pprm.order = get_order(rxq->alloc_size); + pprm.queue_idx = rxq->rxq_idx; + pprm.dev = gc->dev; + + /* Let the page pool do the dma map when page sharing with multiple + * fragments enabled for rx buffers. + */ + if (rxq->frag_count > 1) { + pprm.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV; + pprm.max_len = PAGE_SIZE; + pprm.dma_dir = DMA_FROM_DEVICE; + } rxq->page_pool = page_pool_create(&pprm); @@ -2472,9 +2522,8 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, rxq->rxq_idx = rxq_idx; rxq->rxobj = INVALID_MANA_HANDLE; - mana_get_rxbuf_cfg(ndev->mtu, &rxq->datasize, &rxq->alloc_size, - &rxq->headroom); - + mana_get_rxbuf_cfg(apc, ndev->mtu, &rxq->datasize, &rxq->alloc_size, + &rxq->headroom, &rxq->frag_count); /* Create page pool for RX queue */ err = mana_create_page_pool(rxq, gc); if (err) { @@ -2549,18 +2598,14 @@ static struct mana_rxq *mana_create_rxq(struct mana_port_context *apc, gc->cq_table[cq->gdma_id] = cq->gdma_cq; - netdev_lock_ops_to_full(ndev); netif_napi_add_weight_locked(ndev, &cq->napi, mana_poll, 1); - netdev_unlock_full_to_ops(ndev); WARN_ON(xdp_rxq_info_reg(&rxq->xdp_rxq, ndev, rxq_idx, cq->napi.napi_id)); WARN_ON(xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, MEM_TYPE_PAGE_POOL, rxq->page_pool)); - netdev_lock_ops_to_full(ndev); napi_enable_locked(&cq->napi); - netdev_unlock_full_to_ops(ndev); mana_gd_ring_cq(cq->gdma_cq, SET_ARM_BIT); out: diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 56d5464222d9..cdbf82affa7b 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -820,7 +820,7 @@ static void rx_irq(struct net_device *ndev) struct ns83820 *dev = PRIV(ndev); struct rx_info *info = &dev->rx_info; unsigned next_rx; - int rx_rc, len; + int len; u32 cmdsts; __le32 *desc; unsigned long flags; @@ -881,8 +881,10 @@ static void rx_irq(struct net_device *ndev) if (likely(CMDSTS_OK & cmdsts)) { #endif skb_put(skb, len); - if (unlikely(!skb)) + if (unlikely(!skb)) { + ndev->stats.rx_dropped++; goto netdev_mangle_me_harder_failed; + } if (cmdsts & CMDSTS_DEST_MULTI) ndev->stats.multicast++; ndev->stats.rx_packets++; @@ -901,15 +903,12 @@ static void rx_irq(struct net_device *ndev) __vlan_hwaccel_put_tag(skb, htons(ETH_P_IPV6), tag); } #endif - rx_rc = netif_rx(skb); - if (NET_RX_DROP == rx_rc) 
{ -netdev_mangle_me_harder_failed: - ndev->stats.rx_dropped++; - } + netif_rx(skb); } else { dev_kfree_skb_irq(skb); } +netdev_mangle_me_harder_failed: nr++; next_rx = info->next_rx; desc = info->descs + (DESC_SIZE * next_rx); diff --git a/drivers/net/ethernet/netronome/nfp/crypto/tls.c b/drivers/net/ethernet/netronome/nfp/crypto/tls.c index f80f1a6953fa..f252ecdcd2cd 100644 --- a/drivers/net/ethernet/netronome/nfp/crypto/tls.c +++ b/drivers/net/ethernet/netronome/nfp/crypto/tls.c @@ -495,14 +495,13 @@ int nfp_net_tls_rx_resync_req(struct net_device *netdev, switch (ipv6h->version) { case 4: - sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - iph->saddr, th->source, iph->daddr, - th->dest, netdev->ifindex); + sk = inet_lookup_established(net, iph->saddr, th->source, + iph->daddr, th->dest, + netdev->ifindex); break; #if IS_ENABLED(CONFIG_IPV6) case 6: - sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, - &ipv6h->saddr, th->source, + sk = __inet6_lookup_established(net, &ipv6h->saddr, th->source, &ipv6h->daddr, ntohs(th->dest), netdev->ifindex, 0); break; diff --git a/drivers/net/ethernet/netronome/nfp/flower/metadata.c b/drivers/net/ethernet/netronome/nfp/flower/metadata.c index 80e4675582bf..dde60c4572fa 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/metadata.c +++ b/drivers/net/ethernet/netronome/nfp/flower/metadata.c @@ -564,8 +564,8 @@ int nfp_flower_metadata_init(struct nfp_app *app, u64 host_ctx_count, /* Init ring buffer and unallocated stats_ids. */ priv->stats_ids.free_list.buf = - vmalloc(array_size(NFP_FL_STATS_ELEM_RS, - priv->stats_ring_size)); + vmalloc_array(priv->stats_ring_size, + NFP_FL_STATS_ELEM_RS); if (!priv->stats_ids.free_list.buf) goto err_free_last_used; diff --git a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c index 08086eb76996..91a227929a5f 100644 --- a/drivers/net/ethernet/netronome/nfp/nfd3/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfd3/dp.c @@ -1169,14 +1169,10 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - start = u64_stats_fetch_begin(&r_vec->rx_sync); - pkts = r_vec->rx_pkts; - bytes = r_vec->rx_bytes; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->rx_dim, &dim_sample); @@ -1184,14 +1180,10 @@ int nfp_nfd3_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); - pkts = r_vec->tx_pkts; - bytes = r_vec->tx_bytes; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->tx_dim, &dim_sample); diff --git a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c index ab3cd06ed63e..ee0db3d5fd66 100644 --- a/drivers/net/ethernet/netronome/nfp/nfdk/dp.c +++ b/drivers/net/ethernet/netronome/nfp/nfdk/dp.c @@ -1279,14 +1279,10 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->rx_coalesce_adapt_on && r_vec->rx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - 
start = u64_stats_fetch_begin(&r_vec->rx_sync); - pkts = r_vec->rx_pkts; - bytes = r_vec->rx_bytes; - } while (u64_stats_fetch_retry(&r_vec->rx_sync, start)); + pkts = r_vec->rx_pkts; + bytes = r_vec->rx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->rx_dim, &dim_sample); @@ -1294,14 +1290,10 @@ int nfp_nfdk_poll(struct napi_struct *napi, int budget) if (r_vec->nfp_net->tx_coalesce_adapt_on && r_vec->tx_ring) { struct dim_sample dim_sample = {}; - unsigned int start; u64 pkts, bytes; - do { - start = u64_stats_fetch_begin(&r_vec->tx_sync); - pkts = r_vec->tx_pkts; - bytes = r_vec->tx_bytes; - } while (u64_stats_fetch_retry(&r_vec->tx_sync, start)); + pkts = r_vec->tx_pkts; + bytes = r_vec->tx_bytes; dim_update_sample(r_vec->event_ctr, pkts, bytes, &dim_sample); net_dim(&r_vec->tx_dim, &dim_sample); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c index 71301dbd8fb5..48390b2fd44d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c @@ -797,7 +797,7 @@ static int nfp_pci_probe(struct pci_dev *pdev, pf->pdev = pdev; pf->dev_info = dev_info; - pf->wq = alloc_workqueue("nfp-%s", 0, 2, pci_name(pdev)); + pf->wq = alloc_workqueue("nfp-%s", WQ_PERCPU, 2, pci_name(pdev)); if (!pf->wq) { err = -ENOMEM; goto err_pci_priv_unset; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index a36215195923..16c828dd5c1a 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -1788,7 +1788,7 @@ static u32 nfp_net_get_rxfh_key_size(struct net_device *netdev) struct nfp_net *nn = netdev_priv(netdev); if (!(nn->cap & NFP_NET_CFG_CTRL_RSS_ANY)) - return -EOPNOTSUPP; + return 0; return nfp_net_rss_key_sz(nn); } diff --git a/drivers/net/ethernet/oa_tc6.c b/drivers/net/ethernet/oa_tc6.c index db200e4ec284..91a906a7918a 100644 --- a/drivers/net/ethernet/oa_tc6.c +++ b/drivers/net/ethernet/oa_tc6.c @@ -1249,7 +1249,8 @@ struct oa_tc6 *oa_tc6_init(struct spi_device *spi, struct net_device *netdev) /* Set the SPI controller to pump at realtime priority */ tc6->spi->rt = true; - spi_setup(tc6->spi); + if (spi_setup(tc6->spi) < 0) + return NULL; tc6->spi_ctrl_tx_buf = devm_kzalloc(&tc6->spi->dev, OA_TC6_CTRL_SPI_BUF_SIZE, diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c index 92f30ff2d631..2d9efadb5d2a 100644 --- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c +++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c @@ -978,7 +978,7 @@ static int ionic_get_module_eeprom_by_page(struct net_device *netdev, { struct ionic_lif *lif = netdev_priv(netdev); struct ionic_dev *idev = &lif->ionic->idev; - u32 err = -EINVAL; + int err; u8 *src; if (!page_data->length) diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c index 9c3d3dd2f847..1f0cea3cae92 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_debug.c +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -4462,10 +4462,11 @@ static enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, goto out; } - /* Add override window info to buffer */ + /* Add override window info to buffer, preventing buffer overflow */ override_window_dwords = - qed_rd(p_hwfn, p_ptt, GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) * - PROTECTION_OVERRIDE_ELEMENT_DWORDS; + 
min(qed_rd(p_hwfn, p_ptt, GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) * + PROTECTION_OVERRIDE_ELEMENT_DWORDS, + PROTECTION_OVERRIDE_DEPTH_DWORDS); if (override_window_dwords) { addr = BYTES_TO_DWORDS(GRC_REG_PROTECTION_OVERRIDE_WINDOW); offset += qed_grc_dump_addr_range(p_hwfn, diff --git a/drivers/net/ethernet/qlogic/qed/qed_devlink.c b/drivers/net/ethernet/qlogic/qed/qed_devlink.c index 1adc7fbb3f2f..94c5689b5abd 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_devlink.c +++ b/drivers/net/ethernet/qlogic/qed/qed_devlink.c @@ -87,20 +87,21 @@ qed_fw_fatal_reporter_recover(struct devlink_health_reporter *reporter, return 0; } +#define QED_REPORTER_FW_GRACEFUL_PERIOD 0 + static const struct devlink_health_reporter_ops qed_fw_fatal_reporter_ops = { .name = "fw_fatal", .recover = qed_fw_fatal_reporter_recover, .dump = qed_fw_fatal_reporter_dump, + .default_graceful_period = QED_REPORTER_FW_GRACEFUL_PERIOD, }; -#define QED_REPORTER_FW_GRACEFUL_PERIOD 0 - void qed_fw_reporters_create(struct devlink *devlink) { struct qed_devlink *dl = devlink_priv(devlink); - dl->fw_reporter = devlink_health_reporter_create(devlink, &qed_fw_fatal_reporter_ops, - QED_REPORTER_FW_GRACEFUL_PERIOD, dl); + dl->fw_reporter = devlink_health_reporter_create(devlink, + &qed_fw_fatal_reporter_ops, dl); if (IS_ERR(dl->fw_reporter)) { DP_NOTICE(dl->cdev, "Failed to create fw reporter, err = %ld\n", PTR_ERR(dl->fw_reporter)); diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 886061d7351a..d4685ad4b169 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1214,7 +1214,8 @@ static int qed_slowpath_wq_start(struct qed_dev *cdev) hwfn = &cdev->hwfns[i]; hwfn->slowpath_wq = alloc_workqueue("slowpath-%02x:%02x.%02x", - 0, 0, cdev->pdev->bus->number, + WQ_PERCPU, 0, + cdev->pdev->bus->number, PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id); diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c index 5d725f59db24..8be567a6ad44 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ooo.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c @@ -183,9 +183,6 @@ void qed_ooo_release_connection_isles(struct qed_hwfn *p_hwfn, struct qed_ooo_buffer, list_entry); - if (!p_buffer) - break; - list_move_tail(&p_buffer->list_entry, &p_ooo_info->free_buffers_list); } @@ -218,9 +215,6 @@ void qed_ooo_release_all_isles(struct qed_hwfn *p_hwfn, struct qed_ooo_buffer, list_entry); - if (!p_buffer) - break; - list_move_tail(&p_buffer->list_entry, &p_ooo_info->free_buffers_list); } @@ -255,9 +249,6 @@ void qed_ooo_free(struct qed_hwfn *p_hwfn) p_buffer = list_first_entry(&p_ooo_info->free_buffers_list, struct qed_ooo_buffer, list_entry); - if (!p_buffer) - break; - list_del(&p_buffer->list_entry); dma_free_coherent(&p_hwfn->cdev->pdev->dev, p_buffer->rx_buffer_size, diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index a4434eb38950..ba7efb108637 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -60,6 +60,21 @@ config QCOM_EMAC low power, Receive-Side Scaling (RSS), and IEEE 1588-2008 Precision Clock Synchronization Protocol. +config QCOM_PPE + tristate "Qualcomm Technologies, Inc. PPE Ethernet support" + depends on COMMON_CLK && HAS_IOMEM && OF + depends on ARCH_QCOM || COMPILE_TEST + select REGMAP_MMIO + help + This driver supports the Qualcomm Technologies, Inc. packet + process engine (PPE) available with IPQ SoC. 
The PPE includes + the Ethernet MACs, Ethernet DMA (EDMA) and switch core that + supports L3 flow offload, L2 switch function, RSS and tunnel + offload. + + To compile this driver as a module, choose M here. The module + will be called qcom-ppe. + source "drivers/net/ethernet/qualcomm/rmnet/Kconfig" endif # NET_VENDOR_QUALCOMM diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile index 9250976dd884..166a59aea363 100644 --- a/drivers/net/ethernet/qualcomm/Makefile +++ b/drivers/net/ethernet/qualcomm/Makefile @@ -11,4 +11,5 @@ qcauart-objs := qca_uart.o obj-y += emac/ +obj-$(CONFIG_QCOM_PPE) += ppe/ obj-$(CONFIG_RMNET) += rmnet/ diff --git a/drivers/net/ethernet/qualcomm/ppe/Makefile b/drivers/net/ethernet/qualcomm/ppe/Makefile new file mode 100644 index 000000000000..9e60b2400c16 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the device driver of PPE (Packet Process Engine) in IPQ SoC +# + +obj-$(CONFIG_QCOM_PPE) += qcom-ppe.o +qcom-ppe-objs := ppe.o ppe_config.o ppe_debugfs.o diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe.c b/drivers/net/ethernet/qualcomm/ppe/ppe.c new file mode 100644 index 000000000000..be747510d947 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe.c @@ -0,0 +1,239 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +/* PPE platform device probe, DTSI parser and PPE clock initializations. */ + +#include <linux/clk.h> +#include <linux/interconnect.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/reset.h> + +#include "ppe.h" +#include "ppe_config.h" +#include "ppe_debugfs.h" + +#define PPE_PORT_MAX 8 +#define PPE_CLK_RATE 353000000 + +/* ICC clocks for enabling PPE device. The avg_bw and peak_bw with value 0 + * will be updated by the clock rate of PPE. 
+ */ +static const struct icc_bulk_data ppe_icc_data[] = { + { + .name = "ppe", + .avg_bw = 0, + .peak_bw = 0, + }, + { + .name = "ppe_cfg", + .avg_bw = 0, + .peak_bw = 0, + }, + { + .name = "qos_gen", + .avg_bw = 6000, + .peak_bw = 6000, + }, + { + .name = "timeout_ref", + .avg_bw = 6000, + .peak_bw = 6000, + }, + { + .name = "nssnoc_memnoc", + .avg_bw = 533333, + .peak_bw = 533333, + }, + { + .name = "memnoc_nssnoc", + .avg_bw = 533333, + .peak_bw = 533333, + }, + { + .name = "memnoc_nssnoc_1", + .avg_bw = 533333, + .peak_bw = 533333, + }, +}; + +static const struct regmap_range ppe_readable_ranges[] = { + regmap_reg_range(0x0, 0x1ff), /* Global */ + regmap_reg_range(0x400, 0x5ff), /* LPI CSR */ + regmap_reg_range(0x1000, 0x11ff), /* GMAC0 */ + regmap_reg_range(0x1200, 0x13ff), /* GMAC1 */ + regmap_reg_range(0x1400, 0x15ff), /* GMAC2 */ + regmap_reg_range(0x1600, 0x17ff), /* GMAC3 */ + regmap_reg_range(0x1800, 0x19ff), /* GMAC4 */ + regmap_reg_range(0x1a00, 0x1bff), /* GMAC5 */ + regmap_reg_range(0xb000, 0xefff), /* PRX CSR */ + regmap_reg_range(0xf000, 0x1efff), /* IPE */ + regmap_reg_range(0x20000, 0x5ffff), /* PTX CSR */ + regmap_reg_range(0x60000, 0x9ffff), /* IPE L2 CSR */ + regmap_reg_range(0xb0000, 0xeffff), /* IPO CSR */ + regmap_reg_range(0x100000, 0x17ffff), /* IPE PC */ + regmap_reg_range(0x180000, 0x1bffff), /* PRE IPO CSR */ + regmap_reg_range(0x1d0000, 0x1dffff), /* Tunnel parser */ + regmap_reg_range(0x1e0000, 0x1effff), /* Ingress parse */ + regmap_reg_range(0x200000, 0x2fffff), /* IPE L3 */ + regmap_reg_range(0x300000, 0x3fffff), /* IPE tunnel */ + regmap_reg_range(0x400000, 0x4fffff), /* Scheduler */ + regmap_reg_range(0x500000, 0x503fff), /* XGMAC0 */ + regmap_reg_range(0x504000, 0x507fff), /* XGMAC1 */ + regmap_reg_range(0x508000, 0x50bfff), /* XGMAC2 */ + regmap_reg_range(0x50c000, 0x50ffff), /* XGMAC3 */ + regmap_reg_range(0x510000, 0x513fff), /* XGMAC4 */ + regmap_reg_range(0x514000, 0x517fff), /* XGMAC5 */ + regmap_reg_range(0x600000, 0x6fffff), /* BM */ + regmap_reg_range(0x800000, 0x9fffff), /* QM */ + regmap_reg_range(0xb00000, 0xbef800), /* EDMA */ +}; + +static const struct regmap_access_table ppe_reg_table = { + .yes_ranges = ppe_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(ppe_readable_ranges), +}; + +static const struct regmap_config regmap_config_ipq9574 = { + .reg_bits = 32, + .reg_stride = 4, + .val_bits = 32, + .rd_table = &ppe_reg_table, + .wr_table = &ppe_reg_table, + .max_register = 0xbef800, + .fast_io = true, +}; + +static int ppe_clock_init_and_reset(struct ppe_device *ppe_dev) +{ + unsigned long ppe_rate = ppe_dev->clk_rate; + struct device *dev = ppe_dev->dev; + struct reset_control *rstc; + struct clk_bulk_data *clks; + struct clk *clk; + int ret, i; + + for (i = 0; i < ppe_dev->num_icc_paths; i++) { + ppe_dev->icc_paths[i].name = ppe_icc_data[i].name; + ppe_dev->icc_paths[i].avg_bw = ppe_icc_data[i].avg_bw ? : + Bps_to_icc(ppe_rate); + + /* PPE does not have an explicit peak bandwidth requirement, + * so set the peak bandwidth to be equal to the average + * bandwidth. + */ + ppe_dev->icc_paths[i].peak_bw = ppe_icc_data[i].peak_bw ? : + Bps_to_icc(ppe_rate); + } + + ret = devm_of_icc_bulk_get(dev, ppe_dev->num_icc_paths, + ppe_dev->icc_paths); + if (ret) + return ret; + + ret = icc_bulk_set_bw(ppe_dev->num_icc_paths, ppe_dev->icc_paths); + if (ret) + return ret; + + /* The PPE clocks have a common parent clock. Setting the clock + * rate of "ppe" ensures the clock rate of all PPE clocks is + * configured to the same rate. 
+ */ + clk = devm_clk_get(dev, "ppe"); + if (IS_ERR(clk)) + return PTR_ERR(clk); + + ret = clk_set_rate(clk, ppe_rate); + if (ret) + return ret; + + ret = devm_clk_bulk_get_all_enabled(dev, &clks); + if (ret < 0) + return ret; + + /* Reset the PPE. */ + rstc = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(rstc)) + return PTR_ERR(rstc); + + ret = reset_control_assert(rstc); + if (ret) + return ret; + + /* The delay 10 ms of assert is necessary for resetting PPE. */ + usleep_range(10000, 11000); + + return reset_control_deassert(rstc); +} + +static int qcom_ppe_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct ppe_device *ppe_dev; + void __iomem *base; + int ret, num_icc; + + num_icc = ARRAY_SIZE(ppe_icc_data); + ppe_dev = devm_kzalloc(dev, struct_size(ppe_dev, icc_paths, num_icc), + GFP_KERNEL); + if (!ppe_dev) + return -ENOMEM; + + base = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(base)) + return dev_err_probe(dev, PTR_ERR(base), "PPE ioremap failed\n"); + + ppe_dev->regmap = devm_regmap_init_mmio(dev, base, ®map_config_ipq9574); + if (IS_ERR(ppe_dev->regmap)) + return dev_err_probe(dev, PTR_ERR(ppe_dev->regmap), + "PPE initialize regmap failed\n"); + ppe_dev->dev = dev; + ppe_dev->clk_rate = PPE_CLK_RATE; + ppe_dev->num_ports = PPE_PORT_MAX; + ppe_dev->num_icc_paths = num_icc; + + ret = ppe_clock_init_and_reset(ppe_dev); + if (ret) + return dev_err_probe(dev, ret, "PPE clock config failed\n"); + + ret = ppe_hw_config(ppe_dev); + if (ret) + return dev_err_probe(dev, ret, "PPE HW config failed\n"); + + ppe_debugfs_setup(ppe_dev); + platform_set_drvdata(pdev, ppe_dev); + + return 0; +} + +static void qcom_ppe_remove(struct platform_device *pdev) +{ + struct ppe_device *ppe_dev; + + ppe_dev = platform_get_drvdata(pdev); + ppe_debugfs_teardown(ppe_dev); +} + +static const struct of_device_id qcom_ppe_of_match[] = { + { .compatible = "qcom,ipq9574-ppe" }, + {} +}; +MODULE_DEVICE_TABLE(of, qcom_ppe_of_match); + +static struct platform_driver qcom_ppe_driver = { + .driver = { + .name = "qcom_ppe", + .of_match_table = qcom_ppe_of_match, + }, + .probe = qcom_ppe_probe, + .remove = qcom_ppe_remove, +}; +module_platform_driver(qcom_ppe_driver); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Qualcomm Technologies, Inc. IPQ PPE driver"); diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe.h b/drivers/net/ethernet/qualcomm/ppe/ppe.h new file mode 100644 index 000000000000..27458f0bc206 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef __PPE_H__ +#define __PPE_H__ + +#include <linux/compiler.h> +#include <linux/interconnect.h> + +struct device; +struct regmap; +struct dentry; + +/** + * struct ppe_device - PPE device private data. + * @dev: PPE device structure. + * @regmap: PPE register map. + * @clk_rate: PPE clock rate. + * @num_ports: Number of PPE ports. + * @debugfs_root: Debugfs root entry. + * @num_icc_paths: Number of interconnect paths. + * @icc_paths: Interconnect path array. + * + * PPE device is the instance of PPE hardware, which is used to + * configure PPE packet process modules such as BM (buffer management), + * QM (queue management), and scheduler. 
*/ +struct ppe_device { + struct device *dev; + struct regmap *regmap; + unsigned long clk_rate; + unsigned int num_ports; + struct dentry *debugfs_root; + unsigned int num_icc_paths; + struct icc_bulk_data icc_paths[] __counted_by(num_icc_paths); +}; +#endif diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_config.c b/drivers/net/ethernet/qualcomm/ppe/ppe_config.c new file mode 100644 index 000000000000..e9a0e22907a6 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe_config.c @@ -0,0 +1,2034 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +/* PPE HW initialization configs such as BM(buffer management), + * QM(queue management) and scheduler configs. + */ + +#include <linux/bitfield.h> +#include <linux/bitmap.h> +#include <linux/bits.h> +#include <linux/device.h> +#include <linux/regmap.h> + +#include "ppe.h" +#include "ppe_config.h" +#include "ppe_regs.h" + +#define PPE_QUEUE_SCH_PRI_NUM 8 + +/** + * struct ppe_bm_port_config - PPE BM port configuration. + * @port_id_start: The first BM port ID to configure. + * @port_id_end: The last BM port ID to configure. + * @pre_alloc: BM port dedicated buffer number. + * @in_fly_buf: Buffer number for receiving the packet after pause frame sent. + * @ceil: Ceil to generate the back pressure. + * @weight: Weight value. + * @resume_offset: Resume offset from the threshold value. + * @resume_ceil: Ceil to resume from the back pressure state. + * @dynamic: Dynamic threshold used or not. + * + * This is for configuring the threshold that impacts the port + * flow control. + */ +struct ppe_bm_port_config { + unsigned int port_id_start; + unsigned int port_id_end; + unsigned int pre_alloc; + unsigned int in_fly_buf; + unsigned int ceil; + unsigned int weight; + unsigned int resume_offset; + unsigned int resume_ceil; + bool dynamic; +}; + +/** + * struct ppe_qm_queue_config - PPE queue config. + * @queue_start: PPE start of queue ID. + * @queue_end: PPE end of queue ID. + * @prealloc_buf: Queue dedicated buffer number. + * @ceil: Ceil to start drop packet from queue. + * @weight: Weight value. + * @resume_offset: Resume offset from the threshold. + * @dynamic: Threshold value is decided dynamically or statically. + * + * Queue configuration decides the threshold to drop packet from PPE + * hardware queue. + */ +struct ppe_qm_queue_config { + unsigned int queue_start; + unsigned int queue_end; + unsigned int prealloc_buf; + unsigned int ceil; + unsigned int weight; + unsigned int resume_offset; + bool dynamic; +}; + +/** + * enum ppe_scheduler_direction - PPE scheduler direction for packet. + * @PPE_SCH_INGRESS: Scheduler for the packet on ingress. + * @PPE_SCH_EGRESS: Scheduler for the packet on egress. + */ +enum ppe_scheduler_direction { + PPE_SCH_INGRESS = 0, + PPE_SCH_EGRESS = 1, +}; + +/** + * struct ppe_scheduler_bm_config - PPE arbitration for buffer config. + * @valid: Arbitration entry valid or not. + * @dir: Arbitration entry for egress or ingress. + * @port: Port ID to use arbitration entry. + * @backup_port_valid: Backup port valid or not. + * @backup_port: Backup port ID to use. + * + * Configure the scheduler settings for accessing and releasing the PPE buffers. + */ +struct ppe_scheduler_bm_config { + bool valid; + enum ppe_scheduler_direction dir; + unsigned int port; + bool backup_port_valid; + unsigned int backup_port; +}; + +/** + * struct ppe_scheduler_qm_config - PPE arbitration for scheduler config. 
+ * @ensch_port_bmp: Port bit map for enqueue scheduler. + * @ensch_port: Port ID to enqueue scheduler. + * @desch_port: Port ID to dequeue scheduler. + * @desch_backup_port_valid: Dequeue for the backup port valid or not. + * @desch_backup_port: Backup port ID to dequeue scheduler. + * + * Configure the scheduler settings for enqueuing and dequeuing packets on + * the PPE port. + */ +struct ppe_scheduler_qm_config { + unsigned int ensch_port_bmp; + unsigned int ensch_port; + unsigned int desch_port; + bool desch_backup_port_valid; + unsigned int desch_backup_port; +}; + +/** + * struct ppe_scheduler_port_config - PPE port scheduler config. + * @port: Port ID to be scheduled. + * @flow_level: Scheduler flow level or not. + * @node_id: Node ID, for level 0, queue ID is used. + * @loop_num: Loop number of scheduler config. + * @pri_max: Max priority configured. + * @flow_id: Strict priority ID. + * @drr_node_id: Node ID for scheduler. + * + * PPE port scheduler configuration which decides the priority in the + * packet scheduler for the egress port. + */ +struct ppe_scheduler_port_config { + unsigned int port; + bool flow_level; + unsigned int node_id; + unsigned int loop_num; + unsigned int pri_max; + unsigned int flow_id; + unsigned int drr_node_id; +}; + +/** + * struct ppe_port_schedule_resource - PPE port scheduler resource. + * @ucastq_start: Unicast queue start ID. + * @ucastq_end: Unicast queue end ID. + * @mcastq_start: Multicast queue start ID. + * @mcastq_end: Multicast queue end ID. + * @flow_id_start: Flow start ID. + * @flow_id_end: Flow end ID. + * @l0node_start: Scheduler node start ID for queue level. + * @l0node_end: Scheduler node end ID for queue level. + * @l1node_start: Scheduler node start ID for flow level. + * @l1node_end: Scheduler node end ID for flow level. + * + * PPE scheduler resource allocated among the PPE ports. + */ +struct ppe_port_schedule_resource { + unsigned int ucastq_start; + unsigned int ucastq_end; + unsigned int mcastq_start; + unsigned int mcastq_end; + unsigned int flow_id_start; + unsigned int flow_id_end; + unsigned int l0node_start; + unsigned int l0node_end; + unsigned int l1node_start; + unsigned int l1node_end; +}; + +/* There are total 2048 buffers available in PPE, out of which some + * buffers are reserved for some specific purposes per PPE port. The + * rest of the pool of 1550 buffers are assigned to the general 'group0' + * which is shared among all ports of the PPE. + */ +static const int ipq9574_ppe_bm_group_config = 1550; + +/* The buffer configurations per PPE port. There are 15 BM ports and + * 4 BM groups supported by PPE. BM port (0-7) is for EDMA port 0, + * BM port (8-13) is for PPE physical port 1-6 and BM port 14 is for + * EIP port. + */ +static const struct ppe_bm_port_config ipq9574_ppe_bm_port_config[] = { + { + /* Buffer configuration for the BM port ID 0 of EDMA. */ + .port_id_start = 0, + .port_id_end = 0, + .pre_alloc = 0, + .in_fly_buf = 100, + .ceil = 1146, + .weight = 7, + .resume_offset = 8, + .resume_ceil = 0, + .dynamic = true, + }, + { + /* Buffer configuration for the BM port ID 1-7 of EDMA. */ + .port_id_start = 1, + .port_id_end = 7, + .pre_alloc = 0, + .in_fly_buf = 100, + .ceil = 250, + .weight = 4, + .resume_offset = 36, + .resume_ceil = 0, + .dynamic = true, + }, + { + /* Buffer configuration for the BM port ID 8-13 of PPE ports. 
*/ + .port_id_start = 8, + .port_id_end = 13, + .pre_alloc = 0, + .in_fly_buf = 128, + .ceil = 250, + .weight = 4, + .resume_offset = 36, + .resume_ceil = 0, + .dynamic = true, + }, + { + /* Buffer configuration for the BM port ID 14 of EIP. */ + .port_id_start = 14, + .port_id_end = 14, + .pre_alloc = 0, + .in_fly_buf = 40, + .ceil = 250, + .weight = 4, + .resume_offset = 36, + .resume_ceil = 0, + .dynamic = true, + }, +}; + +/* QM fetches the packet from PPE buffer management for transmitting the + * packet out. The QM group configuration limits the total number of buffers + * enqueued by all PPE hardware queues. + * There are total 2048 buffers available, out of which some buffers are + * dedicated to hardware exception handlers. The remaining buffers are + * assigned to the general 'group0', which is the group assigned to all + * queues by default. + */ +static const int ipq9574_ppe_qm_group_config = 2000; + +/* Default QM settings for unicast and multicast queues for IPQ9754. */ +static const struct ppe_qm_queue_config ipq9574_ppe_qm_queue_config[] = { + { + /* QM settings for unicast queues 0 to 255. */ + .queue_start = 0, + .queue_end = 255, + .prealloc_buf = 0, + .ceil = 1200, + .weight = 7, + .resume_offset = 36, + .dynamic = true, + }, + { + /* QM settings for multicast queues 256 to 299. */ + .queue_start = 256, + .queue_end = 299, + .prealloc_buf = 0, + .ceil = 250, + .weight = 0, + .resume_offset = 36, + .dynamic = false, + }, +}; + +/* PPE scheduler configuration for BM includes multiple entries. Each entry + * indicates the primary port to be assigned the buffers for the ingress or + * to release the buffers for the egress. Backup port ID will be used when + * the primary port ID is down. + */ +static const struct ppe_scheduler_bm_config ipq9574_ppe_sch_bm_config[] = { + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 2, false, 0}, + {true, PPE_SCH_EGRESS, 2, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 3, false, 0}, + {true, PPE_SCH_EGRESS, 3, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, + 
{true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 4, false, 0}, + {true, PPE_SCH_EGRESS, 4, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 2, false, 0}, + {true, PPE_SCH_EGRESS, 2, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 3, false, 0}, + {true, PPE_SCH_EGRESS, 3, false, 0}, + {true, PPE_SCH_INGRESS, 1, false, 0}, + {true, PPE_SCH_EGRESS, 1, false, 0}, + {true, PPE_SCH_INGRESS, 0, false, 0}, + {true, PPE_SCH_EGRESS, 0, false, 0}, + {true, PPE_SCH_INGRESS, 5, false, 0}, + {true, PPE_SCH_EGRESS, 5, false, 0}, + {true, PPE_SCH_INGRESS, 6, false, 0}, + {true, PPE_SCH_EGRESS, 6, false, 0}, + {true, PPE_SCH_INGRESS, 4, false, 0}, + {true, PPE_SCH_EGRESS, 4, false, 0}, + {true, PPE_SCH_INGRESS, 7, false, 0}, + {true, PPE_SCH_EGRESS, 7, false, 0}, +}; + +/* PPE scheduler configuration for QM includes multiple entries. Each entry + * contains ports to be dispatched for enqueueing and dequeueing. The backup + * port for dequeueing is supported to be used when the primary port for + * dequeueing is down. 
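+ * The first field of each entry is the enqueue port bitmap; 0x98, for
+ * instance, has bits 3, 4 and 7 set and so refers to ports 3, 4 and 7.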
+ */ +static const struct ppe_scheduler_qm_config ipq9574_ppe_sch_qm_config[] = { + {0x98, 6, 0, true, 1}, + {0x94, 5, 6, true, 3}, + {0x86, 0, 5, true, 4}, + {0x8C, 1, 6, true, 0}, + {0x1C, 7, 5, true, 1}, + {0x98, 2, 6, true, 0}, + {0x1C, 5, 7, true, 1}, + {0x34, 3, 6, true, 0}, + {0x8C, 4, 5, true, 1}, + {0x98, 2, 6, true, 0}, + {0x8C, 5, 4, true, 1}, + {0xA8, 0, 6, true, 2}, + {0x98, 5, 1, true, 0}, + {0x98, 6, 5, true, 2}, + {0x89, 1, 6, true, 4}, + {0xA4, 3, 0, true, 1}, + {0x8C, 5, 6, true, 4}, + {0xA8, 0, 2, true, 1}, + {0x98, 6, 5, true, 0}, + {0xC4, 4, 3, true, 1}, + {0x94, 6, 5, true, 0}, + {0x1C, 7, 6, true, 1}, + {0x98, 2, 5, true, 0}, + {0x1C, 6, 7, true, 1}, + {0x1C, 5, 6, true, 0}, + {0x94, 3, 5, true, 1}, + {0x8C, 4, 6, true, 0}, + {0x94, 1, 5, true, 3}, + {0x94, 6, 1, true, 0}, + {0xD0, 3, 5, true, 2}, + {0x98, 6, 0, true, 1}, + {0x94, 5, 6, true, 3}, + {0x94, 1, 5, true, 0}, + {0x98, 2, 6, true, 1}, + {0x8C, 4, 5, true, 0}, + {0x1C, 7, 6, true, 1}, + {0x8C, 0, 5, true, 4}, + {0x89, 1, 6, true, 2}, + {0x98, 5, 0, true, 1}, + {0x94, 6, 5, true, 3}, + {0x92, 0, 6, true, 2}, + {0x98, 1, 5, true, 0}, + {0x98, 6, 2, true, 1}, + {0xD0, 0, 5, true, 3}, + {0x94, 6, 0, true, 1}, + {0x8C, 5, 6, true, 4}, + {0x8C, 1, 5, true, 0}, + {0x1C, 6, 7, true, 1}, + {0x1C, 5, 6, true, 0}, + {0xB0, 2, 3, true, 1}, + {0xC4, 4, 5, true, 0}, + {0x8C, 6, 4, true, 1}, + {0xA4, 3, 6, true, 0}, + {0x1C, 5, 7, true, 1}, + {0x4C, 0, 5, true, 4}, + {0x8C, 6, 0, true, 1}, + {0x34, 7, 6, true, 3}, + {0x94, 5, 0, true, 1}, + {0x98, 6, 5, true, 2}, +}; + +static const struct ppe_scheduler_port_config ppe_port_sch_config[] = { + { + .port = 0, + .flow_level = true, + .node_id = 0, + .loop_num = 1, + .pri_max = 1, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 0, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 8, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 16, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 24, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 32, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 40, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 48, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 56, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 256, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 0, + .flow_level = false, + .node_id = 264, + .loop_num = 8, + .pri_max = 8, + .flow_id = 0, + .drr_node_id = 0, + }, + { + .port = 1, + .flow_level = true, + .node_id = 36, + .loop_num = 2, + .pri_max = 0, + .flow_id = 1, + .drr_node_id = 8, + }, + { + .port = 1, + .flow_level = false, + .node_id = 144, + .loop_num = 16, + .pri_max = 8, + .flow_id = 36, + .drr_node_id = 48, + }, + { + .port = 1, + .flow_level = false, + .node_id = 272, + .loop_num = 4, + .pri_max = 4, + .flow_id = 36, + .drr_node_id = 48, + }, + { + .port = 2, + .flow_level = true, + .node_id = 40, + .loop_num = 2, + 
.pri_max = 0, + .flow_id = 2, + .drr_node_id = 12, + }, + { + .port = 2, + .flow_level = false, + .node_id = 160, + .loop_num = 16, + .pri_max = 8, + .flow_id = 40, + .drr_node_id = 64, + }, + { + .port = 2, + .flow_level = false, + .node_id = 276, + .loop_num = 4, + .pri_max = 4, + .flow_id = 40, + .drr_node_id = 64, + }, + { + .port = 3, + .flow_level = true, + .node_id = 44, + .loop_num = 2, + .pri_max = 0, + .flow_id = 3, + .drr_node_id = 16, + }, + { + .port = 3, + .flow_level = false, + .node_id = 176, + .loop_num = 16, + .pri_max = 8, + .flow_id = 44, + .drr_node_id = 80, + }, + { + .port = 3, + .flow_level = false, + .node_id = 280, + .loop_num = 4, + .pri_max = 4, + .flow_id = 44, + .drr_node_id = 80, + }, + { + .port = 4, + .flow_level = true, + .node_id = 48, + .loop_num = 2, + .pri_max = 0, + .flow_id = 4, + .drr_node_id = 20, + }, + { + .port = 4, + .flow_level = false, + .node_id = 192, + .loop_num = 16, + .pri_max = 8, + .flow_id = 48, + .drr_node_id = 96, + }, + { + .port = 4, + .flow_level = false, + .node_id = 284, + .loop_num = 4, + .pri_max = 4, + .flow_id = 48, + .drr_node_id = 96, + }, + { + .port = 5, + .flow_level = true, + .node_id = 52, + .loop_num = 2, + .pri_max = 0, + .flow_id = 5, + .drr_node_id = 24, + }, + { + .port = 5, + .flow_level = false, + .node_id = 208, + .loop_num = 16, + .pri_max = 8, + .flow_id = 52, + .drr_node_id = 112, + }, + { + .port = 5, + .flow_level = false, + .node_id = 288, + .loop_num = 4, + .pri_max = 4, + .flow_id = 52, + .drr_node_id = 112, + }, + { + .port = 6, + .flow_level = true, + .node_id = 56, + .loop_num = 2, + .pri_max = 0, + .flow_id = 6, + .drr_node_id = 28, + }, + { + .port = 6, + .flow_level = false, + .node_id = 224, + .loop_num = 16, + .pri_max = 8, + .flow_id = 56, + .drr_node_id = 128, + }, + { + .port = 6, + .flow_level = false, + .node_id = 292, + .loop_num = 4, + .pri_max = 4, + .flow_id = 56, + .drr_node_id = 128, + }, + { + .port = 7, + .flow_level = true, + .node_id = 60, + .loop_num = 2, + .pri_max = 0, + .flow_id = 7, + .drr_node_id = 32, + }, + { + .port = 7, + .flow_level = false, + .node_id = 240, + .loop_num = 16, + .pri_max = 8, + .flow_id = 60, + .drr_node_id = 144, + }, + { + .port = 7, + .flow_level = false, + .node_id = 296, + .loop_num = 4, + .pri_max = 4, + .flow_id = 60, + .drr_node_id = 144, + }, +}; + +/* The scheduler resource is applied to each PPE port, The resource + * includes the unicast & multicast queues, flow nodes and DRR nodes. 
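+ * For example, the second entry below is used for physical port 1 and
+ * assigns it unicast queues 144-159 and flow IDs 36-39, while the final
+ * entry collects the remaining resources that are not tied to a
+ * specific port.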
+ */ +static const struct ppe_port_schedule_resource ppe_scheduler_res[] = { + { .ucastq_start = 0, + .ucastq_end = 63, + .mcastq_start = 256, + .mcastq_end = 271, + .flow_id_start = 0, + .flow_id_end = 0, + .l0node_start = 0, + .l0node_end = 7, + .l1node_start = 0, + .l1node_end = 0, + }, + { .ucastq_start = 144, + .ucastq_end = 159, + .mcastq_start = 272, + .mcastq_end = 275, + .flow_id_start = 36, + .flow_id_end = 39, + .l0node_start = 48, + .l0node_end = 63, + .l1node_start = 8, + .l1node_end = 11, + }, + { .ucastq_start = 160, + .ucastq_end = 175, + .mcastq_start = 276, + .mcastq_end = 279, + .flow_id_start = 40, + .flow_id_end = 43, + .l0node_start = 64, + .l0node_end = 79, + .l1node_start = 12, + .l1node_end = 15, + }, + { .ucastq_start = 176, + .ucastq_end = 191, + .mcastq_start = 280, + .mcastq_end = 283, + .flow_id_start = 44, + .flow_id_end = 47, + .l0node_start = 80, + .l0node_end = 95, + .l1node_start = 16, + .l1node_end = 19, + }, + { .ucastq_start = 192, + .ucastq_end = 207, + .mcastq_start = 284, + .mcastq_end = 287, + .flow_id_start = 48, + .flow_id_end = 51, + .l0node_start = 96, + .l0node_end = 111, + .l1node_start = 20, + .l1node_end = 23, + }, + { .ucastq_start = 208, + .ucastq_end = 223, + .mcastq_start = 288, + .mcastq_end = 291, + .flow_id_start = 52, + .flow_id_end = 55, + .l0node_start = 112, + .l0node_end = 127, + .l1node_start = 24, + .l1node_end = 27, + }, + { .ucastq_start = 224, + .ucastq_end = 239, + .mcastq_start = 292, + .mcastq_end = 295, + .flow_id_start = 56, + .flow_id_end = 59, + .l0node_start = 128, + .l0node_end = 143, + .l1node_start = 28, + .l1node_end = 31, + }, + { .ucastq_start = 240, + .ucastq_end = 255, + .mcastq_start = 296, + .mcastq_end = 299, + .flow_id_start = 60, + .flow_id_end = 63, + .l0node_start = 144, + .l0node_end = 159, + .l1node_start = 32, + .l1node_end = 35, + }, + { .ucastq_start = 64, + .ucastq_end = 143, + .mcastq_start = 0, + .mcastq_end = 0, + .flow_id_start = 1, + .flow_id_end = 35, + .l0node_start = 8, + .l0node_end = 47, + .l1node_start = 1, + .l1node_end = 7, + }, +}; + +/* Set the PPE queue level scheduler configuration. 
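+ * Five tables are programmed per queue (level 0) node: the flow map,
+ * the C (commit) and E (exceed) flow configs, the flow-to-port map and
+ * the node meter length.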
*/ +static int ppe_scheduler_l0_queue_map_set(struct ppe_device *ppe_dev, + int node_id, int port, + struct ppe_scheduler_cfg scheduler_cfg) +{ + u32 val, reg; + int ret; + + reg = PPE_L0_FLOW_MAP_TBL_ADDR + node_id * PPE_L0_FLOW_MAP_TBL_INC; + val = FIELD_PREP(PPE_L0_FLOW_MAP_TBL_FLOW_ID, scheduler_cfg.flow_id); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_C_PRI, scheduler_cfg.pri); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_E_PRI, scheduler_cfg.pri); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_C_NODE_WT, scheduler_cfg.drr_node_wt); + val |= FIELD_PREP(PPE_L0_FLOW_MAP_TBL_E_NODE_WT, scheduler_cfg.drr_node_wt); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_C_FLOW_CFG_TBL_ADDR + + (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) * + PPE_L0_C_FLOW_CFG_TBL_INC; + val = FIELD_PREP(PPE_L0_C_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id); + val |= FIELD_PREP(PPE_L0_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_E_FLOW_CFG_TBL_ADDR + + (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) * + PPE_L0_E_FLOW_CFG_TBL_INC; + val = FIELD_PREP(PPE_L0_E_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id); + val |= FIELD_PREP(PPE_L0_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_FLOW_PORT_MAP_TBL_ADDR + node_id * PPE_L0_FLOW_PORT_MAP_TBL_INC; + val = FIELD_PREP(PPE_L0_FLOW_PORT_MAP_TBL_PORT_NUM, port); + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L0_COMP_CFG_TBL_ADDR + node_id * PPE_L0_COMP_CFG_TBL_INC; + val = FIELD_PREP(PPE_L0_COMP_CFG_TBL_NODE_METER_LEN, scheduler_cfg.frame_mode); + + return regmap_update_bits(ppe_dev->regmap, reg, + PPE_L0_COMP_CFG_TBL_NODE_METER_LEN, + val); +} + +/* Set the PPE flow level scheduler configuration. 
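+ * The same five-table sequence as the queue level is used here, with
+ * the L1 (flow level) registers.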
*/ +static int ppe_scheduler_l1_queue_map_set(struct ppe_device *ppe_dev, + int node_id, int port, + struct ppe_scheduler_cfg scheduler_cfg) +{ + u32 val, reg; + int ret; + + val = FIELD_PREP(PPE_L1_FLOW_MAP_TBL_FLOW_ID, scheduler_cfg.flow_id); + val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_C_PRI, scheduler_cfg.pri); + val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_E_PRI, scheduler_cfg.pri); + val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_C_NODE_WT, scheduler_cfg.drr_node_wt); + val |= FIELD_PREP(PPE_L1_FLOW_MAP_TBL_E_NODE_WT, scheduler_cfg.drr_node_wt); + reg = PPE_L1_FLOW_MAP_TBL_ADDR + node_id * PPE_L1_FLOW_MAP_TBL_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + val = FIELD_PREP(PPE_L1_C_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id); + val |= FIELD_PREP(PPE_L1_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet); + reg = PPE_L1_C_FLOW_CFG_TBL_ADDR + + (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) * + PPE_L1_C_FLOW_CFG_TBL_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + val = FIELD_PREP(PPE_L1_E_FLOW_CFG_TBL_NODE_ID, scheduler_cfg.drr_node_id); + val |= FIELD_PREP(PPE_L1_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT, scheduler_cfg.unit_is_packet); + reg = PPE_L1_E_FLOW_CFG_TBL_ADDR + + (scheduler_cfg.flow_id * PPE_QUEUE_SCH_PRI_NUM + scheduler_cfg.pri) * + PPE_L1_E_FLOW_CFG_TBL_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + val = FIELD_PREP(PPE_L1_FLOW_PORT_MAP_TBL_PORT_NUM, port); + reg = PPE_L1_FLOW_PORT_MAP_TBL_ADDR + node_id * PPE_L1_FLOW_PORT_MAP_TBL_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + reg = PPE_L1_COMP_CFG_TBL_ADDR + node_id * PPE_L1_COMP_CFG_TBL_INC; + val = FIELD_PREP(PPE_L1_COMP_CFG_TBL_NODE_METER_LEN, scheduler_cfg.frame_mode); + + return regmap_update_bits(ppe_dev->regmap, reg, PPE_L1_COMP_CFG_TBL_NODE_METER_LEN, val); +} + +/** + * ppe_queue_scheduler_set - Configure scheduler for PPE hardware queue + * @ppe_dev: PPE device + * @node_id: PPE queue ID or flow ID + * @flow_level: Flow level scheduler or queue level scheduler + * @port: PPE port ID set scheduler configuration + * @scheduler_cfg: PPE scheduler configuration + * + * PPE scheduler configuration supports queue level and flow level on + * the PPE egress port. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_queue_scheduler_set(struct ppe_device *ppe_dev, + int node_id, bool flow_level, int port, + struct ppe_scheduler_cfg scheduler_cfg) +{ + if (flow_level) + return ppe_scheduler_l1_queue_map_set(ppe_dev, node_id, + port, scheduler_cfg); + + return ppe_scheduler_l0_queue_map_set(ppe_dev, node_id, + port, scheduler_cfg); +} + +/** + * ppe_queue_ucast_base_set - Set PPE unicast queue base ID and profile ID + * @ppe_dev: PPE device + * @queue_dst: PPE queue destination configuration + * @queue_base: PPE queue base ID + * @profile_id: Profile ID + * + * The PPE unicast queue base ID and profile ID are configured based on the + * destination port information that can be service code or CPU code or the + * destination port. + * + * Return: 0 on success, negative error code on failure. 
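+ *
+ * For instance, directing packets with CPU code 101 of source profile 0
+ * to a queue base (illustrative values, mirroring ppe_queue_dest_init()):
+ *
+ *	struct ppe_queue_ucast_dest dst = {
+ *		.cpu_code_en = true,
+ *		.cpu_code = 101,
+ *	};
+ *
+ *	ret = ppe_queue_ucast_base_set(ppe_dev, dst, queue_base, 0);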
+ */ +int ppe_queue_ucast_base_set(struct ppe_device *ppe_dev, + struct ppe_queue_ucast_dest queue_dst, + int queue_base, int profile_id) +{ + int index, profile_size; + u32 val, reg; + + profile_size = queue_dst.src_profile << 8; + if (queue_dst.service_code_en) + index = PPE_QUEUE_BASE_SERVICE_CODE + profile_size + + queue_dst.service_code; + else if (queue_dst.cpu_code_en) + index = PPE_QUEUE_BASE_CPU_CODE + profile_size + + queue_dst.cpu_code; + else + index = profile_size + queue_dst.dest_port; + + val = FIELD_PREP(PPE_UCAST_QUEUE_MAP_TBL_PROFILE_ID, profile_id); + val |= FIELD_PREP(PPE_UCAST_QUEUE_MAP_TBL_QUEUE_ID, queue_base); + reg = PPE_UCAST_QUEUE_MAP_TBL_ADDR + index * PPE_UCAST_QUEUE_MAP_TBL_INC; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_queue_ucast_offset_pri_set - Set PPE unicast queue offset based on priority + * @ppe_dev: PPE device + * @profile_id: Profile ID + * @priority: PPE internal priority to be used to set queue offset + * @queue_offset: Queue offset used for calculating the destination queue ID + * + * The PPE unicast queue offset is configured based on the PPE + * internal priority. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_queue_ucast_offset_pri_set(struct ppe_device *ppe_dev, + int profile_id, + int priority, + int queue_offset) +{ + u32 val, reg; + int index; + + index = (profile_id << 4) + priority; + val = FIELD_PREP(PPE_UCAST_PRIORITY_MAP_TBL_CLASS, queue_offset); + reg = PPE_UCAST_PRIORITY_MAP_TBL_ADDR + index * PPE_UCAST_PRIORITY_MAP_TBL_INC; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_queue_ucast_offset_hash_set - Set PPE unicast queue offset based on hash + * @ppe_dev: PPE device + * @profile_id: Profile ID + * @rss_hash: Packet hash value to be used to set queue offset + * @queue_offset: Queue offset used for calculating the destination queue ID + * + * The PPE unicast queue offset is configured based on the RSS hash value. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_queue_ucast_offset_hash_set(struct ppe_device *ppe_dev, + int profile_id, + int rss_hash, + int queue_offset) +{ + u32 val, reg; + int index; + + index = (profile_id << 8) + rss_hash; + val = FIELD_PREP(PPE_UCAST_HASH_MAP_TBL_HASH, queue_offset); + reg = PPE_UCAST_HASH_MAP_TBL_ADDR + index * PPE_UCAST_HASH_MAP_TBL_INC; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_port_resource_get - Get PPE resource per port + * @ppe_dev: PPE device + * @port: PPE port + * @type: Resource type + * @res_start: Resource start ID returned + * @res_end: Resource end ID returned + * + * PPE resource is assigned per PPE port, which is acquired for QoS scheduler. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_port_resource_get(struct ppe_device *ppe_dev, int port, + enum ppe_resource_type type, + int *res_start, int *res_end) +{ + struct ppe_port_schedule_resource res; + + /* The reserved resource with the maximum port ID of PPE is + * also allowed to be acquired. 
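+ * The bounds check below therefore uses '>' rather than '>=', so an
+ * index equal to num_ports is accepted and maps to the reserved entry
+ * at the end of ppe_scheduler_res[].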
+ */ + if (port > ppe_dev->num_ports) + return -EINVAL; + + res = ppe_scheduler_res[port]; + switch (type) { + case PPE_RES_UCAST: + *res_start = res.ucastq_start; + *res_end = res.ucastq_end; + break; + case PPE_RES_MCAST: + *res_start = res.mcastq_start; + *res_end = res.mcastq_end; + break; + case PPE_RES_FLOW_ID: + *res_start = res.flow_id_start; + *res_end = res.flow_id_end; + break; + case PPE_RES_L0_NODE: + *res_start = res.l0node_start; + *res_end = res.l0node_end; + break; + case PPE_RES_L1_NODE: + *res_start = res.l1node_start; + *res_end = res.l1node_end; + break; + default: + return -EINVAL; + } + + return 0; +} + +/** + * ppe_sc_config_set - Set PPE service code configuration + * @ppe_dev: PPE device + * @sc: Service ID, 0-255 supported by PPE + * @cfg: Service code configuration + * + * PPE service code is used by the PPE during its packet processing stages, + * to perform or bypass certain selected packet operations on the packet. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_sc_config_set(struct ppe_device *ppe_dev, int sc, struct ppe_sc_cfg cfg) +{ + u32 val, reg, servcode_val[2] = {}; + unsigned long bitmap_value; + int ret; + + val = FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_PORT_ID_VALID, cfg.dest_port_valid); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_PORT_ID, cfg.dest_port); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_DIRECTION, cfg.is_src); + + bitmap_value = bitmap_read(cfg.bitmaps.egress, 0, PPE_SC_BYPASS_EGRESS_SIZE); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_DST_BYPASS_BITMAP, bitmap_value); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_RX_CNT_EN, + test_bit(PPE_SC_BYPASS_COUNTER_RX, cfg.bitmaps.counter)); + val |= FIELD_PREP(PPE_IN_L2_SERVICE_TBL_TX_CNT_EN, + test_bit(PPE_SC_BYPASS_COUNTER_TX, cfg.bitmaps.counter)); + reg = PPE_IN_L2_SERVICE_TBL_ADDR + PPE_IN_L2_SERVICE_TBL_INC * sc; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + + bitmap_value = bitmap_read(cfg.bitmaps.ingress, 0, PPE_SC_BYPASS_INGRESS_SIZE); + PPE_SERVICE_SET_BYPASS_BITMAP(servcode_val, bitmap_value); + PPE_SERVICE_SET_RX_CNT_EN(servcode_val, + test_bit(PPE_SC_BYPASS_COUNTER_RX_VLAN, cfg.bitmaps.counter)); + reg = PPE_SERVICE_TBL_ADDR + PPE_SERVICE_TBL_INC * sc; + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + servcode_val, ARRAY_SIZE(servcode_val)); + if (ret) + return ret; + + reg = PPE_EG_SERVICE_TBL_ADDR + PPE_EG_SERVICE_TBL_INC * sc; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + servcode_val, ARRAY_SIZE(servcode_val)); + if (ret) + return ret; + + PPE_EG_SERVICE_SET_NEXT_SERVCODE(servcode_val, cfg.next_service_code); + PPE_EG_SERVICE_SET_UPDATE_ACTION(servcode_val, cfg.eip_field_update_bitmap); + PPE_EG_SERVICE_SET_HW_SERVICE(servcode_val, cfg.eip_hw_service); + PPE_EG_SERVICE_SET_OFFSET_SEL(servcode_val, cfg.eip_offset_sel); + PPE_EG_SERVICE_SET_TX_CNT_EN(servcode_val, + test_bit(PPE_SC_BYPASS_COUNTER_TX_VLAN, cfg.bitmaps.counter)); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + servcode_val, ARRAY_SIZE(servcode_val)); + if (ret) + return ret; + + bitmap_value = bitmap_read(cfg.bitmaps.tunnel, 0, PPE_SC_BYPASS_TUNNEL_SIZE); + val = FIELD_PREP(PPE_TL_SERVICE_TBL_BYPASS_BITMAP, bitmap_value); + reg = PPE_TL_SERVICE_TBL_ADDR + PPE_TL_SERVICE_TBL_INC * sc; + + return regmap_write(ppe_dev->regmap, reg, val); +} + +/** + * ppe_counter_enable_set - Set PPE port counter enabled + * @ppe_dev: PPE device + * @port: PPE port ID + * + * Enable PPE counters on the given port for the unicast packet, multicast + * packet and VLAN 
packet received and transmitted by PPE. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_counter_enable_set(struct ppe_device *ppe_dev, int port) +{ + u32 reg, mru_mtu_val[3]; + int ret; + + reg = PPE_MRU_MTU_CTRL_TBL_ADDR + PPE_MRU_MTU_CTRL_TBL_INC * port; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + mru_mtu_val, ARRAY_SIZE(mru_mtu_val)); + if (ret) + return ret; + + PPE_MRU_MTU_CTRL_SET_RX_CNT_EN(mru_mtu_val, true); + PPE_MRU_MTU_CTRL_SET_TX_CNT_EN(mru_mtu_val, true); + ret = regmap_bulk_write(ppe_dev->regmap, reg, + mru_mtu_val, ARRAY_SIZE(mru_mtu_val)); + if (ret) + return ret; + + reg = PPE_MC_MTU_CTRL_TBL_ADDR + PPE_MC_MTU_CTRL_TBL_INC * port; + ret = regmap_set_bits(ppe_dev->regmap, reg, PPE_MC_MTU_CTRL_TBL_TX_CNT_EN); + if (ret) + return ret; + + reg = PPE_PORT_EG_VLAN_TBL_ADDR + PPE_PORT_EG_VLAN_TBL_INC * port; + + return regmap_set_bits(ppe_dev->regmap, reg, PPE_PORT_EG_VLAN_TBL_TX_COUNTING_EN); +} + +static int ppe_rss_hash_ipv4_config(struct ppe_device *ppe_dev, int index, + struct ppe_rss_hash_cfg cfg) +{ + u32 reg, val; + + switch (index) { + case 0: + val = cfg.hash_sip_mix[0]; + break; + case 1: + val = cfg.hash_dip_mix[0]; + break; + case 2: + val = cfg.hash_protocol_mix; + break; + case 3: + val = cfg.hash_dport_mix; + break; + case 4: + val = cfg.hash_sport_mix; + break; + default: + return -EINVAL; + } + + reg = PPE_RSS_HASH_MIX_IPV4_ADDR + index * PPE_RSS_HASH_MIX_IPV4_INC; + + return regmap_update_bits(ppe_dev->regmap, reg, + PPE_RSS_HASH_MIX_IPV4_VAL, + FIELD_PREP(PPE_RSS_HASH_MIX_IPV4_VAL, val)); +} + +static int ppe_rss_hash_ipv6_config(struct ppe_device *ppe_dev, int index, + struct ppe_rss_hash_cfg cfg) +{ + u32 reg, val; + + switch (index) { + case 0 ... 3: + val = cfg.hash_sip_mix[index]; + break; + case 4 ... 7: + val = cfg.hash_dip_mix[index - 4]; + break; + case 8: + val = cfg.hash_protocol_mix; + break; + case 9: + val = cfg.hash_dport_mix; + break; + case 10: + val = cfg.hash_sport_mix; + break; + default: + return -EINVAL; + } + + reg = PPE_RSS_HASH_MIX_ADDR + index * PPE_RSS_HASH_MIX_INC; + + return regmap_update_bits(ppe_dev->regmap, reg, + PPE_RSS_HASH_MIX_VAL, + FIELD_PREP(PPE_RSS_HASH_MIX_VAL, val)); +} + +/** + * ppe_rss_hash_config_set - Configure the PPE hash settings for the packet received. + * @ppe_dev: PPE device. + * @mode: Configure RSS hash for the packet type IPv4 and IPv6. + * @cfg: RSS hash configuration. + * + * PPE RSS hash settings are configured for the packet type IPv4 and IPv6. + * + * Return: 0 on success, negative error code on failure. 
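+ *
+ * Both modes may be selected in one call by combining the mode bits,
+ * e.g. with a populated struct ppe_rss_hash_cfg cfg:
+ *
+ *	ret = ppe_rss_hash_config_set(ppe_dev,
+ *				      PPE_RSS_HASH_MODE_IPV4 |
+ *				      PPE_RSS_HASH_MODE_IPV6,
+ *				      cfg);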
+ */ +int ppe_rss_hash_config_set(struct ppe_device *ppe_dev, int mode, + struct ppe_rss_hash_cfg cfg) +{ + u32 val, reg; + int i, ret; + + if (mode & PPE_RSS_HASH_MODE_IPV4) { + val = FIELD_PREP(PPE_RSS_HASH_MASK_IPV4_HASH_MASK, cfg.hash_mask); + val |= FIELD_PREP(PPE_RSS_HASH_MASK_IPV4_FRAGMENT, cfg.hash_fragment_mode); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_MASK_IPV4_ADDR, val); + if (ret) + return ret; + + val = FIELD_PREP(PPE_RSS_HASH_SEED_IPV4_VAL, cfg.hash_seed); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_SEED_IPV4_ADDR, val); + if (ret) + return ret; + + for (i = 0; i < PPE_RSS_HASH_MIX_IPV4_ENTRIES; i++) { + ret = ppe_rss_hash_ipv4_config(ppe_dev, i, cfg); + if (ret) + return ret; + } + + for (i = 0; i < PPE_RSS_HASH_FIN_IPV4_ENTRIES; i++) { + val = FIELD_PREP(PPE_RSS_HASH_FIN_IPV4_INNER, cfg.hash_fin_inner[i]); + val |= FIELD_PREP(PPE_RSS_HASH_FIN_IPV4_OUTER, cfg.hash_fin_outer[i]); + reg = PPE_RSS_HASH_FIN_IPV4_ADDR + i * PPE_RSS_HASH_FIN_IPV4_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + } + } + + if (mode & PPE_RSS_HASH_MODE_IPV6) { + val = FIELD_PREP(PPE_RSS_HASH_MASK_HASH_MASK, cfg.hash_mask); + val |= FIELD_PREP(PPE_RSS_HASH_MASK_FRAGMENT, cfg.hash_fragment_mode); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_MASK_ADDR, val); + if (ret) + return ret; + + val = FIELD_PREP(PPE_RSS_HASH_SEED_VAL, cfg.hash_seed); + ret = regmap_write(ppe_dev->regmap, PPE_RSS_HASH_SEED_ADDR, val); + if (ret) + return ret; + + for (i = 0; i < PPE_RSS_HASH_MIX_ENTRIES; i++) { + ret = ppe_rss_hash_ipv6_config(ppe_dev, i, cfg); + if (ret) + return ret; + } + + for (i = 0; i < PPE_RSS_HASH_FIN_ENTRIES; i++) { + val = FIELD_PREP(PPE_RSS_HASH_FIN_INNER, cfg.hash_fin_inner[i]); + val |= FIELD_PREP(PPE_RSS_HASH_FIN_OUTER, cfg.hash_fin_outer[i]); + reg = PPE_RSS_HASH_FIN_ADDR + i * PPE_RSS_HASH_FIN_INC; + + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + return ret; + } + } + + return 0; +} + +/** + * ppe_ring_queue_map_set - Set the PPE queue to Ethernet DMA ring mapping + * @ppe_dev: PPE device + * @ring_id: Ethernet DMA ring ID + * @queue_map: Bit map of queue IDs to given Ethernet DMA ring + * + * Configure the mapping from a set of PPE queues to a given Ethernet DMA ring. + * + * Return: 0 on success, negative error code on failure. + */ +int ppe_ring_queue_map_set(struct ppe_device *ppe_dev, int ring_id, u32 *queue_map) +{ + u32 reg, queue_bitmap_val[PPE_RING_TO_QUEUE_BITMAP_WORD_CNT]; + + memcpy(queue_bitmap_val, queue_map, sizeof(queue_bitmap_val)); + reg = PPE_RING_Q_MAP_TBL_ADDR + PPE_RING_Q_MAP_TBL_INC * ring_id; + + return regmap_bulk_write(ppe_dev->regmap, reg, + queue_bitmap_val, + ARRAY_SIZE(queue_bitmap_val)); +} + +static int ppe_config_bm_threshold(struct ppe_device *ppe_dev, int bm_port_id, + const struct ppe_bm_port_config port_cfg) +{ + u32 reg, val, bm_fc_val[2]; + int ret; + + reg = PPE_BM_PORT_FC_CFG_TBL_ADDR + PPE_BM_PORT_FC_CFG_TBL_INC * bm_port_id; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + bm_fc_val, ARRAY_SIZE(bm_fc_val)); + if (ret) + return ret; + + /* Configure BM flow control related threshold. 
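+ * The 11-bit ceiling is split across two register fields below: bits
+ * 2:0 are written to the low field and bits 10:3 to the high field.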
*/ + PPE_BM_PORT_FC_SET_WEIGHT(bm_fc_val, port_cfg.weight); + PPE_BM_PORT_FC_SET_RESUME_OFFSET(bm_fc_val, port_cfg.resume_offset); + PPE_BM_PORT_FC_SET_RESUME_THRESHOLD(bm_fc_val, port_cfg.resume_ceil); + PPE_BM_PORT_FC_SET_DYNAMIC(bm_fc_val, port_cfg.dynamic); + PPE_BM_PORT_FC_SET_REACT_LIMIT(bm_fc_val, port_cfg.in_fly_buf); + PPE_BM_PORT_FC_SET_PRE_ALLOC(bm_fc_val, port_cfg.pre_alloc); + + /* Configure low/high bits of the ceiling for the BM port. */ + val = FIELD_GET(GENMASK(2, 0), port_cfg.ceil); + PPE_BM_PORT_FC_SET_CEILING_LOW(bm_fc_val, val); + val = FIELD_GET(GENMASK(10, 3), port_cfg.ceil); + PPE_BM_PORT_FC_SET_CEILING_HIGH(bm_fc_val, val); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + bm_fc_val, ARRAY_SIZE(bm_fc_val)); + if (ret) + return ret; + + /* Assign the default group ID 0 to the BM port. */ + val = FIELD_PREP(PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID, 0); + reg = PPE_BM_PORT_GROUP_ID_ADDR + PPE_BM_PORT_GROUP_ID_INC * bm_port_id; + ret = regmap_update_bits(ppe_dev->regmap, reg, + PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID, + val); + if (ret) + return ret; + + /* Enable BM port flow control. */ + reg = PPE_BM_PORT_FC_MODE_ADDR + PPE_BM_PORT_FC_MODE_INC * bm_port_id; + + return regmap_set_bits(ppe_dev->regmap, reg, PPE_BM_PORT_FC_MODE_EN); +} + +/* Configure the buffer threshold for the port flow control function. */ +static int ppe_config_bm(struct ppe_device *ppe_dev) +{ + const struct ppe_bm_port_config *port_cfg; + unsigned int i, bm_port_id, port_cfg_cnt; + u32 reg, val; + int ret; + + /* Configure the allocated buffer number only for group 0. + * The buffer number of group 1-3 is already cleared to 0 + * after PPE reset during the probe of PPE driver. + */ + reg = PPE_BM_SHARED_GROUP_CFG_ADDR; + val = FIELD_PREP(PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT, + ipq9574_ppe_bm_group_config); + ret = regmap_update_bits(ppe_dev->regmap, reg, + PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT, + val); + if (ret) + goto bm_config_fail; + + /* Configure buffer thresholds for the BM ports. */ + port_cfg = ipq9574_ppe_bm_port_config; + port_cfg_cnt = ARRAY_SIZE(ipq9574_ppe_bm_port_config); + for (i = 0; i < port_cfg_cnt; i++) { + for (bm_port_id = port_cfg[i].port_id_start; + bm_port_id <= port_cfg[i].port_id_end; bm_port_id++) { + ret = ppe_config_bm_threshold(ppe_dev, bm_port_id, + port_cfg[i]); + if (ret) + goto bm_config_fail; + } + } + + return 0; + +bm_config_fail: + dev_err(ppe_dev->dev, "PPE BM config error %d\n", ret); + return ret; +} + +/* Configure PPE hardware queue depth, which is decided by the threshold + * of queue. + */ +static int ppe_config_qm(struct ppe_device *ppe_dev) +{ + const struct ppe_qm_queue_config *queue_cfg; + int ret, i, queue_id, queue_cfg_count; + u32 reg, multicast_queue_cfg[5]; + u32 unicast_queue_cfg[4]; + u32 group_cfg[3]; + + /* Assign the buffer number to the group 0 by default. */ + reg = PPE_AC_GRP_CFG_TBL_ADDR; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + group_cfg, ARRAY_SIZE(group_cfg)); + if (ret) + goto qm_config_fail; + + PPE_AC_GRP_SET_BUF_LIMIT(group_cfg, ipq9574_ppe_qm_group_config); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + group_cfg, ARRAY_SIZE(group_cfg)); + if (ret) + goto qm_config_fail; + + queue_cfg = ipq9574_ppe_qm_queue_config; + queue_cfg_count = ARRAY_SIZE(ipq9574_ppe_qm_queue_config); + for (i = 0; i < queue_cfg_count; i++) { + queue_id = queue_cfg[i].queue_start; + + /* Configure threshold for dropping packets separately for + * unicast and multicast PPE queues. 
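+ * Queue IDs below PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES are unicast
+ * queues; the remaining IDs are multicast queues.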
+ */ + while (queue_id <= queue_cfg[i].queue_end) { + if (queue_id < PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES) { + reg = PPE_AC_UNICAST_QUEUE_CFG_TBL_ADDR + + PPE_AC_UNICAST_QUEUE_CFG_TBL_INC * queue_id; + + ret = regmap_bulk_read(ppe_dev->regmap, reg, + unicast_queue_cfg, + ARRAY_SIZE(unicast_queue_cfg)); + if (ret) + goto qm_config_fail; + + PPE_AC_UNICAST_QUEUE_SET_EN(unicast_queue_cfg, true); + PPE_AC_UNICAST_QUEUE_SET_GRP_ID(unicast_queue_cfg, 0); + PPE_AC_UNICAST_QUEUE_SET_PRE_LIMIT(unicast_queue_cfg, + queue_cfg[i].prealloc_buf); + PPE_AC_UNICAST_QUEUE_SET_DYNAMIC(unicast_queue_cfg, + queue_cfg[i].dynamic); + PPE_AC_UNICAST_QUEUE_SET_WEIGHT(unicast_queue_cfg, + queue_cfg[i].weight); + PPE_AC_UNICAST_QUEUE_SET_THRESHOLD(unicast_queue_cfg, + queue_cfg[i].ceil); + PPE_AC_UNICAST_QUEUE_SET_GRN_RESUME(unicast_queue_cfg, + queue_cfg[i].resume_offset); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + unicast_queue_cfg, + ARRAY_SIZE(unicast_queue_cfg)); + if (ret) + goto qm_config_fail; + } else { + reg = PPE_AC_MULTICAST_QUEUE_CFG_TBL_ADDR + + PPE_AC_MULTICAST_QUEUE_CFG_TBL_INC * queue_id; + + ret = regmap_bulk_read(ppe_dev->regmap, reg, + multicast_queue_cfg, + ARRAY_SIZE(multicast_queue_cfg)); + if (ret) + goto qm_config_fail; + + PPE_AC_MULTICAST_QUEUE_SET_EN(multicast_queue_cfg, true); + PPE_AC_MULTICAST_QUEUE_SET_GRN_GRP_ID(multicast_queue_cfg, 0); + PPE_AC_MULTICAST_QUEUE_SET_GRN_PRE_LIMIT(multicast_queue_cfg, + queue_cfg[i].prealloc_buf); + PPE_AC_MULTICAST_QUEUE_SET_GRN_THRESHOLD(multicast_queue_cfg, + queue_cfg[i].ceil); + PPE_AC_MULTICAST_QUEUE_SET_GRN_RESUME(multicast_queue_cfg, + queue_cfg[i].resume_offset); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + multicast_queue_cfg, + ARRAY_SIZE(multicast_queue_cfg)); + if (ret) + goto qm_config_fail; + } + + /* Enable enqueue. */ + reg = PPE_ENQ_OPR_TBL_ADDR + PPE_ENQ_OPR_TBL_INC * queue_id; + ret = regmap_clear_bits(ppe_dev->regmap, reg, + PPE_ENQ_OPR_TBL_ENQ_DISABLE); + if (ret) + goto qm_config_fail; + + /* Enable dequeue. */ + reg = PPE_DEQ_OPR_TBL_ADDR + PPE_DEQ_OPR_TBL_INC * queue_id; + ret = regmap_clear_bits(ppe_dev->regmap, reg, + PPE_DEQ_OPR_TBL_DEQ_DISABLE); + if (ret) + goto qm_config_fail; + + queue_id++; + } + } + + /* Enable queue counter for all PPE hardware queues. */ + ret = regmap_set_bits(ppe_dev->regmap, PPE_EG_BRIDGE_CONFIG_ADDR, + PPE_EG_BRIDGE_CONFIG_QUEUE_CNT_EN); + if (ret) + goto qm_config_fail; + + return 0; + +qm_config_fail: + dev_err(ppe_dev->dev, "PPE QM config error %d\n", ret); + return ret; +} + +static int ppe_node_scheduler_config(struct ppe_device *ppe_dev, + const struct ppe_scheduler_port_config config) +{ + struct ppe_scheduler_cfg sch_cfg; + int ret, i; + + for (i = 0; i < config.loop_num; i++) { + if (!config.pri_max) { + /* Round robin scheduler without priority. */ + sch_cfg.flow_id = config.flow_id; + sch_cfg.pri = 0; + sch_cfg.drr_node_id = config.drr_node_id; + } else { + sch_cfg.flow_id = config.flow_id + (i / config.pri_max); + sch_cfg.pri = i % config.pri_max; + sch_cfg.drr_node_id = config.drr_node_id + i; + } + + /* Scheduler weight, must be more than 0. */ + sch_cfg.drr_node_wt = 1; + /* Byte based to be scheduled. */ + sch_cfg.unit_is_packet = false; + /* Frame + CRC calculated. 
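+ * The scheduled length covers the Ethernet frame and CRC, excluding
+ * IPG and preamble.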
*/ + sch_cfg.frame_mode = PPE_SCH_WITH_FRAME_CRC; + + ret = ppe_queue_scheduler_set(ppe_dev, config.node_id + i, + config.flow_level, + config.port, + sch_cfg); + if (ret) + return ret; + } + + return 0; +} + +/* Initialize scheduler settings for PPE buffer utilization and dispatching + * packet on PPE queue. + */ +static int ppe_config_scheduler(struct ppe_device *ppe_dev) +{ + const struct ppe_scheduler_port_config *port_cfg; + const struct ppe_scheduler_qm_config *qm_cfg; + const struct ppe_scheduler_bm_config *bm_cfg; + int ret, i, count; + u32 val, reg; + + count = ARRAY_SIZE(ipq9574_ppe_sch_bm_config); + bm_cfg = ipq9574_ppe_sch_bm_config; + + /* Configure the depth of BM scheduler entries. */ + val = FIELD_PREP(PPE_BM_SCH_CTRL_SCH_DEPTH, count); + val |= FIELD_PREP(PPE_BM_SCH_CTRL_SCH_OFFSET, 0); + val |= FIELD_PREP(PPE_BM_SCH_CTRL_SCH_EN, 1); + + ret = regmap_write(ppe_dev->regmap, PPE_BM_SCH_CTRL_ADDR, val); + if (ret) + goto sch_config_fail; + + /* Configure each BM scheduler entry with the valid ingress port and + * egress port, the second port takes effect when the specified port + * is in the inactive state. + */ + for (i = 0; i < count; i++) { + val = FIELD_PREP(PPE_BM_SCH_CFG_TBL_VALID, bm_cfg[i].valid); + val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_DIR, bm_cfg[i].dir); + val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_PORT_NUM, bm_cfg[i].port); + val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_SECOND_PORT_VALID, + bm_cfg[i].backup_port_valid); + val |= FIELD_PREP(PPE_BM_SCH_CFG_TBL_SECOND_PORT, + bm_cfg[i].backup_port); + + reg = PPE_BM_SCH_CFG_TBL_ADDR + i * PPE_BM_SCH_CFG_TBL_INC; + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + goto sch_config_fail; + } + + count = ARRAY_SIZE(ipq9574_ppe_sch_qm_config); + qm_cfg = ipq9574_ppe_sch_qm_config; + + /* Configure the depth of QM scheduler entries. */ + val = FIELD_PREP(PPE_PSCH_SCH_DEPTH_CFG_SCH_DEPTH, count); + ret = regmap_write(ppe_dev->regmap, PPE_PSCH_SCH_DEPTH_CFG_ADDR, val); + if (ret) + goto sch_config_fail; + + /* Configure each QM scheduler entry with enqueue port and dequeue + * port, the second port takes effect when the specified dequeue + * port is in the inactive port. + */ + for (i = 0; i < count; i++) { + val = FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_ENS_PORT_BITMAP, + qm_cfg[i].ensch_port_bmp); + val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_ENS_PORT, + qm_cfg[i].ensch_port); + val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_PORT, + qm_cfg[i].desch_port); + val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT_EN, + qm_cfg[i].desch_backup_port_valid); + val |= FIELD_PREP(PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT, + qm_cfg[i].desch_backup_port); + + reg = PPE_PSCH_SCH_CFG_TBL_ADDR + i * PPE_PSCH_SCH_CFG_TBL_INC; + ret = regmap_write(ppe_dev->regmap, reg, val); + if (ret) + goto sch_config_fail; + } + + count = ARRAY_SIZE(ppe_port_sch_config); + port_cfg = ppe_port_sch_config; + + /* Configure scheduler per PPE queue or flow. */ + for (i = 0; i < count; i++) { + if (port_cfg[i].port >= ppe_dev->num_ports) + break; + + ret = ppe_node_scheduler_config(ppe_dev, port_cfg[i]); + if (ret) + goto sch_config_fail; + } + + return 0; + +sch_config_fail: + dev_err(ppe_dev->dev, "PPE scheduler arbitration config error %d\n", ret); + return ret; +}; + +/* Configure PPE queue destination of each PPE port. 
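+ * Roughly, the hardware resolves the egress queue as the per-port queue
+ * base plus an offset looked up either by internal priority or by RSS
+ * hash, all of which are initialized below.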
*/ +static int ppe_queue_dest_init(struct ppe_device *ppe_dev) +{ + int ret, port_id, index, q_base, q_offset, res_start, res_end, pri_max; + struct ppe_queue_ucast_dest queue_dst; + + for (port_id = 0; port_id < ppe_dev->num_ports; port_id++) { + memset(&queue_dst, 0, sizeof(queue_dst)); + + ret = ppe_port_resource_get(ppe_dev, port_id, PPE_RES_UCAST, + &res_start, &res_end); + if (ret) + return ret; + + q_base = res_start; + queue_dst.dest_port = port_id; + + /* Configure queue base ID and profile ID that is same as + * physical port ID. + */ + ret = ppe_queue_ucast_base_set(ppe_dev, queue_dst, + q_base, port_id); + if (ret) + return ret; + + /* Queue priority range supported by each PPE port */ + ret = ppe_port_resource_get(ppe_dev, port_id, PPE_RES_L0_NODE, + &res_start, &res_end); + if (ret) + return ret; + + pri_max = res_end - res_start; + + /* Redirect ARP reply packet with the max priority on CPU port, + * which keeps the ARP reply directed to CPU (CPU code is 101) + * with highest priority queue of EDMA. + */ + if (port_id == 0) { + memset(&queue_dst, 0, sizeof(queue_dst)); + + queue_dst.cpu_code_en = true; + queue_dst.cpu_code = 101; + ret = ppe_queue_ucast_base_set(ppe_dev, queue_dst, + q_base + pri_max, + 0); + if (ret) + return ret; + } + + /* Initialize the queue offset of internal priority. */ + for (index = 0; index < PPE_QUEUE_INTER_PRI_NUM; index++) { + q_offset = index > pri_max ? pri_max : index; + + ret = ppe_queue_ucast_offset_pri_set(ppe_dev, port_id, + index, q_offset); + if (ret) + return ret; + } + + /* Initialize the queue offset of RSS hash as 0 to avoid the + * random hardware value that will lead to the unexpected + * destination queue generated. + */ + for (index = 0; index < PPE_QUEUE_HASH_NUM; index++) { + ret = ppe_queue_ucast_offset_hash_set(ppe_dev, port_id, + index, 0); + if (ret) + return ret; + } + } + + return 0; +} + +/* Initialize the service code 1 used by CPU port. */ +static int ppe_servcode_init(struct ppe_device *ppe_dev) +{ + struct ppe_sc_cfg sc_cfg = {}; + + bitmap_zero(sc_cfg.bitmaps.counter, PPE_SC_BYPASS_COUNTER_SIZE); + bitmap_zero(sc_cfg.bitmaps.tunnel, PPE_SC_BYPASS_TUNNEL_SIZE); + + bitmap_fill(sc_cfg.bitmaps.ingress, PPE_SC_BYPASS_INGRESS_SIZE); + clear_bit(PPE_SC_BYPASS_INGRESS_FAKE_MAC_HEADER, sc_cfg.bitmaps.ingress); + clear_bit(PPE_SC_BYPASS_INGRESS_SERVICE_CODE, sc_cfg.bitmaps.ingress); + clear_bit(PPE_SC_BYPASS_INGRESS_FAKE_L2_PROTO, sc_cfg.bitmaps.ingress); + + bitmap_fill(sc_cfg.bitmaps.egress, PPE_SC_BYPASS_EGRESS_SIZE); + clear_bit(PPE_SC_BYPASS_EGRESS_ACL_POST_ROUTING_CHECK, sc_cfg.bitmaps.egress); + + return ppe_sc_config_set(ppe_dev, PPE_EDMA_SC_BYPASS_ID, sc_cfg); +} + +/* Initialize PPE port configurations. */ +static int ppe_port_config_init(struct ppe_device *ppe_dev) +{ + u32 reg, val, mru_mtu_val[3]; + int i, ret; + + /* MTU and MRU settings are not required for CPU port 0. */ + for (i = 1; i < ppe_dev->num_ports; i++) { + /* Enable Ethernet port counter */ + ret = ppe_counter_enable_set(ppe_dev, i); + if (ret) + return ret; + + reg = PPE_MRU_MTU_CTRL_TBL_ADDR + PPE_MRU_MTU_CTRL_TBL_INC * i; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + mru_mtu_val, ARRAY_SIZE(mru_mtu_val)); + if (ret) + return ret; + + /* Drop the packet when the packet size is more than the MTU + * and redirect the packet to the CPU port when the received + * packet size is more than the MRU of the physical interface. 
+ */ + PPE_MRU_MTU_CTRL_SET_MRU_CMD(mru_mtu_val, PPE_ACTION_REDIRECT_TO_CPU); + PPE_MRU_MTU_CTRL_SET_MTU_CMD(mru_mtu_val, PPE_ACTION_DROP); + ret = regmap_bulk_write(ppe_dev->regmap, reg, + mru_mtu_val, ARRAY_SIZE(mru_mtu_val)); + if (ret) + return ret; + + reg = PPE_MC_MTU_CTRL_TBL_ADDR + PPE_MC_MTU_CTRL_TBL_INC * i; + val = FIELD_PREP(PPE_MC_MTU_CTRL_TBL_MTU_CMD, PPE_ACTION_DROP); + ret = regmap_update_bits(ppe_dev->regmap, reg, + PPE_MC_MTU_CTRL_TBL_MTU_CMD, + val); + if (ret) + return ret; + } + + /* Enable CPU port counters. */ + return ppe_counter_enable_set(ppe_dev, 0); +} + +/* Initialize the PPE RSS configuration for IPv4 and IPv6 packet receive. + * RSS settings are to calculate the random RSS hash value generated during + * packet receive. This hash is then used to generate the queue offset used + * to determine the queue used to transmit the packet. + */ +static int ppe_rss_hash_init(struct ppe_device *ppe_dev) +{ + u16 fins[PPE_RSS_HASH_TUPLES] = { 0x205, 0x264, 0x227, 0x245, 0x201 }; + u8 ips[PPE_RSS_HASH_IP_LENGTH] = { 0x13, 0xb, 0x13, 0xb }; + struct ppe_rss_hash_cfg hash_cfg; + int i, ret; + + hash_cfg.hash_seed = get_random_u32(); + hash_cfg.hash_mask = 0xfff; + + /* Use 5 tuple as RSS hash key for the first fragment of TCP, UDP + * and UDP-Lite packets. + */ + hash_cfg.hash_fragment_mode = false; + + /* The final common seed configs used to calculate the RSS has value, + * which is available for both IPv4 and IPv6 packet. + */ + for (i = 0; i < ARRAY_SIZE(fins); i++) { + hash_cfg.hash_fin_inner[i] = fins[i] & 0x1f; + hash_cfg.hash_fin_outer[i] = fins[i] >> 5; + } + + /* RSS seeds for IP protocol, L4 destination & source port and + * destination & source IP used to calculate the RSS hash value. + */ + hash_cfg.hash_protocol_mix = 0x13; + hash_cfg.hash_dport_mix = 0xb; + hash_cfg.hash_sport_mix = 0x13; + hash_cfg.hash_dip_mix[0] = 0xb; + hash_cfg.hash_sip_mix[0] = 0x13; + + /* Configure RSS seed configs for IPv4 packet. */ + ret = ppe_rss_hash_config_set(ppe_dev, PPE_RSS_HASH_MODE_IPV4, hash_cfg); + if (ret) + return ret; + + for (i = 0; i < ARRAY_SIZE(ips); i++) { + hash_cfg.hash_sip_mix[i] = ips[i]; + hash_cfg.hash_dip_mix[i] = ips[i]; + } + + /* Configure RSS seed configs for IPv6 packet. */ + return ppe_rss_hash_config_set(ppe_dev, PPE_RSS_HASH_MODE_IPV6, hash_cfg); +} + +/* Initialize mapping between PPE queues assigned to CPU port 0 + * to Ethernet DMA ring 0. + */ +static int ppe_queues_to_ring_init(struct ppe_device *ppe_dev) +{ + u32 queue_bmap[PPE_RING_TO_QUEUE_BITMAP_WORD_CNT] = {}; + int ret, queue_id, queue_max; + + ret = ppe_port_resource_get(ppe_dev, 0, PPE_RES_UCAST, + &queue_id, &queue_max); + if (ret) + return ret; + + for (; queue_id <= queue_max; queue_id++) + queue_bmap[queue_id / 32] |= BIT_MASK(queue_id % 32); + + return ppe_ring_queue_map_set(ppe_dev, 0, queue_bmap); +} + +/* Initialize PPE bridge settings to only enable L2 frame receive and + * transmit between CPU port and PPE Ethernet ports. + */ +static int ppe_bridge_init(struct ppe_device *ppe_dev) +{ + u32 reg, mask, port_cfg[4], vsi_cfg[2]; + int ret, i; + + /* Configure the following settings for CPU port0: + * a.) Enable Bridge TX + * b.) Disable FDB new address learning + * c.) 
Disable station move address learning + */ + mask = PPE_PORT_BRIDGE_TXMAC_EN; + mask |= PPE_PORT_BRIDGE_NEW_LRN_EN; + mask |= PPE_PORT_BRIDGE_STA_MOVE_LRN_EN; + ret = regmap_update_bits(ppe_dev->regmap, + PPE_PORT_BRIDGE_CTRL_ADDR, + mask, + PPE_PORT_BRIDGE_TXMAC_EN); + if (ret) + return ret; + + for (i = 1; i < ppe_dev->num_ports; i++) { + /* Enable invalid VSI forwarding for all the physical ports + * to CPU port0, in case no VSI is assigned to the physical + * port. + */ + reg = PPE_L2_VP_PORT_TBL_ADDR + PPE_L2_VP_PORT_TBL_INC * i; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + port_cfg, ARRAY_SIZE(port_cfg)); + + if (ret) + return ret; + + PPE_L2_PORT_SET_INVALID_VSI_FWD_EN(port_cfg, true); + PPE_L2_PORT_SET_DST_INFO(port_cfg, 0); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + port_cfg, ARRAY_SIZE(port_cfg)); + if (ret) + return ret; + } + + for (i = 0; i < PPE_VSI_TBL_ENTRIES; i++) { + /* Set the VSI forward membership to include only CPU port0. + * FDB learning and forwarding take place only after switchdev + * is supported later to create the VSI and join the physical + * ports to the VSI port member. + */ + reg = PPE_VSI_TBL_ADDR + PPE_VSI_TBL_INC * i; + ret = regmap_bulk_read(ppe_dev->regmap, reg, + vsi_cfg, ARRAY_SIZE(vsi_cfg)); + if (ret) + return ret; + + PPE_VSI_SET_MEMBER_PORT_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_UUC_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_UMC_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_BC_BITMAP(vsi_cfg, BIT(0)); + PPE_VSI_SET_NEW_ADDR_LRN_EN(vsi_cfg, true); + PPE_VSI_SET_NEW_ADDR_FWD_CMD(vsi_cfg, PPE_ACTION_FORWARD); + PPE_VSI_SET_STATION_MOVE_LRN_EN(vsi_cfg, true); + PPE_VSI_SET_STATION_MOVE_FWD_CMD(vsi_cfg, PPE_ACTION_FORWARD); + + ret = regmap_bulk_write(ppe_dev->regmap, reg, + vsi_cfg, ARRAY_SIZE(vsi_cfg)); + if (ret) + return ret; + } + + return 0; +} + +int ppe_hw_config(struct ppe_device *ppe_dev) +{ + int ret; + + ret = ppe_config_bm(ppe_dev); + if (ret) + return ret; + + ret = ppe_config_qm(ppe_dev); + if (ret) + return ret; + + ret = ppe_config_scheduler(ppe_dev); + if (ret) + return ret; + + ret = ppe_queue_dest_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_servcode_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_port_config_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_rss_hash_init(ppe_dev); + if (ret) + return ret; + + ret = ppe_queues_to_ring_init(ppe_dev); + if (ret) + return ret; + + return ppe_bridge_init(ppe_dev); +} diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_config.h b/drivers/net/ethernet/qualcomm/ppe/ppe_config.h new file mode 100644 index 000000000000..4bb45ca40144 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe_config.h @@ -0,0 +1,317 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +#ifndef __PPE_CONFIG_H__ +#define __PPE_CONFIG_H__ + +#include <linux/types.h> + +#include "ppe.h" + +/* There are different table index ranges for configuring queue base ID of + * the destination port, CPU code and service code. + */ +#define PPE_QUEUE_BASE_DEST_PORT 0 +#define PPE_QUEUE_BASE_CPU_CODE 1024 +#define PPE_QUEUE_BASE_SERVICE_CODE 2048 + +#define PPE_QUEUE_INTER_PRI_NUM 16 +#define PPE_QUEUE_HASH_NUM 256 + +/* The service code is used by EDMA port to transmit packet to PPE. */ +#define PPE_EDMA_SC_BYPASS_ID 1 + +/* The PPE RSS hash configured for IPv4 and IPv6 packet separately. 
+ */
+#define PPE_RSS_HASH_MODE_IPV4 BIT(0)
+#define PPE_RSS_HASH_MODE_IPV6 BIT(1)
+#define PPE_RSS_HASH_IP_LENGTH 4
+#define PPE_RSS_HASH_TUPLES 5
+
+/* PPE supports 300 queues, each bit represents one queue. */
+#define PPE_RING_TO_QUEUE_BITMAP_WORD_CNT 10
+
+/**
+ * enum ppe_scheduler_frame_mode - PPE scheduler frame mode.
+ * @PPE_SCH_WITH_IPG_PREAMBLE_FRAME_CRC: The scheduled frame includes IPG,
+ * preamble, Ethernet packet and CRC.
+ * @PPE_SCH_WITH_FRAME_CRC: The scheduled frame includes Ethernet frame and CRC
+ * excluding IPG and preamble.
+ * @PPE_SCH_WITH_L3_PAYLOAD: The scheduled frame includes layer 3 packet data.
+ */
+enum ppe_scheduler_frame_mode {
+ PPE_SCH_WITH_IPG_PREAMBLE_FRAME_CRC = 0,
+ PPE_SCH_WITH_FRAME_CRC = 1,
+ PPE_SCH_WITH_L3_PAYLOAD = 2,
+};
+
+/**
+ * struct ppe_scheduler_cfg - PPE scheduler configuration.
+ * @flow_id: PPE flow ID.
+ * @pri: Scheduler priority.
+ * @drr_node_id: Node ID for scheduled traffic.
+ * @drr_node_wt: Weight for scheduled traffic.
+ * @unit_is_packet: Packet based or byte based unit for scheduled traffic.
+ * @frame_mode: Packet mode to be scheduled.
+ *
+ * PPE scheduler supports commit rate and exceed rate configurations.
+ */
+struct ppe_scheduler_cfg {
+ int flow_id;
+ int pri;
+ int drr_node_id;
+ int drr_node_wt;
+ bool unit_is_packet;
+ enum ppe_scheduler_frame_mode frame_mode;
+};
+
+/**
+ * enum ppe_resource_type - PPE resource type.
+ * @PPE_RES_UCAST: Unicast queue resource.
+ * @PPE_RES_MCAST: Multicast queue resource.
+ * @PPE_RES_L0_NODE: Level 0 for queue based node resource.
+ * @PPE_RES_L1_NODE: Level 1 for flow based node resource.
+ * @PPE_RES_FLOW_ID: Flow based node resource.
+ */
+enum ppe_resource_type {
+ PPE_RES_UCAST,
+ PPE_RES_MCAST,
+ PPE_RES_L0_NODE,
+ PPE_RES_L1_NODE,
+ PPE_RES_FLOW_ID,
+};
+
+/**
+ * struct ppe_queue_ucast_dest - PPE unicast queue destination.
+ * @src_profile: Source profile.
+ * @service_code_en: Enable service code to map the queue base ID.
+ * @service_code: Service code.
+ * @cpu_code_en: Enable CPU code to map the queue base ID.
+ * @cpu_code: CPU code.
+ * @dest_port: Destination port.
+ *
+ * PPE egress queue ID is decided by the service code if enabled, otherwise
+ * by the CPU code if enabled, or by the destination port if both service code
+ * and CPU code are disabled.
+ */
+struct ppe_queue_ucast_dest {
+ int src_profile;
+ bool service_code_en;
+ int service_code;
+ bool cpu_code_en;
+ int cpu_code;
+ int dest_port;
+};
+
+/* Hardware bitmaps for bypassing features of the ingress packet.
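+ * Each enumerator below is a bit position within the ingress bitmap of
+ * struct ppe_sc_bypass.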
*/ +enum ppe_sc_ingress_type { + PPE_SC_BYPASS_INGRESS_VLAN_TAG_FMT_CHECK = 0, + PPE_SC_BYPASS_INGRESS_VLAN_MEMBER_CHECK = 1, + PPE_SC_BYPASS_INGRESS_VLAN_TRANSLATE = 2, + PPE_SC_BYPASS_INGRESS_MY_MAC_CHECK = 3, + PPE_SC_BYPASS_INGRESS_DIP_LOOKUP = 4, + PPE_SC_BYPASS_INGRESS_FLOW_LOOKUP = 5, + PPE_SC_BYPASS_INGRESS_FLOW_ACTION = 6, + PPE_SC_BYPASS_INGRESS_ACL = 7, + PPE_SC_BYPASS_INGRESS_FAKE_MAC_HEADER = 8, + PPE_SC_BYPASS_INGRESS_SERVICE_CODE = 9, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L2 = 10, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L3_IPV4 = 11, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L3_IPV6 = 12, + PPE_SC_BYPASS_INGRESS_WRONG_PKT_FMT_L4 = 13, + PPE_SC_BYPASS_INGRESS_FLOW_SERVICE_CODE = 14, + PPE_SC_BYPASS_INGRESS_ACL_SERVICE_CODE = 15, + PPE_SC_BYPASS_INGRESS_FAKE_L2_PROTO = 16, + PPE_SC_BYPASS_INGRESS_PPPOE_TERMINATION = 17, + PPE_SC_BYPASS_INGRESS_DEFAULT_VLAN = 18, + PPE_SC_BYPASS_INGRESS_DEFAULT_PCP = 19, + PPE_SC_BYPASS_INGRESS_VSI_ASSIGN = 20, + /* Values 21-23 are not specified by hardware. */ + PPE_SC_BYPASS_INGRESS_VLAN_ASSIGN_FAIL = 24, + PPE_SC_BYPASS_INGRESS_SOURCE_GUARD = 25, + PPE_SC_BYPASS_INGRESS_MRU_MTU_CHECK = 26, + PPE_SC_BYPASS_INGRESS_FLOW_SRC_CHECK = 27, + PPE_SC_BYPASS_INGRESS_FLOW_QOS = 28, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_INGRESS_SIZE, +}; + +/* Hardware bitmaps for bypassing features of the egress packet. */ +enum ppe_sc_egress_type { + PPE_SC_BYPASS_EGRESS_VLAN_MEMBER_CHECK = 0, + PPE_SC_BYPASS_EGRESS_VLAN_TRANSLATE = 1, + PPE_SC_BYPASS_EGRESS_VLAN_TAG_FMT_CTRL = 2, + PPE_SC_BYPASS_EGRESS_FDB_LEARN = 3, + PPE_SC_BYPASS_EGRESS_FDB_REFRESH = 4, + PPE_SC_BYPASS_EGRESS_L2_SOURCE_SECURITY = 5, + PPE_SC_BYPASS_EGRESS_MANAGEMENT_FWD = 6, + PPE_SC_BYPASS_EGRESS_BRIDGING_FWD = 7, + PPE_SC_BYPASS_EGRESS_IN_STP_FLTR = 8, + PPE_SC_BYPASS_EGRESS_EG_STP_FLTR = 9, + PPE_SC_BYPASS_EGRESS_SOURCE_FLTR = 10, + PPE_SC_BYPASS_EGRESS_POLICER = 11, + PPE_SC_BYPASS_EGRESS_L2_PKT_EDIT = 12, + PPE_SC_BYPASS_EGRESS_L3_PKT_EDIT = 13, + PPE_SC_BYPASS_EGRESS_ACL_POST_ROUTING_CHECK = 14, + PPE_SC_BYPASS_EGRESS_PORT_ISOLATION = 15, + PPE_SC_BYPASS_EGRESS_PRE_ACL_QOS = 16, + PPE_SC_BYPASS_EGRESS_POST_ACL_QOS = 17, + PPE_SC_BYPASS_EGRESS_DSCP_QOS = 18, + PPE_SC_BYPASS_EGRESS_PCP_QOS = 19, + PPE_SC_BYPASS_EGRESS_PREHEADER_QOS = 20, + PPE_SC_BYPASS_EGRESS_FAKE_MAC_DROP = 21, + PPE_SC_BYPASS_EGRESS_TUNL_CONTEXT = 22, + PPE_SC_BYPASS_EGRESS_FLOW_POLICER = 23, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_EGRESS_SIZE, +}; + +/* Hardware bitmaps for bypassing counter of packet. */ +enum ppe_sc_counter_type { + PPE_SC_BYPASS_COUNTER_RX_VLAN = 0, + PPE_SC_BYPASS_COUNTER_RX = 1, + PPE_SC_BYPASS_COUNTER_TX_VLAN = 2, + PPE_SC_BYPASS_COUNTER_TX = 3, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_COUNTER_SIZE, +}; + +/* Hardware bitmaps for bypassing features of tunnel packet. 
*/ +enum ppe_sc_tunnel_type { + PPE_SC_BYPASS_TUNNEL_SERVICE_CODE = 0, + PPE_SC_BYPASS_TUNNEL_TUNNEL_HANDLE = 1, + PPE_SC_BYPASS_TUNNEL_L3_IF_CHECK = 2, + PPE_SC_BYPASS_TUNNEL_VLAN_CHECK = 3, + PPE_SC_BYPASS_TUNNEL_DMAC_CHECK = 4, + PPE_SC_BYPASS_TUNNEL_UDP_CSUM_0_CHECK = 5, + PPE_SC_BYPASS_TUNNEL_TBL_DE_ACCE_CHECK = 6, + PPE_SC_BYPASS_TUNNEL_PPPOE_MC_TERM_CHECK = 7, + PPE_SC_BYPASS_TUNNEL_TTL_EXCEED_CHECK = 8, + PPE_SC_BYPASS_TUNNEL_MAP_SRC_CHECK = 9, + PPE_SC_BYPASS_TUNNEL_MAP_DST_CHECK = 10, + PPE_SC_BYPASS_TUNNEL_LPM_DST_LOOKUP = 11, + PPE_SC_BYPASS_TUNNEL_LPM_LOOKUP = 12, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L2 = 13, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L3_IPV4 = 14, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L3_IPV6 = 15, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_L4 = 16, + PPE_SC_BYPASS_TUNNEL_WRONG_PKT_FMT_TUNNEL = 17, + /* Values 18-19 are not specified by hardware. */ + PPE_SC_BYPASS_TUNNEL_PRE_IPO = 20, + /* This must be last as it determines the size of the BITMAP. */ + PPE_SC_BYPASS_TUNNEL_SIZE, +}; + +/** + * struct ppe_sc_bypass - PPE service bypass bitmaps + * @ingress: Bitmap of features that can be bypassed on the ingress packet. + * @egress: Bitmap of features that can be bypassed on the egress packet. + * @counter: Bitmap of features that can be bypassed on the counter type. + * @tunnel: Bitmap of features that can be bypassed on the tunnel packet. + */ +struct ppe_sc_bypass { + DECLARE_BITMAP(ingress, PPE_SC_BYPASS_INGRESS_SIZE); + DECLARE_BITMAP(egress, PPE_SC_BYPASS_EGRESS_SIZE); + DECLARE_BITMAP(counter, PPE_SC_BYPASS_COUNTER_SIZE); + DECLARE_BITMAP(tunnel, PPE_SC_BYPASS_TUNNEL_SIZE); +}; + +/** + * struct ppe_sc_cfg - PPE service code configuration. + * @dest_port_valid: Generate destination port or not. + * @dest_port: Destination port ID. + * @bitmaps: Bitmap of bypass features. + * @is_src: Destination port acts as source port, packet sent to CPU. + * @next_service_code: New service code generated. + * @eip_field_update_bitmap: Fields updated as actions taken for EIP. + * @eip_hw_service: Selected hardware functions for EIP. + * @eip_offset_sel: Packet offset selection, using packet's layer 4 offset + * or using packet's layer 3 offset for EIP. + * + * Service code is generated during the packet passing through PPE. + */ +struct ppe_sc_cfg { + bool dest_port_valid; + int dest_port; + struct ppe_sc_bypass bitmaps; + bool is_src; + int next_service_code; + int eip_field_update_bitmap; + int eip_hw_service; + int eip_offset_sel; +}; + +/** + * enum ppe_action_type - PPE action of the received packet. + * @PPE_ACTION_FORWARD: Packet forwarded per L2/L3 process. + * @PPE_ACTION_DROP: Packet dropped by PPE. + * @PPE_ACTION_COPY_TO_CPU: Packet copied to CPU port per multicast queue. + * @PPE_ACTION_REDIRECT_TO_CPU: Packet redirected to CPU port per unicast queue. + */ +enum ppe_action_type { + PPE_ACTION_FORWARD = 0, + PPE_ACTION_DROP = 1, + PPE_ACTION_COPY_TO_CPU = 2, + PPE_ACTION_REDIRECT_TO_CPU = 3, +}; + +/** + * struct ppe_rss_hash_cfg - PPE RSS hash configuration. + * @hash_mask: Mask of the generated hash value. + * @hash_fragment_mode: Hash generation mode for the first fragment of TCP, + * UDP and UDP-Lite packets, to use either 3 tuple or 5 tuple for RSS hash + * key computation. + * @hash_seed: Seed to generate RSS hash. + * @hash_sip_mix: Source IP selection. + * @hash_dip_mix: Destination IP selection. + * @hash_protocol_mix: Protocol selection. + * @hash_sport_mix: Source L4 port selection. + * @hash_dport_mix: Destination L4 port selection. 
+ * @hash_fin_inner: RSS hash value first selection.
+ * @hash_fin_outer: RSS hash value second selection.
+ *
+ * The PPE RSS hash value is generated for the packet based on the
+ * configured RSS hash.
+ */
+struct ppe_rss_hash_cfg {
+ u32 hash_mask;
+ bool hash_fragment_mode;
+ u32 hash_seed;
+ u8 hash_sip_mix[PPE_RSS_HASH_IP_LENGTH];
+ u8 hash_dip_mix[PPE_RSS_HASH_IP_LENGTH];
+ u8 hash_protocol_mix;
+ u8 hash_sport_mix;
+ u8 hash_dport_mix;
+ u8 hash_fin_inner[PPE_RSS_HASH_TUPLES];
+ u8 hash_fin_outer[PPE_RSS_HASH_TUPLES];
+};
+
+int ppe_hw_config(struct ppe_device *ppe_dev);
+int ppe_queue_scheduler_set(struct ppe_device *ppe_dev,
+ int node_id, bool flow_level, int port,
+ struct ppe_scheduler_cfg scheduler_cfg);
+int ppe_queue_ucast_base_set(struct ppe_device *ppe_dev,
+ struct ppe_queue_ucast_dest queue_dst,
+ int queue_base,
+ int profile_id);
+int ppe_queue_ucast_offset_pri_set(struct ppe_device *ppe_dev,
+ int profile_id,
+ int priority,
+ int queue_offset);
+int ppe_queue_ucast_offset_hash_set(struct ppe_device *ppe_dev,
+ int profile_id,
+ int rss_hash,
+ int queue_offset);
+int ppe_port_resource_get(struct ppe_device *ppe_dev, int port,
+ enum ppe_resource_type type,
+ int *res_start, int *res_end);
+int ppe_sc_config_set(struct ppe_device *ppe_dev, int sc,
+ struct ppe_sc_cfg cfg);
+int ppe_counter_enable_set(struct ppe_device *ppe_dev, int port);
+int ppe_rss_hash_config_set(struct ppe_device *ppe_dev, int mode,
+ struct ppe_rss_hash_cfg hash_cfg);
+int ppe_ring_queue_map_set(struct ppe_device *ppe_dev,
+ int ring_id,
+ u32 *queue_map);
+#endif
diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c
new file mode 100644
index 000000000000..fd959a76ff43
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.c
@@ -0,0 +1,847 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries.
+ */
+
+/* PPE debugfs routines for display of PPE counters useful for debug. */
+
+#include <linux/bitfield.h>
+#include <linux/debugfs.h>
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/regmap.h>
+#include <linux/seq_file.h>
+
+#include "ppe.h"
+#include "ppe_config.h"
+#include "ppe_debugfs.h"
+#include "ppe_regs.h"
+
+#define PPE_PKT_CNT_TBL_SIZE 3
+#define PPE_DROP_PKT_CNT_TBL_SIZE 5
+
+#define PPE_W0_PKT_CNT GENMASK(31, 0)
+#define PPE_W2_DROP_PKT_CNT_LOW GENMASK(31, 8)
+#define PPE_W3_DROP_PKT_CNT_HIGH GENMASK(7, 0)
+
+#define PPE_GET_PKT_CNT(tbl_cnt) \
+ FIELD_GET(PPE_W0_PKT_CNT, *(tbl_cnt))
+#define PPE_GET_DROP_PKT_CNT_LOW(tbl_cnt) \
+ FIELD_GET(PPE_W2_DROP_PKT_CNT_LOW, *((tbl_cnt) + 0x2))
+#define PPE_GET_DROP_PKT_CNT_HIGH(tbl_cnt) \
+ FIELD_GET(PPE_W3_DROP_PKT_CNT_HIGH, *((tbl_cnt) + 0x3))
+
+/**
+ * enum ppe_cnt_size_type - PPE counter size type
+ * @PPE_PKT_CNT_SIZE_1WORD: Counter size with single register
+ * @PPE_PKT_CNT_SIZE_3WORD: Counter size with table of 3 words
+ * @PPE_PKT_CNT_SIZE_5WORD: Counter size with table of 5 words
+ *
+ * PPE uses different register sizes to record the packet counters:
+ * either a single register, or a register table of 3 or 5 words.
+ * The counter with a 5-word table also records the drop counter.
+ * There are also some other counter types occupying less than 32
+ * bits, which are not covered by this enumeration type.
+ */
+enum ppe_cnt_size_type {
+ PPE_PKT_CNT_SIZE_1WORD,
+ PPE_PKT_CNT_SIZE_3WORD,
+ PPE_PKT_CNT_SIZE_5WORD,
+};
+
+/**
+ * enum ppe_cnt_type - PPE counter type. 
+ * @PPE_CNT_BM: Packet counter processed by BM. + * @PPE_CNT_PARSE: Packet counter parsed on ingress. + * @PPE_CNT_PORT_RX: Packet counter on the ingress port. + * @PPE_CNT_VLAN_RX: VLAN packet counter received. + * @PPE_CNT_L2_FWD: Packet counter processed by L2 forwarding. + * @PPE_CNT_CPU_CODE: Packet counter marked with various CPU codes. + * @PPE_CNT_VLAN_TX: VLAN packet counter transmitted. + * @PPE_CNT_PORT_TX: Packet counter on the egress port. + * @PPE_CNT_QM: Packet counter processed by QM. + */ +enum ppe_cnt_type { + PPE_CNT_BM, + PPE_CNT_PARSE, + PPE_CNT_PORT_RX, + PPE_CNT_VLAN_RX, + PPE_CNT_L2_FWD, + PPE_CNT_CPU_CODE, + PPE_CNT_VLAN_TX, + PPE_CNT_PORT_TX, + PPE_CNT_QM, +}; + +/** + * struct ppe_debugfs_entry - PPE debugfs entry. + * @name: Debugfs file name. + * @counter_type: PPE packet counter type. + * @ppe: PPE device. + * + * The PPE debugfs entry is used to create the debugfs file and passed + * to debugfs_create_file() as private data. + */ +struct ppe_debugfs_entry { + const char *name; + enum ppe_cnt_type counter_type; + struct ppe_device *ppe; +}; + +static const struct ppe_debugfs_entry debugfs_files[] = { + { + .name = "bm", + .counter_type = PPE_CNT_BM, + }, + { + .name = "parse", + .counter_type = PPE_CNT_PARSE, + }, + { + .name = "port_rx", + .counter_type = PPE_CNT_PORT_RX, + }, + { + .name = "vlan_rx", + .counter_type = PPE_CNT_VLAN_RX, + }, + { + .name = "l2_forward", + .counter_type = PPE_CNT_L2_FWD, + }, + { + .name = "cpu_code", + .counter_type = PPE_CNT_CPU_CODE, + }, + { + .name = "vlan_tx", + .counter_type = PPE_CNT_VLAN_TX, + }, + { + .name = "port_tx", + .counter_type = PPE_CNT_PORT_TX, + }, + { + .name = "qm", + .counter_type = PPE_CNT_QM, + }, +}; + +static int ppe_pkt_cnt_get(struct ppe_device *ppe_dev, u32 reg, + enum ppe_cnt_size_type cnt_type, + u32 *cnt, u32 *drop_cnt) +{ + u32 drop_pkt_cnt[PPE_DROP_PKT_CNT_TBL_SIZE]; + u32 pkt_cnt[PPE_PKT_CNT_TBL_SIZE]; + u32 value; + int ret; + + switch (cnt_type) { + case PPE_PKT_CNT_SIZE_1WORD: + ret = regmap_read(ppe_dev->regmap, reg, &value); + if (ret) + return ret; + + *cnt = value; + break; + case PPE_PKT_CNT_SIZE_3WORD: + ret = regmap_bulk_read(ppe_dev->regmap, reg, + pkt_cnt, ARRAY_SIZE(pkt_cnt)); + if (ret) + return ret; + + *cnt = PPE_GET_PKT_CNT(pkt_cnt); + break; + case PPE_PKT_CNT_SIZE_5WORD: + ret = regmap_bulk_read(ppe_dev->regmap, reg, + drop_pkt_cnt, ARRAY_SIZE(drop_pkt_cnt)); + if (ret) + return ret; + + *cnt = PPE_GET_PKT_CNT(drop_pkt_cnt); + + /* Drop counter with low 24 bits. */ + value = PPE_GET_DROP_PKT_CNT_LOW(drop_pkt_cnt); + *drop_cnt = FIELD_PREP(GENMASK(23, 0), value); + + /* Drop counter with high 8 bits. 
*/ + value = PPE_GET_DROP_PKT_CNT_HIGH(drop_pkt_cnt); + *drop_cnt |= FIELD_PREP(GENMASK(31, 24), value); + break; + } + + return 0; +} + +static void ppe_tbl_pkt_cnt_clear(struct ppe_device *ppe_dev, u32 reg, + enum ppe_cnt_size_type cnt_type) +{ + u32 drop_pkt_cnt[PPE_DROP_PKT_CNT_TBL_SIZE] = {}; + u32 pkt_cnt[PPE_PKT_CNT_TBL_SIZE] = {}; + + switch (cnt_type) { + case PPE_PKT_CNT_SIZE_1WORD: + regmap_write(ppe_dev->regmap, reg, 0); + break; + case PPE_PKT_CNT_SIZE_3WORD: + regmap_bulk_write(ppe_dev->regmap, reg, + pkt_cnt, ARRAY_SIZE(pkt_cnt)); + break; + case PPE_PKT_CNT_SIZE_5WORD: + regmap_bulk_write(ppe_dev->regmap, reg, + drop_pkt_cnt, ARRAY_SIZE(drop_pkt_cnt)); + break; + } +} + +static int ppe_bm_counter_get(struct ppe_device *ppe_dev, struct seq_file *seq) +{ + u32 reg, val, pkt_cnt, pkt_cnt1; + int ret, i, tag; + + seq_printf(seq, "%-24s", "BM SILENT_DROP:"); + tag = 0; + for (i = 0; i < PPE_DROP_CNT_TBL_ENTRIES; i++) { + reg = PPE_DROP_CNT_TBL_ADDR + i * PPE_DROP_CNT_TBL_INC; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "port", i); + } + } + + seq_putc(seq, '\n'); + + /* The number of packets dropped because hardware buffers were + * available only partially for the packet. + */ + seq_printf(seq, "%-24s", "BM OVERFLOW_DROP:"); + tag = 0; + for (i = 0; i < PPE_DROP_STAT_TBL_ENTRIES; i++) { + reg = PPE_DROP_STAT_TBL_ADDR + PPE_DROP_STAT_TBL_INC * i; + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "port", i); + } + } + + seq_putc(seq, '\n'); + + /* The number of currently occupied buffers, that can't be flushed. */ + seq_printf(seq, "%-24s", "BM USED/REACT:"); + tag = 0; + for (i = 0; i < PPE_BM_USED_CNT_TBL_ENTRIES; i++) { + reg = PPE_BM_USED_CNT_TBL_ADDR + i * PPE_BM_USED_CNT_TBL_INC; + ret = regmap_read(ppe_dev->regmap, reg, &val); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + /* The number of PPE buffers used for caching the received + * packets before the pause frame sent. + */ + pkt_cnt = FIELD_GET(PPE_BM_USED_CNT_VAL, val); + + reg = PPE_BM_REACT_CNT_TBL_ADDR + i * PPE_BM_REACT_CNT_TBL_INC; + ret = regmap_read(ppe_dev->regmap, reg, &val); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + /* The number of PPE buffers used for caching the received + * packets after pause frame sent out. + */ + pkt_cnt1 = FIELD_GET(PPE_BM_REACT_CNT_VAL, val); + + if (pkt_cnt > 0 || pkt_cnt1 > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, pkt_cnt1, + "port", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets processed by the ingress parser module of PPE. 
*/ +static int ppe_parse_pkt_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, cnt = 0, tunnel_cnt = 0; + int i, ret, tag = 0; + + seq_printf(seq, "%-24s", "PARSE TPRX/IPRX:"); + for (i = 0; i < PPE_IPR_PKT_CNT_TBL_ENTRIES; i++) { + reg = PPE_TPR_PKT_CNT_TBL_ADDR + i * PPE_TPR_PKT_CNT_TBL_INC; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD, + &tunnel_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + reg = PPE_IPR_PKT_CNT_TBL_ADDR + i * PPE_IPR_PKT_CNT_TBL_INC; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD, + &cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (tunnel_cnt > 0 || cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", tunnel_cnt, cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets received or dropped on the ingress port. */ +static int ppe_port_rx_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0, drop_cnt = 0; + int ret, i, tag; + + seq_printf(seq, "%-24s", "PORT RX/RX_DROP:"); + tag = 0; + for (i = 0; i < PPE_PHY_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PHY_PORT_RX_CNT_TBL_ADDR + PPE_PHY_PORT_RX_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD, + &pkt_cnt, &drop_cnt); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + seq_printf(seq, "%-24s", "VPORT RX/RX_DROP:"); + tag = 0; + for (i = 0; i < PPE_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PORT_RX_CNT_TBL_ADDR + PPE_PORT_RX_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD, + &pkt_cnt, &drop_cnt); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets received or dropped by layer 2 processing. */ +static int ppe_l2_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0, drop_cnt = 0; + int ret, i, tag = 0; + + seq_printf(seq, "%-24s", "L2 RX/RX_DROP:"); + for (i = 0; i < PPE_PRE_L2_CNT_TBL_ENTRIES; i++) { + reg = PPE_PRE_L2_CNT_TBL_ADDR + PPE_PRE_L2_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD, + &pkt_cnt, &drop_cnt); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "vsi", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of VLAN packets received by PPE. 
*/ +static int ppe_vlan_rx_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0; + int ret, i, tag = 0; + + seq_printf(seq, "%-24s", "VLAN RX:"); + for (i = 0; i < PPE_VLAN_CNT_TBL_ENTRIES; i++) { + reg = PPE_VLAN_CNT_TBL_ADDR + PPE_VLAN_CNT_TBL_INC * i; + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "vsi", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets handed to CPU by PPE. */ +static int ppe_cpu_code_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0; + int ret, i; + + seq_printf(seq, "%-24s", "CPU CODE:"); + for (i = 0; i < PPE_DROP_CPU_CNT_TBL_ENTRIES; i++) { + reg = PPE_DROP_CPU_CNT_TBL_ADDR + PPE_DROP_CPU_CNT_TBL_INC * i; + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (!pkt_cnt) + continue; + + /* There are 256 CPU codes saved in the first 256 entries + * of register table, and 128 drop codes for each PPE port + * (0-7), the total entries is 256 + 8 * 128. + */ + if (i < 256) + seq_printf(seq, "%10u(cpucode:%d)", pkt_cnt, i); + else + seq_printf(seq, "%10u(port=%04d),dropcode:%d", pkt_cnt, + (i - 256) % 8, (i - 256) / 8); + seq_putc(seq, '\n'); + seq_printf(seq, "%-24s", ""); + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets forwarded by VLAN on the egress direction. */ +static int ppe_vlan_tx_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0; + int ret, i, tag = 0; + + seq_printf(seq, "%-24s", "VLAN TX:"); + for (i = 0; i < PPE_EG_VSI_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_EG_VSI_COUNTER_TBL_ADDR + PPE_EG_VSI_COUNTER_TBL_INC * i; + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u(%s=%04d)", pkt_cnt, "vsi", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets transmitted or dropped on the egress port. 
*/ +static int ppe_port_tx_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, pkt_cnt = 0, drop_cnt = 0; + int ret, i, tag; + + seq_printf(seq, "%-24s", "VPORT TX/TX_DROP:"); + tag = 0; + for (i = 0; i < PPE_VPORT_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_VPORT_TX_COUNTER_TBL_ADDR + PPE_VPORT_TX_COUNTER_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + reg = PPE_VPORT_TX_DROP_CNT_TBL_ADDR + PPE_VPORT_TX_DROP_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &drop_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0 || drop_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + seq_printf(seq, "%-24s", "PORT TX/TX_DROP:"); + tag = 0; + for (i = 0; i < PPE_PORT_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_PORT_TX_COUNTER_TBL_ADDR + PPE_PORT_TX_COUNTER_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + reg = PPE_PORT_TX_DROP_CNT_TBL_ADDR + PPE_PORT_TX_DROP_CNT_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &drop_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (pkt_cnt > 0 || drop_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u(%s=%04d)", pkt_cnt, drop_cnt, + "port", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* The number of packets transmitted or pending by the PPE queue. 
*/ +static int ppe_queue_counter_get(struct ppe_device *ppe_dev, + struct seq_file *seq) +{ + u32 reg, val, pkt_cnt = 0, pend_cnt = 0, drop_cnt = 0; + int ret, i, tag = 0; + + seq_printf(seq, "%-24s", "QUEUE TX/PEND/DROP:"); + for (i = 0; i < PPE_QUEUE_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_QUEUE_TX_COUNTER_TBL_ADDR + PPE_QUEUE_TX_COUNTER_TBL_INC * i; + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &pkt_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + if (i < PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES) { + reg = PPE_AC_UNICAST_QUEUE_CNT_TBL_ADDR + + PPE_AC_UNICAST_QUEUE_CNT_TBL_INC * i; + ret = regmap_read(ppe_dev->regmap, reg, &val); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + pend_cnt = FIELD_GET(PPE_AC_UNICAST_QUEUE_CNT_TBL_PEND_CNT, val); + + reg = PPE_UNICAST_DROP_CNT_TBL_ADDR + + PPE_AC_UNICAST_QUEUE_CNT_TBL_INC * + (i * PPE_UNICAST_DROP_TYPES + PPE_UNICAST_DROP_FORCE_OFFSET); + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &drop_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + } else { + int mq_offset = i - PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES; + + reg = PPE_AC_MULTICAST_QUEUE_CNT_TBL_ADDR + + PPE_AC_MULTICAST_QUEUE_CNT_TBL_INC * mq_offset; + ret = regmap_read(ppe_dev->regmap, reg, &val); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + + pend_cnt = FIELD_GET(PPE_AC_MULTICAST_QUEUE_CNT_TBL_PEND_CNT, val); + + if (mq_offset < PPE_P0_MULTICAST_QUEUE_NUM) { + reg = PPE_CPU_PORT_MULTICAST_FORCE_DROP_CNT_TBL_ADDR(mq_offset); + } else { + mq_offset -= PPE_P0_MULTICAST_QUEUE_NUM; + + reg = PPE_P1_MULTICAST_DROP_CNT_TBL_ADDR; + reg += (mq_offset / PPE_MULTICAST_QUEUE_NUM) * + PPE_MULTICAST_QUEUE_PORT_ADDR_INC; + reg += (mq_offset % PPE_MULTICAST_QUEUE_NUM) * + PPE_MULTICAST_DROP_CNT_TBL_INC * + PPE_MULTICAST_DROP_TYPES; + } + + ret = ppe_pkt_cnt_get(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD, + &drop_cnt, NULL); + if (ret) { + dev_err(ppe_dev->dev, "CNT ERROR %d\n", ret); + return ret; + } + } + + if (pkt_cnt > 0 || pend_cnt > 0 || drop_cnt > 0) { + if (!((++tag) % 4)) + seq_printf(seq, "\n%-24s", ""); + + seq_printf(seq, "%10u/%u/%u(%s=%04d)", + pkt_cnt, pend_cnt, drop_cnt, "queue", i); + } + } + + seq_putc(seq, '\n'); + + return 0; +} + +/* Display the various packet counters of PPE. */ +static int ppe_packet_counter_show(struct seq_file *seq, void *v) +{ + struct ppe_debugfs_entry *entry = seq->private; + struct ppe_device *ppe_dev = entry->ppe; + int ret; + + switch (entry->counter_type) { + case PPE_CNT_BM: + ret = ppe_bm_counter_get(ppe_dev, seq); + break; + case PPE_CNT_PARSE: + ret = ppe_parse_pkt_counter_get(ppe_dev, seq); + break; + case PPE_CNT_PORT_RX: + ret = ppe_port_rx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_VLAN_RX: + ret = ppe_vlan_rx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_L2_FWD: + ret = ppe_l2_counter_get(ppe_dev, seq); + break; + case PPE_CNT_CPU_CODE: + ret = ppe_cpu_code_counter_get(ppe_dev, seq); + break; + case PPE_CNT_VLAN_TX: + ret = ppe_vlan_tx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_PORT_TX: + ret = ppe_port_tx_counter_get(ppe_dev, seq); + break; + case PPE_CNT_QM: + ret = ppe_queue_counter_get(ppe_dev, seq); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/* Flush the various packet counters of PPE. 
*/ +static ssize_t ppe_packet_counter_write(struct file *file, + const char __user *buf, + size_t count, loff_t *pos) +{ + struct ppe_debugfs_entry *entry = file_inode(file)->i_private; + struct ppe_device *ppe_dev = entry->ppe; + u32 reg; + int i; + + switch (entry->counter_type) { + case PPE_CNT_BM: + for (i = 0; i < PPE_DROP_CNT_TBL_ENTRIES; i++) { + reg = PPE_DROP_CNT_TBL_ADDR + i * PPE_DROP_CNT_TBL_INC; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD); + } + + for (i = 0; i < PPE_DROP_STAT_TBL_ENTRIES; i++) { + reg = PPE_DROP_STAT_TBL_ADDR + PPE_DROP_STAT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_PARSE: + for (i = 0; i < PPE_IPR_PKT_CNT_TBL_ENTRIES; i++) { + reg = PPE_IPR_PKT_CNT_TBL_ADDR + i * PPE_IPR_PKT_CNT_TBL_INC; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD); + + reg = PPE_TPR_PKT_CNT_TBL_ADDR + i * PPE_TPR_PKT_CNT_TBL_INC; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_1WORD); + } + + break; + case PPE_CNT_PORT_RX: + for (i = 0; i < PPE_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PORT_RX_CNT_TBL_ADDR + PPE_PORT_RX_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD); + } + + for (i = 0; i < PPE_PHY_PORT_RX_CNT_TBL_ENTRIES; i++) { + reg = PPE_PHY_PORT_RX_CNT_TBL_ADDR + PPE_PHY_PORT_RX_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD); + } + + break; + case PPE_CNT_VLAN_RX: + for (i = 0; i < PPE_VLAN_CNT_TBL_ENTRIES; i++) { + reg = PPE_VLAN_CNT_TBL_ADDR + PPE_VLAN_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_L2_FWD: + for (i = 0; i < PPE_PRE_L2_CNT_TBL_ENTRIES; i++) { + reg = PPE_PRE_L2_CNT_TBL_ADDR + PPE_PRE_L2_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_5WORD); + } + + break; + case PPE_CNT_CPU_CODE: + for (i = 0; i < PPE_DROP_CPU_CNT_TBL_ENTRIES; i++) { + reg = PPE_DROP_CPU_CNT_TBL_ADDR + PPE_DROP_CPU_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_VLAN_TX: + for (i = 0; i < PPE_EG_VSI_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_EG_VSI_COUNTER_TBL_ADDR + PPE_EG_VSI_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_PORT_TX: + for (i = 0; i < PPE_PORT_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_PORT_TX_DROP_CNT_TBL_ADDR + PPE_PORT_TX_DROP_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + + reg = PPE_PORT_TX_COUNTER_TBL_ADDR + PPE_PORT_TX_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + for (i = 0; i < PPE_VPORT_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_VPORT_TX_COUNTER_TBL_ADDR + PPE_VPORT_TX_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + + reg = PPE_VPORT_TX_DROP_CNT_TBL_ADDR + PPE_VPORT_TX_DROP_CNT_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + case PPE_CNT_QM: + for (i = 0; i < PPE_QUEUE_TX_COUNTER_TBL_ENTRIES; i++) { + reg = PPE_QUEUE_TX_COUNTER_TBL_ADDR + PPE_QUEUE_TX_COUNTER_TBL_INC * i; + ppe_tbl_pkt_cnt_clear(ppe_dev, reg, PPE_PKT_CNT_SIZE_3WORD); + } + + break; + default: + break; + } + + return count; +} +DEFINE_SHOW_STORE_ATTRIBUTE(ppe_packet_counter); + +void ppe_debugfs_setup(struct ppe_device *ppe_dev) +{ + struct ppe_debugfs_entry *entry; + int i; + + ppe_dev->debugfs_root = debugfs_create_dir("ppe", NULL); + if (IS_ERR(ppe_dev->debugfs_root)) 
+ return; + + for (i = 0; i < ARRAY_SIZE(debugfs_files); i++) { + entry = devm_kzalloc(ppe_dev->dev, sizeof(*entry), GFP_KERNEL); + if (!entry) + return; + + entry->ppe = ppe_dev; + entry->counter_type = debugfs_files[i].counter_type; + + debugfs_create_file(debugfs_files[i].name, 0444, + ppe_dev->debugfs_root, entry, + &ppe_packet_counter_fops); + } +} + +void ppe_debugfs_teardown(struct ppe_device *ppe_dev) +{ + debugfs_remove_recursive(ppe_dev->debugfs_root); + ppe_dev->debugfs_root = NULL; +} diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h new file mode 100644 index 000000000000..81f49a709123 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe_debugfs.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +/* PPE debugfs counters setup. */ + +#ifndef __PPE_DEBUGFS_H__ +#define __PPE_DEBUGFS_H__ + +#include "ppe.h" + +void ppe_debugfs_setup(struct ppe_device *ppe_dev); +void ppe_debugfs_teardown(struct ppe_device *ppe_dev); + +#endif diff --git a/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h b/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h new file mode 100644 index 000000000000..746dfbb5a682 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/ppe/ppe_regs.h @@ -0,0 +1,591 @@ +/* SPDX-License-Identifier: GPL-2.0-only + * + * Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. + */ + +/* PPE hardware register and table declarations. */ +#ifndef __PPE_REGS_H__ +#define __PPE_REGS_H__ + +#include <linux/bitfield.h> + +/* PPE scheduler configurations for buffer manager block. */ +#define PPE_BM_SCH_CTRL_ADDR 0xb000 +#define PPE_BM_SCH_CTRL_INC 4 +#define PPE_BM_SCH_CTRL_SCH_DEPTH GENMASK(7, 0) +#define PPE_BM_SCH_CTRL_SCH_OFFSET GENMASK(14, 8) +#define PPE_BM_SCH_CTRL_SCH_EN BIT(31) + +/* PPE drop counters. */ +#define PPE_DROP_CNT_TBL_ADDR 0xb024 +#define PPE_DROP_CNT_TBL_ENTRIES 8 +#define PPE_DROP_CNT_TBL_INC 4 + +/* BM port drop counters. */ +#define PPE_DROP_STAT_TBL_ADDR 0xe000 +#define PPE_DROP_STAT_TBL_ENTRIES 30 +#define PPE_DROP_STAT_TBL_INC 0x10 + +/* Egress VLAN counters. */ +#define PPE_EG_VSI_COUNTER_TBL_ADDR 0x41000 +#define PPE_EG_VSI_COUNTER_TBL_ENTRIES 64 +#define PPE_EG_VSI_COUNTER_TBL_INC 0x10 + +/* Port TX counters. */ +#define PPE_PORT_TX_COUNTER_TBL_ADDR 0x45000 +#define PPE_PORT_TX_COUNTER_TBL_ENTRIES 8 +#define PPE_PORT_TX_COUNTER_TBL_INC 0x10 + +/* Virtual port TX counters. */ +#define PPE_VPORT_TX_COUNTER_TBL_ADDR 0x47000 +#define PPE_VPORT_TX_COUNTER_TBL_ENTRIES 256 +#define PPE_VPORT_TX_COUNTER_TBL_INC 0x10 + +/* Queue counters. */ +#define PPE_QUEUE_TX_COUNTER_TBL_ADDR 0x4a000 +#define PPE_QUEUE_TX_COUNTER_TBL_ENTRIES 300 +#define PPE_QUEUE_TX_COUNTER_TBL_INC 0x10 + +/* RSS settings are to calculate the random RSS hash value generated during + * packet receive to ARM cores. This hash is then used to generate the queue + * offset used to determine the queue used to transmit the packet to ARM cores. 
+ */ +#define PPE_RSS_HASH_MASK_ADDR 0xb4318 +#define PPE_RSS_HASH_MASK_HASH_MASK GENMASK(20, 0) +#define PPE_RSS_HASH_MASK_FRAGMENT BIT(28) + +#define PPE_RSS_HASH_SEED_ADDR 0xb431c +#define PPE_RSS_HASH_SEED_VAL GENMASK(31, 0) + +#define PPE_RSS_HASH_MIX_ADDR 0xb4320 +#define PPE_RSS_HASH_MIX_ENTRIES 11 +#define PPE_RSS_HASH_MIX_INC 4 +#define PPE_RSS_HASH_MIX_VAL GENMASK(4, 0) + +#define PPE_RSS_HASH_FIN_ADDR 0xb4350 +#define PPE_RSS_HASH_FIN_ENTRIES 5 +#define PPE_RSS_HASH_FIN_INC 4 +#define PPE_RSS_HASH_FIN_INNER GENMASK(4, 0) +#define PPE_RSS_HASH_FIN_OUTER GENMASK(9, 5) + +#define PPE_RSS_HASH_MASK_IPV4_ADDR 0xb4380 +#define PPE_RSS_HASH_MASK_IPV4_HASH_MASK GENMASK(20, 0) +#define PPE_RSS_HASH_MASK_IPV4_FRAGMENT BIT(28) + +#define PPE_RSS_HASH_SEED_IPV4_ADDR 0xb4384 +#define PPE_RSS_HASH_SEED_IPV4_VAL GENMASK(31, 0) + +#define PPE_RSS_HASH_MIX_IPV4_ADDR 0xb4390 +#define PPE_RSS_HASH_MIX_IPV4_ENTRIES 5 +#define PPE_RSS_HASH_MIX_IPV4_INC 4 +#define PPE_RSS_HASH_MIX_IPV4_VAL GENMASK(4, 0) + +#define PPE_RSS_HASH_FIN_IPV4_ADDR 0xb43b0 +#define PPE_RSS_HASH_FIN_IPV4_ENTRIES 5 +#define PPE_RSS_HASH_FIN_IPV4_INC 4 +#define PPE_RSS_HASH_FIN_IPV4_INNER GENMASK(4, 0) +#define PPE_RSS_HASH_FIN_IPV4_OUTER GENMASK(9, 5) + +#define PPE_BM_SCH_CFG_TBL_ADDR 0xc000 +#define PPE_BM_SCH_CFG_TBL_ENTRIES 128 +#define PPE_BM_SCH_CFG_TBL_INC 0x10 +#define PPE_BM_SCH_CFG_TBL_PORT_NUM GENMASK(3, 0) +#define PPE_BM_SCH_CFG_TBL_DIR BIT(4) +#define PPE_BM_SCH_CFG_TBL_VALID BIT(5) +#define PPE_BM_SCH_CFG_TBL_SECOND_PORT_VALID BIT(6) +#define PPE_BM_SCH_CFG_TBL_SECOND_PORT GENMASK(11, 8) + +/* PPE service code configuration for the ingress direction functions, + * including bypass configuration for relevant PPE switch core functions + * such as flow entry lookup bypass. + */ +#define PPE_SERVICE_TBL_ADDR 0x15000 +#define PPE_SERVICE_TBL_ENTRIES 256 +#define PPE_SERVICE_TBL_INC 0x10 +#define PPE_SERVICE_W0_BYPASS_BITMAP GENMASK(31, 0) +#define PPE_SERVICE_W1_RX_CNT_EN BIT(0) + +#define PPE_SERVICE_SET_BYPASS_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_SERVICE_W0_BYPASS_BITMAP, tbl_cfg, value) +#define PPE_SERVICE_SET_RX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_SERVICE_W1_RX_CNT_EN, (tbl_cfg) + 0x1, value) + +/* PPE port egress VLAN configurations. */ +#define PPE_PORT_EG_VLAN_TBL_ADDR 0x20020 +#define PPE_PORT_EG_VLAN_TBL_ENTRIES 8 +#define PPE_PORT_EG_VLAN_TBL_INC 4 +#define PPE_PORT_EG_VLAN_TBL_VLAN_TYPE BIT(0) +#define PPE_PORT_EG_VLAN_TBL_CTAG_MODE GENMASK(2, 1) +#define PPE_PORT_EG_VLAN_TBL_STAG_MODE GENMASK(4, 3) +#define PPE_PORT_EG_VLAN_TBL_VSI_TAG_MODE_EN BIT(5) +#define PPE_PORT_EG_VLAN_TBL_PCP_PROP_CMD BIT(6) +#define PPE_PORT_EG_VLAN_TBL_DEI_PROP_CMD BIT(7) +#define PPE_PORT_EG_VLAN_TBL_TX_COUNTING_EN BIT(8) + +/* PPE queue counters enable/disable control. */ +#define PPE_EG_BRIDGE_CONFIG_ADDR 0x20044 +#define PPE_EG_BRIDGE_CONFIG_QUEUE_CNT_EN BIT(2) + +/* PPE service code configuration on the egress direction. 
*/ +#define PPE_EG_SERVICE_TBL_ADDR 0x43000 +#define PPE_EG_SERVICE_TBL_ENTRIES 256 +#define PPE_EG_SERVICE_TBL_INC 0x10 +#define PPE_EG_SERVICE_W0_UPDATE_ACTION GENMASK(31, 0) +#define PPE_EG_SERVICE_W1_NEXT_SERVCODE GENMASK(7, 0) +#define PPE_EG_SERVICE_W1_HW_SERVICE GENMASK(13, 8) +#define PPE_EG_SERVICE_W1_OFFSET_SEL BIT(14) +#define PPE_EG_SERVICE_W1_TX_CNT_EN BIT(15) + +#define PPE_EG_SERVICE_SET_UPDATE_ACTION(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W0_UPDATE_ACTION, tbl_cfg, value) +#define PPE_EG_SERVICE_SET_NEXT_SERVCODE(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_NEXT_SERVCODE, (tbl_cfg) + 0x1, value) +#define PPE_EG_SERVICE_SET_HW_SERVICE(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_HW_SERVICE, (tbl_cfg) + 0x1, value) +#define PPE_EG_SERVICE_SET_OFFSET_SEL(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_OFFSET_SEL, (tbl_cfg) + 0x1, value) +#define PPE_EG_SERVICE_SET_TX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_EG_SERVICE_W1_TX_CNT_EN, (tbl_cfg) + 0x1, value) + +/* PPE port bridge configuration */ +#define PPE_PORT_BRIDGE_CTRL_ADDR 0x60300 +#define PPE_PORT_BRIDGE_CTRL_ENTRIES 8 +#define PPE_PORT_BRIDGE_CTRL_INC 4 +#define PPE_PORT_BRIDGE_NEW_LRN_EN BIT(0) +#define PPE_PORT_BRIDGE_STA_MOVE_LRN_EN BIT(3) +#define PPE_PORT_BRIDGE_TXMAC_EN BIT(16) + +/* PPE port control configurations for the traffic to the multicast queues. */ +#define PPE_MC_MTU_CTRL_TBL_ADDR 0x60a00 +#define PPE_MC_MTU_CTRL_TBL_ENTRIES 8 +#define PPE_MC_MTU_CTRL_TBL_INC 4 +#define PPE_MC_MTU_CTRL_TBL_MTU GENMASK(13, 0) +#define PPE_MC_MTU_CTRL_TBL_MTU_CMD GENMASK(15, 14) +#define PPE_MC_MTU_CTRL_TBL_TX_CNT_EN BIT(16) + +/* PPE VSI configurations */ +#define PPE_VSI_TBL_ADDR 0x63800 +#define PPE_VSI_TBL_ENTRIES 64 +#define PPE_VSI_TBL_INC 0x10 +#define PPE_VSI_W0_MEMBER_PORT_BITMAP GENMASK(7, 0) +#define PPE_VSI_W0_UUC_BITMAP GENMASK(15, 8) +#define PPE_VSI_W0_UMC_BITMAP GENMASK(23, 16) +#define PPE_VSI_W0_BC_BITMAP GENMASK(31, 24) +#define PPE_VSI_W1_NEW_ADDR_LRN_EN BIT(0) +#define PPE_VSI_W1_NEW_ADDR_FWD_CMD GENMASK(2, 1) +#define PPE_VSI_W1_STATION_MOVE_LRN_EN BIT(3) +#define PPE_VSI_W1_STATION_MOVE_FWD_CMD GENMASK(5, 4) + +#define PPE_VSI_SET_MEMBER_PORT_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_MEMBER_PORT_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_UUC_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_UUC_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_UMC_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_UMC_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_BC_BITMAP(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W0_BC_BITMAP, tbl_cfg, value) +#define PPE_VSI_SET_NEW_ADDR_LRN_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_NEW_ADDR_LRN_EN, (tbl_cfg) + 0x1, value) +#define PPE_VSI_SET_NEW_ADDR_FWD_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_NEW_ADDR_FWD_CMD, (tbl_cfg) + 0x1, value) +#define PPE_VSI_SET_STATION_MOVE_LRN_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_STATION_MOVE_LRN_EN, (tbl_cfg) + 0x1, value) +#define PPE_VSI_SET_STATION_MOVE_FWD_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_VSI_W1_STATION_MOVE_FWD_CMD, (tbl_cfg) + 0x1, value) + +/* PPE port control configurations for the traffic to the unicast queues. 
*/ +#define PPE_MRU_MTU_CTRL_TBL_ADDR 0x65000 +#define PPE_MRU_MTU_CTRL_TBL_ENTRIES 256 +#define PPE_MRU_MTU_CTRL_TBL_INC 0x10 +#define PPE_MRU_MTU_CTRL_W0_MRU GENMASK(13, 0) +#define PPE_MRU_MTU_CTRL_W0_MRU_CMD GENMASK(15, 14) +#define PPE_MRU_MTU_CTRL_W0_MTU GENMASK(29, 16) +#define PPE_MRU_MTU_CTRL_W0_MTU_CMD GENMASK(31, 30) +#define PPE_MRU_MTU_CTRL_W1_RX_CNT_EN BIT(0) +#define PPE_MRU_MTU_CTRL_W1_TX_CNT_EN BIT(1) +#define PPE_MRU_MTU_CTRL_W1_SRC_PROFILE GENMASK(3, 2) +#define PPE_MRU_MTU_CTRL_W1_INNER_PREC_LOW BIT(31) +#define PPE_MRU_MTU_CTRL_W2_INNER_PREC_HIGH GENMASK(1, 0) + +#define PPE_MRU_MTU_CTRL_SET_MRU(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MRU, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_MRU_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MRU_CMD, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_MTU(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MTU, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_MTU_CMD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W0_MTU_CMD, tbl_cfg, value) +#define PPE_MRU_MTU_CTRL_SET_RX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W1_RX_CNT_EN, (tbl_cfg) + 0x1, value) +#define PPE_MRU_MTU_CTRL_SET_TX_CNT_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_MRU_MTU_CTRL_W1_TX_CNT_EN, (tbl_cfg) + 0x1, value) + +/* PPE service code configuration for destination port and counter. */ +#define PPE_IN_L2_SERVICE_TBL_ADDR 0x66000 +#define PPE_IN_L2_SERVICE_TBL_ENTRIES 256 +#define PPE_IN_L2_SERVICE_TBL_INC 0x10 +#define PPE_IN_L2_SERVICE_TBL_DST_PORT_ID_VALID BIT(0) +#define PPE_IN_L2_SERVICE_TBL_DST_PORT_ID GENMASK(4, 1) +#define PPE_IN_L2_SERVICE_TBL_DST_DIRECTION BIT(5) +#define PPE_IN_L2_SERVICE_TBL_DST_BYPASS_BITMAP GENMASK(29, 6) +#define PPE_IN_L2_SERVICE_TBL_RX_CNT_EN BIT(30) +#define PPE_IN_L2_SERVICE_TBL_TX_CNT_EN BIT(31) + +/* L2 Port configurations */ +#define PPE_L2_VP_PORT_TBL_ADDR 0x98000 +#define PPE_L2_VP_PORT_TBL_ENTRIES 256 +#define PPE_L2_VP_PORT_TBL_INC 0x10 +#define PPE_L2_VP_PORT_W0_INVALID_VSI_FWD_EN BIT(0) +#define PPE_L2_VP_PORT_W0_DST_INFO GENMASK(9, 2) + +#define PPE_L2_PORT_SET_INVALID_VSI_FWD_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_L2_VP_PORT_W0_INVALID_VSI_FWD_EN, tbl_cfg, value) +#define PPE_L2_PORT_SET_DST_INFO(tbl_cfg, value) \ + FIELD_MODIFY(PPE_L2_VP_PORT_W0_DST_INFO, tbl_cfg, value) + +/* Port RX and RX drop counters. */ +#define PPE_PORT_RX_CNT_TBL_ADDR 0x150000 +#define PPE_PORT_RX_CNT_TBL_ENTRIES 256 +#define PPE_PORT_RX_CNT_TBL_INC 0x20 + +/* Physical port RX and RX drop counters. */ +#define PPE_PHY_PORT_RX_CNT_TBL_ADDR 0x156000 +#define PPE_PHY_PORT_RX_CNT_TBL_ENTRIES 8 +#define PPE_PHY_PORT_RX_CNT_TBL_INC 0x20 + +/* Counters for the packet to CPU port. */ +#define PPE_DROP_CPU_CNT_TBL_ADDR 0x160000 +#define PPE_DROP_CPU_CNT_TBL_ENTRIES 1280 +#define PPE_DROP_CPU_CNT_TBL_INC 0x10 + +/* VLAN counters. */ +#define PPE_VLAN_CNT_TBL_ADDR 0x178000 +#define PPE_VLAN_CNT_TBL_ENTRIES 64 +#define PPE_VLAN_CNT_TBL_INC 0x10 + +/* PPE L2 counters. */ +#define PPE_PRE_L2_CNT_TBL_ADDR 0x17c000 +#define PPE_PRE_L2_CNT_TBL_ENTRIES 64 +#define PPE_PRE_L2_CNT_TBL_INC 0x20 + +/* Port TX drop counters. */ +#define PPE_PORT_TX_DROP_CNT_TBL_ADDR 0x17d000 +#define PPE_PORT_TX_DROP_CNT_TBL_ENTRIES 8 +#define PPE_PORT_TX_DROP_CNT_TBL_INC 0x10 + +/* Virtual port TX counters. */ +#define PPE_VPORT_TX_DROP_CNT_TBL_ADDR 0x17e000 +#define PPE_VPORT_TX_DROP_CNT_TBL_ENTRIES 256 +#define PPE_VPORT_TX_DROP_CNT_TBL_INC 0x10 + +/* Counters for the tunnel packet. 
 */
+#define PPE_TPR_PKT_CNT_TBL_ADDR 0x1d0080
+#define PPE_TPR_PKT_CNT_TBL_ENTRIES 8
+#define PPE_TPR_PKT_CNT_TBL_INC 4
+
+/* Counters for all the packets received. */
+#define PPE_IPR_PKT_CNT_TBL_ADDR 0x1e0080
+#define PPE_IPR_PKT_CNT_TBL_ENTRIES 8
+#define PPE_IPR_PKT_CNT_TBL_INC 4
+
+/* PPE service code configuration for the tunnel packet. */
+#define PPE_TL_SERVICE_TBL_ADDR 0x306000
+#define PPE_TL_SERVICE_TBL_ENTRIES 256
+#define PPE_TL_SERVICE_TBL_INC 4
+#define PPE_TL_SERVICE_TBL_BYPASS_BITMAP GENMASK(31, 0)
+
+/* Port scheduler global config. */
+#define PPE_PSCH_SCH_DEPTH_CFG_ADDR 0x400000
+#define PPE_PSCH_SCH_DEPTH_CFG_INC 4
+#define PPE_PSCH_SCH_DEPTH_CFG_SCH_DEPTH GENMASK(7, 0)
+
+/* PPE queue level scheduler configurations. */
+#define PPE_L0_FLOW_MAP_TBL_ADDR 0x402000
+#define PPE_L0_FLOW_MAP_TBL_ENTRIES 300
+#define PPE_L0_FLOW_MAP_TBL_INC 0x10
+#define PPE_L0_FLOW_MAP_TBL_FLOW_ID GENMASK(5, 0)
+#define PPE_L0_FLOW_MAP_TBL_C_PRI GENMASK(8, 6)
+#define PPE_L0_FLOW_MAP_TBL_E_PRI GENMASK(11, 9)
+#define PPE_L0_FLOW_MAP_TBL_C_NODE_WT GENMASK(21, 12)
+#define PPE_L0_FLOW_MAP_TBL_E_NODE_WT GENMASK(31, 22)
+
+#define PPE_L0_C_FLOW_CFG_TBL_ADDR 0x404000
+#define PPE_L0_C_FLOW_CFG_TBL_ENTRIES 512
+#define PPE_L0_C_FLOW_CFG_TBL_INC 0x10
+#define PPE_L0_C_FLOW_CFG_TBL_NODE_ID GENMASK(7, 0)
+#define PPE_L0_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(8)
+
+#define PPE_L0_E_FLOW_CFG_TBL_ADDR 0x406000
+#define PPE_L0_E_FLOW_CFG_TBL_ENTRIES 512
+#define PPE_L0_E_FLOW_CFG_TBL_INC 0x10
+#define PPE_L0_E_FLOW_CFG_TBL_NODE_ID GENMASK(7, 0)
+#define PPE_L0_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(8)
+
+#define PPE_L0_FLOW_PORT_MAP_TBL_ADDR 0x408000
+#define PPE_L0_FLOW_PORT_MAP_TBL_ENTRIES 300
+#define PPE_L0_FLOW_PORT_MAP_TBL_INC 0x10
+#define PPE_L0_FLOW_PORT_MAP_TBL_PORT_NUM GENMASK(3, 0)
+
+#define PPE_L0_COMP_CFG_TBL_ADDR 0x428000
+#define PPE_L0_COMP_CFG_TBL_ENTRIES 300
+#define PPE_L0_COMP_CFG_TBL_INC 0x10
+#define PPE_L0_COMP_CFG_TBL_SHAPER_METER_LEN GENMASK(1, 0)
+#define PPE_L0_COMP_CFG_TBL_NODE_METER_LEN GENMASK(3, 2)
+
+/* PPE queue to Ethernet DMA ring mapping table. */
+#define PPE_RING_Q_MAP_TBL_ADDR 0x42a000
+#define PPE_RING_Q_MAP_TBL_ENTRIES 24
+#define PPE_RING_Q_MAP_TBL_INC 0x40
+
+/* Table addresses for per-queue dequeue setting. */
+#define PPE_DEQ_OPR_TBL_ADDR 0x430000
+#define PPE_DEQ_OPR_TBL_ENTRIES 300
+#define PPE_DEQ_OPR_TBL_INC 0x10
+#define PPE_DEQ_OPR_TBL_DEQ_DISABLE BIT(0)
+
+/* PPE flow level scheduler configurations. 
*/ +#define PPE_L1_FLOW_MAP_TBL_ADDR 0x440000 +#define PPE_L1_FLOW_MAP_TBL_ENTRIES 64 +#define PPE_L1_FLOW_MAP_TBL_INC 0x10 +#define PPE_L1_FLOW_MAP_TBL_FLOW_ID GENMASK(3, 0) +#define PPE_L1_FLOW_MAP_TBL_C_PRI GENMASK(6, 4) +#define PPE_L1_FLOW_MAP_TBL_E_PRI GENMASK(9, 7) +#define PPE_L1_FLOW_MAP_TBL_C_NODE_WT GENMASK(19, 10) +#define PPE_L1_FLOW_MAP_TBL_E_NODE_WT GENMASK(29, 20) + +#define PPE_L1_C_FLOW_CFG_TBL_ADDR 0x442000 +#define PPE_L1_C_FLOW_CFG_TBL_ENTRIES 64 +#define PPE_L1_C_FLOW_CFG_TBL_INC 0x10 +#define PPE_L1_C_FLOW_CFG_TBL_NODE_ID GENMASK(5, 0) +#define PPE_L1_C_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(6) + +#define PPE_L1_E_FLOW_CFG_TBL_ADDR 0x444000 +#define PPE_L1_E_FLOW_CFG_TBL_ENTRIES 64 +#define PPE_L1_E_FLOW_CFG_TBL_INC 0x10 +#define PPE_L1_E_FLOW_CFG_TBL_NODE_ID GENMASK(5, 0) +#define PPE_L1_E_FLOW_CFG_TBL_NODE_CREDIT_UNIT BIT(6) + +#define PPE_L1_FLOW_PORT_MAP_TBL_ADDR 0x446000 +#define PPE_L1_FLOW_PORT_MAP_TBL_ENTRIES 64 +#define PPE_L1_FLOW_PORT_MAP_TBL_INC 0x10 +#define PPE_L1_FLOW_PORT_MAP_TBL_PORT_NUM GENMASK(3, 0) + +#define PPE_L1_COMP_CFG_TBL_ADDR 0x46a000 +#define PPE_L1_COMP_CFG_TBL_ENTRIES 64 +#define PPE_L1_COMP_CFG_TBL_INC 0x10 +#define PPE_L1_COMP_CFG_TBL_SHAPER_METER_LEN GENMASK(1, 0) +#define PPE_L1_COMP_CFG_TBL_NODE_METER_LEN GENMASK(3, 2) + +/* PPE port scheduler configurations for egress. */ +#define PPE_PSCH_SCH_CFG_TBL_ADDR 0x47a000 +#define PPE_PSCH_SCH_CFG_TBL_ENTRIES 128 +#define PPE_PSCH_SCH_CFG_TBL_INC 0x10 +#define PPE_PSCH_SCH_CFG_TBL_DES_PORT GENMASK(3, 0) +#define PPE_PSCH_SCH_CFG_TBL_ENS_PORT GENMASK(7, 4) +#define PPE_PSCH_SCH_CFG_TBL_ENS_PORT_BITMAP GENMASK(15, 8) +#define PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT_EN BIT(16) +#define PPE_PSCH_SCH_CFG_TBL_DES_SECOND_PORT GENMASK(20, 17) + +/* There are 15 BM ports and 4 BM groups supported by PPE. + * BM port (0-7) is for EDMA port 0, BM port (8-13) is for + * PPE physical port 1-6 and BM port 14 is for EIP port. + */ +#define PPE_BM_PORT_FC_MODE_ADDR 0x600100 +#define PPE_BM_PORT_FC_MODE_ENTRIES 15 +#define PPE_BM_PORT_FC_MODE_INC 0x4 +#define PPE_BM_PORT_FC_MODE_EN BIT(0) + +#define PPE_BM_PORT_GROUP_ID_ADDR 0x600180 +#define PPE_BM_PORT_GROUP_ID_ENTRIES 15 +#define PPE_BM_PORT_GROUP_ID_INC 0x4 +#define PPE_BM_PORT_GROUP_ID_SHARED_GROUP_ID GENMASK(1, 0) + +/* Counters for PPE buffers used for packets cached. */ +#define PPE_BM_USED_CNT_TBL_ADDR 0x6001c0 +#define PPE_BM_USED_CNT_TBL_ENTRIES 15 +#define PPE_BM_USED_CNT_TBL_INC 0x4 +#define PPE_BM_USED_CNT_VAL GENMASK(10, 0) + +/* Counters for PPE buffers used for packets received after pause frame sent. 
*/ +#define PPE_BM_REACT_CNT_TBL_ADDR 0x600240 +#define PPE_BM_REACT_CNT_TBL_ENTRIES 15 +#define PPE_BM_REACT_CNT_TBL_INC 0x4 +#define PPE_BM_REACT_CNT_VAL GENMASK(8, 0) + +#define PPE_BM_SHARED_GROUP_CFG_ADDR 0x600290 +#define PPE_BM_SHARED_GROUP_CFG_ENTRIES 4 +#define PPE_BM_SHARED_GROUP_CFG_INC 0x4 +#define PPE_BM_SHARED_GROUP_CFG_SHARED_LIMIT GENMASK(10, 0) + +#define PPE_BM_PORT_FC_CFG_TBL_ADDR 0x601000 +#define PPE_BM_PORT_FC_CFG_TBL_ENTRIES 15 +#define PPE_BM_PORT_FC_CFG_TBL_INC 0x10 +#define PPE_BM_PORT_FC_W0_REACT_LIMIT GENMASK(8, 0) +#define PPE_BM_PORT_FC_W0_RESUME_THRESHOLD GENMASK(17, 9) +#define PPE_BM_PORT_FC_W0_RESUME_OFFSET GENMASK(28, 18) +#define PPE_BM_PORT_FC_W0_CEILING_LOW GENMASK(31, 29) +#define PPE_BM_PORT_FC_W1_CEILING_HIGH GENMASK(7, 0) +#define PPE_BM_PORT_FC_W1_WEIGHT GENMASK(10, 8) +#define PPE_BM_PORT_FC_W1_DYNAMIC BIT(11) +#define PPE_BM_PORT_FC_W1_PRE_ALLOC GENMASK(22, 12) + +#define PPE_BM_PORT_FC_SET_REACT_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_REACT_LIMIT, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_RESUME_THRESHOLD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_RESUME_THRESHOLD, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_RESUME_OFFSET(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_RESUME_OFFSET, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_CEILING_LOW(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W0_CEILING_LOW, tbl_cfg, value) +#define PPE_BM_PORT_FC_SET_CEILING_HIGH(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_CEILING_HIGH, (tbl_cfg) + 0x1, value) +#define PPE_BM_PORT_FC_SET_WEIGHT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_WEIGHT, (tbl_cfg) + 0x1, value) +#define PPE_BM_PORT_FC_SET_DYNAMIC(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_DYNAMIC, (tbl_cfg) + 0x1, value) +#define PPE_BM_PORT_FC_SET_PRE_ALLOC(tbl_cfg, value) \ + FIELD_MODIFY(PPE_BM_PORT_FC_W1_PRE_ALLOC, (tbl_cfg) + 0x1, value) + +/* The queue base configurations based on destination port, + * service code or CPU code. + */ +#define PPE_UCAST_QUEUE_MAP_TBL_ADDR 0x810000 +#define PPE_UCAST_QUEUE_MAP_TBL_ENTRIES 3072 +#define PPE_UCAST_QUEUE_MAP_TBL_INC 0x10 +#define PPE_UCAST_QUEUE_MAP_TBL_PROFILE_ID GENMASK(3, 0) +#define PPE_UCAST_QUEUE_MAP_TBL_QUEUE_ID GENMASK(11, 4) + +/* The queue offset configurations based on RSS hash value. */ +#define PPE_UCAST_HASH_MAP_TBL_ADDR 0x830000 +#define PPE_UCAST_HASH_MAP_TBL_ENTRIES 4096 +#define PPE_UCAST_HASH_MAP_TBL_INC 0x10 +#define PPE_UCAST_HASH_MAP_TBL_HASH GENMASK(7, 0) + +/* The queue offset configurations based on PPE internal priority. */ +#define PPE_UCAST_PRIORITY_MAP_TBL_ADDR 0x842000 +#define PPE_UCAST_PRIORITY_MAP_TBL_ENTRIES 256 +#define PPE_UCAST_PRIORITY_MAP_TBL_INC 0x10 +#define PPE_UCAST_PRIORITY_MAP_TBL_CLASS GENMASK(3, 0) + +/* PPE unicast queue (0-255) configurations. 
*/ +#define PPE_AC_UNICAST_QUEUE_CFG_TBL_ADDR 0x848000 +#define PPE_AC_UNICAST_QUEUE_CFG_TBL_ENTRIES 256 +#define PPE_AC_UNICAST_QUEUE_CFG_TBL_INC 0x10 +#define PPE_AC_UNICAST_QUEUE_CFG_W0_EN BIT(0) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_WRED_EN BIT(1) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_FC_EN BIT(2) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_CLR_AWARE BIT(3) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_GRP_ID GENMASK(5, 4) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_PRE_LIMIT GENMASK(16, 6) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_DYNAMIC BIT(17) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_WEIGHT GENMASK(20, 18) +#define PPE_AC_UNICAST_QUEUE_CFG_W0_THRESHOLD GENMASK(31, 21) +#define PPE_AC_UNICAST_QUEUE_CFG_W3_GRN_RESUME GENMASK(23, 13) + +#define PPE_AC_UNICAST_QUEUE_SET_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_EN, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_GRP_ID(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_GRP_ID, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_PRE_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_PRE_LIMIT, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_DYNAMIC(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_DYNAMIC, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_WEIGHT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_WEIGHT, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_THRESHOLD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W0_THRESHOLD, tbl_cfg, value) +#define PPE_AC_UNICAST_QUEUE_SET_GRN_RESUME(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_UNICAST_QUEUE_CFG_W3_GRN_RESUME, (tbl_cfg) + 0x3, value) + +/* PPE multicast queue (256-299) configurations. */ +#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_ADDR 0x84a000 +#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_ENTRIES 44 +#define PPE_AC_MULTICAST_QUEUE_CFG_TBL_INC 0x10 +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_EN BIT(0) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_FC_EN BIT(1) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_CLR_AWARE BIT(2) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_GRP_ID GENMASK(4, 3) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_PRE_LIMIT GENMASK(15, 5) +#define PPE_AC_MULTICAST_QUEUE_CFG_W0_THRESHOLD GENMASK(26, 16) +#define PPE_AC_MULTICAST_QUEUE_CFG_W2_RESUME GENMASK(17, 7) + +#define PPE_AC_MULTICAST_QUEUE_SET_EN(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_EN, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_GRP_ID(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_GRP_ID, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_PRE_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_PRE_LIMIT, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_THRESHOLD(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W0_THRESHOLD, tbl_cfg, value) +#define PPE_AC_MULTICAST_QUEUE_SET_GRN_RESUME(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_MULTICAST_QUEUE_CFG_W2_RESUME, (tbl_cfg) + 0x2, value) + +/* PPE admission control group (0-3) configurations */ +#define PPE_AC_GRP_CFG_TBL_ADDR 0x84c000 +#define PPE_AC_GRP_CFG_TBL_ENTRIES 0x4 +#define PPE_AC_GRP_CFG_TBL_INC 0x10 +#define PPE_AC_GRP_W0_AC_EN BIT(0) +#define PPE_AC_GRP_W0_AC_FC_EN BIT(1) +#define PPE_AC_GRP_W0_CLR_AWARE BIT(2) +#define PPE_AC_GRP_W0_THRESHOLD_LOW GENMASK(31, 25) +#define PPE_AC_GRP_W1_THRESHOLD_HIGH GENMASK(3, 0) +#define PPE_AC_GRP_W1_BUF_LIMIT GENMASK(14, 4) +#define PPE_AC_GRP_W2_RESUME_GRN GENMASK(15, 5) +#define PPE_AC_GRP_W2_PRE_ALLOC GENMASK(26, 16) + +#define PPE_AC_GRP_SET_BUF_LIMIT(tbl_cfg, value) \ + FIELD_MODIFY(PPE_AC_GRP_W1_BUF_LIMIT, 
(tbl_cfg) + 0x1, value) + +/* Counters for packets handled by unicast queues (0-255). */ +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_ADDR 0x84e000 +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_ENTRIES 256 +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_INC 0x10 +#define PPE_AC_UNICAST_QUEUE_CNT_TBL_PEND_CNT GENMASK(12, 0) + +/* Counters for packets handled by multicast queues (256-299). */ +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_ADDR 0x852000 +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_ENTRIES 44 +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_INC 0x10 +#define PPE_AC_MULTICAST_QUEUE_CNT_TBL_PEND_CNT GENMASK(12, 0) + +/* Table addresses for per-queue enqueue setting. */ +#define PPE_ENQ_OPR_TBL_ADDR 0x85c000 +#define PPE_ENQ_OPR_TBL_ENTRIES 300 +#define PPE_ENQ_OPR_TBL_INC 0x10 +#define PPE_ENQ_OPR_TBL_ENQ_DISABLE BIT(0) + +/* Unicast drop count includes the possible drops with WRED for the green, + * yellow and red categories. + */ +#define PPE_UNICAST_DROP_CNT_TBL_ADDR 0x9e0000 +#define PPE_UNICAST_DROP_CNT_TBL_ENTRIES 1536 +#define PPE_UNICAST_DROP_CNT_TBL_INC 0x10 +#define PPE_UNICAST_DROP_TYPES 6 +#define PPE_UNICAST_DROP_FORCE_OFFSET 3 + +/* There are 16 multicast queues dedicated to CPU port 0. Multicast drop + * count includes the force drop for green, yellow and red category packets. + */ +#define PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR 0x9f0000 +#define PPE_P0_MULTICAST_DROP_CNT_TBL_ENTRIES 48 +#define PPE_P0_MULTICAST_DROP_CNT_TBL_INC 0x10 +#define PPE_P0_MULTICAST_QUEUE_NUM 16 + +/* Each PPE physical port has four dedicated multicast queues, providing + * a total of 12 entries per port. The multicast drop count includes forced + * drops for green, yellow, and red category packets. + */ +#define PPE_MULTICAST_QUEUE_PORT_ADDR_INC 0x1000 +#define PPE_MULTICAST_DROP_CNT_TBL_INC 0x10 +#define PPE_MULTICAST_DROP_TYPES 3 +#define PPE_MULTICAST_QUEUE_NUM 4 +#define PPE_MULTICAST_DROP_CNT_TBL_ENTRIES 12 + +#define PPE_CPU_PORT_MULTICAST_FORCE_DROP_CNT_TBL_ADDR(mq_offset) \ + (PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR + \ + (mq_offset) * PPE_P0_MULTICAST_DROP_CNT_TBL_INC * \ + PPE_MULTICAST_DROP_TYPES) + +#define PPE_P1_MULTICAST_DROP_CNT_TBL_ADDR \ + (PPE_P0_MULTICAST_DROP_CNT_TBL_ADDR + PPE_MULTICAST_QUEUE_PORT_ADDR_INC) +#endif diff --git a/drivers/net/ethernet/realtek/Kconfig b/drivers/net/ethernet/realtek/Kconfig index fe136f61586f..272c83bfdc6c 100644 --- a/drivers/net/ethernet/realtek/Kconfig +++ b/drivers/net/ethernet/realtek/Kconfig @@ -58,7 +58,7 @@ config 8139TOO config 8139TOO_PIO bool "Use PIO instead of MMIO" default y - depends on 8139TOO + depends on 8139TOO && !NO_IOPORT_MAP help This instructs the driver to use programmed I/O ports (PIO) instead of PCI shared memory (MMIO). 
This can possibly solve some problems diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 9c601f271c02..8903ae90afcb 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -3409,7 +3409,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xd412, 0x0fff, sw_cnt_1ms_ini); } - r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0070); + r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0000); r8168_mac_ocp_modify(tp, 0xe052, 0x6000, 0x8008); r8168_mac_ocp_modify(tp, 0xe0d6, 0x01ff, 0x017f); r8168_mac_ocp_modify(tp, 0xd420, 0x0fff, 0x047f); @@ -3514,7 +3514,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xd412, 0x0fff, sw_cnt_1ms_ini); } - r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0070); + r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0000); r8168_mac_ocp_write(tp, 0xea80, 0x0003); r8168_mac_ocp_modify(tp, 0xe052, 0x0000, 0x0009); r8168_mac_ocp_modify(tp, 0xd420, 0x0fff, 0x047f); @@ -3715,7 +3715,7 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp) r8168_mac_ocp_modify(tp, 0xc0b4, 0x0000, 0x000c); r8168_mac_ocp_modify(tp, 0xeb6a, 0x00ff, 0x0033); r8168_mac_ocp_modify(tp, 0xeb50, 0x03e0, 0x0040); - r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0030); + r8168_mac_ocp_modify(tp, 0xe056, 0x00f0, 0x0000); r8168_mac_ocp_modify(tp, 0xe040, 0x1000, 0x0000); r8168_mac_ocp_modify(tp, 0xea1c, 0x0003, 0x0001); if (tp->mac_version == RTL_GIGA_MAC_VER_70 || @@ -5441,10 +5441,12 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* Disable ASPM L1 as that cause random device stop working * problems as well as full system hangs for some PCIe devices users. */ - if (rtl_aspm_is_safe(tp)) + if (rtl_aspm_is_safe(tp)) { + dev_info(&pdev->dev, "System vendor flags ASPM as safe\n"); rc = 0; - else + } else { rc = pci_disable_link_state(pdev, PCIE_LINK_STATE_L1); + } tp->aspm_manageable = !rc; tp->dash_type = rtl_get_dash_type(tp); diff --git a/drivers/net/ethernet/renesas/Makefile b/drivers/net/ethernet/renesas/Makefile index f65fc76f8b4d..d63e0c61bb68 100644 --- a/drivers/net/ethernet/renesas/Makefile +++ b/drivers/net/ethernet/renesas/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_SH_ETH) += sh_eth.o ravb-objs := ravb_main.o ravb_ptp.o obj-$(CONFIG_RAVB) += ravb.o +rswitch-objs := rswitch_main.o rswitch_l2.o obj-$(CONFIG_RENESAS_ETHER_SWITCH) += rswitch.o obj-$(CONFIG_RENESAS_GEN4_PTP) += rcar_gen4_ptp.o diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 94b6fb94f8f1..9d3bd65b85ff 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -802,7 +802,6 @@ static int ravb_rx_gbeth(struct net_device *ndev, int budget, int q) const struct ravb_hw_info *info = priv->info; struct net_device_stats *stats; struct ravb_rx_desc *desc; - struct sk_buff *skb; int rx_packets = 0; u8 desc_status; u16 desc_len; @@ -815,6 +814,8 @@ static int ravb_rx_gbeth(struct net_device *ndev, int budget, int q) stats = &priv->stats[q]; for (i = 0; i < limit; i++, priv->cur_rx[q]++) { + struct sk_buff *skb = NULL; + entry = priv->cur_rx[q] % priv->num_rx_ring[q]; desc = &priv->rx_ring[q].desc[entry]; if (rx_packets == budget || desc->die_dt == DT_FEMPTY) diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c index 4c3e8cc5046f..d0979abd36de 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.c 
+++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.c @@ -12,19 +12,18 @@ #include <linux/slab.h> #include "rcar_gen4_ptp.h" -#define ptp_to_priv(ptp) container_of(ptp, struct rcar_gen4_ptp_private, info) -static const struct rcar_gen4_ptp_reg_offset gen4_offs = { - .enable = PTPTMEC, - .disable = PTPTMDC, - .increment = PTPTIVC0, - .config_t0 = PTPTOVC00, - .config_t1 = PTPTOVC10, - .config_t2 = PTPTOVC20, - .monitor_t0 = PTPGPTPTM00, - .monitor_t1 = PTPGPTPTM10, - .monitor_t2 = PTPGPTPTM20, -}; +#define PTPTMEC_REG 0x0010 +#define PTPTMDC_REG 0x0014 +#define PTPTIVC0_REG 0x0020 +#define PTPTOVC00_REG 0x0030 +#define PTPTOVC10_REG 0x0034 +#define PTPTOVC20_REG 0x0038 +#define PTPGPTPTM00_REG 0x0050 +#define PTPGPTPTM10_REG 0x0054 +#define PTPGPTPTM20_REG 0x0058 + +#define ptp_to_priv(ptp) container_of(ptp, struct rcar_gen4_ptp_private, info) static int rcar_gen4_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) { @@ -38,20 +37,21 @@ static int rcar_gen4_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) diff = div_s64(addend * scaled_ppm_to_ppb(scaled_ppm), NSEC_PER_SEC); addend = neg_adj ? addend - diff : addend + diff; - iowrite32(addend, ptp_priv->addr + ptp_priv->offs->increment); + iowrite32(addend, ptp_priv->addr + PTPTIVC0_REG); return 0; } -/* Caller must hold the lock */ static void _rcar_gen4_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts) { struct rcar_gen4_ptp_private *ptp_priv = ptp_to_priv(ptp); - ts->tv_nsec = ioread32(ptp_priv->addr + ptp_priv->offs->monitor_t0); - ts->tv_sec = ioread32(ptp_priv->addr + ptp_priv->offs->monitor_t1) | - ((s64)ioread32(ptp_priv->addr + ptp_priv->offs->monitor_t2) << 32); + lockdep_assert_held(&ptp_priv->lock); + + ts->tv_nsec = ioread32(ptp_priv->addr + PTPGPTPTM00_REG); + ts->tv_sec = ioread32(ptp_priv->addr + PTPGPTPTM10_REG) | + ((s64)ioread32(ptp_priv->addr + PTPGPTPTM20_REG) << 32); } static int rcar_gen4_ptp_gettime(struct ptp_clock_info *ptp, @@ -73,14 +73,14 @@ static void _rcar_gen4_ptp_settime(struct ptp_clock_info *ptp, { struct rcar_gen4_ptp_private *ptp_priv = ptp_to_priv(ptp); - iowrite32(1, ptp_priv->addr + ptp_priv->offs->disable); - iowrite32(0, ptp_priv->addr + ptp_priv->offs->config_t2); - iowrite32(0, ptp_priv->addr + ptp_priv->offs->config_t1); - iowrite32(0, ptp_priv->addr + ptp_priv->offs->config_t0); - iowrite32(1, ptp_priv->addr + ptp_priv->offs->enable); - iowrite32(ts->tv_sec >> 32, ptp_priv->addr + ptp_priv->offs->config_t2); - iowrite32(ts->tv_sec, ptp_priv->addr + ptp_priv->offs->config_t1); - iowrite32(ts->tv_nsec, ptp_priv->addr + ptp_priv->offs->config_t0); + iowrite32(1, ptp_priv->addr + PTPTMDC_REG); + iowrite32(0, ptp_priv->addr + PTPTOVC20_REG); + iowrite32(0, ptp_priv->addr + PTPTOVC10_REG); + iowrite32(0, ptp_priv->addr + PTPTOVC00_REG); + iowrite32(1, ptp_priv->addr + PTPTMEC_REG); + iowrite32(ts->tv_sec >> 32, ptp_priv->addr + PTPTOVC20_REG); + iowrite32(ts->tv_sec, ptp_priv->addr + PTPTOVC10_REG); + iowrite32(ts->tv_nsec, ptp_priv->addr + PTPTOVC00_REG); } static int rcar_gen4_ptp_settime(struct ptp_clock_info *ptp, @@ -130,17 +130,6 @@ static struct ptp_clock_info rcar_gen4_ptp_info = { .enable = rcar_gen4_ptp_enable, }; -static int rcar_gen4_ptp_set_offs(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout) -{ - if (layout != RCAR_GEN4_PTP_REG_LAYOUT) - return -EINVAL; - - ptp_priv->offs = &gen4_offs; - - return 0; -} - static s64 rcar_gen4_ptp_rate_to_increment(u32 rate) { /* Timer increment in ns. 
@@ -151,27 +140,20 @@ static s64 rcar_gen4_ptp_rate_to_increment(u32 rate) return div_s64(1000000000LL << 27, rate); } -int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout, u32 rate) +int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, u32 rate) { - int ret; - if (ptp_priv->initialized) return 0; spin_lock_init(&ptp_priv->lock); - ret = rcar_gen4_ptp_set_offs(ptp_priv, layout); - if (ret) - return ret; - ptp_priv->default_addend = rcar_gen4_ptp_rate_to_increment(rate); - iowrite32(ptp_priv->default_addend, ptp_priv->addr + ptp_priv->offs->increment); + iowrite32(ptp_priv->default_addend, ptp_priv->addr + PTPTIVC0_REG); ptp_priv->clock = ptp_clock_register(&ptp_priv->info, NULL); if (IS_ERR(ptp_priv->clock)) return PTR_ERR(ptp_priv->clock); - iowrite32(0x01, ptp_priv->addr + ptp_priv->offs->enable); + iowrite32(0x01, ptp_priv->addr + PTPTMEC_REG); ptp_priv->initialized = true; return 0; @@ -180,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcar_gen4_ptp_register); int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv) { - iowrite32(1, ptp_priv->addr + ptp_priv->offs->disable); + iowrite32(1, ptp_priv->addr + PTPTMDC_REG); return ptp_clock_unregister(ptp_priv->clock); } diff --git a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h index e22da5acd53d..f77e79e47357 100644 --- a/drivers/net/ethernet/renesas/rcar_gen4_ptp.h +++ b/drivers/net/ethernet/renesas/rcar_gen4_ptp.h @@ -11,10 +11,6 @@ #define RCAR_GEN4_GPTP_OFFSET_S4 0x00018000 -enum rcar_gen4_ptp_reg_layout { - RCAR_GEN4_PTP_REG_LAYOUT -}; - /* driver's definitions */ #define RCAR_GEN4_RXTSTAMP_ENABLED BIT(0) #define RCAR_GEN4_RXTSTAMP_TYPE_V2_L2_EVENT BIT(1) @@ -23,37 +19,11 @@ enum rcar_gen4_ptp_reg_layout { #define RCAR_GEN4_TXTSTAMP_ENABLED BIT(0) -#define PTPRO 0 - -enum rcar_gen4_ptp_reg { - PTPTMEC = PTPRO + 0x0010, - PTPTMDC = PTPRO + 0x0014, - PTPTIVC0 = PTPRO + 0x0020, - PTPTOVC00 = PTPRO + 0x0030, - PTPTOVC10 = PTPRO + 0x0034, - PTPTOVC20 = PTPRO + 0x0038, - PTPGPTPTM00 = PTPRO + 0x0050, - PTPGPTPTM10 = PTPRO + 0x0054, - PTPGPTPTM20 = PTPRO + 0x0058, -}; - -struct rcar_gen4_ptp_reg_offset { - u16 enable; - u16 disable; - u16 increment; - u16 config_t0; - u16 config_t1; - u16 config_t2; - u16 monitor_t0; - u16 monitor_t1; - u16 monitor_t2; -}; struct rcar_gen4_ptp_private { void __iomem *addr; struct ptp_clock *clock; struct ptp_clock_info info; - const struct rcar_gen4_ptp_reg_offset *offs; spinlock_t lock; /* For multiple registers access */ u32 tstamp_tx_ctrl; u32 tstamp_rx_ctrl; @@ -61,8 +31,7 @@ struct rcar_gen4_ptp_private { bool initialized; }; -int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, - enum rcar_gen4_ptp_reg_layout layout, u32 rate); +int rcar_gen4_ptp_register(struct rcar_gen4_ptp_private *ptp_priv, u32 rate); int rcar_gen4_ptp_unregister(struct rcar_gen4_ptp_private *ptp_priv); struct rcar_gen4_ptp_private *rcar_gen4_ptp_alloc(struct platform_device *pdev); diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index 532192cbca4b..a1d4a877e5bd 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -1,19 +1,25 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Renesas Ethernet Switch device driver * - * Copyright (C) 2022 Renesas Electronics Corporation + * Copyright (C) 2022-2025 Renesas Electronics Corporation */ #ifndef __RSWITCH_H__ #define __RSWITCH_H__ #include <linux/platform_device.h> +#include 
<linux/phy.h> + #include "rcar_gen4_ptp.h" #define RSWITCH_MAX_NUM_QUEUES 128 #define RSWITCH_NUM_AGENTS 5 #define RSWITCH_NUM_PORTS 3 + +#define rswitch_for_all_ports(_priv, _rdev) \ + list_for_each_entry(_rdev, &_priv->port_list, list) + #define rswitch_for_each_enabled_port(priv, i) \ for (i = 0; i < RSWITCH_NUM_PORTS; i++) \ if (priv->rdev[i]->disabled) \ @@ -809,7 +815,8 @@ enum rswitch_gwca_mode { #define FWPC0_IP4EA BIT(10) #define FWPC0_IPDSA BIT(12) #define FWPC0_IPHLA BIT(18) -#define FWPC0_MACSDA BIT(20) +#define FWPC0_MACDSA BIT(20) +#define FWPC0_MACSSA BIT(23) #define FWPC0_MACHLA BIT(26) #define FWPC0_MACHMA BIT(27) #define FWPC0_VLANSA BIT(28) @@ -820,12 +827,30 @@ enum rswitch_gwca_mode { #define FWPC2(i) (FWPC20 + (i) * 0x10) #define FWCP2_LTWFW GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16) +#define FWCP2_LTWFW_MASK GENMASK(16 + (RSWITCH_NUM_AGENTS - 1), 16) #define FWPBFC(i) (FWPBFC0 + (i) * 0x10) #define FWPBFC_PBDV GENMASK(RSWITCH_NUM_AGENTS - 1, 0) #define FWPBFCSDC(j, i) (FWPBFCSDC00 + (i) * 0x10 + (j) * 0x04) +#define FWMACHEC_MACHMUE_MASK GENMASK(26, 16) + +#define FWMACTIM_MACTIOG BIT(0) +#define FWMACTIM_MACTR BIT(1) + +#define FWMACAGUSPC_MACAGUSP GENMASK(9, 0) +#define FWMACAGC_MACAGT GENMASK(15, 0) +#define FWMACAGC_MACAGE BIT(16) +#define FWMACAGC_MACAGSL BIT(17) +#define FWMACAGC_MACAGPM BIT(18) +#define FWMACAGC_MACDES BIT(24) +#define FWMACAGC_MACAGOG BIT(28) +#define FWMACAGC_MACDESOG BIT(29) + +#define RSW_AGEING_CLK_PER_US 0x140 +#define RSW_AGEING_TIME 300 + /* TOP */ #define TPEMIMC7(queue) (TPEMIMC70 + (queue) * 4) @@ -994,10 +1019,18 @@ struct rswitch_device { DECLARE_BITMAP(ts_skb_used, TS_TAGS_PER_PORT); bool disabled; + struct list_head list; + int port; struct rswitch_etha *etha; struct device_node *np_port; struct phy *serdes; + + struct net_device *brdev; /* master bridge device */ + unsigned int learning_requested : 1; + unsigned int learning_offloaded : 1; + unsigned int forwarding_requested : 1; + unsigned int forwarding_offloaded : 1; }; struct rswitch_mfwd_mac_table_entry { @@ -1022,11 +1055,17 @@ struct rswitch_private { struct rswitch_etha etha[RSWITCH_NUM_PORTS]; struct rswitch_mfwd mfwd; + struct list_head port_list; + spinlock_t lock; /* lock interrupt registers' control */ struct clk *clk; bool etha_no_runtime_change; bool gwca_halt; + struct net_device *offload_brdev; }; +bool is_rdev(const struct net_device *ndev); +void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set); + #endif /* #ifndef __RSWITCH_H__ */ diff --git a/drivers/net/ethernet/renesas/rswitch_l2.c b/drivers/net/ethernet/renesas/rswitch_l2.c new file mode 100644 index 000000000000..4a69ec77d69c --- /dev/null +++ b/drivers/net/ethernet/renesas/rswitch_l2.c @@ -0,0 +1,316 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Renesas Ethernet Switch device driver + * + * Copyright (C) 2025 Renesas Electronics Corporation + */ + +#include <linux/err.h> +#include <linux/etherdevice.h> +#include <linux/if_bridge.h> +#include <linux/kernel.h> +#include <net/switchdev.h> + +#include "rswitch.h" +#include "rswitch_l2.h" + +static bool rdev_for_l2_offload(struct rswitch_device *rdev) +{ + return rdev->priv->offload_brdev && + rdev->brdev == rdev->priv->offload_brdev && + (test_bit(rdev->port, rdev->priv->opened_ports)); +} + +static void rswitch_change_l2_hw_offloading(struct rswitch_device *rdev, + bool start, bool learning) +{ + u32 bits = learning ? FWPC0_MACSSA | FWPC0_MACHLA | FWPC0_MACHMA : FWPC0_MACDSA; + u32 clear = start ? 0 : bits; + u32 set = start ? 
bits : 0; + + if ((learning && rdev->learning_offloaded == start) || + (!learning && rdev->forwarding_offloaded == start)) + return; + + rswitch_modify(rdev->priv->addr, FWPC0(rdev->port), clear, set); + + if (learning) + rdev->learning_offloaded = start; + else + rdev->forwarding_offloaded = start; + + netdev_info(rdev->ndev, "%s hw %s\n", start ? "starting" : "stopping", + learning ? "learning" : "forwarding"); +} + +static void rswitch_update_l2_hw_learning(struct rswitch_private *priv) +{ + struct rswitch_device *rdev; + bool learning_needed; + + rswitch_for_all_ports(priv, rdev) { + if (rdev_for_l2_offload(rdev)) + learning_needed = rdev->learning_requested; + else + learning_needed = false; + + rswitch_change_l2_hw_offloading(rdev, learning_needed, true); + } +} + +static void rswitch_update_l2_hw_forwarding(struct rswitch_private *priv) +{ + struct rswitch_device *rdev; + unsigned int fwd_mask; + + /* calculate fwd_mask with zeroes in bits corresponding to ports that + * shall participate in hardware forwarding + */ + fwd_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0); + + rswitch_for_all_ports(priv, rdev) { + if (rdev_for_l2_offload(rdev) && rdev->forwarding_requested) + fwd_mask &= ~BIT(rdev->port); + } + + rswitch_for_all_ports(priv, rdev) { + if ((rdev_for_l2_offload(rdev) && rdev->forwarding_requested) || + rdev->forwarding_offloaded) { + /* Update allowed offload destinations even for ports + * with L2 offload enabled earlier. + * + * Do not allow L2 forwarding to self for hw port. + */ + iowrite32(FIELD_PREP(FWCP2_LTWFW_MASK, fwd_mask | BIT(rdev->port)), + priv->addr + FWPC2(rdev->port)); + } + + if (rdev_for_l2_offload(rdev) && + rdev->forwarding_requested && + !rdev->forwarding_offloaded) { + rswitch_change_l2_hw_offloading(rdev, true, false); + } else if (rdev->forwarding_offloaded) { + rswitch_change_l2_hw_offloading(rdev, false, false); + } + } +} + +void rswitch_update_l2_offload(struct rswitch_private *priv) +{ + rswitch_update_l2_hw_learning(priv); + rswitch_update_l2_hw_forwarding(priv); +} + +static void rswitch_update_offload_brdev(struct rswitch_private *priv) +{ + struct net_device *offload_brdev = NULL; + struct rswitch_device *rdev, *rdev2; + + rswitch_for_all_ports(priv, rdev) { + if (!rdev->brdev) + continue; + rswitch_for_all_ports(priv, rdev2) { + if (rdev2 == rdev) + break; + if (rdev2->brdev == rdev->brdev) { + offload_brdev = rdev->brdev; + break; + } + } + if (offload_brdev) + break; + } + + if (offload_brdev && priv->offload_brdev && + offload_brdev != priv->offload_brdev) + dev_dbg(&priv->pdev->dev, + "changing l2 offload from %s to %s\n", + netdev_name(priv->offload_brdev), + netdev_name(offload_brdev)); + else if (offload_brdev && !priv->offload_brdev) + dev_dbg(&priv->pdev->dev, "starting l2 offload for %s\n", + netdev_name(offload_brdev)); + else if (!offload_brdev && priv->offload_brdev) + dev_dbg(&priv->pdev->dev, "stopping l2 offload for %s\n", + netdev_name(priv->offload_brdev)); + + priv->offload_brdev = offload_brdev; + + rswitch_update_l2_offload(priv); +} + +static bool rswitch_port_check(const struct net_device *ndev) +{ + return is_rdev(ndev); +} + +static void rswitch_port_update_brdev(struct net_device *ndev, + struct net_device *brdev) +{ + struct rswitch_device *rdev; + + if (!is_rdev(ndev)) + return; + + rdev = netdev_priv(ndev); + rdev->brdev = brdev; + rswitch_update_offload_brdev(rdev->priv); +} + +static int rswitch_port_update_stp_state(struct net_device *ndev, u8 stp_state) +{ + struct rswitch_device *rdev; + + if (!is_rdev(ndev)) + return -ENODEV; + + rdev = netdev_priv(ndev); + rdev->learning_requested = (stp_state
== BR_STATE_LEARNING || + stp_state == BR_STATE_FORWARDING); + rdev->forwarding_requested = (stp_state == BR_STATE_FORWARDING); + rswitch_update_l2_offload(rdev->priv); + + return 0; +} + +static int rswitch_netdevice_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_changeupper_info *info; + struct net_device *brdev; + + if (!rswitch_port_check(ndev)) + return NOTIFY_DONE; + if (event != NETDEV_CHANGEUPPER) + return NOTIFY_DONE; + + info = ptr; + + if (netif_is_bridge_master(info->upper_dev)) { + brdev = info->linking ? info->upper_dev : NULL; + rswitch_port_update_brdev(ndev, brdev); + } + + return NOTIFY_OK; +} + +static int rswitch_update_ageing_time(struct net_device *ndev, clock_t time) +{ + struct rswitch_device *rdev = netdev_priv(ndev); + u32 reg_val; + + if (!is_rdev(ndev)) + return -ENODEV; + + if (!FIELD_FIT(FWMACAGC_MACAGT, time)) + return -EINVAL; + + reg_val = FIELD_PREP(FWMACAGC_MACAGT, time); + reg_val |= FWMACAGC_MACAGE | FWMACAGC_MACAGSL; + iowrite32(reg_val, rdev->priv->addr + FWMACAGC); + + return 0; +} + +static int rswitch_port_attr_set(struct net_device *ndev, const void *ctx, + const struct switchdev_attr *attr, + struct netlink_ext_ack *extack) +{ + switch (attr->id) { + case SWITCHDEV_ATTR_ID_PORT_STP_STATE: + return rswitch_port_update_stp_state(ndev, attr->u.stp_state); + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + return rswitch_update_ageing_time(ndev, attr->u.ageing_time); + default: + return -EOPNOTSUPP; + } +} + +static int rswitch_switchdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = switchdev_notifier_info_to_dev(ptr); + int ret; + + if (event == SWITCHDEV_PORT_ATTR_SET) { + ret = switchdev_handle_port_attr_set(ndev, ptr, + rswitch_port_check, + rswitch_port_attr_set); + return notifier_from_errno(ret); + } + + if (!rswitch_port_check(ndev)) + return NOTIFY_DONE; + + return notifier_from_errno(-EOPNOTSUPP); +} + +static int rswitch_switchdev_blocking_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct net_device *ndev = switchdev_notifier_info_to_dev(ptr); + int ret; + + switch (event) { + case SWITCHDEV_PORT_OBJ_ADD: + return -EOPNOTSUPP; + case SWITCHDEV_PORT_OBJ_DEL: + return -EOPNOTSUPP; + case SWITCHDEV_PORT_ATTR_SET: + ret = switchdev_handle_port_attr_set(ndev, ptr, + rswitch_port_check, + rswitch_port_attr_set); + break; + default: + if (!rswitch_port_check(ndev)) + return NOTIFY_DONE; + ret = -EOPNOTSUPP; + } + + return notifier_from_errno(ret); +} + +static struct notifier_block rswitch_netdevice_nb = { + .notifier_call = rswitch_netdevice_event, +}; + +static struct notifier_block rswitch_switchdev_nb = { + .notifier_call = rswitch_switchdev_event, +}; + +static struct notifier_block rswitch_switchdev_blocking_nb = { + .notifier_call = rswitch_switchdev_blocking_event, +}; + +int rswitch_register_notifiers(void) +{ + int ret; + + ret = register_netdevice_notifier(&rswitch_netdevice_nb); + if (ret) + goto register_netdevice_notifier_failed; + + ret = register_switchdev_notifier(&rswitch_switchdev_nb); + if (ret) + goto register_switchdev_notifier_failed; + + ret = register_switchdev_blocking_notifier(&rswitch_switchdev_blocking_nb); + if (ret) + goto register_switchdev_blocking_notifier_failed; + + return 0; + +register_switchdev_blocking_notifier_failed: + unregister_switchdev_notifier(&rswitch_switchdev_nb); +register_switchdev_notifier_failed: + 
unregister_netdevice_notifier(&rswitch_netdevice_nb); +register_netdevice_notifier_failed: + + return ret; +} + +void rswitch_unregister_notifiers(void) +{ + unregister_switchdev_blocking_notifier(&rswitch_switchdev_blocking_nb); + unregister_switchdev_notifier(&rswitch_switchdev_nb); + unregister_netdevice_notifier(&rswitch_netdevice_nb); +} diff --git a/drivers/net/ethernet/renesas/rswitch_l2.h b/drivers/net/ethernet/renesas/rswitch_l2.h new file mode 100644 index 000000000000..57050ede8f31 --- /dev/null +++ b/drivers/net/ethernet/renesas/rswitch_l2.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Renesas Ethernet Switch device driver + * + * Copyright (C) 2025 Renesas Electronics Corporation + */ + +#ifndef __RSWITCH_L2_H__ +#define __RSWITCH_L2_H__ + +void rswitch_update_l2_offload(struct rswitch_private *priv); + +int rswitch_register_notifiers(void); +void rswitch_unregister_notifiers(void); + +#endif /* #ifndef __RSWITCH_L2_H__ */ diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch_main.c index aba772e14555..8d8acc2124b8 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch_main.c @@ -1,15 +1,18 @@ // SPDX-License-Identifier: GPL-2.0 /* Renesas Ethernet Switch device driver * - * Copyright (C) 2022 Renesas Electronics Corporation + * Copyright (C) 2022-2025 Renesas Electronics Corporation */ #include <linux/clk.h> #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/ip.h> #include <linux/iopoll.h> #include <linux/kernel.h> +#include <linux/list.h> #include <linux/module.h> #include <linux/net_tstamp.h> #include <linux/of.h> @@ -25,6 +28,7 @@ #include <linux/sys_soc.h> #include "rswitch.h" +#include "rswitch_l2.h" static int rswitch_reg_wait(void __iomem *addr, u32 offs, u32 mask, u32 expected) { @@ -34,7 +38,7 @@ static int rswitch_reg_wait(void __iomem *addr, u32 offs, u32 mask, u32 expected 1, RSWITCH_TIMEOUT_US); } -static void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set) +void rswitch_modify(void __iomem *addr, enum rswitch_reg reg, u32 clear, u32 set) { iowrite32((ioread32(addr + reg) & ~clear) | set, addr + reg); } @@ -109,10 +113,11 @@ static void rswitch_top_init(struct rswitch_private *priv) } /* Forwarding engine block (MFWD) */ -static void rswitch_fwd_init(struct rswitch_private *priv) +static int rswitch_fwd_init(struct rswitch_private *priv) { u32 all_ports_mask = GENMASK(RSWITCH_NUM_AGENTS - 1, 0); unsigned int i; + u32 reg_val; /* Start with empty configuration */ for (i = 0; i < RSWITCH_NUM_AGENTS; i++) { @@ -128,6 +133,14 @@ static void rswitch_fwd_init(struct rswitch_private *priv) iowrite32(0, priv->addr + FWPBFC(i)); } + /* Configure MAC table aging */ + rswitch_modify(priv->addr, FWMACAGUSPC, FWMACAGUSPC_MACAGUSP, + FIELD_PREP(FWMACAGUSPC_MACAGUSP, RSW_AGEING_CLK_PER_US)); + + reg_val = FIELD_PREP(FWMACAGC_MACAGT, RSW_AGEING_TIME); + reg_val |= FWMACAGC_MACAGE | FWMACAGC_MACAGSL; + iowrite32(reg_val, priv->addr + FWMACAGC); + /* For enabled ETHA ports, setup port based forwarding */ rswitch_for_each_enabled_port(priv, i) { /* Port based forwarding from port i to GWCA port */ @@ -140,6 +153,16 @@ static void rswitch_fwd_init(struct rswitch_private *priv) /* For GWCA port, allow direct descriptor forwarding */ rswitch_modify(priv->addr, FWPC1(priv->gwca.index), FWPC1_DDE, FWPC1_DDE); + + /* Initialize hardware L2 forwarding table */ + + /* Allow entire table to be 
used for "unsecure" entries */ + rswitch_modify(priv->addr, FWMACHEC, 0, FWMACHEC_MACHMUE_MASK); + + /* Initialize MAC hash table */ + iowrite32(FWMACTIM_MACTIOG, priv->addr + FWMACTIM); + + return rswitch_reg_wait(priv->addr, FWMACTIM, FWMACTIM_MACTIOG, 0); } /* Gateway CPU agent block (GWCA) */ @@ -1602,8 +1625,11 @@ static int rswitch_open(struct net_device *ndev) netif_start_queue(ndev); + if (rdev->brdev) + rswitch_update_l2_offload(rdev->priv); + return 0; -}; +} static int rswitch_stop(struct net_device *ndev) { @@ -1624,12 +1650,13 @@ static int rswitch_stop(struct net_device *ndev) napi_disable(&rdev->napi); + if (rdev->brdev) + rswitch_update_l2_offload(rdev->priv); + if (bitmap_empty(rdev->priv->opened_ports, RSWITCH_NUM_PORTS)) iowrite32(GWCA_TS_IRQ_BIT, rdev->priv->addr + GWTSDID); - for (tag = find_first_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT); - tag < TS_TAGS_PER_PORT; - tag = find_next_bit(rdev->ts_skb_used, TS_TAGS_PER_PORT, tag + 1)) { + for_each_set_bit(tag, rdev->ts_skb_used, TS_TAGS_PER_PORT) { ts_skb = xchg(&rdev->ts_skb[tag], NULL); clear_bit(tag, rdev->ts_skb_used); if (ts_skb) @@ -1637,7 +1664,7 @@ static int rswitch_stop(struct net_device *ndev) } return 0; -}; +} static bool rswitch_ext_desc_set_info1(struct rswitch_device *rdev, struct sk_buff *skb, @@ -1850,16 +1877,46 @@ static int rswitch_eth_ioctl(struct net_device *ndev, struct ifreq *req, int cmd } } +static int rswitch_get_port_parent_id(struct net_device *ndev, + struct netdev_phys_item_id *ppid) +{ + struct rswitch_device *rdev = netdev_priv(ndev); + const char *name; + + name = dev_name(&rdev->priv->pdev->dev); + ppid->id_len = min_t(size_t, strlen(name), sizeof(ppid->id)); + memcpy(ppid->id, name, ppid->id_len); + + return 0; +} + +static int rswitch_get_phys_port_name(struct net_device *ndev, + char *name, size_t len) +{ + struct rswitch_device *rdev = netdev_priv(ndev); + + snprintf(name, len, "tsn%d", rdev->port); + + return 0; +} + static const struct net_device_ops rswitch_netdev_ops = { .ndo_open = rswitch_open, .ndo_stop = rswitch_stop, .ndo_start_xmit = rswitch_start_xmit, .ndo_get_stats = rswitch_get_stats, .ndo_eth_ioctl = rswitch_eth_ioctl, + .ndo_get_port_parent_id = rswitch_get_port_parent_id, + .ndo_get_phys_port_name = rswitch_get_phys_port_name, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = eth_mac_addr, }; +bool is_rdev(const struct net_device *ndev) +{ + return (ndev->netdev_ops == &rswitch_netdev_ops); +} + static int rswitch_get_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { struct rswitch_device *rdev = netdev_priv(ndev); @@ -1959,6 +2016,8 @@ static int rswitch_device_alloc(struct rswitch_private *priv, unsigned int index if (err < 0) goto out_txdmac; + list_add_tail(&rdev->list, &priv->port_list); + return 0; out_txdmac: @@ -1978,6 +2037,7 @@ static void rswitch_device_free(struct rswitch_private *priv, unsigned int index struct rswitch_device *rdev = priv->rdev[index]; struct net_device *ndev = rdev->ndev; + list_del(&rdev->list); rswitch_txdmac_free(ndev); rswitch_rxdmac_free(ndev); of_node_put(rdev->np_port); @@ -2024,10 +2084,11 @@ static int rswitch_init(struct rswitch_private *priv) } } - rswitch_fwd_init(priv); + err = rswitch_fwd_init(priv); + if (err < 0) + goto err_fwd_init; - err = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT, - clk_get_rate(priv->clk)); + err = rcar_gen4_ptp_register(priv->ptp_priv, clk_get_rate(priv->clk)); if (err < 0) goto err_ptp_register; @@ -2073,6 +2134,7 @@ err_gwca_ts_request_irq: 
err_gwca_request_irq: rcar_gen4_ptp_unregister(priv->ptp_priv); +err_fwd_init: err_ptp_register: for (i = 0; i < RSWITCH_NUM_PORTS; i++) rswitch_device_free(priv, i); @@ -2107,6 +2169,7 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); if (!priv) return -ENOMEM; + spin_lock_init(&priv->lock); priv->clk = devm_clk_get(&pdev->dev, NULL); @@ -2144,6 +2207,8 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) if (!priv->gwca.queues) return -ENOMEM; + INIT_LIST_HEAD(&priv->port_list); + pm_runtime_enable(&pdev->dev); pm_runtime_get_sync(&pdev->dev); @@ -2154,6 +2219,15 @@ static int renesas_eth_sw_probe(struct platform_device *pdev) return ret; } + if (list_empty(&priv->port_list)) + dev_warn(&pdev->dev, "could not initialize any ports\n"); + + ret = rswitch_register_notifiers(); + if (ret) { + dev_err(&pdev->dev, "could not register notifiers\n"); + return ret; + } + device_set_wakeup_capable(&pdev->dev, 1); return ret; @@ -2187,6 +2261,7 @@ static void renesas_eth_sw_remove(struct platform_device *pdev) { struct rswitch_private *priv = platform_get_drvdata(pdev); + rswitch_unregister_notifiers(); rswitch_deinit(priv); pm_runtime_put(&pdev->dev); diff --git a/drivers/net/ethernet/renesas/rtsn.c b/drivers/net/ethernet/renesas/rtsn.c index 05c4b6c8c9c3..15a043e85431 100644 --- a/drivers/net/ethernet/renesas/rtsn.c +++ b/drivers/net/ethernet/renesas/rtsn.c @@ -1330,8 +1330,7 @@ static int rtsn_probe(struct platform_device *pdev) device_set_wakeup_capable(&pdev->dev, 1); - ret = rcar_gen4_ptp_register(priv->ptp_priv, RCAR_GEN4_PTP_REG_LAYOUT, - clk_get_rate(priv->clk)); + ret = rcar_gen4_ptp_register(priv->ptp_priv, clk_get_rate(priv->clk)); if (ret) goto error_pm; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 5fc8027c92c7..6fb0ffc1c844 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2233,7 +2233,7 @@ static void sh_eth_get_regs(struct net_device *ndev, struct ethtool_regs *regs, pm_runtime_get_sync(&mdp->pdev->dev); __sh_eth_get_regs(ndev, buf); - pm_runtime_put_sync(&mdp->pdev->dev); + pm_runtime_put(&mdp->pdev->dev); } static u32 sh_eth_get_msglevel(struct net_device *ndev) @@ -2360,6 +2360,7 @@ static int sh_eth_set_ringparam(struct net_device *ndev, return 0; } +#ifdef CONFIG_PM_SLEEP static void sh_eth_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct sh_eth_private *mdp = netdev_priv(ndev); @@ -2386,6 +2387,7 @@ static int sh_eth_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) return 0; } +#endif static const struct ethtool_ops sh_eth_ethtool_ops = { .get_regs_len = sh_eth_get_regs_len, @@ -2401,8 +2403,10 @@ static const struct ethtool_ops sh_eth_ethtool_ops = { .set_ringparam = sh_eth_set_ringparam, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, +#ifdef CONFIG_PM_SLEEP .get_wol = sh_eth_get_wol, .set_wol = sh_eth_set_wol, +#endif }; /* network device open function */ @@ -2447,7 +2451,7 @@ out_free_irq: free_irq(ndev->irq, ndev); out_napi_off: napi_disable(&mdp->napi); - pm_runtime_put_sync(&mdp->pdev->dev); + pm_runtime_put(&mdp->pdev->dev); return ret; } @@ -3443,8 +3447,6 @@ static void sh_eth_drv_remove(struct platform_device *pdev) free_netdev(ndev); } -#ifdef CONFIG_PM -#ifdef CONFIG_PM_SLEEP static int sh_eth_wol_setup(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); @@ 
-3527,28 +3529,8 @@ static int sh_eth_resume(struct device *dev) return ret; } -#endif - -static int sh_eth_runtime_nop(struct device *dev) -{ - /* Runtime PM callback shared between ->runtime_suspend() - * and ->runtime_resume(). Simply returns success. - * - * This driver re-initializes all registers after - * pm_runtime_get_sync() anyway so there is no need - * to save and restore registers here. - */ - return 0; -} -static const struct dev_pm_ops sh_eth_dev_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(sh_eth_suspend, sh_eth_resume) - SET_RUNTIME_PM_OPS(sh_eth_runtime_nop, sh_eth_runtime_nop, NULL) -}; -#define SH_ETH_PM_OPS (&sh_eth_dev_pm_ops) -#else -#define SH_ETH_PM_OPS NULL -#endif +static DEFINE_SIMPLE_DEV_PM_OPS(sh_eth_dev_pm_ops, sh_eth_suspend, sh_eth_resume); static const struct platform_device_id sh_eth_id_table[] = { { "sh7619-ether", (kernel_ulong_t)&sh7619_data }, @@ -3568,7 +3550,7 @@ static struct platform_driver sh_eth_driver = { .id_table = sh_eth_id_table, .driver = { .name = CARDNAME, - .pm = SH_ETH_PM_OPS, + .pm = pm_sleep_ptr(&sh_eth_dev_pm_ops), .of_match_table = of_match_ptr(sh_eth_match_table), }, }; diff --git a/drivers/net/ethernet/sfc/ef100_tx.c b/drivers/net/ethernet/sfc/ef100_tx.c index e6b6be549581..03005757c060 100644 --- a/drivers/net/ethernet/sfc/ef100_tx.c +++ b/drivers/net/ethernet/sfc/ef100_tx.c @@ -189,6 +189,7 @@ static void ef100_make_tso_desc(struct efx_nic *efx, { bool gso_partial = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL; unsigned int len, ip_offset, tcp_offset, payload_segs; + u32 mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; u32 mangleid = ESE_GZ_TX_DESC_IP4_ID_INC_MOD16; unsigned int outer_ip_offset, outer_l4_offset; u16 vlan_tci = skb_vlan_tag_get(skb); @@ -200,8 +201,17 @@ static void ef100_make_tso_desc(struct efx_nic *efx, bool outer_csum; u32 paylen; - if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) - mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + if (encap) { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID_INNER) + mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) + mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + } else { + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID) + mangleid = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + mangleid_outer = ESE_GZ_TX_DESC_IP4_ID_NO_OP; + } + if (efx->net_dev->features & NETIF_F_HW_VLAN_CTAG_TX) vlan_enable = skb_vlan_tag_present(skb); @@ -245,8 +255,7 @@ static void ef100_make_tso_desc(struct efx_nic *efx, ESF_GZ_TX_TSO_OUTER_L4_OFF_W, outer_l4_offset >> 1, ESF_GZ_TX_TSO_ED_OUTER_UDP_LEN, udp_encap && !gso_partial, ESF_GZ_TX_TSO_ED_OUTER_IP_LEN, encap && !gso_partial, - ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, encap ? 
mangleid : - ESE_GZ_TX_DESC_IP4_ID_NO_OP, + ESF_GZ_TX_TSO_ED_OUTER_IP4_ID, mangleid_outer, ESF_GZ_TX_TSO_VLAN_INSERT_EN, vlan_enable, ESF_GZ_TX_TSO_VLAN_INSERT_TCI, vlan_tci ); diff --git a/drivers/net/ethernet/sfc/efx_channels.c b/drivers/net/ethernet/sfc/efx_channels.c index 06b4f52713ef..ed3a96ebc7f3 100644 --- a/drivers/net/ethernet/sfc/efx_channels.c +++ b/drivers/net/ethernet/sfc/efx_channels.c @@ -216,8 +216,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, if (efx_separate_tx_channels) { efx->n_tx_channels = - min(max(n_channels / 2, 1U), - efx->max_tx_channels); + clamp(n_channels / 2, 1U, + efx->max_tx_channels); efx->tx_channel_offset = n_channels - efx->n_tx_channels; efx->n_rx_channels = @@ -1281,7 +1281,7 @@ static int efx_poll(struct napi_struct *napi, int budget) time = jiffies - channel->rfs_last_expiry; /* Would our quota be >= 20? */ if (channel->rfs_filter_count * time >= 600 * HZ) - mod_delayed_work(system_wq, &channel->filter_work, 0); + mod_delayed_work(system_percpu_wq, &channel->filter_work, 0); #endif /* There is no race here; although napi_disable() will diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 23c6a7df78d0..18fe5850a978 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -217,7 +217,8 @@ static int efx_ethtool_set_wol(struct net_device *net_dev, } static void efx_ethtool_get_fec_stats(struct net_device *net_dev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct efx_nic *efx = efx_netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c index b07f7e4e2877..d19fbf8732ff 100644 --- a/drivers/net/ethernet/sfc/falcon/efx.c +++ b/drivers/net/ethernet/sfc/falcon/efx.c @@ -1394,9 +1394,8 @@ static int ef4_probe_interrupts(struct ef4_nic *efx) if (n_channels > extra_channels) n_channels -= extra_channels; if (ef4_separate_tx_channels) { - efx->n_tx_channels = min(max(n_channels / 2, - 1U), - efx->max_tx_channels); + efx->n_tx_channels = clamp(n_channels / 2, 1U, + efx->max_tx_channels); efx->n_rx_channels = max(n_channels - efx->n_tx_channels, 1U); diff --git a/drivers/net/ethernet/sfc/siena/efx_channels.c b/drivers/net/ethernet/sfc/siena/efx_channels.c index d120b3c83ac0..fc075ab6b7b5 100644 --- a/drivers/net/ethernet/sfc/siena/efx_channels.c +++ b/drivers/net/ethernet/sfc/siena/efx_channels.c @@ -217,8 +217,8 @@ static int efx_allocate_msix_channels(struct efx_nic *efx, if (efx_siena_separate_tx_channels) { efx->n_tx_channels = - min(max(n_channels / 2, 1U), - efx->max_tx_channels); + clamp(n_channels / 2, 1U, + efx->max_tx_channels); efx->tx_channel_offset = n_channels - efx->n_tx_channels; efx->n_rx_channels = @@ -1300,7 +1300,7 @@ static int efx_poll(struct napi_struct *napi, int budget) time = jiffies - channel->rfs_last_expiry; /* Would our quota be >= 20? 
*/ if (channel->rfs_filter_count * time >= 600 * HZ) - mod_delayed_work(system_wq, &channel->filter_work, 0); + mod_delayed_work(system_percpu_wq, &channel->filter_work, 0); #endif /* There is no race here; although napi_disable() will diff --git a/drivers/net/ethernet/sfc/siena/ethtool.c b/drivers/net/ethernet/sfc/siena/ethtool.c index 994909789bfe..8c3ebd0617fb 100644 --- a/drivers/net/ethernet/sfc/siena/ethtool.c +++ b/drivers/net/ethernet/sfc/siena/ethtool.c @@ -217,7 +217,8 @@ static int efx_ethtool_set_wol(struct net_device *net_dev, } static void efx_ethtool_get_fec_stats(struct net_device *net_dev, - struct ethtool_fec_stats *fec_stats) + struct ethtool_fec_stats *fec_stats, + struct ethtool_fec_hist *hist) { struct efx_nic *efx = netdev_priv(net_dev); diff --git a/drivers/net/ethernet/sfc/tc_encap_actions.c b/drivers/net/ethernet/sfc/tc_encap_actions.c index e872f926e438..eef06e48185d 100644 --- a/drivers/net/ethernet/sfc/tc_encap_actions.c +++ b/drivers/net/ethernet/sfc/tc_encap_actions.c @@ -11,6 +11,8 @@ #include "tc_encap_actions.h" #include "tc.h" #include "mae.h" +#include <net/flow.h> +#include <net/inet_dscp.h> #include <net/vxlan.h> #include <net/geneve.h> #include <net/netevent.h> @@ -99,7 +101,7 @@ static int efx_bind_neigh(struct efx_nic *efx, case EFX_ENCAP_TYPE_GENEVE: flow4.flowi4_proto = IPPROTO_UDP; flow4.fl4_dport = encap->key.tp_dst; - flow4.flowi4_tos = encap->key.tos; + flow4.flowi4_dscp = inet_dsfield_to_dscp(encap->key.tos); flow4.daddr = encap->key.u.ipv4.dst; flow4.saddr = encap->key.u.ipv4.src; break; diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 6ca290f7c0df..3ebd0664c697 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -2162,10 +2162,20 @@ static const struct net_device_ops smsc911x_netdev_ops = { static void smsc911x_read_mac_address(struct net_device *dev) { struct smsc911x_data *pdata = netdev_priv(dev); - u32 mac_high16 = smsc911x_mac_read(pdata, ADDRH); - u32 mac_low32 = smsc911x_mac_read(pdata, ADDRL); + u32 mac_high16, mac_low32; u8 addr[ETH_ALEN]; + mac_high16 = smsc911x_mac_read(pdata, ADDRH); + mac_low32 = smsc911x_mac_read(pdata, ADDRL); + + /* The first mac_read in some setups can incorrectly read 0. Re-read it + * to get the full MAC if this is observed. + */ + if (mac_high16 == 0) { + SMSC_TRACE(pdata, probe, "Re-read MAC ADDRH\n"); + mac_high16 = smsc911x_mac_read(pdata, ADDRH); + } + addr[0] = (u8)(mac_low32); addr[1] = (u8)(mac_low32 >> 8); addr[2] = (u8)(mac_low32 >> 16); diff --git a/drivers/net/ethernet/spacemit/Kconfig b/drivers/net/ethernet/spacemit/Kconfig new file mode 100644 index 000000000000..85ef61a9b4ef --- /dev/null +++ b/drivers/net/ethernet/spacemit/Kconfig @@ -0,0 +1,29 @@ +config NET_VENDOR_SPACEMIT + bool "SpacemiT devices" + default y + depends on ARCH_SPACEMIT || COMPILE_TEST + help + If you have a network (Ethernet) device belonging to this class, + say Y. + + Note that the answer to this question does not directly affect + the kernel: saying N will just cause the configurator to skip all + the questions regarding SpacemiT devices. If you say Y, you will + be asked for your specific chipset/driver in the following questions. + +if NET_VENDOR_SPACEMIT + +config SPACEMIT_K1_EMAC + tristate "SpacemiT K1 Ethernet MAC driver" + depends on ARCH_SPACEMIT || COMPILE_TEST + depends on MFD_SYSCON + depends on OF + default m if ARCH_SPACEMIT + select PHYLIB + help + This driver supports the Ethernet MAC in the SpacemiT K1 SoC. 
+ + To compile this driver as a module, choose M here: the module + will be called k1_emac. + +endif # NET_VENDOR_SPACEMIT diff --git a/drivers/net/ethernet/spacemit/Makefile b/drivers/net/ethernet/spacemit/Makefile new file mode 100644 index 000000000000..d29efd997a4f --- /dev/null +++ b/drivers/net/ethernet/spacemit/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the SpacemiT network device drivers. +# + +obj-$(CONFIG_SPACEMIT_K1_EMAC) += k1_emac.o diff --git a/drivers/net/ethernet/spacemit/k1_emac.c b/drivers/net/ethernet/spacemit/k1_emac.c new file mode 100644 index 000000000000..e1c5faff3b71 --- /dev/null +++ b/drivers/net/ethernet/spacemit/k1_emac.c @@ -0,0 +1,2159 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * SpacemiT K1 Ethernet driver + * + * Copyright (C) 2023-2025 SpacemiT (Hangzhou) Technology Co. Ltd + * Copyright (C) 2025 Vivian Wang <wangruikang@iscas.ac.cn> + */ + +#include <linux/bitfield.h> +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/etherdevice.h> +#include <linux/ethtool.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/iopoll.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> +#include <linux/pm.h> +#include <linux/regmap.h> +#include <linux/reset.h> +#include <linux/rtnetlink.h> +#include <linux/timer.h> +#include <linux/types.h> + +#include "k1_emac.h" + +#define DRIVER_NAME "k1_emac" + +#define EMAC_DEFAULT_BUFSIZE 1536 +#define EMAC_RX_BUF_2K 2048 +#define EMAC_RX_BUF_4K 4096 + +/* Tuning parameters from SpacemiT */ +#define EMAC_TX_FRAMES 64 +#define EMAC_TX_COAL_TIMEOUT 40000 +#define EMAC_RX_FRAMES 64 +#define EMAC_RX_COAL_TIMEOUT (600 * 312) + +#define DEFAULT_FC_PAUSE_TIME 0xffff +#define DEFAULT_FC_FIFO_HIGH 1600 +#define DEFAULT_TX_ALMOST_FULL 0x1f8 +#define DEFAULT_TX_THRESHOLD 1518 +#define DEFAULT_RX_THRESHOLD 12 +#define DEFAULT_TX_RING_NUM 1024 +#define DEFAULT_RX_RING_NUM 1024 +#define DEFAULT_DMA_BURST MREGBIT_BURST_16WORD +#define HASH_TABLE_SIZE 64 + +struct desc_buf { + u64 dma_addr; + void *buff_addr; + u16 dma_len; + u8 map_as_page; +}; + +struct emac_tx_desc_buffer { + struct sk_buff *skb; + struct desc_buf buf[2]; +}; + +struct emac_rx_desc_buffer { + struct sk_buff *skb; + u64 dma_addr; + void *buff_addr; + u16 dma_len; + u8 map_as_page; +}; + +/** + * struct emac_desc_ring - Software-side information for one descriptor ring + * Same structure used for both RX and TX + * @desc_addr: Virtual address to the descriptor ring memory + * @desc_dma_addr: DMA address of the descriptor ring + * @total_size: Size of ring in bytes + * @total_cnt: Number of descriptors + * @head: Next descriptor to associate a buffer with + * @tail: Next descriptor to check status bit + * @rx_desc_buf: Array of descriptors for RX + * @tx_desc_buf: Array of descriptors for TX, with max of two buffers each + */ +struct emac_desc_ring { + void *desc_addr; + dma_addr_t desc_dma_addr; + u32 total_size; + u32 total_cnt; + u32 head; + u32 tail; + union { + struct emac_rx_desc_buffer *rx_desc_buf; + struct emac_tx_desc_buffer *tx_desc_buf; + }; +}; + +struct emac_priv { + void __iomem *iobase; + u32 dma_buf_sz; + struct emac_desc_ring tx_ring; + struct emac_desc_ring rx_ring; + + struct net_device *ndev; + struct napi_struct napi; + struct 
platform_device *pdev; + struct clk *bus_clk; + struct clk *ref_clk; + struct regmap *regmap_apmu; + u32 regmap_apmu_offset; + int irq; + + phy_interface_t phy_interface; + + union emac_hw_tx_stats tx_stats, tx_stats_off; + union emac_hw_rx_stats rx_stats, rx_stats_off; + + u32 tx_count_frames; + u32 tx_coal_frames; + u32 tx_coal_timeout; + struct work_struct tx_timeout_task; + + struct timer_list txtimer; + struct timer_list stats_timer; + + u32 tx_delay; + u32 rx_delay; + + bool flow_control_autoneg; + u8 flow_control; + + /* Softirq-safe, hold while touching hardware statistics */ + spinlock_t stats_lock; +}; + +static void emac_wr(struct emac_priv *priv, u32 reg, u32 val) +{ + writel(val, priv->iobase + reg); +} + +static u32 emac_rd(struct emac_priv *priv, u32 reg) +{ + return readl(priv->iobase + reg); +} + +static int emac_phy_interface_config(struct emac_priv *priv) +{ + u32 val = 0, mask = REF_CLK_SEL | RGMII_TX_CLK_SEL | PHY_INTF_RGMII; + + if (phy_interface_mode_is_rgmii(priv->phy_interface)) + val |= PHY_INTF_RGMII; + + regmap_update_bits(priv->regmap_apmu, + priv->regmap_apmu_offset + APMU_EMAC_CTRL_REG, + mask, val); + + return 0; +} + +/* + * Where the hardware expects a MAC address, it is laid out in this high, med, + * low order in three consecutive registers and in this format. + */ + +static void emac_set_mac_addr_reg(struct emac_priv *priv, + const unsigned char *addr, + u32 reg) +{ + emac_wr(priv, reg + sizeof(u32) * 0, addr[1] << 8 | addr[0]); + emac_wr(priv, reg + sizeof(u32) * 1, addr[3] << 8 | addr[2]); + emac_wr(priv, reg + sizeof(u32) * 2, addr[5] << 8 | addr[4]); +} + +static void emac_set_mac_addr(struct emac_priv *priv, const unsigned char *addr) +{ + /* We use only one address, so set the same for flow control as well */ + emac_set_mac_addr_reg(priv, addr, MAC_ADDRESS1_HIGH); + emac_set_mac_addr_reg(priv, addr, MAC_FC_SOURCE_ADDRESS_HIGH); +} + +static void emac_reset_hw(struct emac_priv *priv) +{ + /* Disable all interrupts */ + emac_wr(priv, MAC_INTERRUPT_ENABLE, 0x0); + emac_wr(priv, DMA_INTERRUPT_ENABLE, 0x0); + + /* Disable transmit and receive units */ + emac_wr(priv, MAC_RECEIVE_CONTROL, 0x0); + emac_wr(priv, MAC_TRANSMIT_CONTROL, 0x0); + + /* Disable DMA */ + emac_wr(priv, DMA_CONTROL, 0x0); +} + +static void emac_init_hw(struct emac_priv *priv) +{ + /* Destination address for 802.3x Ethernet flow control */ + u8 fc_dest_addr[ETH_ALEN] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x01 }; + + u32 rxirq = 0, dma = 0; + + regmap_set_bits(priv->regmap_apmu, + priv->regmap_apmu_offset + APMU_EMAC_CTRL_REG, + AXI_SINGLE_ID); + + /* Disable transmit and receive units */ + emac_wr(priv, MAC_RECEIVE_CONTROL, 0x0); + emac_wr(priv, MAC_TRANSMIT_CONTROL, 0x0); + + /* Enable MAC address 1 filtering */ + emac_wr(priv, MAC_ADDRESS_CONTROL, MREGBIT_MAC_ADDRESS1_ENABLE); + + /* Zero initialize the multicast hash table */ + emac_wr(priv, MAC_MULTICAST_HASH_TABLE1, 0x0); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE2, 0x0); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE3, 0x0); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE4, 0x0); + + /* Configure thresholds */ + emac_wr(priv, MAC_TRANSMIT_FIFO_ALMOST_FULL, DEFAULT_TX_ALMOST_FULL); + emac_wr(priv, MAC_TRANSMIT_PACKET_START_THRESHOLD, + DEFAULT_TX_THRESHOLD); + emac_wr(priv, MAC_RECEIVE_PACKET_START_THRESHOLD, DEFAULT_RX_THRESHOLD); + + /* Configure flow control (enabled in emac_adjust_link() later) */ + emac_set_mac_addr_reg(priv, fc_dest_addr, MAC_FC_SOURCE_ADDRESS_HIGH); + emac_wr(priv, MAC_FC_PAUSE_HIGH_THRESHOLD, DEFAULT_FC_FIFO_HIGH); + 
emac_wr(priv, MAC_FC_HIGH_PAUSE_TIME, DEFAULT_FC_PAUSE_TIME); + emac_wr(priv, MAC_FC_PAUSE_LOW_THRESHOLD, 0); + + /* RX IRQ mitigation */ + rxirq = FIELD_PREP(MREGBIT_RECEIVE_IRQ_FRAME_COUNTER_MASK, + EMAC_RX_FRAMES); + rxirq |= FIELD_PREP(MREGBIT_RECEIVE_IRQ_TIMEOUT_COUNTER_MASK, + EMAC_RX_COAL_TIMEOUT); + rxirq |= MREGBIT_RECEIVE_IRQ_MITIGATION_ENABLE; + emac_wr(priv, DMA_RECEIVE_IRQ_MITIGATION_CTRL, rxirq); + + /* Disable and set DMA config */ + emac_wr(priv, DMA_CONTROL, 0x0); + + emac_wr(priv, DMA_CONFIGURATION, MREGBIT_SOFTWARE_RESET); + usleep_range(9000, 10000); + emac_wr(priv, DMA_CONFIGURATION, 0x0); + usleep_range(9000, 10000); + + dma |= MREGBIT_STRICT_BURST; + dma |= MREGBIT_DMA_64BIT_MODE; + dma |= DEFAULT_DMA_BURST; + + emac_wr(priv, DMA_CONFIGURATION, dma); +} + +static void emac_dma_start_transmit(struct emac_priv *priv) +{ + /* The actual value written does not matter */ + emac_wr(priv, DMA_TRANSMIT_POLL_DEMAND, 1); +} + +static void emac_enable_interrupt(struct emac_priv *priv) +{ + u32 val; + + val = emac_rd(priv, DMA_INTERRUPT_ENABLE); + val |= MREGBIT_TRANSMIT_TRANSFER_DONE_INTR_ENABLE; + val |= MREGBIT_RECEIVE_TRANSFER_DONE_INTR_ENABLE; + emac_wr(priv, DMA_INTERRUPT_ENABLE, val); +} + +static void emac_disable_interrupt(struct emac_priv *priv) +{ + u32 val; + + val = emac_rd(priv, DMA_INTERRUPT_ENABLE); + val &= ~MREGBIT_TRANSMIT_TRANSFER_DONE_INTR_ENABLE; + val &= ~MREGBIT_RECEIVE_TRANSFER_DONE_INTR_ENABLE; + emac_wr(priv, DMA_INTERRUPT_ENABLE, val); +} + +static u32 emac_tx_avail(struct emac_priv *priv) +{ + struct emac_desc_ring *tx_ring = &priv->tx_ring; + u32 avail; + + if (tx_ring->tail > tx_ring->head) + avail = tx_ring->tail - tx_ring->head - 1; + else + avail = tx_ring->total_cnt - tx_ring->head + tx_ring->tail - 1; + + return avail; +} + +static void emac_tx_coal_timer_resched(struct emac_priv *priv) +{ + mod_timer(&priv->txtimer, + jiffies + usecs_to_jiffies(priv->tx_coal_timeout)); +} + +static void emac_tx_coal_timer(struct timer_list *t) +{ + struct emac_priv *priv = timer_container_of(priv, t, txtimer); + + napi_schedule(&priv->napi); +} + +static bool emac_tx_should_interrupt(struct emac_priv *priv, u32 pkt_num) +{ + priv->tx_count_frames += pkt_num; + if (likely(priv->tx_coal_frames > priv->tx_count_frames)) { + emac_tx_coal_timer_resched(priv); + return false; + } + + priv->tx_count_frames = 0; + return true; +} + +static void emac_free_tx_buf(struct emac_priv *priv, int i) +{ + struct emac_tx_desc_buffer *tx_buf; + struct emac_desc_ring *tx_ring; + struct desc_buf *buf; + int j; + + tx_ring = &priv->tx_ring; + tx_buf = &tx_ring->tx_desc_buf[i]; + + for (j = 0; j < 2; j++) { + buf = &tx_buf->buf[j]; + if (!buf->dma_addr) + continue; + + if (buf->map_as_page) + dma_unmap_page(&priv->pdev->dev, buf->dma_addr, + buf->dma_len, DMA_TO_DEVICE); + else + dma_unmap_single(&priv->pdev->dev, + buf->dma_addr, buf->dma_len, + DMA_TO_DEVICE); + + buf->dma_addr = 0; + buf->map_as_page = false; + buf->buff_addr = NULL; + } + + if (tx_buf->skb) { + dev_kfree_skb_any(tx_buf->skb); + tx_buf->skb = NULL; + } +} + +static void emac_clean_tx_desc_ring(struct emac_priv *priv) +{ + struct emac_desc_ring *tx_ring = &priv->tx_ring; + u32 i; + + for (i = 0; i < tx_ring->total_cnt; i++) + emac_free_tx_buf(priv, i); + + tx_ring->head = 0; + tx_ring->tail = 0; +} + +static void emac_clean_rx_desc_ring(struct emac_priv *priv) +{ + struct emac_rx_desc_buffer *rx_buf; + struct emac_desc_ring *rx_ring; + u32 i; + + rx_ring = &priv->rx_ring; + + for (i = 0; i < rx_ring->total_cnt; 
i++) { + rx_buf = &rx_ring->rx_desc_buf[i]; + + if (!rx_buf->skb) + continue; + + dma_unmap_single(&priv->pdev->dev, rx_buf->dma_addr, + rx_buf->dma_len, DMA_FROM_DEVICE); + + dev_kfree_skb(rx_buf->skb); + rx_buf->skb = NULL; + } + + rx_ring->tail = 0; + rx_ring->head = 0; +} + +static int emac_alloc_tx_resources(struct emac_priv *priv) +{ + struct emac_desc_ring *tx_ring = &priv->tx_ring; + struct platform_device *pdev = priv->pdev; + + tx_ring->tx_desc_buf = kcalloc(tx_ring->total_cnt, + sizeof(*tx_ring->tx_desc_buf), + GFP_KERNEL); + + if (!tx_ring->tx_desc_buf) + return -ENOMEM; + + tx_ring->total_size = tx_ring->total_cnt * sizeof(struct emac_desc); + tx_ring->total_size = ALIGN(tx_ring->total_size, PAGE_SIZE); + + tx_ring->desc_addr = dma_alloc_coherent(&pdev->dev, tx_ring->total_size, + &tx_ring->desc_dma_addr, + GFP_KERNEL); + if (!tx_ring->desc_addr) { + kfree(tx_ring->tx_desc_buf); + return -ENOMEM; + } + + tx_ring->head = 0; + tx_ring->tail = 0; + + return 0; +} + +static int emac_alloc_rx_resources(struct emac_priv *priv) +{ + struct emac_desc_ring *rx_ring = &priv->rx_ring; + struct platform_device *pdev = priv->pdev; + + rx_ring->rx_desc_buf = kcalloc(rx_ring->total_cnt, + sizeof(*rx_ring->rx_desc_buf), + GFP_KERNEL); + if (!rx_ring->rx_desc_buf) + return -ENOMEM; + + rx_ring->total_size = rx_ring->total_cnt * sizeof(struct emac_desc); + + rx_ring->total_size = ALIGN(rx_ring->total_size, PAGE_SIZE); + + rx_ring->desc_addr = dma_alloc_coherent(&pdev->dev, rx_ring->total_size, + &rx_ring->desc_dma_addr, + GFP_KERNEL); + if (!rx_ring->desc_addr) { + kfree(rx_ring->rx_desc_buf); + return -ENOMEM; + } + + rx_ring->head = 0; + rx_ring->tail = 0; + + return 0; +} + +static void emac_free_tx_resources(struct emac_priv *priv) +{ + struct emac_desc_ring *tr = &priv->tx_ring; + struct device *dev = &priv->pdev->dev; + + emac_clean_tx_desc_ring(priv); + + kfree(tr->tx_desc_buf); + tr->tx_desc_buf = NULL; + + dma_free_coherent(dev, tr->total_size, tr->desc_addr, + tr->desc_dma_addr); + tr->desc_addr = NULL; +} + +static void emac_free_rx_resources(struct emac_priv *priv) +{ + struct emac_desc_ring *rr = &priv->rx_ring; + struct device *dev = &priv->pdev->dev; + + emac_clean_rx_desc_ring(priv); + + kfree(rr->rx_desc_buf); + rr->rx_desc_buf = NULL; + + dma_free_coherent(dev, rr->total_size, rr->desc_addr, + rr->desc_dma_addr); + rr->desc_addr = NULL; +} + +static int emac_tx_clean_desc(struct emac_priv *priv) +{ + struct net_device *ndev = priv->ndev; + struct emac_desc_ring *tx_ring; + struct emac_desc *tx_desc; + u32 i; + + netif_tx_lock(ndev); + + tx_ring = &priv->tx_ring; + + i = tx_ring->tail; + + while (i != tx_ring->head) { + tx_desc = &((struct emac_desc *)tx_ring->desc_addr)[i]; + + /* Stop checking if desc still own by DMA */ + if (READ_ONCE(tx_desc->desc0) & TX_DESC_0_OWN) + break; + + emac_free_tx_buf(priv, i); + memset(tx_desc, 0, sizeof(struct emac_desc)); + + if (++i == tx_ring->total_cnt) + i = 0; + } + + tx_ring->tail = i; + + if (unlikely(netif_queue_stopped(ndev) && + emac_tx_avail(priv) > tx_ring->total_cnt / 4)) + netif_wake_queue(ndev); + + netif_tx_unlock(ndev); + + return 0; +} + +static bool emac_rx_frame_good(struct emac_priv *priv, struct emac_desc *desc) +{ + const char *msg; + u32 len; + + len = FIELD_GET(RX_DESC_0_FRAME_PACKET_LENGTH_MASK, desc->desc0); + + if (WARN_ON_ONCE(!(desc->desc0 & RX_DESC_0_LAST_DESCRIPTOR))) + msg = "Not last descriptor"; /* This would be a bug */ + else if (desc->desc0 & RX_DESC_0_FRAME_RUNT) + msg = "Runt frame"; + else if 
(desc->desc0 & RX_DESC_0_FRAME_CRC_ERR) + msg = "Frame CRC error"; + else if (desc->desc0 & RX_DESC_0_FRAME_MAX_LEN_ERR) + msg = "Frame exceeds max length"; + else if (desc->desc0 & RX_DESC_0_FRAME_JABBER_ERR) + msg = "Frame jabber error"; + else if (desc->desc0 & RX_DESC_0_FRAME_LENGTH_ERR) + msg = "Frame length error"; + else if (len <= ETH_FCS_LEN || len > priv->dma_buf_sz) + msg = "Frame length unacceptable"; + else + return true; /* All good */ + + dev_dbg_ratelimited(&priv->ndev->dev, "RX error: %s", msg); + + return false; +} + +static void emac_alloc_rx_desc_buffers(struct emac_priv *priv) +{ + struct emac_desc_ring *rx_ring = &priv->rx_ring; + struct emac_desc rx_desc, *rx_desc_addr; + struct net_device *ndev = priv->ndev; + struct emac_rx_desc_buffer *rx_buf; + struct sk_buff *skb; + u32 i; + + i = rx_ring->head; + rx_buf = &rx_ring->rx_desc_buf[i]; + + while (!rx_buf->skb) { + skb = netdev_alloc_skb_ip_align(ndev, priv->dma_buf_sz); + if (!skb) + break; + + skb->dev = ndev; + + rx_buf->skb = skb; + rx_buf->dma_len = priv->dma_buf_sz; + rx_buf->dma_addr = dma_map_single(&priv->pdev->dev, skb->data, + priv->dma_buf_sz, + DMA_FROM_DEVICE); + if (dma_mapping_error(&priv->pdev->dev, rx_buf->dma_addr)) { + dev_err_ratelimited(&ndev->dev, "Mapping skb failed\n"); + goto err_free_skb; + } + + rx_desc_addr = &((struct emac_desc *)rx_ring->desc_addr)[i]; + + memset(&rx_desc, 0, sizeof(rx_desc)); + + rx_desc.buffer_addr_1 = rx_buf->dma_addr; + rx_desc.desc1 = FIELD_PREP(RX_DESC_1_BUFFER_SIZE_1_MASK, + rx_buf->dma_len); + + if (++i == rx_ring->total_cnt) { + rx_desc.desc1 |= RX_DESC_1_END_RING; + i = 0; + } + + *rx_desc_addr = rx_desc; + dma_wmb(); + WRITE_ONCE(rx_desc_addr->desc0, rx_desc.desc0 | RX_DESC_0_OWN); + + rx_buf = &rx_ring->rx_desc_buf[i]; + } + + rx_ring->head = i; + return; + +err_free_skb: + dev_kfree_skb_any(skb); + rx_buf->skb = NULL; +} + +/* Returns number of packets received */ +static int emac_rx_clean_desc(struct emac_priv *priv, int budget) +{ + struct net_device *ndev = priv->ndev; + struct emac_rx_desc_buffer *rx_buf; + struct emac_desc_ring *rx_ring; + struct sk_buff *skb = NULL; + struct emac_desc *rx_desc; + u32 got = 0, skb_len, i; + + rx_ring = &priv->rx_ring; + + i = rx_ring->tail; + + while (budget--) { + rx_desc = &((struct emac_desc *)rx_ring->desc_addr)[i]; + + /* Stop checking if rx_desc still owned by DMA */ + if (READ_ONCE(rx_desc->desc0) & RX_DESC_0_OWN) + break; + + dma_rmb(); + + rx_buf = &rx_ring->rx_desc_buf[i]; + + if (!rx_buf->skb) + break; + + got++; + + dma_unmap_single(&priv->pdev->dev, rx_buf->dma_addr, + rx_buf->dma_len, DMA_FROM_DEVICE); + + if (likely(emac_rx_frame_good(priv, rx_desc))) { + skb = rx_buf->skb; + + skb_len = FIELD_GET(RX_DESC_0_FRAME_PACKET_LENGTH_MASK, + rx_desc->desc0); + skb_len -= ETH_FCS_LEN; + + skb_put(skb, skb_len); + skb->dev = ndev; + ndev->hard_header_len = ETH_HLEN; + + skb->protocol = eth_type_trans(skb, ndev); + + skb->ip_summed = CHECKSUM_NONE; + + napi_gro_receive(&priv->napi, skb); + + memset(rx_desc, 0, sizeof(struct emac_desc)); + rx_buf->skb = NULL; + } else { + dev_kfree_skb_irq(rx_buf->skb); + rx_buf->skb = NULL; + } + + if (++i == rx_ring->total_cnt) + i = 0; + } + + rx_ring->tail = i; + + emac_alloc_rx_desc_buffers(priv); + + return got; +} + +static int emac_rx_poll(struct napi_struct *napi, int budget) +{ + struct emac_priv *priv = container_of(napi, struct emac_priv, napi); + int work_done; + + emac_tx_clean_desc(priv); + + work_done = emac_rx_clean_desc(priv, budget); + if (work_done < budget && 
napi_complete_done(napi, work_done)) + emac_enable_interrupt(priv); + + return work_done; +} + +/* + * For convenience, skb->data is fragment 0, frags[0] is fragment 1, etc. + * + * Each descriptor can hold up to two fragments, called buffer 1 and 2. For each + * fragment f, if f % 2 == 0, it uses buffer 1, otherwise it uses buffer 2. + */ + +static int emac_tx_map_frag(struct device *dev, struct emac_desc *tx_desc, + struct emac_tx_desc_buffer *tx_buf, + struct sk_buff *skb, u32 frag_idx) +{ + bool map_as_page, buf_idx; + const skb_frag_t *frag; + phys_addr_t addr; + u32 len; + int ret; + + buf_idx = frag_idx % 2; + + if (frag_idx == 0) { + /* Non-fragmented part */ + len = skb_headlen(skb); + addr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE); + map_as_page = false; + } else { + /* Fragment */ + frag = &skb_shinfo(skb)->frags[frag_idx - 1]; + len = skb_frag_size(frag); + addr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE); + map_as_page = true; + } + + ret = dma_mapping_error(dev, addr); + if (ret) + return ret; + + tx_buf->buf[buf_idx].dma_addr = addr; + tx_buf->buf[buf_idx].dma_len = len; + tx_buf->buf[buf_idx].map_as_page = map_as_page; + + if (buf_idx == 0) { + tx_desc->buffer_addr_1 = addr; + tx_desc->desc1 |= FIELD_PREP(TX_DESC_1_BUFFER_SIZE_1_MASK, len); + } else { + tx_desc->buffer_addr_2 = addr; + tx_desc->desc1 |= FIELD_PREP(TX_DESC_1_BUFFER_SIZE_2_MASK, len); + } + + return 0; +} + +static void emac_tx_mem_map(struct emac_priv *priv, struct sk_buff *skb) +{ + struct emac_desc_ring *tx_ring = &priv->tx_ring; + struct emac_desc tx_desc, *tx_desc_addr; + struct device *dev = &priv->pdev->dev; + struct emac_tx_desc_buffer *tx_buf; + u32 head, old_head, frag_num, f; + bool buf_idx; + + frag_num = skb_shinfo(skb)->nr_frags; + head = tx_ring->head; + old_head = head; + + for (f = 0; f < frag_num + 1; f++) { + buf_idx = f % 2; + + /* + * If using buffer 1, initialize a new desc. Otherwise, use + * buffer 2 of previous fragment's desc. + */ + if (!buf_idx) { + tx_buf = &tx_ring->tx_desc_buf[head]; + tx_desc_addr = + &((struct emac_desc *)tx_ring->desc_addr)[head]; + memset(&tx_desc, 0, sizeof(tx_desc)); + + /* + * Give ownership for all but first desc initially. For + * first desc, give at the end so DMA cannot start + * reading uninitialized descs. 
+ */ + if (head != old_head) + tx_desc.desc0 |= TX_DESC_0_OWN; + + if (++head == tx_ring->total_cnt) { + /* Just used last desc in ring */ + tx_desc.desc1 |= TX_DESC_1_END_RING; + head = 0; + } + } + + if (emac_tx_map_frag(dev, &tx_desc, tx_buf, skb, f)) { + dev_err_ratelimited(&priv->ndev->dev, + "Map TX frag %d failed\n", f); + goto err_free_skb; + } + + if (f == 0) + tx_desc.desc1 |= TX_DESC_1_FIRST_SEGMENT; + + if (f == frag_num) { + tx_desc.desc1 |= TX_DESC_1_LAST_SEGMENT; + tx_buf->skb = skb; + if (emac_tx_should_interrupt(priv, frag_num + 1)) + tx_desc.desc1 |= + TX_DESC_1_INTERRUPT_ON_COMPLETION; + } + + *tx_desc_addr = tx_desc; + } + + /* All descriptors are ready, give ownership for first desc */ + tx_desc_addr = &((struct emac_desc *)tx_ring->desc_addr)[old_head]; + dma_wmb(); + WRITE_ONCE(tx_desc_addr->desc0, tx_desc_addr->desc0 | TX_DESC_0_OWN); + + emac_dma_start_transmit(priv); + + tx_ring->head = head; + + return; + +err_free_skb: + dev_dstats_tx_dropped(priv->ndev); + dev_kfree_skb_any(skb); +} + +static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *ndev) +{ + struct emac_priv *priv = netdev_priv(ndev); + int nfrags = skb_shinfo(skb)->nr_frags; + struct device *dev = &priv->pdev->dev; + + if (unlikely(emac_tx_avail(priv) < nfrags + 1)) { + if (!netif_queue_stopped(ndev)) { + netif_stop_queue(ndev); + dev_err_ratelimited(dev, "TX ring full, stop TX queue\n"); + } + return NETDEV_TX_BUSY; + } + + emac_tx_mem_map(priv, skb); + + /* Make sure there is space in the ring for the next TX. */ + if (unlikely(emac_tx_avail(priv) <= MAX_SKB_FRAGS + 2)) + netif_stop_queue(ndev); + + return NETDEV_TX_OK; +} + +static int emac_set_mac_address(struct net_device *ndev, void *addr) +{ + struct emac_priv *priv = netdev_priv(ndev); + int ret = eth_mac_addr(ndev, addr); + + if (ret) + return ret; + + /* If running, set now; if not running it will be set in emac_up. */ + if (netif_running(ndev)) + emac_set_mac_addr(priv, ndev->dev_addr); + + return 0; +} + +static void emac_mac_multicast_filter_clear(struct emac_priv *priv) +{ + emac_wr(priv, MAC_MULTICAST_HASH_TABLE1, 0x0); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE2, 0x0); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE3, 0x0); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE4, 0x0); +} + +/* + * The upper 6 bits of the Ethernet CRC of the MAC address is used as the hash + * when matching multicast addresses. + */ +static u32 emac_ether_addr_hash(u8 addr[ETH_ALEN]) +{ + u32 crc32 = ether_crc(ETH_ALEN, addr); + + return crc32 >> 26; +} + +/* Configure Multicast and Promiscuous modes */ +static void emac_set_rx_mode(struct net_device *ndev) +{ + struct emac_priv *priv = netdev_priv(ndev); + struct netdev_hw_addr *ha; + u32 mc_filter[4] = { 0 }; + u32 hash, reg, bit, val; + + val = emac_rd(priv, MAC_ADDRESS_CONTROL); + + val &= ~MREGBIT_PROMISCUOUS_MODE; + + if (ndev->flags & IFF_PROMISC) { + /* Enable promisc mode */ + val |= MREGBIT_PROMISCUOUS_MODE; + } else if ((ndev->flags & IFF_ALLMULTI) || + (netdev_mc_count(ndev) > HASH_TABLE_SIZE)) { + /* Accept all multicast frames by setting every bit */ + emac_wr(priv, MAC_MULTICAST_HASH_TABLE1, 0xffff); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE2, 0xffff); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE3, 0xffff); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE4, 0xffff); + } else if (!netdev_mc_empty(ndev)) { + emac_mac_multicast_filter_clear(priv); + netdev_for_each_mc_addr(ha, ndev) { + /* + * The hash table is an array of 4 16-bit registers. It + * is treated like an array of 64 bits (bits[hash]). 
+ */ + hash = emac_ether_addr_hash(ha->addr); + reg = hash / 16; + bit = hash % 16; + mc_filter[reg] |= BIT(bit); + } + emac_wr(priv, MAC_MULTICAST_HASH_TABLE1, mc_filter[0]); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE2, mc_filter[1]); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE3, mc_filter[2]); + emac_wr(priv, MAC_MULTICAST_HASH_TABLE4, mc_filter[3]); + } + + emac_wr(priv, MAC_ADDRESS_CONTROL, val); +} + +static int emac_change_mtu(struct net_device *ndev, int mtu) +{ + struct emac_priv *priv = netdev_priv(ndev); + u32 frame_len; + + if (netif_running(ndev)) { + netdev_err(ndev, "must be stopped to change MTU\n"); + return -EBUSY; + } + + frame_len = mtu + ETH_HLEN + ETH_FCS_LEN; + + if (frame_len <= EMAC_DEFAULT_BUFSIZE) + priv->dma_buf_sz = EMAC_DEFAULT_BUFSIZE; + else if (frame_len <= EMAC_RX_BUF_2K) + priv->dma_buf_sz = EMAC_RX_BUF_2K; + else + priv->dma_buf_sz = EMAC_RX_BUF_4K; + + ndev->mtu = mtu; + + return 0; +} + +static void emac_tx_timeout(struct net_device *ndev, unsigned int txqueue) +{ + struct emac_priv *priv = netdev_priv(ndev); + + schedule_work(&priv->tx_timeout_task); +} + +static int emac_mii_read(struct mii_bus *bus, int phy_addr, int regnum) +{ + struct emac_priv *priv = bus->priv; + u32 cmd = 0, val; + int ret; + + cmd |= FIELD_PREP(MREGBIT_PHY_ADDRESS, phy_addr); + cmd |= FIELD_PREP(MREGBIT_REGISTER_ADDRESS, regnum); + cmd |= MREGBIT_START_MDIO_TRANS | MREGBIT_MDIO_READ_WRITE; + + emac_wr(priv, MAC_MDIO_DATA, 0x0); + emac_wr(priv, MAC_MDIO_CONTROL, cmd); + + ret = readl_poll_timeout(priv->iobase + MAC_MDIO_CONTROL, val, + !(val & MREGBIT_START_MDIO_TRANS), 100, 10000); + + if (ret) + return ret; + + val = emac_rd(priv, MAC_MDIO_DATA); + return FIELD_GET(MREGBIT_MDIO_DATA, val); +} + +static int emac_mii_write(struct mii_bus *bus, int phy_addr, int regnum, + u16 value) +{ + struct emac_priv *priv = bus->priv; + u32 cmd = 0, val; + int ret; + + emac_wr(priv, MAC_MDIO_DATA, value); + + cmd |= FIELD_PREP(MREGBIT_PHY_ADDRESS, phy_addr); + cmd |= FIELD_PREP(MREGBIT_REGISTER_ADDRESS, regnum); + cmd |= MREGBIT_START_MDIO_TRANS; + + emac_wr(priv, MAC_MDIO_CONTROL, cmd); + + ret = readl_poll_timeout(priv->iobase + MAC_MDIO_CONTROL, val, + !(val & MREGBIT_START_MDIO_TRANS), 100, 10000); + + return ret; +} + +static int emac_mdio_init(struct emac_priv *priv) +{ + struct device *dev = &priv->pdev->dev; + struct device_node *mii_np; + struct mii_bus *mii; + int ret; + + mii = devm_mdiobus_alloc(dev); + if (!mii) + return -ENOMEM; + + mii->priv = priv; + mii->name = "k1_emac_mii"; + mii->read = emac_mii_read; + mii->write = emac_mii_write; + mii->parent = dev; + mii->phy_mask = ~0; + snprintf(mii->id, MII_BUS_ID_SIZE, "%s", priv->pdev->name); + + mii_np = of_get_available_child_by_name(dev->of_node, "mdio-bus"); + + ret = devm_of_mdiobus_register(dev, mii, mii_np); + if (ret) + dev_err_probe(dev, ret, "Failed to register mdio bus\n"); + + of_node_put(mii_np); + return ret; +} + +static void emac_set_tx_fc(struct emac_priv *priv, bool enable) +{ + u32 val; + + val = emac_rd(priv, MAC_FC_CONTROL); + + FIELD_MODIFY(MREGBIT_FC_GENERATION_ENABLE, &val, enable); + FIELD_MODIFY(MREGBIT_AUTO_FC_GENERATION_ENABLE, &val, enable); + + emac_wr(priv, MAC_FC_CONTROL, val); +} + +static void emac_set_rx_fc(struct emac_priv *priv, bool enable) +{ + u32 val = emac_rd(priv, MAC_FC_CONTROL); + + FIELD_MODIFY(MREGBIT_FC_DECODE_ENABLE, &val, enable); + + emac_wr(priv, MAC_FC_CONTROL, val); +} + +static void emac_set_fc(struct emac_priv *priv, u8 fc) +{ + emac_set_tx_fc(priv, fc & FLOW_CTRL_TX); + 
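[Illustrative aside, not part of the patch] The multicast filter mapping used in emac_set_rx_mode() above reduces to two lines of arithmetic: the hash is the top 6 bits of the Ethernet CRC of the address, and it selects one bit out of the four 16-bit MAC_MULTICAST_HASH_TABLEn registers. A minimal standalone sketch of just that mapping (the CRC value below is invented; in the driver it comes from ether_crc()):

/* Standalone sketch: reproduce the register/bit selection from
 * emac_set_rx_mode() for a given (made-up) CRC value.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned int hash_from_crc(uint32_t crc)
{
	return crc >> 26;		/* upper 6 bits -> 0..63 */
}

int main(void)
{
	uint32_t crc = 0xDEADBEEF;	/* placeholder for ether_crc(addr) */
	unsigned int hash = hash_from_crc(crc);
	unsigned int reg = hash / 16;	/* which 16-bit hash-table register */
	unsigned int bit = hash % 16;	/* which bit inside that register */

	printf("hash=%u -> MAC_MULTICAST_HASH_TABLE%u, bit %u\n",
	       hash, reg + 1, bit);
	return 0;
}

With crc = 0xDEADBEEF the hash is 55, which lands on bit 7 of MAC_MULTICAST_HASH_TABLE4, i.e. mc_filter[3] |= BIT(7) in the driver's terms.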
emac_set_rx_fc(priv, fc & FLOW_CTRL_RX); + priv->flow_control = fc; +} + +static void emac_set_fc_autoneg(struct emac_priv *priv) +{ + struct phy_device *phydev = priv->ndev->phydev; + u32 local_adv, remote_adv; + u8 fc; + + local_adv = linkmode_adv_to_lcl_adv_t(phydev->advertising); + + remote_adv = 0; + + if (phydev->pause) + remote_adv |= LPA_PAUSE_CAP; + + if (phydev->asym_pause) + remote_adv |= LPA_PAUSE_ASYM; + + fc = mii_resolve_flowctrl_fdx(local_adv, remote_adv); + + priv->flow_control_autoneg = true; + + emac_set_fc(priv, fc); +} + +/* + * Even though this MAC supports gigabit operation, it only provides 32-bit + * statistics counters. The most overflow-prone counters are the "bytes" ones, + * which at gigabit overflow about twice a minute. + * + * Therefore, we maintain the high 32 bits of counters ourselves, incrementing + * every time statistics seem to go backwards. Also, update periodically to + * catch overflows when we are not otherwise checking the statistics often + * enough. + */ + +#define EMAC_STATS_TIMER_PERIOD 20 + +static int emac_read_stat_cnt(struct emac_priv *priv, u8 cnt, u32 *res, + u32 control_reg, u32 high_reg, u32 low_reg) +{ + u32 val, high, low; + int ret; + + /* The "read" bit is the same for TX and RX */ + + val = MREGBIT_START_TX_COUNTER_READ | cnt; + emac_wr(priv, control_reg, val); + val = emac_rd(priv, control_reg); + + ret = readl_poll_timeout_atomic(priv->iobase + control_reg, val, + !(val & MREGBIT_START_TX_COUNTER_READ), + 100, 10000); + + if (ret) { + netdev_err(priv->ndev, "Read stat timeout\n"); + return ret; + } + + high = emac_rd(priv, high_reg); + low = emac_rd(priv, low_reg); + *res = high << 16 | lower_16_bits(low); + + return 0; +} + +static int emac_tx_read_stat_cnt(struct emac_priv *priv, u8 cnt, u32 *res) +{ + return emac_read_stat_cnt(priv, cnt, res, MAC_TX_STATCTR_CONTROL, + MAC_TX_STATCTR_DATA_HIGH, + MAC_TX_STATCTR_DATA_LOW); +} + +static int emac_rx_read_stat_cnt(struct emac_priv *priv, u8 cnt, u32 *res) +{ + return emac_read_stat_cnt(priv, cnt, res, MAC_RX_STATCTR_CONTROL, + MAC_RX_STATCTR_DATA_HIGH, + MAC_RX_STATCTR_DATA_LOW); +} + +static void emac_update_counter(u64 *counter, u32 new_low) +{ + u32 old_low = lower_32_bits(*counter); + u64 high = upper_32_bits(*counter); + + if (old_low > new_low) { + /* Overflowed, increment high 32 bits */ + high++; + } + + *counter = (high << 32) | new_low; +} + +static void emac_stats_update(struct emac_priv *priv) +{ + u64 *tx_stats_off = priv->tx_stats_off.array; + u64 *rx_stats_off = priv->rx_stats_off.array; + u64 *tx_stats = priv->tx_stats.array; + u64 *rx_stats = priv->rx_stats.array; + u32 i, res, offset; + + assert_spin_locked(&priv->stats_lock); + + if (!netif_running(priv->ndev) || !netif_device_present(priv->ndev)) { + /* Not up, don't try to update */ + return; + } + + for (i = 0; i < sizeof(priv->tx_stats) / sizeof(*tx_stats); i++) { + /* + * If reading stats times out, everything is broken and there's + * nothing we can do. Reading statistics also can't return an + * error, so just return without updating and without + * rescheduling. + */ + if (emac_tx_read_stat_cnt(priv, i, &res)) + return; + + /* + * Re-initializing while bringing interface up resets counters + * to zero, so to provide continuity, we add the values saved + * last time we did emac_down() to the new hardware-provided + * value. 
+ */ + offset = lower_32_bits(tx_stats_off[i]); + emac_update_counter(&tx_stats[i], res + offset); + } + + /* Similar remarks as TX stats */ + for (i = 0; i < sizeof(priv->rx_stats) / sizeof(*rx_stats); i++) { + if (emac_rx_read_stat_cnt(priv, i, &res)) + return; + offset = lower_32_bits(rx_stats_off[i]); + emac_update_counter(&rx_stats[i], res + offset); + } + + mod_timer(&priv->stats_timer, jiffies + EMAC_STATS_TIMER_PERIOD * HZ); +} + +static void emac_stats_timer(struct timer_list *t) +{ + struct emac_priv *priv = timer_container_of(priv, t, stats_timer); + + spin_lock(&priv->stats_lock); + + emac_stats_update(priv); + + spin_unlock(&priv->stats_lock); +} + +static const struct ethtool_rmon_hist_range emac_rmon_hist_ranges[] = { + { 64, 64 }, + { 65, 127 }, + { 128, 255 }, + { 256, 511 }, + { 512, 1023 }, + { 1024, 1518 }, + { 1519, 4096 }, + { /* sentinel */ }, +}; + +/* Like dev_fetch_dstats(), but we only use tx_drops */ +static u64 emac_get_stat_tx_drops(struct emac_priv *priv) +{ + const struct pcpu_dstats *stats; + u64 tx_drops, total = 0; + unsigned int start; + int cpu; + + for_each_possible_cpu(cpu) { + stats = per_cpu_ptr(priv->ndev->dstats, cpu); + do { + start = u64_stats_fetch_begin(&stats->syncp); + tx_drops = u64_stats_read(&stats->tx_drops); + } while (u64_stats_fetch_retry(&stats->syncp, start)); + + total += tx_drops; + } + + return total; +} + +static void emac_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *storage) +{ + struct emac_priv *priv = netdev_priv(dev); + union emac_hw_tx_stats *tx_stats; + union emac_hw_rx_stats *rx_stats; + + tx_stats = &priv->tx_stats; + rx_stats = &priv->rx_stats; + + /* This is the only software counter */ + storage->tx_dropped = emac_get_stat_tx_drops(priv); + + spin_lock_bh(&priv->stats_lock); + + emac_stats_update(priv); + + storage->tx_packets = tx_stats->stats.tx_ok_pkts; + storage->tx_bytes = tx_stats->stats.tx_ok_bytes; + storage->tx_errors = tx_stats->stats.tx_err_pkts; + + storage->rx_packets = rx_stats->stats.rx_ok_pkts; + storage->rx_bytes = rx_stats->stats.rx_ok_bytes; + storage->rx_errors = rx_stats->stats.rx_err_total_pkts; + storage->rx_crc_errors = rx_stats->stats.rx_crc_err_pkts; + storage->rx_frame_errors = rx_stats->stats.rx_align_err_pkts; + storage->rx_length_errors = rx_stats->stats.rx_len_err_pkts; + + storage->collisions = tx_stats->stats.tx_singleclsn_pkts; + storage->collisions += tx_stats->stats.tx_multiclsn_pkts; + storage->collisions += tx_stats->stats.tx_excessclsn_pkts; + + storage->rx_missed_errors = rx_stats->stats.rx_drp_fifo_full_pkts; + storage->rx_missed_errors += rx_stats->stats.rx_truncate_fifo_full_pkts; + + spin_unlock_bh(&priv->stats_lock); +} + +static void emac_get_rmon_stats(struct net_device *dev, + struct ethtool_rmon_stats *rmon_stats, + const struct ethtool_rmon_hist_range **ranges) +{ + struct emac_priv *priv = netdev_priv(dev); + union emac_hw_rx_stats *rx_stats; + + rx_stats = &priv->rx_stats; + + *ranges = emac_rmon_hist_ranges; + + spin_lock_bh(&priv->stats_lock); + + emac_stats_update(priv); + + rmon_stats->undersize_pkts = rx_stats->stats.rx_len_undersize_pkts; + rmon_stats->oversize_pkts = rx_stats->stats.rx_len_oversize_pkts; + rmon_stats->fragments = rx_stats->stats.rx_len_fragment_pkts; + rmon_stats->jabbers = rx_stats->stats.rx_len_jabber_pkts; + + /* Only RX has histogram stats */ + + rmon_stats->hist[0] = rx_stats->stats.rx_64_pkts; + rmon_stats->hist[1] = rx_stats->stats.rx_65_127_pkts; + rmon_stats->hist[2] = rx_stats->stats.rx_128_255_pkts; + 
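[Illustrative aside, not part of the patch] The overflow scheme described above boils down to: each poll replaces the low 32 bits of the software counter with the fresh hardware reading, and the high 32 bits are bumped whenever the reading appears to go backwards. At gigabit line rate the byte counters wrap roughly every 2^32 / 125 MB/s, about 34 seconds, so the 20-second EMAC_STATS_TIMER_PERIOD poll is frequent enough to catch every wrap. A standalone sketch of the arithmetic in emac_update_counter() (the input readings are invented):

/* Standalone sketch of the 32-bit -> 64-bit counter extension used by
 * emac_update_counter(); the two readings simulate a hardware wrap.
 */
#include <stdio.h>
#include <stdint.h>

static void update_counter(uint64_t *counter, uint32_t new_low)
{
	uint32_t old_low = (uint32_t)*counter;
	uint64_t high = *counter >> 32;

	if (old_low > new_low)		/* counter wrapped since last read */
		high++;

	*counter = (high << 32) | new_low;
}

int main(void)
{
	uint64_t bytes = 0;

	update_counter(&bytes, 0xFFFFFF00);	/* near the 32-bit limit */
	update_counter(&bytes, 0x00000100);	/* wrapped -> high word++ */

	printf("counter = 0x%llx\n", (unsigned long long)bytes);
	return 0;
}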
rmon_stats->hist[3] = rx_stats->stats.rx_256_511_pkts; + rmon_stats->hist[4] = rx_stats->stats.rx_512_1023_pkts; + rmon_stats->hist[5] = rx_stats->stats.rx_1024_1518_pkts; + rmon_stats->hist[6] = rx_stats->stats.rx_1519_plus_pkts; + + spin_unlock_bh(&priv->stats_lock); +} + +static void emac_get_eth_mac_stats(struct net_device *dev, + struct ethtool_eth_mac_stats *mac_stats) +{ + struct emac_priv *priv = netdev_priv(dev); + union emac_hw_tx_stats *tx_stats; + union emac_hw_rx_stats *rx_stats; + + tx_stats = &priv->tx_stats; + rx_stats = &priv->rx_stats; + + spin_lock_bh(&priv->stats_lock); + + emac_stats_update(priv); + + mac_stats->MulticastFramesXmittedOK = tx_stats->stats.tx_multicast_pkts; + mac_stats->BroadcastFramesXmittedOK = tx_stats->stats.tx_broadcast_pkts; + + mac_stats->MulticastFramesReceivedOK = + rx_stats->stats.rx_multicast_pkts; + mac_stats->BroadcastFramesReceivedOK = + rx_stats->stats.rx_broadcast_pkts; + + mac_stats->SingleCollisionFrames = tx_stats->stats.tx_singleclsn_pkts; + mac_stats->MultipleCollisionFrames = tx_stats->stats.tx_multiclsn_pkts; + mac_stats->LateCollisions = tx_stats->stats.tx_lateclsn_pkts; + mac_stats->FramesAbortedDueToXSColls = + tx_stats->stats.tx_excessclsn_pkts; + + spin_unlock_bh(&priv->stats_lock); +} + +static void emac_get_pause_stats(struct net_device *dev, + struct ethtool_pause_stats *pause_stats) +{ + struct emac_priv *priv = netdev_priv(dev); + union emac_hw_tx_stats *tx_stats; + union emac_hw_rx_stats *rx_stats; + + tx_stats = &priv->tx_stats; + rx_stats = &priv->rx_stats; + + spin_lock_bh(&priv->stats_lock); + + emac_stats_update(priv); + + pause_stats->tx_pause_frames = tx_stats->stats.tx_pause_pkts; + pause_stats->rx_pause_frames = rx_stats->stats.rx_pause_pkts; + + spin_unlock_bh(&priv->stats_lock); +} + +/* Other statistics that are not derivable from standard statistics */ + +#define EMAC_ETHTOOL_STAT(type, name) \ + { offsetof(type, stats.name) / sizeof(u64), #name } + +static const struct emac_ethtool_stats { + size_t offset; + char str[ETH_GSTRING_LEN]; +} emac_ethtool_rx_stats[] = { + EMAC_ETHTOOL_STAT(union emac_hw_rx_stats, rx_drp_fifo_full_pkts), + EMAC_ETHTOOL_STAT(union emac_hw_rx_stats, rx_truncate_fifo_full_pkts), +}; + +static int emac_get_sset_count(struct net_device *dev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ARRAY_SIZE(emac_ethtool_rx_stats); + default: + return -EOPNOTSUPP; + } +} + +static void emac_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(emac_ethtool_rx_stats); i++) { + memcpy(data, emac_ethtool_rx_stats[i].str, + ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + break; + } +} + +static void emac_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct emac_priv *priv = netdev_priv(dev); + u64 *rx_stats = (u64 *)&priv->rx_stats; + int i; + + spin_lock_bh(&priv->stats_lock); + + emac_stats_update(priv); + + for (i = 0; i < ARRAY_SIZE(emac_ethtool_rx_stats); i++) + data[i] = rx_stats[emac_ethtool_rx_stats[i].offset]; + + spin_unlock_bh(&priv->stats_lock); +} + +static int emac_ethtool_get_regs_len(struct net_device *dev) +{ + return (EMAC_DMA_REG_CNT + EMAC_MAC_REG_CNT) * sizeof(u32); +} + +static void emac_ethtool_get_regs(struct net_device *dev, + struct ethtool_regs *regs, void *space) +{ + struct emac_priv *priv = netdev_priv(dev); + u32 *reg_space = space; + int i; + + regs->version = 1; + + for (i = 0; i < EMAC_DMA_REG_CNT; i++) + 
reg_space[i] = emac_rd(priv, DMA_CONFIGURATION + i * 4); + + for (i = 0; i < EMAC_MAC_REG_CNT; i++) + reg_space[i + EMAC_DMA_REG_CNT] = + emac_rd(priv, MAC_GLOBAL_CONTROL + i * 4); +} + +static void emac_get_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct emac_priv *priv = netdev_priv(dev); + + pause->autoneg = priv->flow_control_autoneg; + pause->tx_pause = !!(priv->flow_control & FLOW_CTRL_TX); + pause->rx_pause = !!(priv->flow_control & FLOW_CTRL_RX); +} + +static int emac_set_pauseparam(struct net_device *dev, + struct ethtool_pauseparam *pause) +{ + struct emac_priv *priv = netdev_priv(dev); + u8 fc = 0; + + priv->flow_control_autoneg = pause->autoneg; + + if (pause->autoneg) { + emac_set_fc_autoneg(priv); + } else { + if (pause->tx_pause) + fc |= FLOW_CTRL_TX; + + if (pause->rx_pause) + fc |= FLOW_CTRL_RX; + + emac_set_fc(priv, fc); + } + + return 0; +} + +static void emac_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + strscpy(info->driver, DRIVER_NAME, sizeof(info->driver)); + info->n_stats = ARRAY_SIZE(emac_ethtool_rx_stats); +} + +static void emac_tx_timeout_task(struct work_struct *work) +{ + struct net_device *ndev; + struct emac_priv *priv; + + priv = container_of(work, struct emac_priv, tx_timeout_task); + ndev = priv->ndev; + + rtnl_lock(); + + /* No need to reset if already down */ + if (!netif_running(ndev)) { + rtnl_unlock(); + return; + } + + netdev_err(ndev, "MAC reset due to TX timeout\n"); + + netif_trans_update(ndev); /* prevent tx timeout */ + dev_close(ndev); + dev_open(ndev, NULL); + + rtnl_unlock(); +} + +static void emac_sw_init(struct emac_priv *priv) +{ + priv->dma_buf_sz = EMAC_DEFAULT_BUFSIZE; + + priv->tx_ring.total_cnt = DEFAULT_TX_RING_NUM; + priv->rx_ring.total_cnt = DEFAULT_RX_RING_NUM; + + spin_lock_init(&priv->stats_lock); + + INIT_WORK(&priv->tx_timeout_task, emac_tx_timeout_task); + + priv->tx_coal_frames = EMAC_TX_FRAMES; + priv->tx_coal_timeout = EMAC_TX_COAL_TIMEOUT; + + timer_setup(&priv->txtimer, emac_tx_coal_timer, 0); + timer_setup(&priv->stats_timer, emac_stats_timer, 0); +} + +static irqreturn_t emac_interrupt_handler(int irq, void *dev_id) +{ + struct net_device *ndev = (struct net_device *)dev_id; + struct emac_priv *priv = netdev_priv(ndev); + bool should_schedule = false; + u32 clr = 0; + u32 status; + + status = emac_rd(priv, DMA_STATUS_IRQ); + + if (status & MREGBIT_TRANSMIT_TRANSFER_DONE_IRQ) { + clr |= MREGBIT_TRANSMIT_TRANSFER_DONE_IRQ; + should_schedule = true; + } + + if (status & MREGBIT_TRANSMIT_DES_UNAVAILABLE_IRQ) + clr |= MREGBIT_TRANSMIT_DES_UNAVAILABLE_IRQ; + + if (status & MREGBIT_TRANSMIT_DMA_STOPPED_IRQ) + clr |= MREGBIT_TRANSMIT_DMA_STOPPED_IRQ; + + if (status & MREGBIT_RECEIVE_TRANSFER_DONE_IRQ) { + clr |= MREGBIT_RECEIVE_TRANSFER_DONE_IRQ; + should_schedule = true; + } + + if (status & MREGBIT_RECEIVE_DES_UNAVAILABLE_IRQ) + clr |= MREGBIT_RECEIVE_DES_UNAVAILABLE_IRQ; + + if (status & MREGBIT_RECEIVE_DMA_STOPPED_IRQ) + clr |= MREGBIT_RECEIVE_DMA_STOPPED_IRQ; + + if (status & MREGBIT_RECEIVE_MISSED_FRAME_IRQ) + clr |= MREGBIT_RECEIVE_MISSED_FRAME_IRQ; + + if (should_schedule) { + if (napi_schedule_prep(&priv->napi)) { + emac_disable_interrupt(priv); + __napi_schedule_irqoff(&priv->napi); + } + } + + emac_wr(priv, DMA_STATUS_IRQ, clr); + + return IRQ_HANDLED; +} + +static void emac_configure_tx(struct emac_priv *priv) +{ + u32 val; + + /* Set base address */ + val = (u32)priv->tx_ring.desc_dma_addr; + emac_wr(priv, DMA_TRANSMIT_BASE_ADDRESS, val); + + /* Set TX 
inter-frame gap value, enable transmit */ + val = emac_rd(priv, MAC_TRANSMIT_CONTROL); + val &= ~MREGBIT_IFG_LEN; + val |= MREGBIT_TRANSMIT_ENABLE; + val |= MREGBIT_TRANSMIT_AUTO_RETRY; + emac_wr(priv, MAC_TRANSMIT_CONTROL, val); + + emac_wr(priv, DMA_TRANSMIT_AUTO_POLL_COUNTER, 0x0); + + /* Start TX DMA */ + val = emac_rd(priv, DMA_CONTROL); + val |= MREGBIT_START_STOP_TRANSMIT_DMA; + emac_wr(priv, DMA_CONTROL, val); +} + +static void emac_configure_rx(struct emac_priv *priv) +{ + u32 val; + + /* Set base address */ + val = (u32)priv->rx_ring.desc_dma_addr; + emac_wr(priv, DMA_RECEIVE_BASE_ADDRESS, val); + + /* Enable receive */ + val = emac_rd(priv, MAC_RECEIVE_CONTROL); + val |= MREGBIT_RECEIVE_ENABLE; + val |= MREGBIT_STORE_FORWARD; + emac_wr(priv, MAC_RECEIVE_CONTROL, val); + + /* Start RX DMA */ + val = emac_rd(priv, DMA_CONTROL); + val |= MREGBIT_START_STOP_RECEIVE_DMA; + emac_wr(priv, DMA_CONTROL, val); +} + +static void emac_adjust_link(struct net_device *dev) +{ + struct emac_priv *priv = netdev_priv(dev); + struct phy_device *phydev = dev->phydev; + u32 ctrl; + + if (phydev->link) { + ctrl = emac_rd(priv, MAC_GLOBAL_CONTROL); + + /* Update duplex and speed from PHY */ + + FIELD_MODIFY(MREGBIT_FULL_DUPLEX_MODE, &ctrl, + phydev->duplex == DUPLEX_FULL); + + ctrl &= ~MREGBIT_SPEED; + + switch (phydev->speed) { + case SPEED_1000: + ctrl |= MREGBIT_SPEED_1000M; + break; + case SPEED_100: + ctrl |= MREGBIT_SPEED_100M; + break; + case SPEED_10: + ctrl |= MREGBIT_SPEED_10M; + break; + default: + netdev_err(dev, "Unknown speed: %d\n", phydev->speed); + phydev->speed = SPEED_UNKNOWN; + break; + } + + emac_wr(priv, MAC_GLOBAL_CONTROL, ctrl); + + emac_set_fc_autoneg(priv); + } + + phy_print_status(phydev); +} + +static void emac_update_delay_line(struct emac_priv *priv) +{ + u32 mask = 0, val = 0; + + mask |= EMAC_RX_DLINE_EN; + mask |= EMAC_RX_DLINE_STEP_MASK | EMAC_RX_DLINE_CODE_MASK; + mask |= EMAC_TX_DLINE_EN; + mask |= EMAC_TX_DLINE_STEP_MASK | EMAC_TX_DLINE_CODE_MASK; + + if (phy_interface_mode_is_rgmii(priv->phy_interface)) { + val |= EMAC_RX_DLINE_EN; + val |= FIELD_PREP(EMAC_RX_DLINE_STEP_MASK, + EMAC_DLINE_STEP_15P6); + val |= FIELD_PREP(EMAC_RX_DLINE_CODE_MASK, priv->rx_delay); + + val |= EMAC_TX_DLINE_EN; + val |= FIELD_PREP(EMAC_TX_DLINE_STEP_MASK, + EMAC_DLINE_STEP_15P6); + val |= FIELD_PREP(EMAC_TX_DLINE_CODE_MASK, priv->tx_delay); + } + + regmap_update_bits(priv->regmap_apmu, + priv->regmap_apmu_offset + APMU_EMAC_DLINE_REG, + mask, val); +} + +static int emac_phy_connect(struct net_device *ndev) +{ + struct emac_priv *priv = netdev_priv(ndev); + struct device *dev = &priv->pdev->dev; + struct phy_device *phydev; + struct device_node *np; + int ret; + + ret = of_get_phy_mode(dev->of_node, &priv->phy_interface); + if (ret) { + netdev_err(ndev, "No phy-mode found"); + return ret; + } + + switch (priv->phy_interface) { + case PHY_INTERFACE_MODE_RMII: + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + break; + default: + netdev_err(ndev, "Unsupported PHY interface %s", + phy_modes(priv->phy_interface)); + return -EINVAL; + } + + np = of_parse_phandle(dev->of_node, "phy-handle", 0); + if (!np && of_phy_is_fixed_link(dev->of_node)) + np = of_node_get(dev->of_node); + + if (!np) { + netdev_err(ndev, "No PHY specified"); + return -ENODEV; + } + + ret = emac_phy_interface_config(priv); + if (ret) + goto err_node_put; + + phydev = of_phy_connect(ndev, np, &emac_adjust_link, 0, + 
priv->phy_interface); + if (!phydev) { + netdev_err(ndev, "Could not attach to PHY\n"); + ret = -ENODEV; + goto err_node_put; + } + + phy_support_asym_pause(phydev); + + phydev->mac_managed_pm = true; + + emac_update_delay_line(priv); + +err_node_put: + of_node_put(np); + return ret; +} + +static int emac_up(struct emac_priv *priv) +{ + struct platform_device *pdev = priv->pdev; + struct net_device *ndev = priv->ndev; + int ret; + + pm_runtime_get_sync(&pdev->dev); + + ret = emac_phy_connect(ndev); + if (ret) { + dev_err(&pdev->dev, "emac_phy_connect failed\n"); + goto err_pm_put; + } + + emac_init_hw(priv); + + emac_set_mac_addr(priv, ndev->dev_addr); + emac_configure_tx(priv); + emac_configure_rx(priv); + + emac_alloc_rx_desc_buffers(priv); + + phy_start(ndev->phydev); + + ret = request_irq(priv->irq, emac_interrupt_handler, IRQF_SHARED, + ndev->name, ndev); + if (ret) { + dev_err(&pdev->dev, "request_irq failed\n"); + goto err_reset_disconnect_phy; + } + + /* Don't enable MAC interrupts */ + emac_wr(priv, MAC_INTERRUPT_ENABLE, 0x0); + + /* Enable DMA interrupts */ + emac_wr(priv, DMA_INTERRUPT_ENABLE, + MREGBIT_TRANSMIT_TRANSFER_DONE_INTR_ENABLE | + MREGBIT_TRANSMIT_DMA_STOPPED_INTR_ENABLE | + MREGBIT_RECEIVE_TRANSFER_DONE_INTR_ENABLE | + MREGBIT_RECEIVE_DMA_STOPPED_INTR_ENABLE | + MREGBIT_RECEIVE_MISSED_FRAME_INTR_ENABLE); + + napi_enable(&priv->napi); + + netif_start_queue(ndev); + + mod_timer(&priv->stats_timer, jiffies); + + return 0; + +err_reset_disconnect_phy: + emac_reset_hw(priv); + phy_disconnect(ndev->phydev); + +err_pm_put: + pm_runtime_put_sync(&pdev->dev); + return ret; +} + +static int emac_down(struct emac_priv *priv) +{ + struct platform_device *pdev = priv->pdev; + struct net_device *ndev = priv->ndev; + + netif_stop_queue(ndev); + + phy_disconnect(ndev->phydev); + + emac_wr(priv, MAC_INTERRUPT_ENABLE, 0x0); + emac_wr(priv, DMA_INTERRUPT_ENABLE, 0x0); + + free_irq(priv->irq, ndev); + + napi_disable(&priv->napi); + + timer_delete_sync(&priv->txtimer); + cancel_work_sync(&priv->tx_timeout_task); + + timer_delete_sync(&priv->stats_timer); + + emac_reset_hw(priv); + + /* Update and save current stats, see emac_stats_update() for usage */ + + spin_lock_bh(&priv->stats_lock); + + emac_stats_update(priv); + + priv->tx_stats_off = priv->tx_stats; + priv->rx_stats_off = priv->rx_stats; + + spin_unlock_bh(&priv->stats_lock); + + pm_runtime_put_sync(&pdev->dev); + return 0; +} + +/* Called when net interface is brought up. */ +static int emac_open(struct net_device *ndev) +{ + struct emac_priv *priv = netdev_priv(ndev); + struct device *dev = &priv->pdev->dev; + int ret; + + ret = emac_alloc_tx_resources(priv); + if (ret) { + dev_err(dev, "Cannot allocate TX resources\n"); + return ret; + } + + ret = emac_alloc_rx_resources(priv); + if (ret) { + dev_err(dev, "Cannot allocate RX resources\n"); + goto err_free_tx; + } + + ret = emac_up(priv); + if (ret) { + dev_err(dev, "Error when bringing interface up\n"); + goto err_free_rx; + } + return 0; + +err_free_rx: + emac_free_rx_resources(priv); +err_free_tx: + emac_free_tx_resources(priv); + + return ret; +} + +/* Called when interface is brought down. 
*/ +static int emac_stop(struct net_device *ndev) +{ + struct emac_priv *priv = netdev_priv(ndev); + + emac_down(priv); + emac_free_tx_resources(priv); + emac_free_rx_resources(priv); + + return 0; +} + +static const struct ethtool_ops emac_ethtool_ops = { + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, + .nway_reset = phy_ethtool_nway_reset, + .get_drvinfo = emac_get_drvinfo, + .get_link = ethtool_op_get_link, + + .get_regs = emac_ethtool_get_regs, + .get_regs_len = emac_ethtool_get_regs_len, + + .get_rmon_stats = emac_get_rmon_stats, + .get_pause_stats = emac_get_pause_stats, + .get_eth_mac_stats = emac_get_eth_mac_stats, + + .get_sset_count = emac_get_sset_count, + .get_strings = emac_get_strings, + .get_ethtool_stats = emac_get_ethtool_stats, + + .get_pauseparam = emac_get_pauseparam, + .set_pauseparam = emac_set_pauseparam, +}; + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_stop, + .ndo_start_xmit = emac_start_xmit, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = emac_set_mac_address, + .ndo_eth_ioctl = phy_do_ioctl_running, + .ndo_change_mtu = emac_change_mtu, + .ndo_tx_timeout = emac_tx_timeout, + .ndo_set_rx_mode = emac_set_rx_mode, + .ndo_get_stats64 = emac_get_stats64, +}; + +/* Currently we always use 15.6 ps/step for the delay line */ + +static u32 delay_ps_to_unit(u32 ps) +{ + return DIV_ROUND_CLOSEST(ps * 10, 156); +} + +static u32 delay_unit_to_ps(u32 unit) +{ + return DIV_ROUND_CLOSEST(unit * 156, 10); +} + +#define EMAC_MAX_DELAY_UNIT FIELD_MAX(EMAC_TX_DLINE_CODE_MASK) + +/* Minus one just to be safe from rounding errors */ +#define EMAC_MAX_DELAY_PS (delay_unit_to_ps(EMAC_MAX_DELAY_UNIT - 1)) + +static int emac_config_dt(struct platform_device *pdev, struct emac_priv *priv) +{ + struct device_node *np = pdev->dev.of_node; + struct device *dev = &pdev->dev; + u8 mac_addr[ETH_ALEN] = { 0 }; + int ret; + + priv->iobase = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(priv->iobase)) + return dev_err_probe(dev, PTR_ERR(priv->iobase), + "ioremap failed\n"); + + priv->regmap_apmu = + syscon_regmap_lookup_by_phandle_args(np, "spacemit,apmu", 1, + &priv->regmap_apmu_offset); + + if (IS_ERR(priv->regmap_apmu)) + return dev_err_probe(dev, PTR_ERR(priv->regmap_apmu), + "failed to get syscon\n"); + + priv->irq = platform_get_irq(pdev, 0); + if (priv->irq < 0) + return priv->irq; + + ret = of_get_mac_address(np, mac_addr); + if (ret) { + if (ret == -EPROBE_DEFER) + return dev_err_probe(dev, ret, + "Can't get MAC address\n"); + + dev_info(&pdev->dev, "Using random MAC address\n"); + eth_hw_addr_random(priv->ndev); + } else { + eth_hw_addr_set(priv->ndev, mac_addr); + } + + priv->tx_delay = 0; + priv->rx_delay = 0; + + of_property_read_u32(np, "tx-internal-delay-ps", &priv->tx_delay); + of_property_read_u32(np, "rx-internal-delay-ps", &priv->rx_delay); + + if (priv->tx_delay > EMAC_MAX_DELAY_PS) { + dev_err(&pdev->dev, + "tx-internal-delay-ps too large: max %d, got %d", + EMAC_MAX_DELAY_PS, priv->tx_delay); + return -EINVAL; + } + + if (priv->rx_delay > EMAC_MAX_DELAY_PS) { + dev_err(&pdev->dev, + "rx-internal-delay-ps too large: max %d, got %d", + EMAC_MAX_DELAY_PS, priv->rx_delay); + return -EINVAL; + } + + priv->tx_delay = delay_ps_to_unit(priv->tx_delay); + priv->rx_delay = delay_ps_to_unit(priv->rx_delay); + + return 0; +} + +static void emac_phy_deregister_fixed_link(void *data) +{ + struct device_node *of_node = data; + + 
of_phy_deregister_fixed_link(of_node); +} + +static int emac_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct reset_control *reset; + struct net_device *ndev; + struct emac_priv *priv; + int ret; + + ndev = devm_alloc_etherdev(dev, sizeof(struct emac_priv)); + if (!ndev) + return -ENOMEM; + + ndev->hw_features = NETIF_F_SG; + ndev->features |= ndev->hw_features; + + ndev->max_mtu = EMAC_RX_BUF_4K - (ETH_HLEN + ETH_FCS_LEN); + ndev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS; + + priv = netdev_priv(ndev); + priv->ndev = ndev; + priv->pdev = pdev; + platform_set_drvdata(pdev, priv); + + ret = emac_config_dt(pdev, priv); + if (ret < 0) + return dev_err_probe(dev, ret, "Configuration failed\n"); + + ndev->watchdog_timeo = 5 * HZ; + ndev->base_addr = (unsigned long)priv->iobase; + ndev->irq = priv->irq; + + ndev->ethtool_ops = &emac_ethtool_ops; + ndev->netdev_ops = &emac_netdev_ops; + + devm_pm_runtime_enable(&pdev->dev); + + priv->bus_clk = devm_clk_get_enabled(&pdev->dev, NULL); + if (IS_ERR(priv->bus_clk)) + return dev_err_probe(dev, PTR_ERR(priv->bus_clk), + "Failed to get clock\n"); + + reset = devm_reset_control_get_optional_exclusive_deasserted(&pdev->dev, + NULL); + if (IS_ERR(reset)) + return dev_err_probe(dev, PTR_ERR(reset), + "Failed to get reset\n"); + + if (of_phy_is_fixed_link(dev->of_node)) { + ret = of_phy_register_fixed_link(dev->of_node); + if (ret) + return dev_err_probe(dev, ret, + "Failed to register fixed-link\n"); + + ret = devm_add_action_or_reset(dev, + emac_phy_deregister_fixed_link, + dev->of_node); + + if (ret) { + dev_err(dev, "devm_add_action_or_reset failed\n"); + return ret; + } + } + + emac_sw_init(priv); + + ret = emac_mdio_init(priv); + if (ret) + goto err_timer_delete; + + SET_NETDEV_DEV(ndev, &pdev->dev); + + ret = devm_register_netdev(dev, ndev); + if (ret) { + dev_err(dev, "devm_register_netdev failed\n"); + goto err_timer_delete; + } + + netif_napi_add(ndev, &priv->napi, emac_rx_poll); + netif_carrier_off(ndev); + + return 0; + +err_timer_delete: + timer_delete_sync(&priv->txtimer); + timer_delete_sync(&priv->stats_timer); + + return ret; +} + +static void emac_remove(struct platform_device *pdev) +{ + struct emac_priv *priv = platform_get_drvdata(pdev); + + timer_shutdown_sync(&priv->txtimer); + cancel_work_sync(&priv->tx_timeout_task); + + timer_shutdown_sync(&priv->stats_timer); + + emac_reset_hw(priv); +} + +static int emac_resume(struct device *dev) +{ + struct emac_priv *priv = dev_get_drvdata(dev); + struct net_device *ndev = priv->ndev; + int ret; + + ret = clk_prepare_enable(priv->bus_clk); + if (ret < 0) { + dev_err(dev, "Failed to enable bus clock: %d\n", ret); + return ret; + } + + if (!netif_running(ndev)) + return 0; + + ret = emac_open(ndev); + if (ret) { + clk_disable_unprepare(priv->bus_clk); + return ret; + } + + netif_device_attach(ndev); + + mod_timer(&priv->stats_timer, jiffies); + + return 0; +} + +static int emac_suspend(struct device *dev) +{ + struct emac_priv *priv = dev_get_drvdata(dev); + struct net_device *ndev = priv->ndev; + + if (!ndev || !netif_running(ndev)) { + clk_disable_unprepare(priv->bus_clk); + return 0; + } + + emac_stop(ndev); + + clk_disable_unprepare(priv->bus_clk); + netif_device_detach(ndev); + return 0; +} + +static const struct dev_pm_ops emac_pm_ops = { + SYSTEM_SLEEP_PM_OPS(emac_suspend, emac_resume) +}; + +static const struct of_device_id emac_of_match[] = { + { .compatible = "spacemit,k1-emac" }, + { /* sentinel */ }, +}; +MODULE_DEVICE_TABLE(of, emac_of_match); + 
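[Illustrative aside, not part of the patch] The delay-line helpers above convert the devicetree tx/rx-internal-delay-ps values into 15.6 ps steps and back, and emac_config_dt() rejects anything above EMAC_MAX_DELAY_PS (one step below the 8-bit code field's maximum, to stay clear of rounding). A standalone sketch of that arithmetic, using a hypothetical 2000 ps RGMII delay as input:

/* Standalone check of the delay-line conversion used by the driver
 * (15.6 ps per step, 8-bit *_DLINE_CODE field).
 */
#include <stdio.h>

/* Simplified unsigned-only version of the kernel's DIV_ROUND_CLOSEST() */
#define DIV_ROUND_CLOSEST(x, d)	(((x) + (d) / 2) / (d))

static unsigned int ps_to_unit(unsigned int ps)
{
	return DIV_ROUND_CLOSEST(ps * 10, 156);
}

static unsigned int unit_to_ps(unsigned int unit)
{
	return DIV_ROUND_CLOSEST(unit * 156, 10);
}

int main(void)
{
	unsigned int max_unit = 0xff;	/* FIELD_MAX of the 8-bit code mask */

	/* e.g. a 2 ns RGMII delay requested via tx-internal-delay-ps */
	printf("2000 ps -> %u steps (~%u ps)\n",
	       ps_to_unit(2000), unit_to_ps(ps_to_unit(2000)));

	/* Upper bound accepted by emac_config_dt() (one step of margin) */
	printf("max delay = %u ps\n", unit_to_ps(max_unit - 1));
	return 0;
}

Running this shows the 2000 ps request rounding to 128 steps (about 1997 ps) and an accepted upper bound of 3962 ps.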
+static struct platform_driver emac_driver = { + .probe = emac_probe, + .remove = emac_remove, + .driver = { + .name = DRIVER_NAME, + .of_match_table = of_match_ptr(emac_of_match), + .pm = &emac_pm_ops, + }, +}; +module_platform_driver(emac_driver); + +MODULE_DESCRIPTION("SpacemiT K1 Ethernet driver"); +MODULE_AUTHOR("Vivian Wang <wangruikang@iscas.ac.cn>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/spacemit/k1_emac.h b/drivers/net/ethernet/spacemit/k1_emac.h new file mode 100644 index 000000000000..5a09e946a276 --- /dev/null +++ b/drivers/net/ethernet/spacemit/k1_emac.h @@ -0,0 +1,416 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * SpacemiT K1 Ethernet hardware definitions + * + * Copyright (C) 2023-2025 SpacemiT (Hangzhou) Technology Co. Ltd + * Copyright (C) 2025 Vivian Wang <wangruikang@iscas.ac.cn> + */ + +#ifndef _K1_EMAC_H_ +#define _K1_EMAC_H_ + +#include <linux/stddef.h> + +/* APMU syscon registers */ + +#define APMU_EMAC_CTRL_REG 0x0 + +#define PHY_INTF_RGMII BIT(2) + +/* + * Only valid for RMII mode + * 0: Ref clock from External PHY + * 1: Ref clock from SoC + */ +#define REF_CLK_SEL BIT(3) + +/* + * Function clock select + * 0: 208 MHz + * 1: 312 MHz + */ +#define FUNC_CLK_SEL BIT(4) + +/* Only valid for RMII, invert TX clk */ +#define RMII_TX_CLK_SEL BIT(6) + +/* Only valid for RMII, invert RX clk */ +#define RMII_RX_CLK_SEL BIT(7) + +/* + * Only valid for RGMII + * 0: TX clk from RX clk + * 1: TX clk from SoC + */ +#define RGMII_TX_CLK_SEL BIT(8) + +#define PHY_IRQ_EN BIT(12) +#define AXI_SINGLE_ID BIT(13) + +#define APMU_EMAC_DLINE_REG 0x4 + +#define EMAC_RX_DLINE_EN BIT(0) +#define EMAC_RX_DLINE_STEP_MASK GENMASK(5, 4) +#define EMAC_RX_DLINE_CODE_MASK GENMASK(15, 8) + +#define EMAC_TX_DLINE_EN BIT(16) +#define EMAC_TX_DLINE_STEP_MASK GENMASK(21, 20) +#define EMAC_TX_DLINE_CODE_MASK GENMASK(31, 24) + +#define EMAC_DLINE_STEP_15P6 0 /* 15.6 ps/step */ +#define EMAC_DLINE_STEP_24P4 1 /* 24.4 ps/step */ +#define EMAC_DLINE_STEP_29P7 2 /* 29.7 ps/step */ +#define EMAC_DLINE_STEP_35P1 3 /* 35.1 ps/step */ + +/* DMA register set */ +#define DMA_CONFIGURATION 0x0000 +#define DMA_CONTROL 0x0004 +#define DMA_STATUS_IRQ 0x0008 +#define DMA_INTERRUPT_ENABLE 0x000c + +#define DMA_TRANSMIT_AUTO_POLL_COUNTER 0x0010 +#define DMA_TRANSMIT_POLL_DEMAND 0x0014 +#define DMA_RECEIVE_POLL_DEMAND 0x0018 + +#define DMA_TRANSMIT_BASE_ADDRESS 0x001c +#define DMA_RECEIVE_BASE_ADDRESS 0x0020 +#define DMA_MISSED_FRAME_COUNTER 0x0024 +#define DMA_STOP_FLUSH_COUNTER 0x0028 + +#define DMA_RECEIVE_IRQ_MITIGATION_CTRL 0x002c + +#define DMA_CURRENT_TRANSMIT_DESCRIPTOR_POINTER 0x0030 +#define DMA_CURRENT_TRANSMIT_BUFFER_POINTER 0x0034 +#define DMA_CURRENT_RECEIVE_DESCRIPTOR_POINTER 0x0038 +#define DMA_CURRENT_RECEIVE_BUFFER_POINTER 0x003c + +/* MAC Register set */ +#define MAC_GLOBAL_CONTROL 0x0100 +#define MAC_TRANSMIT_CONTROL 0x0104 +#define MAC_RECEIVE_CONTROL 0x0108 +#define MAC_MAXIMUM_FRAME_SIZE 0x010c +#define MAC_TRANSMIT_JABBER_SIZE 0x0110 +#define MAC_RECEIVE_JABBER_SIZE 0x0114 +#define MAC_ADDRESS_CONTROL 0x0118 +#define MAC_MDIO_CLK_DIV 0x011c +#define MAC_ADDRESS1_HIGH 0x0120 +#define MAC_ADDRESS1_MED 0x0124 +#define MAC_ADDRESS1_LOW 0x0128 +#define MAC_ADDRESS2_HIGH 0x012c +#define MAC_ADDRESS2_MED 0x0130 +#define MAC_ADDRESS2_LOW 0x0134 +#define MAC_ADDRESS3_HIGH 0x0138 +#define MAC_ADDRESS3_MED 0x013c +#define MAC_ADDRESS3_LOW 0x0140 +#define MAC_ADDRESS4_HIGH 0x0144 +#define MAC_ADDRESS4_MED 0x0148 +#define MAC_ADDRESS4_LOW 0x014c +#define MAC_MULTICAST_HASH_TABLE1 0x0150 
+#define MAC_MULTICAST_HASH_TABLE2 0x0154 +#define MAC_MULTICAST_HASH_TABLE3 0x0158 +#define MAC_MULTICAST_HASH_TABLE4 0x015c +#define MAC_FC_CONTROL 0x0160 +#define MAC_FC_PAUSE_FRAME_GENERATE 0x0164 +#define MAC_FC_SOURCE_ADDRESS_HIGH 0x0168 +#define MAC_FC_SOURCE_ADDRESS_MED 0x016c +#define MAC_FC_SOURCE_ADDRESS_LOW 0x0170 +#define MAC_FC_DESTINATION_ADDRESS_HIGH 0x0174 +#define MAC_FC_DESTINATION_ADDRESS_MED 0x0178 +#define MAC_FC_DESTINATION_ADDRESS_LOW 0x017c +#define MAC_FC_PAUSE_TIME_VALUE 0x0180 +#define MAC_FC_HIGH_PAUSE_TIME 0x0184 +#define MAC_FC_LOW_PAUSE_TIME 0x0188 +#define MAC_FC_PAUSE_HIGH_THRESHOLD 0x018c +#define MAC_FC_PAUSE_LOW_THRESHOLD 0x0190 +#define MAC_MDIO_CONTROL 0x01a0 +#define MAC_MDIO_DATA 0x01a4 +#define MAC_RX_STATCTR_CONTROL 0x01a8 +#define MAC_RX_STATCTR_DATA_HIGH 0x01ac +#define MAC_RX_STATCTR_DATA_LOW 0x01b0 +#define MAC_TX_STATCTR_CONTROL 0x01b4 +#define MAC_TX_STATCTR_DATA_HIGH 0x01b8 +#define MAC_TX_STATCTR_DATA_LOW 0x01bc +#define MAC_TRANSMIT_FIFO_ALMOST_FULL 0x01c0 +#define MAC_TRANSMIT_PACKET_START_THRESHOLD 0x01c4 +#define MAC_RECEIVE_PACKET_START_THRESHOLD 0x01c8 +#define MAC_STATUS_IRQ 0x01e0 +#define MAC_INTERRUPT_ENABLE 0x01e4 + +/* Used for register dump */ +#define EMAC_DMA_REG_CNT 16 +#define EMAC_MAC_REG_CNT 124 + +/* DMA_CONFIGURATION (0x0000) */ + +/* + * 0-DMA controller in normal operation mode, + * 1-DMA controller reset to default state, + * clearing all internal state information + */ +#define MREGBIT_SOFTWARE_RESET BIT(0) + +#define MREGBIT_BURST_1WORD BIT(1) +#define MREGBIT_BURST_2WORD BIT(2) +#define MREGBIT_BURST_4WORD BIT(3) +#define MREGBIT_BURST_8WORD BIT(4) +#define MREGBIT_BURST_16WORD BIT(5) +#define MREGBIT_BURST_32WORD BIT(6) +#define MREGBIT_BURST_64WORD BIT(7) +#define MREGBIT_BURST_LENGTH GENMASK(7, 1) +#define MREGBIT_DESCRIPTOR_SKIP_LENGTH GENMASK(12, 8) + +/* For Receive and Transmit DMA operate in Big-Endian mode for Descriptors. 
*/ +#define MREGBIT_DESCRIPTOR_BYTE_ORDERING BIT(13) + +#define MREGBIT_BIG_LITLE_ENDIAN BIT(14) +#define MREGBIT_TX_RX_ARBITRATION BIT(15) +#define MREGBIT_WAIT_FOR_DONE BIT(16) +#define MREGBIT_STRICT_BURST BIT(17) +#define MREGBIT_DMA_64BIT_MODE BIT(18) + +/* DMA_CONTROL (0x0004) */ +#define MREGBIT_START_STOP_TRANSMIT_DMA BIT(0) +#define MREGBIT_START_STOP_RECEIVE_DMA BIT(1) + +/* DMA_STATUS_IRQ (0x0008) */ +#define MREGBIT_TRANSMIT_TRANSFER_DONE_IRQ BIT(0) +#define MREGBIT_TRANSMIT_DES_UNAVAILABLE_IRQ BIT(1) +#define MREGBIT_TRANSMIT_DMA_STOPPED_IRQ BIT(2) +#define MREGBIT_RECEIVE_TRANSFER_DONE_IRQ BIT(4) +#define MREGBIT_RECEIVE_DES_UNAVAILABLE_IRQ BIT(5) +#define MREGBIT_RECEIVE_DMA_STOPPED_IRQ BIT(6) +#define MREGBIT_RECEIVE_MISSED_FRAME_IRQ BIT(7) +#define MREGBIT_MAC_IRQ BIT(8) +#define MREGBIT_TRANSMIT_DMA_STATE GENMASK(18, 16) +#define MREGBIT_RECEIVE_DMA_STATE GENMASK(23, 20) + +/* DMA_INTERRUPT_ENABLE (0x000c) */ +#define MREGBIT_TRANSMIT_TRANSFER_DONE_INTR_ENABLE BIT(0) +#define MREGBIT_TRANSMIT_DES_UNAVAILABLE_INTR_ENABLE BIT(1) +#define MREGBIT_TRANSMIT_DMA_STOPPED_INTR_ENABLE BIT(2) +#define MREGBIT_RECEIVE_TRANSFER_DONE_INTR_ENABLE BIT(4) +#define MREGBIT_RECEIVE_DES_UNAVAILABLE_INTR_ENABLE BIT(5) +#define MREGBIT_RECEIVE_DMA_STOPPED_INTR_ENABLE BIT(6) +#define MREGBIT_RECEIVE_MISSED_FRAME_INTR_ENABLE BIT(7) +#define MREGBIT_MAC_INTR_ENABLE BIT(8) + +/* DMA_RECEIVE_IRQ_MITIGATION_CTRL (0x002c) */ +#define MREGBIT_RECEIVE_IRQ_FRAME_COUNTER_MASK GENMASK(7, 0) +#define MREGBIT_RECEIVE_IRQ_TIMEOUT_COUNTER_MASK GENMASK(27, 8) +#define MREGBIT_RECEIVE_IRQ_FRAME_COUNTER_MODE BIT(30) +#define MREGBIT_RECEIVE_IRQ_MITIGATION_ENABLE BIT(31) + +/* MAC_GLOBAL_CONTROL (0x0100) */ +#define MREGBIT_SPEED GENMASK(1, 0) +#define MREGBIT_SPEED_10M 0x0 +#define MREGBIT_SPEED_100M BIT(0) +#define MREGBIT_SPEED_1000M BIT(1) +#define MREGBIT_FULL_DUPLEX_MODE BIT(2) +#define MREGBIT_RESET_RX_STAT_COUNTERS BIT(3) +#define MREGBIT_RESET_TX_STAT_COUNTERS BIT(4) +#define MREGBIT_UNICAST_WAKEUP_MODE BIT(8) +#define MREGBIT_MAGIC_PACKET_WAKEUP_MODE BIT(9) + +/* MAC_TRANSMIT_CONTROL (0x0104) */ +#define MREGBIT_TRANSMIT_ENABLE BIT(0) +#define MREGBIT_INVERT_FCS BIT(1) +#define MREGBIT_DISABLE_FCS_INSERT BIT(2) +#define MREGBIT_TRANSMIT_AUTO_RETRY BIT(3) +#define MREGBIT_IFG_LEN GENMASK(6, 4) +#define MREGBIT_PREAMBLE_LENGTH GENMASK(9, 7) + +/* MAC_RECEIVE_CONTROL (0x0108) */ +#define MREGBIT_RECEIVE_ENABLE BIT(0) +#define MREGBIT_DISABLE_FCS_CHECK BIT(1) +#define MREGBIT_STRIP_FCS BIT(2) +#define MREGBIT_STORE_FORWARD BIT(3) +#define MREGBIT_STATUS_FIRST BIT(4) +#define MREGBIT_PASS_BAD_FRAMES BIT(5) +#define MREGBIT_ACOOUNT_VLAN BIT(6) + +/* MAC_MAXIMUM_FRAME_SIZE (0x010c) */ +#define MREGBIT_MAX_FRAME_SIZE GENMASK(13, 0) + +/* MAC_TRANSMIT_JABBER_SIZE (0x0110) */ +#define MREGBIT_TRANSMIT_JABBER_SIZE GENMASK(15, 0) + +/* MAC_RECEIVE_JABBER_SIZE (0x0114) */ +#define MREGBIT_RECEIVE_JABBER_SIZE GENMASK(15, 0) + +/* MAC_ADDRESS_CONTROL (0x0118) */ +#define MREGBIT_MAC_ADDRESS1_ENABLE BIT(0) +#define MREGBIT_MAC_ADDRESS2_ENABLE BIT(1) +#define MREGBIT_MAC_ADDRESS3_ENABLE BIT(2) +#define MREGBIT_MAC_ADDRESS4_ENABLE BIT(3) +#define MREGBIT_INVERSE_MAC_ADDRESS1_ENABLE BIT(4) +#define MREGBIT_INVERSE_MAC_ADDRESS2_ENABLE BIT(5) +#define MREGBIT_INVERSE_MAC_ADDRESS3_ENABLE BIT(6) +#define MREGBIT_INVERSE_MAC_ADDRESS4_ENABLE BIT(7) +#define MREGBIT_PROMISCUOUS_MODE BIT(8) + +/* MAC_FC_CONTROL (0x0160) */ +#define MREGBIT_FC_DECODE_ENABLE BIT(0) +#define MREGBIT_FC_GENERATION_ENABLE BIT(1) +#define 
MREGBIT_AUTO_FC_GENERATION_ENABLE BIT(2) +#define MREGBIT_MULTICAST_MODE BIT(3) +#define MREGBIT_BLOCK_PAUSE_FRAMES BIT(4) + +/* MAC_FC_PAUSE_FRAME_GENERATE (0x0164) */ +#define MREGBIT_GENERATE_PAUSE_FRAME BIT(0) + +/* MAC_FC_PAUSE_TIME_VALUE (0x0180) */ +#define MREGBIT_MAC_FC_PAUSE_TIME GENMASK(15, 0) + +/* MAC_MDIO_CONTROL (0x01a0) */ +#define MREGBIT_PHY_ADDRESS GENMASK(4, 0) +#define MREGBIT_REGISTER_ADDRESS GENMASK(9, 5) +#define MREGBIT_MDIO_READ_WRITE BIT(10) +#define MREGBIT_START_MDIO_TRANS BIT(15) + +/* MAC_MDIO_DATA (0x01a4) */ +#define MREGBIT_MDIO_DATA GENMASK(15, 0) + +/* MAC_RX_STATCTR_CONTROL (0x01a8) */ +#define MREGBIT_RX_COUNTER_NUMBER GENMASK(4, 0) +#define MREGBIT_START_RX_COUNTER_READ BIT(15) + +/* MAC_RX_STATCTR_DATA_HIGH (0x01ac) */ +#define MREGBIT_RX_STATCTR_DATA_HIGH GENMASK(15, 0) +/* MAC_RX_STATCTR_DATA_LOW (0x01b0) */ +#define MREGBIT_RX_STATCTR_DATA_LOW GENMASK(15, 0) + +/* MAC_TX_STATCTR_CONTROL (0x01b4) */ +#define MREGBIT_TX_COUNTER_NUMBER GENMASK(4, 0) +#define MREGBIT_START_TX_COUNTER_READ BIT(15) + +/* MAC_TX_STATCTR_DATA_HIGH (0x01b8) */ +#define MREGBIT_TX_STATCTR_DATA_HIGH GENMASK(15, 0) +/* MAC_TX_STATCTR_DATA_LOW (0x01bc) */ +#define MREGBIT_TX_STATCTR_DATA_LOW GENMASK(15, 0) + +/* MAC_TRANSMIT_FIFO_ALMOST_FULL (0x01c0) */ +#define MREGBIT_TX_FIFO_AF GENMASK(13, 0) + +/* MAC_TRANSMIT_PACKET_START_THRESHOLD (0x01c4) */ +#define MREGBIT_TX_PACKET_START_THRESHOLD GENMASK(13, 0) + +/* MAC_RECEIVE_PACKET_START_THRESHOLD (0x01c8) */ +#define MREGBIT_RX_PACKET_START_THRESHOLD GENMASK(13, 0) + +/* MAC_STATUS_IRQ (0x01e0) */ +#define MREGBIT_MAC_UNDERRUN_IRQ BIT(0) +#define MREGBIT_MAC_JABBER_IRQ BIT(1) + +/* MAC_INTERRUPT_ENABLE (0x01e4) */ +#define MREGBIT_MAC_UNDERRUN_INTERRUPT_ENABLE BIT(0) +#define MREGBIT_JABBER_INTERRUPT_ENABLE BIT(1) + +/* RX DMA descriptor */ + +#define RX_DESC_0_FRAME_PACKET_LENGTH_MASK GENMASK(13, 0) +#define RX_DESC_0_FRAME_ALIGN_ERR BIT(14) +#define RX_DESC_0_FRAME_RUNT BIT(15) +#define RX_DESC_0_FRAME_ETHERNET_TYPE BIT(16) +#define RX_DESC_0_FRAME_VLAN BIT(17) +#define RX_DESC_0_FRAME_MULTICAST BIT(18) +#define RX_DESC_0_FRAME_BROADCAST BIT(19) +#define RX_DESC_0_FRAME_CRC_ERR BIT(20) +#define RX_DESC_0_FRAME_MAX_LEN_ERR BIT(21) +#define RX_DESC_0_FRAME_JABBER_ERR BIT(22) +#define RX_DESC_0_FRAME_LENGTH_ERR BIT(23) +#define RX_DESC_0_FRAME_MAC_ADDR1_MATCH BIT(24) +#define RX_DESC_0_FRAME_MAC_ADDR2_MATCH BIT(25) +#define RX_DESC_0_FRAME_MAC_ADDR3_MATCH BIT(26) +#define RX_DESC_0_FRAME_MAC_ADDR4_MATCH BIT(27) +#define RX_DESC_0_FRAME_PAUSE_CTRL BIT(28) +#define RX_DESC_0_LAST_DESCRIPTOR BIT(29) +#define RX_DESC_0_FIRST_DESCRIPTOR BIT(30) +#define RX_DESC_0_OWN BIT(31) + +#define RX_DESC_1_BUFFER_SIZE_1_MASK GENMASK(11, 0) +#define RX_DESC_1_BUFFER_SIZE_2_MASK GENMASK(23, 12) + /* [24] reserved */ +#define RX_DESC_1_SECOND_ADDRESS_CHAINED BIT(25) +#define RX_DESC_1_END_RING BIT(26) + /* [29:27] reserved */ +#define RX_DESC_1_RX_TIMESTAMP BIT(30) +#define RX_DESC_1_PTP_PKT BIT(31) + +/* TX DMA descriptor */ + + /* [29:0] unused */ +#define TX_DESC_0_TX_TIMESTAMP BIT(30) +#define TX_DESC_0_OWN BIT(31) + +#define TX_DESC_1_BUFFER_SIZE_1_MASK GENMASK(11, 0) +#define TX_DESC_1_BUFFER_SIZE_2_MASK GENMASK(23, 12) +#define TX_DESC_1_FORCE_EOP_ERROR BIT(24) +#define TX_DESC_1_SECOND_ADDRESS_CHAINED BIT(25) +#define TX_DESC_1_END_RING BIT(26) +#define TX_DESC_1_DISABLE_PADDING BIT(27) +#define TX_DESC_1_ADD_CRC_DISABLE BIT(28) +#define TX_DESC_1_FIRST_SEGMENT BIT(29) +#define TX_DESC_1_LAST_SEGMENT BIT(30) +#define 
TX_DESC_1_INTERRUPT_ON_COMPLETION BIT(31) + +struct emac_desc { + u32 desc0; + u32 desc1; + u32 buffer_addr_1; + u32 buffer_addr_2; +}; + +/* Keep stats in this order, index used for accessing hardware */ + +union emac_hw_tx_stats { + struct { + u64 tx_ok_pkts; + u64 tx_total_pkts; + u64 tx_ok_bytes; + u64 tx_err_pkts; + u64 tx_singleclsn_pkts; + u64 tx_multiclsn_pkts; + u64 tx_lateclsn_pkts; + u64 tx_excessclsn_pkts; + u64 tx_unicast_pkts; + u64 tx_multicast_pkts; + u64 tx_broadcast_pkts; + u64 tx_pause_pkts; + } stats; + + DECLARE_FLEX_ARRAY(u64, array); +}; + +union emac_hw_rx_stats { + struct { + u64 rx_ok_pkts; + u64 rx_total_pkts; + u64 rx_crc_err_pkts; + u64 rx_align_err_pkts; + u64 rx_err_total_pkts; + u64 rx_ok_bytes; + u64 rx_total_bytes; + u64 rx_unicast_pkts; + u64 rx_multicast_pkts; + u64 rx_broadcast_pkts; + u64 rx_pause_pkts; + u64 rx_len_err_pkts; + u64 rx_len_undersize_pkts; + u64 rx_len_oversize_pkts; + u64 rx_len_fragment_pkts; + u64 rx_len_jabber_pkts; + u64 rx_64_pkts; + u64 rx_65_127_pkts; + u64 rx_128_255_pkts; + u64 rx_256_511_pkts; + u64 rx_512_1023_pkts; + u64 rx_1024_1518_pkts; + u64 rx_1519_plus_pkts; + u64 rx_drp_fifo_full_pkts; + u64 rx_truncate_fifo_full_pkts; + } stats; + + DECLARE_FLEX_ARRAY(u64, array); +}; + +#endif /* _K1_EMAC_H_ */ diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 67fa879b1e52..9507131875b2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -133,15 +133,17 @@ config DWMAC_QCOM_ETHQOS stmmac device driver. config DWMAC_RENESAS_GBETH - tristate "Renesas RZ/V2H(P) GBETH support" + tristate "Renesas RZ/V2H(P) GBETH and RZ/T2H, RZ/N2H GMAC support" default ARCH_RENESAS depends on OF && (ARCH_RENESAS || COMPILE_TEST) + select PCS_RZN1_MIIC help - Support for Gigabit Ethernet Interface (GBETH) on Renesas - RZ/V2H(P) SoCs. + Support for Gigabit Ethernet Interface (GBETH)/ Ethernet MAC (GMAC) + on Renesas SoCs. - This selects the Renesas RZ/V2H(P) Soc specific glue layer support - for the stmmac device driver. + This selects Renesas SoC glue layer support for the stmmac device + driver. This driver is used for the RZ/V2H(P) family, RZ/T2H and + RZ/N2H SoCs. config DWMAC_ROCKCHIP tristate "Rockchip dwmac support" @@ -263,6 +265,18 @@ config DWMAC_SUN8I stmmac device driver. This driver is used for H3/A83T/A64 EMAC ethernet controller. +config DWMAC_SUN55I + tristate "Allwinner sun55i GMAC200 support" + default ARCH_SUNXI + depends on OF && (ARCH_SUNXI || COMPILE_TEST) + select MDIO_BUS_MUX + help + Support for Allwinner A523/T527 GMAC200 ethernet controllers. + + This selects Allwinner SoC glue layer support for the + stmmac device driver. This driver is used for A523/T527 + GMAC200 ethernet controller. 
+ config DWMAC_THEAD tristate "T-HEAD dwmac support" depends on OF && (ARCH_THEAD || COMPILE_TEST) diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index b591d93f8503..51e068e26ce4 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o obj-$(CONFIG_DWMAC_STM32) += dwmac-stm32.o obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o obj-$(CONFIG_DWMAC_SUN8I) += dwmac-sun8i.o +obj-$(CONFIG_DWMAC_SUN55I) += dwmac-sun55i.o obj-$(CONFIG_DWMAC_THEAD) += dwmac-thead.o obj-$(CONFIG_DWMAC_DWC_QOS_ETH) += dwmac-dwc-qos-eth.o obj-$(CONFIG_DWMAC_INTEL_PLAT) += dwmac-intel-plat.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index cbffccb3b9af..8f34c9ad457f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -228,6 +228,7 @@ struct stmmac_extra_stats { unsigned long mtl_est_btrlm; unsigned long max_sdu_txq_drop[MTL_MAX_TX_QUEUES]; unsigned long mtl_est_txq_hlbf[MTL_MAX_TX_QUEUES]; + unsigned long mtl_est_txq_hlbs[MTL_MAX_TX_QUEUES]; /* per queue statistics */ struct stmmac_txq_stats txq_stats[MTL_MAX_TX_QUEUES]; struct stmmac_rxq_stats rxq_stats[MTL_MAX_RX_QUEUES]; @@ -602,7 +603,6 @@ struct mac_device_info { unsigned int mcast_bits_log2; unsigned int rx_csum; unsigned int pcs; - unsigned int pmt; unsigned int ps; unsigned int xlgmac; unsigned int num_vlan; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c index 6c363f9b0ce2..e8539cad4602 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-dwc-qos-eth.c @@ -261,7 +261,8 @@ bypass_clk_reset_gpio: plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; plat_dat->bsp_priv = eqos; plat_dat->flags |= STMMAC_FLAG_SPH_DISABLE | - STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP; + STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP | + STMMAC_FLAG_USE_PHY_WOL; return 0; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index 889e2bb6f7f5..4268b9987237 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -49,7 +49,7 @@ struct imx_dwmac_ops { u32 flags; bool mac_rgmii_txclk_auto_adj; - int (*fix_soc_reset)(void *priv, void __iomem *ioaddr); + int (*fix_soc_reset)(struct stmmac_priv *priv, void __iomem *ioaddr); int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat); void (*fix_mac_speed)(void *priv, int speed, unsigned int mode); }; @@ -72,7 +72,7 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat) struct imx_priv_data *dwmac = plat_dat->bsp_priv; int val; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: val = GPR_ENET_QOS_INTF_SEL_MII; break; @@ -88,8 +88,8 @@ static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat) GPR_ENET_QOS_RGMII_EN; break; default: - pr_debug("imx dwmac doesn't support %d interface\n", - plat_dat->mac_interface); + pr_debug("imx dwmac doesn't support %s interface\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } @@ -112,7 +112,7 @@ static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) struct imx_priv_data *dwmac = plat_dat->bsp_priv; int val, ret; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { 
case PHY_INTERFACE_MODE_MII: val = MX93_GPR_ENET_QOS_INTF_SEL_MII; break; @@ -134,8 +134,8 @@ static int imx93_set_intf_mode(struct plat_stmmacenet_data *plat_dat) val = MX93_GPR_ENET_QOS_INTF_SEL_RGMII; break; default: - dev_dbg(dwmac->dev, "imx dwmac doesn't support %d interface\n", - plat_dat->mac_interface); + dev_dbg(dwmac->dev, "imx dwmac doesn't support %s interface\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } @@ -197,7 +197,7 @@ static int imx_dwmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, { struct imx_priv_data *dwmac = bsp_priv; - interface = dwmac->plat_dat->mac_interface; + interface = dwmac->plat_dat->phy_interface; if (interface == PHY_INTERFACE_MODE_RMII || interface == PHY_INTERFACE_MODE_MII) return 0; @@ -215,8 +215,8 @@ static void imx_dwmac_fix_speed(void *priv, int speed, unsigned int mode) plat_dat = dwmac->plat_dat; if (dwmac->ops->mac_rgmii_txclk_auto_adj || - (plat_dat->mac_interface == PHY_INTERFACE_MODE_RMII) || - (plat_dat->mac_interface == PHY_INTERFACE_MODE_MII)) + (plat_dat->phy_interface == PHY_INTERFACE_MODE_RMII) || + (plat_dat->phy_interface == PHY_INTERFACE_MODE_MII)) return; rate = rgmii_clock(speed); @@ -265,16 +265,16 @@ static void imx93_dwmac_fix_speed(void *priv, int speed, unsigned int mode) writel(old_ctrl, dwmac->base_addr + MAC_CTRL_REG); } -static int imx_dwmac_mx93_reset(void *priv, void __iomem *ioaddr) +static int imx_dwmac_mx93_reset(struct stmmac_priv *priv, void __iomem *ioaddr) { - struct plat_stmmacenet_data *plat_dat = priv; + struct plat_stmmacenet_data *plat_dat = priv->plat; u32 value = readl(ioaddr + DMA_BUS_MODE); /* DMA SW reset */ value |= DMA_BUS_MODE_SFT_RESET; writel(value, ioaddr + DMA_BUS_MODE); - if (plat_dat->mac_interface == PHY_INTERFACE_MODE_RMII) { + if (plat_dat->phy_interface == PHY_INTERFACE_MODE_RMII) { usleep_range(100, 200); writel(RMII_RESET_SPEED, ioaddr + MAC_CTRL_REG); } @@ -301,6 +301,7 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev) dwmac->clk_mem = NULL; if (of_machine_is_compatible("fsl,imx8dxl") || + of_machine_is_compatible("fsl,imx91") || of_machine_is_compatible("fsl,imx93")) { dwmac->clk_mem = devm_clk_get(dev, "mem"); if (IS_ERR(dwmac->clk_mem)) { @@ -310,9 +311,10 @@ imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev) } if (of_machine_is_compatible("fsl,imx8mp") || + of_machine_is_compatible("fsl,imx91") || of_machine_is_compatible("fsl,imx93")) { /* Binding doc describes the propety: - * is required by i.MX8MP, i.MX93. + * is required by i.MX8MP, i.MX91, i.MX93. * is optinoal for i.MX8DXL. 
*/ dwmac->intf_regmap = diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c index 15abe214131f..c1670f6bae14 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ingenic.c @@ -90,7 +90,7 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: val = FIELD_PREP(MACPHYC_TXCLK_SEL_MASK, MACPHYC_TXCLK_SEL_INPUT) | FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_MII); @@ -119,7 +119,8 @@ static int jz4775_mac_set_mode(struct plat_stmmacenet_data *plat_dat) break; default: - dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface); + dev_err(mac->dev, "Unsupported interface %s\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } @@ -131,13 +132,14 @@ static int x1000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) { struct ingenic_mac *mac = plat_dat->bsp_priv; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; default: - dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface); + dev_err(mac->dev, "Unsupported interface %s\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } @@ -150,14 +152,15 @@ static int x1600_mac_set_mode(struct plat_stmmacenet_data *plat_dat) struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); dev_dbg(mac->dev, "MAC PHY Control Register: PHY_INTERFACE_MODE_RMII\n"); break; default: - dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface); + dev_err(mac->dev, "Unsupported interface %s\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } @@ -170,7 +173,7 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat) struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_MODE_SEL_MASK, MACPHYC_MODE_SEL_RMII) | FIELD_PREP(MACPHYC_PHY_INFT_MASK, MACPHYC_PHY_INFT_RMII); @@ -178,7 +181,8 @@ static int x1830_mac_set_mode(struct plat_stmmacenet_data *plat_dat) break; default: - dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface); + dev_err(mac->dev, "Unsupported interface %s\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } @@ -191,7 +195,7 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) struct ingenic_mac *mac = plat_dat->bsp_priv; unsigned int val; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: val = FIELD_PREP(MACPHYC_TX_SEL_MASK, MACPHYC_TX_SEL_ORIGIN) | FIELD_PREP(MACPHYC_RX_SEL_MASK, MACPHYC_RX_SEL_ORIGIN) | @@ -221,7 +225,8 @@ static int x2000_mac_set_mode(struct plat_stmmacenet_data *plat_dat) break; default: - dev_err(mac->dev, "Unsupported interface %d", plat_dat->mac_interface); + dev_err(mac->dev, "Unsupported interface %s\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c index ea33ae39be6b..e74d00984b88 
100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c @@ -371,9 +371,6 @@ static int intel_crosststamp(ktime_t *device, u32 acr_value; int i; - if (!boot_cpu_has(X86_FEATURE_ART)) - return -EOPNOTSUPP; - intel_priv = priv->plat->bsp_priv; /* Both internal crosstimestamping and external triggered event @@ -566,7 +563,8 @@ static int intel_mac_finish(struct net_device *ndev, static void common_default_data(struct plat_stmmacenet_data *plat) { - plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->clk_csr = STMMAC_CSR_20_35M; plat->has_gmac = 1; plat->force_sf_dma_mode = 1; @@ -613,7 +611,7 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->pdev = pdev; plat->phy_addr = -1; - plat->clk_csr = 5; + plat->clk_csr = STMMAC_CSR_250_300M; plat->has_gmac = 0; plat->has_gmac4 = 1; plat->force_sf_dma_mode = 0; @@ -755,7 +753,9 @@ static int intel_mgbe_common_data(struct pci_dev *pdev, plat->int_snapshot_num = AUX_SNAPSHOT1; - plat->crosststamp = intel_crosststamp; + if (boot_cpu_has(X86_FEATURE_ART)) + plat->crosststamp = intel_crosststamp; + plat->flags &= ~STMMAC_FLAG_INT_SNAPSHOT_EN; /* Setup MSI vector offset specific to Intel mGbE controller */ @@ -1231,6 +1231,37 @@ static int stmmac_config_multi_msi(struct pci_dev *pdev, return 0; } +static int intel_eth_pci_suspend(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_wake_from_d3(pdev, true); + pci_set_power_state(pdev, PCI_D3hot); + return 0; +} + +static int intel_eth_pci_resume(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pcim_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + /** * intel_eth_pci_probe * @@ -1292,6 +1323,9 @@ static int intel_eth_pci_probe(struct pci_dev *pdev, pci_set_master(pdev); plat->bsp_priv = intel_priv; + plat->suspend = intel_eth_pci_suspend; + plat->resume = intel_eth_pci_resume; + intel_priv->mdio_adhoc_addr = INTEL_MGBE_ADHOC_ADDR; intel_priv->crossts_adj = 1; @@ -1355,44 +1389,6 @@ static void intel_eth_pci_remove(struct pci_dev *pdev) clk_unregister_fixed_rate(priv->plat->stmmac_clk); } -static int __maybe_unused intel_eth_pci_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_wake_from_d3(pdev, true); - pci_set_power_state(pdev, PCI_D3hot); - return 0; -} - -static int __maybe_unused intel_eth_pci_resume(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pcim_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return stmmac_resume(dev); -} - -static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend, - intel_eth_pci_resume); - #define PCI_DEVICE_ID_INTEL_QUARK 0x0937 #define PCI_DEVICE_ID_INTEL_EHL_RGMII1G 0x4b30 #define PCI_DEVICE_ID_INTEL_EHL_SGMII1G 0x4b31 @@ -1442,7 +1438,7 @@ static struct pci_driver intel_eth_pci_driver = { .probe = intel_eth_pci_probe, .remove = intel_eth_pci_remove, .driver = { - .pm = &intel_eth_pm_ops, + .pm = &stmmac_simple_pm_ops, }, }; diff --git 
a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c index e1591e6217d4..592aa9d636e5 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-loongson.c @@ -90,15 +90,14 @@ static void loongson_default_data(struct pci_dev *pdev, /* Get bus_id, this can be overwritten later */ plat->bus_id = pci_dev_id(pdev); - plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->clk_csr = STMMAC_CSR_20_35M; plat->has_gmac = 1; plat->force_sf_dma_mode = 1; /* Set default value for multicast hash bins */ plat->multicast_filter_bins = 256; - plat->mac_interface = PHY_INTERFACE_MODE_NA; - /* Set default value for unicast filter entries */ plat->unicast_filter_entries = 1; @@ -509,10 +508,15 @@ static int loongson_dwmac_acpi_config(struct pci_dev *pdev, } /* Loongson's DWMAC device may take nearly two seconds to complete DMA reset */ -static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) +static int loongson_dwmac_fix_reset(struct stmmac_priv *priv, void __iomem *ioaddr) { u32 value = readl(ioaddr + DMA_BUS_MODE); + if (value & DMA_BUS_MODE_SFT_RESET) { + netdev_err(priv->dev, "the PHY clock is missing\n"); + return -EINVAL; + } + value |= DMA_BUS_MODE_SFT_RESET; writel(value, ioaddr + DMA_BUS_MODE); @@ -521,6 +525,37 @@ static int loongson_dwmac_fix_reset(void *priv, void __iomem *ioaddr) 10000, 2000000); } +static int loongson_dwmac_suspend(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int loongson_dwmac_resume(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id *id) { struct plat_stmmacenet_data *plat; @@ -565,6 +600,8 @@ static int loongson_dwmac_probe(struct pci_dev *pdev, const struct pci_device_id plat->bsp_priv = ld; plat->setup = loongson_dwmac_setup; plat->fix_soc_reset = loongson_dwmac_fix_reset; + plat->suspend = loongson_dwmac_suspend; + plat->resume = loongson_dwmac_resume; ld->dev = &pdev->dev; ld->loongson_id = readl(res.addr + GMAC_VERSION) & 0xff; @@ -621,44 +658,6 @@ static void loongson_dwmac_remove(struct pci_dev *pdev) pci_disable_device(pdev); } -static int __maybe_unused loongson_dwmac_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, true); - return 0; -} - -static int __maybe_unused loongson_dwmac_resume(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return stmmac_resume(dev); -} - -static SIMPLE_DEV_PM_OPS(loongson_dwmac_pm_ops, loongson_dwmac_suspend, - loongson_dwmac_resume); - static const struct pci_device_id loongson_dwmac_id_table[] = { { PCI_DEVICE_DATA(LOONGSON, GMAC1, &loongson_gmac_pci_info) }, { PCI_DEVICE_DATA(LOONGSON, 
GMAC2, &loongson_gmac_pci_info) }, @@ -673,7 +672,7 @@ static struct pci_driver loongson_dwmac_driver = { .probe = loongson_dwmac_probe, .remove = loongson_dwmac_remove, .driver = { - .pm = &loongson_dwmac_pm_ops, + .pm = &stmmac_simple_pm_ops, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c index c0c44916f849..2562a6d036a2 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c @@ -41,7 +41,6 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev) if (IS_ERR(plat_dat)) return PTR_ERR(plat_dat); - plat_dat->mac_interface = PHY_INTERFACE_MODE_NA; plat_dat->has_gmac = true; reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c index 39421d6a34e4..f1b36f0a401d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-mediatek.c @@ -523,7 +523,7 @@ static int mediatek_dwmac_clk_init(struct mediatek_dwmac_plat_data *plat) return ret; } -static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) +static int mediatek_dwmac_init(struct device *dev, void *priv) { struct mediatek_dwmac_plat_data *plat = priv; const struct mediatek_dwmac_variant *variant = plat->variant; @@ -532,7 +532,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) if (variant->dwmac_set_phy_interface) { ret = variant->dwmac_set_phy_interface(plat); if (ret) { - dev_err(plat->dev, "failed to set phy interface, err = %d\n", ret); + dev_err(dev, "failed to set phy interface, err = %d\n", ret); return ret; } } @@ -540,7 +540,7 @@ static int mediatek_dwmac_init(struct platform_device *pdev, void *priv) if (variant->dwmac_set_delay) { ret = variant->dwmac_set_delay(plat); if (ret) { - dev_err(plat->dev, "failed to set delay value, err = %d\n", ret); + dev_err(dev, "failed to set delay value, err = %d\n", ret); return ret; } } @@ -589,7 +589,7 @@ static int mediatek_dwmac_common_data(struct platform_device *pdev, plat->maxmtu = ETH_DATA_LEN; plat->host_dma_width = priv_plat->variant->dma_bit_mask; plat->bsp_priv = priv_plat; - plat->init = mediatek_dwmac_init; + plat->resume = mediatek_dwmac_init; plat->clks_config = mediatek_dwmac_clks_config; plat->safety_feat_cfg = devm_kzalloc(&pdev->dev, @@ -654,7 +654,7 @@ static int mediatek_dwmac_probe(struct platform_device *pdev) return PTR_ERR(plat_dat); mediatek_dwmac_common_data(pdev, plat_dat, priv_plat); - mediatek_dwmac_init(pdev, priv_plat); + mediatek_dwmac_init(&pdev->dev, priv_plat); ret = mediatek_dwmac_clks_config(priv_plat, true); if (ret) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c index df4ca897a60c..bc7bb975803c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-renesas-gbeth.c @@ -16,12 +16,37 @@ #include <linux/clk.h> #include <linux/device.h> #include <linux/module.h> +#include <linux/of.h> +#include <linux/pcs-rzn1-miic.h> #include <linux/platform_device.h> #include <linux/reset.h> +#include <linux/types.h> #include "stmmac_platform.h" +/** + * struct renesas_gbeth_of_data - OF data for Renesas GBETH + * + * @clks: Array of clock names + * @num_clks: Number of clocks + * @stmmac_flags: Flags for the stmmac driver + * @handle_reset: Flag to indicate 
if reset control is + * handled by the glue driver or core driver. + * @set_clk_tx_rate: Flag to indicate if Tx clock is fixed or + * set_clk_tx_rate is needed. + * @has_pcs: Flag to indicate if the MAC has a PCS + */ +struct renesas_gbeth_of_data { + const char * const *clks; + u8 num_clks; + u32 stmmac_flags; + bool handle_reset; + bool set_clk_tx_rate; + bool has_pcs; +}; + struct renesas_gbeth { + const struct renesas_gbeth_of_data *of_data; struct plat_stmmacenet_data *plat_dat; struct reset_control *rstc; struct device *dev; @@ -31,6 +56,41 @@ static const char *const renesas_gbeth_clks[] = { "tx", "tx-180", "rx", "rx-180", }; +static const char *const renesas_gmac_clks[] = { + "tx", +}; + +static int renesas_gmac_pcs_init(struct stmmac_priv *priv) +{ + struct device_node *np = priv->device->of_node; + struct device_node *pcs_node; + struct phylink_pcs *pcs; + + pcs_node = of_parse_phandle(np, "pcs-handle", 0); + if (pcs_node) { + pcs = miic_create(priv->device, pcs_node); + of_node_put(pcs_node); + if (IS_ERR(pcs)) + return PTR_ERR(pcs); + + priv->hw->phylink_pcs = pcs; + } + + return 0; +} + +static void renesas_gmac_pcs_exit(struct stmmac_priv *priv) +{ + if (priv->hw->phylink_pcs) + miic_destroy(priv->hw->phylink_pcs); +} + +static struct phylink_pcs *renesas_gmac_select_pcs(struct stmmac_priv *priv, + phy_interface_t interface) +{ + return priv->hw->phylink_pcs; +} + static int renesas_gbeth_init(struct platform_device *pdev, void *priv) { struct plat_stmmacenet_data *plat_dat; @@ -70,6 +130,7 @@ static void renesas_gbeth_exit(struct platform_device *pdev, void *priv) static int renesas_gbeth_probe(struct platform_device *pdev) { + const struct renesas_gbeth_of_data *of_data; struct plat_stmmacenet_data *plat_dat; struct stmmac_resources stmmac_res; struct device *dev = &pdev->dev; @@ -91,14 +152,17 @@ static int renesas_gbeth_probe(struct platform_device *pdev) if (!gbeth) return -ENOMEM; - plat_dat->num_clks = ARRAY_SIZE(renesas_gbeth_clks); + of_data = of_device_get_match_data(&pdev->dev); + gbeth->of_data = of_data; + + plat_dat->num_clks = of_data->num_clks; plat_dat->clks = devm_kcalloc(dev, plat_dat->num_clks, sizeof(*plat_dat->clks), GFP_KERNEL); if (!plat_dat->clks) return -ENOMEM; for (i = 0; i < plat_dat->num_clks; i++) - plat_dat->clks[i].id = renesas_gbeth_clks[i]; + plat_dat->clks[i].id = of_data->clks[i]; err = devm_clk_bulk_get(dev, plat_dat->num_clks, plat_dat->clks); if (err < 0) @@ -109,25 +173,49 @@ static int renesas_gbeth_probe(struct platform_device *pdev) return dev_err_probe(dev, -EINVAL, "error finding tx clock\n"); - gbeth->rstc = devm_reset_control_get_exclusive(dev, NULL); - if (IS_ERR(gbeth->rstc)) - return PTR_ERR(gbeth->rstc); + if (of_data->handle_reset) { + gbeth->rstc = devm_reset_control_get_exclusive(dev, NULL); + if (IS_ERR(gbeth->rstc)) + return PTR_ERR(gbeth->rstc); + } gbeth->dev = dev; gbeth->plat_dat = plat_dat; plat_dat->bsp_priv = gbeth; - plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; + if (of_data->set_clk_tx_rate) + plat_dat->set_clk_tx_rate = stmmac_set_clk_tx_rate; plat_dat->init = renesas_gbeth_init; plat_dat->exit = renesas_gbeth_exit; - plat_dat->flags |= STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY | - STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP | - STMMAC_FLAG_SPH_DISABLE; + plat_dat->flags |= STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP | + gbeth->of_data->stmmac_flags; + if (of_data->has_pcs) { + plat_dat->pcs_init = renesas_gmac_pcs_init; + plat_dat->pcs_exit = renesas_gmac_pcs_exit; + plat_dat->select_pcs = renesas_gmac_select_pcs; + } return 
devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); } +static const struct renesas_gbeth_of_data renesas_gbeth_of_data = { + .clks = renesas_gbeth_clks, + .num_clks = ARRAY_SIZE(renesas_gbeth_clks), + .handle_reset = true, + .set_clk_tx_rate = true, + .stmmac_flags = STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY | + STMMAC_FLAG_SPH_DISABLE, +}; + +static const struct renesas_gbeth_of_data renesas_gmac_of_data = { + .clks = renesas_gmac_clks, + .num_clks = ARRAY_SIZE(renesas_gmac_clks), + .stmmac_flags = STMMAC_FLAG_HWTSTAMP_CORRECT_LATENCY, + .has_pcs = true, +}; + static const struct of_device_id renesas_gbeth_match[] = { - { .compatible = "renesas,rzv2h-gbeth", }, + { .compatible = "renesas,r9a09g077-gbeth", .data = &renesas_gmac_of_data }, + { .compatible = "renesas,rzv2h-gbeth", .data = &renesas_gbeth_of_data }, { /* Sentinel */ } }; MODULE_DEVICE_TABLE(of, renesas_gbeth_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index f6687c2f30f6..51ea0caf16c1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -8,6 +8,7 @@ */ #include <linux/stmmac.h> +#include <linux/hw_bitfield.h> #include <linux/bitops.h> #include <linux/clk.h> #include <linux/phy.h> @@ -71,7 +72,6 @@ struct rk_priv_data { phy_interface_t phy_iface; int id; struct regulator *regulator; - bool suspended; const struct rk_gmac_ops *ops; bool clk_enabled; @@ -150,7 +150,7 @@ static int rk_set_clk_mac_speed(struct rk_priv_data *bsp_priv, } #define HIWORD_UPDATE(val, mask, shift) \ - ((val) << (shift) | (mask) << ((shift) + 16)) + (FIELD_PREP_WM16((mask) << (shift), (val))) #define GRF_BIT(nr) (BIT(nr) | BIT(nr+16)) #define GRF_CLR_BIT(nr) (BIT(nr+16)) @@ -557,9 +557,7 @@ static const struct rk_gmac_ops rk3308_ops = { #define RK3328_GMAC_RMII_MODE GRF_BIT(9) #define RK3328_GMAC_RMII_MODE_CLR GRF_CLR_BIT(9) #define RK3328_GMAC_TXCLK_DLY_ENABLE GRF_BIT(0) -#define RK3328_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(0) #define RK3328_GMAC_RXCLK_DLY_ENABLE GRF_BIT(1) -#define RK3328_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(0) /* RK3328_GRF_MACPHY_CON1 */ #define RK3328_MACPHY_RMII_MODE GRF_BIT(9) @@ -1706,6 +1704,28 @@ static int rk_set_clk_tx_rate(void *bsp_priv_, struct clk *clk_tx_i, return -EINVAL; } +static int rk_gmac_suspend(struct device *dev, void *bsp_priv_) +{ + struct rk_priv_data *bsp_priv = bsp_priv_; + + /* Keep the PHY up if we use Wake-on-Lan. */ + if (!device_may_wakeup(dev)) + rk_gmac_powerdown(bsp_priv); + + return 0; +} + +static int rk_gmac_resume(struct device *dev, void *bsp_priv_) +{ + struct rk_priv_data *bsp_priv = bsp_priv_; + + /* The PHY was up for Wake-on-Lan. */ + if (!device_may_wakeup(dev)) + rk_gmac_powerup(bsp_priv); + + return 0; +} + static int rk_gmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -1738,6 +1758,8 @@ static int rk_gmac_probe(struct platform_device *pdev) plat_dat->get_interfaces = rk_get_interfaces; plat_dat->set_clk_tx_rate = rk_set_clk_tx_rate; + plat_dat->suspend = rk_gmac_suspend; + plat_dat->resume = rk_gmac_resume; plat_dat->bsp_priv = rk_gmac_setup(pdev, plat_dat, data); if (IS_ERR(plat_dat->bsp_priv)) @@ -1776,37 +1798,6 @@ static void rk_gmac_remove(struct platform_device *pdev) clk_put(bsp_priv->clk_phy); } -#ifdef CONFIG_PM_SLEEP -static int rk_gmac_suspend(struct device *dev) -{ - struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); - int ret = stmmac_suspend(dev); - - /* Keep the PHY up if we use Wake-on-Lan. 
*/ - if (!device_may_wakeup(dev)) { - rk_gmac_powerdown(bsp_priv); - bsp_priv->suspended = true; - } - - return ret; -} - -static int rk_gmac_resume(struct device *dev) -{ - struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev); - - /* The PHY was up for Wake-on-Lan. */ - if (bsp_priv->suspended) { - rk_gmac_powerup(bsp_priv); - bsp_priv->suspended = false; - } - - return stmmac_resume(dev); -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume); - static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,px30-gmac", .data = &px30_ops }, { .compatible = "rockchip,rk3128-gmac", .data = &rk3128_ops }, @@ -1832,7 +1823,7 @@ static struct platform_driver rk_gmac_dwmac_driver = { .remove = rk_gmac_remove, .driver = { .name = "rk_gmac-dwmac", - .pm = &rk_gmac_pm_ops, + .pm = &stmmac_simple_pm_ops, .of_match_table = rk_gmac_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c index 01dd0cf0923c..354f01184e6c 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c @@ -234,7 +234,7 @@ err_node_put: static int socfpga_get_plat_phymode(struct socfpga_dwmac *dwmac) { - return dwmac->plat_dat->mac_interface; + return dwmac->plat_dat->phy_interface; } static void socfpga_sgmii_config(struct socfpga_dwmac *dwmac, bool enable) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c index 2013d7477eb7..6938dd2a79b7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-starfive.c @@ -38,7 +38,7 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat) unsigned int mode; int err; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_RMII: mode = STARFIVE_DWMAC_PHY_INFT_RMII; break; @@ -51,8 +51,8 @@ static int starfive_dwmac_set_mode(struct plat_stmmacenet_data *plat_dat) break; default: - dev_err(dwmac->dev, "unsupported interface %d\n", - plat_dat->mac_interface); + dev_err(dwmac->dev, "unsupported interface %s\n", + phy_modes(plat_dat->phy_interface)); return -EINVAL; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c index 1eb16eec9c0d..6c179911ef3f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -171,7 +171,7 @@ static int stm32mp1_select_ethck_external(struct plat_stmmacenet_data *plat_dat) { struct stm32_dwmac *dwmac = plat_dat->bsp_priv; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: dwmac->enable_eth_ck = dwmac->ext_phyclk; return 0; @@ -193,7 +193,7 @@ static int stm32mp1_select_ethck_external(struct plat_stmmacenet_data *plat_dat) default: dwmac->enable_eth_ck = false; dev_err(dwmac->dev, "Mode %s not supported", - phy_modes(plat_dat->mac_interface)); + phy_modes(plat_dat->phy_interface)); return -EINVAL; } } @@ -206,7 +206,7 @@ static int stm32mp1_validate_ethck_rate(struct plat_stmmacenet_data *plat_dat) if (!dwmac->enable_eth_ck) return 0; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: case PHY_INTERFACE_MODE_GMII: if (clk_rate == ETH_CK_F_25M) @@ -228,7 +228,7 @@ static int stm32mp1_validate_ethck_rate(struct plat_stmmacenet_data 
*plat_dat) } dev_err(dwmac->dev, "Mode %s does not match eth-ck frequency %d Hz", - phy_modes(plat_dat->mac_interface), clk_rate); + phy_modes(plat_dat->phy_interface), clk_rate); return -EINVAL; } @@ -238,7 +238,7 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) u32 reg = dwmac->mode_reg; int val = 0; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: /* * STM32MP15xx supports both MII and GMII, STM32MP13xx MII only. @@ -269,12 +269,12 @@ static int stm32mp1_configure_pmcr(struct plat_stmmacenet_data *plat_dat) break; default: dev_err(dwmac->dev, "Mode %s not supported", - phy_modes(plat_dat->mac_interface)); + phy_modes(plat_dat->phy_interface)); /* Do not manage others interfaces */ return -EINVAL; } - dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->mac_interface)); + dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->phy_interface)); /* Shift value at correct ethernet MAC offset in SYSCFG_PMCSETR */ val <<= ffs(dwmac->mode_mask) - ffs(SYSCFG_MP1_ETH_MASK); @@ -294,7 +294,7 @@ static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) u32 reg = dwmac->mode_reg; int val = 0; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: /* ETH_REF_CLK_SEL bit in SYSCFG register is not applicable in MII mode */ break; @@ -319,12 +319,12 @@ static int stm32mp2_configure_syscfg(struct plat_stmmacenet_data *plat_dat) break; default: dev_err(dwmac->dev, "Mode %s not supported", - phy_modes(plat_dat->mac_interface)); + phy_modes(plat_dat->phy_interface)); /* Do not manage others interfaces */ return -EINVAL; } - dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->mac_interface)); + dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->phy_interface)); /* Select PTP (IEEE1588) clock selection from RCC (ck_ker_ethxptp) */ val |= SYSCFG_ETHCR_ETH_PTP_CLK_SEL; @@ -359,7 +359,7 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat) u32 reg = dwmac->mode_reg; int val; - switch (plat_dat->mac_interface) { + switch (plat_dat->phy_interface) { case PHY_INTERFACE_MODE_MII: val = SYSCFG_MCU_ETH_SEL_MII; break; @@ -368,12 +368,12 @@ static int stm32mcu_set_mode(struct plat_stmmacenet_data *plat_dat) break; default: dev_err(dwmac->dev, "Mode %s not supported", - phy_modes(plat_dat->mac_interface)); + phy_modes(plat_dat->phy_interface)); /* Do not manage others interfaces */ return -EINVAL; } - dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->mac_interface)); + dev_dbg(dwmac->dev, "Mode %s", phy_modes(plat_dat->phy_interface)); return regmap_update_bits(dwmac->regmap, reg, SYSCFG_MCU_ETH_MASK, val << 23); @@ -498,6 +498,26 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac, return err; } +static int stm32_dwmac_suspend(struct device *dev, void *bsp_priv) +{ + struct stm32_dwmac *dwmac = bsp_priv; + + stm32_dwmac_clk_disable(dwmac); + + return dwmac->ops->suspend ? 
dwmac->ops->suspend(dwmac) : 0; +} + +static int stm32_dwmac_resume(struct device *dev, void *bsp_priv) +{ + struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev)); + struct stm32_dwmac *dwmac = bsp_priv; + + if (dwmac->ops->resume) + dwmac->ops->resume(dwmac); + + return stm32_dwmac_init(priv->plat); +} + static int stm32_dwmac_probe(struct platform_device *pdev) { struct plat_stmmacenet_data *plat_dat; @@ -535,6 +555,8 @@ static int stm32_dwmac_probe(struct platform_device *pdev) plat_dat->flags |= STMMAC_FLAG_EN_TX_LPI_CLK_PHY_CAP; plat_dat->bsp_priv = dwmac; + plat_dat->suspend = stm32_dwmac_suspend; + plat_dat->resume = stm32_dwmac_resume; ret = stm32_dwmac_init(plat_dat); if (ret) @@ -600,50 +622,6 @@ static void stm32mp1_resume(struct stm32_dwmac *dwmac) clk_disable_unprepare(dwmac->clk_ethstp); } -#ifdef CONFIG_PM_SLEEP -static int stm32_dwmac_suspend(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct stm32_dwmac *dwmac = priv->plat->bsp_priv; - - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - stm32_dwmac_clk_disable(dwmac); - - if (dwmac->ops->suspend) - ret = dwmac->ops->suspend(dwmac); - - return ret; -} - -static int stm32_dwmac_resume(struct device *dev) -{ - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct stm32_dwmac *dwmac = priv->plat->bsp_priv; - int ret; - - if (dwmac->ops->resume) - dwmac->ops->resume(dwmac); - - ret = stm32_dwmac_init(priv->plat); - if (ret) - return ret; - - ret = stmmac_resume(dev); - - return ret; -} -#endif /* CONFIG_PM_SLEEP */ - -static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, - stm32_dwmac_suspend, stm32_dwmac_resume); - static struct stm32_ops stm32mcu_dwmac_data = { .set_mode = stm32mcu_set_mode }; @@ -691,7 +669,7 @@ static struct platform_driver stm32_dwmac_driver = { .remove = stm32_dwmac_remove, .driver = { .name = "stm32-dwmac", - .pm = &stm32_dwmac_pm_ops, + .pm = &stmmac_simple_pm_ops, .of_match_table = stm32_dwmac_match, }, }; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun55i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun55i.c new file mode 100644 index 000000000000..862df173d963 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun55i.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * dwmac-sun55i.c - Allwinner sun55i GMAC200 specific glue layer + * + * Copyright (C) 2025 Chen-Yu Tsai <wens@csie.org> + * + * syscon parts taken from dwmac-sun8i.c, which is + * + * Copyright (C) 2017 Corentin Labbe <clabbe.montjoie@gmail.com> + */ + +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/phy.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/regulator/consumer.h> +#include <linux/stmmac.h> + +#include "stmmac.h" +#include "stmmac_platform.h" + +#define SYSCON_REG 0x34 + +/* RMII specific bits */ +#define SYSCON_RMII_EN BIT(13) /* 1: enable RMII (overrides EPIT) */ +/* Generic system control EMAC_CLK bits */ +#define SYSCON_ETXDC_MASK GENMASK(12, 10) +#define SYSCON_ERXDC_MASK GENMASK(9, 5) +/* EMAC PHY Interface Type */ +#define SYSCON_EPIT BIT(2) /* 1: RGMII, 0: MII */ +#define SYSCON_ETCS_MASK GENMASK(1, 0) +#define SYSCON_ETCS_MII 0x0 +#define SYSCON_ETCS_EXT_GMII 0x1 +#define SYSCON_ETCS_INT_GMII 0x2 + +static int sun55i_gmac200_set_syscon(struct device *dev, + struct plat_stmmacenet_data *plat) +{ + 
struct device_node *node = dev->of_node; + struct regmap *regmap; + u32 val, reg = 0; + int ret; + + regmap = syscon_regmap_lookup_by_phandle(node, "syscon"); + if (IS_ERR(regmap)) + return dev_err_probe(dev, PTR_ERR(regmap), "Unable to map syscon\n"); + + if (!of_property_read_u32(node, "tx-internal-delay-ps", &val)) { + if (val % 100) + return dev_err_probe(dev, -EINVAL, + "tx-delay must be a multiple of 100ps\n"); + val /= 100; + dev_dbg(dev, "set tx-delay to %x\n", val); + if (!FIELD_FIT(SYSCON_ETXDC_MASK, val)) + return dev_err_probe(dev, -EINVAL, + "TX clock delay exceeds maximum (%u00ps > %lu00ps)\n", + val, FIELD_MAX(SYSCON_ETXDC_MASK)); + + reg |= FIELD_PREP(SYSCON_ETXDC_MASK, val); + } + + if (!of_property_read_u32(node, "rx-internal-delay-ps", &val)) { + if (val % 100) + return dev_err_probe(dev, -EINVAL, + "rx-delay must be a multiple of 100ps\n"); + val /= 100; + dev_dbg(dev, "set rx-delay to %x\n", val); + if (!FIELD_FIT(SYSCON_ERXDC_MASK, val)) + return dev_err_probe(dev, -EINVAL, + "RX clock delay exceeds maximum (%u00ps > %lu00ps)\n", + val, FIELD_MAX(SYSCON_ERXDC_MASK)); + + reg |= FIELD_PREP(SYSCON_ERXDC_MASK, val); + } + + switch (plat->phy_interface) { + case PHY_INTERFACE_MODE_MII: + /* default */ + break; + case PHY_INTERFACE_MODE_RGMII: + case PHY_INTERFACE_MODE_RGMII_ID: + case PHY_INTERFACE_MODE_RGMII_RXID: + case PHY_INTERFACE_MODE_RGMII_TXID: + reg |= SYSCON_EPIT | SYSCON_ETCS_INT_GMII; + break; + case PHY_INTERFACE_MODE_RMII: + reg |= SYSCON_RMII_EN; + break; + default: + return dev_err_probe(dev, -EINVAL, "Unsupported interface mode: %s", + phy_modes(plat->phy_interface)); + } + + ret = regmap_write(regmap, SYSCON_REG, reg); + if (ret < 0) + return dev_err_probe(dev, ret, "Failed to write to syscon\n"); + + return 0; +} + +static int sun55i_gmac200_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct device *dev = &pdev->dev; + struct clk *clk; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + plat_dat = devm_stmmac_probe_config_dt(pdev, stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); + + /* BSP disables it */ + plat_dat->flags |= STMMAC_FLAG_SPH_DISABLE; + plat_dat->host_dma_width = 32; + + ret = sun55i_gmac200_set_syscon(dev, plat_dat); + if (ret) + return ret; + + clk = devm_clk_get_enabled(dev, "mbus"); + if (IS_ERR(clk)) + return dev_err_probe(dev, PTR_ERR(clk), + "Failed to get or enable MBUS clock\n"); + + ret = devm_regulator_get_enable_optional(dev, "phy"); + if (ret) + return dev_err_probe(dev, ret, "Failed to get or enable PHY supply\n"); + + return devm_stmmac_pltfr_probe(pdev, plat_dat, &stmmac_res); +} + +static const struct of_device_id sun55i_gmac200_match[] = { + { .compatible = "allwinner,sun55i-a523-gmac200" }, + { } +}; +MODULE_DEVICE_TABLE(of, sun55i_gmac200_match); + +static struct platform_driver sun55i_gmac200_driver = { + .probe = sun55i_gmac200_probe, + .driver = { + .name = "dwmac-sun55i", + .pm = &stmmac_pltfr_pm_ops, + .of_match_table = sun55i_gmac200_match, + }, +}; +module_platform_driver(sun55i_gmac200_driver); + +MODULE_AUTHOR("Chen-Yu Tsai <wens@csie.org>"); +MODULE_DESCRIPTION("Allwinner sun55i GMAC200 specific glue layer"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 2796dc426943..5d871b2cd111 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -31,10 +31,6 @@ */ /* struct emac_variant - Describe dwmac-sun8i hardware variant - * @default_syscon_value: The default value of the EMAC register in syscon - * This value is used for disabling properly EMAC - * and used as a good starting value in case of the - * boot process(uboot) leave some stuff. * @syscon_field reg_field for the syscon's gmac register * @soc_has_internal_phy: Does the MAC embed an internal PHY * @support_mii: Does the MAC handle MII @@ -48,7 +44,6 @@ * value of zero indicates this is not supported. */ struct emac_variant { - u32 default_syscon_value; const struct reg_field *syscon_field; bool soc_has_internal_phy; bool support_mii; @@ -94,7 +89,6 @@ static const struct reg_field sun8i_ccu_reg_field = { }; static const struct emac_variant emac_variant_h3 = { - .default_syscon_value = 0x58000, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = true, .support_mii = true, @@ -105,14 +99,12 @@ static const struct emac_variant emac_variant_h3 = { }; static const struct emac_variant emac_variant_v3s = { - .default_syscon_value = 0x38000, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = true, .support_mii = true }; static const struct emac_variant emac_variant_a83t = { - .default_syscon_value = 0, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = false, .support_mii = true, @@ -122,7 +114,6 @@ static const struct emac_variant emac_variant_a83t = { }; static const struct emac_variant emac_variant_r40 = { - .default_syscon_value = 0, .syscon_field = &sun8i_ccu_reg_field, .support_mii = true, .support_rgmii = true, @@ -130,7 +121,6 @@ static const struct emac_variant emac_variant_r40 = { }; static const struct emac_variant emac_variant_a64 = { - .default_syscon_value = 0, .syscon_field = &sun8i_syscon_reg_field, .soc_has_internal_phy = false, .support_mii = true, @@ -141,7 +131,6 @@ static const struct emac_variant emac_variant_a64 = { }; static const struct emac_variant emac_variant_h6 = { - .default_syscon_value = 0x50000, .syscon_field = &sun8i_syscon_reg_field, /* The "Internal PHY" of H6 is not on the die. It's on the * co-packaged AC200 chip instead. @@ -933,25 +922,11 @@ static int sun8i_dwmac_set_syscon(struct device *dev, struct sunxi_priv_data *gmac = plat->bsp_priv; struct device_node *node = dev->of_node; int ret; - u32 reg, val; - - ret = regmap_field_read(gmac->regmap_field, &val); - if (ret) { - dev_err(dev, "Fail to read from regmap field.\n"); - return ret; - } - - reg = gmac->variant->default_syscon_value; - if (reg != val) - dev_warn(dev, - "Current syscon value is not the default %x (expect %x)\n", - val, reg); + u32 reg = 0, val; if (gmac->variant->soc_has_internal_phy) { if (of_property_read_bool(node, "allwinner,leds-active-low")) reg |= H3_EPHY_LED_POL; - else - reg &= ~H3_EPHY_LED_POL; /* Force EPHY xtal frequency to 24MHz. */ reg |= H3_EPHY_CLK_SEL; @@ -965,11 +940,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev, * address. No need to mask it again. */ reg |= ret << H3_EPHY_ADDR_SHIFT; - } else { - /* For SoCs without internal PHY the PHY selection bit should be - * set to 0 (external PHY). 
- */ - reg &= ~H3_EPHY_SELECT; } if (!of_property_read_u32(node, "allwinner,tx-delay-ps", &val)) { @@ -980,8 +950,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev, val /= 100; dev_dbg(dev, "set tx-delay to %x\n", val); if (val <= gmac->variant->tx_delay_max) { - reg &= ~(gmac->variant->tx_delay_max << - SYSCON_ETXDC_SHIFT); reg |= (val << SYSCON_ETXDC_SHIFT); } else { dev_err(dev, "Invalid TX clock delay: %d\n", @@ -998,8 +966,6 @@ static int sun8i_dwmac_set_syscon(struct device *dev, val /= 100; dev_dbg(dev, "set rx-delay to %x\n", val); if (val <= gmac->variant->rx_delay_max) { - reg &= ~(gmac->variant->rx_delay_max << - SYSCON_ERXDC_SHIFT); reg |= (val << SYSCON_ERXDC_SHIFT); } else { dev_err(dev, "Invalid RX clock delay: %d\n", @@ -1008,12 +974,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, } } - /* Clear interface mode bits */ - reg &= ~(SYSCON_ETCS_MASK | SYSCON_EPIT); - if (gmac->variant->support_rmii) - reg &= ~SYSCON_RMII_EN; - - switch (plat->mac_interface) { + switch (plat->phy_interface) { case PHY_INTERFACE_MODE_MII: /* default */ break; @@ -1028,7 +989,7 @@ static int sun8i_dwmac_set_syscon(struct device *dev, break; default: dev_err(dev, "Unsupported interface mode: %s", - phy_modes(plat->mac_interface)); + phy_modes(plat->phy_interface)); return -EINVAL; } @@ -1039,9 +1000,9 @@ static int sun8i_dwmac_set_syscon(struct device *dev, static void sun8i_dwmac_unset_syscon(struct sunxi_priv_data *gmac) { - u32 reg = gmac->variant->default_syscon_value; - - regmap_field_write(gmac->regmap_field, reg); + if (gmac->variant->soc_has_internal_phy) + regmap_field_write(gmac->regmap_field, + (H3_EPHY_SHUTDOWN | H3_EPHY_SELECT)); } static void sun8i_dwmac_exit(struct platform_device *pdev, void *priv) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c index 6c6c49e4b66f..a3378046b061 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-thead.c @@ -56,7 +56,7 @@ static int thead_dwmac_set_phy_if(struct plat_stmmacenet_data *plat) struct thead_dwmac *dwmac = plat->bsp_priv; u32 phyif; - switch (plat->mac_interface) { + switch (plat->phy_interface) { case PHY_INTERFACE_MODE_MII: phyif = PHY_INTF_MII_GMII; break; @@ -67,8 +67,8 @@ static int thead_dwmac_set_phy_if(struct plat_stmmacenet_data *plat) phyif = PHY_INTF_RGMII; break; default: - dev_err(dwmac->dev, "unsupported phy interface %d\n", - plat->mac_interface); + dev_err(dwmac->dev, "unsupported phy interface %s\n", + phy_modes(plat->phy_interface)); return -EINVAL; } @@ -81,7 +81,7 @@ static int thead_dwmac_set_txclk_dir(struct plat_stmmacenet_data *plat) struct thead_dwmac *dwmac = plat->bsp_priv; u32 txclk_dir; - switch (plat->mac_interface) { + switch (plat->phy_interface) { case PHY_INTERFACE_MODE_MII: txclk_dir = TXCLK_DIR_INPUT; break; @@ -92,8 +92,8 @@ static int thead_dwmac_set_txclk_dir(struct plat_stmmacenet_data *plat) txclk_dir = TXCLK_DIR_OUTPUT; break; default: - dev_err(dwmac->dev, "unsupported phy interface %d\n", - plat->mac_interface); + dev_err(dwmac->dev, "unsupported phy interface %s\n", + phy_modes(plat->phy_interface)); return -EINVAL; } @@ -112,7 +112,7 @@ static int thead_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, plat = dwmac->plat; - switch (plat->mac_interface) { + switch (plat->phy_interface) { /* For MII, rxc/txc is provided by phy */ case PHY_INTERFACE_MODE_MII: return 0; @@ -143,8 +143,8 @@ static int thead_set_clk_tx_rate(void *bsp_priv, struct clk 
*clk_tx_i, return 0; default: - dev_err(dwmac->dev, "unsupported phy interface %d\n", - plat->mac_interface); + dev_err(dwmac->dev, "unsupported phy interface %s\n", + phy_modes(plat->phy_interface)); return -EINVAL; } } @@ -154,7 +154,7 @@ static int thead_dwmac_enable_clk(struct plat_stmmacenet_data *plat) struct thead_dwmac *dwmac = plat->bsp_priv; u32 reg, div; - switch (plat->mac_interface) { + switch (plat->phy_interface) { case PHY_INTERFACE_MODE_MII: reg = GMAC_RX_CLK_EN | GMAC_TX_CLK_EN; break; @@ -177,8 +177,8 @@ static int thead_dwmac_enable_clk(struct plat_stmmacenet_data *plat) break; default: - dev_err(dwmac->dev, "unsupported phy interface %d\n", - plat->mac_interface); + dev_err(dwmac->dev, "unsupported phy interface %s\n", + phy_modes(plat->phy_interface)); return -EINVAL; } diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h index f4694fd576f5..3dec1a264cf6 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h @@ -341,6 +341,7 @@ static inline u32 mtl_chanx_base_addr(const struct dwmac4_addrs *addrs, #define MTL_OP_MODE_RFA_SHIFT 8 #define MTL_OP_MODE_EHFC BIT(7) +#define MTL_OP_MODE_DIS_TCP_EF BIT(6) #define MTL_OP_MODE_RTC_MASK GENMASK(1, 0) #define MTL_OP_MODE_RTC_SHIFT 0 diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c index a5fb31eb0192..aac68dc28dc1 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c @@ -110,16 +110,20 @@ static int dwmac4_wrback_get_rx_status(struct stmmac_extra_stats *x, message_type = (rdes1 & ERDES4_MSG_TYPE_MASK) >> 8; - if (rdes1 & RDES1_IP_HDR_ERROR) + if (rdes1 & RDES1_IP_HDR_ERROR) { x->ip_hdr_err++; + ret |= csum_none; + } if (rdes1 & RDES1_IP_CSUM_BYPASSED) x->ip_csum_bypassed++; if (rdes1 & RDES1_IPV4_HEADER) x->ipv4_pkt_rcvd++; if (rdes1 & RDES1_IPV6_HEADER) x->ipv6_pkt_rcvd++; - if (rdes1 & RDES1_IP_PAYLOAD_ERROR) + if (rdes1 & RDES1_IP_PAYLOAD_ERROR) { x->ip_payload_err++; + ret |= csum_none; + } if (message_type == RDES_EXT_NO_PTP) x->no_ptp_rx_msg_type_ext++; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c index 0cb84a0041a4..d87a8b595e6a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c @@ -268,6 +268,8 @@ static void dwmac4_dma_rx_chan_op_mode(struct stmmac_priv *priv, mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(dwmac4_addrs, channel)); + mtl_rx_op |= MTL_OP_MODE_DIS_TCP_EF; + if (mode == SF_DMA_MODE) { pr_debug("GMAC: enable RX store and forward mode\n"); mtl_rx_op |= MTL_OP_MODE_RSF; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c index 4846bf49c576..467f1a05747e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac_lib.c @@ -251,7 +251,7 @@ void dwmac_dma_flush_tx_fifo(void __iomem *ioaddr) void stmmac_set_mac_addr(void __iomem *ioaddr, const u8 addr[6], unsigned int high, unsigned int low) { - unsigned long data; + u32 data; data = (addr[5] << 8) | addr[4]; /* For MAC Addr registers we have to set the Address Enable (AE) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c index 6cadf8de4fdf..00e929bf280b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c +++ 
b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_core.c @@ -49,6 +49,14 @@ static void dwxgmac2_core_init(struct mac_device_info *hw, writel(XGMAC_INT_DEFAULT_EN, ioaddr + XGMAC_INT_EN); } +static void dwxgmac2_update_caps(struct stmmac_priv *priv) +{ + if (!priv->dma_cap.mbps_10_100) + priv->hw->link.caps &= ~(MAC_10 | MAC_100); + else if (!priv->dma_cap.half_duplex) + priv->hw->link.caps &= ~(MAC_10HD | MAC_100HD); +} + static void dwxgmac2_set_mac(void __iomem *ioaddr, bool enable) { u32 tx = readl(ioaddr + XGMAC_TX_CONFIG); @@ -1424,6 +1432,7 @@ static void dwxgmac2_set_arp_offload(struct mac_device_info *hw, bool en, const struct stmmac_ops dwxgmac210_ops = { .core_init = dwxgmac2_core_init, + .update_caps = dwxgmac2_update_caps, .set_mac = dwxgmac2_set_mac, .rx_ipc = dwxgmac2_rx_ipc, .rx_queue_enable = dwxgmac2_rx_queue_enable, @@ -1532,8 +1541,8 @@ int dwxgmac2_setup(struct stmmac_priv *priv) mac->mcast_bits_log2 = ilog2(mac->multicast_filter_bins); mac->link.caps = MAC_ASYM_PAUSE | MAC_SYM_PAUSE | - MAC_1000FD | MAC_2500FD | MAC_5000FD | - MAC_10000FD; + MAC_10 | MAC_100 | MAC_1000FD | + MAC_2500FD | MAC_5000FD | MAC_10000FD; mac->link.duplex = 0; mac->link.speed10 = XGMAC_CONFIG_SS_10_MII; mac->link.speed100 = XGMAC_CONFIG_SS_100_MII; diff --git a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c index 5dcc95bc0ad2..4d6bb995d8d8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwxgmac2_dma.c @@ -203,10 +203,6 @@ static void dwxgmac2_dma_rx_mode(struct stmmac_priv *priv, void __iomem *ioaddr, } writel(value, ioaddr + XGMAC_MTL_RXQ_OPMODE(channel)); - - /* Enable MTL RX overflow */ - value = readl(ioaddr + XGMAC_MTL_QINTEN(channel)); - writel(value | XGMAC_RXOIE, ioaddr + XGMAC_MTL_QINTEN(channel)); } static void dwxgmac2_dma_tx_mode(struct stmmac_priv *priv, void __iomem *ioaddr, @@ -386,8 +382,11 @@ static int dwxgmac2_dma_interrupt(struct stmmac_priv *priv, static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, struct dma_features *dma_cap) { + struct stmmac_priv *priv; u32 hw_cap; + priv = container_of(dma_cap, struct stmmac_priv, dma_cap); + /* MAC HW feature 0 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE0); dma_cap->edma = (hw_cap & XGMAC_HWFEAT_EDMA) >> 31; @@ -410,6 +409,8 @@ static int dwxgmac2_get_hw_feature(void __iomem *ioaddr, dma_cap->vlhash = (hw_cap & XGMAC_HWFEAT_VLHASH) >> 4; dma_cap->half_duplex = (hw_cap & XGMAC_HWFEAT_HDSEL) >> 3; dma_cap->mbps_1000 = (hw_cap & XGMAC_HWFEAT_GMIISEL) >> 1; + if (dma_cap->mbps_1000 && priv->synopsys_id >= DWXGMAC_CORE_2_20) + dma_cap->mbps_10_100 = 1; /* MAC HW feature 1 */ hw_cap = readl(ioaddr + XGMAC_HW_FEATURE1); diff --git a/drivers/net/ethernet/stmicro/stmmac/hwif.c b/drivers/net/ethernet/stmicro/stmmac/hwif.c index 99635b37044a..3f7c765dcb79 100644 --- a/drivers/net/ethernet/stmicro/stmmac/hwif.c +++ b/drivers/net/ethernet/stmicro/stmmac/hwif.c @@ -100,7 +100,7 @@ int stmmac_reset(struct stmmac_priv *priv, void __iomem *ioaddr) return -EINVAL; if (plat && plat->fix_soc_reset) - return plat->fix_soc_reset(plat, ioaddr); + return plat->fix_soc_reset(priv, ioaddr); return stmmac_do_callback(priv, dma, reset, ioaddr); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h index cda09cf5dcca..7ca5477be390 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h @@ -289,8 +289,7 @@ struct stmmac_priv { u32 msg_enable; 
int wolopts; int wol_irq; - bool wol_irq_disabled; - int clk_csr; + u32 gmii_address_bus_config; struct timer_list eee_ctrl_timer; int lpi_irq; u32 tx_lpi_timer; @@ -374,6 +373,18 @@ enum stmmac_state { STMMAC_SERVICE_SCHED, }; +extern const struct dev_pm_ops stmmac_simple_pm_ops; + +static inline bool stmmac_wol_enabled_mac(struct stmmac_priv *priv) +{ + return priv->plat->pmt && device_may_wakeup(priv->device); +} + +static inline bool stmmac_wol_enabled_phy(struct stmmac_priv *priv) +{ + return !priv->plat->pmt && device_may_wakeup(priv->device); +} + int stmmac_mdio_unregister(struct net_device *ndev); int stmmac_mdio_register(struct net_device *ndev); int stmmac_mdio_reset(struct mii_bus *mii); @@ -381,7 +392,6 @@ int stmmac_pcs_setup(struct net_device *ndev); void stmmac_pcs_clean(struct net_device *ndev); void stmmac_set_ethtool_ops(struct net_device *netdev); -int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags); void stmmac_ptp_register(struct stmmac_priv *priv); void stmmac_ptp_unregister(struct stmmac_priv *priv); int stmmac_xdp_open(struct net_device *dev); @@ -394,7 +404,6 @@ int stmmac_dvr_probe(struct device *device, struct stmmac_resources *res); int stmmac_reinit_queues(struct net_device *dev, u32 rx_cnt, u32 tx_cnt); int stmmac_reinit_ringparam(struct net_device *dev, u32 rx_size, u32 tx_size); -int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled); int stmmac_set_clk_tx_rate(void *bsp_priv, struct clk *clk_tx_i, phy_interface_t interface, int speed); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c index ac6f2e3a3fcd..4b513d27a988 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.c @@ -63,7 +63,7 @@ static int est_configure(struct stmmac_priv *priv, struct stmmac_est *cfg, EST_GMAC5_PTOV_SHIFT; } if (cfg->enable) - ctrl |= EST_EEST | EST_SSWL; + ctrl |= EST_EEST | EST_SSWL | EST_DFBS; else ctrl &= ~EST_EEST; @@ -109,6 +109,10 @@ static void est_irq_status(struct stmmac_priv *priv, struct net_device *dev, x->mtl_est_hlbs++; + for (i = 0; i < txqcnt; i++) + if (value & BIT(i)) + x->mtl_est_txq_hlbs[i]++; + /* Clear Interrupt */ writel(value, est_addr + EST_SCH_ERR); @@ -131,10 +135,9 @@ static void est_irq_status(struct stmmac_priv *priv, struct net_device *dev, x->mtl_est_hlbf++; - for (i = 0; i < txqcnt; i++) { + for (i = 0; i < txqcnt; i++) if (feqn & BIT(i)) x->mtl_est_txq_hlbf[i]++; - } /* Clear Interrupt */ writel(feqn, est_addr + EST_FRM_SZ_ERR); diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h index d247fa383a6e..f70221c9c84a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_est.h @@ -16,6 +16,7 @@ #define EST_XGMAC_PTOV_MUL 9 #define EST_SSWL BIT(1) #define EST_EEST BIT(0) +#define EST_DFBS BIT(5) #define EST_STATUS 0x00000008 #define EST_GMAC5_BTRL GENMASK(11, 8) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c index 77758a7299b4..39fa1ec92f82 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c @@ -803,7 +803,6 @@ static void stmmac_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) { struct stmmac_priv *priv = netdev_priv(dev); - u32 support = 
WAKE_MAGIC | WAKE_UCAST; if (!device_can_wakeup(priv->device)) return -EOPNOTSUPP; @@ -816,29 +815,7 @@ static int stmmac_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) return ret; } - /* By default almost all GMAC devices support the WoL via - * magic frame but we can disable it if the HW capability - * register shows no support for pmt_magic_frame. */ - if ((priv->hw_cap_support) && (!priv->dma_cap.pmt_magic_frame)) - wol->wolopts &= ~WAKE_MAGIC; - - if (wol->wolopts & ~support) - return -EINVAL; - - if (wol->wolopts) { - pr_info("stmmac: wakeup enable\n"); - device_set_wakeup_enable(priv->device, 1); - /* Avoid unbalanced enable_irq_wake calls */ - if (priv->wol_irq_disabled) - enable_irq_wake(priv->wol_irq); - priv->wol_irq_disabled = false; - } else { - device_set_wakeup_enable(priv->device, 0); - /* Avoid unbalanced disable_irq_wake calls */ - if (!priv->wol_irq_disabled) - disable_irq_wake(priv->wol_irq); - priv->wol_irq_disabled = true; - } + device_set_wakeup_enable(priv->device, !!wol->wolopts); mutex_lock(&priv->lock); priv->wolopts = wol->wolopts; @@ -852,9 +829,6 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (!priv->dma_cap.eee) - return -EOPNOTSUPP; - return phylink_ethtool_get_eee(priv->phylink, edata); } @@ -863,9 +837,6 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev, { struct stmmac_priv *priv = netdev_priv(dev); - if (!priv->dma_cap.eee) - return -EOPNOTSUPP; - return phylink_ethtool_set_eee(priv->phylink, edata); } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c index e2840fa241f2..bb110124f21e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_hwtstamp.c @@ -135,7 +135,6 @@ static int init_systime(void __iomem *ioaddr, u32 sec, u32 nsec) static int config_addend(void __iomem *ioaddr, u32 addend) { u32 value; - int limit; writel(addend, ioaddr + PTP_TAR); /* issue command to update the addend value */ @@ -144,23 +143,15 @@ static int config_addend(void __iomem *ioaddr, u32 addend) writel(value, ioaddr + PTP_TCR); /* wait for present addend update to complete */ - limit = 10; - while (limit--) { - if (!(readl(ioaddr + PTP_TCR) & PTP_TCR_TSADDREG)) - break; - mdelay(10); - } - if (limit < 0) - return -EBUSY; - - return 0; + return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value, + !(value & PTP_TCR_TSADDREG), + 10, 100000); } static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, int add_sub, int gmac4) { u32 value; - int limit; if (add_sub) { /* If the new sec value needs to be subtracted with @@ -187,16 +178,9 @@ static int adjust_systime(void __iomem *ioaddr, u32 sec, u32 nsec, writel(value, ioaddr + PTP_TCR); /* wait for present system time adjust/update to complete */ - limit = 10; - while (limit--) { - if (!(readl(ioaddr + PTP_TCR) & PTP_TCR_TSUPDT)) - break; - mdelay(10); - } - if (limit < 0) - return -EBUSY; - - return 0; + return readl_poll_timeout_atomic(ioaddr + PTP_TCR, value, + !(value & PTP_TCR_TSUPDT), + 10, 100000); } static void get_systime(void __iomem *ioaddr, u64 *systime) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index f1abf4242cd2..650d75b73e0b 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -29,6 +29,7 @@ #include <linux/dma-mapping.h> #include 
<linux/slab.h> #include <linux/pm_runtime.h> +#include <linux/pm_wakeirq.h> #include <linux/prefetch.h> #include <linux/pinctrl/consumer.h> #ifdef CONFIG_DEBUG_FS @@ -146,38 +147,6 @@ static void stmmac_exit_fs(struct net_device *dev); #define STMMAC_COAL_TIMER(x) (ns_to_ktime((x) * NSEC_PER_USEC)) -int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled) -{ - int ret = 0; - - if (enabled) { - ret = clk_prepare_enable(priv->plat->stmmac_clk); - if (ret) - return ret; - ret = clk_prepare_enable(priv->plat->pclk); - if (ret) { - clk_disable_unprepare(priv->plat->stmmac_clk); - return ret; - } - if (priv->plat->clks_config) { - ret = priv->plat->clks_config(priv->plat->bsp_priv, enabled); - if (ret) { - clk_disable_unprepare(priv->plat->stmmac_clk); - clk_disable_unprepare(priv->plat->pclk); - return ret; - } - } - } else { - clk_disable_unprepare(priv->plat->stmmac_clk); - clk_disable_unprepare(priv->plat->pclk); - if (priv->plat->clks_config) - priv->plat->clks_config(priv->plat->bsp_priv, enabled); - } - - return ret; -} -EXPORT_SYMBOL_GPL(stmmac_bus_clks_config); - /** * stmmac_set_clk_tx_rate() - set the clock rate for the MAC transmit clock * @bsp_priv: BSP private data structure (unused) @@ -312,77 +281,6 @@ static void stmmac_global_err(struct stmmac_priv *priv) stmmac_service_event_schedule(priv); } -/** - * stmmac_clk_csr_set - dynamically set the MDC clock - * @priv: driver private structure - * Description: this is to dynamically set the MDC clock according to the csr - * clock input. - * Note: - * If a specific clk_csr value is passed from the platform - * this means that the CSR Clock Range selection cannot be - * changed at run-time and it is fixed (as reported in the driver - * documentation). Viceversa the driver will try to set the MDC - * clock dynamically according to the actual clock input. - */ -static void stmmac_clk_csr_set(struct stmmac_priv *priv) -{ - unsigned long clk_rate; - - clk_rate = clk_get_rate(priv->plat->stmmac_clk); - - /* Platform provided default clk_csr would be assumed valid - * for all other cases except for the below mentioned ones. - * For values higher than the IEEE 802.3 specified frequency - * we can not estimate the proper divider as it is not known - * the frequency of clk_csr_i. So we do not change the default - * divider. 
- */ - if (!(priv->clk_csr & MAC_CSR_H_FRQ_MASK)) { - if (clk_rate < CSR_F_35M) - priv->clk_csr = STMMAC_CSR_20_35M; - else if ((clk_rate >= CSR_F_35M) && (clk_rate < CSR_F_60M)) - priv->clk_csr = STMMAC_CSR_35_60M; - else if ((clk_rate >= CSR_F_60M) && (clk_rate < CSR_F_100M)) - priv->clk_csr = STMMAC_CSR_60_100M; - else if ((clk_rate >= CSR_F_100M) && (clk_rate < CSR_F_150M)) - priv->clk_csr = STMMAC_CSR_100_150M; - else if ((clk_rate >= CSR_F_150M) && (clk_rate < CSR_F_250M)) - priv->clk_csr = STMMAC_CSR_150_250M; - else if ((clk_rate >= CSR_F_250M) && (clk_rate <= CSR_F_300M)) - priv->clk_csr = STMMAC_CSR_250_300M; - else if ((clk_rate >= CSR_F_300M) && (clk_rate < CSR_F_500M)) - priv->clk_csr = STMMAC_CSR_300_500M; - else if ((clk_rate >= CSR_F_500M) && (clk_rate < CSR_F_800M)) - priv->clk_csr = STMMAC_CSR_500_800M; - } - - if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) { - if (clk_rate > 160000000) - priv->clk_csr = 0x03; - else if (clk_rate > 80000000) - priv->clk_csr = 0x02; - else if (clk_rate > 40000000) - priv->clk_csr = 0x01; - else - priv->clk_csr = 0; - } - - if (priv->plat->has_xgmac) { - if (clk_rate > 400000000) - priv->clk_csr = 0x5; - else if (clk_rate > 350000000) - priv->clk_csr = 0x4; - else if (clk_rate > 300000000) - priv->clk_csr = 0x3; - else if (clk_rate > 250000000) - priv->clk_csr = 0x2; - else if (clk_rate > 150000000) - priv->clk_csr = 0x1; - else - priv->clk_csr = 0x0; - } -} - static void print_pkt(unsigned char *buf, int len) { pr_debug("len = %d byte, buf addr: 0x%p\n", len, buf); @@ -795,16 +693,14 @@ static int stmmac_hwtstamp_get(struct net_device *dev, * Will be rerun after resuming from suspend, case in which the timestamping * flags updated by stmmac_hwtstamp_set() also need to be restored. */ -int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) +static int stmmac_init_tstamp_counter(struct stmmac_priv *priv, + u32 systime_flags) { bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; struct timespec64 now; u32 sec_inc = 0; u64 temp = 0; - if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) - return -EOPNOTSUPP; - if (!priv->plat->clk_ptp_rate) { netdev_err(priv->dev, "Invalid PTP clock rate"); return -EINVAL; @@ -839,16 +735,15 @@ int stmmac_init_tstamp_counter(struct stmmac_priv *priv, u32 systime_flags) return 0; } -EXPORT_SYMBOL_GPL(stmmac_init_tstamp_counter); /** - * stmmac_init_ptp - init PTP + * stmmac_init_timestamping - initialise timestamping * @priv: driver private structure * Description: this is to verify if the HW supports the PTPv1 or PTPv2. * This is done by looking at the HW cap. register. * This function also registers the ptp driver. 
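Earlier in this series of hunks, stmmac_hwtstamp.c drops its open-coded mdelay() busy-wait loops in favour of readl_poll_timeout_atomic(), and the stmmac_mdio_wait() helper added further down does the same with the sleeping readl_poll_timeout(). A minimal sketch of the pattern, using a placeholder register and busy bit rather than the driver's actual PTP_TCR flags:

#include <linux/iopoll.h>

/* Re-read the register every 10us and give up with -ETIMEDOUT after
 * 100ms, mirroring the converted config_addend()/adjust_systime() waits.
 */
static int example_wait_not_busy(void __iomem *reg, u32 busy_bit)
{
	u32 val;

	return readl_poll_timeout_atomic(reg, val, !(val & busy_bit),
					 10, 100000);
}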
*/ -static int stmmac_init_ptp(struct stmmac_priv *priv) +static int stmmac_init_timestamping(struct stmmac_priv *priv) { bool xmac = priv->plat->has_gmac4 || priv->plat->has_xgmac; int ret; @@ -856,9 +751,16 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) if (priv->plat->ptp_clk_freq_config) priv->plat->ptp_clk_freq_config(priv); + if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp)) { + netdev_info(priv->dev, "PTP not supported by HW\n"); + return -EOPNOTSUPP; + } + ret = stmmac_init_tstamp_counter(priv, STMMAC_HWTS_ACTIVE); - if (ret) + if (ret) { + netdev_warn(priv->dev, "PTP init failed\n"); return ret; + } priv->adv_ts = 0; /* Check if adv_ts can be enabled for dwmac 4.x / xgmac core */ @@ -884,10 +786,24 @@ static int stmmac_init_ptp(struct stmmac_priv *priv) return 0; } +static void stmmac_setup_ptp(struct stmmac_priv *priv) +{ + int ret; + + ret = clk_prepare_enable(priv->plat->clk_ptp_ref); + if (ret < 0) + netdev_warn(priv->dev, + "failed to enable PTP reference clock: %pe\n", + ERR_PTR(ret)); + + if (stmmac_init_timestamping(priv) == 0) + stmmac_ptp_register(priv); +} + static void stmmac_release_ptp(struct stmmac_priv *priv) { - clk_disable_unprepare(priv->plat->clk_ptp_ref); stmmac_ptp_unregister(priv); + clk_disable_unprepare(priv->plat->clk_ptp_ref); } /** @@ -1169,7 +1085,7 @@ static const struct phylink_mac_ops stmmac_phylink_mac_ops = { */ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) { - int interface = priv->plat->mac_interface; + int interface = priv->plat->phy_interface; if (priv->dma_cap.pcs) { if ((interface == PHY_INTERFACE_MODE_RGMII) || @@ -1196,13 +1112,19 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv) static int stmmac_init_phy(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); + int mode = priv->plat->phy_interface; struct fwnode_handle *phy_fwnode; struct fwnode_handle *fwnode; + struct ethtool_keee eee; int ret; if (!phylink_expects_phy(priv->phylink)) return 0; + if (priv->hw->xpcs && + xpcs_get_an_mode(priv->hw->xpcs, mode) == DW_AN_C73) + return 0; + fwnode = priv->plat->port_node; if (!fwnode) fwnode = dev_fwnode(priv->device); @@ -1236,19 +1158,20 @@ static int stmmac_init_phy(struct net_device *dev) ret = phylink_fwnode_phy_connect(priv->phylink, fwnode, 0); } - if (ret == 0) { - struct ethtool_keee eee; + if (ret) { + netdev_err(priv->dev, "cannot attach to PHY (error: %pe)\n", + ERR_PTR(ret)); + return ret; + } - /* Configure phylib's copy of the LPI timer. Normally, - * phylink_config.lpi_timer_default would do this, but there is - * a chance that userspace could change the eee_timer setting - * via sysfs before the first open. Thus, preserve existing - * behaviour. - */ - if (!phylink_ethtool_get_eee(priv->phylink, &eee)) { - eee.tx_lpi_timer = priv->tx_lpi_timer; - phylink_ethtool_set_eee(priv->phylink, &eee); - } + /* Configure phylib's copy of the LPI timer. Normally, + * phylink_config.lpi_timer_default would do this, but there is a + * chance that userspace could change the eee_timer setting via sysfs + * before the first open. Thus, preserve existing behaviour. 
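The reworked stmmac_init_phy() above now bails out early and logs the failure with the %pe printk specifier, which prints an ERR_PTR()-encoded errno by its symbolic name. A self-contained sketch of that logging idiom; the function name is a placeholder, not part of the driver:

#include <linux/err.h>
#include <linux/netdevice.h>

/* %pe turns ERR_PTR(-ENODEV) into the string "-ENODEV" in the log. */
static void example_log_attach_failure(struct net_device *ndev, int err)
{
	netdev_err(ndev, "cannot attach to PHY (error: %pe)\n", ERR_PTR(err));
}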
+ */ + if (!phylink_ethtool_get_eee(priv->phylink, &eee)) { + eee.tx_lpi_timer = priv->tx_lpi_timer; + phylink_ethtool_set_eee(priv->phylink, &eee); } if (!priv->plat->pmt) { @@ -1259,7 +1182,7 @@ static int stmmac_init_phy(struct net_device *dev) device_set_wakeup_enable(priv->device, !!wol.wolopts); } - return ret; + return 0; } static int stmmac_phy_setup(struct stmmac_priv *priv) @@ -2584,6 +2507,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) struct netdev_queue *nq = netdev_get_tx_queue(priv->dev, queue); struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; + bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; struct xsk_buff_pool *pool = tx_q->xsk_pool; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc = NULL; @@ -2671,7 +2595,7 @@ static bool stmmac_xdp_xmit_zc(struct stmmac_priv *priv, u32 queue, u32 budget) } stmmac_prepare_tx_desc(priv, tx_desc, 1, xdp_desc.len, - true, priv->mode, true, true, + csum, priv->mode, true, true, xdp_desc.len); stmmac_enable_dma_transmission(priv, priv->ioaddr, queue); @@ -3476,7 +3400,6 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) /** * stmmac_hw_setup - setup mac in a usable state. * @dev : pointer to the device structure. - * @ptp_register: register PTP if set * Description: * this is the main function to setup the HW in a usable state because the * dma engine is reset, the core registers are configured (e.g. AXI, @@ -3486,7 +3409,7 @@ static void stmmac_safety_feat_configuration(struct stmmac_priv *priv) * 0 on success and an appropriate (-)ve integer as defined in errno.h * file on failure. */ -static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) +static int stmmac_hw_setup(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); u32 rx_cnt = priv->plat->rx_queues_to_use; @@ -3557,22 +3480,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) stmmac_mmc_setup(priv); - if (ptp_register) { - ret = clk_prepare_enable(priv->plat->clk_ptp_ref); - if (ret < 0) - netdev_warn(priv->dev, - "failed to enable PTP reference clock: %pe\n", - ERR_PTR(ret)); - } - - ret = stmmac_init_ptp(priv); - if (ret == -EOPNOTSUPP) - netdev_info(priv->dev, "PTP not supported by HW\n"); - else if (ret) - netdev_warn(priv->dev, "PTP init failed\n"); - else if (ptp_register) - stmmac_ptp_register(priv); - if (priv->use_riwt) { u32 queue; @@ -3636,13 +3543,6 @@ static int stmmac_hw_setup(struct net_device *dev, bool ptp_register) return 0; } -static void stmmac_hw_teardown(struct net_device *dev) -{ - struct stmmac_priv *priv = netdev_priv(dev); - - clk_disable_unprepare(priv->plat->clk_ptp_ref); -} - static void stmmac_free_irq(struct net_device *dev, enum request_irq_err irq_err, int irq_idx) { @@ -3724,7 +3624,6 @@ static int stmmac_request_irq_multi_msi(struct net_device *dev) /* Request the Wake IRQ in case of another line * is used for WoL */ - priv->wol_irq_disabled = true; if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { int_name = priv->int_name_wol; sprintf(int_name, "%s:%s", dev->name, "wol"); @@ -3885,7 +3784,6 @@ static int stmmac_request_irq_single(struct net_device *dev) /* Request the Wake IRQ in case of another line * is used for WoL */ - priv->wol_irq_disabled = true; if (priv->wol_irq > 0 && priv->wol_irq != dev->irq) { ret = request_irq(priv->wol_irq, stmmac_interrupt, IRQF_SHARED, dev->name, dev); @@ -4034,29 +3932,9 @@ static int 
__stmmac_open(struct net_device *dev, struct stmmac_dma_conf *dma_conf) { struct stmmac_priv *priv = netdev_priv(dev); - int mode = priv->plat->phy_interface; u32 chan; int ret; - /* Initialise the tx lpi timer, converting from msec to usec */ - if (!priv->tx_lpi_timer) - priv->tx_lpi_timer = eee_timer * 1000; - - ret = pm_runtime_resume_and_get(priv->device); - if (ret < 0) - return ret; - - if ((!priv->hw->xpcs || - xpcs_get_an_mode(priv->hw->xpcs, mode) != DW_AN_C73)) { - ret = stmmac_init_phy(dev); - if (ret) { - netdev_err(priv->dev, - "%s: Cannot attach to PHY (error: %d)\n", - __func__, ret); - goto init_phy_error; - } - } - for (int i = 0; i < MTL_MAX_TX_QUEUES; i++) if (priv->dma_conf.tx_queue[i].tbs & STMMAC_TBS_EN) dma_conf->tx_queue[i].tbs = priv->dma_conf.tx_queue[i].tbs; @@ -4074,12 +3952,14 @@ static int __stmmac_open(struct net_device *dev, } } - ret = stmmac_hw_setup(dev, true); + ret = stmmac_hw_setup(dev); if (ret < 0) { netdev_err(priv->dev, "%s: Hw setup failed\n", __func__); goto init_error; } + stmmac_setup_ptp(priv); + stmmac_init_coalesce(priv); phylink_start(priv->phylink); @@ -4102,11 +3982,8 @@ irq_error: for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) hrtimer_cancel(&priv->dma_conf.tx_queue[chan].txtimer); - stmmac_hw_teardown(dev); + stmmac_release_ptp(priv); init_error: - phylink_disconnect_phy(priv->phylink); -init_phy_error: - pm_runtime_put(priv->device); return ret; } @@ -4116,34 +3993,54 @@ static int stmmac_open(struct net_device *dev) struct stmmac_dma_conf *dma_conf; int ret; + /* Initialise the tx lpi timer, converting from msec to usec */ + if (!priv->tx_lpi_timer) + priv->tx_lpi_timer = eee_timer * 1000; + dma_conf = stmmac_setup_dma_desc(priv, dev->mtu); if (IS_ERR(dma_conf)) return PTR_ERR(dma_conf); + ret = pm_runtime_resume_and_get(priv->device); + if (ret < 0) + goto err_dma_resources; + + ret = stmmac_init_phy(dev); + if (ret) + goto err_runtime_pm; + ret = __stmmac_open(dev, dma_conf); if (ret) - free_dma_desc_resources(priv, dma_conf); + goto err_disconnect_phy; kfree(dma_conf); + + return ret; + +err_disconnect_phy: + phylink_disconnect_phy(priv->phylink); +err_runtime_pm: + pm_runtime_put(priv->device); +err_dma_resources: + free_dma_desc_resources(priv, dma_conf); + kfree(dma_conf); return ret; } -/** - * stmmac_release - close entry point of the driver - * @dev : device pointer. - * Description: - * This is the stop entry point of the driver. - */ -static int stmmac_release(struct net_device *dev) +static void __stmmac_release(struct net_device *dev) { struct stmmac_priv *priv = netdev_priv(dev); u32 chan; + /* If the PHY or MAC has WoL enabled, then the PHY will not be + * suspended when phylink_stop() is called below. Set the PHY + * to its slowest speed to save power. + */ if (device_may_wakeup(priv->device)) phylink_speed_down(priv->phylink, false); + /* Stop and disconnect the PHY */ phylink_stop(priv->phylink); - phylink_disconnect_phy(priv->phylink); stmmac_disable_all_queues(priv); @@ -4169,7 +4066,21 @@ static int stmmac_release(struct net_device *dev) if (stmmac_fpe_supported(priv)) ethtool_mmsv_stop(&priv->fpe_cfg.mmsv); +} + +/** + * stmmac_release - close entry point of the driver + * @dev : device pointer. + * Description: + * This is the stop entry point of the driver. 
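The restructured stmmac_open() above takes its runtime-PM reference with pm_runtime_resume_and_get() and unwinds it on the error path. A short sketch of how that helper is meant to be paired, assuming a generic device rather than the stmmac private data:

#include <linux/pm_runtime.h>

/* pm_runtime_resume_and_get() bumps the usage counter and resumes the
 * device; on failure it already drops the counter again, so only the
 * success path needs the matching pm_runtime_put().
 */
static int example_touch_hw(struct device *dev)
{
	int ret;

	ret = pm_runtime_resume_and_get(dev);
	if (ret < 0)
		return ret;

	/* ... hardware access would go here ... */

	pm_runtime_put(dev);

	return 0;
}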
+ */ +static int stmmac_release(struct net_device *dev) +{ + struct stmmac_priv *priv = netdev_priv(dev); + + __stmmac_release(dev); + phylink_disconnect_phy(priv->phylink); pm_runtime_put(priv->device); return 0; @@ -4983,6 +4894,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, { struct stmmac_txq_stats *txq_stats = &priv->xstats.txq_stats[queue]; struct stmmac_tx_queue *tx_q = &priv->dma_conf.tx_queue[queue]; + bool csum = !priv->plat->tx_queues_cfg[queue].coe_unsupported; unsigned int entry = tx_q->cur_tx; struct dma_desc *tx_desc; dma_addr_t dma_addr; @@ -5034,7 +4946,7 @@ static int stmmac_xdp_xmit_xdpf(struct stmmac_priv *priv, int queue, stmmac_set_desc_addr(priv, tx_desc, dma_addr); stmmac_prepare_tx_desc(priv, tx_desc, 1, xdpf->len, - true, priv->mode, true, true, + csum, priv->mode, true, true, xdpf->len); tx_q->tx_count_frames++; @@ -5733,7 +5645,8 @@ drain_data: skb->protocol = eth_type_trans(skb, priv->dev); - if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb)) + if (unlikely(!coe) || !stmmac_has_ip_ethertype(skb) || + (status & csum_none)) skb_checksum_none_assert(skb); else skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -5965,7 +5878,7 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu) return PTR_ERR(dma_conf); } - stmmac_release(dev); + __stmmac_release(dev); ret = __stmmac_open(dev, dma_conf); if (ret) { @@ -7055,7 +6968,6 @@ irq_error: for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) hrtimer_cancel(&priv->dma_conf.tx_queue[chan].txtimer); - stmmac_hw_teardown(dev); init_error: free_dma_desc_resources(priv, &priv->dma_conf); dma_desc_error: @@ -7240,7 +7152,6 @@ static int stmmac_hw_init(struct stmmac_priv *priv) priv->plat->enh_desc = priv->dma_cap.enh_desc; priv->plat->pmt = priv->dma_cap.pmt_remote_wake_up && !(priv->plat->flags & STMMAC_FLAG_USE_PHY_WOL); - priv->hw->pmt = priv->plat->pmt; if (priv->dma_cap.hash_tb_sz) { priv->hw->multicast_filter_bins = (BIT(priv->dma_cap.hash_tb_sz) << 5); @@ -7278,6 +7189,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv) if (priv->plat->pmt) { dev_info(priv->device, "Wake-Up On Lan supported\n"); device_set_wakeup_capable(priv->device, 1); + devm_pm_set_wake_irq(priv->device, priv->wol_irq); } if (priv->dma_cap.tsoen) @@ -7710,17 +7622,6 @@ int stmmac_dvr_probe(struct device *device, stmmac_fpe_init(priv); - /* If a specific clk_csr value is passed from the platform - * this means that the CSR Clock Range selection cannot be - * changed at run-time and it is fixed. Viceversa the driver'll try to - * set the MDC clock dynamically according to the csr actual - * clock input. 
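The probe-time hunk above pairs device_set_wakeup_capable() with the new devm_pm_set_wake_irq() call so the PM core arms the WoL interrupt across suspend. A minimal sketch of that pairing; the helper name is illustrative only:

#include <linux/pm_wakeirq.h>
#include <linux/pm_wakeup.h>

/* Declare the device wakeup-capable and hand its dedicated wake IRQ to
 * the PM core, which arms/disarms it around system sleep.
 */
static int example_register_wol_irq(struct device *dev, int wol_irq)
{
	device_set_wakeup_capable(dev, true);

	return devm_pm_set_wake_irq(dev, wol_irq);
}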
- */ - if (priv->plat->clk_csr >= 0) - priv->clk_csr = priv->plat->clk_csr; - else - stmmac_clk_csr_set(priv); - stmmac_check_pcs_mode(priv); pm_runtime_get_noresume(device); @@ -7858,7 +7759,7 @@ int stmmac_suspend(struct device *dev) priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv); /* Enable Power down mode by programming the PMT regs */ - if (device_may_wakeup(priv->device) && priv->plat->pmt) { + if (stmmac_wol_enabled_mac(priv)) { stmmac_pmt(priv, priv->hw, priv->wolopts); priv->irq_wake = 1; } else { @@ -7869,16 +7770,18 @@ int stmmac_suspend(struct device *dev) mutex_unlock(&priv->lock); rtnl_lock(); - if (device_may_wakeup(priv->device) && !priv->plat->pmt) + if (stmmac_wol_enabled_phy(priv)) phylink_speed_down(priv->phylink, false); - phylink_suspend(priv->phylink, - device_may_wakeup(priv->device) && priv->plat->pmt); + phylink_suspend(priv->phylink, stmmac_wol_enabled_mac(priv)); rtnl_unlock(); if (stmmac_fpe_supported(priv)) ethtool_mmsv_stop(&priv->fpe_cfg.mmsv); + if (priv->plat->suspend) + return priv->plat->suspend(dev, priv->plat->bsp_priv); + return 0; } EXPORT_SYMBOL_GPL(stmmac_suspend); @@ -7931,6 +7834,12 @@ int stmmac_resume(struct device *dev) struct stmmac_priv *priv = netdev_priv(ndev); int ret; + if (priv->plat->resume) { + ret = priv->plat->resume(dev, priv->plat->bsp_priv); + if (ret) + return ret; + } + if (!netif_running(ndev)) return 0; @@ -7940,7 +7849,7 @@ int stmmac_resume(struct device *dev) * this bit because it can generate problems while resuming * from another devices (e.g. serial console). */ - if (device_may_wakeup(priv->device) && priv->plat->pmt) { + if (stmmac_wol_enabled_mac(priv)) { mutex_lock(&priv->lock); stmmac_pmt(priv, priv->hw, 0); mutex_unlock(&priv->lock); @@ -7975,7 +7884,16 @@ int stmmac_resume(struct device *dev) stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv, &priv->dma_conf); - stmmac_hw_setup(ndev, false); + ret = stmmac_hw_setup(ndev); + if (ret < 0) { + netdev_err(priv->dev, "%s: Hw setup failed\n", __func__); + mutex_unlock(&priv->lock); + rtnl_unlock(); + return ret; + } + + stmmac_init_timestamping(priv); + stmmac_init_coalesce(priv); phylink_rx_clk_stop_block(priv->phylink); stmmac_set_rx_mode(ndev); @@ -7993,7 +7911,7 @@ int stmmac_resume(struct device *dev) * workqueue thread, which will race with initialisation. 
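stmmac_suspend()/stmmac_resume() above now defer to optional plat->suspend and plat->resume callbacks. A hypothetical BSP glue could hook them as sketched below; the structure, clock field and function names are invented for illustration, and only the callback signature is taken from the stmmac_pci_suspend()/stmmac_pci_resume() hunk further down:

#include <linux/clk.h>
#include <linux/device.h>

struct example_bsp {
	struct clk *aux_clk;
};

/* Gate an auxiliary clock across system sleep. */
static int example_bsp_suspend(struct device *dev, void *bsp_priv)
{
	struct example_bsp *bsp = bsp_priv;

	clk_disable_unprepare(bsp->aux_clk);

	return 0;
}

static int example_bsp_resume(struct device *dev, void *bsp_priv)
{
	struct example_bsp *bsp = bsp_priv;

	return clk_prepare_enable(bsp->aux_clk);
}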
*/ phylink_resume(priv->phylink); - if (device_may_wakeup(priv->device) && !priv->plat->pmt) + if (stmmac_wol_enabled_phy(priv)) phylink_speed_up(priv->phylink); rtnl_unlock(); @@ -8004,6 +7922,10 @@ int stmmac_resume(struct device *dev) } EXPORT_SYMBOL_GPL(stmmac_resume); +/* This is not the same as EXPORT_GPL_SIMPLE_DEV_PM_OPS() when CONFIG_PM=n */ +DEFINE_SIMPLE_DEV_PM_OPS(stmmac_simple_pm_ops, stmmac_suspend, stmmac_resume); +EXPORT_SYMBOL_GPL(stmmac_simple_pm_ops); + #ifndef MODULE static int __init stmmac_cmdline_opt(char *str) { diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c index 836f2848dfeb..f408737f6fc7 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c @@ -23,9 +23,9 @@ #include "dwxgmac2.h" #include "stmmac.h" -#define MII_BUSY 0x00000001 -#define MII_WRITE 0x00000002 -#define MII_DATA_MASK GENMASK(15, 0) +#define MII_ADDR_GBUSY BIT(0) +#define MII_ADDR_GWRITE BIT(1) +#define MII_DATA_GD_MASK GENMASK(15, 0) /* GMAC4 defines */ #define MII_GMAC4_GOC_SHIFT 2 @@ -45,6 +45,16 @@ #define MII_XGMAC_PA_SHIFT 16 #define MII_XGMAC_DA_SHIFT 21 +static int stmmac_mdio_wait(void __iomem *reg, u32 mask) +{ + u32 v; + + if (readl_poll_timeout(reg, v, !(v & mask), 100, 10000)) + return -EBUSY; + + return 0; +} + static void stmmac_xgmac2_c45_format(struct stmmac_priv *priv, int phyaddr, int devad, int phyreg, u32 *hw_addr) { @@ -83,7 +93,6 @@ static int stmmac_xgmac2_mdio_read(struct stmmac_priv *priv, u32 addr, { unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 tmp; int ret; ret = pm_runtime_resume_and_get(priv->device); @@ -91,33 +100,25 @@ static int stmmac_xgmac2_mdio_read(struct stmmac_priv *priv, u32 addr, return ret; /* Wait until any existing MII operation is complete */ - if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) { - ret = -EBUSY; + ret = stmmac_mdio_wait(priv->ioaddr + mii_data, MII_XGMAC_BUSY); + if (ret) goto err_disable_clks; - } - value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) - & priv->hw->mii.clk_csr_mask; - value |= MII_XGMAC_READ; + value |= priv->gmii_address_bus_config | MII_XGMAC_READ; /* Wait until any existing MII operation is complete */ - if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) { - ret = -EBUSY; + ret = stmmac_mdio_wait(priv->ioaddr + mii_data, MII_XGMAC_BUSY); + if (ret) goto err_disable_clks; - } /* Set the MII address register to read */ writel(addr, priv->ioaddr + mii_address); writel(value, priv->ioaddr + mii_data); /* Wait until any existing MII operation is complete */ - if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) { - ret = -EBUSY; + ret = stmmac_mdio_wait(priv->ioaddr + mii_data, MII_XGMAC_BUSY); + if (ret) goto err_disable_clks; - } /* Read the data from the MII data register */ ret = (int)readl(priv->ioaddr + mii_data) & GENMASK(15, 0); @@ -131,12 +132,9 @@ err_disable_clks: static int stmmac_xgmac2_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - /* Until ver 2.20 XGMAC does not support C22 addr >= 4 */ if (priv->synopsys_id < DWXGMAC_CORE_2_20 && phyaddr > MII_XGMAC_MAX_C22ADDR) @@ -150,12 +148,9 @@ static int stmmac_xgmac2_mdio_read_c22(struct 
mii_bus *bus, int phyaddr, static int stmmac_xgmac2_mdio_read_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - stmmac_xgmac2_c45_format(priv, phyaddr, devad, phyreg, &addr); return stmmac_xgmac2_mdio_read(priv, addr, MII_XGMAC_BUSY); @@ -166,7 +161,6 @@ static int stmmac_xgmac2_mdio_write(struct stmmac_priv *priv, u32 addr, { unsigned int mii_address = priv->hw->mii.addr; unsigned int mii_data = priv->hw->mii.data; - u32 tmp; int ret; ret = pm_runtime_resume_and_get(priv->device); @@ -174,31 +168,23 @@ static int stmmac_xgmac2_mdio_write(struct stmmac_priv *priv, u32 addr, return ret; /* Wait until any existing MII operation is complete */ - if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) { - ret = -EBUSY; + ret = stmmac_mdio_wait(priv->ioaddr + mii_data, MII_XGMAC_BUSY); + if (ret) goto err_disable_clks; - } - value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) - & priv->hw->mii.clk_csr_mask; - value |= phydata; - value |= MII_XGMAC_WRITE; + value |= priv->gmii_address_bus_config | phydata | MII_XGMAC_WRITE; /* Wait until any existing MII operation is complete */ - if (readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000)) { - ret = -EBUSY; + ret = stmmac_mdio_wait(priv->ioaddr + mii_data, MII_XGMAC_BUSY); + if (ret) goto err_disable_clks; - } /* Set the MII address register to write */ writel(addr, priv->ioaddr + mii_address); writel(value, priv->ioaddr + mii_data); /* Wait until any existing MII operation is complete */ - ret = readl_poll_timeout(priv->ioaddr + mii_data, tmp, - !(tmp & MII_XGMAC_BUSY), 100, 10000); + ret = stmmac_mdio_wait(priv->ioaddr + mii_data, MII_XGMAC_BUSY); err_disable_clks: pm_runtime_put(priv->device); @@ -209,12 +195,9 @@ err_disable_clks: static int stmmac_xgmac2_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - /* Until ver 2.20 XGMAC does not support C22 addr >= 4 */ if (priv->synopsys_id < DWXGMAC_CORE_2_20 && phyaddr > MII_XGMAC_MAX_C22ADDR) @@ -229,37 +212,78 @@ static int stmmac_xgmac2_mdio_write_c22(struct mii_bus *bus, int phyaddr, static int stmmac_xgmac2_mdio_write_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv; + struct stmmac_priv *priv = netdev_priv(bus->priv); u32 addr; - priv = netdev_priv(ndev); - stmmac_xgmac2_c45_format(priv, phyaddr, devad, phyreg, &addr); return stmmac_xgmac2_mdio_write(priv, addr, MII_XGMAC_BUSY, phydata); } -static int stmmac_mdio_read(struct stmmac_priv *priv, int data, u32 value) +/** + * stmmac_mdio_format_addr() - format the address register + * @priv: struct stmmac_priv pointer + * @pa: 5-bit MDIO package address + * @gr: 5-bit MDIO register address (C22) or MDIO device address (C45) + * + * Return: formatted address register + */ +static u32 stmmac_mdio_format_addr(struct stmmac_priv *priv, + unsigned int pa, unsigned int gr) { - unsigned int mii_address = priv->hw->mii.addr; - unsigned int mii_data = priv->hw->mii.data; - u32 v; + const struct mii_regs *mii_regs = &priv->hw->mii; - if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + 
return ((pa << mii_regs->addr_shift) & mii_regs->addr_mask) | + ((gr << mii_regs->reg_shift) & mii_regs->reg_mask) | + priv->gmii_address_bus_config | + MII_ADDR_GBUSY; +} - writel(data, priv->ioaddr + mii_data); - writel(value, priv->ioaddr + mii_address); +static int stmmac_mdio_access(struct stmmac_priv *priv, unsigned int pa, + unsigned int gr, u32 cmd, u32 data, bool read) +{ + void __iomem *mii_address = priv->ioaddr + priv->hw->mii.addr; + void __iomem *mii_data = priv->ioaddr + priv->hw->mii.data; + u32 addr; + int ret; - if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; + ret = pm_runtime_resume_and_get(priv->device); + if (ret < 0) + return ret; - /* Read the data from the MII data register */ - return readl(priv->ioaddr + mii_data) & MII_DATA_MASK; + ret = stmmac_mdio_wait(mii_address, MII_ADDR_GBUSY); + if (ret) + goto out; + + addr = stmmac_mdio_format_addr(priv, pa, gr) | cmd; + + writel(data, mii_data); + writel(addr, mii_address); + + ret = stmmac_mdio_wait(mii_address, MII_ADDR_GBUSY); + if (ret) + goto out; + + /* Read the data from the MII data register if in read mode */ + ret = read ? readl(mii_data) & MII_DATA_GD_MASK : 0; + +out: + pm_runtime_put(priv->device); + + return ret; +} + +static int stmmac_mdio_read(struct stmmac_priv *priv, unsigned int pa, + unsigned int gr, u32 cmd, int data) +{ + return stmmac_mdio_access(priv, pa, gr, cmd, data, true); +} + +static int stmmac_mdio_write(struct stmmac_priv *priv, unsigned int pa, + unsigned int gr, u32 cmd, int data) +{ + return stmmac_mdio_access(priv, pa, gr, cmd, data, false); } /** @@ -274,28 +298,15 @@ static int stmmac_mdio_read(struct stmmac_priv *priv, int data, u32 value) */ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); - u32 value = MII_BUSY; - int data = 0; - - data = pm_runtime_resume_and_get(priv->device); - if (data < 0) - return data; - - value |= (phyaddr << priv->hw->mii.addr_shift) - & priv->hw->mii.addr_mask; - value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; - value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) - & priv->hw->mii.clk_csr_mask; - if (priv->plat->has_gmac4) - value |= MII_GMAC4_READ; + struct stmmac_priv *priv = netdev_priv(bus->priv); + u32 cmd; - data = stmmac_mdio_read(priv, data, value); - - pm_runtime_put(priv->device); + if (priv->plat->has_gmac4) + cmd = MII_GMAC4_READ; + else + cmd = 0; - return data; + return stmmac_mdio_read(priv, phyaddr, phyreg, cmd, 0); } /** @@ -312,54 +323,11 @@ static int stmmac_mdio_read_c22(struct mii_bus *bus, int phyaddr, int phyreg) static int stmmac_mdio_read_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); - u32 value = MII_BUSY; - int data = 0; - - data = pm_runtime_get_sync(priv->device); - if (data < 0) { - pm_runtime_put_noidle(priv->device); - return data; - } - - value |= (phyaddr << priv->hw->mii.addr_shift) - & priv->hw->mii.addr_mask; - value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; - value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) - & priv->hw->mii.clk_csr_mask; - value |= MII_GMAC4_READ; - value |= MII_GMAC4_C45E; - value &= ~priv->hw->mii.reg_mask; - value |= (devad << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; + struct stmmac_priv *priv = netdev_priv(bus->priv); + int data = phyreg << 
MII_GMAC4_REG_ADDR_SHIFT; + u32 cmd = MII_GMAC4_READ | MII_GMAC4_C45E; - data |= phyreg << MII_GMAC4_REG_ADDR_SHIFT; - - data = stmmac_mdio_read(priv, data, value); - - pm_runtime_put(priv->device); - - return data; -} - -static int stmmac_mdio_write(struct stmmac_priv *priv, int data, u32 value) -{ - unsigned int mii_address = priv->hw->mii.addr; - unsigned int mii_data = priv->hw->mii.data; - u32 v; - - /* Wait until any existing MII operation is complete */ - if (readl_poll_timeout(priv->ioaddr + mii_address, v, !(v & MII_BUSY), - 100, 10000)) - return -EBUSY; - - /* Set the MII address register to write */ - writel(data, priv->ioaddr + mii_data); - writel(value, priv->ioaddr + mii_address); - - /* Wait until any existing MII operation is complete */ - return readl_poll_timeout(priv->ioaddr + mii_address, v, - !(v & MII_BUSY), 100, 10000); + return stmmac_mdio_read(priv, phyaddr, devad, cmd, data); } /** @@ -373,31 +341,15 @@ static int stmmac_mdio_write(struct stmmac_priv *priv, int data, u32 value) static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); - int ret, data = phydata; - u32 value = MII_BUSY; - - ret = pm_runtime_resume_and_get(priv->device); - if (ret < 0) - return ret; - - value |= (phyaddr << priv->hw->mii.addr_shift) - & priv->hw->mii.addr_mask; - value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; + struct stmmac_priv *priv = netdev_priv(bus->priv); + u32 cmd; - value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) - & priv->hw->mii.clk_csr_mask; if (priv->plat->has_gmac4) - value |= MII_GMAC4_WRITE; + cmd = MII_GMAC4_WRITE; else - value |= MII_WRITE; + cmd = MII_ADDR_GWRITE; - ret = stmmac_mdio_write(priv, data, value); - - pm_runtime_put(priv->device); - - return ret; + return stmmac_mdio_write(priv, phyaddr, phyreg, cmd, phydata); } /** @@ -412,36 +364,13 @@ static int stmmac_mdio_write_c22(struct mii_bus *bus, int phyaddr, int phyreg, static int stmmac_mdio_write_c45(struct mii_bus *bus, int phyaddr, int devad, int phyreg, u16 phydata) { - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); - int ret, data = phydata; - u32 value = MII_BUSY; - - ret = pm_runtime_get_sync(priv->device); - if (ret < 0) { - pm_runtime_put_noidle(priv->device); - return ret; - } - - value |= (phyaddr << priv->hw->mii.addr_shift) - & priv->hw->mii.addr_mask; - value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; - - value |= (priv->clk_csr << priv->hw->mii.clk_csr_shift) - & priv->hw->mii.clk_csr_mask; - - value |= MII_GMAC4_WRITE; - value |= MII_GMAC4_C45E; - value &= ~priv->hw->mii.reg_mask; - value |= (devad << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask; + struct stmmac_priv *priv = netdev_priv(bus->priv); + u32 cmd = MII_GMAC4_WRITE | MII_GMAC4_C45E; + int data = phydata; data |= phyreg << MII_GMAC4_REG_ADDR_SHIFT; - ret = stmmac_mdio_write(priv, data, value); - - pm_runtime_put(priv->device); - - return ret; + return stmmac_mdio_write(priv, phyaddr, devad, cmd, data); } /** @@ -452,8 +381,7 @@ static int stmmac_mdio_write_c45(struct mii_bus *bus, int phyaddr, int stmmac_mdio_reset(struct mii_bus *bus) { #if IS_ENABLED(CONFIG_STMMAC_PLATFORM) - struct net_device *ndev = bus->priv; - struct stmmac_priv *priv = netdev_priv(ndev); + struct stmmac_priv *priv = netdev_priv(bus->priv); unsigned int mii_address = priv->hw->mii.addr; #ifdef CONFIG_OF @@ -497,12 +425,11 @@ int 
stmmac_mdio_reset(struct mii_bus *bus) int stmmac_pcs_setup(struct net_device *ndev) { + struct stmmac_priv *priv = netdev_priv(ndev); struct fwnode_handle *devnode, *pcsnode; struct dw_xpcs *xpcs = NULL; - struct stmmac_priv *priv; int addr, ret; - priv = netdev_priv(ndev); devnode = priv->plat->port_node; if (priv->plat->pcs_init) { @@ -547,6 +474,102 @@ void stmmac_pcs_clean(struct net_device *ndev) } /** + * stmmac_clk_csr_set - dynamically set the MDC clock + * @priv: driver private structure + * Description: this is to dynamically set the MDC clock according to the csr + * clock input. + * Return: MII register CR field value + * Note: + * If a specific clk_csr value is passed from the platform + * this means that the CSR Clock Range selection cannot be + * changed at run-time and it is fixed (as reported in the driver + * documentation). Viceversa the driver will try to set the MDC + * clock dynamically according to the actual clock input. + */ +static u32 stmmac_clk_csr_set(struct stmmac_priv *priv) +{ + unsigned long clk_rate; + u32 value = ~0; + + clk_rate = clk_get_rate(priv->plat->stmmac_clk); + + /* Platform provided default clk_csr would be assumed valid + * for all other cases except for the below mentioned ones. + * For values higher than the IEEE 802.3 specified frequency + * we can not estimate the proper divider as it is not known + * the frequency of clk_csr_i. So we do not change the default + * divider. + */ + if (clk_rate < CSR_F_35M) + value = STMMAC_CSR_20_35M; + else if (clk_rate < CSR_F_60M) + value = STMMAC_CSR_35_60M; + else if (clk_rate < CSR_F_100M) + value = STMMAC_CSR_60_100M; + else if (clk_rate < CSR_F_150M) + value = STMMAC_CSR_100_150M; + else if (clk_rate < CSR_F_250M) + value = STMMAC_CSR_150_250M; + else if (clk_rate <= CSR_F_300M) + value = STMMAC_CSR_250_300M; + else if (clk_rate < CSR_F_500M) + value = STMMAC_CSR_300_500M; + else if (clk_rate < CSR_F_800M) + value = STMMAC_CSR_500_800M; + + if (priv->plat->flags & STMMAC_FLAG_HAS_SUN8I) { + if (clk_rate > 160000000) + value = 0x03; + else if (clk_rate > 80000000) + value = 0x02; + else if (clk_rate > 40000000) + value = 0x01; + else + value = 0; + } + + if (priv->plat->has_xgmac) { + if (clk_rate > 400000000) + value = 0x5; + else if (clk_rate > 350000000) + value = 0x4; + else if (clk_rate > 300000000) + value = 0x3; + else if (clk_rate > 250000000) + value = 0x2; + else if (clk_rate > 150000000) + value = 0x1; + else + value = 0x0; + } + + return value; +} + +static void stmmac_mdio_bus_config(struct stmmac_priv *priv) +{ + u32 value; + + /* If a specific clk_csr value is passed from the platform, this means + * that the CSR Clock Range value should not be computed from the CSR + * clock. 
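When the CSR clock band is known up front, a platform can pin the MDC divider in its plat data instead of letting stmmac_clk_csr_set() above derive it from clk_get_rate(); the stmmac_pci.c defaults further down do exactly this. A minimal sketch, with a placeholder function name:

#include <linux/stmmac.h>

static void example_default_data(struct plat_stmmacenet_data *plat)
{
	/* csr_i clock sits in the 20-35 MHz band -> MDC = csr_i / 16 */
	plat->clk_csr = STMMAC_CSR_20_35M;
}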
+ */ + if (priv->plat->clk_csr >= 0) + value = priv->plat->clk_csr; + else + value = stmmac_clk_csr_set(priv); + + value <<= priv->hw->mii.clk_csr_shift; + + if (value & ~priv->hw->mii.clk_csr_mask) + dev_warn(priv->device, + "clk_csr value out of range (0x%08x exceeds mask 0x%08x), truncating\n", + value, priv->hw->mii.clk_csr_mask); + + priv->gmii_address_bus_config = value & priv->hw->mii.clk_csr_mask; +} + +/** * stmmac_mdio_register * @ndev: net device structure * Description: it registers the MII bus @@ -566,6 +589,8 @@ int stmmac_mdio_register(struct net_device *ndev) if (!mdio_bus_data) return 0; + stmmac_mdio_bus_config(priv); + new_bus = mdiobus_alloc(); if (!new_bus) return -ENOMEM; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c index 9c1b54b701f7..4e3aa611fda8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c @@ -21,7 +21,8 @@ struct stmmac_pci_info { static void common_default_data(struct plat_stmmacenet_data *plat) { - plat->clk_csr = 2; /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + /* clk_csr_i = 20-35MHz & MDC = clk_csr_i/16 */ + plat->clk_csr = STMMAC_CSR_20_35M; plat->has_gmac = 1; plat->force_sf_dma_mode = 1; @@ -74,7 +75,7 @@ static int snps_gmac5_default_data(struct pci_dev *pdev, { int i; - plat->clk_csr = 5; + plat->clk_csr = STMMAC_CSR_250_300M; plat->has_gmac4 = 1; plat->force_sf_dma_mode = 1; plat->flags |= STMMAC_FLAG_TSO_EN; @@ -138,6 +139,37 @@ static const struct stmmac_pci_info snps_gmac5_pci_info = { .setup = snps_gmac5_default_data, }; +static int stmmac_pci_suspend(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + ret = pci_save_state(pdev); + if (ret) + return ret; + + pci_disable_device(pdev); + pci_wake_from_d3(pdev, true); + return 0; +} + +static int stmmac_pci_resume(struct device *dev, void *bsp_priv) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int ret; + + pci_restore_state(pdev); + pci_set_power_state(pdev, PCI_D0); + + ret = pci_enable_device(pdev); + if (ret) + return ret; + + pci_set_master(pdev); + + return 0; +} + /** * stmmac_pci_probe * @@ -217,6 +249,9 @@ static int stmmac_pci_probe(struct pci_dev *pdev, plat->safety_feat_cfg->prtyen = 1; plat->safety_feat_cfg->tmouten = 1; + plat->suspend = stmmac_pci_suspend; + plat->resume = stmmac_pci_resume; + return stmmac_dvr_probe(&pdev->dev, plat, &res); } @@ -231,43 +266,6 @@ static void stmmac_pci_remove(struct pci_dev *pdev) stmmac_dvr_remove(&pdev->dev); } -static int __maybe_unused stmmac_pci_suspend(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - ret = stmmac_suspend(dev); - if (ret) - return ret; - - ret = pci_save_state(pdev); - if (ret) - return ret; - - pci_disable_device(pdev); - pci_wake_from_d3(pdev, true); - return 0; -} - -static int __maybe_unused stmmac_pci_resume(struct device *dev) -{ - struct pci_dev *pdev = to_pci_dev(dev); - int ret; - - pci_restore_state(pdev); - pci_set_power_state(pdev, PCI_D0); - - ret = pci_enable_device(pdev); - if (ret) - return ret; - - pci_set_master(pdev); - - return stmmac_resume(dev); -} - -static SIMPLE_DEV_PM_OPS(stmmac_pm_ops, stmmac_pci_suspend, stmmac_pci_resume); - /* synthetic ID, no official vendor */ #define PCI_VENDOR_ID_STMMAC 0x0700 @@ -289,7 +287,7 @@ static struct pci_driver stmmac_pci_driver = { .probe = stmmac_pci_probe, .remove = stmmac_pci_remove, .driver = { - .pm = &stmmac_pm_ops, + .pm = &stmmac_simple_pm_ops, }, 
}; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 030fcf1b5993..27bcaae07a7f 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -453,8 +453,12 @@ stmmac_probe_config_dt(struct platform_device *pdev, u8 *mac) return ERR_PTR(phy_mode); plat->phy_interface = phy_mode; + rc = stmmac_of_get_mac_mode(np); - plat->mac_interface = rc < 0 ? plat->phy_interface : rc; + if (rc >= 0 && rc != phy_mode) + dev_warn(&pdev->dev, + "\"mac-mode\" property used for %s but differs to \"phy-mode\" of %s, and will be ignored. Please report.\n", + phy_modes(rc), phy_modes(phy_mode)); /* Some wrapper drivers still rely on phy_node. Let's save it while * they are not converted to phylink. */ @@ -811,6 +815,22 @@ static void stmmac_pltfr_exit(struct platform_device *pdev, plat->exit(pdev, plat->bsp_priv); } +static int stmmac_plat_suspend(struct device *dev, void *bsp_priv) +{ + struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev)); + + stmmac_pltfr_exit(to_platform_device(dev), priv->plat); + + return 0; +} + +static int stmmac_plat_resume(struct device *dev, void *bsp_priv) +{ + struct stmmac_priv *priv = netdev_priv(dev_get_drvdata(dev)); + + return stmmac_pltfr_init(to_platform_device(dev), priv->plat); +} + /** * stmmac_pltfr_probe * @pdev: platform device pointer @@ -825,6 +845,11 @@ int stmmac_pltfr_probe(struct platform_device *pdev, { int ret; + if (!plat->suspend && plat->exit) + plat->suspend = stmmac_plat_suspend; + if (!plat->resume && plat->init) + plat->resume = stmmac_plat_resume; + ret = stmmac_pltfr_init(pdev, plat); if (ret) return ret; @@ -886,45 +911,36 @@ void stmmac_pltfr_remove(struct platform_device *pdev) } EXPORT_SYMBOL_GPL(stmmac_pltfr_remove); -/** - * stmmac_pltfr_suspend - * @dev: device pointer - * Description: this function is invoked when suspend the driver and it direcly - * call the main suspend function and then, if required, on some platform, it - * can call an exit helper. - */ -static int __maybe_unused stmmac_pltfr_suspend(struct device *dev) -{ - int ret; - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct platform_device *pdev = to_platform_device(dev); - - ret = stmmac_suspend(dev); - stmmac_pltfr_exit(pdev, priv->plat); - - return ret; -} - -/** - * stmmac_pltfr_resume - * @dev: device pointer - * Description: this function is invoked when resume the driver before calling - * the main resume function, on some platforms, it can call own init helper - * if required. 
- */ -static int __maybe_unused stmmac_pltfr_resume(struct device *dev) +static int stmmac_bus_clks_config(struct stmmac_priv *priv, bool enabled) { - struct net_device *ndev = dev_get_drvdata(dev); - struct stmmac_priv *priv = netdev_priv(ndev); - struct platform_device *pdev = to_platform_device(dev); + struct plat_stmmacenet_data *plat_dat = priv->plat; int ret; - ret = stmmac_pltfr_init(pdev, priv->plat); - if (ret) - return ret; + if (enabled) { + ret = clk_prepare_enable(plat_dat->stmmac_clk); + if (ret) + return ret; + ret = clk_prepare_enable(plat_dat->pclk); + if (ret) { + clk_disable_unprepare(plat_dat->stmmac_clk); + return ret; + } + if (plat_dat->clks_config) { + ret = plat_dat->clks_config(plat_dat->bsp_priv, enabled); + if (ret) { + clk_disable_unprepare(plat_dat->stmmac_clk); + clk_disable_unprepare(plat_dat->pclk); + return ret; + } + } + } else { + clk_disable_unprepare(plat_dat->stmmac_clk); + clk_disable_unprepare(plat_dat->pclk); + if (plat_dat->clks_config) + plat_dat->clks_config(plat_dat->bsp_priv, enabled); + } - return stmmac_resume(dev); + return 0; } static int __maybe_unused stmmac_runtime_suspend(struct device *dev) @@ -954,7 +970,7 @@ static int __maybe_unused stmmac_pltfr_noirq_suspend(struct device *dev) if (!netif_running(ndev)) return 0; - if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + if (!stmmac_wol_enabled_mac(priv)) { /* Disable clock in case of PWM is off */ clk_disable_unprepare(priv->plat->clk_ptp_ref); @@ -975,7 +991,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) if (!netif_running(ndev)) return 0; - if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + if (!stmmac_wol_enabled_mac(priv)) { /* enable the clk previously disabled */ ret = pm_runtime_force_resume(dev); if (ret) @@ -994,7 +1010,7 @@ static int __maybe_unused stmmac_pltfr_noirq_resume(struct device *dev) } const struct dev_pm_ops stmmac_pltfr_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_suspend, stmmac_pltfr_resume) + SET_SYSTEM_SLEEP_PM_OPS(stmmac_suspend, stmmac_resume) SET_RUNTIME_PM_OPS(stmmac_runtime_suspend, stmmac_runtime_resume, NULL) SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(stmmac_pltfr_noirq_suspend, stmmac_pltfr_noirq_resume) }; diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 3767ba495e78..993ff4e87e55 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -10,6 +10,8 @@ #include "stmmac.h" #include "stmmac_ptp.h" +#define PTP_SAFE_TIME_OFFSET_NS 500000 + /** * stmmac_adjust_freq * @@ -171,7 +173,11 @@ static int stmmac_enable(struct ptp_clock_info *ptp, u32 acr_value; switch (rq->type) { - case PTP_CLK_REQ_PEROUT: + case PTP_CLK_REQ_PEROUT: { + struct timespec64 curr_time; + u64 target_ns = 0; + u64 ns = 0; + /* Reject requests with unsupported flags */ if (rq->perout.flags) return -EOPNOTSUPP; @@ -180,6 +186,31 @@ static int stmmac_enable(struct ptp_clock_info *ptp, cfg->start.tv_sec = rq->perout.start.sec; cfg->start.tv_nsec = rq->perout.start.nsec; + + /* A time set in the past won't trigger the start of the flexible PPS generation for + * the GMAC5. For some reason it does for the GMAC4 but setting a time in the past + * should be addressed anyway. Therefore, any value set it the past is considered as + * an offset compared to the current MAC system time. 
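The PEROUT handler below pushes a start time that lies in the past forward by the current MAC time using timespec64_add_safe(), which saturates tv_sec at TIME64_MAX on overflow; that saturation is what the added -EINVAL check tests for. A self-contained sketch of the same check, with placeholder names:

#include <linux/errno.h>
#include <linux/time64.h>

static int example_push_start_forward(struct timespec64 *start,
				      struct timespec64 now)
{
	*start = timespec64_add_safe(*start, now);
	if (start->tv_sec == TIME64_MAX)
		return -EINVAL;	/* the sum overflowed time64_t */

	return 0;
}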
+ * Be aware that an offset too low may not trigger flexible PPS generation + * if time spent in this configuration makes the targeted time already outdated. + * To address this, add a safe time offset. + */ + if (!cfg->start.tv_sec && cfg->start.tv_nsec < PTP_SAFE_TIME_OFFSET_NS) + cfg->start.tv_nsec += PTP_SAFE_TIME_OFFSET_NS; + + target_ns = cfg->start.tv_nsec + ((u64)cfg->start.tv_sec * NSEC_PER_SEC); + + stmmac_get_systime(priv, priv->ptpaddr, &ns); + if (ns > TIME64_MAX - PTP_SAFE_TIME_OFFSET_NS) + return -EINVAL; + + curr_time = ns_to_timespec64(ns); + if (target_ns < ns + PTP_SAFE_TIME_OFFSET_NS) { + cfg->start = timespec64_add_safe(cfg->start, curr_time); + if (cfg->start.tv_sec == TIME64_MAX) + return -EINVAL; + } + cfg->period.tv_sec = rq->perout.period.sec; cfg->period.tv_nsec = rq->perout.period.nsec; @@ -190,6 +221,7 @@ static int stmmac_enable(struct ptp_clock_info *ptp, priv->systime_flags); write_unlock_irqrestore(&priv->ptp_lock, flags); break; + } case PTP_CLK_REQ_EXTTS: { u8 channel; @@ -247,10 +279,7 @@ static int stmmac_get_syncdevicetime(ktime_t *device, { struct stmmac_priv *priv = (struct stmmac_priv *)ctx; - if (priv->plat->crosststamp) - return priv->plat->crosststamp(device, system, ctx); - else - return -EOPNOTSUPP; + return priv->plat->crosststamp(device, system, ctx); } static int stmmac_getcrosststamp(struct ptp_clock_info *ptp, @@ -278,7 +307,6 @@ const struct ptp_clock_info stmmac_ptp_clock_ops = { .gettime64 = stmmac_get_time, .settime64 = stmmac_set_time, .enable = stmmac_enable, - .getcrosststamp = stmmac_getcrosststamp, }; /* structure describing a PTP hardware clock */ @@ -296,7 +324,6 @@ const struct ptp_clock_info dwmac1000_ptp_clock_ops = { .gettime64 = stmmac_get_time, .settime64 = stmmac_set_time, .enable = dwmac1000_ptp_enable, - .getcrosststamp = stmmac_getcrosststamp, }; /** @@ -332,6 +359,9 @@ void stmmac_ptp_register(struct stmmac_priv *priv) if (priv->plat->ptp_max_adj) priv->ptp_clock_ops.max_adj = priv->plat->ptp_max_adj; + if (priv->plat->crosststamp) + priv->ptp_clock_ops.getcrosststamp = stmmac_getcrosststamp; + rwlock_init(&priv->ptp_lock); mutex_init(&priv->aux_ts_lock); @@ -340,8 +370,12 @@ void stmmac_ptp_register(struct stmmac_priv *priv) if (IS_ERR(priv->ptp_clock)) { netdev_err(priv->dev, "ptp_clock_register failed\n"); priv->ptp_clock = NULL; - } else if (priv->ptp_clock) + } + + if (priv->ptp_clock) netdev_info(priv->dev, "registered PTP clock\n"); + else + mutex_destroy(&priv->aux_ts_lock); } /** @@ -357,7 +391,7 @@ void stmmac_ptp_unregister(struct stmmac_priv *priv) priv->ptp_clock = NULL; pr_debug("Removed PTP HW clock successfully on %s\n", priv->dev->name); - } - mutex_destroy(&priv->aux_ts_lock); + mutex_destroy(&priv->aux_ts_lock); + } } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c index 694d6ee14381..97e89a604abd 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c @@ -1080,6 +1080,7 @@ disable: for (i = 0; i < priv->plat->tx_queues_to_use; i++) { priv->xstats.max_sdu_txq_drop[i] = 0; priv->xstats.mtl_est_txq_hlbf[i] = 0; + priv->xstats.mtl_est_txq_hlbs[i] = 0; } mutex_unlock(&priv->est_lock); } @@ -1097,7 +1098,8 @@ static void tc_taprio_stats(struct stmmac_priv *priv, for (i = 0; i < priv->plat->tx_queues_to_use; i++) window_drops += priv->xstats.max_sdu_txq_drop[i] + - priv->xstats.mtl_est_txq_hlbf[i]; + priv->xstats.mtl_est_txq_hlbf[i] + + priv->xstats.mtl_est_txq_hlbs[i]; 
qopt->stats.window_drops = window_drops; /* Transmission overrun doesn't happen for stmmac, hence always 0 */ @@ -1111,7 +1113,8 @@ static void tc_taprio_queue_stats(struct stmmac_priv *priv, int queue = qopt->queue_stats.queue; q_stats->stats.window_drops = priv->xstats.max_sdu_txq_drop[queue] + - priv->xstats.mtl_est_txq_hlbf[queue]; + priv->xstats.mtl_est_txq_hlbf[queue] + + priv->xstats.mtl_est_txq_hlbs[queue]; /* Transmission overrun doesn't happen for stmmac, hence always 0 */ q_stats->stats.tx_overruns = 0; diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index a07c910c497a..a54d71155263 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -229,4 +229,16 @@ config TI_ICSS_IEP To compile this driver as a module, choose M here. The module will be called icss_iep. +config TI_PRUETH + tristate "TI PRU Ethernet EMAC driver" + depends on PRU_REMOTEPROC + depends on NET_SWITCHDEV + select TI_ICSS_IEP + imply PTP_1588_CLOCK + help + Some TI SoCs has Programmable Realtime Unit (PRU) cores which can + support Single or Dual Ethernet ports with the help of firmware code + running on PRU cores. This driver supports remoteproc based + communication to PRU firmware to expose Ethernet interface to Linux. + endif # NET_VENDOR_TI diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index cbcf44806924..93c0a4d0e33a 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -3,6 +3,9 @@ # Makefile for the TI network device drivers. # +obj-$(CONFIG_TI_PRUETH) += icssm-prueth.o +icssm-prueth-y := icssm/icssm_prueth.o + obj-$(CONFIG_TI_CPSW) += cpsw-common.o obj-$(CONFIG_TI_DAVINCI_EMAC) += cpsw-common.o obj-$(CONFIG_TI_CPSW_SWITCHDEV) += cpsw-common.o diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c index 9032444435e9..c57497074ae6 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c +++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c @@ -694,17 +694,20 @@ static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, struct kernel_ethtool_ts_info *info) { struct am65_cpsw_common *common = am65_ndev_to_common(ndev); - unsigned int ptp_v2_filter; - - ptp_v2_filter = BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | - BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | - BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | - BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | - BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | - BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | - BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | - BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) | - BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ); + unsigned int ptp_filter; + + ptp_filter = BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ); if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)) return ethtool_op_get_ts_info(ndev, info); @@ -716,7 +719,7 @@ static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_RAW_HARDWARE; info->phc_index = am65_cpts_phc_index(common->cpts); info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON); - info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | ptp_v2_filter; + 
info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | ptp_filter; return 0; } diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index ecd6ecac87bb..110eb2da8dbc 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -1522,7 +1522,7 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common, } } - if (single_port) { + if (single_port && num_tx) { netif_txq = netdev_get_tx_queue(ndev, chn); netdev_tx_completed_queue(netif_txq, num_tx, total_bytes); am65_cpsw_nuss_tx_wake(tx_chn, ndev, netif_txq); @@ -1813,6 +1813,9 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_NONE: port->rx_ts_enabled = false; break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: @@ -1823,7 +1826,7 @@ static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev, case HWTSTAMP_FILTER_PTP_V2_SYNC: case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: port->rx_ts_enabled = true; - cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT | HWTSTAMP_FILTER_PTP_V1_L4_EVENT; break; case HWTSTAMP_FILTER_ALL: case HWTSTAMP_FILTER_SOME: @@ -1884,8 +1887,8 @@ static int am65_cpsw_nuss_hwtstamp_get(struct net_device *ndev, cfg.flags = 0; cfg.tx_type = port->tx_ts_enabled ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; - cfg.rx_filter = port->rx_ts_enabled ? - HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE; + cfg.rx_filter = port->rx_ts_enabled ? HWTSTAMP_FILTER_PTP_V2_EVENT | + HWTSTAMP_FILTER_PTP_V1_L4_EVENT : HWTSTAMP_FILTER_NONE; return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0; } diff --git a/drivers/net/ethernet/ti/icssg/icss_iep.c b/drivers/net/ethernet/ti/icssg/icss_iep.c index d8c9fe1d98c4..ec085897edf0 100644 --- a/drivers/net/ethernet/ti/icssg/icss_iep.c +++ b/drivers/net/ethernet/ti/icssg/icss_iep.c @@ -982,11 +982,112 @@ static const struct icss_iep_plat_data am654_icss_iep_plat_data = { .config = &am654_icss_iep_regmap_config, }; +static const struct icss_iep_plat_data am57xx_icss_iep_plat_data = { + .flags = ICSS_IEP_64BIT_COUNTER_SUPPORT | + ICSS_IEP_SLOW_COMPEN_REG_SUPPORT, + .reg_offs = { + [ICSS_IEP_GLOBAL_CFG_REG] = 0x00, + [ICSS_IEP_COMPEN_REG] = 0x08, + [ICSS_IEP_SLOW_COMPEN_REG] = 0x0c, + [ICSS_IEP_COUNT_REG0] = 0x10, + [ICSS_IEP_COUNT_REG1] = 0x14, + [ICSS_IEP_CAPTURE_CFG_REG] = 0x18, + [ICSS_IEP_CAPTURE_STAT_REG] = 0x1c, + + [ICSS_IEP_CAP6_RISE_REG0] = 0x50, + [ICSS_IEP_CAP6_RISE_REG1] = 0x54, + + [ICSS_IEP_CAP7_RISE_REG0] = 0x60, + [ICSS_IEP_CAP7_RISE_REG1] = 0x64, + + [ICSS_IEP_CMP_CFG_REG] = 0x70, + [ICSS_IEP_CMP_STAT_REG] = 0x74, + [ICSS_IEP_CMP0_REG0] = 0x78, + [ICSS_IEP_CMP0_REG1] = 0x7c, + [ICSS_IEP_CMP1_REG0] = 0x80, + [ICSS_IEP_CMP1_REG1] = 0x84, + + [ICSS_IEP_CMP8_REG0] = 0xc0, + [ICSS_IEP_CMP8_REG1] = 0xc4, + [ICSS_IEP_SYNC_CTRL_REG] = 0x180, + [ICSS_IEP_SYNC0_STAT_REG] = 0x188, + [ICSS_IEP_SYNC1_STAT_REG] = 0x18c, + [ICSS_IEP_SYNC_PWIDTH_REG] = 0x190, + [ICSS_IEP_SYNC0_PERIOD_REG] = 0x194, + [ICSS_IEP_SYNC1_DELAY_REG] = 0x198, + [ICSS_IEP_SYNC_START_REG] = 0x19c, + }, + .config = &am654_icss_iep_regmap_config, +}; + +static bool am335x_icss_iep_valid_reg(struct device *dev, unsigned int reg) +{ + switch (reg) { + case ICSS_IEP_GLOBAL_CFG_REG ... 
ICSS_IEP_CAPTURE_STAT_REG: + case ICSS_IEP_CAP6_RISE_REG0: + case ICSS_IEP_CMP_CFG_REG ... ICSS_IEP_CMP0_REG0: + case ICSS_IEP_CMP8_REG0 ... ICSS_IEP_SYNC_START_REG: + return true; + default: + return false; + } +} + +static const struct regmap_config am335x_icss_iep_regmap_config = { + .name = "icss iep", + .reg_stride = 1, + .reg_write = icss_iep_regmap_write, + .reg_read = icss_iep_regmap_read, + .writeable_reg = am335x_icss_iep_valid_reg, + .readable_reg = am335x_icss_iep_valid_reg, +}; + +static const struct icss_iep_plat_data am335x_icss_iep_plat_data = { + .flags = 0, + .reg_offs = { + [ICSS_IEP_GLOBAL_CFG_REG] = 0x00, + [ICSS_IEP_COMPEN_REG] = 0x08, + [ICSS_IEP_COUNT_REG0] = 0x0c, + [ICSS_IEP_CAPTURE_CFG_REG] = 0x10, + [ICSS_IEP_CAPTURE_STAT_REG] = 0x14, + + [ICSS_IEP_CAP6_RISE_REG0] = 0x30, + + [ICSS_IEP_CAP7_RISE_REG0] = 0x38, + + [ICSS_IEP_CMP_CFG_REG] = 0x40, + [ICSS_IEP_CMP_STAT_REG] = 0x44, + [ICSS_IEP_CMP0_REG0] = 0x48, + + [ICSS_IEP_CMP8_REG0] = 0x88, + [ICSS_IEP_SYNC_CTRL_REG] = 0x100, + [ICSS_IEP_SYNC0_STAT_REG] = 0x108, + [ICSS_IEP_SYNC1_STAT_REG] = 0x10c, + [ICSS_IEP_SYNC_PWIDTH_REG] = 0x110, + [ICSS_IEP_SYNC0_PERIOD_REG] = 0x114, + [ICSS_IEP_SYNC1_DELAY_REG] = 0x118, + [ICSS_IEP_SYNC_START_REG] = 0x11c, + }, + .config = &am335x_icss_iep_regmap_config, +}; + static const struct of_device_id icss_iep_of_match[] = { { .compatible = "ti,am654-icss-iep", .data = &am654_icss_iep_plat_data, }, + { + .compatible = "ti,am5728-icss-iep", + .data = &am57xx_icss_iep_plat_data, + }, + { + .compatible = "ti,am4376-icss-iep", + .data = &am335x_icss_iep_plat_data, + }, + { + .compatible = "ti,am3356-icss-iep", + .data = &am335x_icss_iep_plat_data, + }, {}, }; MODULE_DEVICE_TABLE(of, icss_iep_of_match); diff --git a/drivers/net/ethernet/ti/icssg/icssg_prueth.c b/drivers/net/ethernet/ti/icssg/icssg_prueth.c index dadce6009791..e42d0fdefee1 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_prueth.c +++ b/drivers/net/ethernet/ti/icssg/icssg_prueth.c @@ -654,7 +654,7 @@ static void icssg_prueth_hsr_fdb_add_del(struct prueth_emac *emac, static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) { - struct net_device *real_dev; + struct net_device *real_dev, *port_dev; struct prueth_emac *emac; u8 vlan_id, i; @@ -663,11 +663,15 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) if (is_hsr_master(real_dev)) { for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) { - emac = netdev_priv(hsr_get_port_ndev(real_dev, i)); - if (!emac) + port_dev = hsr_get_port_ndev(real_dev, i); + emac = netdev_priv(port_dev); + if (!emac) { + dev_put(port_dev); return -EINVAL; + } icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, true); + dev_put(port_dev); } } else { emac = netdev_priv(real_dev); @@ -679,7 +683,7 @@ static int icssg_prueth_hsr_add_mcast(struct net_device *ndev, const u8 *addr) static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr) { - struct net_device *real_dev; + struct net_device *real_dev, *port_dev; struct prueth_emac *emac; u8 vlan_id, i; @@ -688,11 +692,15 @@ static int icssg_prueth_hsr_del_mcast(struct net_device *ndev, const u8 *addr) if (is_hsr_master(real_dev)) { for (i = HSR_PT_SLAVE_A; i < HSR_PT_INTERLINK; i++) { - emac = netdev_priv(hsr_get_port_ndev(real_dev, i)); - if (!emac) + port_dev = hsr_get_port_ndev(real_dev, i); + emac = netdev_priv(port_dev); + if (!emac) { + dev_put(port_dev); return -EINVAL; + } icssg_prueth_hsr_fdb_add_del(emac, addr, vlan_id, false); + dev_put(port_dev); } } else { emac = 
netdev_priv(real_dev); diff --git a/drivers/net/ethernet/ti/icssm/icssm_prueth.c b/drivers/net/ethernet/ti/icssm/icssm_prueth.c new file mode 100644 index 000000000000..293b7af04263 --- /dev/null +++ b/drivers/net/ethernet/ti/icssm/icssm_prueth.c @@ -0,0 +1,1746 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Texas Instruments ICSSM Ethernet Driver + * + * Copyright (C) 2018-2022 Texas Instruments Incorporated - https://www.ti.com/ + * + */ + +#include <linux/etherdevice.h> +#include <linux/genalloc.h> +#include <linux/if_bridge.h> +#include <linux/if_hsr.h> +#include <linux/if_vlan.h> +#include <linux/interrupt.h> +#include <linux/kernel.h> +#include <linux/mfd/syscon.h> +#include <linux/module.h> +#include <linux/net_tstamp.h> +#include <linux/of.h> +#include <linux/of_irq.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> +#include <linux/platform_device.h> +#include <linux/phy.h> +#include <linux/remoteproc/pruss.h> +#include <linux/ptp_classify.h> +#include <linux/regmap.h> +#include <linux/remoteproc.h> +#include <net/pkt_cls.h> + +#include "icssm_prueth.h" +#include "../icssg/icssg_mii_rt.h" +#include "../icssg/icss_iep.h" + +#define OCMC_RAM_SIZE (SZ_64K) + +#define TX_START_DELAY 0x40 +#define TX_CLK_DELAY_100M 0x6 +#define HR_TIMER_TX_DELAY_US 100 + +static void icssm_prueth_write_reg(struct prueth *prueth, + enum prueth_mem region, + unsigned int reg, u32 val) +{ + writel_relaxed(val, prueth->mem[region].va + reg); +} + +/* Below macro is for 1528 Byte Frame support, to Allow even with + * Redundancy tag + */ +#define PRUSS_MII_RT_RX_FRMS_MAX_SUPPORT_EMAC (VLAN_ETH_FRAME_LEN + \ + ETH_FCS_LEN + \ + ICSSM_LRE_TAG_SIZE) + +/* ensure that order of PRUSS mem regions is same as enum prueth_mem */ +static enum pruss_mem pruss_mem_ids[] = { PRUSS_MEM_DRAM0, PRUSS_MEM_DRAM1, + PRUSS_MEM_SHRD_RAM2 }; + +static const struct prueth_queue_info queue_infos[][NUM_QUEUES] = { + [PRUETH_PORT_QUEUE_HOST] = { + [PRUETH_QUEUE1] = { + P0_Q1_BUFFER_OFFSET, + HOST_QUEUE_DESC_OFFSET, + P0_Q1_BD_OFFSET, + P0_Q1_BD_OFFSET + ((HOST_QUEUE_1_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE2] = { + P0_Q2_BUFFER_OFFSET, + HOST_QUEUE_DESC_OFFSET + 8, + P0_Q2_BD_OFFSET, + P0_Q2_BD_OFFSET + ((HOST_QUEUE_2_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE3] = { + P0_Q3_BUFFER_OFFSET, + HOST_QUEUE_DESC_OFFSET + 16, + P0_Q3_BD_OFFSET, + P0_Q3_BD_OFFSET + ((HOST_QUEUE_3_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE4] = { + P0_Q4_BUFFER_OFFSET, + HOST_QUEUE_DESC_OFFSET + 24, + P0_Q4_BD_OFFSET, + P0_Q4_BD_OFFSET + ((HOST_QUEUE_4_SIZE - 1) * BD_SIZE), + }, + }, + [PRUETH_PORT_QUEUE_MII0] = { + [PRUETH_QUEUE1] = { + P1_Q1_BUFFER_OFFSET, + P1_Q1_BUFFER_OFFSET + ((QUEUE_1_SIZE - 1) * + ICSS_BLOCK_SIZE), + P1_Q1_BD_OFFSET, + P1_Q1_BD_OFFSET + ((QUEUE_1_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE2] = { + P1_Q2_BUFFER_OFFSET, + P1_Q2_BUFFER_OFFSET + ((QUEUE_2_SIZE - 1) * + ICSS_BLOCK_SIZE), + P1_Q2_BD_OFFSET, + P1_Q2_BD_OFFSET + ((QUEUE_2_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE3] = { + P1_Q3_BUFFER_OFFSET, + P1_Q3_BUFFER_OFFSET + ((QUEUE_3_SIZE - 1) * + ICSS_BLOCK_SIZE), + P1_Q3_BD_OFFSET, + P1_Q3_BD_OFFSET + ((QUEUE_3_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE4] = { + P1_Q4_BUFFER_OFFSET, + P1_Q4_BUFFER_OFFSET + ((QUEUE_4_SIZE - 1) * + ICSS_BLOCK_SIZE), + P1_Q4_BD_OFFSET, + P1_Q4_BD_OFFSET + ((QUEUE_4_SIZE - 1) * BD_SIZE), + }, + }, + [PRUETH_PORT_QUEUE_MII1] = { + [PRUETH_QUEUE1] = { + P2_Q1_BUFFER_OFFSET, + P2_Q1_BUFFER_OFFSET + ((QUEUE_1_SIZE - 1) * + ICSS_BLOCK_SIZE), + P2_Q1_BD_OFFSET, + P2_Q1_BD_OFFSET + ((QUEUE_1_SIZE - 1) * 
BD_SIZE), + }, + [PRUETH_QUEUE2] = { + P2_Q2_BUFFER_OFFSET, + P2_Q2_BUFFER_OFFSET + ((QUEUE_2_SIZE - 1) * + ICSS_BLOCK_SIZE), + P2_Q2_BD_OFFSET, + P2_Q2_BD_OFFSET + ((QUEUE_2_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE3] = { + P2_Q3_BUFFER_OFFSET, + P2_Q3_BUFFER_OFFSET + ((QUEUE_3_SIZE - 1) * + ICSS_BLOCK_SIZE), + P2_Q3_BD_OFFSET, + P2_Q3_BD_OFFSET + ((QUEUE_3_SIZE - 1) * BD_SIZE), + }, + [PRUETH_QUEUE4] = { + P2_Q4_BUFFER_OFFSET, + P2_Q4_BUFFER_OFFSET + ((QUEUE_4_SIZE - 1) * + ICSS_BLOCK_SIZE), + P2_Q4_BD_OFFSET, + P2_Q4_BD_OFFSET + ((QUEUE_4_SIZE - 1) * BD_SIZE), + }, + }, +}; + +static const struct prueth_queue_desc queue_descs[][NUM_QUEUES] = { + [PRUETH_PORT_QUEUE_HOST] = { + { .rd_ptr = P0_Q1_BD_OFFSET, .wr_ptr = P0_Q1_BD_OFFSET, }, + { .rd_ptr = P0_Q2_BD_OFFSET, .wr_ptr = P0_Q2_BD_OFFSET, }, + { .rd_ptr = P0_Q3_BD_OFFSET, .wr_ptr = P0_Q3_BD_OFFSET, }, + { .rd_ptr = P0_Q4_BD_OFFSET, .wr_ptr = P0_Q4_BD_OFFSET, }, + }, + [PRUETH_PORT_QUEUE_MII0] = { + { .rd_ptr = P1_Q1_BD_OFFSET, .wr_ptr = P1_Q1_BD_OFFSET, }, + { .rd_ptr = P1_Q2_BD_OFFSET, .wr_ptr = P1_Q2_BD_OFFSET, }, + { .rd_ptr = P1_Q3_BD_OFFSET, .wr_ptr = P1_Q3_BD_OFFSET, }, + { .rd_ptr = P1_Q4_BD_OFFSET, .wr_ptr = P1_Q4_BD_OFFSET, }, + }, + [PRUETH_PORT_QUEUE_MII1] = { + { .rd_ptr = P2_Q1_BD_OFFSET, .wr_ptr = P2_Q1_BD_OFFSET, }, + { .rd_ptr = P2_Q2_BD_OFFSET, .wr_ptr = P2_Q2_BD_OFFSET, }, + { .rd_ptr = P2_Q3_BD_OFFSET, .wr_ptr = P2_Q3_BD_OFFSET, }, + { .rd_ptr = P2_Q4_BD_OFFSET, .wr_ptr = P2_Q4_BD_OFFSET, }, + } +}; + +static void icssm_prueth_hostconfig(struct prueth *prueth) +{ + void __iomem *sram_base = prueth->mem[PRUETH_MEM_SHARED_RAM].va; + void __iomem *sram; + + /* queue size lookup table */ + sram = sram_base + HOST_QUEUE_SIZE_ADDR; + writew(HOST_QUEUE_1_SIZE, sram); + writew(HOST_QUEUE_2_SIZE, sram + 2); + writew(HOST_QUEUE_3_SIZE, sram + 4); + writew(HOST_QUEUE_4_SIZE, sram + 6); + + /* queue information table */ + sram = sram_base + HOST_Q1_RX_CONTEXT_OFFSET; + memcpy_toio(sram, queue_infos[PRUETH_PORT_QUEUE_HOST], + sizeof(queue_infos[PRUETH_PORT_QUEUE_HOST])); + + /* buffer offset table */ + sram = sram_base + HOST_QUEUE_OFFSET_ADDR; + writew(P0_Q1_BUFFER_OFFSET, sram); + writew(P0_Q2_BUFFER_OFFSET, sram + 2); + writew(P0_Q3_BUFFER_OFFSET, sram + 4); + writew(P0_Q4_BUFFER_OFFSET, sram + 6); + + /* buffer descriptor offset table*/ + sram = sram_base + HOST_QUEUE_DESCRIPTOR_OFFSET_ADDR; + writew(P0_Q1_BD_OFFSET, sram); + writew(P0_Q2_BD_OFFSET, sram + 2); + writew(P0_Q3_BD_OFFSET, sram + 4); + writew(P0_Q4_BD_OFFSET, sram + 6); + + /* queue table */ + sram = sram_base + HOST_QUEUE_DESC_OFFSET; + memcpy_toio(sram, queue_descs[PRUETH_PORT_QUEUE_HOST], + sizeof(queue_descs[PRUETH_PORT_QUEUE_HOST])); +} + +static void icssm_prueth_mii_init(struct prueth *prueth) +{ + struct regmap *mii_rt; + u32 rxcfg_reg, rxcfg; + u32 txcfg_reg, txcfg; + + mii_rt = prueth->mii_rt; + + rxcfg = PRUSS_MII_RT_RXCFG_RX_ENABLE | + PRUSS_MII_RT_RXCFG_RX_DATA_RDY_MODE_DIS | + PRUSS_MII_RT_RXCFG_RX_L2_EN | + PRUSS_MII_RT_RXCFG_RX_CUT_PREAMBLE | + PRUSS_MII_RT_RXCFG_RX_L2_EOF_SCLR_DIS; + + /* Configuration of Port 0 Rx */ + rxcfg_reg = PRUSS_MII_RT_RXCFG0; + + regmap_write(mii_rt, rxcfg_reg, rxcfg); + + /* Configuration of Port 1 Rx */ + rxcfg_reg = PRUSS_MII_RT_RXCFG1; + + rxcfg |= PRUSS_MII_RT_RXCFG_RX_MUX_SEL; + + regmap_write(mii_rt, rxcfg_reg, rxcfg); + + txcfg = PRUSS_MII_RT_TXCFG_TX_ENABLE | + PRUSS_MII_RT_TXCFG_TX_AUTO_PREAMBLE | + PRUSS_MII_RT_TXCFG_TX_32_MODE_EN | + (TX_START_DELAY << PRUSS_MII_RT_TXCFG_TX_START_DELAY_SHIFT) | + 
(TX_CLK_DELAY_100M << PRUSS_MII_RT_TXCFG_TX_CLK_DELAY_SHIFT); + + /* Configuration of Port 0 Tx */ + txcfg_reg = PRUSS_MII_RT_TXCFG0; + + regmap_write(mii_rt, txcfg_reg, txcfg); + + txcfg |= PRUSS_MII_RT_TXCFG_TX_MUX_SEL; + + /* Configuration of Port 1 Tx */ + txcfg_reg = PRUSS_MII_RT_TXCFG1; + + regmap_write(mii_rt, txcfg_reg, txcfg); + + txcfg_reg = PRUSS_MII_RT_RX_FRMS0; + + /* Min frame length should be set to 64 to allow receive of standard + * Ethernet frames such as PTP, LLDP that will not have the tag/rct. + * Actual size written to register is size - 1 per TRM. This also + * includes CRC/FCS. + */ + txcfg = FIELD_PREP(PRUSS_MII_RT_RX_FRMS_MIN_FRM_MASK, + (PRUSS_MII_RT_RX_FRMS_MIN_FRM - 1)); + + /* For EMAC, set Max frame size to 1528 i.e size with VLAN. + * Actual size written to register is size - 1 as per TRM. + * Since driver support run time change of protocol, driver + * must overwrite the values based on Ethernet type. + */ + txcfg |= FIELD_PREP(PRUSS_MII_RT_RX_FRMS_MAX_FRM_MASK, + (PRUSS_MII_RT_RX_FRMS_MAX_SUPPORT_EMAC - 1)); + + regmap_write(mii_rt, txcfg_reg, txcfg); + + txcfg_reg = PRUSS_MII_RT_RX_FRMS1; + + regmap_write(mii_rt, txcfg_reg, txcfg); +} + +static void icssm_prueth_clearmem(struct prueth *prueth, enum prueth_mem region) +{ + memset_io(prueth->mem[region].va, 0, prueth->mem[region].size); +} + +static void icssm_prueth_hostinit(struct prueth *prueth) +{ + /* Clear shared RAM */ + icssm_prueth_clearmem(prueth, PRUETH_MEM_SHARED_RAM); + + /* Clear OCMC RAM */ + icssm_prueth_clearmem(prueth, PRUETH_MEM_OCMC); + + /* Clear data RAMs */ + if (prueth->eth_node[PRUETH_MAC0]) + icssm_prueth_clearmem(prueth, PRUETH_MEM_DRAM0); + if (prueth->eth_node[PRUETH_MAC1]) + icssm_prueth_clearmem(prueth, PRUETH_MEM_DRAM1); + + /* Initialize host queues in shared RAM */ + icssm_prueth_hostconfig(prueth); + + /* Configure MII_RT */ + icssm_prueth_mii_init(prueth); +} + +/* This function initialize the driver in EMAC mode + * based on eth_type + */ +static void icssm_prueth_init_ethernet_mode(struct prueth *prueth) +{ + icssm_prueth_hostinit(prueth); +} + +static void icssm_prueth_port_enable(struct prueth_emac *emac, bool enable) +{ + struct prueth *prueth = emac->prueth; + void __iomem *port_ctrl; + void __iomem *ram; + + ram = prueth->mem[emac->dram].va; + port_ctrl = ram + PORT_CONTROL_ADDR; + writeb(!!enable, port_ctrl); +} + +static int icssm_prueth_emac_config(struct prueth_emac *emac) +{ + struct prueth *prueth = emac->prueth; + u32 sharedramaddr, ocmcaddr; + void __iomem *dram_base; + void __iomem *mac_addr; + void __iomem *dram; + void __iomem *sram; + + /* PRU needs local shared RAM address for C28 */ + sharedramaddr = ICSS_LOCAL_SHARED_RAM; + /* PRU needs real global OCMC address for C30*/ + ocmcaddr = (u32)prueth->mem[PRUETH_MEM_OCMC].pa; + sram = prueth->mem[PRUETH_MEM_SHARED_RAM].va; + + /* Clear data RAM */ + icssm_prueth_clearmem(prueth, emac->dram); + + dram_base = prueth->mem[emac->dram].va; + + /* setup mac address */ + mac_addr = dram_base + PORT_MAC_ADDR; + memcpy_toio(mac_addr, emac->mac_addr, 6); + + /* queue information table */ + dram = dram_base + TX_CONTEXT_Q1_OFFSET_ADDR; + memcpy_toio(dram, queue_infos[emac->port_id], + sizeof(queue_infos[emac->port_id])); + + /* queue table */ + dram = dram_base + PORT_QUEUE_DESC_OFFSET; + memcpy_toio(dram, queue_descs[emac->port_id], + sizeof(queue_descs[emac->port_id])); + + emac->rx_queue_descs = sram + HOST_QUEUE_DESC_OFFSET; + emac->tx_queue_descs = dram; + + /* Set in constant table C28 of PRU0 to ICSS Shared 
memory */ + pru_rproc_set_ctable(emac->pru, PRU_C28, sharedramaddr); + + /* Set in constant table C30 of PRU0 to OCMC memory */ + pru_rproc_set_ctable(emac->pru, PRU_C30, ocmcaddr); + + return 0; +} + +/* called back by PHY layer if there is change in link state of hw port*/ +static void icssm_emac_adjust_link(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct phy_device *phydev = emac->phydev; + struct prueth *prueth = emac->prueth; + bool new_state = false; + enum prueth_mem region; + unsigned long flags; + u32 port_status = 0; + u32 txcfg, mask; + u32 delay; + + spin_lock_irqsave(&emac->lock, flags); + + if (phydev->link) { + /* check the mode of operation */ + if (phydev->duplex != emac->duplex) { + new_state = true; + emac->duplex = phydev->duplex; + } + if (phydev->speed != emac->speed) { + new_state = true; + emac->speed = phydev->speed; + } + if (!emac->link) { + new_state = true; + emac->link = 1; + } + } else if (emac->link) { + new_state = true; + emac->link = 0; + } + + if (new_state) { + phy_print_status(phydev); + region = emac->dram; + + /* update phy/port status information based on PHY values*/ + if (emac->link) { + port_status |= PORT_LINK_MASK; + + icssm_prueth_write_reg(prueth, region, PHY_SPEED_OFFSET, + emac->speed); + + delay = TX_CLK_DELAY_100M; + delay = delay << PRUSS_MII_RT_TXCFG_TX_CLK_DELAY_SHIFT; + mask = PRUSS_MII_RT_TXCFG_TX_CLK_DELAY_MASK; + + if (emac->port_id) + txcfg = PRUSS_MII_RT_TXCFG1; + else + txcfg = PRUSS_MII_RT_TXCFG0; + + regmap_update_bits(prueth->mii_rt, txcfg, mask, delay); + } + + writeb(port_status, prueth->mem[region].va + + PORT_STATUS_OFFSET); + } + + if (emac->link) { + /* reactivate the transmit queue if it is stopped */ + if (netif_running(ndev) && netif_queue_stopped(ndev)) + netif_wake_queue(ndev); + } else { + if (!netif_queue_stopped(ndev)) + netif_stop_queue(ndev); + } + + spin_unlock_irqrestore(&emac->lock, flags); +} + +static unsigned int +icssm_get_buff_desc_count(const struct prueth_queue_info *queue) +{ + unsigned int buffer_desc_count; + + buffer_desc_count = queue->buffer_desc_end - + queue->buffer_desc_offset; + buffer_desc_count /= BD_SIZE; + buffer_desc_count++; + + return buffer_desc_count; +} + +static void icssm_get_block(struct prueth_queue_desc __iomem *queue_desc, + const struct prueth_queue_info *queue, + int *write_block, int *read_block) +{ + *write_block = (readw(&queue_desc->wr_ptr) - + queue->buffer_desc_offset) / BD_SIZE; + *read_block = (readw(&queue_desc->rd_ptr) - + queue->buffer_desc_offset) / BD_SIZE; +} + +/** + * icssm_emac_rx_irq - EMAC Rx interrupt handler + * @irq: interrupt number + * @dev_id: pointer to net_device + * + * EMAC Interrupt handler - we only schedule NAPI and not process any packets + * here. 
+ * + * Return: IRQ_HANDLED if the interrupt handled + */ +static irqreturn_t icssm_emac_rx_irq(int irq, void *dev_id) +{ + struct net_device *ndev = (struct net_device *)dev_id; + struct prueth_emac *emac = netdev_priv(ndev); + + if (likely(netif_running(ndev))) { + /* disable Rx system event */ + disable_irq_nosync(emac->rx_irq); + napi_schedule(&emac->napi); + } + + return IRQ_HANDLED; +} + +/** + * icssm_prueth_tx_enqueue - queue a packet to firmware for transmission + * + * @emac: EMAC data structure + * @skb: packet data buffer + * @queue_id: priority queue id + * + * Return: 0 (Success) + */ +static int icssm_prueth_tx_enqueue(struct prueth_emac *emac, + struct sk_buff *skb, + enum prueth_queue_id queue_id) +{ + struct prueth_queue_desc __iomem *queue_desc; + const struct prueth_queue_info *txqueue; + struct net_device *ndev = emac->ndev; + unsigned int buffer_desc_count; + int free_blocks, update_block; + bool buffer_wrapped = false; + int write_block, read_block; + void *src_addr, *dst_addr; + int pkt_block_size; + void __iomem *dram; + int txport, pktlen; + u16 update_wr_ptr; + u32 wr_buf_desc; + void *ocmc_ram; + + dram = emac->prueth->mem[emac->dram].va; + if (eth_skb_pad(skb)) { + if (netif_msg_tx_err(emac) && net_ratelimit()) + netdev_err(ndev, "packet pad failed\n"); + return -ENOMEM; + } + + /* which port to tx: MII0 or MII1 */ + txport = emac->tx_port_queue; + src_addr = skb->data; + pktlen = skb->len; + /* Get the tx queue */ + queue_desc = emac->tx_queue_descs + queue_id; + txqueue = &queue_infos[txport][queue_id]; + + buffer_desc_count = icssm_get_buff_desc_count(txqueue); + + /* the PRU firmware deals mostly in pointers already + * offset into ram, we would like to deal in indexes + * within the queue we are working with for code + * simplicity, calculate this here + */ + icssm_get_block(queue_desc, txqueue, &write_block, &read_block); + + if (write_block > read_block) { + free_blocks = buffer_desc_count - write_block; + free_blocks += read_block; + } else if (write_block < read_block) { + free_blocks = read_block - write_block; + } else { /* they are all free */ + free_blocks = buffer_desc_count; + } + + pkt_block_size = DIV_ROUND_UP(pktlen, ICSS_BLOCK_SIZE); + if (pkt_block_size > free_blocks) /* out of queue space */ + return -ENOBUFS; + + /* calculate end BD address post write */ + update_block = write_block + pkt_block_size; + + /* Check for wrap around */ + if (update_block >= buffer_desc_count) { + update_block %= buffer_desc_count; + buffer_wrapped = true; + } + + /* OCMC RAM is not cached and write order is not important */ + ocmc_ram = (__force void *)emac->prueth->mem[PRUETH_MEM_OCMC].va; + dst_addr = ocmc_ram + txqueue->buffer_offset + + (write_block * ICSS_BLOCK_SIZE); + + /* Copy the data from socket buffer(DRAM) to PRU buffers(OCMC) */ + if (buffer_wrapped) { /* wrapped around buffer */ + int bytes = (buffer_desc_count - write_block) * ICSS_BLOCK_SIZE; + int remaining; + + /* bytes is integral multiple of ICSS_BLOCK_SIZE but + * entire packet may have fit within the last BD + * if pkt_info.length is not integral multiple of + * ICSS_BLOCK_SIZE + */ + if (pktlen < bytes) + bytes = pktlen; + + /* copy non-wrapped part */ + memcpy(dst_addr, src_addr, bytes); + + /* copy wrapped part */ + src_addr += bytes; + remaining = pktlen - bytes; + dst_addr = ocmc_ram + txqueue->buffer_offset; + memcpy(dst_addr, src_addr, remaining); + } else { + memcpy(dst_addr, src_addr, pktlen); + } + + /* update first buffer descriptor */ + wr_buf_desc = (pktlen << 
PRUETH_BD_LENGTH_SHIFT) & + PRUETH_BD_LENGTH_MASK; + writel(wr_buf_desc, dram + readw(&queue_desc->wr_ptr)); + + /* update the write pointer in this queue descriptor, the firmware + * polls for this change so this will signal the start of transmission + */ + update_wr_ptr = txqueue->buffer_desc_offset + (update_block * BD_SIZE); + writew(update_wr_ptr, &queue_desc->wr_ptr); + + return 0; +} + +void icssm_parse_packet_info(struct prueth *prueth, u32 buffer_descriptor, + struct prueth_packet_info *pkt_info) +{ + pkt_info->shadow = !!(buffer_descriptor & PRUETH_BD_SHADOW_MASK); + pkt_info->port = (buffer_descriptor & PRUETH_BD_PORT_MASK) >> + PRUETH_BD_PORT_SHIFT; + pkt_info->length = (buffer_descriptor & PRUETH_BD_LENGTH_MASK) >> + PRUETH_BD_LENGTH_SHIFT; + pkt_info->broadcast = !!(buffer_descriptor & PRUETH_BD_BROADCAST_MASK); + pkt_info->error = !!(buffer_descriptor & PRUETH_BD_ERROR_MASK); + pkt_info->lookup_success = !!(buffer_descriptor & + PRUETH_BD_LOOKUP_SUCCESS_MASK); + pkt_info->flood = !!(buffer_descriptor & PRUETH_BD_SW_FLOOD_MASK); + pkt_info->timestamp = !!(buffer_descriptor & PRUETH_BD_TIMESTAMP_MASK); +} + +/** + * icssm_emac_rx_packet - EMAC Receive function + * + * @emac: EMAC data structure + * @bd_rd_ptr: Buffer descriptor read pointer + * @pkt_info: packet information structure + * @rxqueue: Receive queue information structure + * + * Get a packet from receive queue + * + * Return: 0 (Success) + */ +int icssm_emac_rx_packet(struct prueth_emac *emac, u16 *bd_rd_ptr, + struct prueth_packet_info *pkt_info, + const struct prueth_queue_info *rxqueue) +{ + struct net_device *ndev = emac->ndev; + unsigned int buffer_desc_count; + int read_block, update_block; + unsigned int actual_pkt_len; + bool buffer_wrapped = false; + void *src_addr, *dst_addr; + struct sk_buff *skb; + int pkt_block_size; + void *ocmc_ram; + + /* the PRU firmware deals mostly in pointers already + * offset into ram, we would like to deal in indexes + * within the queue we are working with for code + * simplicity, calculate this here + */ + buffer_desc_count = icssm_get_buff_desc_count(rxqueue); + read_block = (*bd_rd_ptr - rxqueue->buffer_desc_offset) / BD_SIZE; + pkt_block_size = DIV_ROUND_UP(pkt_info->length, ICSS_BLOCK_SIZE); + + /* calculate end BD address post read */ + update_block = read_block + pkt_block_size; + + /* Check for wrap around */ + if (update_block >= buffer_desc_count) { + update_block %= buffer_desc_count; + if (update_block) + buffer_wrapped = true; + } + + /* calculate new pointer in ram */ + *bd_rd_ptr = rxqueue->buffer_desc_offset + (update_block * BD_SIZE); + + actual_pkt_len = pkt_info->length; + + /* Allocate a socket buffer for this packet */ + skb = netdev_alloc_skb_ip_align(ndev, actual_pkt_len); + if (!skb) { + if (netif_msg_rx_err(emac) && net_ratelimit()) + netdev_err(ndev, "failed rx buffer alloc\n"); + return -ENOMEM; + } + + dst_addr = skb->data; + + /* OCMC RAM is not cached and read order is not important */ + ocmc_ram = (__force void *)emac->prueth->mem[PRUETH_MEM_OCMC].va; + + /* Get the start address of the first buffer from + * the read buffer description + */ + src_addr = ocmc_ram + rxqueue->buffer_offset + + (read_block * ICSS_BLOCK_SIZE); + + /* Copy the data from PRU buffers(OCMC) to socket buffer(DRAM) */ + if (buffer_wrapped) { /* wrapped around buffer */ + int bytes = (buffer_desc_count - read_block) * ICSS_BLOCK_SIZE; + int remaining; + /* bytes is integral multiple of ICSS_BLOCK_SIZE but + * entire packet may have fit within the last BD + * if 
pkt_info.length is not integral multiple of + * ICSS_BLOCK_SIZE + */ + if (pkt_info->length < bytes) + bytes = pkt_info->length; + + /* copy non-wrapped part */ + memcpy(dst_addr, src_addr, bytes); + + /* copy wrapped part */ + dst_addr += bytes; + remaining = actual_pkt_len - bytes; + + src_addr = ocmc_ram + rxqueue->buffer_offset; + memcpy(dst_addr, src_addr, remaining); + src_addr += remaining; + } else { + memcpy(dst_addr, src_addr, actual_pkt_len); + src_addr += actual_pkt_len; + } + + skb_put(skb, actual_pkt_len); + + /* send packet up the stack */ + skb->protocol = eth_type_trans(skb, ndev); + netif_receive_skb(skb); + + /* update stats */ + emac->stats.rx_bytes += actual_pkt_len; + emac->stats.rx_packets++; + + return 0; +} + +static int icssm_emac_rx_packets(struct prueth_emac *emac, int budget) +{ + struct prueth_queue_desc __iomem *queue_desc; + const struct prueth_queue_info *rxqueue; + struct prueth *prueth = emac->prueth; + struct prueth_packet_info pkt_info; + int start_queue, end_queue; + void __iomem *shared_ram; + u16 bd_rd_ptr, bd_wr_ptr; + u16 update_rd_ptr; + u8 overflow_cnt; + u32 rd_buf_desc; + int used = 0; + int i, ret; + + shared_ram = emac->prueth->mem[PRUETH_MEM_SHARED_RAM].va; + + start_queue = emac->rx_queue_start; + end_queue = emac->rx_queue_end; + + /* skip Rx if budget is 0 */ + if (!budget) + return 0; + + /* search host queues for packets */ + for (i = start_queue; i <= end_queue; i++) { + queue_desc = emac->rx_queue_descs + i; + rxqueue = &queue_infos[PRUETH_PORT_HOST][i]; + + overflow_cnt = readb(&queue_desc->overflow_cnt); + if (overflow_cnt > 0) { + emac->stats.rx_over_errors += overflow_cnt; + /* reset to zero */ + writeb(0, &queue_desc->overflow_cnt); + } + + bd_rd_ptr = readw(&queue_desc->rd_ptr); + bd_wr_ptr = readw(&queue_desc->wr_ptr); + + /* while packets are available in this queue */ + while (bd_rd_ptr != bd_wr_ptr) { + /* get packet info from the read buffer descriptor */ + rd_buf_desc = readl(shared_ram + bd_rd_ptr); + icssm_parse_packet_info(prueth, rd_buf_desc, &pkt_info); + + if (pkt_info.length <= 0) { + /* a packet length of zero will cause us to + * never move the read pointer ahead, locking + * the driver, so we manually have to move it + * to the write pointer, discarding all + * remaining packets in this queue. This should + * never happen. + */ + update_rd_ptr = bd_wr_ptr; + emac->stats.rx_length_errors++; + } else if (pkt_info.length > EMAC_MAX_FRM_SUPPORT) { + /* if the packet is too large we skip it but we + * still need to move the read pointer ahead + * and assume something is wrong with the read + * pointer as the firmware should be filtering + * these packets + */ + update_rd_ptr = bd_wr_ptr; + emac->stats.rx_length_errors++; + } else { + update_rd_ptr = bd_rd_ptr; + ret = icssm_emac_rx_packet(emac, &update_rd_ptr, + &pkt_info, rxqueue); + if (ret) + return used; + used++; + } + + /* after reading the buffer descriptor we clear it + * to prevent improperly moved read pointer errors + * from simply looking like old packets. + */ + writel(0, shared_ram + bd_rd_ptr); + + /* update read pointer in queue descriptor */ + writew(update_rd_ptr, &queue_desc->rd_ptr); + bd_rd_ptr = update_rd_ptr; + + /* all we have room for? 
*/ + if (used >= budget) + return used; + } + } + + return used; +} + +static int icssm_emac_napi_poll(struct napi_struct *napi, int budget) +{ + struct prueth_emac *emac = container_of(napi, struct prueth_emac, napi); + int num_rx; + + num_rx = icssm_emac_rx_packets(emac, budget); + + if (num_rx < budget && napi_complete_done(napi, num_rx)) + enable_irq(emac->rx_irq); + + return num_rx; +} + +static int icssm_emac_set_boot_pru(struct prueth_emac *emac, + struct net_device *ndev) +{ + const struct prueth_firmware *pru_firmwares; + struct prueth *prueth = emac->prueth; + const char *fw_name; + int ret; + + pru_firmwares = &prueth->fw_data->fw_pru[emac->port_id - 1]; + fw_name = pru_firmwares->fw_name[prueth->eth_type]; + if (!fw_name) { + netdev_err(ndev, "eth_type %d not supported\n", + prueth->eth_type); + return -ENODEV; + } + + ret = rproc_set_firmware(emac->pru, fw_name); + if (ret) { + netdev_err(ndev, "failed to set %s firmware: %d\n", + fw_name, ret); + return ret; + } + + ret = rproc_boot(emac->pru); + if (ret) { + netdev_err(ndev, "failed to boot %s firmware: %d\n", + fw_name, ret); + return ret; + } + return ret; +} + +static int icssm_emac_request_irqs(struct prueth_emac *emac) +{ + struct net_device *ndev = emac->ndev; + int ret; + + ret = request_irq(emac->rx_irq, icssm_emac_rx_irq, + IRQF_TRIGGER_HIGH, + ndev->name, ndev); + if (ret) { + netdev_err(ndev, "unable to request RX IRQ\n"); + return ret; + } + + return ret; +} + +static void icssm_ptp_dram_init(struct prueth_emac *emac) +{ + void __iomem *sram = emac->prueth->mem[PRUETH_MEM_SHARED_RAM].va; + u64 temp64; + + writew(0, sram + MII_RX_CORRECTION_OFFSET); + writew(0, sram + MII_TX_CORRECTION_OFFSET); + + /* Initialize RCF to 1 (Linux N/A) */ + writel(1 * 1024, sram + TIMESYNC_TC_RCF_OFFSET); + + /* This flag will be set and cleared by firmware */ + /* Write Sync0 period for sync signal generation in PTP + * memory in shared RAM + */ + writel(200000000 / 50, sram + TIMESYNC_SYNC0_WIDTH_OFFSET); + + /* Write CMP1 period for sync signal generation in PTP + * memory in shared RAM + */ + temp64 = 1000000; + memcpy_toio(sram + TIMESYNC_CMP1_CMP_OFFSET, &temp64, sizeof(temp64)); + + /* Write Sync0 period for sync signal generation in PTP + * memory in shared RAM + */ + writel(1000000, sram + TIMESYNC_CMP1_PERIOD_OFFSET); + + /* Configures domainNumber list. Firmware supports 2 domains */ + writeb(0, sram + TIMESYNC_DOMAIN_NUMBER_LIST); + writeb(0, sram + TIMESYNC_DOMAIN_NUMBER_LIST + 1); + + /* Configure 1-step/2-step */ + writeb(1, sram + DISABLE_SWITCH_SYNC_RELAY_OFFSET); + + /* Configures the setting to Link local frame without HSR tag */ + writeb(0, sram + LINK_LOCAL_FRAME_HAS_HSR_TAG); + + /* Enable E2E/UDP PTP message timestamping */ + writeb(1, sram + PTP_IPV4_UDP_E2E_ENABLE); +} + +/** + * icssm_emac_ndo_open - EMAC device open + * @ndev: network adapter device + * + * Called when system wants to start the interface. 
+ * + * Return: 0 for a successful open, or appropriate error code + */ +static int icssm_emac_ndo_open(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + int ret; + + /* set h/w MAC as user might have re-configured */ + ether_addr_copy(emac->mac_addr, ndev->dev_addr); + + if (!prueth->emac_configured) + icssm_prueth_init_ethernet_mode(prueth); + + icssm_prueth_emac_config(emac); + + if (!prueth->emac_configured) { + icssm_ptp_dram_init(emac); + ret = icss_iep_init(prueth->iep, NULL, NULL, 0); + if (ret) { + netdev_err(ndev, "Failed to initialize iep: %d\n", ret); + goto iep_exit; + } + } + + ret = icssm_emac_set_boot_pru(emac, ndev); + if (ret) + goto iep_exit; + + ret = icssm_emac_request_irqs(emac); + if (ret) + goto rproc_shutdown; + + napi_enable(&emac->napi); + + /* start PHY */ + phy_start(emac->phydev); + + /* enable the port and vlan */ + icssm_prueth_port_enable(emac, true); + + prueth->emac_configured |= BIT(emac->port_id); + + if (netif_msg_drv(emac)) + dev_notice(&ndev->dev, "started\n"); + + return 0; + +rproc_shutdown: + rproc_shutdown(emac->pru); + +iep_exit: + if (!prueth->emac_configured) + icss_iep_exit(prueth->iep); + + return ret; +} + +/** + * icssm_emac_ndo_stop - EMAC device stop + * @ndev: network adapter device + * + * Called when system wants to stop or down the interface. + * + * Return: Always 0 (Success) + */ +static int icssm_emac_ndo_stop(struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + struct prueth *prueth = emac->prueth; + + prueth->emac_configured &= ~BIT(emac->port_id); + + /* disable the mac port */ + icssm_prueth_port_enable(emac, false); + + /* stop PHY */ + phy_stop(emac->phydev); + + napi_disable(&emac->napi); + hrtimer_cancel(&emac->tx_hrtimer); + + /* stop the PRU */ + rproc_shutdown(emac->pru); + + /* free rx interrupts */ + free_irq(emac->rx_irq, ndev); + + if (netif_msg_drv(emac)) + dev_notice(&ndev->dev, "stopped\n"); + + return 0; +} + +/* VLAN-tag PCP to priority queue map for EMAC/Switch/HSR/PRP used by driver + * Index is PCP val / 2. 
+ * low - pcp 0..3 maps to Q4 for Host + * high - pcp 4..7 maps to Q3 for Host + * low - pcp 0..3 maps to Q2 (FWD Queue) for PRU-x + * where x = 1 for PRUETH_PORT_MII0 + * 0 for PRUETH_PORT_MII1 + * high - pcp 4..7 maps to Q1 (FWD Queue) for PRU-x + */ +static const unsigned short emac_pcp_tx_priority_queue_map[] = { + PRUETH_QUEUE4, PRUETH_QUEUE4, + PRUETH_QUEUE3, PRUETH_QUEUE3, + PRUETH_QUEUE2, PRUETH_QUEUE2, + PRUETH_QUEUE1, PRUETH_QUEUE1, +}; + +static u16 icssm_prueth_get_tx_queue_id(struct prueth *prueth, + struct sk_buff *skb) +{ + u16 vlan_tci, pcp; + int err; + + err = vlan_get_tag(skb, &vlan_tci); + if (likely(err)) + pcp = 0; + else + pcp = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + + /* Below code (pcp >>= 1) is made common for all + * protocols (i.e., EMAC, RSTP, HSR and PRP)* + * pcp value 0,1 will be updated to 0 mapped to QUEUE4 + * pcp value 2,3 will be updated to 1 mapped to QUEUE4 + * pcp value 4,5 will be updated to 2 mapped to QUEUE3 + * pcp value 6,7 will be updated to 3 mapped to QUEUE3 + */ + pcp >>= 1; + + return emac_pcp_tx_priority_queue_map[pcp]; +} + +/** + * icssm_emac_ndo_start_xmit - EMAC Transmit function + * @skb: SKB pointer + * @ndev: EMAC network adapter + * + * Called by the system to transmit a packet - we queue the packet in + * EMAC hardware transmit queue + * + * Return: enum netdev_tx + */ +static enum netdev_tx icssm_emac_ndo_start_xmit(struct sk_buff *skb, + struct net_device *ndev) +{ + struct prueth_emac *emac = netdev_priv(ndev); + int ret; + u16 qid; + + qid = icssm_prueth_get_tx_queue_id(emac->prueth, skb); + ret = icssm_prueth_tx_enqueue(emac, skb, qid); + if (ret) { + if (ret != -ENOBUFS && netif_msg_tx_err(emac) && + net_ratelimit()) + netdev_err(ndev, "packet queue failed: %d\n", ret); + goto fail_tx; + } + + emac->stats.tx_packets++; + emac->stats.tx_bytes += skb->len; + dev_kfree_skb_any(skb); + + return NETDEV_TX_OK; + +fail_tx: + if (ret == -ENOBUFS) { + netif_stop_queue(ndev); + hrtimer_start(&emac->tx_hrtimer, + us_to_ktime(HR_TIMER_TX_DELAY_US), + HRTIMER_MODE_REL_PINNED); + ret = NETDEV_TX_BUSY; + } else { + /* error */ + emac->stats.tx_dropped++; + ret = NET_XMIT_DROP; + } + + return ret; +} + +/** + * icssm_emac_ndo_get_stats64 - EMAC get statistics function + * @ndev: The EMAC network adapter + * @stats: rtnl_link_stats structure + * + * Called when system wants to get statistics from the device. 
+ * + */ +static void icssm_emac_ndo_get_stats64(struct net_device *ndev, + struct rtnl_link_stats64 *stats) +{ + struct prueth_emac *emac = netdev_priv(ndev); + + stats->rx_packets = emac->stats.rx_packets; + stats->rx_bytes = emac->stats.rx_bytes; + stats->tx_packets = emac->stats.tx_packets; + stats->tx_bytes = emac->stats.tx_bytes; + stats->tx_dropped = emac->stats.tx_dropped; + stats->rx_over_errors = emac->stats.rx_over_errors; + stats->rx_length_errors = emac->stats.rx_length_errors; +} + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = icssm_emac_ndo_open, + .ndo_stop = icssm_emac_ndo_stop, + .ndo_start_xmit = icssm_emac_ndo_start_xmit, + .ndo_get_stats64 = icssm_emac_ndo_get_stats64, +}; + +/* get emac_port corresponding to eth_node name */ +static int icssm_prueth_node_port(struct device_node *eth_node) +{ + u32 port_id; + int ret; + + ret = of_property_read_u32(eth_node, "reg", &port_id); + if (ret) + return ret; + + if (port_id == 0) + return PRUETH_PORT_MII0; + else if (port_id == 1) + return PRUETH_PORT_MII1; + else + return PRUETH_PORT_INVALID; +} + +/* get MAC instance corresponding to eth_node name */ +static int icssm_prueth_node_mac(struct device_node *eth_node) +{ + u32 port_id; + int ret; + + ret = of_property_read_u32(eth_node, "reg", &port_id); + if (ret) + return ret; + + if (port_id == 0) + return PRUETH_MAC0; + else if (port_id == 1) + return PRUETH_MAC1; + else + return PRUETH_MAC_INVALID; +} + +static enum hrtimer_restart icssm_emac_tx_timer_callback(struct hrtimer *timer) +{ + struct prueth_emac *emac = + container_of(timer, struct prueth_emac, tx_hrtimer); + + if (netif_queue_stopped(emac->ndev)) + netif_wake_queue(emac->ndev); + + return HRTIMER_NORESTART; +} + +static int icssm_prueth_netdev_init(struct prueth *prueth, + struct device_node *eth_node) +{ + struct prueth_emac *emac; + struct net_device *ndev; + enum prueth_port port; + enum prueth_mac mac; + int ret; + + port = icssm_prueth_node_port(eth_node); + if (port == PRUETH_PORT_INVALID) + return -EINVAL; + + mac = icssm_prueth_node_mac(eth_node); + if (mac == PRUETH_MAC_INVALID) + return -EINVAL; + + ndev = devm_alloc_etherdev(prueth->dev, sizeof(*emac)); + if (!ndev) + return -ENOMEM; + + SET_NETDEV_DEV(ndev, prueth->dev); + emac = netdev_priv(ndev); + prueth->emac[mac] = emac; + emac->prueth = prueth; + emac->ndev = ndev; + emac->port_id = port; + + /* by default eth_type is EMAC */ + switch (port) { + case PRUETH_PORT_MII0: + emac->tx_port_queue = PRUETH_PORT_QUEUE_MII0; + + /* packets from MII0 are on queues 1 through 2 */ + emac->rx_queue_start = PRUETH_QUEUE1; + emac->rx_queue_end = PRUETH_QUEUE2; + + emac->dram = PRUETH_MEM_DRAM0; + emac->pru = prueth->pru0; + break; + case PRUETH_PORT_MII1: + emac->tx_port_queue = PRUETH_PORT_QUEUE_MII1; + + /* packets from MII1 are on queues 3 through 4 */ + emac->rx_queue_start = PRUETH_QUEUE3; + emac->rx_queue_end = PRUETH_QUEUE4; + + emac->dram = PRUETH_MEM_DRAM1; + emac->pru = prueth->pru1; + break; + default: + return -EINVAL; + } + + emac->rx_irq = of_irq_get_byname(eth_node, "rx"); + if (emac->rx_irq < 0) { + ret = emac->rx_irq; + if (ret != -EPROBE_DEFER) + dev_err(prueth->dev, "could not get rx irq\n"); + goto free; + } + + /* get mac address from DT and set private and netdev addr */ + ret = of_get_ethdev_address(eth_node, ndev); + if (!is_valid_ether_addr(ndev->dev_addr)) { + eth_hw_addr_random(ndev); + dev_warn(prueth->dev, "port %d: using random MAC addr: %pM\n", + port, ndev->dev_addr); + } + ether_addr_copy(emac->mac_addr, 
ndev->dev_addr); + + /* connect PHY */ + emac->phydev = of_phy_get_and_connect(ndev, eth_node, + icssm_emac_adjust_link); + if (!emac->phydev) { + dev_dbg(prueth->dev, "PHY connection failed\n"); + ret = -ENODEV; + goto free; + } + + /* remove unsupported modes */ + phy_remove_link_mode(emac->phydev, ETHTOOL_LINK_MODE_10baseT_Full_BIT); + + phy_remove_link_mode(emac->phydev, ETHTOOL_LINK_MODE_10baseT_Half_BIT); + phy_remove_link_mode(emac->phydev, ETHTOOL_LINK_MODE_100baseT_Half_BIT); + + phy_remove_link_mode(emac->phydev, ETHTOOL_LINK_MODE_Pause_BIT); + phy_remove_link_mode(emac->phydev, ETHTOOL_LINK_MODE_Asym_Pause_BIT); + + ndev->dev.of_node = eth_node; + ndev->netdev_ops = &emac_netdev_ops; + + netif_napi_add(ndev, &emac->napi, icssm_emac_napi_poll); + + hrtimer_setup(&emac->tx_hrtimer, &icssm_emac_tx_timer_callback, + CLOCK_MONOTONIC, HRTIMER_MODE_REL_PINNED); + + return 0; +free: + emac->ndev = NULL; + prueth->emac[mac] = NULL; + + return ret; +} + +static void icssm_prueth_netdev_exit(struct prueth *prueth, + struct device_node *eth_node) +{ + struct prueth_emac *emac; + enum prueth_mac mac; + + mac = icssm_prueth_node_mac(eth_node); + if (mac == PRUETH_MAC_INVALID) + return; + + emac = prueth->emac[mac]; + if (!emac) + return; + + phy_disconnect(emac->phydev); + + netif_napi_del(&emac->napi); + prueth->emac[mac] = NULL; +} + +static int icssm_prueth_probe(struct platform_device *pdev) +{ + struct device_node *eth0_node = NULL, *eth1_node = NULL; + struct device_node *eth_node, *eth_ports_node; + enum pruss_pru_id pruss_id0, pruss_id1; + struct device *dev = &pdev->dev; + struct device_node *np; + struct prueth *prueth; + struct pruss *pruss; + int i, ret; + + np = dev->of_node; + if (!np) + return -ENODEV; /* we don't support non DT */ + + prueth = devm_kzalloc(dev, sizeof(*prueth), GFP_KERNEL); + if (!prueth) + return -ENOMEM; + + platform_set_drvdata(pdev, prueth); + prueth->dev = dev; + prueth->fw_data = device_get_match_data(dev); + + eth_ports_node = of_get_child_by_name(np, "ethernet-ports"); + if (!eth_ports_node) + return -ENOENT; + + for_each_child_of_node(eth_ports_node, eth_node) { + u32 reg; + + if (strcmp(eth_node->name, "ethernet-port")) + continue; + ret = of_property_read_u32(eth_node, "reg", ®); + if (ret < 0) { + dev_err(dev, "%pOF error reading port_id %d\n", + eth_node, ret); + of_node_put(eth_node); + return ret; + } + + of_node_get(eth_node); + + if (reg == 0 && !eth0_node) { + eth0_node = eth_node; + if (!of_device_is_available(eth0_node)) { + of_node_put(eth0_node); + eth0_node = NULL; + } + } else if (reg == 1 && !eth1_node) { + eth1_node = eth_node; + if (!of_device_is_available(eth1_node)) { + of_node_put(eth1_node); + eth1_node = NULL; + } + } else { + if (reg == 0 || reg == 1) + dev_err(dev, "duplicate port reg value: %d\n", + reg); + else + dev_err(dev, "invalid port reg value: %d\n", + reg); + + of_node_put(eth_node); + } + } + + of_node_put(eth_ports_node); + + /* At least one node must be present and available else we fail */ + if (!eth0_node && !eth1_node) { + dev_err(dev, "neither port0 nor port1 node available\n"); + return -ENODEV; + } + + prueth->eth_node[PRUETH_MAC0] = eth0_node; + prueth->eth_node[PRUETH_MAC1] = eth1_node; + + prueth->mii_rt = syscon_regmap_lookup_by_phandle(np, "ti,mii-rt"); + if (IS_ERR(prueth->mii_rt)) { + dev_err(dev, "couldn't get mii-rt syscon regmap\n"); + ret = PTR_ERR(prueth->mii_rt); + goto put_eth; + } + + if (eth0_node) { + prueth->pru0 = pru_rproc_get(np, 0, &pruss_id0); + if (IS_ERR(prueth->pru0)) { + ret = 
PTR_ERR(prueth->pru0); + dev_err_probe(dev, ret, "unable to get PRU0"); + goto put_eth; + } + } + + if (eth1_node) { + prueth->pru1 = pru_rproc_get(np, 1, &pruss_id1); + if (IS_ERR(prueth->pru1)) { + ret = PTR_ERR(prueth->pru1); + dev_err_probe(dev, ret, "unable to get PRU1"); + goto put_pru0; + } + } + + pruss = pruss_get(prueth->pru0 ? prueth->pru0 : prueth->pru1); + if (IS_ERR(pruss)) { + ret = PTR_ERR(pruss); + dev_err(dev, "unable to get pruss handle\n"); + goto put_pru1; + } + prueth->pruss = pruss; + + /* Configure PRUSS */ + if (eth0_node) + pruss_cfg_gpimode(pruss, pruss_id0, PRUSS_GPI_MODE_MII); + if (eth1_node) + pruss_cfg_gpimode(pruss, pruss_id1, PRUSS_GPI_MODE_MII); + pruss_cfg_miirt_enable(pruss, true); + pruss_cfg_xfr_enable(pruss, PRU_TYPE_PRU, true); + + /* Get PRUSS mem resources */ + /* OCMC is system resource which we get separately */ + for (i = 0; i < ARRAY_SIZE(pruss_mem_ids); i++) { + /* skip appropriate DRAM if not required */ + if (!eth0_node && i == PRUETH_MEM_DRAM0) + continue; + + if (!eth1_node && i == PRUETH_MEM_DRAM1) + continue; + + ret = pruss_request_mem_region(pruss, pruss_mem_ids[i], + &prueth->mem[i]); + if (ret) { + dev_err(dev, "unable to get PRUSS resource %d: %d\n", + i, ret); + goto put_mem; + } + } + + prueth->sram_pool = of_gen_pool_get(np, "sram", 0); + if (!prueth->sram_pool) { + dev_err(dev, "unable to get SRAM pool\n"); + ret = -ENODEV; + goto put_mem; + } + + prueth->ocmc_ram_size = OCMC_RAM_SIZE; + /* Decreased by 8KB to address the reserved region for AM33x */ + if (prueth->fw_data->driver_data == PRUSS_AM33XX) + prueth->ocmc_ram_size = (SZ_64K - SZ_8K); + + prueth->mem[PRUETH_MEM_OCMC].va = + (void __iomem *)gen_pool_alloc(prueth->sram_pool, + prueth->ocmc_ram_size); + if (!prueth->mem[PRUETH_MEM_OCMC].va) { + dev_err(dev, "unable to allocate OCMC resource\n"); + ret = -ENOMEM; + goto put_mem; + } + prueth->mem[PRUETH_MEM_OCMC].pa = gen_pool_virt_to_phys + (prueth->sram_pool, (unsigned long) + prueth->mem[PRUETH_MEM_OCMC].va); + prueth->mem[PRUETH_MEM_OCMC].size = prueth->ocmc_ram_size; + dev_dbg(dev, "ocmc: pa %pa va %p size %#zx\n", + &prueth->mem[PRUETH_MEM_OCMC].pa, + prueth->mem[PRUETH_MEM_OCMC].va, + prueth->mem[PRUETH_MEM_OCMC].size); + + /* setup netdev interfaces */ + if (eth0_node) { + ret = icssm_prueth_netdev_init(prueth, eth0_node); + if (ret) { + if (ret != -EPROBE_DEFER) { + dev_err(dev, "netdev init %s failed: %d\n", + eth0_node->name, ret); + } + goto free_pool; + } + } + + if (eth1_node) { + ret = icssm_prueth_netdev_init(prueth, eth1_node); + if (ret) { + if (ret != -EPROBE_DEFER) { + dev_err(dev, "netdev init %s failed: %d\n", + eth1_node->name, ret); + } + goto netdev_exit; + } + } + + prueth->iep = icss_iep_get(np); + if (IS_ERR(prueth->iep)) { + ret = PTR_ERR(prueth->iep); + dev_err(dev, "unable to get IEP\n"); + goto netdev_exit; + } + + /* register the network devices */ + if (eth0_node) { + ret = register_netdev(prueth->emac[PRUETH_MAC0]->ndev); + if (ret) { + dev_err(dev, "can't register netdev for port MII0"); + goto iep_put; + } + + prueth->registered_netdevs[PRUETH_MAC0] = + prueth->emac[PRUETH_MAC0]->ndev; + } + + if (eth1_node) { + ret = register_netdev(prueth->emac[PRUETH_MAC1]->ndev); + if (ret) { + dev_err(dev, "can't register netdev for port MII1"); + goto netdev_unregister; + } + + prueth->registered_netdevs[PRUETH_MAC1] = + prueth->emac[PRUETH_MAC1]->ndev; + } + + dev_info(dev, "TI PRU ethernet driver initialized: %s EMAC mode\n", + (!eth0_node || !eth1_node) ? 
"single" : "dual"); + + if (eth1_node) + of_node_put(eth1_node); + if (eth0_node) + of_node_put(eth0_node); + return 0; + +netdev_unregister: + for (i = 0; i < PRUETH_NUM_MACS; i++) { + if (!prueth->registered_netdevs[i]) + continue; + unregister_netdev(prueth->registered_netdevs[i]); + } + +iep_put: + icss_iep_put(prueth->iep); + prueth->iep = NULL; + +netdev_exit: + for (i = 0; i < PRUETH_NUM_MACS; i++) { + eth_node = prueth->eth_node[i]; + if (!eth_node) + continue; + + icssm_prueth_netdev_exit(prueth, eth_node); + } + +free_pool: + gen_pool_free(prueth->sram_pool, + (unsigned long)prueth->mem[PRUETH_MEM_OCMC].va, + prueth->ocmc_ram_size); + +put_mem: + for (i = PRUETH_MEM_DRAM0; i < PRUETH_MEM_OCMC; i++) { + if (prueth->mem[i].va) + pruss_release_mem_region(pruss, &prueth->mem[i]); + } + pruss_put(prueth->pruss); + +put_pru1: + if (eth1_node) + pru_rproc_put(prueth->pru1); +put_pru0: + if (eth0_node) + pru_rproc_put(prueth->pru0); +put_eth: + of_node_put(eth1_node); + of_node_put(eth0_node); + + return ret; +} + +static void icssm_prueth_remove(struct platform_device *pdev) +{ + struct prueth *prueth = platform_get_drvdata(pdev); + struct device_node *eth_node; + int i; + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + if (!prueth->registered_netdevs[i]) + continue; + unregister_netdev(prueth->registered_netdevs[i]); + } + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + eth_node = prueth->eth_node[i]; + if (!eth_node) + continue; + + icssm_prueth_netdev_exit(prueth, eth_node); + of_node_put(eth_node); + } + + gen_pool_free(prueth->sram_pool, + (unsigned long)prueth->mem[PRUETH_MEM_OCMC].va, + prueth->ocmc_ram_size); + + for (i = PRUETH_MEM_DRAM0; i < PRUETH_MEM_OCMC; i++) { + if (prueth->mem[i].va) + pruss_release_mem_region(prueth->pruss, + &prueth->mem[i]); + } + + icss_iep_put(prueth->iep); + prueth->iep = NULL; + + pruss_put(prueth->pruss); + + if (prueth->eth_node[PRUETH_MAC0]) + pru_rproc_put(prueth->pru0); + if (prueth->eth_node[PRUETH_MAC1]) + pru_rproc_put(prueth->pru1); +} + +#ifdef CONFIG_PM_SLEEP +static int icssm_prueth_suspend(struct device *dev) +{ + struct prueth *prueth = dev_get_drvdata(dev); + struct net_device *ndev; + int i, ret; + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + ndev = prueth->registered_netdevs[i]; + + if (!ndev) + continue; + + if (netif_running(ndev)) { + netif_device_detach(ndev); + ret = icssm_emac_ndo_stop(ndev); + if (ret < 0) { + netdev_err(ndev, "failed to stop: %d", ret); + return ret; + } + } + } + + return 0; +} + +static int icssm_prueth_resume(struct device *dev) +{ + struct prueth *prueth = dev_get_drvdata(dev); + struct net_device *ndev; + int i, ret; + + for (i = 0; i < PRUETH_NUM_MACS; i++) { + ndev = prueth->registered_netdevs[i]; + + if (!ndev) + continue; + + if (netif_running(ndev)) { + ret = icssm_emac_ndo_open(ndev); + if (ret < 0) { + netdev_err(ndev, "failed to start: %d", ret); + return ret; + } + netif_device_attach(ndev); + } + } + + return 0; +} + +#endif /* CONFIG_PM_SLEEP */ + +static const struct dev_pm_ops prueth_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(icssm_prueth_suspend, icssm_prueth_resume) +}; + +/* AM335x SoC-specific firmware data */ +static struct prueth_private_data am335x_prueth_pdata = { + .driver_data = PRUSS_AM33XX, + .fw_pru[PRUSS_PRU0] = { + .fw_name[PRUSS_ETHTYPE_EMAC] = + "ti-pruss/am335x-pru0-prueth-fw.elf", + }, + .fw_pru[PRUSS_PRU1] = { + .fw_name[PRUSS_ETHTYPE_EMAC] = + "ti-pruss/am335x-pru1-prueth-fw.elf", + }, +}; + +/* AM437x SoC-specific firmware data */ +static struct prueth_private_data 
am437x_prueth_pdata = { + .driver_data = PRUSS_AM43XX, + .fw_pru[PRUSS_PRU0] = { + .fw_name[PRUSS_ETHTYPE_EMAC] = + "ti-pruss/am437x-pru0-prueth-fw.elf", + }, + .fw_pru[PRUSS_PRU1] = { + .fw_name[PRUSS_ETHTYPE_EMAC] = + "ti-pruss/am437x-pru1-prueth-fw.elf", + }, +}; + +/* AM57xx SoC-specific firmware data */ +static struct prueth_private_data am57xx_prueth_pdata = { + .driver_data = PRUSS_AM57XX, + .fw_pru[PRUSS_PRU0] = { + .fw_name[PRUSS_ETHTYPE_EMAC] = + "ti-pruss/am57xx-pru0-prueth-fw.elf", + }, + .fw_pru[PRUSS_PRU1] = { + .fw_name[PRUSS_ETHTYPE_EMAC] = + "ti-pruss/am57xx-pru1-prueth-fw.elf", + }, +}; + +static const struct of_device_id prueth_dt_match[] = { + { .compatible = "ti,am57-prueth", .data = &am57xx_prueth_pdata, }, + { .compatible = "ti,am4376-prueth", .data = &am437x_prueth_pdata, }, + { .compatible = "ti,am3359-prueth", .data = &am335x_prueth_pdata, }, + { /* sentinel */ } +}; +MODULE_DEVICE_TABLE(of, prueth_dt_match); + +static struct platform_driver prueth_driver = { + .probe = icssm_prueth_probe, + .remove = icssm_prueth_remove, + .driver = { + .name = "prueth", + .of_match_table = prueth_dt_match, + .pm = &prueth_dev_pm_ops, + }, +}; +module_platform_driver(prueth_driver); + +MODULE_AUTHOR("Roger Quadros <rogerq@ti.com>"); +MODULE_AUTHOR("Andrew F. Davis <afd@ti.com>"); +MODULE_DESCRIPTION("PRUSS ICSSM Ethernet Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ethernet/ti/icssm/icssm_prueth.h b/drivers/net/ethernet/ti/icssm/icssm_prueth.h new file mode 100644 index 000000000000..8e7e0af08144 --- /dev/null +++ b/drivers/net/ethernet/ti/icssm/icssm_prueth.h @@ -0,0 +1,262 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Texas Instruments ICSSM Ethernet driver + * + * Copyright (C) 2018-2022 Texas Instruments Incorporated - https://www.ti.com/ + * + */ + +#ifndef __NET_TI_PRUETH_H +#define __NET_TI_PRUETH_H + +#include <linux/phy.h> +#include <linux/types.h> +#include <linux/pruss_driver.h> +#include <linux/remoteproc/pruss.h> + +#include "icssm_switch.h" +#include "icssm_prueth_ptp.h" + +/* ICSSM size of redundancy tag */ +#define ICSSM_LRE_TAG_SIZE 6 + +/* PRUSS local memory map */ +#define ICSS_LOCAL_SHARED_RAM 0x00010000 +#define EMAC_MAX_PKTLEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN) +/* Below macro is for 1528 Byte Frame support, to Allow even with + * Redundancy tag + */ +#define EMAC_MAX_FRM_SUPPORT (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN + \ + ICSSM_LRE_TAG_SIZE) + +/* PRU Ethernet Type - Ethernet functionality (protocol + * implemented) provided by the PRU firmware being loaded. + */ +enum pruss_ethtype { + PRUSS_ETHTYPE_EMAC = 0, + PRUSS_ETHTYPE_HSR, + PRUSS_ETHTYPE_PRP, + PRUSS_ETHTYPE_SWITCH, + PRUSS_ETHTYPE_MAX, +}; + +#define PRUETH_IS_EMAC(p) ((p)->eth_type == PRUSS_ETHTYPE_EMAC) +#define PRUETH_IS_SWITCH(p) ((p)->eth_type == PRUSS_ETHTYPE_SWITCH) + +/** + * struct prueth_queue_desc - Queue descriptor + * @rd_ptr: Read pointer, points to a buffer descriptor in Shared PRU RAM. + * @wr_ptr: Write pointer, points to a buffer descriptor in Shared PRU RAM. + * @busy_s: Slave queue busy flag, set by slave(us) to request access from + * master(PRU). + * @status: Bit field status register, Bits: + * 0: Master queue busy flag. + * 1: Packet has been placed in collision queue. + * 2: Packet has been discarded due to overflow. + * @max_fill_level: Maximum queue usage seen. + * @overflow_cnt: Count of queue overflows. + * + * Each port has up to 4 queues with variable length. The queue is processed + * as ring buffer with read and write pointers. 
Both pointers are address + * pointers and increment by 4 for each buffer descriptor position. Queue has + * a length defined in constants and a status. + */ +struct prueth_queue_desc { + u16 rd_ptr; + u16 wr_ptr; + u8 busy_s; + u8 status; + u8 max_fill_level; + u8 overflow_cnt; +}; + +/** + * struct prueth_queue_info - Information about a queue in memory + * @buffer_offset: buffer offset in OCMC RAM + * @queue_desc_offset: queue descriptor offset in Shared RAM + * @buffer_desc_offset: buffer descriptors offset in Shared RAM + * @buffer_desc_end: end address of buffer descriptors in Shared RAM + */ +struct prueth_queue_info { + u16 buffer_offset; + u16 queue_desc_offset; + u16 buffer_desc_offset; + u16 buffer_desc_end; +}; + +/** + * struct prueth_packet_info - Info about a packet in buffer + * @shadow: this packet is stored in the collision queue + * @port: port packet is on + * @length: length of packet + * @broadcast: this packet is a broadcast packet + * @error: this packet has an error + * @lookup_success: src mac found in FDB + * @flood: packet is to be flooded + * @timestamp: Specifies if timestamp is appended to the packet + */ +struct prueth_packet_info { + bool shadow; + unsigned int port; + unsigned int length; + bool broadcast; + bool error; + bool lookup_success; + bool flood; + bool timestamp; +}; + +/* In switch mode there are 3 real ports i.e. 3 mac addrs. + * however Linux sees only the host side port. The other 2 ports + * are the switch ports. + * In emac mode there are 2 real ports i.e. 2 mac addrs. + * Linux sees both the ports. + */ +enum prueth_port { + PRUETH_PORT_HOST = 0, /* host side port */ + PRUETH_PORT_MII0, /* physical port MII 0 */ + PRUETH_PORT_MII1, /* physical port MII 1 */ + PRUETH_PORT_INVALID, /* Invalid prueth port */ +}; + +enum prueth_mac { + PRUETH_MAC0 = 0, + PRUETH_MAC1, + PRUETH_NUM_MACS, + PRUETH_MAC_INVALID, +}; + +/* In both switch & emac modes there are 3 port queues + * EMAC mode: + * RX packets for both MII0 & MII1 ports come on + * QUEUE_HOST. + * TX packets for MII0 go on QUEUE_MII0, TX packets + * for MII1 go on QUEUE_MII1. + * Switch mode: + * Host port RX packets come on QUEUE_HOST + * TX packets might have to go on MII0 or MII1 or both. + * MII0 TX queue is QUEUE_MII0 and MII1 TX queue is + * QUEUE_MII1. 
+ */ +enum prueth_port_queue_id { + PRUETH_PORT_QUEUE_HOST = 0, + PRUETH_PORT_QUEUE_MII0, + PRUETH_PORT_QUEUE_MII1, + PRUETH_PORT_QUEUE_MAX, +}; + +/* Each port queue has 4 queues and 1 collision queue */ +enum prueth_queue_id { + PRUETH_QUEUE1 = 0, + PRUETH_QUEUE2, + PRUETH_QUEUE3, + PRUETH_QUEUE4, + PRUETH_COLQUEUE, /* collision queue */ +}; + +/** + * struct prueth_firmware - PRU Ethernet FW data + * @fw_name: firmware names of firmware to run on PRU + */ +struct prueth_firmware { + const char *fw_name[PRUSS_ETHTYPE_MAX]; +}; + +/* PRUeth memory range identifiers */ +enum prueth_mem { + PRUETH_MEM_DRAM0 = 0, + PRUETH_MEM_DRAM1, + PRUETH_MEM_SHARED_RAM, + PRUETH_MEM_OCMC, + PRUETH_MEM_MAX, +}; + +enum pruss_device { + PRUSS_AM57XX = 0, + PRUSS_AM43XX, + PRUSS_AM33XX, + PRUSS_K2G +}; + +/** + * struct prueth_private_data - PRU Ethernet private data + * @driver_data: PRU Ethernet device name + * @fw_pru: firmware names to be used for PRUSS ethernet usecases + */ +struct prueth_private_data { + enum pruss_device driver_data; + const struct prueth_firmware fw_pru[PRUSS_NUM_PRUS]; +}; + +struct prueth_emac_stats { + u64 tx_packets; + u64 tx_dropped; + u64 tx_bytes; + u64 rx_packets; + u64 rx_bytes; + u64 rx_length_errors; + u64 rx_over_errors; +}; + +/* data for each emac port */ +struct prueth_emac { + struct prueth *prueth; + struct net_device *ndev; + struct napi_struct napi; + + struct rproc *pru; + struct phy_device *phydev; + struct prueth_queue_desc __iomem *rx_queue_descs; + struct prueth_queue_desc __iomem *tx_queue_descs; + + int link; + int speed; + int duplex; + int rx_irq; + + enum prueth_port_queue_id tx_port_queue; + enum prueth_queue_id rx_queue_start; + enum prueth_queue_id rx_queue_end; + enum prueth_port port_id; + enum prueth_mem dram; + const char *phy_id; + u32 msg_enable; + u8 mac_addr[6]; + phy_interface_t phy_if; + + /* spin lock used to protect + * during link configuration + */ + spinlock_t lock; + + struct hrtimer tx_hrtimer; + struct prueth_emac_stats stats; +}; + +struct prueth { + struct device *dev; + struct pruss *pruss; + struct rproc *pru0, *pru1; + struct pruss_mem_region mem[PRUETH_MEM_MAX]; + struct gen_pool *sram_pool; + struct regmap *mii_rt; + struct icss_iep *iep; + + const struct prueth_private_data *fw_data; + struct prueth_fw_offsets *fw_offsets; + + struct device_node *eth_node[PRUETH_NUM_MACS]; + struct prueth_emac *emac[PRUETH_NUM_MACS]; + struct net_device *registered_netdevs[PRUETH_NUM_MACS]; + + unsigned int eth_type; + size_t ocmc_ram_size; + u8 emac_configured; +}; + +void icssm_parse_packet_info(struct prueth *prueth, u32 buffer_descriptor, + struct prueth_packet_info *pkt_info); +int icssm_emac_rx_packet(struct prueth_emac *emac, u16 *bd_rd_ptr, + struct prueth_packet_info *pkt_info, + const struct prueth_queue_info *rxqueue); + +#endif /* __NET_TI_PRUETH_H */ diff --git a/drivers/net/ethernet/ti/icssm/icssm_prueth_ptp.h b/drivers/net/ethernet/ti/icssm/icssm_prueth_ptp.h new file mode 100644 index 000000000000..e0bf692beda1 --- /dev/null +++ b/drivers/net/ethernet/ti/icssm/icssm_prueth_ptp.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020-2021 Texas Instruments Incorporated - https://www.ti.com + */ +#ifndef PRUETH_PTP_H +#define PRUETH_PTP_H + +#define RX_SYNC_TIMESTAMP_OFFSET_P1 0x8 /* 8 bytes */ +#define RX_PDELAY_REQ_TIMESTAMP_OFFSET_P1 0x14 /* 12 bytes */ + +#define DISABLE_PTP_FRAME_FORWARDING_CTRL_OFFSET 0x14 /* 1 byte */ + +#define RX_PDELAY_RESP_TIMESTAMP_OFFSET_P1 0x20 /* 12 bytes */ +#define 
RX_SYNC_TIMESTAMP_OFFSET_P2 0x2c /* 12 bytes */ +#define RX_PDELAY_REQ_TIMESTAMP_OFFSET_P2 0x38 /* 12 bytes */ +#define RX_PDELAY_RESP_TIMESTAMP_OFFSET_P2 0x44 /* 12 bytes */ +#define TIMESYNC_DOMAIN_NUMBER_LIST 0x50 /* 2 bytes */ +#define P1_SMA_LINE_DELAY_OFFSET 0x52 /* 4 bytes */ +#define P2_SMA_LINE_DELAY_OFFSET 0x56 /* 4 bytes */ +#define TIMESYNC_SECONDS_COUNT_OFFSET 0x5a /* 6 bytes */ +#define TIMESYNC_TC_RCF_OFFSET 0x60 /* 4 bytes */ +#define DUT_IS_MASTER_OFFSET 0x64 /* 1 byte */ +#define MASTER_PORT_NUM_OFFSET 0x65 /* 1 byte */ +#define SYNC_MASTER_MAC_OFFSET 0x66 /* 6 bytes */ +#define TX_TS_NOTIFICATION_OFFSET_SYNC_P1 0x6c /* 1 byte */ +#define TX_TS_NOTIFICATION_OFFSET_PDEL_REQ_P1 0x6d /* 1 byte */ +#define TX_TS_NOTIFICATION_OFFSET_PDEL_RES_P1 0x6e /* 1 byte */ +#define TX_TS_NOTIFICATION_OFFSET_SYNC_P2 0x6f /* 1 byte */ +#define TX_TS_NOTIFICATION_OFFSET_PDEL_REQ_P2 0x70 /* 1 byte */ +#define TX_TS_NOTIFICATION_OFFSET_PDEL_RES_P2 0x71 /* 1 byte */ +#define TX_SYNC_TIMESTAMP_OFFSET_P1 0x72 /* 12 bytes */ +#define TX_PDELAY_REQ_TIMESTAMP_OFFSET_P1 0x7e /* 12 bytes */ +#define TX_PDELAY_RESP_TIMESTAMP_OFFSET_P1 0x8a /* 12 bytes */ +#define TX_SYNC_TIMESTAMP_OFFSET_P2 0x96 /* 12 bytes */ +#define TX_PDELAY_REQ_TIMESTAMP_OFFSET_P2 0xa2 /* 12 bytes */ +#define TX_PDELAY_RESP_TIMESTAMP_OFFSET_P2 0xae /* 12 bytes */ +#define TIMESYNC_CTRL_VAR_OFFSET 0xba /* 1 byte */ +#define DISABLE_SWITCH_SYNC_RELAY_OFFSET 0xbb /* 1 byte */ +#define MII_RX_CORRECTION_OFFSET 0xbc /* 2 bytes */ +#define MII_TX_CORRECTION_OFFSET 0xbe /* 2 bytes */ +#define TIMESYNC_CMP1_CMP_OFFSET 0xc0 /* 8 bytes */ +#define TIMESYNC_SYNC0_CMP_OFFSET 0xc8 /* 8 bytes */ +#define TIMESYNC_CMP1_PERIOD_OFFSET 0xd0 /* 4 bytes */ +#define TIMESYNC_SYNC0_WIDTH_OFFSET 0xd4 /* 4 bytes */ +#define SINGLE_STEP_IEP_OFFSET_P1 0xd8 /* 8 bytes */ +#define SINGLE_STEP_SECONDS_OFFSET_P1 0xe0 /* 8 bytes */ +#define SINGLE_STEP_IEP_OFFSET_P2 0xe8 /* 8 bytes */ +#define SINGLE_STEP_SECONDS_OFFSET_P2 0xf0 /* 8 bytes */ +#define LINK_LOCAL_FRAME_HAS_HSR_TAG 0xf8 /* 1 bytes */ +#define PTP_PREV_TX_TIMESTAMP_P1 0xf9 /* 8 bytes */ +#define PTP_PREV_TX_TIMESTAMP_P2 0x101 /* 8 bytes */ +#define PTP_CLK_IDENTITY_OFFSET 0x109 /* 8 bytes */ +#define PTP_SCRATCH_MEM 0x111 /* 16 byte */ +#define PTP_IPV4_UDP_E2E_ENABLE 0x121 /* 1 byte */ + +enum { + PRUETH_PTP_SYNC, + PRUETH_PTP_DLY_REQ, + PRUETH_PTP_DLY_RESP, + PRUETH_PTP_TS_EVENTS, +}; + +#define PRUETH_PTP_TS_SIZE 12 +#define PRUETH_PTP_TS_NOTIFY_SIZE 1 +#define PRUETH_PTP_TS_NOTIFY_MASK 0xff + +/* Bit definitions for TIMESYNC_CTRL */ +#define TIMESYNC_CTRL_BG_ENABLE BIT(0) +#define TIMESYNC_CTRL_FORCED_2STEP BIT(1) + +static inline u32 icssm_prueth_tx_ts_offs_get(u8 port, u8 event) +{ + return TX_SYNC_TIMESTAMP_OFFSET_P1 + port * + PRUETH_PTP_TS_EVENTS * PRUETH_PTP_TS_SIZE + + event * PRUETH_PTP_TS_SIZE; +} + +static inline u32 icssm_prueth_tx_ts_notify_offs_get(u8 port, u8 event) +{ + return TX_TS_NOTIFICATION_OFFSET_SYNC_P1 + + PRUETH_PTP_TS_EVENTS * PRUETH_PTP_TS_NOTIFY_SIZE * port + + event * PRUETH_PTP_TS_NOTIFY_SIZE; +} + +#endif /* PRUETH_PTP_H */ diff --git a/drivers/net/ethernet/ti/icssm/icssm_switch.h b/drivers/net/ethernet/ti/icssm/icssm_switch.h new file mode 100644 index 000000000000..8b494ffdcde7 --- /dev/null +++ b/drivers/net/ethernet/ti/icssm/icssm_switch.h @@ -0,0 +1,257 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Copyright (C) 2015-2021 Texas Instruments Incorporated - https://www.ti.com + */ + +#ifndef __ICSS_SWITCH_H +#define __ICSS_SWITCH_H + +/* Basic Switch 
Parameters + * Used to auto compute offset addresses on L3 OCMC RAM. Do not modify these + * without changing firmware accordingly + */ +#define SWITCH_BUFFER_SIZE (64 * 1024) /* L3 buffer */ +#define ICSS_BLOCK_SIZE 32 /* data bytes per BD */ +#define BD_SIZE 4 /* byte buffer descriptor */ +#define NUM_QUEUES 4 /* Queues on Port 0/1/2 */ + +#define PORT_LINK_MASK 0x1 +#define PORT_IS_HD_MASK 0x2 + +/* Physical Port queue size (number of BDs). Same for both ports */ +#define QUEUE_1_SIZE 97 /* Network Management high */ +#define QUEUE_2_SIZE 97 /* Network Management low */ +#define QUEUE_3_SIZE 97 /* Protocol specific */ +#define QUEUE_4_SIZE 97 /* NRT (IP,ARP, ICMP) */ + +/* Host queue size (number of BDs). Each BD points to data buffer of 32 bytes. + * HOST PORT QUEUES can buffer up to 4 full sized frames per queue + */ +#define HOST_QUEUE_1_SIZE 194 /* Protocol and VLAN priority 7 & 6 */ +#define HOST_QUEUE_2_SIZE 194 /* Protocol mid */ +#define HOST_QUEUE_3_SIZE 194 /* Protocol low */ +#define HOST_QUEUE_4_SIZE 194 /* NRT (IP, ARP, ICMP) */ + +#define COL_QUEUE_SIZE 0 + +/* NRT Buffer descriptor definition + * Each buffer descriptor points to a max 32 byte block and has 32 bit in size + * to have atomic operation. + * PRU can address bytewise into memory. + * Definition of 32 bit descriptor is as follows + * + * Bits Name Meaning + * ============================================================================= + * 0..7 Index points to index in buffer queue, max 256 x 32 + * byte blocks can be addressed + * 6 LookupSuccess For switch, FDB lookup was successful (source + * MAC address found in FDB). + * For RED, NodeTable lookup was successful. + * 7 Flood Packet should be flooded (destination MAC + * address found in FDB). For switch only. + * 8..12 Block_length number of valid bytes in this specific block. + * Will be <=32 bytes on last block of packet + * 13 More "More" bit indicating that there are more blocks + * 14 Shadow indicates that "index" is pointing into shadow + * buffer + * 15 TimeStamp indicates that this packet has time stamp in + * separate buffer - only needed if PTP runs on + * host + * 16..17 Port different meaning for ingress and egress, + * Ingress: Port = 0 indicates phy port 1 and + * Port = 1 indicates phy port 2. + * Egress: 0 sends on phy port 1 and 1 sends on + * phy port 2. 
Port = 2 goes over MAC table + * look-up + * 18..28 Length 11 bits of total packet length, put into the + * first BD only so that the host accesses only one BD + * 29 VlanTag indicates that packet has Length/Type field of + * 0x8100 with the VLAN tag in the following bytes + * 30 Broadcast indicates that packet goes out on both physical + * ports; there will be two BDs but only one buffer + * 31 Error indicates there was an error in the packet + */ +#define PRUETH_BD_START_FLAG_MASK BIT(0) +#define PRUETH_BD_START_FLAG_SHIFT 0 + +#define PRUETH_BD_HSR_FRAME_MASK BIT(4) +#define PRUETH_BD_HSR_FRAME_SHIFT 4 + +#define PRUETH_BD_SUP_HSR_FRAME_MASK BIT(5) +#define PRUETH_BD_SUP_HSR_FRAME_SHIFT 5 + +#define PRUETH_BD_LOOKUP_SUCCESS_MASK BIT(6) +#define PRUETH_BD_LOOKUP_SUCCESS_SHIFT 6 + +#define PRUETH_BD_SW_FLOOD_MASK BIT(7) +#define PRUETH_BD_SW_FLOOD_SHIFT 7 + +#define PRUETH_BD_SHADOW_MASK BIT(14) +#define PRUETH_BD_SHADOW_SHIFT 14 + +#define PRUETH_BD_TIMESTAMP_MASK BIT(15) +#define PRUETH_BD_TIMESTAMP_SHIFT 15 + +#define PRUETH_BD_PORT_MASK GENMASK(17, 16) +#define PRUETH_BD_PORT_SHIFT 16 + +#define PRUETH_BD_LENGTH_MASK GENMASK(28, 18) +#define PRUETH_BD_LENGTH_SHIFT 18 + +#define PRUETH_BD_BROADCAST_MASK BIT(30) +#define PRUETH_BD_BROADCAST_SHIFT 30 + +#define PRUETH_BD_ERROR_MASK BIT(31) +#define PRUETH_BD_ERROR_SHIFT 31 + +/* The following offsets indicate which sections of the memory are used + * for EMAC internal tasks + */ +#define DRAM_START_OFFSET 0x1E98 +#define SRAM_START_OFFSET 0x400 + +/* General Purpose Statistics + * These are present on both PRU0 and PRU1 DRAM + */ +/* base statistics offset */ +#define STATISTICS_OFFSET 0x1F00 +#define STAT_SIZE 0x98 + +/* Offset for storing + * 1. Storm Prevention Params + * 2. PHY Speed Offset + * 3. Port Status Offset + * These are present on both PRU0 and PRU1 + */ +/* 4 bytes */ +#define STORM_PREVENTION_OFFSET_BC (STATISTICS_OFFSET + STAT_SIZE) +/* 4 bytes */ +#define PHY_SPEED_OFFSET (STATISTICS_OFFSET + STAT_SIZE + 4) +/* 1 byte */ +#define PORT_STATUS_OFFSET (STATISTICS_OFFSET + STAT_SIZE + 8) +/* 1 byte */ +#define COLLISION_COUNTER (STATISTICS_OFFSET + STAT_SIZE + 9) +/* 4 bytes */ +#define RX_PKT_SIZE_OFFSET (STATISTICS_OFFSET + STAT_SIZE + 10) +/* 4 bytes */ +#define PORT_CONTROL_ADDR (STATISTICS_OFFSET + STAT_SIZE + 14) +/* 6 bytes */ +#define PORT_MAC_ADDR (STATISTICS_OFFSET + STAT_SIZE + 18) +/* 1 byte */ +#define RX_INT_STATUS_OFFSET (STATISTICS_OFFSET + STAT_SIZE + 24) +/* 4 bytes */ +#define STORM_PREVENTION_OFFSET_MC (STATISTICS_OFFSET + STAT_SIZE + 25) +/* 4 bytes */ +#define STORM_PREVENTION_OFFSET_UC (STATISTICS_OFFSET + STAT_SIZE + 29) +/* 4 bytes ? 
*/ +#define STP_INVALID_STATE_OFFSET (STATISTICS_OFFSET + STAT_SIZE + 33) + +/* DRAM Offsets for EMAC + * Present on Both DRAM0 and DRAM1 + */ + +/* 4 queue descriptors for port tx = 32 bytes */ +#define TX_CONTEXT_Q1_OFFSET_ADDR (PORT_QUEUE_DESC_OFFSET + 32) +#define PORT_QUEUE_DESC_OFFSET (ICSS_EMAC_TTS_CYC_TX_SOF + 8) + +/* EMAC Time Triggered Send Offsets */ +#define ICSS_EMAC_TTS_CYC_TX_SOF (ICSS_EMAC_TTS_PREV_TX_SOF + 8) +#define ICSS_EMAC_TTS_PREV_TX_SOF \ + (ICSS_EMAC_TTS_MISSED_CYCLE_CNT_OFFSET + 4) +#define ICSS_EMAC_TTS_MISSED_CYCLE_CNT_OFFSET (ICSS_EMAC_TTS_STATUS_OFFSET \ + + 4) +#define ICSS_EMAC_TTS_STATUS_OFFSET (ICSS_EMAC_TTS_CFG_TIME_OFFSET + 4) +#define ICSS_EMAC_TTS_CFG_TIME_OFFSET (ICSS_EMAC_TTS_CYCLE_PERIOD_OFFSET + 4) +#define ICSS_EMAC_TTS_CYCLE_PERIOD_OFFSET \ + (ICSS_EMAC_TTS_CYCLE_START_OFFSET + 8) +#define ICSS_EMAC_TTS_CYCLE_START_OFFSET ICSS_EMAC_TTS_BASE_OFFSET +#define ICSS_EMAC_TTS_BASE_OFFSET DRAM_START_OFFSET + +/* Shared RAM offsets for EMAC */ + +/* Queue Descriptors */ + +/* 4 queue descriptors for port 0 (host receive). 32 bytes */ +#define HOST_QUEUE_DESC_OFFSET (HOST_QUEUE_SIZE_ADDR + 16) + +/* table offset for queue size: + * 3 ports * 4 Queues * 1 byte offset = 12 bytes + */ +#define HOST_QUEUE_SIZE_ADDR (HOST_QUEUE_OFFSET_ADDR + 8) +/* table offset for queue: + * 4 Queues * 2 byte offset = 8 bytes + */ +#define HOST_QUEUE_OFFSET_ADDR (HOST_QUEUE_DESCRIPTOR_OFFSET_ADDR + 8) +/* table offset for Host queue descriptors: + * 1 ports * 4 Queues * 2 byte offset = 8 bytes + */ +#define HOST_QUEUE_DESCRIPTOR_OFFSET_ADDR (HOST_Q4_RX_CONTEXT_OFFSET + 8) + +/* Host Port Rx Context */ +#define HOST_Q4_RX_CONTEXT_OFFSET (HOST_Q3_RX_CONTEXT_OFFSET + 8) +#define HOST_Q3_RX_CONTEXT_OFFSET (HOST_Q2_RX_CONTEXT_OFFSET + 8) +#define HOST_Q2_RX_CONTEXT_OFFSET (HOST_Q1_RX_CONTEXT_OFFSET + 8) +#define HOST_Q1_RX_CONTEXT_OFFSET (EMAC_PROMISCUOUS_MODE_OFFSET + 4) + +/* Promiscuous mode control */ +#define EMAC_P1_PROMISCUOUS_BIT BIT(0) +#define EMAC_P2_PROMISCUOUS_BIT BIT(1) +#define EMAC_PROMISCUOUS_MODE_OFFSET (EMAC_RESERVED + 4) +#define EMAC_RESERVED EOF_48K_BUFFER_BD + +/* allow for max 48k buffer which spans the descriptors up to 0x1800 6kB */ +#define EOF_48K_BUFFER_BD (P0_BUFFER_DESC_OFFSET + HOST_BD_SIZE + \ + PORT_BD_SIZE) + +#define HOST_BD_SIZE ((HOST_QUEUE_1_SIZE + \ + HOST_QUEUE_2_SIZE + HOST_QUEUE_3_SIZE + \ + HOST_QUEUE_4_SIZE) * BD_SIZE) +#define PORT_BD_SIZE ((QUEUE_1_SIZE + QUEUE_2_SIZE + \ + QUEUE_3_SIZE + QUEUE_4_SIZE) * 2 * BD_SIZE) + +#define END_OF_BD_POOL (P2_Q4_BD_OFFSET + QUEUE_4_SIZE * BD_SIZE) +#define P2_Q4_BD_OFFSET (P2_Q3_BD_OFFSET + QUEUE_3_SIZE * BD_SIZE) +#define P2_Q3_BD_OFFSET (P2_Q2_BD_OFFSET + QUEUE_2_SIZE * BD_SIZE) +#define P2_Q2_BD_OFFSET (P2_Q1_BD_OFFSET + QUEUE_1_SIZE * BD_SIZE) +#define P2_Q1_BD_OFFSET (P1_Q4_BD_OFFSET + QUEUE_4_SIZE * BD_SIZE) +#define P1_Q4_BD_OFFSET (P1_Q3_BD_OFFSET + QUEUE_3_SIZE * BD_SIZE) +#define P1_Q3_BD_OFFSET (P1_Q2_BD_OFFSET + QUEUE_2_SIZE * BD_SIZE) +#define P1_Q2_BD_OFFSET (P1_Q1_BD_OFFSET + QUEUE_1_SIZE * BD_SIZE) +#define P1_Q1_BD_OFFSET (P0_Q4_BD_OFFSET + HOST_QUEUE_4_SIZE * BD_SIZE) +#define P0_Q4_BD_OFFSET (P0_Q3_BD_OFFSET + HOST_QUEUE_3_SIZE * BD_SIZE) +#define P0_Q3_BD_OFFSET (P0_Q2_BD_OFFSET + HOST_QUEUE_2_SIZE * BD_SIZE) +#define P0_Q2_BD_OFFSET (P0_Q1_BD_OFFSET + HOST_QUEUE_1_SIZE * BD_SIZE) +#define P0_Q1_BD_OFFSET P0_BUFFER_DESC_OFFSET +#define P0_BUFFER_DESC_OFFSET SRAM_START_OFFSET + +/* Memory Usage of L3 OCMC RAM */ + +/* L3 64KB Memory - mainly buffer Pool */ +#define 
END_OF_BUFFER_POOL (P2_Q4_BUFFER_OFFSET + QUEUE_4_SIZE * \ + ICSS_BLOCK_SIZE) +#define P2_Q4_BUFFER_OFFSET (P2_Q3_BUFFER_OFFSET + QUEUE_3_SIZE * \ + ICSS_BLOCK_SIZE) +#define P2_Q3_BUFFER_OFFSET (P2_Q2_BUFFER_OFFSET + QUEUE_2_SIZE * \ + ICSS_BLOCK_SIZE) +#define P2_Q2_BUFFER_OFFSET (P2_Q1_BUFFER_OFFSET + QUEUE_1_SIZE * \ + ICSS_BLOCK_SIZE) +#define P2_Q1_BUFFER_OFFSET (P1_Q4_BUFFER_OFFSET + QUEUE_4_SIZE * \ + ICSS_BLOCK_SIZE) +#define P1_Q4_BUFFER_OFFSET (P1_Q3_BUFFER_OFFSET + QUEUE_3_SIZE * \ + ICSS_BLOCK_SIZE) +#define P1_Q3_BUFFER_OFFSET (P1_Q2_BUFFER_OFFSET + QUEUE_2_SIZE * \ + ICSS_BLOCK_SIZE) +#define P1_Q2_BUFFER_OFFSET (P1_Q1_BUFFER_OFFSET + QUEUE_1_SIZE * \ + ICSS_BLOCK_SIZE) +#define P1_Q1_BUFFER_OFFSET (P0_Q4_BUFFER_OFFSET + HOST_QUEUE_4_SIZE * \ + ICSS_BLOCK_SIZE) +#define P0_Q4_BUFFER_OFFSET (P0_Q3_BUFFER_OFFSET + HOST_QUEUE_3_SIZE * \ + ICSS_BLOCK_SIZE) +#define P0_Q3_BUFFER_OFFSET (P0_Q2_BUFFER_OFFSET + HOST_QUEUE_2_SIZE * \ + ICSS_BLOCK_SIZE) +#define P0_Q2_BUFFER_OFFSET (P0_Q1_BUFFER_OFFSET + HOST_QUEUE_1_SIZE * \ + ICSS_BLOCK_SIZE) +#define P0_COL_BUFFER_OFFSET 0xEE00 +#define P0_Q1_BUFFER_OFFSET 0x0000 + +#endif /* __ICSS_SWITCH_H */ diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig index 424ec3212128..d138dea7d208 100644 --- a/drivers/net/ethernet/wangxun/Kconfig +++ b/drivers/net/ethernet/wangxun/Kconfig @@ -20,6 +20,7 @@ config LIBWX tristate depends on PTP_1588_CLOCK_OPTIONAL select PAGE_POOL + select DIMLIB help Common library for Wangxun(R) Ethernet drivers. diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c index c12a4cb951f6..06f401bd975c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.c @@ -303,6 +303,11 @@ int wx_get_coalesce(struct net_device *netdev, else ec->rx_coalesce_usecs = wx->rx_itr_setting >> 2; + if (wx->adaptive_itr) { + ec->use_adaptive_rx_coalesce = 1; + ec->use_adaptive_tx_coalesce = 1; + } + /* if in mixed tx/rx queues per vector mode, report only rx settings */ if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) return 0; @@ -334,19 +339,28 @@ int wx_set_coalesce(struct net_device *netdev, return -EOPNOTSUPP; } - if (ec->tx_max_coalesced_frames_irq) - wx->tx_work_limit = ec->tx_max_coalesced_frames_irq; + if (ec->tx_max_coalesced_frames_irq > U16_MAX || + !ec->tx_max_coalesced_frames_irq) + return -EINVAL; + + wx->tx_work_limit = ec->tx_max_coalesced_frames_irq; switch (wx->mac.type) { case wx_mac_sp: max_eitr = WX_SP_MAX_EITR; + rx_itr_param = WX_20K_ITR; + tx_itr_param = WX_12K_ITR; break; case wx_mac_aml: case wx_mac_aml40: max_eitr = WX_AML_MAX_EITR; + rx_itr_param = WX_20K_ITR; + tx_itr_param = WX_12K_ITR; break; default: max_eitr = WX_EM_MAX_EITR; + rx_itr_param = WX_7K_ITR; + tx_itr_param = WX_7K_ITR; break; } @@ -354,36 +368,37 @@ int wx_set_coalesce(struct net_device *netdev, (ec->tx_coalesce_usecs > (max_eitr >> 2))) return -EINVAL; + if (ec->use_adaptive_rx_coalesce) { + wx->adaptive_itr = true; + wx->rx_itr_setting = 1; + wx->tx_itr_setting = 1; + return 0; + } + if (ec->rx_coalesce_usecs > 1) wx->rx_itr_setting = ec->rx_coalesce_usecs << 2; else wx->rx_itr_setting = ec->rx_coalesce_usecs; - if (wx->rx_itr_setting == 1) - rx_itr_param = WX_20K_ITR; - else - rx_itr_param = wx->rx_itr_setting; - if (ec->tx_coalesce_usecs > 1) wx->tx_itr_setting = ec->tx_coalesce_usecs << 2; else wx->tx_itr_setting = ec->tx_coalesce_usecs; - if (wx->tx_itr_setting == 1) { - switch 
(wx->mac.type) { - case wx_mac_sp: - case wx_mac_aml: - case wx_mac_aml40: - tx_itr_param = WX_12K_ITR; - break; - default: - tx_itr_param = WX_20K_ITR; - break; - } - } else { - tx_itr_param = wx->tx_itr_setting; + if (wx->adaptive_itr) { + wx->adaptive_itr = false; + wx->rx_itr_setting = rx_itr_param; + wx->tx_itr_setting = tx_itr_param; + } else if (wx->rx_itr_setting == 1 || wx->tx_itr_setting == 1) { + wx->adaptive_itr = true; } + if (wx->rx_itr_setting != 1) + rx_itr_param = wx->rx_itr_setting; + + if (wx->tx_itr_setting != 1) + tx_itr_param = wx->tx_itr_setting; + /* mixed Rx/Tx */ if (wx->q_vector[0]->tx.count && wx->q_vector[0]->rx.count) wx->tx_itr_setting = wx->rx_itr_setting; @@ -466,6 +481,142 @@ int wx_set_channels(struct net_device *dev, } EXPORT_SYMBOL(wx_set_channels); +u32 wx_rss_indir_size(struct net_device *netdev) +{ + struct wx *wx = netdev_priv(netdev); + + return wx_rss_indir_tbl_entries(wx); +} +EXPORT_SYMBOL(wx_rss_indir_size); + +u32 wx_get_rxfh_key_size(struct net_device *netdev) +{ + return WX_RSS_KEY_SIZE; +} +EXPORT_SYMBOL(wx_get_rxfh_key_size); + +static void wx_get_reta(struct wx *wx, u32 *indir) +{ + u32 reta_size = wx_rss_indir_tbl_entries(wx); + u16 rss_m = wx->ring_feature[RING_F_RSS].mask; + + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) + rss_m = wx->ring_feature[RING_F_RSS].indices - 1; + + for (u32 i = 0; i < reta_size; i++) + indir[i] = wx->rss_indir_tbl[i] & rss_m; +} + +int wx_get_rxfh(struct net_device *netdev, + struct ethtool_rxfh_param *rxfh) +{ + struct wx *wx = netdev_priv(netdev); + + rxfh->hfunc = ETH_RSS_HASH_TOP; + + if (rxfh->indir) + wx_get_reta(wx, rxfh->indir); + + if (rxfh->key) + memcpy(rxfh->key, wx->rss_key, WX_RSS_KEY_SIZE); + + return 0; +} +EXPORT_SYMBOL(wx_get_rxfh); + +int wx_set_rxfh(struct net_device *netdev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct wx *wx = netdev_priv(netdev); + u32 reta_entries, i; + + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + reta_entries = wx_rss_indir_tbl_entries(wx); + /* Fill out the redirection table */ + if (rxfh->indir) { + for (i = 0; i < reta_entries; i++) + wx->rss_indir_tbl[i] = rxfh->indir[i]; + + wx_store_reta(wx); + } + + /* Fill out the rss hash key */ + if (rxfh->key) { + memcpy(wx->rss_key, rxfh->key, WX_RSS_KEY_SIZE); + wx_store_rsskey(wx); + } + + return 0; +} +EXPORT_SYMBOL(wx_set_rxfh); + +static const struct wx_rss_flow_map rss_flow_table[] = { + { TCP_V4_FLOW, RXH_L4_B_0_1 | RXH_L4_B_2_3, WX_RSS_FIELD_IPV4_TCP }, + { TCP_V6_FLOW, RXH_L4_B_0_1 | RXH_L4_B_2_3, WX_RSS_FIELD_IPV6_TCP }, + { UDP_V4_FLOW, RXH_L4_B_0_1 | RXH_L4_B_2_3, WX_RSS_FIELD_IPV4_UDP }, + { UDP_V6_FLOW, RXH_L4_B_0_1 | RXH_L4_B_2_3, WX_RSS_FIELD_IPV6_UDP }, + { SCTP_V4_FLOW, RXH_L4_B_0_1 | RXH_L4_B_2_3, WX_RSS_FIELD_IPV4_SCTP }, + { SCTP_V6_FLOW, RXH_L4_B_0_1 | RXH_L4_B_2_3, WX_RSS_FIELD_IPV6_SCTP }, +}; + +int wx_get_rxfh_fields(struct net_device *dev, + struct ethtool_rxfh_fields *nfc) +{ + struct wx *wx = netdev_priv(dev); + + nfc->data = RXH_IP_SRC | RXH_IP_DST; + + for (u32 i = 0; i < ARRAY_SIZE(rss_flow_table); i++) { + const struct wx_rss_flow_map *entry = &rss_flow_table[i]; + + if (entry->flow_type == nfc->flow_type) { + if (wx->rss_flags & entry->flag) + nfc->data |= entry->data; + break; + } + } + + return 0; +} +EXPORT_SYMBOL(wx_get_rxfh_fields); + +int wx_set_rxfh_fields(struct net_device *dev, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack) +{ + struct wx *wx = 
netdev_priv(dev); + u8 flags = wx->rss_flags; + + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + + for (u32 i = 0; i < ARRAY_SIZE(rss_flow_table); i++) { + const struct wx_rss_flow_map *entry = &rss_flow_table[i]; + + if (entry->flow_type == nfc->flow_type) { + if (nfc->data & entry->data) + flags |= entry->flag; + else + flags &= ~entry->flag; + + if (flags != wx->rss_flags) { + wx->rss_flags = flags; + wx_config_rss_field(wx); + } + + return 0; + } + } + + return -EINVAL; +} +EXPORT_SYMBOL(wx_set_rxfh_fields); + u32 wx_get_msglevel(struct net_device *netdev) { struct wx *wx = netdev_priv(netdev); @@ -531,3 +682,36 @@ void wx_get_ptp_stats(struct net_device *dev, } } EXPORT_SYMBOL(wx_get_ptp_stats); + +static int wx_get_link_ksettings_vf(struct net_device *netdev, + struct ethtool_link_ksettings *cmd) +{ + struct wx *wx = netdev_priv(netdev); + + ethtool_link_ksettings_zero_link_mode(cmd, supported); + cmd->base.autoneg = AUTONEG_DISABLE; + cmd->base.port = PORT_NONE; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.speed = wx->speed; + + return 0; +} + +static const struct ethtool_ops wx_ethtool_ops_vf = { + .supported_coalesce_params = ETHTOOL_COALESCE_USECS | + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE, + .get_drvinfo = wx_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ringparam = wx_get_ringparam, + .get_msglevel = wx_get_msglevel, + .get_coalesce = wx_get_coalesce, + .get_ts_info = ethtool_op_get_ts_info, + .get_link_ksettings = wx_get_link_ksettings_vf, +}; + +void wx_set_ethtool_ops_vf(struct net_device *netdev) +{ + netdev->ethtool_ops = &wx_ethtool_ops_vf; +} +EXPORT_SYMBOL(wx_set_ethtool_ops_vf); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h index 9e002e699eca..727093970462 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_ethtool.h @@ -38,10 +38,23 @@ void wx_get_channels(struct net_device *dev, struct ethtool_channels *ch); int wx_set_channels(struct net_device *dev, struct ethtool_channels *ch); +u32 wx_rss_indir_size(struct net_device *netdev); +u32 wx_get_rxfh_key_size(struct net_device *netdev); +int wx_get_rxfh(struct net_device *netdev, + struct ethtool_rxfh_param *rxfh); +int wx_set_rxfh(struct net_device *netdev, + struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); +int wx_get_rxfh_fields(struct net_device *dev, + struct ethtool_rxfh_fields *cmd); +int wx_set_rxfh_fields(struct net_device *dev, + const struct ethtool_rxfh_fields *nfc, + struct netlink_ext_ack *extack); u32 wx_get_msglevel(struct net_device *netdev); void wx_set_msglevel(struct net_device *netdev, u32 data); int wx_get_ts_info(struct net_device *dev, struct kernel_ethtool_ts_info *info); void wx_get_ptp_stats(struct net_device *dev, struct ethtool_ts_stats *ts_stats); +void wx_set_ethtool_ops_vf(struct net_device *netdev); #endif /* _WX_ETHTOOL_H_ */ diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c index bcd07a715752..1e2713f0c921 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c @@ -1998,8 +1998,17 @@ static void wx_restore_vlan(struct wx *wx) wx_vlan_rx_add_vid(wx->netdev, htons(ETH_P_8021Q), vid); } -static void wx_store_reta(struct wx *wx) +u32 wx_rss_indir_tbl_entries(struct wx *wx) { + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) + return 64; + else + return 128; +} + +void 
wx_store_reta(struct wx *wx) +{ + u32 reta_entries = wx_rss_indir_tbl_entries(wx); u8 *indir_tbl = wx->rss_indir_tbl; u32 reta = 0; u32 i; @@ -2007,45 +2016,110 @@ static void wx_store_reta(struct wx *wx) /* Fill out the redirection table as follows: * - 8 bit wide entries containing 4 bit RSS index */ - for (i = 0; i < WX_MAX_RETA_ENTRIES; i++) { + for (i = 0; i < reta_entries; i++) { reta |= indir_tbl[i] << (i & 0x3) * 8; if ((i & 3) == 3) { - wr32(wx, WX_RDB_RSSTBL(i >> 2), reta); + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags) && + test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) + wr32(wx, WX_RDB_VMRSSTBL(i >> 2, wx->num_vfs), reta); + else + wr32(wx, WX_RDB_RSSTBL(i >> 2), reta); reta = 0; } } } -static void wx_setup_reta(struct wx *wx) +void wx_store_rsskey(struct wx *wx) { - u16 rss_i = wx->ring_feature[RING_F_RSS].indices; - u32 random_key_size = WX_RSS_KEY_SIZE / 4; - u32 i, j; + u32 key_size = WX_RSS_KEY_SIZE / 4; + u32 i; - if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) { - if (wx->mac.type == wx_mac_em) - rss_i = 1; - else - rss_i = rss_i < 4 ? 4 : rss_i; + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags) && + test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) { + for (i = 0; i < key_size; i++) + wr32(wx, WX_RDB_VMRSSRK(i, wx->num_vfs), + wx->rss_key[i]); + } else { + for (i = 0; i < key_size; i++) + wr32(wx, WX_RDB_RSSRK(i), wx->rss_key[i]); } +} +static void wx_setup_reta(struct wx *wx) +{ /* Fill out hash function seeds */ - for (i = 0; i < random_key_size; i++) - wr32(wx, WX_RDB_RSSRK(i), wx->rss_key[i]); + wx_store_rsskey(wx); /* Fill out redirection table */ - memset(wx->rss_indir_tbl, 0, sizeof(wx->rss_indir_tbl)); + if (!netif_is_rxfh_configured(wx->netdev)) { + u16 rss_i = wx->ring_feature[RING_F_RSS].indices; + u32 reta_entries = wx_rss_indir_tbl_entries(wx); + u32 i, j; + + memset(wx->rss_indir_tbl, 0, sizeof(wx->rss_indir_tbl)); + + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags)) { + if (test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) + rss_i = rss_i < 2 ? 2 : rss_i; + else + rss_i = 1; + } - for (i = 0, j = 0; i < WX_MAX_RETA_ENTRIES; i++, j++) { - if (j == rss_i) - j = 0; + for (i = 0, j = 0; i < reta_entries; i++, j++) { + if (j == rss_i) + j = 0; - wx->rss_indir_tbl[i] = j; + wx->rss_indir_tbl[i] = j; + } } wx_store_reta(wx); } +void wx_config_rss_field(struct wx *wx) +{ + u32 rss_field; + + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags) && + test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) { + rss_field = rd32(wx, WX_RDB_PL_CFG(wx->num_vfs)); + rss_field &= ~WX_RDB_PL_CFG_RSS_MASK; + rss_field |= FIELD_PREP(WX_RDB_PL_CFG_RSS_MASK, wx->rss_flags); + wr32(wx, WX_RDB_PL_CFG(wx->num_vfs), rss_field); + + /* Enable global RSS and multiple RSS to make the RSS + * field of each pool take effect. 
+ */ + wr32m(wx, WX_RDB_RA_CTL, + WX_RDB_RA_CTL_MULTI_RSS | WX_RDB_RA_CTL_RSS_EN, + WX_RDB_RA_CTL_MULTI_RSS | WX_RDB_RA_CTL_RSS_EN); + } else { + rss_field = rd32(wx, WX_RDB_RA_CTL); + rss_field &= ~WX_RDB_RA_CTL_RSS_MASK; + rss_field |= FIELD_PREP(WX_RDB_RA_CTL_RSS_MASK, wx->rss_flags); + wr32(wx, WX_RDB_RA_CTL, rss_field); + } +} + +void wx_enable_rss(struct wx *wx, bool enable) +{ + if (test_bit(WX_FLAG_SRIOV_ENABLED, wx->flags) && + test_bit(WX_FLAG_MULTI_64_FUNC, wx->flags)) { + if (enable) + wr32m(wx, WX_RDB_PL_CFG(wx->num_vfs), + WX_RDB_PL_CFG_RSS_EN, WX_RDB_PL_CFG_RSS_EN); + else + wr32m(wx, WX_RDB_PL_CFG(wx->num_vfs), + WX_RDB_PL_CFG_RSS_EN, 0); + } else { + if (enable) + wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, + WX_RDB_RA_CTL_RSS_EN); + else + wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, 0); + } +} + #define WX_RDB_RSS_PL_2 FIELD_PREP(GENMASK(31, 29), 1) #define WX_RDB_RSS_PL_4 FIELD_PREP(GENMASK(31, 29), 2) static void wx_setup_psrtype(struct wx *wx) @@ -2076,31 +2150,12 @@ static void wx_setup_psrtype(struct wx *wx) static void wx_setup_mrqc(struct wx *wx) { - u32 rss_field = 0; - - /* VT, and RSS do not coexist at the same time */ - if (test_bit(WX_FLAG_VMDQ_ENABLED, wx->flags)) - return; - /* Disable indicating checksum in descriptor, enables RSS hash */ wr32m(wx, WX_PSR_CTL, WX_PSR_CTL_PCSD, WX_PSR_CTL_PCSD); - /* Perform hash on these packet types */ - rss_field = WX_RDB_RA_CTL_RSS_IPV4 | - WX_RDB_RA_CTL_RSS_IPV4_TCP | - WX_RDB_RA_CTL_RSS_IPV4_UDP | - WX_RDB_RA_CTL_RSS_IPV6 | - WX_RDB_RA_CTL_RSS_IPV6_TCP | - WX_RDB_RA_CTL_RSS_IPV6_UDP; - - netdev_rss_key_fill(wx->rss_key, sizeof(wx->rss_key)); - + wx_config_rss_field(wx); + wx_enable_rss(wx, wx->rss_enabled); wx_setup_reta(wx); - - if (wx->rss_enabled) - rss_field |= WX_RDB_RA_CTL_RSS_EN; - - wr32(wx, WX_RDB_RA_CTL, rss_field); } /** @@ -2393,6 +2448,8 @@ int wx_sw_init(struct wx *wx) wx_err(wx, "rss key allocation failed\n"); return err; } + wx->rss_flags = WX_RSS_FIELD_IPV4 | WX_RSS_FIELD_IPV4_TCP | + WX_RSS_FIELD_IPV6 | WX_RSS_FIELD_IPV6_TCP; wx->mac_table = kcalloc(wx->mac.num_rar_entries, sizeof(struct wx_mac_addr), diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h index 2393a743b564..13857376bbad 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h @@ -39,6 +39,11 @@ void wx_set_rx_mode(struct net_device *netdev); int wx_change_mtu(struct net_device *netdev, int new_mtu); void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring); void wx_enable_rx_queue(struct wx *wx, struct wx_ring *ring); +u32 wx_rss_indir_tbl_entries(struct wx *wx); +void wx_store_reta(struct wx *wx); +void wx_store_rsskey(struct wx *wx); +void wx_config_rss_field(struct wx *wx); +void wx_enable_rss(struct wx *wx, bool enable); void wx_configure_rx(struct wx *wx); void wx_configure(struct wx *wx); void wx_start_hw(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c index 723785ef87bb..3adf7048320a 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c @@ -16,6 +16,7 @@ #include "wx_lib.h" #include "wx_ptp.h" #include "wx_hw.h" +#include "wx_vf_lib.h" /* Lookup table mapping the HW PTYPE to the bit field for decoding */ static struct wx_dec_ptype wx_ptype_lookup[256] = { @@ -832,6 +833,36 @@ static bool wx_clean_tx_irq(struct wx_q_vector *q_vector, return !!budget; } +static void wx_update_rx_dim_sample(struct 
wx_q_vector *q_vector) +{ + struct dim_sample sample = {}; + + dim_update_sample(q_vector->total_events, + q_vector->rx.total_packets, + q_vector->rx.total_bytes, + &sample); + + net_dim(&q_vector->rx.dim, &sample); +} + +static void wx_update_tx_dim_sample(struct wx_q_vector *q_vector) +{ + struct dim_sample sample = {}; + + dim_update_sample(q_vector->total_events, + q_vector->tx.total_packets, + q_vector->tx.total_bytes, + &sample); + + net_dim(&q_vector->tx.dim, &sample); +} + +static void wx_update_dim_sample(struct wx_q_vector *q_vector) +{ + wx_update_rx_dim_sample(q_vector); + wx_update_tx_dim_sample(q_vector); +} + /** * wx_poll - NAPI polling RX/TX cleanup routine * @napi: napi struct with our devices info in it @@ -878,6 +909,8 @@ static int wx_poll(struct napi_struct *napi, int budget) /* all work done, exit the polling mode */ if (likely(napi_complete_done(napi, work_done))) { + if (wx->adaptive_itr) + wx_update_dim_sample(q_vector); if (netif_running(wx->netdev)) wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx)); } @@ -1591,6 +1624,65 @@ netdev_tx_t wx_xmit_frame(struct sk_buff *skb, } EXPORT_SYMBOL(wx_xmit_frame); +static void wx_set_itr(struct wx_q_vector *q_vector) +{ + struct wx *wx = q_vector->wx; + u32 new_itr; + + if (!wx->adaptive_itr) + return; + + /* use the smallest value of new ITR delay calculations */ + new_itr = min(q_vector->rx.itr, q_vector->tx.itr); + new_itr <<= 2; + + if (new_itr != q_vector->itr) { + /* save the algorithm value here */ + q_vector->itr = new_itr; + + if (wx->pdev->is_virtfn) + wx_write_eitr_vf(q_vector); + else + wx_write_eitr(q_vector); + } +} + +static void wx_rx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct dim_cq_moder rx_moder; + struct wx_ring_container *rx; + struct wx_q_vector *q_vector; + + rx = container_of(dim, struct wx_ring_container, dim); + + rx_moder = net_dim_get_rx_moderation(dim->mode, dim->profile_ix); + rx->itr = rx_moder.usec; + + q_vector = container_of(rx, struct wx_q_vector, rx); + wx_set_itr(q_vector); + + dim->state = DIM_START_MEASURE; +} + +static void wx_tx_dim_work(struct work_struct *work) +{ + struct dim *dim = container_of(work, struct dim, work); + struct dim_cq_moder tx_moder; + struct wx_ring_container *tx; + struct wx_q_vector *q_vector; + + tx = container_of(dim, struct wx_ring_container, dim); + + tx_moder = net_dim_get_tx_moderation(dim->mode, dim->profile_ix); + tx->itr = tx_moder.usec; + + q_vector = container_of(tx, struct wx_q_vector, tx); + wx_set_itr(q_vector); + + dim->state = DIM_START_MEASURE; +} + void wx_napi_enable_all(struct wx *wx) { struct wx_q_vector *q_vector; @@ -1598,6 +1690,11 @@ void wx_napi_enable_all(struct wx *wx) for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) { q_vector = wx->q_vector[q_idx]; + + INIT_WORK(&q_vector->rx.dim.work, wx_rx_dim_work); + INIT_WORK(&q_vector->tx.dim.work, wx_tx_dim_work); + q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; + q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_CQE; napi_enable(&q_vector->napi); } } @@ -1611,6 +1708,8 @@ void wx_napi_disable_all(struct wx *wx) for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) { q_vector = wx->q_vector[q_idx]; napi_disable(&q_vector->napi); + disable_work_sync(&q_vector->rx.dim.work); + disable_work_sync(&q_vector->tx.dim.work); } } EXPORT_SYMBOL(wx_napi_disable_all); @@ -2197,8 +2296,10 @@ irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data) struct wx_q_vector *q_vector = data; /* EIAM disabled interrupts (on 
this vector) for us */ - if (q_vector->rx.ring || q_vector->tx.ring) + if (q_vector->rx.ring || q_vector->tx.ring) { napi_schedule_irqoff(&q_vector->napi); + q_vector->total_events++; + } return IRQ_HANDLED; } @@ -2915,14 +3016,8 @@ int wx_set_features(struct net_device *netdev, netdev_features_t features) struct wx *wx = netdev_priv(netdev); bool need_reset = false; - if (features & NETIF_F_RXHASH) { - wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, - WX_RDB_RA_CTL_RSS_EN); - wx->rss_enabled = true; - } else { - wr32m(wx, WX_RDB_RA_CTL, WX_RDB_RA_CTL_RSS_EN, 0); - wx->rss_enabled = false; - } + wx->rss_enabled = !!(features & NETIF_F_RXHASH); + wx_enable_rss(wx, wx->rss_enabled); netdev->features = features; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c index c82ae137756c..c6d158cd70da 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_sriov.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_sriov.c @@ -150,6 +150,12 @@ static int wx_pci_sriov_enable(struct pci_dev *dev, struct wx *wx = pci_get_drvdata(dev); int err = 0, i; + if (netif_is_rxfh_configured(wx->netdev)) { + wx_err(wx, "Cannot enable SR-IOV while RXFH is configured\n"); + wx_err(wx, "Run 'ethtool -X <if> default' to reset RSS table\n"); + return -EBUSY; + } + err = __wx_enable_sriov(wx, num_vfs); if (err) return err; @@ -173,12 +179,20 @@ err_out: return err; } -static void wx_pci_sriov_disable(struct pci_dev *dev) +static int wx_pci_sriov_disable(struct pci_dev *dev) { struct wx *wx = pci_get_drvdata(dev); + if (netif_is_rxfh_configured(wx->netdev)) { + wx_err(wx, "Cannot disable SR-IOV while RXFH is configured\n"); + wx_err(wx, "Run 'ethtool -X <if> default' to reset RSS table\n"); + return -EBUSY; + } + wx_disable_sriov(wx); wx_sriov_reinit(wx); + + return 0; } int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) @@ -187,10 +201,8 @@ int wx_pci_sriov_configure(struct pci_dev *pdev, int num_vfs) int err; if (!num_vfs) { - if (!pci_vfs_assigned(pdev)) { - wx_pci_sriov_disable(pdev); - return 0; - } + if (!pci_vfs_assigned(pdev)) + return wx_pci_sriov_disable(pdev); wx_err(wx, "can't free VFs because some are assigned to VMs.\n"); return -EBUSY; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h index 9d5d10f9e410..d89b9b8a0a2c 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_type.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h @@ -10,6 +10,7 @@ #include <linux/netdevice.h> #include <linux/if_vlan.h> #include <linux/phylink.h> +#include <linux/dim.h> #include <net/ip.h> #define WX_NCSI_SUP 0x8000 @@ -167,9 +168,12 @@ #define WX_RDB_PL_CFG_L2HDR BIT(3) #define WX_RDB_PL_CFG_TUN_TUNHDR BIT(4) #define WX_RDB_PL_CFG_TUN_OUTL2HDR BIT(5) +#define WX_RDB_PL_CFG_RSS_EN BIT(24) +#define WX_RDB_PL_CFG_RSS_MASK GENMASK(23, 16) #define WX_RDB_RSSTBL(_i) (0x19400 + ((_i) * 4)) #define WX_RDB_RSSRK(_i) (0x19480 + ((_i) * 4)) #define WX_RDB_RA_CTL 0x194F4 +#define WX_RDB_RA_CTL_MULTI_RSS BIT(0) #define WX_RDB_RA_CTL_RSS_EN BIT(2) /* RSS Enable */ #define WX_RDB_RA_CTL_RSS_IPV4_TCP BIT(16) #define WX_RDB_RA_CTL_RSS_IPV4 BIT(17) @@ -177,8 +181,12 @@ #define WX_RDB_RA_CTL_RSS_IPV6_TCP BIT(21) #define WX_RDB_RA_CTL_RSS_IPV4_UDP BIT(22) #define WX_RDB_RA_CTL_RSS_IPV6_UDP BIT(23) +#define WX_RDB_RA_CTL_RSS_MASK GENMASK(23, 16) #define WX_RDB_FDIR_MATCH 0x19558 #define WX_RDB_FDIR_MISS 0x1955C +/* VM RSS */ +#define WX_RDB_VMRSSRK(_i, _p) (0x1A000 + ((_i) * 4) + ((_p) * 0x40)) +#define WX_RDB_VMRSSTBL(_i, _p) (0x1B000 + 
((_i) * 4) + ((_p) * 0x40)) /******************************* PSR Registers *******************************/ /* psr control */ @@ -1033,6 +1041,7 @@ struct wx_ring_container { unsigned int total_packets; /* total packets processed this int */ u8 count; /* total number of rings in vector */ u8 itr; /* current ITR setting for ring */ + struct dim dim; /* data for net_dim algorithm */ }; struct wx_ring { struct wx_ring *next; /* pointer to next ring in q_vector */ @@ -1089,6 +1098,8 @@ struct wx_q_vector { struct napi_struct napi; struct rcu_head rcu; /* to avoid race with update stats on free */ + u16 total_events; /* number of interrupts processed */ + char name[IFNAMSIZ + 17]; /* for dynamic allocation of rings associated with this q_vector */ @@ -1188,6 +1199,21 @@ struct vf_macvlans { u8 vf_macvlan[ETH_ALEN]; }; +#define WX_RSS_FIELD_IPV4_TCP BIT(0) +#define WX_RSS_FIELD_IPV4 BIT(1) +#define WX_RSS_FIELD_IPV4_SCTP BIT(2) +#define WX_RSS_FIELD_IPV6_SCTP BIT(3) +#define WX_RSS_FIELD_IPV6_TCP BIT(4) +#define WX_RSS_FIELD_IPV6 BIT(5) +#define WX_RSS_FIELD_IPV4_UDP BIT(6) +#define WX_RSS_FIELD_IPV6_UDP BIT(7) + +struct wx_rss_flow_map { + u8 flow_type; + u32 data; + u8 flag; +}; + enum wx_pf_flags { WX_FLAG_MULTI_64_FUNC, WX_FLAG_SWFW_RING, @@ -1268,6 +1294,7 @@ struct wx { int num_rx_queues; u16 rx_itr_setting; u16 rx_work_limit; + bool adaptive_itr; int num_q_vectors; /* current number of q_vectors for device */ int max_q_vectors; /* upper limit of q_vectors for device */ @@ -1297,6 +1324,7 @@ struct wx { #define WX_MAX_RETA_ENTRIES 128 #define WX_RSS_INDIR_TBL_MAX 64 u8 rss_indir_tbl[WX_MAX_RETA_ENTRIES]; + u8 rss_flags; bool rss_enabled; #define WX_RSS_KEY_SIZE 40 /* size of RSS Hash Key in bytes */ u32 *rss_key; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf.h b/drivers/net/ethernet/wangxun/libwx/wx_vf.h index fec1126703e3..3f16de0fa427 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf.h +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf.h @@ -4,6 +4,7 @@ #ifndef _WX_VF_H_ #define _WX_VF_H_ +/* Control registers */ #define WX_VF_MAX_RING_NUMS 8 #define WX_VX_PF_BME 0x4B8 #define WX_VF_BME_ENABLE BIT(0) @@ -12,16 +13,32 @@ #define WX_VXCTRL_RST BIT(0) #define WX_VXMRQC 0x78 +#define WX_VXMRQC_PSR_L4HDR BIT(0) +#define WX_VXMRQC_PSR_L3HDR BIT(1) +#define WX_VXMRQC_PSR_L2HDR BIT(2) +#define WX_VXMRQC_PSR_TUNHDR BIT(3) +#define WX_VXMRQC_PSR_TUNMAC BIT(4) +#define WX_VXMRQC_PSR_MASK GENMASK(5, 1) +#define WX_VXMRQC_PSR(f) FIELD_PREP(GENMASK(5, 1), f) +#define WX_VXMRQC_RSS_HASH(f) FIELD_PREP(GENMASK(15, 13), f) +#define WX_VXMRQC_RSS_MASK GENMASK(31, 16) +#define WX_VXMRQC_RSS(f) FIELD_PREP(GENMASK(31, 16), f) +#define WX_VXMRQC_RSS_ALG_IPV4_TCP BIT(0) +#define WX_VXMRQC_RSS_ALG_IPV4 BIT(1) +#define WX_VXMRQC_RSS_ALG_IPV6 BIT(4) +#define WX_VXMRQC_RSS_ALG_IPV6_TCP BIT(5) +#define WX_VXMRQC_RSS_EN BIT(8) + +#define WX_VXRSSRK(i) (0x80 + ((i) * 4)) /* i=[0,9] */ +#define WX_VXRETA(i) (0xC0 + ((i) * 4)) /* i=[0,15] */ + +/* Interrupt registers */ #define WX_VXICR 0x100 #define WX_VXIMS 0x108 #define WX_VXIMC 0x10C #define WX_VF_IRQ_CLEAR_MASK 7 #define WX_VF_MAX_TX_QUEUES 4 #define WX_VF_MAX_RX_QUEUES 4 -#define WX_VXTXDCTL(r) (0x3010 + (0x40 * (r))) -#define WX_VXRXDCTL(r) (0x1010 + (0x40 * (r))) -#define WX_VXRXDCTL_ENABLE BIT(0) -#define WX_VXTXDCTL_FLUSH BIT(26) #define WX_VXITR(i) (0x200 + (4 * (i))) /* i=[0,1] */ #define WX_VXITR_MASK GENMASK(8, 0) @@ -29,16 +46,6 @@ #define WX_VXIVAR_MISC 0x260 #define WX_VXIVAR(i) (0x240 + (4 * (i))) /* i=[0,3] */ -#define WX_VXRXDCTL_RSCMAX(f) 
FIELD_PREP(GENMASK(24, 23), f) -#define WX_VXRXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) -#define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f) -#define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f) - -#define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23) -#define WX_VXRXDCTL_BUFLEN_MASK GENMASK(6, 1) -#define WX_VXRXDCTL_BUFSZ_MASK GENMASK(11, 8) -#define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12) - #define wx_conf_size(v, mwidth, uwidth) ({ \ typeof(v) _v = (v); \ (_v == 2 << (mwidth) ? 0 : _v >> (uwidth)); \ @@ -59,44 +66,35 @@ #define WX_VXRDBAH(r) (0x1004 + (0x40 * (r))) #define WX_VXRDT(r) (0x1008 + (0x40 * (r))) #define WX_VXRDH(r) (0x100C + (0x40 * (r))) - +#define WX_VXRXDCTL(r) (0x1010 + (0x40 * (r))) +#define WX_VXRXDCTL_ENABLE BIT(0) +#define WX_VXRXDCTL_BUFLEN_MASK GENMASK(6, 1) +#define WX_VXRXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) +#define WX_VXRXDCTL_BUFSZ_MASK GENMASK(11, 8) +#define WX_VXRXDCTL_BUFSZ(f) FIELD_PREP(GENMASK(11, 8), f) +#define WX_VXRXDCTL_HDRSZ_MASK GENMASK(15, 12) +#define WX_VXRXDCTL_HDRSZ(f) FIELD_PREP(GENMASK(15, 12), f) +#define WX_VXRXDCTL_RSCMAX_MASK GENMASK(24, 23) +#define WX_VXRXDCTL_RSCMAX(f) FIELD_PREP(GENMASK(24, 23), f) #define WX_VXRXDCTL_RSCEN BIT(29) #define WX_VXRXDCTL_DROP BIT(30) #define WX_VXRXDCTL_VLAN BIT(31) +/* Transmit Path */ #define WX_VXTDBAL(r) (0x3000 + (0x40 * (r))) #define WX_VXTDBAH(r) (0x3004 + (0x40 * (r))) #define WX_VXTDT(r) (0x3008 + (0x40 * (r))) #define WX_VXTDH(r) (0x300C + (0x40 * (r))) - +#define WX_VXTXDCTL(r) (0x3010 + (0x40 * (r))) #define WX_VXTXDCTL_ENABLE BIT(0) #define WX_VXTXDCTL_BUFLEN(f) FIELD_PREP(GENMASK(6, 1), f) #define WX_VXTXDCTL_PTHRESH(f) FIELD_PREP(GENMASK(11, 8), f) #define WX_VXTXDCTL_WTHRESH(f) FIELD_PREP(GENMASK(22, 16), f) - -#define WX_VXMRQC_PSR(f) FIELD_PREP(GENMASK(5, 1), f) -#define WX_VXMRQC_PSR_MASK GENMASK(5, 1) -#define WX_VXMRQC_PSR_L4HDR BIT(0) -#define WX_VXMRQC_PSR_L3HDR BIT(1) -#define WX_VXMRQC_PSR_L2HDR BIT(2) -#define WX_VXMRQC_PSR_TUNHDR BIT(3) -#define WX_VXMRQC_PSR_TUNMAC BIT(4) - -#define WX_VXRSSRK(i) (0x80 + ((i) * 4)) /* i=[0,9] */ -#define WX_VXRETA(i) (0xC0 + ((i) * 4)) /* i=[0,15] */ - -#define WX_VXMRQC_RSS(f) FIELD_PREP(GENMASK(31, 16), f) -#define WX_VXMRQC_RSS_MASK GENMASK(31, 16) -#define WX_VXMRQC_RSS_ALG_IPV4_TCP BIT(0) -#define WX_VXMRQC_RSS_ALG_IPV4 BIT(1) -#define WX_VXMRQC_RSS_ALG_IPV6 BIT(4) -#define WX_VXMRQC_RSS_ALG_IPV6_TCP BIT(5) -#define WX_VXMRQC_RSS_EN BIT(8) -#define WX_VXMRQC_RSS_HASH(f) FIELD_PREP(GENMASK(15, 13), f) #define WX_PFLINK_STATUS(g) FIELD_GET(BIT(0), g) #define WX_PFLINK_SPEED(g) FIELD_GET(GENMASK(31, 1), g) -#define WX_VXSTATUS_SPEED(g) FIELD_GET(GENMASK(4, 1), g) +#define WX_VXSTATUS_SPEED(g) FIELD_GET(GENMASK(4, 1), g) struct wx_link_reg_fields { u32 mac_type; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c index 3023ea2732ef..a87887b9f8ee 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c +++ b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.c @@ -10,7 +10,7 @@ #include "wx_vf.h" #include "wx_vf_lib.h" -static void wx_write_eitr_vf(struct wx_q_vector *q_vector) +void wx_write_eitr_vf(struct wx_q_vector *q_vector) { struct wx *wx = q_vector->wx; int v_idx = q_vector->v_idx; diff --git a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h index 43ea126b79eb..a4bd23c92800 100644 --- a/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h +++ 
b/drivers/net/ethernet/wangxun/libwx/wx_vf_lib.h @@ -4,6 +4,7 @@ #ifndef _WX_VF_LIB_H_ #define _WX_VF_LIB_H_ +void wx_write_eitr_vf(struct wx_q_vector *q_vector); void wx_configure_msix_vf(struct wx *wx); int wx_write_uc_addr_list_vf(struct net_device *netdev); void wx_setup_psrtype_vf(struct wx *wx); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 7e2d9ec38a30..662f28bdde8a 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -115,7 +115,8 @@ static int ngbe_set_channels(struct net_device *dev, static const struct ethtool_ops ngbe_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = wx_get_drvinfo, .get_link = ethtool_op_get_link, .get_link_ksettings = wx_get_link_ksettings, @@ -136,6 +137,12 @@ static const struct ethtool_ops ngbe_ethtool_ops = { .set_coalesce = wx_set_coalesce, .get_channels = wx_get_channels, .set_channels = ngbe_set_channels, + .get_rxfh_fields = wx_get_rxfh_fields, + .set_rxfh_fields = wx_set_rxfh_fields, + .get_rxfh_indir_size = wx_rss_indir_size, + .get_rxfh_key_size = wx_get_rxfh_key_size, + .get_rxfh = wx_get_rxfh, + .set_rxfh = wx_set_rxfh, .get_msglevel = wx_get_msglevel, .set_msglevel = wx_set_msglevel, .get_ts_info = wx_get_ts_info, diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e0fc897b0a58..58488e138beb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -119,9 +119,9 @@ static int ngbe_sw_init(struct wx *wx) num_online_cpus()); wx->rss_enabled = true; - /* enable itr by default in dynamic mode */ - wx->rx_itr_setting = 1; - wx->tx_itr_setting = 1; + wx->adaptive_itr = false; + wx->rx_itr_setting = WX_7K_ITR; + wx->tx_itr_setting = WX_7K_ITR; /* set default ring sizes */ wx->tx_ring_count = NGBE_DEFAULT_TXD; diff --git a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c index c1246ab5239c..6ef43adcc425 100644 --- a/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c +++ b/drivers/net/ethernet/wangxun/ngbevf/ngbevf_main.c @@ -14,6 +14,7 @@ #include "../libwx/wx_mbx.h" #include "../libwx/wx_vf.h" #include "../libwx/wx_vf_common.h" +#include "../libwx/wx_ethtool.h" #include "ngbevf_type.h" /* ngbevf_pci_tbl - PCI Device ID Table @@ -100,6 +101,7 @@ static int ngbevf_sw_init(struct wx *wx) wx->mac.max_tx_queues = NGBEVF_MAX_TX_QUEUES; wx->mac.max_rx_queues = NGBEVF_MAX_RX_QUEUES; /* Enable dynamic interrupt throttling rates */ + wx->adaptive_itr = true; wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; /* set default ring sizes */ @@ -185,6 +187,8 @@ static int ngbevf_probe(struct pci_dev *pdev, goto err_pci_release_regions; } + wx->driver_name = KBUILD_MODNAME; + wx_set_ethtool_ops_vf(netdev); netdev->netdev_ops = &ngbevf_netdev_ops; /* setup the private structure */ @@ -202,6 +206,7 @@ static int ngbevf_probe(struct pci_dev *pdev, if (err) goto err_free_sw_init; + wx_get_fw_version_vf(wx); err = register_netdev(netdev); if (err) goto err_register; diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c index a4753402660e..e285b088c7b2 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c +++ 
b/drivers/net/ethernet/wangxun/txgbe/txgbe_ethtool.c @@ -538,7 +538,8 @@ static int txgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) static const struct ethtool_ops txgbe_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | - ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ, + ETHTOOL_COALESCE_TX_MAX_FRAMES_IRQ | + ETHTOOL_COALESCE_USE_ADAPTIVE, .get_drvinfo = wx_get_drvinfo, .nway_reset = wx_nway_reset, .get_link = ethtool_op_get_link, @@ -559,6 +560,12 @@ static const struct ethtool_ops txgbe_ethtool_ops = { .set_channels = txgbe_set_channels, .get_rxnfc = txgbe_get_rxnfc, .set_rxnfc = txgbe_set_rxnfc, + .get_rxfh_fields = wx_get_rxfh_fields, + .set_rxfh_fields = wx_set_rxfh_fields, + .get_rxfh_indir_size = wx_rss_indir_size, + .get_rxfh_key_size = wx_get_rxfh_key_size, + .get_rxfh = wx_get_rxfh, + .set_rxfh = wx_set_rxfh, .get_msglevel = wx_get_msglevel, .set_msglevel = wx_set_msglevel, .get_ts_info = wx_get_ts_info, diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c index a5867f3c93fc..c4c4d70d8466 100644 --- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c +++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c @@ -401,6 +401,7 @@ static int txgbe_sw_init(struct wx *wx) set_bit(WX_FLAG_MULTI_64_FUNC, wx->flags); /* enable itr by default in dynamic mode */ + wx->adaptive_itr = true; wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; diff --git a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c index ebfce3cf753e..72663e3c4205 100644 --- a/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c +++ b/drivers/net/ethernet/wangxun/txgbevf/txgbevf_main.c @@ -14,6 +14,7 @@ #include "../libwx/wx_mbx.h" #include "../libwx/wx_vf.h" #include "../libwx/wx_vf_common.h" +#include "../libwx/wx_ethtool.h" #include "txgbevf_type.h" /* txgbevf_pci_tbl - PCI Device ID Table @@ -144,6 +145,7 @@ static int txgbevf_sw_init(struct wx *wx) wx->mac.max_tx_queues = TXGBEVF_MAX_TX_QUEUES; wx->mac.max_rx_queues = TXGBEVF_MAX_RX_QUEUES; /* Enable dynamic interrupt throttling rates */ + wx->adaptive_itr = true; wx->rx_itr_setting = 1; wx->tx_itr_setting = 1; /* set default ring sizes */ @@ -238,6 +240,8 @@ static int txgbevf_probe(struct pci_dev *pdev, goto err_pci_release_regions; } + wx->driver_name = KBUILD_MODNAME; + wx_set_ethtool_ops_vf(netdev); netdev->netdev_ops = &txgbevf_netdev_ops; /* setup the private structure */ @@ -255,6 +259,7 @@ static int txgbevf_probe(struct pci_dev *pdev, if (err) goto err_free_sw_init; + wx_get_fw_version_vf(wx); err = register_netdev(netdev); if (err) goto err_register; diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c index b77f096eaf99..c5424d882135 100644 --- a/drivers/net/ethernet/wiznet/w5100.c +++ b/drivers/net/ethernet/wiznet/w5100.c @@ -1142,7 +1142,7 @@ int w5100_probe(struct device *dev, const struct w5100_ops *ops, if (err < 0) goto err_register; - priv->xfer_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + priv->xfer_wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_PERCPU, 0, netdev_name(ndev)); if (!priv->xfer_wq) { err = -ENOMEM; diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 0d8a05fe541a..284031fb2e2c 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -238,6 +238,8 @@ static u64 axienet_dma_rate(struct axienet_local *lp) * * Calculate a control register 
value based on the coalescing settings. The * run/stop bit is not set. + * + * Return: Control register value with coalescing settings configured. */ static u32 axienet_calc_cr(struct axienet_local *lp, u32 count, u32 usec) { @@ -702,7 +704,8 @@ static void axienet_dma_stop(struct axienet_local *lp) * are connected to Axi Ethernet reset lines, this in turn resets the Axi * Ethernet core. No separate hardware reset is done for the Axi Ethernet * core. - * Returns 0 on success or a negative error number otherwise. + * + * Return: 0 on success or a negative error number otherwise. */ static int axienet_device_reset(struct net_device *ndev) { @@ -773,7 +776,8 @@ static int axienet_device_reset(struct net_device *ndev) * * Would either be called after a successful transmit operation, or after * there was an error when setting up the chain. - * Returns the number of packets handled. + * + * Return: The number of packets handled. */ static int axienet_free_tx_chain(struct axienet_local *lp, u32 first_bd, int nr_bds, bool force, u32 *sizep, int budget) @@ -1168,6 +1172,15 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) &meta_max_len); dma_unmap_single(lp->dev, skbuf_dma->dma_address, lp->max_frm_size, DMA_FROM_DEVICE); + + if (IS_ERR(app_metadata)) { + if (net_ratelimit()) + netdev_err(lp->ndev, "Failed to get RX metadata pointer\n"); + dev_kfree_skb_any(skb); + lp->ndev->stats.rx_dropped++; + goto rx_submit; + } + /* TODO: Derive app word index programmatically */ rx_len = (app_metadata[LEN_APP] & 0xFFFF); skb_put(skb, rx_len); @@ -1180,6 +1193,7 @@ static void axienet_dma_rx_cb(void *data, const struct dmaengine_result *result) u64_stats_add(&lp->rx_bytes, rx_len); u64_stats_update_end(&lp->rx_stat_sync); +rx_submit: for (i = 0; i < CIRC_SPACE(lp->rx_ring_head, lp->rx_ring_tail, RX_BUF_NUM_DEFAULT); i++) axienet_rx_submit_desc(lp->ndev); @@ -2102,6 +2116,8 @@ static void axienet_update_coalesce_rx(struct axienet_local *lp, u32 cr, /** * axienet_dim_coalesce_count_rx() - RX coalesce count for DIM * @lp: Device private data + * + * Return: RX coalescing frame count value for DIM. */ static u32 axienet_dim_coalesce_count_rx(struct axienet_local *lp) { diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c index a31d5d5e6593..97e88886253f 100644 --- a/drivers/net/ethernet/xircom/xirc2ps_cs.c +++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c @@ -1576,7 +1576,7 @@ do_reset(struct net_device *dev, int full) msleep(40); /* wait 40 msec to let it complete */ } if (full_duplex) - PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR | FullDuplex)); + PutByte(XIRCREG1_ECR, GetByte(XIRCREG1_ECR) | FullDuplex); } else { /* No MII */ SelectPage(0); value = GetByte(XIRCREG_ESR); /* read the ESR */ |
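For reference, here is a minimal, self-contained sketch (not part of any patch above) of how the 32-bit NRT buffer descriptor documented in icssm_switch.h can be unpacked. The mask values mirror the PRUETH_BD_* definitions added by that header; the helper name, the sample descriptor word, and the userspace framing are illustrative assumptions only.

#include <stdint.h>
#include <stdio.h>

/* Plain-C mirrors of the PRUETH_BD_* masks/shifts from icssm_switch.h */
#define BD_SHADOW_MASK    0x00004000u /* bit 14: data sits in the shadow buffer */
#define BD_TIMESTAMP_MASK 0x00008000u /* bit 15: timestamp kept in a separate buffer */
#define BD_PORT_MASK      0x00030000u /* bits 17:16: ingress/egress port */
#define BD_PORT_SHIFT     16
#define BD_LENGTH_MASK    0x1ffc0000u /* bits 28:18: total packet length */
#define BD_LENGTH_SHIFT   18
#define BD_BROADCAST_MASK 0x40000000u /* bit 30: send on both physical ports */
#define BD_ERROR_MASK     0x80000000u /* bit 31: packet error */

/* Hypothetical helper: decode the fields of one buffer descriptor word */
static void decode_bd(uint32_t bd)
{
	printf("shadow=%u timestamp=%u port=%u length=%u broadcast=%u error=%u\n",
	       (unsigned int)!!(bd & BD_SHADOW_MASK),
	       (unsigned int)!!(bd & BD_TIMESTAMP_MASK),
	       (unsigned int)((bd & BD_PORT_MASK) >> BD_PORT_SHIFT),
	       (unsigned int)((bd & BD_LENGTH_MASK) >> BD_LENGTH_SHIFT),
	       (unsigned int)!!(bd & BD_BROADCAST_MASK),
	       (unsigned int)!!(bd & BD_ERROR_MASK));
}

int main(void)
{
	/* Made-up example: 64-byte frame held in the shadow buffer on port 0 */
	decode_bd((64u << BD_LENGTH_SHIFT) | BD_SHADOW_MASK);
	return 0;
}

In the driver itself, icssm_parse_packet_info() (declared in icssm_prueth.h above) presumably performs this kind of unpacking using the real PRUETH_BD_* macros.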