diff options
Diffstat (limited to 'drivers/net/ethernet/intel/idpf')
21 files changed, 3672 insertions, 1446 deletions
diff --git a/drivers/net/ethernet/intel/idpf/Kconfig b/drivers/net/ethernet/intel/idpf/Kconfig index 2c359a8551c7..adab2154125b 100644 --- a/drivers/net/ethernet/intel/idpf/Kconfig +++ b/drivers/net/ethernet/intel/idpf/Kconfig @@ -6,7 +6,7 @@ config IDPF depends on PCI_MSI depends on PTP_1588_CLOCK_OPTIONAL select DIMLIB - select LIBETH + select LIBETH_XDP help This driver supports Intel(R) Infrastructure Data Path Function devices. diff --git a/drivers/net/ethernet/intel/idpf/Makefile b/drivers/net/ethernet/intel/idpf/Makefile index 4ef4b2b5e37a..651ddee942bd 100644 --- a/drivers/net/ethernet/intel/idpf/Makefile +++ b/drivers/net/ethernet/intel/idpf/Makefile @@ -21,3 +21,6 @@ idpf-$(CONFIG_IDPF_SINGLEQ) += idpf_singleq_txrx.o idpf-$(CONFIG_PTP_1588_CLOCK) += idpf_ptp.o idpf-$(CONFIG_PTP_1588_CLOCK) += idpf_virtchnl_ptp.o + +idpf-y += xdp.o +idpf-y += xsk.o diff --git a/drivers/net/ethernet/intel/idpf/idpf.h b/drivers/net/ethernet/intel/idpf/idpf.h index f4c0eaf9bde3..ca4da0c89979 100644 --- a/drivers/net/ethernet/intel/idpf/idpf.h +++ b/drivers/net/ethernet/intel/idpf/idpf.h @@ -40,6 +40,7 @@ struct idpf_vport_max_q; #define IDPF_NUM_CHUNKS_PER_MSG(struct_sz, chunk_sz) \ ((IDPF_CTLQ_MAX_BUF_LEN - (struct_sz)) / (chunk_sz)) +#define IDPF_WAIT_FOR_MARKER_TIMEO 500 #define IDPF_MAX_WAIT 500 /* available message levels */ @@ -148,6 +149,7 @@ enum idpf_vport_state { * @link_speed_mbps: Link speed in mbps * @vport_idx: Relative vport index * @max_tx_hdr_size: Max header length hardware can support + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @state: See enum idpf_vport_state * @netstats: Packet and byte stats * @stats_lock: Lock to protect stats update @@ -159,6 +161,7 @@ struct idpf_netdev_priv { u32 link_speed_mbps; u16 vport_idx; u16 max_tx_hdr_size; + u16 tx_max_bufs; enum idpf_vport_state state; struct rtnl_link_stats64 netstats; spinlock_t stats_lock; @@ -246,16 +249,28 @@ enum idpf_vport_reset_cause { /** * enum idpf_vport_flags - Vport flags * @IDPF_VPORT_DEL_QUEUES: To send delete queues message - * @IDPF_VPORT_SW_MARKER: Indicate TX pipe drain software marker packets - * processing is done * @IDPF_VPORT_FLAGS_NBITS: Must be last */ enum idpf_vport_flags { IDPF_VPORT_DEL_QUEUES, - IDPF_VPORT_SW_MARKER, IDPF_VPORT_FLAGS_NBITS, }; +/** + * struct idpf_tstamp_stats - Tx timestamp statistics + * @stats_sync: See struct u64_stats_sync + * @packets: Number of packets successfully timestamped by the hardware + * @discarded: Number of Tx skbs discarded due to cached PHC + * being too old to correctly extend timestamp + * @flushed: Number of Tx skbs flushed due to interface closed + */ +struct idpf_tstamp_stats { + struct u64_stats_sync stats_sync; + u64_stats_t packets; + u64_stats_t discarded; + u64_stats_t flushed; +}; + struct idpf_port_stats { struct u64_stats_sync stats_sync; u64_stats_t rx_hw_csum_err; @@ -287,6 +302,10 @@ struct idpf_fsteer_fltr { * @txq_model: Split queue or single queue queuing model * @txqs: Used only in hotpath to get to the right queue very fast * @crc_enable: Enable CRC insertion offload + * @xdpsq_share: whether XDPSQ sharing is enabled + * @num_xdp_txq: number of XDPSQs + * @xdp_txq_offset: index of the first XDPSQ (== number of regular SQs) + * @xdp_prog: installed XDP program * @num_rxq: Number of allocated RX queues * @num_bufq: Number of allocated buffer queues * @rxq_desc_count: RX queue descriptor count. *MUST* have enough descriptors @@ -312,16 +331,19 @@ struct idpf_fsteer_fltr { * @num_q_vectors: Number of IRQ vectors allocated * @q_vectors: Array of queue vectors * @q_vector_idxs: Starting index of queue vectors + * @noirq_dyn_ctl: register to enable/disable the vector for NOIRQ queues + * @noirq_dyn_ctl_ena: value to write to the above to enable it + * @noirq_v_idx: ID of the NOIRQ vector * @max_mtu: device given max possible MTU * @default_mac_addr: device will give a default MAC to use * @rx_itr_profile: RX profiles for Dynamic Interrupt Moderation * @tx_itr_profile: TX profiles for Dynamic Interrupt Moderation * @port_stats: per port csum, header split, and other offload stats * @link_up: True if link is up - * @sw_marker_wq: workqueue for marker packets * @tx_tstamp_caps: Capabilities negotiated for Tx timestamping * @tstamp_config: The Tx tstamp config * @tstamp_task: Tx timestamping task + * @tstamp_stats: Tx timestamping statistics */ struct idpf_vport { u16 num_txq; @@ -335,6 +357,11 @@ struct idpf_vport { struct idpf_tx_queue **txqs; bool crc_enable; + bool xdpsq_share; + u16 num_xdp_txq; + u16 xdp_txq_offset; + struct bpf_prog *xdp_prog; + u16 num_rxq; u16 num_bufq; u32 rxq_desc_count; @@ -359,6 +386,11 @@ struct idpf_vport { u16 num_q_vectors; struct idpf_q_vector *q_vectors; u16 *q_vector_idxs; + + void __iomem *noirq_dyn_ctl; + u32 noirq_dyn_ctl_ena; + u16 noirq_v_idx; + u16 max_mtu; u8 default_mac_addr[ETH_ALEN]; u16 rx_itr_profile[IDPF_DIM_PROFILE_SLOTS]; @@ -367,11 +399,10 @@ struct idpf_vport { bool link_up; - wait_queue_head_t sw_marker_wq; - struct idpf_ptp_vport_tx_tstamp_caps *tx_tstamp_caps; struct kernel_hwtstamp_config tstamp_config; struct work_struct tstamp_task; + struct idpf_tstamp_stats tstamp_stats; }; /** @@ -433,6 +464,7 @@ struct idpf_q_coalesce { * ethtool * @num_req_rxq_desc: Number of user requested RX queue descriptors through * ethtool + * @xdp_prog: requested XDP program to install * @user_flags: User toggled config flags * @mac_filter_list: List of MAC filters * @num_fsteer_fltrs: number of flow steering filters @@ -447,6 +479,7 @@ struct idpf_vport_user_config_data { u16 num_req_rx_qs; u32 num_req_txq_desc; u32 num_req_rxq_desc; + struct bpf_prog *xdp_prog; DECLARE_BITMAP(user_flags, __IDPF_USER_FLAGS_NBITS); struct list_head mac_filter_list; u32 num_fsteer_fltrs; @@ -676,6 +709,11 @@ static inline int idpf_is_queue_model_split(u16 q_model) q_model == VIRTCHNL2_QUEUE_MODEL_SPLIT; } +static inline bool idpf_xdp_enabled(const struct idpf_vport *vport) +{ + return vport->adapter && vport->xdp_prog; +} + #define idpf_is_cap_ena(adapter, field, flag) \ idpf_is_capability_ena(adapter, false, field, flag) #define idpf_is_cap_ena_all(adapter, field, flag) \ @@ -957,6 +995,13 @@ static inline void idpf_vport_ctrl_unlock(struct net_device *netdev) mutex_unlock(&np->adapter->vport_ctrl_lock); } +static inline bool idpf_vport_ctrl_is_locked(struct net_device *netdev) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + + return mutex_is_locked(&np->adapter->vport_ctrl_lock); +} + void idpf_statistics_task(struct work_struct *work); void idpf_init_task(struct work_struct *work); void idpf_service_task(struct work_struct *work); diff --git a/drivers/net/ethernet/intel/idpf/idpf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_dev.c index bfa60f7d43de..3a04a6bd0d7c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_dev.c @@ -77,7 +77,7 @@ static int idpf_intr_reg_init(struct idpf_vport *vport) int num_vecs = vport->num_q_vectors; struct idpf_vec_regs *reg_vals; int num_regs, i, err = 0; - u32 rx_itr, tx_itr; + u32 rx_itr, tx_itr, val; u16 total_vecs; total_vecs = idpf_get_reserved_vecs(vport->adapter); @@ -121,6 +121,15 @@ static int idpf_intr_reg_init(struct idpf_vport *vport) intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); } + /* Data vector for NOIRQ queues */ + + val = reg_vals[vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC].dyn_ctl_reg; + vport->noirq_dyn_ctl = idpf_get_reg_addr(adapter, val); + + val = PF_GLINT_DYN_CTL_WB_ON_ITR_M | PF_GLINT_DYN_CTL_INTENA_MSK_M | + FIELD_PREP(PF_GLINT_DYN_CTL_ITR_INDX_M, IDPF_NO_ITR_UPDATE_IDX); + vport->noirq_dyn_ctl_ena = val; + free_reg_vals: kfree(reg_vals); diff --git a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c index 0eb812ac19c2..a5a1eec9ade8 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ethtool.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ethtool.c @@ -1245,8 +1245,8 @@ static void idpf_get_ethtool_stats(struct net_device *netdev, * * returns pointer to rx vector */ -static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, - int q_num) +struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, + u32 q_num) { int q_grp, q_idx; @@ -1266,8 +1266,8 @@ static struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, * * returns pointer to tx vector */ -static struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, - int q_num) +struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, + u32 q_num) { int q_grp; @@ -1685,6 +1685,61 @@ unlock: return err; } +/** + * idpf_get_ts_stats - Collect HW tstamping statistics + * @netdev: network interface device structure + * @ts_stats: HW timestamping stats structure + * + * Collect HW timestamping statistics including successfully timestamped + * packets, discarded due to illegal values, flushed during releasing PTP and + * skipped due to lack of the free index. + */ +static void idpf_get_ts_stats(struct net_device *netdev, + struct ethtool_ts_stats *ts_stats) +{ + struct idpf_netdev_priv *np = netdev_priv(netdev); + struct idpf_vport *vport; + unsigned int start; + + idpf_vport_ctrl_lock(netdev); + vport = idpf_netdev_to_vport(netdev); + do { + start = u64_stats_fetch_begin(&vport->tstamp_stats.stats_sync); + ts_stats->pkts = u64_stats_read(&vport->tstamp_stats.packets); + ts_stats->lost = u64_stats_read(&vport->tstamp_stats.flushed); + ts_stats->err = u64_stats_read(&vport->tstamp_stats.discarded); + } while (u64_stats_fetch_retry(&vport->tstamp_stats.stats_sync, start)); + + if (np->state != __IDPF_VPORT_UP) + goto exit; + + for (u16 i = 0; i < vport->num_txq_grp; i++) { + struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; + + for (u16 j = 0; j < txq_grp->num_txq; j++) { + struct idpf_tx_queue *txq = txq_grp->txqs[j]; + struct idpf_tx_queue_stats *stats; + u64 ts; + + if (!txq) + continue; + + stats = &txq->q_stats; + do { + start = u64_stats_fetch_begin(&txq->stats_sync); + + ts = u64_stats_read(&stats->tstamp_skipped); + } while (u64_stats_fetch_retry(&txq->stats_sync, + start)); + + ts_stats->lost += ts; + } + } + +exit: + idpf_vport_ctrl_unlock(netdev); +} + static const struct ethtool_ops idpf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE, @@ -1711,6 +1766,7 @@ static const struct ethtool_ops idpf_ethtool_ops = { .set_ringparam = idpf_set_ringparam, .get_link_ksettings = idpf_get_link_ksettings, .get_ts_info = idpf_get_ts_info, + .get_ts_stats = idpf_get_ts_stats, }; /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_idc.c b/drivers/net/ethernet/intel/idpf/idpf_idc.c index 4d2905103215..7e20a07e98e5 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_idc.c +++ b/drivers/net/ethernet/intel/idpf/idpf_idc.c @@ -247,10 +247,10 @@ static void idpf_unplug_aux_dev(struct auxiliary_device *adev) if (!adev) return; + ida_free(&idpf_idc_ida, adev->id); + auxiliary_device_delete(adev); auxiliary_device_uninit(adev); - - ida_free(&idpf_idc_ida, adev->id); } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h index 7492d1713243..20d5af64e750 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_lan_txrx.h @@ -186,13 +186,17 @@ struct idpf_base_tx_desc { __le64 qw1; /* type_cmd_offset_bsz_l2tag1 */ }; /* read used with buffer queues */ -struct idpf_splitq_tx_compl_desc { +struct idpf_splitq_4b_tx_compl_desc { /* qid=[10:0] comptype=[13:11] rsvd=[14] gen=[15] */ __le16 qid_comptype_gen; union { __le16 q_head; /* Queue head */ __le16 compl_tag; /* Completion tag */ } q_head_compl_tag; +}; /* writeback used with completion queues */ + +struct idpf_splitq_tx_compl_desc { + struct idpf_splitq_4b_tx_compl_desc common; u8 ts[3]; u8 rsvd; /* Reserved */ }; /* writeback used with completion queues */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_lib.c b/drivers/net/ethernet/intel/idpf/idpf_lib.c index 2c2a3e85d693..8a941f0fb048 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_lib.c +++ b/drivers/net/ethernet/intel/idpf/idpf_lib.c @@ -4,6 +4,8 @@ #include "idpf.h" #include "idpf_virtchnl.h" #include "idpf_ptp.h" +#include "xdp.h" +#include "xsk.h" static const struct net_device_ops idpf_netdev_ops; @@ -776,6 +778,7 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) np->vport_idx = vport->idx; np->vport_id = vport->vport_id; np->max_tx_hdr_size = idpf_get_max_tx_hdr_size(adapter); + np->tx_max_bufs = idpf_get_max_tx_bufs(adapter); spin_lock_init(&np->stats_lock); @@ -834,6 +837,8 @@ static int idpf_cfg_netdev(struct idpf_vport *vport) netdev->hw_features |= netdev->features | other_offloads; netdev->vlan_features |= netdev->features | other_offloads; netdev->hw_enc_features |= dflt_features | other_offloads; + idpf_xdp_set_features(vport); + idpf_set_ethtool_ops(netdev); netif_set_affinity_auto(netdev); SET_NETDEV_DEV(netdev, &adapter->pdev->dev); @@ -883,14 +888,18 @@ static void idpf_remove_features(struct idpf_vport *vport) /** * idpf_vport_stop - Disable a vport * @vport: vport to disable + * @rtnl: whether to take RTNL lock */ -static void idpf_vport_stop(struct idpf_vport *vport) +static void idpf_vport_stop(struct idpf_vport *vport, bool rtnl) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); if (np->state <= __IDPF_VPORT_DOWN) return; + if (rtnl) + rtnl_lock(); + netif_carrier_off(vport->netdev); netif_tx_disable(vport->netdev); @@ -909,9 +918,13 @@ static void idpf_vport_stop(struct idpf_vport *vport) vport->link_up = false; idpf_vport_intr_deinit(vport); + idpf_xdp_rxq_info_deinit_all(vport); idpf_vport_queues_rel(vport); idpf_vport_intr_rel(vport); np->state = __IDPF_VPORT_DOWN; + + if (rtnl) + rtnl_unlock(); } /** @@ -935,7 +948,7 @@ static int idpf_stop(struct net_device *netdev) idpf_vport_ctrl_lock(netdev); vport = idpf_netdev_to_vport(netdev); - idpf_vport_stop(vport); + idpf_vport_stop(vport, false); idpf_vport_ctrl_unlock(netdev); @@ -1028,7 +1041,7 @@ static void idpf_vport_dealloc(struct idpf_vport *vport) idpf_idc_deinit_vport_aux_device(vport->vdev_info); idpf_deinit_mac_addr(vport); - idpf_vport_stop(vport); + idpf_vport_stop(vport, true); if (!test_bit(IDPF_HR_RESET_IN_PROG, adapter->flags)) idpf_decfg_netdev(vport); @@ -1134,7 +1147,7 @@ static struct idpf_vport *idpf_vport_alloc(struct idpf_adapter *adapter, if (!vport) return vport; - num_max_q = max(max_q->max_txq, max_q->max_rxq); + num_max_q = max(max_q->max_txq, max_q->max_rxq) + IDPF_RESERVED_VECS; if (!adapter->vport_config[idx]) { struct idpf_vport_config *vport_config; struct idpf_q_coalesce *q_coal; @@ -1308,13 +1321,13 @@ static void idpf_restore_features(struct idpf_vport *vport) */ static int idpf_set_real_num_queues(struct idpf_vport *vport) { - int err; + int err, txq = vport->num_txq - vport->num_xdp_txq; err = netif_set_real_num_rx_queues(vport->netdev, vport->num_rxq); if (err) return err; - return netif_set_real_num_tx_queues(vport->netdev, vport->num_txq); + return netif_set_real_num_tx_queues(vport->netdev, txq); } /** @@ -1369,8 +1382,9 @@ static void idpf_rx_init_buf_tail(struct idpf_vport *vport) /** * idpf_vport_open - Bring up a vport * @vport: vport to bring up + * @rtnl: whether to take RTNL lock */ -static int idpf_vport_open(struct idpf_vport *vport) +static int idpf_vport_open(struct idpf_vport *vport, bool rtnl) { struct idpf_netdev_priv *np = netdev_priv(vport->netdev); struct idpf_adapter *adapter = vport->adapter; @@ -1380,6 +1394,9 @@ static int idpf_vport_open(struct idpf_vport *vport) if (np->state != __IDPF_VPORT_DOWN) return -EBUSY; + if (rtnl) + rtnl_lock(); + /* we do not allow interface up just yet */ netif_carrier_off(vport->netdev); @@ -1387,7 +1404,7 @@ static int idpf_vport_open(struct idpf_vport *vport) if (err) { dev_err(&adapter->pdev->dev, "Failed to allocate interrupts for vport %u: %d\n", vport->vport_id, err); - return err; + goto err_rtnl_unlock; } err = idpf_vport_queues_alloc(vport); @@ -1408,35 +1425,44 @@ static int idpf_vport_open(struct idpf_vport *vport) goto queues_rel; } - err = idpf_rx_bufs_init_all(vport); + err = idpf_queue_reg_init(vport); if (err) { - dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", + dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", vport->vport_id, err); goto queues_rel; } - err = idpf_queue_reg_init(vport); + err = idpf_rx_bufs_init_all(vport); if (err) { - dev_err(&adapter->pdev->dev, "Failed to initialize queue registers for vport %u: %d\n", + dev_err(&adapter->pdev->dev, "Failed to initialize RX buffers for vport %u: %d\n", vport->vport_id, err); goto queues_rel; } idpf_rx_init_buf_tail(vport); + + err = idpf_xdp_rxq_info_init_all(vport); + if (err) { + netdev_err(vport->netdev, + "Failed to initialize XDP RxQ info for vport %u: %pe\n", + vport->vport_id, ERR_PTR(err)); + goto intr_deinit; + } + idpf_vport_intr_ena(vport); err = idpf_send_config_queues_msg(vport); if (err) { dev_err(&adapter->pdev->dev, "Failed to configure queues for vport %u, %d\n", vport->vport_id, err); - goto intr_deinit; + goto rxq_deinit; } err = idpf_send_map_unmap_queue_vector_msg(vport, true); if (err) { dev_err(&adapter->pdev->dev, "Failed to map queue vectors for vport %u: %d\n", vport->vport_id, err); - goto intr_deinit; + goto rxq_deinit; } err = idpf_send_enable_queues_msg(vport); @@ -1474,6 +1500,9 @@ static int idpf_vport_open(struct idpf_vport *vport) goto deinit_rss; } + if (rtnl) + rtnl_unlock(); + return 0; deinit_rss: @@ -1484,6 +1513,8 @@ disable_queues: idpf_send_disable_queues_msg(vport); unmap_queue_vectors: idpf_send_map_unmap_queue_vector_msg(vport, false); +rxq_deinit: + idpf_xdp_rxq_info_deinit_all(vport); intr_deinit: idpf_vport_intr_deinit(vport); queues_rel: @@ -1491,6 +1522,10 @@ queues_rel: intr_rel: idpf_vport_intr_rel(vport); +err_rtnl_unlock: + if (rtnl) + rtnl_unlock(); + return err; } @@ -1547,8 +1582,6 @@ void idpf_init_task(struct work_struct *work) index = vport->idx; vport_config = adapter->vport_config[index]; - init_waitqueue_head(&vport->sw_marker_wq); - spin_lock_init(&vport_config->mac_filter_list_lock); INIT_LIST_HEAD(&vport_config->user_config.mac_filter_list); @@ -1571,7 +1604,7 @@ void idpf_init_task(struct work_struct *work) np = netdev_priv(vport->netdev); np->state = __IDPF_VPORT_DOWN; if (test_and_clear_bit(IDPF_VPORT_UP_REQUESTED, vport_config->flags)) - idpf_vport_open(vport); + idpf_vport_open(vport, true); /* Spawn and return 'idpf_init_task' work queue until all the * default vports are created @@ -1961,7 +1994,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, idpf_send_delete_queues_msg(vport); } else { set_bit(IDPF_VPORT_DEL_QUEUES, vport->flags); - idpf_vport_stop(vport); + idpf_vport_stop(vport, false); } idpf_deinit_rss(vport); @@ -1991,7 +2024,7 @@ int idpf_initiate_soft_reset(struct idpf_vport *vport, goto err_open; if (current_state == __IDPF_VPORT_UP) - err = idpf_vport_open(vport); + err = idpf_vport_open(vport, false); goto free_vport; @@ -2001,7 +2034,7 @@ err_reset: err_open: if (current_state == __IDPF_VPORT_UP) - idpf_vport_open(vport); + idpf_vport_open(vport, false); free_vport: kfree(new_vport); @@ -2239,7 +2272,7 @@ static int idpf_open(struct net_device *netdev) if (err) goto unlock; - err = idpf_vport_open(vport); + err = idpf_vport_open(vport, false); unlock: idpf_vport_ctrl_unlock(netdev); @@ -2272,6 +2305,92 @@ static int idpf_change_mtu(struct net_device *netdev, int new_mtu) } /** + * idpf_chk_tso_segment - Check skb is not using too many buffers + * @skb: send buffer + * @max_bufs: maximum number of buffers + * + * For TSO we need to count the TSO header and segment payload separately. As + * such we need to check cases where we have max_bufs-1 fragments or more as we + * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 + * for the segment payload in the first descriptor, and another max_buf-1 for + * the fragments. + * + * Returns true if the packet needs to be software segmented by core stack. + */ +static bool idpf_chk_tso_segment(const struct sk_buff *skb, + unsigned int max_bufs) +{ + const struct skb_shared_info *shinfo = skb_shinfo(skb); + const skb_frag_t *frag, *stale; + int nr_frags, sum; + + /* no need to check if number of frags is less than max_bufs - 1 */ + nr_frags = shinfo->nr_frags; + if (nr_frags < (max_bufs - 1)) + return false; + + /* We need to walk through the list and validate that each group + * of max_bufs-2 fragments totals at least gso_size. + */ + nr_frags -= max_bufs - 2; + frag = &shinfo->frags[0]; + + /* Initialize size to the negative value of gso_size minus 1. We use + * this as the worst case scenario in which the frag ahead of us only + * provides one byte which is why we are limited to max_bufs-2 + * descriptors for a single transmit as the header and previous + * fragment are already consuming 2 descriptors. + */ + sum = 1 - shinfo->gso_size; + + /* Add size of frags 0 through 4 to create our initial sum */ + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + sum += skb_frag_size(frag++); + + /* Walk through fragments adding latest fragment, testing it, and + * then removing stale fragments from the sum. + */ + for (stale = &shinfo->frags[0];; stale++) { + int stale_size = skb_frag_size(stale); + + sum += skb_frag_size(frag++); + + /* The stale fragment may present us with a smaller + * descriptor than the actual fragment size. To account + * for that we need to remove all the data on the front and + * figure out what the remainder would be in the last + * descriptor associated with the fragment. + */ + if (stale_size > IDPF_TX_MAX_DESC_DATA) { + int align_pad = -(skb_frag_off(stale)) & + (IDPF_TX_MAX_READ_REQ_SIZE - 1); + + sum -= align_pad; + stale_size -= align_pad; + + do { + sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; + } while (stale_size > IDPF_TX_MAX_DESC_DATA); + } + + /* if sum is negative we failed to make sufficient progress */ + if (sum < 0) + return true; + + if (!nr_frags--) + break; + + sum -= stale_size; + } + + return false; +} + +/** * idpf_features_check - Validate packet conforms to limits * @skb: skb buffer * @netdev: This port's netdev @@ -2292,12 +2411,15 @@ static netdev_features_t idpf_features_check(struct sk_buff *skb, if (skb->ip_summed != CHECKSUM_PARTIAL) return features; - /* We cannot support GSO if the MSS is going to be less than - * 88 bytes. If it is then we need to drop support for GSO. - */ - if (skb_is_gso(skb) && - (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS)) - features &= ~NETIF_F_GSO_MASK; + if (skb_is_gso(skb)) { + /* We cannot support GSO if the MSS is going to be less than + * 88 bytes. If it is then we need to drop support for GSO. + */ + if (skb_shinfo(skb)->gso_size < IDPF_TX_TSO_MIN_MSS) + features &= ~NETIF_F_GSO_MASK; + else if (idpf_chk_tso_segment(skb, np->tx_max_bufs)) + features &= ~NETIF_F_GSO_MASK; + } /* Ensure MACLEN is <= 126 bytes (63 words) and not an odd size */ len = skb_network_offset(skb); @@ -2344,6 +2466,7 @@ static int idpf_set_mac(struct net_device *netdev, void *p) struct idpf_netdev_priv *np = netdev_priv(netdev); struct idpf_vport_config *vport_config; struct sockaddr *addr = p; + u8 old_mac_addr[ETH_ALEN]; struct idpf_vport *vport; int err = 0; @@ -2367,17 +2490,19 @@ static int idpf_set_mac(struct net_device *netdev, void *p) if (ether_addr_equal(netdev->dev_addr, addr->sa_data)) goto unlock_mutex; + ether_addr_copy(old_mac_addr, vport->default_mac_addr); + ether_addr_copy(vport->default_mac_addr, addr->sa_data); vport_config = vport->adapter->vport_config[vport->idx]; err = idpf_add_mac_filter(vport, np, addr->sa_data, false); if (err) { __idpf_del_mac_filter(vport_config, addr->sa_data); + ether_addr_copy(vport->default_mac_addr, netdev->dev_addr); goto unlock_mutex; } - if (is_valid_ether_addr(vport->default_mac_addr)) - idpf_del_mac_filter(vport, np, vport->default_mac_addr, false); + if (is_valid_ether_addr(old_mac_addr)) + __idpf_del_mac_filter(vport_config, old_mac_addr); - ether_addr_copy(vport->default_mac_addr, addr->sa_data); eth_hw_addr_set(netdev, addr->sa_data); unlock_mutex: @@ -2492,4 +2617,7 @@ static const struct net_device_ops idpf_netdev_ops = { .ndo_tx_timeout = idpf_tx_timeout, .ndo_hwtstamp_get = idpf_hwtstamp_get, .ndo_hwtstamp_set = idpf_hwtstamp_set, + .ndo_bpf = idpf_xdp, + .ndo_xdp_xmit = idpf_xdp_xmit, + .ndo_xsk_wakeup = idpf_xsk_wakeup, }; diff --git a/drivers/net/ethernet/intel/idpf/idpf_main.c b/drivers/net/ethernet/intel/idpf/idpf_main.c index dfe9126f1f4a..8c46481d2e1f 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_main.c +++ b/drivers/net/ethernet/intel/idpf/idpf_main.c @@ -9,6 +9,7 @@ MODULE_DESCRIPTION(DRV_SUMMARY); MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL"); /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_ptp.c index ee21f2ff0cad..142823af1f9e 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_ptp.c @@ -618,8 +618,13 @@ u64 idpf_ptp_extend_ts(struct idpf_vport *vport, u64 in_tstamp) discard_time = ptp->cached_phc_jiffies + 2 * HZ; - if (time_is_before_jiffies(discard_time)) + if (time_is_before_jiffies(discard_time)) { + u64_stats_update_begin(&vport->tstamp_stats.stats_sync); + u64_stats_inc(&vport->tstamp_stats.discarded); + u64_stats_update_end(&vport->tstamp_stats.stats_sync); + return 0; + } return idpf_ptp_tstamp_extend_32b_to_64b(ptp->cached_phc_time, lower_32_bits(in_tstamp)); @@ -853,10 +858,14 @@ static void idpf_ptp_release_vport_tstamp(struct idpf_vport *vport) /* Remove list with latches in use */ head = &vport->tx_tstamp_caps->latches_in_use; + u64_stats_update_begin(&vport->tstamp_stats.stats_sync); list_for_each_entry_safe(ptp_tx_tstamp, tmp, head, list_member) { + u64_stats_inc(&vport->tstamp_stats.flushed); + list_del(&ptp_tx_tstamp->list_member); kfree(ptp_tx_tstamp); } + u64_stats_update_end(&vport->tstamp_stats.stats_sync); spin_unlock_bh(&vport->tx_tstamp_caps->latches_lock); diff --git a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c index 555879b1248d..61e613066140 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_singleq_txrx.c @@ -1,8 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2023 Intel Corporation */ -#include <net/libeth/rx.h> -#include <net/libeth/tx.h> +#include <net/libeth/xdp.h> #include "idpf.h" @@ -180,6 +179,58 @@ static int idpf_tx_singleq_csum(struct sk_buff *skb, } /** + * idpf_tx_singleq_dma_map_error - handle TX DMA map errors + * @txq: queue to send buffer on + * @skb: send buffer + * @first: original first buffer info buffer for packet + * @idx: starting point on ring to unwind + */ +static void idpf_tx_singleq_dma_map_error(struct idpf_tx_queue *txq, + struct sk_buff *skb, + struct idpf_tx_buf *first, u16 idx) +{ + struct libeth_sq_napi_stats ss = { }; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; + + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + /* clear dma mappings for failed tx_buf map */ + for (;;) { + struct idpf_tx_buf *tx_buf; + + tx_buf = &txq->tx_buf[idx]; + libeth_tx_complete(tx_buf, &cp); + if (tx_buf == first) + break; + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + if (skb_is_gso(skb)) { + union idpf_tx_flex_desc *tx_desc; + + /* If we failed a DMA mapping for a TSO packet, we will have + * used one additional descriptor for a context + * descriptor. Reset that here. + */ + tx_desc = &txq->flex_tx[idx]; + memset(tx_desc, 0, sizeof(*tx_desc)); + if (idx == 0) + idx = txq->desc_count; + idx--; + } + + /* Update tail in case netdev_xmit_more was previously true */ + idpf_tx_buf_hw_update(txq, idx, false); +} + +/** * idpf_tx_singleq_map - Build the Tx base descriptor * @tx_q: queue to send buffer on * @first: first buffer info buffer to use @@ -219,8 +270,9 @@ static void idpf_tx_singleq_map(struct idpf_tx_queue *tx_q, for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) + return idpf_tx_singleq_dma_map_error(tx_q, skb, + first, i); /* record length, and DMA address */ dma_unmap_len_set(tx_buf, len, size); @@ -362,11 +414,11 @@ netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, { struct idpf_tx_offload_params offload = { }; struct idpf_tx_buf *first; + u32 count, buf_count = 1; int csum, tso, needed; - unsigned int count; __be16 protocol; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); @@ -602,7 +654,7 @@ static void idpf_rx_singleq_csum(struct idpf_rx_queue *rxq, bool ipv4, ipv6; /* check if Rx checksum is enabled */ - if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded)) + if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded)) return; /* check if HW has decoded the packet and checksum */ @@ -741,7 +793,7 @@ static void idpf_rx_singleq_base_hash(struct idpf_rx_queue *rx_q, { u64 mask, qw1; - if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded)) + if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded)) return; mask = VIRTCHNL2_RX_BASE_DESC_FLTSTAT_RSS_HASH_M; @@ -769,7 +821,7 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, const union virtchnl2_rx_desc *rx_desc, struct libeth_rx_pt decoded) { - if (!libeth_rx_pt_has_hash(rx_q->netdev, decoded)) + if (!libeth_rx_pt_has_hash(rx_q->xdp_rxq.dev, decoded)) return; if (FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_STATUS0_RSS_VALID_M, @@ -781,7 +833,7 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, } /** - * idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx + * __idpf_rx_singleq_process_skb_fields - Populate skb header fields from Rx * descriptor * @rx_q: Rx ring being processed * @skb: pointer to current skb being populated @@ -793,17 +845,14 @@ static void idpf_rx_singleq_flex_hash(struct idpf_rx_queue *rx_q, * other fields within the skb. */ static void -idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, - struct sk_buff *skb, - const union virtchnl2_rx_desc *rx_desc, - u16 ptype) +__idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, + struct sk_buff *skb, + const union virtchnl2_rx_desc *rx_desc, + u16 ptype) { struct libeth_rx_pt decoded = rx_q->rx_ptype_lkup[ptype]; struct libeth_rx_csum csum_bits; - /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_q->netdev); - /* Check if we're using base mode descriptor IDs */ if (rx_q->rxdids == VIRTCHNL2_RXDID_1_32B_BASE_M) { idpf_rx_singleq_base_hash(rx_q, skb, rx_desc, decoded); @@ -814,7 +863,6 @@ idpf_rx_singleq_process_skb_fields(struct idpf_rx_queue *rx_q, } idpf_rx_singleq_csum(rx_q, skb, csum_bits, decoded); - skb_record_rx_queue(skb, rx_q->idx); } /** @@ -950,6 +998,32 @@ idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, idpf_rx_singleq_extract_flex_fields(rx_desc, fields); } +static bool +idpf_rx_singleq_process_skb_fields(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs) +{ + struct libeth_rqe_info fields; + struct idpf_rx_queue *rxq; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); + + idpf_rx_singleq_extract_fields(rxq, xdp->desc, &fields); + __idpf_rx_singleq_process_skb_fields(rxq, skb, xdp->desc, + fields.ptype); + + return true; +} + +static void idpf_xdp_run_pass(struct libeth_xdp_buff *xdp, + struct napi_struct *napi, + struct libeth_rq_napi_stats *rs, + const union virtchnl2_rx_desc *desc) +{ + libeth_xdp_run_pass(xdp, NULL, napi, rs, desc, NULL, + idpf_rx_singleq_process_skb_fields); +} + /** * idpf_rx_singleq_clean - Reclaim resources after receive completes * @rx_q: rx queue to clean @@ -959,14 +1033,15 @@ idpf_rx_singleq_extract_fields(const struct idpf_rx_queue *rx_q, */ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) { - unsigned int total_rx_bytes = 0, total_rx_pkts = 0; - struct sk_buff *skb = rx_q->skb; + struct libeth_rq_napi_stats rs = { }; u16 ntc = rx_q->next_to_clean; + LIBETH_XDP_ONSTACK_BUFF(xdp); u16 cleaned_count = 0; - bool failure = false; + + libeth_xdp_init_buff(xdp, &rx_q->xdp, &rx_q->xdp_rxq); /* Process Rx packets bounded by budget */ - while (likely(total_rx_pkts < (unsigned int)budget)) { + while (likely(rs.packets < budget)) { struct libeth_rqe_info fields = { }; union virtchnl2_rx_desc *rx_desc; struct idpf_rx_buf *rx_buf; @@ -993,73 +1068,41 @@ static int idpf_rx_singleq_clean(struct idpf_rx_queue *rx_q, int budget) idpf_rx_singleq_extract_fields(rx_q, rx_desc, &fields); rx_buf = &rx_q->rx_buf[ntc]; - if (!libeth_rx_sync_for_cpu(rx_buf, fields.len)) - goto skip_data; - - if (skb) - idpf_rx_add_frag(rx_buf, skb, fields.len); - else - skb = idpf_rx_build_skb(rx_buf, fields.len); - - /* exit if we failed to retrieve a buffer */ - if (!skb) - break; - -skip_data: + libeth_xdp_process_buff(xdp, rx_buf, fields.len); rx_buf->netmem = 0; IDPF_SINGLEQ_BUMP_RING_IDX(rx_q, ntc); cleaned_count++; /* skip if it is non EOP desc */ - if (idpf_rx_singleq_is_non_eop(rx_desc) || unlikely(!skb)) + if (idpf_rx_singleq_is_non_eop(rx_desc) || + unlikely(!xdp->data)) continue; #define IDPF_RXD_ERR_S FIELD_PREP(VIRTCHNL2_RX_BASE_DESC_QW1_ERROR_M, \ VIRTCHNL2_RX_BASE_DESC_ERROR_RXE_M) if (unlikely(idpf_rx_singleq_test_staterr(rx_desc, IDPF_RXD_ERR_S))) { - dev_kfree_skb_any(skb); - skb = NULL; + libeth_xdp_return_buff_slow(xdp); continue; } - /* pad skb if needed (to make valid ethernet frame) */ - if (eth_skb_pad(skb)) { - skb = NULL; - continue; - } - - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - - /* protocol */ - idpf_rx_singleq_process_skb_fields(rx_q, skb, rx_desc, - fields.ptype); - - /* send completed skb up the stack */ - napi_gro_receive(rx_q->pp->p.napi, skb); - skb = NULL; - - /* update budget accounting */ - total_rx_pkts++; + idpf_xdp_run_pass(xdp, rx_q->pp->p.napi, &rs, rx_desc); } - rx_q->skb = skb; - rx_q->next_to_clean = ntc; + libeth_xdp_save_buff(&rx_q->xdp, xdp); page_pool_nid_changed(rx_q->pp, numa_mem_id()); if (cleaned_count) - failure = idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); + idpf_rx_singleq_buf_hw_alloc_all(rx_q, cleaned_count); u64_stats_update_begin(&rx_q->stats_sync); - u64_stats_add(&rx_q->q_stats.packets, total_rx_pkts); - u64_stats_add(&rx_q->q_stats.bytes, total_rx_bytes); + u64_stats_add(&rx_q->q_stats.packets, rs.packets); + u64_stats_add(&rx_q->q_stats.bytes, rs.bytes); u64_stats_update_end(&rx_q->stats_sync); - /* guarantee a trip back through this routine if there was a failure */ - return failure ? budget : (int)total_rx_pkts; + return rs.packets; } /** diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.c b/drivers/net/ethernet/intel/idpf/idpf_txrx.c index 66a1b040639d..828f7c444d30 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.c +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.c @@ -1,52 +1,36 @@ // SPDX-License-Identifier: GPL-2.0-only /* Copyright (C) 2023 Intel Corporation */ -#include <net/libeth/rx.h> -#include <net/libeth/tx.h> - #include "idpf.h" #include "idpf_ptp.h" #include "idpf_virtchnl.h" +#include "xdp.h" +#include "xsk.h" -struct idpf_tx_stash { - struct hlist_node hlist; - struct libeth_sqe buf; -}; - -#define idpf_tx_buf_compl_tag(buf) (*(u32 *)&(buf)->priv) +#define idpf_tx_buf_next(buf) (*(u32 *)&(buf)->priv) LIBETH_SQE_CHECK_PRIV(u32); -static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count); - /** - * idpf_buf_lifo_push - push a buffer pointer onto stack - * @stack: pointer to stack struct - * @buf: pointer to buf to push + * idpf_chk_linearize - Check if skb exceeds max descriptors per packet + * @skb: send buffer + * @max_bufs: maximum scatter gather buffers for single packet + * @count: number of buffers this packet needs * - * Returns 0 on success, negative on failure - **/ -static int idpf_buf_lifo_push(struct idpf_buf_lifo *stack, - struct idpf_tx_stash *buf) + * Make sure we don't exceed maximum scatter gather buffers for a single + * packet. + * TSO case has been handled earlier from idpf_features_check(). + */ +static bool idpf_chk_linearize(const struct sk_buff *skb, + unsigned int max_bufs, + unsigned int count) { - if (unlikely(stack->top == stack->size)) - return -ENOSPC; - - stack->bufs[stack->top++] = buf; - - return 0; -} + if (likely(count <= max_bufs)) + return false; -/** - * idpf_buf_lifo_pop - pop a buffer pointer from stack - * @stack: pointer to stack struct - **/ -static struct idpf_tx_stash *idpf_buf_lifo_pop(struct idpf_buf_lifo *stack) -{ - if (unlikely(!stack->top)) - return NULL; + if (skb_is_gso(skb)) + return false; - return stack->bufs[--stack->top]; + return true; } /** @@ -70,59 +54,42 @@ void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue) } } -/** - * idpf_tx_buf_rel_all - Free any empty Tx buffers - * @txq: queue to be cleaned - */ -static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) +static void idpf_tx_buf_clean(struct idpf_tx_queue *txq) { struct libeth_sq_napi_stats ss = { }; - struct idpf_buf_lifo *buf_stack; - struct idpf_tx_stash *stash; + struct xdp_frame_bulk bq; struct libeth_cq_pp cp = { .dev = txq->dev, + .bq = &bq, .ss = &ss, }; - struct hlist_node *tmp; - u32 i, tag; - /* Buffers already cleared, nothing to do */ - if (!txq->tx_buf) - return; + xdp_frame_bulk_init(&bq); /* Free all the Tx buffer sk_buffs */ - for (i = 0; i < txq->desc_count; i++) - libeth_tx_complete(&txq->tx_buf[i], &cp); - - kfree(txq->tx_buf); - txq->tx_buf = NULL; + for (u32 i = 0; i < txq->buf_pool_size; i++) + libeth_tx_complete_any(&txq->tx_buf[i], &cp); - if (!idpf_queue_has(FLOW_SCH_EN, txq)) - return; + xdp_flush_frame_bulk(&bq); +} - buf_stack = &txq->stash->buf_stack; - if (!buf_stack->bufs) +/** + * idpf_tx_buf_rel_all - Free any empty Tx buffers + * @txq: queue to be cleaned + */ +static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) +{ + /* Buffers already cleared, nothing to do */ + if (!txq->tx_buf) return; - /* - * If a Tx timeout occurred, there are potentially still bufs in the - * hash table, free them here. - */ - hash_for_each_safe(txq->stash->sched_buf_hash, tag, tmp, stash, - hlist) { - if (!stash) - continue; - - libeth_tx_complete(&stash->buf, &cp); - hash_del(&stash->hlist); - idpf_buf_lifo_push(buf_stack, stash); - } - - for (i = 0; i < buf_stack->size; i++) - kfree(buf_stack->bufs[i]); + if (idpf_queue_has(XSK, txq)) + idpf_xsksq_clean(txq); + else + idpf_tx_buf_clean(txq); - kfree(buf_stack->bufs); - buf_stack->bufs = NULL; + kfree(txq->tx_buf); + txq->tx_buf = NULL; } /** @@ -133,12 +100,24 @@ static void idpf_tx_buf_rel_all(struct idpf_tx_queue *txq) */ static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) { + bool xdp = idpf_queue_has(XDP, txq); + + if (xdp) + libeth_xdpsq_deinit_timer(txq->timer); + idpf_tx_buf_rel_all(txq); - netdev_tx_reset_subqueue(txq->netdev, txq->idx); + + if (!xdp) + netdev_tx_reset_subqueue(txq->netdev, txq->idx); + + idpf_xsk_clear_queue(txq, VIRTCHNL2_QUEUE_TYPE_TX); if (!txq->desc_ring) return; + if (!xdp && txq->refillq) + kfree(txq->refillq->ring); + dmam_free_coherent(txq->dev, txq->size, txq->desc_ring, txq->dma); txq->desc_ring = NULL; txq->next_to_use = 0; @@ -153,12 +132,14 @@ static void idpf_tx_desc_rel(struct idpf_tx_queue *txq) */ static void idpf_compl_desc_rel(struct idpf_compl_queue *complq) { + idpf_xsk_clear_queue(complq, VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + if (!complq->comp) return; dma_free_coherent(complq->netdev->dev.parent, complq->size, - complq->comp, complq->dma); - complq->comp = NULL; + complq->desc_ring, complq->dma); + complq->desc_ring = NULL; complq->next_to_use = 0; complq->next_to_clean = 0; } @@ -195,41 +176,18 @@ static void idpf_tx_desc_rel_all(struct idpf_vport *vport) */ static int idpf_tx_buf_alloc_all(struct idpf_tx_queue *tx_q) { - struct idpf_buf_lifo *buf_stack; - int buf_size; - int i; - /* Allocate book keeping buffers only. Buffers to be supplied to HW * are allocated by kernel network stack and received as part of skb */ - buf_size = sizeof(struct idpf_tx_buf) * tx_q->desc_count; - tx_q->tx_buf = kzalloc(buf_size, GFP_KERNEL); + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) + tx_q->buf_pool_size = U16_MAX; + else + tx_q->buf_pool_size = tx_q->desc_count; + tx_q->tx_buf = kcalloc(tx_q->buf_pool_size, sizeof(*tx_q->tx_buf), + GFP_KERNEL); if (!tx_q->tx_buf) return -ENOMEM; - if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) - return 0; - - buf_stack = &tx_q->stash->buf_stack; - - /* Initialize tx buf stack for out-of-order completions if - * flow scheduling offload is enabled - */ - buf_stack->bufs = kcalloc(tx_q->desc_count, sizeof(*buf_stack->bufs), - GFP_KERNEL); - if (!buf_stack->bufs) - return -ENOMEM; - - buf_stack->size = tx_q->desc_count; - buf_stack->top = tx_q->desc_count; - - for (i = 0; i < tx_q->desc_count; i++) { - buf_stack->bufs[i] = kzalloc(sizeof(*buf_stack->bufs[i]), - GFP_KERNEL); - if (!buf_stack->bufs[i]) - return -ENOMEM; - } - return 0; } @@ -244,6 +202,7 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, struct idpf_tx_queue *tx_q) { struct device *dev = tx_q->dev; + struct idpf_sw_queue *refillq; int err; err = idpf_tx_buf_alloc_all(tx_q); @@ -267,6 +226,33 @@ static int idpf_tx_desc_alloc(const struct idpf_vport *vport, tx_q->next_to_clean = 0; idpf_queue_set(GEN_CHK, tx_q); + idpf_xsk_setup_queue(vport, tx_q, VIRTCHNL2_QUEUE_TYPE_TX); + + if (!idpf_queue_has(FLOW_SCH_EN, tx_q)) + return 0; + + refillq = tx_q->refillq; + refillq->desc_count = tx_q->buf_pool_size; + refillq->ring = kcalloc(refillq->desc_count, sizeof(u32), + GFP_KERNEL); + if (!refillq->ring) { + err = -ENOMEM; + goto err_alloc; + } + + for (unsigned int i = 0; i < refillq->desc_count; i++) + refillq->ring[i] = + FIELD_PREP(IDPF_RFL_BI_BUFID_M, i) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, + idpf_queue_has(GEN_CHK, refillq)); + + /* Go ahead and flip the GEN bit since this counts as filling + * up the ring, i.e. we already ring wrapped. + */ + idpf_queue_change(GEN_CHK, refillq); + + tx_q->last_re = tx_q->desc_count - IDPF_TX_SPLITQ_RE_MIN_GAP; + return 0; err_alloc: @@ -285,18 +271,25 @@ err_alloc: static int idpf_compl_desc_alloc(const struct idpf_vport *vport, struct idpf_compl_queue *complq) { - complq->size = array_size(complq->desc_count, sizeof(*complq->comp)); + u32 desc_size; - complq->comp = dma_alloc_coherent(complq->netdev->dev.parent, - complq->size, &complq->dma, - GFP_KERNEL); - if (!complq->comp) + desc_size = idpf_queue_has(FLOW_SCH_EN, complq) ? + sizeof(*complq->comp) : sizeof(*complq->comp_4b); + complq->size = array_size(complq->desc_count, desc_size); + + complq->desc_ring = dma_alloc_coherent(complq->netdev->dev.parent, + complq->size, &complq->dma, + GFP_KERNEL); + if (!complq->desc_ring) return -ENOMEM; complq->next_to_use = 0; complq->next_to_clean = 0; idpf_queue_set(GEN_CHK, complq); + idpf_xsk_setup_queue(vport, complq, + VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + return 0; } @@ -317,8 +310,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) for (i = 0; i < vport->num_txq_grp; i++) { for (j = 0; j < vport->txq_grps[i].num_txq; j++) { struct idpf_tx_queue *txq = vport->txq_grps[i].txqs[j]; - u8 gen_bits = 0; - u16 bufidx_mask; err = idpf_tx_desc_alloc(vport, txq); if (err) { @@ -327,34 +318,6 @@ static int idpf_tx_desc_alloc_all(struct idpf_vport *vport) i); goto err_out; } - - if (!idpf_is_queue_model_split(vport->txq_model)) - continue; - - txq->compl_tag_cur_gen = 0; - - /* Determine the number of bits in the bufid - * mask and add one to get the start of the - * generation bits - */ - bufidx_mask = txq->desc_count - 1; - while (bufidx_mask >> 1) { - txq->compl_tag_gen_s++; - bufidx_mask = bufidx_mask >> 1; - } - txq->compl_tag_gen_s++; - - gen_bits = IDPF_TX_SPLITQ_COMPL_TAG_WIDTH - - txq->compl_tag_gen_s; - txq->compl_tag_gen_max = GETMAXVAL(gen_bits); - - /* Set bufid mask based on location of first - * gen bit; it cannot simply be the descriptor - * ring size-1 since we can have size values - * where not all of those bits are set. - */ - txq->compl_tag_bufid_m = - GETMAXVAL(txq->compl_tag_gen_s); } if (!idpf_is_queue_model_split(vport->txq_model)) @@ -426,6 +389,11 @@ static void idpf_rx_buf_rel_bufq(struct idpf_buf_queue *bufq) if (!bufq->buf) return; + if (idpf_queue_has(XSK, bufq)) { + idpf_xskfq_rel(bufq); + return; + } + /* Free all the bufs allocated and given to hw on Rx queue */ for (u32 i = 0; i < bufq->desc_count; i++) idpf_rx_page_rel(&bufq->buf[i]); @@ -474,14 +442,14 @@ static void idpf_rx_desc_rel(struct idpf_rx_queue *rxq, struct device *dev, if (!rxq) return; - if (rxq->skb) { - dev_kfree_skb_any(rxq->skb); - rxq->skb = NULL; - } + if (!idpf_queue_has(XSK, rxq)) + libeth_xdp_return_stash(&rxq->xdp); if (!idpf_is_queue_model_split(model)) idpf_rx_buf_rel_all(rxq); + idpf_xsk_clear_queue(rxq, VIRTCHNL2_QUEUE_TYPE_RX); + rxq->next_to_alloc = 0; rxq->next_to_clean = 0; rxq->next_to_use = 0; @@ -504,6 +472,7 @@ static void idpf_rx_desc_rel_bufq(struct idpf_buf_queue *bufq, return; idpf_rx_buf_rel_bufq(bufq); + idpf_xsk_clear_queue(bufq, VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); bufq->next_to_alloc = 0; bufq->next_to_clean = 0; @@ -586,6 +555,7 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) struct libeth_fq fq = { .count = bufq->desc_count, .type = LIBETH_FQE_HDR, + .xdp = idpf_xdp_enabled(bufq->q_vector->vport), .nid = idpf_q_vector_to_mem(bufq->q_vector), }; int ret; @@ -603,18 +573,18 @@ static int idpf_rx_hdr_buf_alloc_all(struct idpf_buf_queue *bufq) } /** - * idpf_rx_post_buf_refill - Post buffer id to refill queue + * idpf_post_buf_refill - Post buffer id to refill queue * @refillq: refill queue to post to * @buf_id: buffer id to post */ -static void idpf_rx_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) +static void idpf_post_buf_refill(struct idpf_sw_queue *refillq, u16 buf_id) { u32 nta = refillq->next_to_use; /* store the buffer ID and the SW maintained GEN bit to the refillq */ refillq->ring[nta] = - FIELD_PREP(IDPF_RX_BI_BUFID_M, buf_id) | - FIELD_PREP(IDPF_RX_BI_GEN_M, + FIELD_PREP(IDPF_RFL_BI_BUFID_M, buf_id) | + FIELD_PREP(IDPF_RFL_BI_GEN_M, idpf_queue_has(GEN_CHK, refillq)); if (unlikely(++nta == refillq->desc_count)) { @@ -785,10 +755,14 @@ static int idpf_rx_bufs_init(struct idpf_buf_queue *bufq, .count = bufq->desc_count, .type = type, .hsplit = idpf_queue_has(HSPLIT_EN, bufq), + .xdp = idpf_xdp_enabled(bufq->q_vector->vport), .nid = idpf_q_vector_to_mem(bufq->q_vector), }; int ret; + if (idpf_queue_has(XSK, bufq)) + return idpf_xskfq_init(bufq); + ret = libeth_rx_fq_create(&fq, &bufq->q_vector->napi); if (ret) return ret; @@ -812,6 +786,8 @@ int idpf_rx_bufs_init_all(struct idpf_vport *vport) bool split = idpf_is_queue_model_split(vport->rxq_model); int i, j, err; + idpf_xdp_copy_prog_to_rqs(vport, vport->xdp_prog); + for (i = 0; i < vport->num_rxq_grp; i++) { struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; u32 truesize = 0; @@ -882,6 +858,8 @@ static int idpf_rx_desc_alloc(const struct idpf_vport *vport, rxq->next_to_use = 0; idpf_queue_set(GEN_CHK, rxq); + idpf_xsk_setup_queue(vport, rxq, VIRTCHNL2_QUEUE_TYPE_RX); + return 0; } @@ -907,9 +885,10 @@ static int idpf_bufq_desc_alloc(const struct idpf_vport *vport, bufq->next_to_alloc = 0; bufq->next_to_clean = 0; bufq->next_to_use = 0; - idpf_queue_set(GEN_CHK, bufq); + idpf_xsk_setup_queue(vport, bufq, VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + return 0; } @@ -975,6 +954,341 @@ err_out: return err; } +static int idpf_init_queue_set(const struct idpf_queue_set *qs) +{ + const struct idpf_vport *vport = qs->vport; + bool splitq; + int err; + + splitq = idpf_is_queue_model_split(vport->rxq_model); + + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + struct idpf_buf_queue *bufq; + + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + err = idpf_rx_desc_alloc(vport, q->rxq); + if (err) + break; + + err = idpf_xdp_rxq_info_init(q->rxq); + if (err) + break; + + if (!splitq) + err = idpf_rx_bufs_init_singleq(q->rxq); + + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + bufq = q->bufq; + + err = idpf_bufq_desc_alloc(vport, bufq); + if (err) + break; + + for (u32 j = 0; j < bufq->q_vector->num_bufq; j++) { + struct idpf_buf_queue * const *bufqs; + enum libeth_fqe_type type; + u32 ts; + + bufqs = bufq->q_vector->bufq; + if (bufqs[j] != bufq) + continue; + + if (j) { + type = LIBETH_FQE_SHORT; + ts = bufqs[j - 1]->truesize >> 1; + } else { + type = LIBETH_FQE_MTU; + ts = 0; + } + + bufq->truesize = ts; + + err = idpf_rx_bufs_init(bufq, type); + break; + } + + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + err = idpf_tx_desc_alloc(vport, q->txq); + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + err = idpf_compl_desc_alloc(vport, q->complq); + break; + default: + continue; + } + + if (err) + return err; + } + + return 0; +} + +static void idpf_clean_queue_set(const struct idpf_queue_set *qs) +{ + const struct idpf_vport *vport = qs->vport; + struct device *dev = vport->netdev->dev.parent; + + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + idpf_xdp_rxq_info_deinit(q->rxq, vport->rxq_model); + idpf_rx_desc_rel(q->rxq, dev, vport->rxq_model); + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + idpf_rx_desc_rel_bufq(q->bufq, dev); + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + idpf_tx_desc_rel(q->txq); + + if (idpf_queue_has(XDP, q->txq)) { + q->txq->pending = 0; + q->txq->xdp_tx = 0; + } else { + q->txq->txq_grp->num_completions_pending = 0; + } + + writel(q->txq->next_to_use, q->txq->tail); + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + idpf_compl_desc_rel(q->complq); + q->complq->num_completions = 0; + break; + default: + break; + } + } +} + +static void idpf_qvec_ena_irq(struct idpf_q_vector *qv) +{ + if (qv->num_txq) { + u32 itr; + + if (IDPF_ITR_IS_DYNAMIC(qv->tx_intr_mode)) + itr = qv->vport->tx_itr_profile[qv->tx_dim.profile_ix]; + else + itr = qv->tx_itr_value; + + idpf_vport_intr_write_itr(qv, itr, true); + } + + if (qv->num_rxq) { + u32 itr; + + if (IDPF_ITR_IS_DYNAMIC(qv->rx_intr_mode)) + itr = qv->vport->rx_itr_profile[qv->rx_dim.profile_ix]; + else + itr = qv->rx_itr_value; + + idpf_vport_intr_write_itr(qv, itr, false); + } + + if (qv->num_txq || qv->num_rxq) + idpf_vport_intr_update_itr_ena_irq(qv); +} + +/** + * idpf_vector_to_queue_set - create a queue set associated with the given + * queue vector + * @qv: queue vector corresponding to the queue pair + * + * Returns a pointer to a dynamically allocated array of pointers to all + * queues associated with a given queue vector (@qv). + * Please note that the caller is responsible to free the memory allocated + * by this function using kfree(). + * + * Return: &idpf_queue_set on success, %NULL in case of error. + */ +static struct idpf_queue_set * +idpf_vector_to_queue_set(struct idpf_q_vector *qv) +{ + bool xdp = qv->vport->xdp_txq_offset && !qv->num_xsksq; + struct idpf_vport *vport = qv->vport; + struct idpf_queue_set *qs; + u32 num; + + num = qv->num_rxq + qv->num_bufq + qv->num_txq + qv->num_complq; + num += xdp ? qv->num_rxq * 2 : qv->num_xsksq * 2; + if (!num) + return NULL; + + qs = idpf_alloc_queue_set(vport, num); + if (!qs) + return NULL; + + num = 0; + + for (u32 i = 0; i < qv->num_bufq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + qs->qs[num++].bufq = qv->bufq[i]; + } + + for (u32 i = 0; i < qv->num_rxq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_RX; + qs->qs[num++].rxq = qv->rx[i]; + } + + for (u32 i = 0; i < qv->num_txq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[num++].txq = qv->tx[i]; + } + + for (u32 i = 0; i < qv->num_complq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[num++].complq = qv->complq[i]; + } + + if (!vport->xdp_txq_offset) + goto finalize; + + if (xdp) { + for (u32 i = 0; i < qv->num_rxq; i++) { + u32 idx = vport->xdp_txq_offset + qv->rx[i]->idx; + + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[num++].txq = vport->txqs[idx]; + + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[num++].complq = vport->txqs[idx]->complq; + } + } else { + for (u32 i = 0; i < qv->num_xsksq; i++) { + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[num++].txq = qv->xsksq[i]; + + qs->qs[num].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[num++].complq = qv->xsksq[i]->complq; + } + } + +finalize: + if (num != qs->num) { + kfree(qs); + return NULL; + } + + return qs; +} + +static int idpf_qp_enable(const struct idpf_queue_set *qs, u32 qid) +{ + struct idpf_vport *vport = qs->vport; + struct idpf_q_vector *q_vector; + int err; + + q_vector = idpf_find_rxq_vec(vport, qid); + + err = idpf_init_queue_set(qs); + if (err) { + netdev_err(vport->netdev, "Could not initialize queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + if (!vport->xdp_txq_offset) + goto config; + + q_vector->xsksq = kcalloc(DIV_ROUND_UP(vport->num_rxq_grp, + vport->num_q_vectors), + sizeof(*q_vector->xsksq), GFP_KERNEL); + if (!q_vector->xsksq) + return -ENOMEM; + + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + + if (q->type != VIRTCHNL2_QUEUE_TYPE_TX) + continue; + + if (!idpf_queue_has(XSK, q->txq)) + continue; + + idpf_xsk_init_wakeup(q_vector); + + q->txq->q_vector = q_vector; + q_vector->xsksq[q_vector->num_xsksq++] = q->txq; + } + +config: + err = idpf_send_config_queue_set_msg(qs); + if (err) { + netdev_err(vport->netdev, "Could not configure queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + err = idpf_send_enable_queue_set_msg(qs); + if (err) { + netdev_err(vport->netdev, "Could not enable queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + napi_enable(&q_vector->napi); + idpf_qvec_ena_irq(q_vector); + + netif_start_subqueue(vport->netdev, qid); + + return 0; +} + +static int idpf_qp_disable(const struct idpf_queue_set *qs, u32 qid) +{ + struct idpf_vport *vport = qs->vport; + struct idpf_q_vector *q_vector; + int err; + + q_vector = idpf_find_rxq_vec(vport, qid); + netif_stop_subqueue(vport->netdev, qid); + + writel(0, q_vector->intr_reg.dyn_ctl); + napi_disable(&q_vector->napi); + + err = idpf_send_disable_queue_set_msg(qs); + if (err) { + netdev_err(vport->netdev, "Could not disable queues in pair %u: %pe\n", + qid, ERR_PTR(err)); + return err; + } + + idpf_clean_queue_set(qs); + + kfree(q_vector->xsksq); + q_vector->num_xsksq = 0; + + return 0; +} + +/** + * idpf_qp_switch - enable or disable queues associated with queue pair + * @vport: vport to switch the pair for + * @qid: index of the queue pair to switch + * @en: whether to enable or disable the pair + * + * Return: 0 on success, -errno on failure. + */ +int idpf_qp_switch(struct idpf_vport *vport, u32 qid, bool en) +{ + struct idpf_q_vector *q_vector = idpf_find_rxq_vec(vport, qid); + struct idpf_queue_set *qs __free(kfree) = NULL; + + if (idpf_find_txq_vec(vport, qid) != q_vector) + return -EINVAL; + + qs = idpf_vector_to_queue_set(q_vector); + if (!qs) + return -ENOMEM; + + return en ? idpf_qp_enable(qs, qid) : idpf_qp_disable(qs, qid); +} + /** * idpf_txq_group_rel - Release all resources for txq groups * @vport: vport to release txq groups on @@ -995,6 +1309,11 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) struct idpf_txq_group *txq_grp = &vport->txq_grps[i]; for (j = 0; j < txq_grp->num_txq; j++) { + if (flow_sch_en) { + kfree(txq_grp->txqs[j]->refillq); + txq_grp->txqs[j]->refillq = NULL; + } + kfree(txq_grp->txqs[j]); txq_grp->txqs[j] = NULL; } @@ -1004,9 +1323,6 @@ static void idpf_txq_group_rel(struct idpf_vport *vport) kfree(txq_grp->complq); txq_grp->complq = NULL; - - if (flow_sch_en) - kfree(txq_grp->stashes); } kfree(vport->txq_grps); vport->txq_grps = NULL; @@ -1088,8 +1404,12 @@ static void idpf_vport_queue_grp_rel_all(struct idpf_vport *vport) */ void idpf_vport_queues_rel(struct idpf_vport *vport) { + idpf_xdp_copy_prog_to_rqs(vport, NULL); + idpf_tx_desc_rel_all(vport); idpf_rx_desc_rel_all(vport); + + idpf_xdpsqs_put(vport); idpf_vport_queue_grp_rel_all(vport); kfree(vport->txqs); @@ -1163,6 +1483,18 @@ void idpf_vport_init_num_qs(struct idpf_vport *vport, if (idpf_is_queue_model_split(vport->rxq_model)) vport->num_bufq = le16_to_cpu(vport_msg->num_rx_bufq); + vport->xdp_prog = config_data->xdp_prog; + if (idpf_xdp_enabled(vport)) { + vport->xdp_txq_offset = config_data->num_req_tx_qs; + vport->num_xdp_txq = le16_to_cpu(vport_msg->num_tx_q) - + vport->xdp_txq_offset; + vport->xdpsq_share = libeth_xdpsq_shared(vport->num_xdp_txq); + } else { + vport->xdp_txq_offset = 0; + vport->num_xdp_txq = 0; + vport->xdpsq_share = false; + } + /* Adjust number of buffer queues per Rx queue group. */ if (!idpf_is_queue_model_split(vport->rxq_model)) { vport->num_bufqs_per_qgrp = 0; @@ -1234,22 +1566,17 @@ int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx, int dflt_splitq_txq_grps = 0, dflt_singleq_txqs = 0; int dflt_splitq_rxq_grps = 0, dflt_singleq_rxqs = 0; u16 num_req_tx_qs = 0, num_req_rx_qs = 0; + struct idpf_vport_user_config_data *user; struct idpf_vport_config *vport_config; u16 num_txq_grps, num_rxq_grps; - u32 num_qs; + u32 num_qs, num_xdpsq; vport_config = adapter->vport_config[vport_idx]; if (vport_config) { num_req_tx_qs = vport_config->user_config.num_req_tx_qs; num_req_rx_qs = vport_config->user_config.num_req_rx_qs; } else { - int num_cpus; - - /* Restrict num of queues to cpus online as a default - * configuration to give best performance. User can always - * override to a max number of queues via ethtool. - */ - num_cpus = num_online_cpus(); + u32 num_cpus = netif_get_num_default_rss_queues(); dflt_splitq_txq_grps = min_t(int, max_q->max_txq, num_cpus); dflt_singleq_txqs = min_t(int, max_q->max_txq, num_cpus); @@ -1284,6 +1611,24 @@ int idpf_vport_calc_total_qs(struct idpf_adapter *adapter, u16 vport_idx, vport_msg->num_rx_bufq = 0; } + if (!vport_config) + return 0; + + user = &vport_config->user_config; + user->num_req_rx_qs = le16_to_cpu(vport_msg->num_rx_q); + user->num_req_tx_qs = le16_to_cpu(vport_msg->num_tx_q); + + if (vport_config->user_config.xdp_prog) + num_xdpsq = libeth_xdpsq_num(user->num_req_rx_qs, + user->num_req_tx_qs, + vport_config->max_q.max_txq); + else + num_xdpsq = 0; + + vport_msg->num_tx_q = cpu_to_le16(user->num_req_tx_qs + num_xdpsq); + if (idpf_is_queue_model_split(le16_to_cpu(vport_msg->txq_model))) + vport_msg->num_tx_complq = vport_msg->num_tx_q; + return 0; } @@ -1333,14 +1678,13 @@ static void idpf_vport_calc_numq_per_grp(struct idpf_vport *vport, static void idpf_rxq_set_descids(const struct idpf_vport *vport, struct idpf_rx_queue *q) { - if (idpf_is_queue_model_split(vport->rxq_model)) { - q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M; - } else { - if (vport->base_rxd) - q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M; - else - q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M; - } + if (idpf_is_queue_model_split(vport->rxq_model)) + return; + + if (vport->base_rxd) + q->rxdids = VIRTCHNL2_RXDID_1_32B_BASE_M; + else + q->rxdids = VIRTCHNL2_RXDID_2_FLEX_SQ_NIC_M; } /** @@ -1367,7 +1711,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) for (i = 0; i < vport->num_txq_grp; i++) { struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; struct idpf_adapter *adapter = vport->adapter; - struct idpf_txq_stash *stashes; int j; tx_qgrp->vport = vport; @@ -1380,15 +1723,6 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) goto err_alloc; } - if (split && flow_sch_en) { - stashes = kcalloc(num_txq, sizeof(*stashes), - GFP_KERNEL); - if (!stashes) - goto err_alloc; - - tx_qgrp->stashes = stashes; - } - for (j = 0; j < tx_qgrp->num_txq; j++) { struct idpf_tx_queue *q = tx_qgrp->txqs[j]; @@ -1398,6 +1732,7 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) q->tx_min_pkt_len = idpf_get_min_tx_pkt_len(adapter); q->netdev = vport->netdev; q->txq_grp = tx_qgrp; + q->rel_q_id = j; if (!split) { q->clean_budget = vport->compln_clean_budget; @@ -1408,12 +1743,14 @@ static int idpf_txq_group_alloc(struct idpf_vport *vport, u16 num_txq) if (!flow_sch_en) continue; - if (split) { - q->stash = &stashes[j]; - hash_init(q->stash->sched_buf_hash); - } - idpf_queue_set(FLOW_SCH_EN, q); + + q->refillq = kzalloc(sizeof(*q->refillq), GFP_KERNEL); + if (!q->refillq) + goto err_alloc; + + idpf_queue_set(GEN_CHK, q->refillq); + idpf_queue_set(RFL_GEN_CHK, q->refillq); } if (!split) @@ -1556,7 +1893,6 @@ skip_splitq_rx_init: setup_rxq: q->desc_count = vport->rxq_desc_count; q->rx_ptype_lkup = vport->rx_ptype_lkup; - q->netdev = vport->netdev; q->bufq_sets = rx_qgrp->splitq.bufq_sets; q->idx = (i * num_rxq) + j; q->rx_buffer_low_watermark = IDPF_LOW_WATERMARK; @@ -1617,15 +1953,19 @@ int idpf_vport_queues_alloc(struct idpf_vport *vport) if (err) goto err_out; - err = idpf_tx_desc_alloc_all(vport); + err = idpf_vport_init_fast_path_txqs(vport); if (err) goto err_out; - err = idpf_rx_desc_alloc_all(vport); + err = idpf_xdpsqs_get(vport); if (err) goto err_out; - err = idpf_vport_init_fast_path_txqs(vport); + err = idpf_tx_desc_alloc_all(vport); + if (err) + goto err_out; + + err = idpf_rx_desc_alloc_all(vport); if (err) goto err_out; @@ -1638,32 +1978,6 @@ err_out: } /** - * idpf_tx_handle_sw_marker - Handle queue marker packet - * @tx_q: tx queue to handle software marker - */ -static void idpf_tx_handle_sw_marker(struct idpf_tx_queue *tx_q) -{ - struct idpf_netdev_priv *priv = netdev_priv(tx_q->netdev); - struct idpf_vport *vport = priv->vport; - int i; - - idpf_queue_clear(SW_MARKER, tx_q); - /* Hardware must write marker packets to all queues associated with - * completion queues. So check if all queues received marker packets - */ - for (i = 0; i < vport->num_txq; i++) - /* If we're still waiting on any other TXQ marker completions, - * just return now since we cannot wake up the marker_wq yet. - */ - if (idpf_queue_has(SW_MARKER, vport->txqs[i])) - return; - - /* Drain complete */ - set_bit(IDPF_VPORT_SW_MARKER, vport->flags); - wake_up(&vport->sw_marker_wq); -} - -/** * idpf_tx_read_tstamp - schedule a work to read Tx timestamp value * @txq: queue to read the timestamp from * @skb: socket buffer to provide Tx timestamp value @@ -1697,87 +2011,6 @@ static void idpf_tx_read_tstamp(struct idpf_tx_queue *txq, struct sk_buff *skb) spin_unlock_bh(&tx_tstamp_caps->status_lock); } -/** - * idpf_tx_clean_stashed_bufs - clean bufs that were stored for - * out of order completions - * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) - * @cleaned: pointer to stats struct to track cleaned packets/bytes - * @budget: Used to determine if we are in netpoll - */ -static void idpf_tx_clean_stashed_bufs(struct idpf_tx_queue *txq, - u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) -{ - struct idpf_tx_stash *stash; - struct hlist_node *tmp_buf; - struct libeth_cq_pp cp = { - .dev = txq->dev, - .ss = cleaned, - .napi = budget, - }; - - /* Buffer completion */ - hash_for_each_possible_safe(txq->stash->sched_buf_hash, stash, tmp_buf, - hlist, compl_tag) { - if (unlikely(idpf_tx_buf_compl_tag(&stash->buf) != compl_tag)) - continue; - - hash_del(&stash->hlist); - - if (stash->buf.type == LIBETH_SQE_SKB && - (skb_shinfo(stash->buf.skb)->tx_flags & SKBTX_IN_PROGRESS)) - idpf_tx_read_tstamp(txq, stash->buf.skb); - - libeth_tx_complete(&stash->buf, &cp); - - /* Push shadow buf back onto stack */ - idpf_buf_lifo_push(&txq->stash->buf_stack, stash); - } -} - -/** - * idpf_stash_flow_sch_buffers - store buffer parameters info to be freed at a - * later time (only relevant for flow scheduling mode) - * @txq: Tx queue to clean - * @tx_buf: buffer to store - */ -static int idpf_stash_flow_sch_buffers(struct idpf_tx_queue *txq, - struct idpf_tx_buf *tx_buf) -{ - struct idpf_tx_stash *stash; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX)) - return 0; - - stash = idpf_buf_lifo_pop(&txq->stash->buf_stack); - if (unlikely(!stash)) { - net_err_ratelimited("%s: No out-of-order TX buffers left!\n", - netdev_name(txq->netdev)); - - return -ENOMEM; - } - - /* Store buffer params in shadow buffer */ - stash->buf.skb = tx_buf->skb; - stash->buf.bytes = tx_buf->bytes; - stash->buf.packets = tx_buf->packets; - stash->buf.type = tx_buf->type; - stash->buf.nr_frags = tx_buf->nr_frags; - dma_unmap_addr_set(&stash->buf, dma, dma_unmap_addr(tx_buf, dma)); - dma_unmap_len_set(&stash->buf, len, dma_unmap_len(tx_buf, len)); - idpf_tx_buf_compl_tag(&stash->buf) = idpf_tx_buf_compl_tag(tx_buf); - - /* Add buffer to buf_hash table to be freed later */ - hash_add(txq->stash->sched_buf_hash, &stash->hlist, - idpf_tx_buf_compl_tag(&stash->buf)); - - tx_buf->type = LIBETH_SQE_EMPTY; - - return 0; -} - #define idpf_tx_splitq_clean_bump_ntc(txq, ntc, desc, buf) \ do { \ if (unlikely(++(ntc) == (txq)->desc_count)) { \ @@ -1805,14 +2038,8 @@ do { \ * Separate packet completion events will be reported on the completion queue, * and the buffers will be cleaned separately. The stats are not updated from * this function when using flow-based scheduling. - * - * Furthermore, in flow scheduling mode, check to make sure there are enough - * reserve buffers to stash the packet. If there are not, return early, which - * will leave next_to_clean pointing to the packet that failed to be stashed. - * - * Return: false in the scenario above, true otherwise. */ -static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, +static void idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, int napi_budget, struct libeth_sq_napi_stats *cleaned, bool descs_only) @@ -1826,7 +2053,12 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, .napi = napi_budget, }; struct idpf_tx_buf *tx_buf; - bool clean_complete = true; + + if (descs_only) { + /* Bump ring index to mark as cleaned. */ + tx_q->next_to_clean = end; + return; + } tx_desc = &tx_q->flex_tx[ntc]; next_pending_desc = &tx_q->flex_tx[end]; @@ -1846,136 +2078,61 @@ static bool idpf_tx_splitq_clean(struct idpf_tx_queue *tx_q, u16 end, break; eop_idx = tx_buf->rs_idx; + libeth_tx_complete(tx_buf, &cp); - if (descs_only) { - if (IDPF_TX_BUF_RSV_UNUSED(tx_q) < tx_buf->nr_frags) { - clean_complete = false; - goto tx_splitq_clean_out; - } - - idpf_stash_flow_sch_buffers(tx_q, tx_buf); + /* unmap remaining buffers */ + while (ntc != eop_idx) { + idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, + tx_desc, tx_buf); - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - idpf_stash_flow_sch_buffers(tx_q, tx_buf); - } - } else { + /* unmap any remaining paged data */ libeth_tx_complete(tx_buf, &cp); - - /* unmap remaining buffers */ - while (ntc != eop_idx) { - idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, - tx_desc, tx_buf); - - /* unmap any remaining paged data */ - libeth_tx_complete(tx_buf, &cp); - } } fetch_next_txq_desc: idpf_tx_splitq_clean_bump_ntc(tx_q, ntc, tx_desc, tx_buf); } -tx_splitq_clean_out: tx_q->next_to_clean = ntc; - - return clean_complete; } -#define idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, buf) \ -do { \ - (buf)++; \ - (ntc)++; \ - if (unlikely((ntc) == (txq)->desc_count)) { \ - buf = (txq)->tx_buf; \ - ntc = 0; \ - } \ -} while (0) - /** - * idpf_tx_clean_buf_ring - clean flow scheduling TX queue buffers + * idpf_tx_clean_bufs - clean flow scheduling TX queue buffers * @txq: queue to clean - * @compl_tag: completion tag of packet to clean (from completion descriptor) + * @buf_id: packet's starting buffer ID, from completion descriptor * @cleaned: pointer to stats struct to track cleaned packets/bytes * @budget: Used to determine if we are in netpoll * - * Cleans all buffers associated with the input completion tag either from the - * TX buffer ring or from the hash table if the buffers were previously - * stashed. Returns the byte/segment count for the cleaned packet associated - * this completion tag. + * Clean all buffers associated with the packet starting at buf_id. Returns the + * byte/segment count for the cleaned packet. */ -static bool idpf_tx_clean_buf_ring(struct idpf_tx_queue *txq, u16 compl_tag, - struct libeth_sq_napi_stats *cleaned, - int budget) +static void idpf_tx_clean_bufs(struct idpf_tx_queue *txq, u32 buf_id, + struct libeth_sq_napi_stats *cleaned, + int budget) { - u16 idx = compl_tag & txq->compl_tag_bufid_m; struct idpf_tx_buf *tx_buf = NULL; struct libeth_cq_pp cp = { .dev = txq->dev, .ss = cleaned, .napi = budget, }; - u16 ntc, orig_idx = idx; - - tx_buf = &txq->tx_buf[idx]; - - if (unlikely(tx_buf->type <= LIBETH_SQE_CTX || - idpf_tx_buf_compl_tag(tx_buf) != compl_tag)) - return false; + tx_buf = &txq->tx_buf[buf_id]; if (tx_buf->type == LIBETH_SQE_SKB) { if (skb_shinfo(tx_buf->skb)->tx_flags & SKBTX_IN_PROGRESS) idpf_tx_read_tstamp(txq, tx_buf->skb); libeth_tx_complete(tx_buf, &cp); + idpf_post_buf_refill(txq->refillq, buf_id); } - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + buf_id = idpf_tx_buf_next(tx_buf); - while (idpf_tx_buf_compl_tag(tx_buf) == compl_tag) { + tx_buf = &txq->tx_buf[buf_id]; libeth_tx_complete(tx_buf, &cp); - idpf_tx_clean_buf_ring_bump_ntc(txq, idx, tx_buf); + idpf_post_buf_refill(txq->refillq, buf_id); } - - /* - * It's possible the packet we just cleaned was an out of order - * completion, which means we can stash the buffers starting from - * the original next_to_clean and reuse the descriptors. We need - * to compare the descriptor ring next_to_clean packet's "first" buffer - * to the "first" buffer of the packet we just cleaned to determine if - * this is the case. Howevever, next_to_clean can point to either a - * reserved buffer that corresponds to a context descriptor used for the - * next_to_clean packet (TSO packet) or the "first" buffer (single - * packet). The orig_idx from the packet we just cleaned will always - * point to the "first" buffer. If next_to_clean points to a reserved - * buffer, let's bump ntc once and start the comparison from there. - */ - ntc = txq->next_to_clean; - tx_buf = &txq->tx_buf[ntc]; - - if (tx_buf->type == LIBETH_SQE_CTX) - idpf_tx_clean_buf_ring_bump_ntc(txq, ntc, tx_buf); - - /* - * If ntc still points to a different "first" buffer, clean the - * descriptor ring and stash all of the buffers for later cleaning. If - * we cannot stash all of the buffers, next_to_clean will point to the - * "first" buffer of the packet that could not be stashed and cleaning - * will start there next time. - */ - if (unlikely(tx_buf != &txq->tx_buf[orig_idx] && - !idpf_tx_splitq_clean(txq, orig_idx, budget, cleaned, - true))) - return true; - - /* - * Otherwise, update next_to_clean to reflect the cleaning that was - * done above. - */ - txq->next_to_clean = idx; - - return true; } /** @@ -1994,22 +2151,17 @@ static void idpf_tx_handle_rs_completion(struct idpf_tx_queue *txq, struct libeth_sq_napi_stats *cleaned, int budget) { - u16 compl_tag; + /* RS completion contains queue head for queue based scheduling or + * completion tag for flow based scheduling. + */ + u16 rs_compl_val = le16_to_cpu(desc->common.q_head_compl_tag.q_head); if (!idpf_queue_has(FLOW_SCH_EN, txq)) { - u16 head = le16_to_cpu(desc->q_head_compl_tag.q_head); - - idpf_tx_splitq_clean(txq, head, budget, cleaned, false); + idpf_tx_splitq_clean(txq, rs_compl_val, budget, cleaned, false); return; } - compl_tag = le16_to_cpu(desc->q_head_compl_tag.compl_tag); - - /* If we didn't clean anything on the ring, this packet must be - * in the hash table. Go clean it there. - */ - if (!idpf_tx_clean_buf_ring(txq, compl_tag, cleaned, budget)) - idpf_tx_clean_stashed_bufs(txq, compl_tag, cleaned, budget); + idpf_tx_clean_bufs(txq, rs_compl_val, cleaned, budget); } /** @@ -2037,19 +2189,19 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, do { struct libeth_sq_napi_stats cleaned_stats = { }; struct idpf_tx_queue *tx_q; + __le16 hw_head; int rel_tx_qid; - u16 hw_head; u8 ctype; /* completion type */ u16 gen; /* if the descriptor isn't done, no work yet to do */ - gen = le16_get_bits(tx_desc->qid_comptype_gen, + gen = le16_get_bits(tx_desc->common.qid_comptype_gen, IDPF_TXD_COMPLQ_GEN_M); if (idpf_queue_has(GEN_CHK, complq) != gen) break; /* Find necessary info of TX queue to clean buffers */ - rel_tx_qid = le16_get_bits(tx_desc->qid_comptype_gen, + rel_tx_qid = le16_get_bits(tx_desc->common.qid_comptype_gen, IDPF_TXD_COMPLQ_QID_M); if (rel_tx_qid >= complq->txq_grp->num_txq || !complq->txq_grp->txqs[rel_tx_qid]) { @@ -2059,22 +2211,19 @@ static bool idpf_tx_clean_complq(struct idpf_compl_queue *complq, int budget, tx_q = complq->txq_grp->txqs[rel_tx_qid]; /* Determine completion type */ - ctype = le16_get_bits(tx_desc->qid_comptype_gen, + ctype = le16_get_bits(tx_desc->common.qid_comptype_gen, IDPF_TXD_COMPLQ_COMPL_TYPE_M); switch (ctype) { case IDPF_TXD_COMPLT_RE: - hw_head = le16_to_cpu(tx_desc->q_head_compl_tag.q_head); + hw_head = tx_desc->common.q_head_compl_tag.q_head; - idpf_tx_splitq_clean(tx_q, hw_head, budget, - &cleaned_stats, true); + idpf_tx_splitq_clean(tx_q, le16_to_cpu(hw_head), + budget, &cleaned_stats, true); break; case IDPF_TXD_COMPLT_RS: idpf_tx_handle_rs_completion(tx_q, tx_desc, &cleaned_stats, budget); break; - case IDPF_TXD_COMPLT_SW_MARKER: - idpf_tx_handle_sw_marker(tx_q); - break; default: netdev_err(tx_q->netdev, "Unknown TX completion type: %d\n", ctype); @@ -2126,8 +2275,7 @@ fetch_next_desc: /* Update BQL */ nq = netdev_get_tx_queue(tx_q->netdev, tx_q->idx); - dont_wake = !complq_ok || IDPF_TX_BUF_RSV_LOW(tx_q) || - np->state != __IDPF_VPORT_UP || + dont_wake = !complq_ok || np->state != __IDPF_VPORT_UP || !netif_carrier_ok(tx_q->netdev); /* Check if the TXQ needs to and can be restarted */ __netif_txq_completed_wake(nq, tx_q->cleaned_pkts, tx_q->cleaned_bytes, @@ -2148,6 +2296,69 @@ fetch_next_desc: } /** + * idpf_wait_for_sw_marker_completion - wait for SW marker of disabled Tx queue + * @txq: disabled Tx queue + * + * When Tx queue is requested for disabling, the CP sends a special completion + * descriptor called "SW marker", meaning the queue is ready to be destroyed. + * If, for some reason, the marker is not received within 500 ms, break the + * polling to not hang the driver. + */ +void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq) +{ + struct idpf_compl_queue *complq; + unsigned long timeout; + bool flow, gen_flag; + u32 ntc; + + if (!idpf_queue_has(SW_MARKER, txq)) + return; + + complq = idpf_queue_has(XDP, txq) ? txq->complq : txq->txq_grp->complq; + ntc = complq->next_to_clean; + + flow = idpf_queue_has(FLOW_SCH_EN, complq); + gen_flag = idpf_queue_has(GEN_CHK, complq); + + timeout = jiffies + msecs_to_jiffies(IDPF_WAIT_FOR_MARKER_TIMEO); + + do { + struct idpf_splitq_4b_tx_compl_desc *tx_desc; + struct idpf_tx_queue *target; + u32 ctype_gen, id; + + tx_desc = flow ? &complq->comp[ntc].common : + &complq->comp_4b[ntc]; + ctype_gen = le16_to_cpu(tx_desc->qid_comptype_gen); + + if (!!(ctype_gen & IDPF_TXD_COMPLQ_GEN_M) != gen_flag) { + usleep_range(500, 1000); + continue; + } + + if (FIELD_GET(IDPF_TXD_COMPLQ_COMPL_TYPE_M, ctype_gen) != + IDPF_TXD_COMPLT_SW_MARKER) + goto next; + + id = FIELD_GET(IDPF_TXD_COMPLQ_QID_M, ctype_gen); + target = complq->txq_grp->txqs[id]; + + idpf_queue_clear(SW_MARKER, target); + if (target == txq) + break; + +next: + if (unlikely(++ntc == complq->desc_count)) { + ntc = 0; + gen_flag = !gen_flag; + } + } while (time_before(jiffies, timeout)); + + idpf_queue_assign(GEN_CHK, complq, gen_flag); + complq->next_to_clean = ntc; +} + +/** * idpf_tx_splitq_build_ctb - populate command tag and size for queue * based scheduling descriptors * @desc: descriptor to populate @@ -2184,15 +2395,21 @@ void idpf_tx_splitq_build_flow_desc(union idpf_tx_flex_desc *desc, desc->flow.qw1.compl_tag = cpu_to_le16(params->compl_tag); } -/* Global conditions to tell whether the txq (and related resources) - * has room to allow the use of "size" descriptors. +/** + * idpf_tx_splitq_has_room - check if enough Tx splitq resources are available + * @tx_q: the queue to be checked + * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of Tx buffers required for this packet + * + * Return: 0 if no room available, 1 otherwise */ -static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) +static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 descs_needed, + u32 bufs_needed) { - if (IDPF_DESC_UNUSED(tx_q) < size || + if (IDPF_DESC_UNUSED(tx_q) < descs_needed || IDPF_TX_COMPLQ_PENDING(tx_q->txq_grp) > IDPF_TX_COMPLQ_OVERFLOW_THRESH(tx_q->txq_grp->complq) || - IDPF_TX_BUF_RSV_LOW(tx_q)) + idpf_tx_splitq_get_free_bufs(tx_q->refillq) < bufs_needed) return 0; return 1; } @@ -2201,14 +2418,21 @@ static int idpf_txq_has_room(struct idpf_tx_queue *tx_q, u32 size) * idpf_tx_maybe_stop_splitq - 1st level check for Tx splitq stop conditions * @tx_q: the queue to be checked * @descs_needed: number of descriptors required for this packet + * @bufs_needed: number of buffers needed for this packet * - * Returns 0 if stop is not needed + * Return: 0 if stop is not needed */ static int idpf_tx_maybe_stop_splitq(struct idpf_tx_queue *tx_q, - unsigned int descs_needed) + u32 descs_needed, + u32 bufs_needed) { + /* Since we have multiple resources to check for splitq, our + * start,stop_thrs becomes a boolean check instead of a count + * threshold. + */ if (netif_subqueue_maybe_stop(tx_q->netdev, tx_q->idx, - idpf_txq_has_room(tx_q, descs_needed), + idpf_txq_has_room(tx_q, descs_needed, + bufs_needed), 1, 1)) return 0; @@ -2250,14 +2474,16 @@ void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, } /** - * idpf_tx_desc_count_required - calculate number of Tx descriptors needed + * idpf_tx_res_count_required - get number of Tx resources needed for this pkt * @txq: queue to send buffer on * @skb: send buffer + * @bufs_needed: (output) number of buffers needed for this skb. * - * Returns number of data descriptors needed for this skb. + * Return: number of data descriptors and buffers needed for this skb. */ -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb) +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, + u32 *bufs_needed) { const struct skb_shared_info *shinfo; unsigned int count = 0, i; @@ -2268,6 +2494,7 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, return count; shinfo = skb_shinfo(skb); + *bufs_needed += shinfo->nr_frags; for (i = 0; i < shinfo->nr_frags; i++) { unsigned int size; @@ -2297,71 +2524,89 @@ unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, } /** - * idpf_tx_dma_map_error - handle TX DMA map errors - * @txq: queue to send buffer on - * @skb: send buffer - * @first: original first buffer info buffer for packet - * @idx: starting point on ring to unwind + * idpf_tx_splitq_bump_ntu - adjust NTU and generation + * @txq: the tx ring to wrap + * @ntu: ring index to bump */ -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, - struct idpf_tx_buf *first, u16 idx) +static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) { - struct libeth_sq_napi_stats ss = { }; - struct libeth_cq_pp cp = { - .dev = txq->dev, - .ss = &ss, - }; + ntu++; - u64_stats_update_begin(&txq->stats_sync); - u64_stats_inc(&txq->q_stats.dma_map_errs); - u64_stats_update_end(&txq->stats_sync); + if (ntu == txq->desc_count) + ntu = 0; - /* clear dma mappings for failed tx_buf map */ - for (;;) { - struct idpf_tx_buf *tx_buf; + return ntu; +} - tx_buf = &txq->tx_buf[idx]; - libeth_tx_complete(tx_buf, &cp); - if (tx_buf == first) - break; - if (idx == 0) - idx = txq->desc_count; - idx--; - } +/** + * idpf_tx_get_free_buf_id - get a free buffer ID from the refill queue + * @refillq: refill queue to get buffer ID from + * @buf_id: return buffer ID + * + * Return: true if a buffer ID was found, false if not + */ +static bool idpf_tx_get_free_buf_id(struct idpf_sw_queue *refillq, + u32 *buf_id) +{ + u32 ntc = refillq->next_to_clean; + u32 refill_desc; - if (skb_is_gso(skb)) { - union idpf_tx_flex_desc *tx_desc; + refill_desc = refillq->ring[ntc]; - /* If we failed a DMA mapping for a TSO packet, we will have - * used one additional descriptor for a context - * descriptor. Reset that here. - */ - tx_desc = &txq->flex_tx[idx]; - memset(tx_desc, 0, sizeof(*tx_desc)); - if (idx == 0) - idx = txq->desc_count; - idx--; + if (unlikely(idpf_queue_has(RFL_GEN_CHK, refillq) != + !!(refill_desc & IDPF_RFL_BI_GEN_M))) + return false; + + *buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); + + if (unlikely(++ntc == refillq->desc_count)) { + idpf_queue_change(RFL_GEN_CHK, refillq); + ntc = 0; } - /* Update tail in case netdev_xmit_more was previously true */ - idpf_tx_buf_hw_update(txq, idx, false); + refillq->next_to_clean = ntc; + + return true; } /** - * idpf_tx_splitq_bump_ntu - adjust NTU and generation - * @txq: the tx ring to wrap - * @ntu: ring index to bump + * idpf_tx_splitq_pkt_err_unmap - Unmap buffers and bump tail in case of error + * @txq: Tx queue to unwind + * @params: pointer to splitq params struct + * @first: starting buffer for packet to unmap */ -static unsigned int idpf_tx_splitq_bump_ntu(struct idpf_tx_queue *txq, u16 ntu) +static void idpf_tx_splitq_pkt_err_unmap(struct idpf_tx_queue *txq, + struct idpf_tx_splitq_params *params, + struct idpf_tx_buf *first) { - ntu++; + struct idpf_sw_queue *refillq = txq->refillq; + struct libeth_sq_napi_stats ss = { }; + struct idpf_tx_buf *tx_buf = first; + struct libeth_cq_pp cp = { + .dev = txq->dev, + .ss = &ss, + }; - if (ntu == txq->desc_count) { - ntu = 0; - txq->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(txq); + u64_stats_update_begin(&txq->stats_sync); + u64_stats_inc(&txq->q_stats.dma_map_errs); + u64_stats_update_end(&txq->stats_sync); + + libeth_tx_complete(tx_buf, &cp); + while (idpf_tx_buf_next(tx_buf) != IDPF_TXBUF_NULL) { + tx_buf = &txq->tx_buf[idpf_tx_buf_next(tx_buf)]; + libeth_tx_complete(tx_buf, &cp); } - return ntu; + /* Update tail in case netdev_xmit_more was previously true. */ + idpf_tx_buf_hw_update(txq, params->prev_ntu, false); + + if (!refillq) + return; + + /* Restore refillq state to avoid leaking tags. */ + if (params->prev_refill_gen != idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = params->prev_refill_ntc; } /** @@ -2385,6 +2630,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, struct netdev_queue *nq; struct sk_buff *skb; skb_frag_t *frag; + u32 next_buf_id; u16 td_cmd = 0; dma_addr_t dma; @@ -2402,17 +2648,16 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, tx_buf = first; first->nr_frags = 0; - params->compl_tag = - (tx_q->compl_tag_cur_gen << tx_q->compl_tag_gen_s) | i; - for (frag = &skb_shinfo(skb)->frags[0];; frag++) { unsigned int max_data = IDPF_TX_MAX_DESC_DATA_ALIGNED; - if (dma_mapping_error(tx_q->dev, dma)) - return idpf_tx_dma_map_error(tx_q, skb, first, i); + if (unlikely(dma_mapping_error(tx_q->dev, dma))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); + } first->nr_frags++; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; tx_buf->type = LIBETH_SQE_FRAG; /* record length, and DMA address */ @@ -2468,29 +2713,12 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, max_data); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = - IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } - /* Since this packet has a buffer that is going to span - * multiple descriptors, it's going to leave holes in - * to the TX buffer ring. To ensure these holes do not - * cause issues in the cleaning routines, we will clear - * them of any stale data and assign them the same - * completion tag as the current packet. Then when the - * packet is being cleaned, the cleaning routines will - * simply pass over these holes and finish cleaning the - * rest of the packet. - */ - tx_buf->type = LIBETH_SQE_EMPTY; - idpf_tx_buf_compl_tag(tx_buf) = params->compl_tag; - /* Adjust the DMA offset and the remaining size of the * fragment. On the first iteration of this loop, * max_data will be >= 12K and <= 16K-1. On any @@ -2515,15 +2743,25 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); if (unlikely(++i == tx_q->desc_count)) { - tx_buf = tx_q->tx_buf; tx_desc = &tx_q->flex_tx[0]; i = 0; - tx_q->compl_tag_cur_gen = IDPF_TX_ADJ_COMPL_TAG_GEN(tx_q); } else { - tx_buf++; tx_desc++; } + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &next_buf_id))) { + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; + return idpf_tx_splitq_pkt_err_unmap(tx_q, params, + first); + } + } else { + next_buf_id = i; + } + idpf_tx_buf_next(tx_buf) = next_buf_id; + tx_buf = &tx_q->tx_buf[next_buf_id]; + size = skb_frag_size(frag); data_len -= size; @@ -2538,6 +2776,7 @@ static void idpf_tx_splitq_map(struct idpf_tx_queue *tx_q, /* write last descriptor with RS and EOP bits */ first->rs_idx = i; + idpf_tx_buf_next(tx_buf) = IDPF_TXBUF_NULL; td_cmd |= params->eop_cmd; idpf_tx_splitq_build_desc(tx_desc, params, td_cmd, size); i = idpf_tx_splitq_bump_ntu(tx_q, i); @@ -2627,111 +2866,6 @@ int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off) return 1; } -/** - * __idpf_chk_linearize - Check skb is not using too many buffers - * @skb: send buffer - * @max_bufs: maximum number of buffers - * - * For TSO we need to count the TSO header and segment payload separately. As - * such we need to check cases where we have max_bufs-1 fragments or more as we - * can potentially require max_bufs+1 DMA transactions, 1 for the TSO header, 1 - * for the segment payload in the first descriptor, and another max_buf-1 for - * the fragments. - */ -static bool __idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs) -{ - const struct skb_shared_info *shinfo = skb_shinfo(skb); - const skb_frag_t *frag, *stale; - int nr_frags, sum; - - /* no need to check if number of frags is less than max_bufs - 1 */ - nr_frags = shinfo->nr_frags; - if (nr_frags < (max_bufs - 1)) - return false; - - /* We need to walk through the list and validate that each group - * of max_bufs-2 fragments totals at least gso_size. - */ - nr_frags -= max_bufs - 2; - frag = &shinfo->frags[0]; - - /* Initialize size to the negative value of gso_size minus 1. We use - * this as the worst case scenario in which the frag ahead of us only - * provides one byte which is why we are limited to max_bufs-2 - * descriptors for a single transmit as the header and previous - * fragment are already consuming 2 descriptors. - */ - sum = 1 - shinfo->gso_size; - - /* Add size of frags 0 through 4 to create our initial sum */ - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - sum += skb_frag_size(frag++); - - /* Walk through fragments adding latest fragment, testing it, and - * then removing stale fragments from the sum. - */ - for (stale = &shinfo->frags[0];; stale++) { - int stale_size = skb_frag_size(stale); - - sum += skb_frag_size(frag++); - - /* The stale fragment may present us with a smaller - * descriptor than the actual fragment size. To account - * for that we need to remove all the data on the front and - * figure out what the remainder would be in the last - * descriptor associated with the fragment. - */ - if (stale_size > IDPF_TX_MAX_DESC_DATA) { - int align_pad = -(skb_frag_off(stale)) & - (IDPF_TX_MAX_READ_REQ_SIZE - 1); - - sum -= align_pad; - stale_size -= align_pad; - - do { - sum -= IDPF_TX_MAX_DESC_DATA_ALIGNED; - stale_size -= IDPF_TX_MAX_DESC_DATA_ALIGNED; - } while (stale_size > IDPF_TX_MAX_DESC_DATA); - } - - /* if sum is negative we failed to make sufficient progress */ - if (sum < 0) - return true; - - if (!nr_frags--) - break; - - sum -= stale_size; - } - - return false; -} - -/** - * idpf_chk_linearize - Check if skb exceeds max descriptors per packet - * @skb: send buffer - * @max_bufs: maximum scatter gather buffers for single packet - * @count: number of buffers this packet needs - * - * Make sure we don't exceed maximum scatter gather buffers for a single - * packet. We have to do some special checking around the boundary (max_bufs-1) - * if TSO is on since we need count the TSO header and payload separately. - * E.g.: a packet with 7 fragments can require 9 DMA transactions; 1 for TSO - * header, 1 for segment payload, and then 7 for the fragments. - */ -static bool idpf_chk_linearize(struct sk_buff *skb, unsigned int max_bufs, - unsigned int count) -{ - if (likely(count < max_bufs)) - return false; - if (skb_is_gso(skb)) - return __idpf_chk_linearize(skb, max_bufs); - - return count > max_bufs; -} /** * idpf_tx_splitq_get_ctx_desc - grab next desc and update buffer ring @@ -2746,8 +2880,6 @@ idpf_tx_splitq_get_ctx_desc(struct idpf_tx_queue *txq) union idpf_flex_tx_ctx_desc *desc; int i = txq->next_to_use; - txq->tx_buf[i].type = LIBETH_SQE_CTX; - /* grab the next descriptor */ desc = &txq->flex_ctx[i]; txq->next_to_use = idpf_tx_splitq_bump_ntu(txq, i); @@ -2841,6 +2973,21 @@ static void idpf_tx_set_tstamp_desc(union idpf_flex_tx_ctx_desc *ctx_desc, #endif /* CONFIG_PTP_1588_CLOCK */ /** + * idpf_tx_splitq_need_re - check whether RE bit needs to be set + * @tx_q: pointer to Tx queue + * + * Return: true if RE bit needs to be set, false otherwise + */ +static bool idpf_tx_splitq_need_re(struct idpf_tx_queue *tx_q) +{ + int gap = tx_q->next_to_use - tx_q->last_re; + + gap += (gap < 0) ? tx_q->desc_count : 0; + + return gap >= IDPF_TX_SPLITQ_RE_MIN_GAP; +} + +/** * idpf_tx_splitq_frame - Sends buffer on Tx ring using flex descriptors * @skb: send buffer * @tx_q: queue to send buffer on @@ -2850,13 +2997,16 @@ static void idpf_tx_set_tstamp_desc(union idpf_flex_tx_ctx_desc *ctx_desc, static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q) { - struct idpf_tx_splitq_params tx_params = { }; + struct idpf_tx_splitq_params tx_params = { + .prev_ntu = tx_q->next_to_use, + }; union idpf_flex_tx_ctx_desc *ctx_desc; struct idpf_tx_buf *first; - unsigned int count; + u32 count, buf_count = 1; int tso, idx; + u32 buf_id; - count = idpf_tx_desc_count_required(tx_q, skb); + count = idpf_tx_res_count_required(tx_q, skb, &buf_count); if (unlikely(!count)) return idpf_tx_drop_skb(tx_q, skb); @@ -2866,7 +3016,7 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, /* Check for splitq specific TX resources */ count += (IDPF_TX_DESCS_PER_CACHE_LINE + tso); - if (idpf_tx_maybe_stop_splitq(tx_q, count)) { + if (idpf_tx_maybe_stop_splitq(tx_q, count, buf_count)) { idpf_tx_buf_hw_update(tx_q, tx_q->next_to_use, false); return NETDEV_TX_BUSY; @@ -2898,36 +3048,47 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, idpf_tx_set_tstamp_desc(ctx_desc, idx); } - /* record the location of the first descriptor for this packet */ - first = &tx_q->tx_buf[tx_q->next_to_use]; - first->skb = skb; + if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { + struct idpf_sw_queue *refillq = tx_q->refillq; - if (tso) { - first->packets = tx_params.offload.tso_segs; - first->bytes = skb->len + - ((first->packets - 1) * tx_params.offload.tso_hdr_len); - } else { - first->packets = 1; - first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); - } + /* Save refillq state in case of a packet rollback. Otherwise, + * the tags will be leaked since they will be popped from the + * refillq but never reposted during cleaning. + */ + tx_params.prev_refill_gen = + idpf_queue_has(RFL_GEN_CHK, refillq); + tx_params.prev_refill_ntc = refillq->next_to_clean; + + if (unlikely(!idpf_tx_get_free_buf_id(tx_q->refillq, + &buf_id))) { + if (tx_params.prev_refill_gen != + idpf_queue_has(RFL_GEN_CHK, refillq)) + idpf_queue_change(RFL_GEN_CHK, refillq); + refillq->next_to_clean = tx_params.prev_refill_ntc; + + tx_q->next_to_use = tx_params.prev_ntu; + return idpf_tx_drop_skb(tx_q, skb); + } + tx_params.compl_tag = buf_id; - if (idpf_queue_has(FLOW_SCH_EN, tx_q)) { tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_FLOW_SCHE; tx_params.eop_cmd = IDPF_TXD_FLEX_FLOW_CMD_EOP; - /* Set the RE bit to catch any packets that may have not been - * stashed during RS completion cleaning. MIN_GAP is set to - * MIN_RING size to ensure it will be set at least once each - * time around the ring. + /* Set the RE bit to periodically "clean" the descriptor ring. + * MIN_GAP is set to MIN_RING size to ensure it will be set at + * least once each time around the ring. */ - if (!(tx_q->next_to_use % IDPF_TX_SPLITQ_RE_MIN_GAP)) { + if (idpf_tx_splitq_need_re(tx_q)) { tx_params.eop_cmd |= IDPF_TXD_FLEX_FLOW_CMD_RE; tx_q->txq_grp->num_completions_pending++; + tx_q->last_re = tx_q->next_to_use; } if (skb->ip_summed == CHECKSUM_PARTIAL) tx_params.offload.td_cmd |= IDPF_TXD_FLEX_FLOW_CMD_CS_EN; } else { + buf_id = tx_q->next_to_use; + tx_params.dtype = IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2; tx_params.eop_cmd = IDPF_TXD_LAST_DESC_CMD; @@ -2935,6 +3096,18 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, tx_params.offload.td_cmd |= IDPF_TX_FLEX_DESC_CMD_CS_EN; } + first = &tx_q->tx_buf[buf_id]; + first->skb = skb; + + if (tso) { + first->packets = tx_params.offload.tso_segs; + first->bytes = skb->len + + ((first->packets - 1) * tx_params.offload.tso_hdr_len); + } else { + first->packets = 1; + first->bytes = max_t(unsigned int, skb->len, ETH_ZLEN); + } + idpf_tx_splitq_map(tx_q, &tx_params, first); return NETDEV_TX_OK; @@ -2949,10 +3122,11 @@ static netdev_tx_t idpf_tx_splitq_frame(struct sk_buff *skb, */ netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev) { - struct idpf_vport *vport = idpf_netdev_to_vport(netdev); + const struct idpf_vport *vport = idpf_netdev_to_vport(netdev); struct idpf_tx_queue *tx_q; - if (unlikely(skb_get_queue_mapping(skb) >= vport->num_txq)) { + if (unlikely(skb_get_queue_mapping(skb) >= + vport->num_txq - vport->num_xdp_txq)) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -2989,7 +3163,7 @@ idpf_rx_hash(const struct idpf_rx_queue *rxq, struct sk_buff *skb, { u32 hash; - if (!libeth_rx_pt_has_hash(rxq->netdev, decoded)) + if (!libeth_rx_pt_has_hash(rxq->xdp_rxq.dev, decoded)) return; hash = le16_to_cpu(rx_desc->hash1) | @@ -3015,7 +3189,7 @@ static void idpf_rx_csum(struct idpf_rx_queue *rxq, struct sk_buff *skb, bool ipv4, ipv6; /* check if Rx checksum is enabled */ - if (!libeth_rx_pt_has_checksum(rxq->netdev, decoded)) + if (!libeth_rx_pt_has_checksum(rxq->xdp_rxq.dev, decoded)) return; /* check if HW has decoded the packet and checksum */ @@ -3187,7 +3361,7 @@ idpf_rx_hwtstamp(const struct idpf_rx_queue *rxq, } /** - * idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor + * __idpf_rx_process_skb_fields - Populate skb header fields from Rx descriptor * @rxq: Rx descriptor ring packet is being transacted on * @skb: pointer to current skb being populated * @rx_desc: Receive descriptor @@ -3197,8 +3371,8 @@ idpf_rx_hwtstamp(const struct idpf_rx_queue *rxq, * other fields within the skb. */ static int -idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, - const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) +__idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc) { struct libeth_rx_csum csum_bits; struct libeth_rx_pt decoded; @@ -3214,9 +3388,6 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, if (idpf_queue_has(PTP, rxq)) idpf_rx_hwtstamp(rxq, rx_desc, skb); - skb->protocol = eth_type_trans(skb, rxq->netdev); - skb_record_rx_queue(skb, rxq->idx); - if (le16_get_bits(rx_desc->hdrlen_flags, VIRTCHNL2_RX_FLEX_DESC_ADV_RSC_M)) return idpf_rx_rsc(rxq, skb, rx_desc, decoded); @@ -3227,25 +3398,24 @@ idpf_rx_process_skb_fields(struct idpf_rx_queue *rxq, struct sk_buff *skb, return 0; } -/** - * idpf_rx_add_frag - Add contents of Rx buffer to sk_buff as a frag - * @rx_buf: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: packet length from rx_desc - * - * This function will add the data contained in rx_buf->page to the skb. - * It will just attach the page as a frag to the skb. - * The function will then update the page offset. - */ -void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, - unsigned int size) +bool idpf_rx_process_skb_fields(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs) { - u32 hr = netmem_get_pp(rx_buf->netmem)->p.offset; + struct idpf_rx_queue *rxq; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); - skb_add_rx_frag_netmem(skb, skb_shinfo(skb)->nr_frags, rx_buf->netmem, - rx_buf->offset + hr, size, rx_buf->truesize); + return !__idpf_rx_process_skb_fields(rxq, skb, xdp->desc); } +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_RUN(static idpf_xdp_run_pass, idpf_xdp_run_prog, + idpf_xdp_tx_flush_bulk, idpf_rx_process_skb_fields); +LIBETH_XDP_DEFINE_FINALIZE(static idpf_xdp_finalize_rx, idpf_xdp_tx_flush_bulk, + idpf_xdp_tx_finalize); +LIBETH_XDP_DEFINE_END(); + /** * idpf_rx_hsplit_wa - handle header buffer overflows and split errors * @hdr: Rx buffer for the headers @@ -3288,36 +3458,6 @@ static u32 idpf_rx_hsplit_wa(const struct libeth_fqe *hdr, } /** - * idpf_rx_build_skb - Allocate skb and populate it from header buffer - * @buf: Rx buffer to pull data from - * @size: the length of the packet - * - * This function allocates an skb. It then populates it with the page data from - * the current receive descriptor, taking care to set up the skb correctly. - */ -struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size) -{ - struct page *buf_page = __netmem_to_page(buf->netmem); - u32 hr = pp_page_to_nmdesc(buf_page)->pp->p.offset; - struct sk_buff *skb; - void *va; - - va = page_address(buf_page) + buf->offset; - prefetch(va + hr); - - skb = napi_build_skb(va, buf->truesize); - if (unlikely(!skb)) - return NULL; - - skb_mark_for_recycle(skb); - - skb_reserve(skb, hr); - __skb_put(skb, size); - - return skb; -} - -/** * idpf_rx_splitq_test_staterr - tests bits in Rx descriptor * status and error fields * @stat_err_field: field from descriptor to test bits in @@ -3358,13 +3498,18 @@ static bool idpf_rx_splitq_is_eop(struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_de */ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) { - int total_rx_bytes = 0, total_rx_pkts = 0; struct idpf_buf_queue *rx_bufq = NULL; - struct sk_buff *skb = rxq->skb; + struct libeth_rq_napi_stats rs = { }; u16 ntc = rxq->next_to_clean; + LIBETH_XDP_ONSTACK_BUFF(xdp); + LIBETH_XDP_ONSTACK_BULK(bq); + + libeth_xdp_tx_init_bulk(&bq, rxq->xdp_prog, rxq->xdp_rxq.dev, + rxq->xdpsqs, rxq->num_xdp_txq); + libeth_xdp_init_buff(xdp, &rxq->xdp, &rxq->xdp_rxq); /* Process Rx packets bounded by budget */ - while (likely(total_rx_pkts < budget)) { + while (likely(rs.packets < budget)) { struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; struct libeth_fqe *hdr, *rx_buf = NULL; struct idpf_sw_queue *refillq = NULL; @@ -3378,18 +3523,14 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) /* get the Rx desc from Rx queue based on 'next_to_clean' */ rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb; - /* This memory barrier is needed to keep us from reading - * any other fields out of the rx_desc - */ - dma_rmb(); - /* if the descriptor isn't done, no work yet to do */ gen_id = le16_get_bits(rx_desc->pktlen_gen_bufq_id, VIRTCHNL2_RX_FLEX_DESC_ADV_GEN_M); - if (idpf_queue_has(GEN_CHK, rxq) != gen_id) break; + dma_rmb(); + rxdid = FIELD_GET(VIRTCHNL2_RX_FLEX_DESC_ADV_RXDID_M, rx_desc->rxdid_ucast); if (rxdid != VIRTCHNL2_RXDID_2_FLEX_SPLITQ) { @@ -3434,7 +3575,7 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) hdr = &rx_bufq->hdr_buf[buf_id]; - if (unlikely(!hdr_len && !skb)) { + if (unlikely(!hdr_len && !xdp->data)) { hdr_len = idpf_rx_hsplit_wa(hdr, rx_buf, pkt_len); /* If failed, drop both buffers by setting len to 0 */ pkt_len -= hdr_len ? : pkt_len; @@ -3444,75 +3585,37 @@ static int idpf_rx_splitq_clean(struct idpf_rx_queue *rxq, int budget) u64_stats_update_end(&rxq->stats_sync); } - if (libeth_rx_sync_for_cpu(hdr, hdr_len)) { - skb = idpf_rx_build_skb(hdr, hdr_len); - if (!skb) - break; - - u64_stats_update_begin(&rxq->stats_sync); - u64_stats_inc(&rxq->q_stats.hsplit_pkts); - u64_stats_update_end(&rxq->stats_sync); - } + if (libeth_xdp_process_buff(xdp, hdr, hdr_len)) + rs.hsplit++; hdr->netmem = 0; payload: - if (!libeth_rx_sync_for_cpu(rx_buf, pkt_len)) - goto skip_data; - - if (skb) - idpf_rx_add_frag(rx_buf, skb, pkt_len); - else - skb = idpf_rx_build_skb(rx_buf, pkt_len); - - /* exit if we failed to retrieve a buffer */ - if (!skb) - break; - -skip_data: + libeth_xdp_process_buff(xdp, rx_buf, pkt_len); rx_buf->netmem = 0; - idpf_rx_post_buf_refill(refillq, buf_id); + idpf_post_buf_refill(refillq, buf_id); IDPF_RX_BUMP_NTC(rxq, ntc); /* skip if it is non EOP desc */ - if (!idpf_rx_splitq_is_eop(rx_desc) || unlikely(!skb)) - continue; - - /* pad skb if needed (to make valid ethernet frame) */ - if (eth_skb_pad(skb)) { - skb = NULL; - continue; - } - - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - - /* protocol */ - if (unlikely(idpf_rx_process_skb_fields(rxq, skb, rx_desc))) { - dev_kfree_skb_any(skb); - skb = NULL; + if (!idpf_rx_splitq_is_eop(rx_desc) || unlikely(!xdp->data)) continue; - } - - /* send completed skb up the stack */ - napi_gro_receive(rxq->napi, skb); - skb = NULL; - /* update budget accounting */ - total_rx_pkts++; + idpf_xdp_run_pass(xdp, &bq, rxq->napi, &rs, rx_desc); } + idpf_xdp_finalize_rx(&bq); + rxq->next_to_clean = ntc; + libeth_xdp_save_buff(&rxq->xdp, xdp); - rxq->skb = skb; u64_stats_update_begin(&rxq->stats_sync); - u64_stats_add(&rxq->q_stats.packets, total_rx_pkts); - u64_stats_add(&rxq->q_stats.bytes, total_rx_bytes); + u64_stats_add(&rxq->q_stats.packets, rs.packets); + u64_stats_add(&rxq->q_stats.bytes, rs.bytes); + u64_stats_add(&rxq->q_stats.hsplit_pkts, rs.hsplit); u64_stats_update_end(&rxq->stats_sync); - /* guarantee a trip back through this routine if there was a failure */ - return total_rx_pkts; + return rs.packets; } /** @@ -3580,10 +3683,10 @@ static void idpf_rx_clean_refillq(struct idpf_buf_queue *bufq, bool failure; if (idpf_queue_has(RFL_GEN_CHK, refillq) != - !!(refill_desc & IDPF_RX_BI_GEN_M)) + !!(refill_desc & IDPF_RFL_BI_GEN_M)) break; - buf_id = FIELD_GET(IDPF_RX_BI_BUFID_M, refill_desc); + buf_id = FIELD_GET(IDPF_RFL_BI_BUFID_M, refill_desc); failure = idpf_rx_update_bufq_desc(bufq, buf_id, buf_desc); if (failure) break; @@ -3655,7 +3758,7 @@ static irqreturn_t idpf_vport_intr_clean_queues(int __always_unused irq, struct idpf_q_vector *q_vector = (struct idpf_q_vector *)data; q_vector->total_events++; - napi_schedule(&q_vector->napi); + napi_schedule_irqoff(&q_vector->napi); return IRQ_HANDLED; } @@ -3696,6 +3799,8 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) for (u32 v_idx = 0; v_idx < vport->num_q_vectors; v_idx++) { struct idpf_q_vector *q_vector = &vport->q_vectors[v_idx]; + kfree(q_vector->xsksq); + q_vector->xsksq = NULL; kfree(q_vector->complq); q_vector->complq = NULL; kfree(q_vector->bufq); @@ -3710,6 +3815,20 @@ void idpf_vport_intr_rel(struct idpf_vport *vport) vport->q_vectors = NULL; } +static void idpf_q_vector_set_napi(struct idpf_q_vector *q_vector, bool link) +{ + struct napi_struct *napi = link ? &q_vector->napi : NULL; + struct net_device *dev = q_vector->vport->netdev; + + for (u32 i = 0; i < q_vector->num_rxq; i++) + netif_queue_set_napi(dev, q_vector->rx[i]->idx, + NETDEV_QUEUE_TYPE_RX, napi); + + for (u32 i = 0; i < q_vector->num_txq; i++) + netif_queue_set_napi(dev, q_vector->tx[i]->idx, + NETDEV_QUEUE_TYPE_TX, napi); +} + /** * idpf_vport_intr_rel_irq - Free the IRQ association with the OS * @vport: main vport structure @@ -3730,6 +3849,7 @@ static void idpf_vport_intr_rel_irq(struct idpf_vport *vport) vidx = vport->q_vector_idxs[vector]; irq_num = adapter->msix_entries[vidx].vector; + idpf_q_vector_set_napi(q_vector, false); kfree(free_irq(irq_num, q_vector)); } } @@ -3743,6 +3863,8 @@ static void idpf_vport_intr_dis_irq_all(struct idpf_vport *vport) struct idpf_q_vector *q_vector = vport->q_vectors; int q_idx; + writel(0, vport->noirq_dyn_ctl); + for (q_idx = 0; q_idx < vport->num_q_vectors; q_idx++) writel(0, q_vector[q_idx].intr_reg.dyn_ctl); } @@ -3917,6 +4039,8 @@ static int idpf_vport_intr_req_irq(struct idpf_vport *vport) "Request_irq failed, error: %d\n", err); goto free_q_irqs; } + + idpf_q_vector_set_napi(q_vector, true); } return 0; @@ -3984,6 +4108,8 @@ static void idpf_vport_intr_ena_irq_all(struct idpf_vport *vport) if (qv->num_txq || qv->num_rxq) idpf_vport_intr_update_itr_ena_irq(qv); } + + writel(vport->noirq_dyn_ctl_ena, vport->noirq_dyn_ctl); } /** @@ -4133,7 +4259,9 @@ static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, struct idpf_rx_queue *rxq = q_vec->rx[i]; int pkts_cleaned_per_q; - pkts_cleaned_per_q = idpf_rx_splitq_clean(rxq, budget_per_q); + pkts_cleaned_per_q = idpf_queue_has(XSK, rxq) ? + idpf_xskrq_poll(rxq, budget_per_q) : + idpf_rx_splitq_clean(rxq, budget_per_q); /* if we clean as many as budgeted, we must not be done */ if (pkts_cleaned_per_q >= budget_per_q) clean_complete = false; @@ -4143,8 +4271,10 @@ static bool idpf_rx_splitq_clean_all(struct idpf_q_vector *q_vec, int budget, nid = numa_mem_id(); - for (i = 0; i < q_vec->num_bufq; i++) - idpf_rx_clean_refillq_all(q_vec->bufq[i], nid); + for (i = 0; i < q_vec->num_bufq; i++) { + if (!idpf_queue_has(XSK, q_vec->bufq[i])) + idpf_rx_clean_refillq_all(q_vec->bufq[i], nid); + } return clean_complete; } @@ -4158,7 +4288,7 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) { struct idpf_q_vector *q_vector = container_of(napi, struct idpf_q_vector, napi); - bool clean_complete; + bool clean_complete = true; int work_done = 0; /* Handle case where we are called by netpoll with a budget of 0 */ @@ -4168,8 +4298,13 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return 0; } - clean_complete = idpf_rx_splitq_clean_all(q_vector, budget, &work_done); - clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, &work_done); + for (u32 i = 0; i < q_vector->num_xsksq; i++) + clean_complete &= idpf_xsk_xmit(q_vector->xsksq[i]); + + clean_complete &= idpf_tx_splitq_clean_all(q_vector, budget, + &work_done); + clean_complete &= idpf_rx_splitq_clean_all(q_vector, budget, + &work_done); /* If work not completed, return budget and polling will return */ if (!clean_complete) { @@ -4177,20 +4312,12 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) return budget; } - /* Switch to poll mode in the tear-down path after sending disable - * queues virtchnl message, as the interrupts will be disabled after - * that. - */ - if (unlikely(q_vector->num_txq && idpf_queue_has(POLL_MODE, - q_vector->tx[0]))) - return budget; - work_done = min_t(int, work_done, budget - 1); /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) + if (napi_complete_done(napi, work_done)) idpf_vport_intr_update_itr_ena_irq(q_vector); else idpf_vport_intr_set_wb_on_itr(q_vector); @@ -4206,8 +4333,8 @@ static int idpf_vport_splitq_napi_poll(struct napi_struct *napi, int budget) */ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) { + u16 num_txq_grp = vport->num_txq_grp - vport->num_xdp_txq; bool split = idpf_is_queue_model_split(vport->rxq_model); - u16 num_txq_grp = vport->num_txq_grp; struct idpf_rxq_group *rx_qgrp; struct idpf_txq_group *tx_qgrp; u32 i, qv_idx, q_index; @@ -4283,6 +4410,21 @@ static void idpf_vport_intr_map_vector_to_qs(struct idpf_vport *vport) qv_idx++; } + + for (i = 0; i < vport->num_xdp_txq; i++) { + struct idpf_tx_queue *xdpsq; + struct idpf_q_vector *qv; + + xdpsq = vport->txqs[vport->xdp_txq_offset + i]; + if (!idpf_queue_has(XSK, xdpsq)) + continue; + + qv = idpf_find_rxq_vec(vport, i); + idpf_xsk_init_wakeup(qv); + + xdpsq->q_vector = qv; + qv->xsksq[qv->num_xsksq++] = xdpsq; + } } /** @@ -4303,6 +4445,8 @@ static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport) for (i = 0; i < vport->num_q_vectors; i++) vport->q_vectors[i].v_idx = vport->q_vector_idxs[i]; + vport->noirq_v_idx = vport->q_vector_idxs[i]; + return 0; } @@ -4316,6 +4460,8 @@ static int idpf_vport_intr_init_vec_idx(struct idpf_vport *vport) for (i = 0; i < vport->num_q_vectors; i++) vport->q_vectors[i].v_idx = vecids[vport->q_vector_idxs[i]]; + vport->noirq_v_idx = vecids[vport->q_vector_idxs[i]]; + kfree(vecids); return 0; @@ -4416,6 +4562,15 @@ int idpf_vport_intr_alloc(struct idpf_vport *vport) GFP_KERNEL); if (!q_vector->complq) goto error; + + if (!vport->xdp_txq_offset) + continue; + + q_vector->xsksq = kcalloc(rxqs_per_vector, + sizeof(*q_vector->xsksq), + GFP_KERNEL); + if (!q_vector->xsksq) + goto error; } return 0; diff --git a/drivers/net/ethernet/intel/idpf/idpf_txrx.h b/drivers/net/ethernet/intel/idpf/idpf_txrx.h index 281de655a813..75b977094741 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_txrx.h +++ b/drivers/net/ethernet/intel/idpf/idpf_txrx.h @@ -7,8 +7,10 @@ #include <linux/dim.h> #include <net/libeth/cache.h> -#include <net/tcp.h> +#include <net/libeth/types.h> #include <net/netdev_queues.h> +#include <net/tcp.h> +#include <net/xdp.h> #include "idpf_lan_txrx.h" #include "virtchnl2_lan_desc.h" @@ -58,6 +60,8 @@ #define IDPF_MBX_Q_VEC 1 #define IDPF_MIN_Q_VEC 1 #define IDPF_MIN_RDMA_VEC 2 +/* Data vector for NOIRQ queues */ +#define IDPF_RESERVED_VECS 1 #define IDPF_DFLT_TX_Q_DESC_COUNT 512 #define IDPF_DFLT_TX_COMPLQ_DESC_COUNT 512 @@ -108,8 +112,8 @@ do { \ */ #define IDPF_TX_SPLITQ_RE_MIN_GAP 64 -#define IDPF_RX_BI_GEN_M BIT(16) -#define IDPF_RX_BI_BUFID_M GENMASK(15, 0) +#define IDPF_RFL_BI_GEN_M BIT(16) +#define IDPF_RFL_BI_BUFID_M GENMASK(15, 0) #define IDPF_RXD_EOF_SPLITQ VIRTCHNL2_RX_FLEX_DESC_ADV_STATUS0_EOF_M #define IDPF_RXD_EOF_SINGLEQ VIRTCHNL2_RX_BASE_DESC_STATUS_EOF_M @@ -118,10 +122,6 @@ do { \ ((((txq)->next_to_clean > (txq)->next_to_use) ? 0 : (txq)->desc_count) + \ (txq)->next_to_clean - (txq)->next_to_use - 1) -#define IDPF_TX_BUF_RSV_UNUSED(txq) ((txq)->stash->buf_stack.top) -#define IDPF_TX_BUF_RSV_LOW(txq) (IDPF_TX_BUF_RSV_UNUSED(txq) < \ - (txq)->desc_count >> 2) - #define IDPF_TX_COMPLQ_OVERFLOW_THRESH(txcq) ((txcq)->desc_count >> 1) /* Determine the absolute number of completions pending, i.e. the number of * completions that are expected to arrive on the TX completion queue. @@ -131,11 +131,7 @@ do { \ 0 : U32_MAX) + \ (txq)->num_completions_pending - (txq)->complq->num_completions) -#define IDPF_TX_SPLITQ_COMPL_TAG_WIDTH 16 -/* Adjust the generation for the completion tag and wrap if necessary */ -#define IDPF_TX_ADJ_COMPL_TAG_GEN(txq) \ - ((++(txq)->compl_tag_cur_gen) >= (txq)->compl_tag_gen_max ? \ - 0 : (txq)->compl_tag_cur_gen) +#define IDPF_TXBUF_NULL U32_MAX #define IDPF_TXD_LAST_DESC_CMD (IDPF_TX_DESC_CMD_EOP | IDPF_TX_DESC_CMD_RS) @@ -145,6 +141,8 @@ do { \ #define IDPF_TX_FLAGS_TUNNEL BIT(3) #define IDPF_TX_FLAGS_TSYN BIT(4) +struct libeth_rq_napi_stats; + union idpf_tx_flex_desc { struct idpf_flex_tx_desc q; /* queue based scheduling */ struct idpf_flex_tx_sched_desc flow; /* flow based scheduling */ @@ -153,18 +151,6 @@ union idpf_tx_flex_desc { #define idpf_tx_buf libeth_sqe /** - * struct idpf_buf_lifo - LIFO for managing OOO completions - * @top: Used to know how many buffers are left - * @size: Total size of LIFO - * @bufs: Backing array - */ -struct idpf_buf_lifo { - u16 top; - u16 size; - struct idpf_tx_stash **bufs; -}; - -/** * struct idpf_tx_offload_params - Offload parameters for a given packet * @tx_flags: Feature flags enabled for this packet * @hdr_offsets: Offset parameter for single queue model @@ -196,6 +182,9 @@ struct idpf_tx_offload_params { * @compl_tag: Associated tag for completion * @td_tag: Descriptor tunneling tag * @offload: Offload parameters + * @prev_ntu: stored TxQ next_to_use in case of rollback + * @prev_refill_ntc: stored refillq next_to_clean in case of packet rollback + * @prev_refill_gen: stored refillq generation bit in case of packet rollback */ struct idpf_tx_splitq_params { enum idpf_tx_desc_dtype_value dtype; @@ -206,6 +195,10 @@ struct idpf_tx_splitq_params { }; struct idpf_tx_offload_params offload; + + u16 prev_ntu; + u16 prev_refill_ntc; + bool prev_refill_gen; }; enum idpf_tx_ctx_desc_eipt_offload { @@ -288,11 +281,13 @@ struct idpf_ptype_state { * bit and Q_RFL_GEN is the SW bit. * @__IDPF_Q_FLOW_SCH_EN: Enable flow scheduling * @__IDPF_Q_SW_MARKER: Used to indicate TX queue marker completions - * @__IDPF_Q_POLL_MODE: Enable poll mode * @__IDPF_Q_CRC_EN: enable CRC offload in singleq mode * @__IDPF_Q_HSPLIT_EN: enable header split on Rx (splitq) * @__IDPF_Q_PTP: indicates whether the Rx timestamping is enabled for the * queue + * @__IDPF_Q_NOIRQ: queue is polling-driven and has no interrupt + * @__IDPF_Q_XDP: this is an XDP queue + * @__IDPF_Q_XSK: the queue has an XSk pool installed * @__IDPF_Q_FLAGS_NBITS: Must be last */ enum idpf_queue_flags_t { @@ -300,10 +295,12 @@ enum idpf_queue_flags_t { __IDPF_Q_RFL_GEN_CHK, __IDPF_Q_FLOW_SCH_EN, __IDPF_Q_SW_MARKER, - __IDPF_Q_POLL_MODE, __IDPF_Q_CRC_EN, __IDPF_Q_HSPLIT_EN, __IDPF_Q_PTP, + __IDPF_Q_NOIRQ, + __IDPF_Q_XDP, + __IDPF_Q_XSK, __IDPF_Q_FLAGS_NBITS, }; @@ -370,14 +367,17 @@ struct idpf_intr_reg { * @num_txq: Number of TX queues * @num_bufq: Number of buffer queues * @num_complq: number of completion queues + * @num_xsksq: number of XSk send queues * @rx: Array of RX queues to service * @tx: Array of TX queues to service * @bufq: Array of buffer queues to service * @complq: array of completion queues + * @xsksq: array of XSk send queues * @intr_reg: See struct idpf_intr_reg - * @napi: napi handler + * @csd: XSk wakeup CSD * @total_events: Number of interrupts processed * @wb_on_itr: whether WB on ITR is enabled + * @napi: napi handler * @tx_dim: Data for TX net_dim algorithm * @tx_itr_value: TX interrupt throttling rate * @tx_intr_mode: Dynamic ITR or not @@ -396,19 +396,24 @@ struct idpf_q_vector { u16 num_txq; u16 num_bufq; u16 num_complq; + u16 num_xsksq; struct idpf_rx_queue **rx; struct idpf_tx_queue **tx; struct idpf_buf_queue **bufq; struct idpf_compl_queue **complq; + struct idpf_tx_queue **xsksq; struct idpf_intr_reg intr_reg; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); - struct napi_struct napi; + call_single_data_t csd; + u16 total_events; bool wb_on_itr; + struct napi_struct napi; + struct dim tx_dim; u16 tx_itr_value; bool tx_intr_mode; @@ -425,8 +430,8 @@ struct idpf_q_vector { __cacheline_group_end_aligned(cold); }; -libeth_cacheline_set_assert(struct idpf_q_vector, 120, - 24 + sizeof(struct napi_struct) + +libeth_cacheline_set_assert(struct idpf_q_vector, 136, + 56 + sizeof(struct napi_struct) + 2 * sizeof(struct dim), 8); @@ -468,38 +473,32 @@ struct idpf_tx_queue_stats { #define IDPF_DIM_DEFAULT_PROFILE_IX 1 /** - * struct idpf_txq_stash - Tx buffer stash for Flow-based scheduling mode - * @buf_stack: Stack of empty buffers to store buffer info for out of order - * buffer completions. See struct idpf_buf_lifo - * @sched_buf_hash: Hash table to store buffers - */ -struct idpf_txq_stash { - struct idpf_buf_lifo buf_stack; - DECLARE_HASHTABLE(sched_buf_hash, 12); -} ____cacheline_aligned; - -/** * struct idpf_rx_queue - software structure representing a receive queue * @rx: universal receive descriptor array * @single_buf: buffer descriptor array in singleq * @desc_ring: virtual descriptor ring address * @bufq_sets: Pointer to the array of buffer queues in splitq mode * @napi: NAPI instance corresponding to this queue (splitq) + * @xdp_prog: attached XDP program * @rx_buf: See struct &libeth_fqe * @pp: Page pool pointer in singleq mode - * @netdev: &net_device corresponding to this queue * @tail: Tail offset. Used for both queue models single and split. * @flags: See enum idpf_queue_flags_t * @idx: For RX queue, it is used to index to total RX queue across groups and * used for skb reporting. * @desc_count: Number of descriptors + * @num_xdp_txq: total number of XDP Tx queues + * @xdpsqs: shortcut for XDP Tx queues array * @rxdids: Supported RX descriptor ids + * @truesize: data buffer truesize in singleq * @rx_ptype_lkup: LUT of Rx ptypes + * @xdp_rxq: XDP queue info * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean * @next_to_alloc: RX buffer to allocate at - * @skb: Pointer to the skb - * @truesize: data buffer truesize in singleq + * @xdp: XDP buffer with the current frame + * @xsk: current XDP buffer in XSk mode + * @pool: XSk pool if installed * @cached_phc_time: Cached PHC time for the Rx queue * @stats_sync: See struct u64_stats_sync * @q_stats: See union idpf_rx_queue_stats @@ -524,30 +523,44 @@ struct idpf_rx_queue { struct { struct idpf_bufq_set *bufq_sets; struct napi_struct *napi; + struct bpf_prog __rcu *xdp_prog; }; struct { struct libeth_fqe *rx_buf; struct page_pool *pp; + void __iomem *tail; }; }; - struct net_device *netdev; - void __iomem *tail; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); u16 idx; u16 desc_count; - u32 rxdids; + u32 num_xdp_txq; + union { + struct idpf_tx_queue **xdpsqs; + struct { + u32 rxdids; + u32 truesize; + }; + }; const struct libeth_rx_pt *rx_ptype_lkup; + + struct xdp_rxq_info xdp_rxq; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); - u16 next_to_use; - u16 next_to_clean; - u16 next_to_alloc; + u32 next_to_use; + u32 next_to_clean; + u32 next_to_alloc; - struct sk_buff *skb; - u32 truesize; + union { + struct libeth_xdp_buff_stash xdp; + struct { + struct libeth_xdp_buff *xsk; + struct xsk_buff_pool *pool; + }; + }; u64 cached_phc_time; struct u64_stats_sync stats_sync; @@ -567,8 +580,11 @@ struct idpf_rx_queue { u16 rx_max_pkt_size; __cacheline_group_end_aligned(cold); }; -libeth_cacheline_set_assert(struct idpf_rx_queue, 64, - 88 + sizeof(struct u64_stats_sync), +libeth_cacheline_set_assert(struct idpf_rx_queue, + ALIGN(64, __alignof(struct xdp_rxq_info)) + + sizeof(struct xdp_rxq_info), + 96 + offsetof(struct idpf_rx_queue, q_stats) - + offsetofend(struct idpf_rx_queue, cached_phc_time), 32); /** @@ -580,36 +596,21 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @desc_ring: virtual descriptor ring address * @tx_buf: See struct idpf_tx_buf * @txq_grp: See struct idpf_txq_group + * @complq: corresponding completion queue in XDP mode * @dev: Device back pointer for DMA mapping + * @pool: corresponding XSk pool if installed * @tail: Tail offset. Used for both queue models single and split * @flags: See enum idpf_queue_flags_t * @idx: For TX queue, it is used as index to map between TX queue group and * hot path TX pointers stored in vport. Used in both singleq/splitq. * @desc_count: Number of descriptors * @tx_min_pkt_len: Min supported packet length - * @compl_tag_gen_s: Completion tag generation bit - * The format of the completion tag will change based on the TXQ - * descriptor ring size so that we can maintain roughly the same level - * of "uniqueness" across all descriptor sizes. For example, if the - * TXQ descriptor ring size is 64 (the minimum size supported), the - * completion tag will be formatted as below: - * 15 6 5 0 - * -------------------------------- - * | GEN=0-1023 |IDX = 0-63| - * -------------------------------- - * - * This gives us 64*1024 = 65536 possible unique values. Similarly, if - * the TXQ descriptor ring size is 8160 (the maximum size supported), - * the completion tag will be formatted as below: - * 15 13 12 0 - * -------------------------------- - * |GEN | IDX = 0-8159 | - * -------------------------------- - * - * This gives us 8*8160 = 65280 possible unique values. + * @thresh: XDP queue cleaning threshold * @netdev: &net_device corresponding to this queue * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean + * @last_re: last descriptor index that RE bit was set + * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather * @cleaned_bytes: Splitq only, TXQ only: When a TX completion is received on * the TX completion queue, it can be for any TXQ associated * with that completion queue. This means we can clean up to @@ -620,11 +621,11 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * only once at the end of the cleaning routine. * @clean_budget: singleq only, queue cleaning budget * @cleaned_pkts: Number of packets cleaned for the above said case - * @tx_max_bufs: Max buffers that can be transmitted with scatter-gather - * @stash: Tx buffer stash for Flow-based scheduling mode - * @compl_tag_bufid_m: Completion tag buffer id mask - * @compl_tag_cur_gen: Used to keep track of current completion tag generation - * @compl_tag_gen_max: To determine when compl_tag_cur_gen should be reset + * @refillq: Pointer to refill queue + * @pending: number of pending descriptors to send in QB + * @xdp_tx: number of pending &xdp_buff or &xdp_frame buffers + * @timer: timer for XDP Tx queue cleanup + * @xdp_lock: lock for XDP Tx queues sharing * @cached_tstamp_caps: Tx timestamp capabilities negotiated with the CP * @tstamp_task: Work that handles Tx timestamp read * @stats_sync: See struct u64_stats_sync @@ -633,6 +634,8 @@ libeth_cacheline_set_assert(struct idpf_rx_queue, 64, * @size: Length of descriptor ring in bytes * @dma: Physical address of ring * @q_vector: Backreference to associated vector + * @buf_pool_size: Total number of idpf_tx_buf + * @rel_q_id: relative virtchnl queue index */ struct idpf_tx_queue { __cacheline_group_begin_aligned(read_mostly); @@ -645,36 +648,53 @@ struct idpf_tx_queue { void *desc_ring; }; struct libeth_sqe *tx_buf; - struct idpf_txq_group *txq_grp; - struct device *dev; + union { + struct idpf_txq_group *txq_grp; + struct idpf_compl_queue *complq; + }; + union { + struct device *dev; + struct xsk_buff_pool *pool; + }; void __iomem *tail; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); u16 idx; u16 desc_count; - u16 tx_min_pkt_len; - u16 compl_tag_gen_s; + union { + u16 tx_min_pkt_len; + u32 thresh; + }; struct net_device *netdev; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); - u16 next_to_use; - u16 next_to_clean; + u32 next_to_use; + u32 next_to_clean; union { - u32 cleaned_bytes; - u32 clean_budget; - }; - u16 cleaned_pkts; + struct { + u16 last_re; + u16 tx_max_bufs; - u16 tx_max_bufs; - struct idpf_txq_stash *stash; + union { + u32 cleaned_bytes; + u32 clean_budget; + }; + u16 cleaned_pkts; - u16 compl_tag_bufid_m; - u16 compl_tag_cur_gen; - u16 compl_tag_gen_max; + struct idpf_sw_queue *refillq; + }; + struct { + u32 pending; + u32 xdp_tx; + + struct libeth_xdpsq_timer *timer; + struct libeth_xdpsq_lock xdp_lock; + }; + }; struct idpf_ptp_vport_tx_tstamp_caps *cached_tstamp_caps; struct work_struct *tstamp_task; @@ -689,25 +709,36 @@ struct idpf_tx_queue { dma_addr_t dma; struct idpf_q_vector *q_vector; + + u32 buf_pool_size; + u32 rel_q_id; __cacheline_group_end_aligned(cold); }; libeth_cacheline_set_assert(struct idpf_tx_queue, 64, - 112 + sizeof(struct u64_stats_sync), - 24); + 104 + + offsetof(struct idpf_tx_queue, cached_tstamp_caps) - + offsetofend(struct idpf_tx_queue, timer) + + offsetof(struct idpf_tx_queue, q_stats) - + offsetofend(struct idpf_tx_queue, tstamp_task), + 32); /** * struct idpf_buf_queue - software structure representing a buffer queue * @split_buf: buffer descriptor array - * @hdr_buf: &libeth_fqe for header buffers - * @hdr_pp: &page_pool for header buffers * @buf: &libeth_fqe for data buffers * @pp: &page_pool for data buffers + * @xsk_buf: &xdp_buff for XSk Rx buffers + * @pool: &xsk_buff_pool on XSk queues + * @hdr_buf: &libeth_fqe for header buffers + * @hdr_pp: &page_pool for header buffers * @tail: Tail offset * @flags: See enum idpf_queue_flags_t * @desc_count: Number of descriptors + * @thresh: refill threshold in XSk * @next_to_use: Next descriptor to use * @next_to_clean: Next descriptor to clean * @next_to_alloc: RX buffer to allocate at + * @pending: number of buffers to refill (Xsk) * @hdr_truesize: truesize for buffer headers * @truesize: truesize for data buffers * @q_id: Queue id @@ -721,14 +752,24 @@ libeth_cacheline_set_assert(struct idpf_tx_queue, 64, struct idpf_buf_queue { __cacheline_group_begin_aligned(read_mostly); struct virtchnl2_splitq_rx_buf_desc *split_buf; + union { + struct { + struct libeth_fqe *buf; + struct page_pool *pp; + }; + struct { + struct libeth_xdp_buff **xsk_buf; + struct xsk_buff_pool *pool; + }; + }; struct libeth_fqe *hdr_buf; struct page_pool *hdr_pp; - struct libeth_fqe *buf; - struct page_pool *pp; void __iomem *tail; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); u32 desc_count; + + u32 thresh; __cacheline_group_end_aligned(read_mostly); __cacheline_group_begin_aligned(read_write); @@ -736,6 +777,7 @@ struct idpf_buf_queue { u32 next_to_clean; u32 next_to_alloc; + u32 pending; u32 hdr_truesize; u32 truesize; __cacheline_group_end_aligned(read_write); @@ -756,7 +798,9 @@ libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32); /** * struct idpf_compl_queue - software structure representing a completion queue - * @comp: completion descriptor array + * @comp: 8-byte completion descriptor array + * @comp_4b: 4-byte completion descriptor array + * @desc_ring: virtual descriptor ring address * @txq_grp: See struct idpf_txq_group * @flags: See enum idpf_queue_flags_t * @desc_count: Number of descriptors @@ -776,7 +820,12 @@ libeth_cacheline_set_assert(struct idpf_buf_queue, 64, 24, 32); */ struct idpf_compl_queue { __cacheline_group_begin_aligned(read_mostly); - struct idpf_splitq_tx_compl_desc *comp; + union { + struct idpf_splitq_tx_compl_desc *comp; + struct idpf_splitq_4b_tx_compl_desc *comp_4b; + + void *desc_ring; + }; struct idpf_txq_group *txq_grp; DECLARE_BITMAP(flags, __IDPF_Q_FLAGS_NBITS); @@ -903,7 +952,6 @@ struct idpf_rxq_group { * @vport: Vport back pointer * @num_txq: Number of TX queues associated * @txqs: Array of TX queue pointers - * @stashes: array of OOO stashes for the queues * @complq: Associated completion queue pointer, split queue only * @num_completions_pending: Total number of completions pending for the * completion queue, acculumated for all TX queues @@ -918,7 +966,6 @@ struct idpf_txq_group { u16 num_txq; struct idpf_tx_queue *txqs[IDPF_LARGE_MAX_Q]; - struct idpf_txq_stash *stashes; struct idpf_compl_queue *complq; @@ -1011,6 +1058,17 @@ static inline void idpf_vport_intr_set_wb_on_itr(struct idpf_q_vector *q_vector) reg->dyn_ctl); } +/** + * idpf_tx_splitq_get_free_bufs - get number of free buf_ids in refillq + * @refillq: pointer to refillq containing buf_ids + */ +static inline u32 idpf_tx_splitq_get_free_bufs(struct idpf_sw_queue *refillq) +{ + return (refillq->next_to_use > refillq->next_to_clean ? + 0 : refillq->desc_count) + + refillq->next_to_use - refillq->next_to_clean - 1; +} + int idpf_vport_singleq_napi_poll(struct napi_struct *napi, int budget); void idpf_vport_init_num_qs(struct idpf_vport *vport, struct virtchnl2_create_vport *vport_msg); @@ -1031,23 +1089,30 @@ int idpf_config_rss(struct idpf_vport *vport); int idpf_init_rss(struct idpf_vport *vport); void idpf_deinit_rss(struct idpf_vport *vport); int idpf_rx_bufs_init_all(struct idpf_vport *vport); -void idpf_rx_add_frag(struct idpf_rx_buf *rx_buf, struct sk_buff *skb, - unsigned int size); -struct sk_buff *idpf_rx_build_skb(const struct libeth_fqe *buf, u32 size); + +struct idpf_q_vector *idpf_find_rxq_vec(const struct idpf_vport *vport, + u32 q_num); +struct idpf_q_vector *idpf_find_txq_vec(const struct idpf_vport *vport, + u32 q_num); +int idpf_qp_switch(struct idpf_vport *vport, u32 qid, bool en); + void idpf_tx_buf_hw_update(struct idpf_tx_queue *tx_q, u32 val, bool xmit_more); unsigned int idpf_size_to_txd_count(unsigned int size); netdev_tx_t idpf_tx_drop_skb(struct idpf_tx_queue *tx_q, struct sk_buff *skb); -void idpf_tx_dma_map_error(struct idpf_tx_queue *txq, struct sk_buff *skb, - struct idpf_tx_buf *first, u16 ring_idx); -unsigned int idpf_tx_desc_count_required(struct idpf_tx_queue *txq, - struct sk_buff *skb); +unsigned int idpf_tx_res_count_required(struct idpf_tx_queue *txq, + struct sk_buff *skb, u32 *buf_count); void idpf_tx_timeout(struct net_device *netdev, unsigned int txqueue); netdev_tx_t idpf_tx_singleq_frame(struct sk_buff *skb, struct idpf_tx_queue *tx_q); netdev_tx_t idpf_tx_start(struct sk_buff *skb, struct net_device *netdev); bool idpf_rx_singleq_buf_hw_alloc_all(struct idpf_rx_queue *rxq, u16 cleaned_count); +bool idpf_rx_process_skb_fields(struct sk_buff *skb, + const struct libeth_xdp_buff *xdp, + struct libeth_rq_napi_stats *rs); int idpf_tso(struct sk_buff *skb, struct idpf_tx_offload_params *off); +void idpf_wait_for_sw_marker_completion(const struct idpf_tx_queue *txq); + #endif /* !_IDPF_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c index 259d50fded67..4cc58c83688c 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c +++ b/drivers/net/ethernet/intel/idpf/idpf_vf_dev.c @@ -76,7 +76,7 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport) int num_vecs = vport->num_q_vectors; struct idpf_vec_regs *reg_vals; int num_regs, i, err = 0; - u32 rx_itr, tx_itr; + u32 rx_itr, tx_itr, val; u16 total_vecs; total_vecs = idpf_get_reserved_vecs(vport->adapter); @@ -120,6 +120,15 @@ static int idpf_vf_intr_reg_init(struct idpf_vport *vport) intr->tx_itr = idpf_get_reg_addr(adapter, tx_itr); } + /* Data vector for NOIRQ queues */ + + val = reg_vals[vport->q_vector_idxs[i] - IDPF_MBX_Q_VEC].dyn_ctl_reg; + vport->noirq_dyn_ctl = idpf_get_reg_addr(adapter, val); + + val = VF_INT_DYN_CTLN_WB_ON_ITR_M | VF_INT_DYN_CTLN_INTENA_MSK_M | + FIELD_PREP(VF_INT_DYN_CTLN_ITR_INDX_M, IDPF_NO_ITR_UPDATE_IDX); + vport->noirq_dyn_ctl_ena = val; + free_reg_vals: kfree(reg_vals); diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c index a028c69f7fdc..cbb5fa30f5a0 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.c @@ -702,9 +702,9 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter) /* If post failed clear the only buffer we supplied */ if (post_err) { if (dma_mem) - dmam_free_coherent(&adapter->pdev->dev, - dma_mem->size, dma_mem->va, - dma_mem->pa); + dma_free_coherent(&adapter->pdev->dev, + dma_mem->size, dma_mem->va, + dma_mem->pa); break; } @@ -716,34 +716,145 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter) return err; } +struct idpf_chunked_msg_params { + u32 (*prepare_msg)(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num); + + const void *chunks; + u32 num_chunks; + + u32 chunk_sz; + u32 config_sz; + + u32 vc_op; +}; + +struct idpf_queue_set *idpf_alloc_queue_set(struct idpf_vport *vport, u32 num) +{ + struct idpf_queue_set *qp; + + qp = kzalloc(struct_size(qp, qs, num), GFP_KERNEL); + if (!qp) + return NULL; + + qp->vport = vport; + qp->num = num; + + return qp; +} + /** - * idpf_wait_for_marker_event - wait for software marker response + * idpf_send_chunked_msg - send VC message consisting of chunks * @vport: virtual port data structure + * @params: message params * - * Returns 0 success, negative on failure. - **/ -static int idpf_wait_for_marker_event(struct idpf_vport *vport) + * Helper function for preparing a message describing queues to be enabled + * or disabled. + * + * Return: the total size of the prepared message. + */ +static int idpf_send_chunked_msg(struct idpf_vport *vport, + const struct idpf_chunked_msg_params *params) { - int event; - int i; + struct idpf_vc_xn_params xn_params = { + .vc_op = params->vc_op, + .timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC, + }; + const void *pos = params->chunks; + u32 num_chunks, num_msgs, buf_sz; + void *buf __free(kfree) = NULL; + u32 totqs = params->num_chunks; + + num_chunks = min(IDPF_NUM_CHUNKS_PER_MSG(params->config_sz, + params->chunk_sz), totqs); + num_msgs = DIV_ROUND_UP(totqs, num_chunks); - for (i = 0; i < vport->num_txq; i++) - idpf_queue_set(SW_MARKER, vport->txqs[i]); + buf_sz = params->config_sz + num_chunks * params->chunk_sz; + buf = kzalloc(buf_sz, GFP_KERNEL); + if (!buf) + return -ENOMEM; - event = wait_event_timeout(vport->sw_marker_wq, - test_and_clear_bit(IDPF_VPORT_SW_MARKER, - vport->flags), - msecs_to_jiffies(500)); + xn_params.send_buf.iov_base = buf; - for (i = 0; i < vport->num_txq; i++) - idpf_queue_clear(POLL_MODE, vport->txqs[i]); + for (u32 i = 0; i < num_msgs; i++) { + ssize_t reply_sz; - if (event) - return 0; + memset(buf, 0, buf_sz); + xn_params.send_buf.iov_len = buf_sz; - dev_warn(&vport->adapter->pdev->dev, "Failed to receive marker packets\n"); + if (params->prepare_msg(vport, buf, pos, num_chunks) != buf_sz) + return -EINVAL; + + reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); + if (reply_sz < 0) + return reply_sz; + + pos += num_chunks * params->chunk_sz; + totqs -= num_chunks; + + num_chunks = min(num_chunks, totqs); + buf_sz = params->config_sz + num_chunks * params->chunk_sz; + } - return -ETIMEDOUT; + return 0; +} + +/** + * idpf_wait_for_marker_event_set - wait for software marker response for + * selected Tx queues + * @qs: set of the Tx queues + * + * Return: 0 success, -errno on failure. + */ +static int idpf_wait_for_marker_event_set(const struct idpf_queue_set *qs) +{ + struct idpf_tx_queue *txq; + bool markers_rcvd = true; + + for (u32 i = 0; i < qs->num; i++) { + switch (qs->qs[i].type) { + case VIRTCHNL2_QUEUE_TYPE_TX: + txq = qs->qs[i].txq; + + idpf_queue_set(SW_MARKER, txq); + idpf_wait_for_sw_marker_completion(txq); + markers_rcvd &= !idpf_queue_has(SW_MARKER, txq); + break; + default: + break; + } + } + + if (!markers_rcvd) { + netdev_warn(qs->vport->netdev, + "Failed to receive marker packets\n"); + return -ETIMEDOUT; + } + + return 0; +} + +/** + * idpf_wait_for_marker_event - wait for software marker response + * @vport: virtual port data structure + * + * Return: 0 success, negative on failure. + **/ +static int idpf_wait_for_marker_event(struct idpf_vport *vport) +{ + struct idpf_queue_set *qs __free(kfree) = NULL; + + qs = idpf_alloc_queue_set(vport, vport->num_txq); + if (!qs) + return -ENOMEM; + + for (u32 i = 0; i < qs->num; i++) { + qs->qs[i].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[i].txq = vport->txqs[i]; + } + + return idpf_wait_for_marker_event_set(qs); } /** @@ -1061,21 +1172,35 @@ int idpf_vport_alloc_max_qs(struct idpf_adapter *adapter, struct idpf_avail_queue_info *avail_queues = &adapter->avail_queues; struct virtchnl2_get_capabilities *caps = &adapter->caps; u16 default_vports = idpf_get_default_vports(adapter); - int max_rx_q, max_tx_q; + u32 max_rx_q, max_tx_q, max_buf_q, max_compl_q; mutex_lock(&adapter->queue_lock); + /* Caps are device-wide. Give each vport an equal piece */ max_rx_q = le16_to_cpu(caps->max_rx_q) / default_vports; max_tx_q = le16_to_cpu(caps->max_tx_q) / default_vports; - if (adapter->num_alloc_vports < default_vports) { - max_q->max_rxq = min_t(u16, max_rx_q, IDPF_MAX_Q); - max_q->max_txq = min_t(u16, max_tx_q, IDPF_MAX_Q); - } else { - max_q->max_rxq = IDPF_MIN_Q; - max_q->max_txq = IDPF_MIN_Q; + max_buf_q = le16_to_cpu(caps->max_rx_bufq) / default_vports; + max_compl_q = le16_to_cpu(caps->max_tx_complq) / default_vports; + + if (adapter->num_alloc_vports >= default_vports) { + max_rx_q = IDPF_MIN_Q; + max_tx_q = IDPF_MIN_Q; } - max_q->max_bufq = max_q->max_rxq * IDPF_MAX_BUFQS_PER_RXQ_GRP; - max_q->max_complq = max_q->max_txq; + + /* + * Harmonize the numbers. The current implementation always creates + * `IDPF_MAX_BUFQS_PER_RXQ_GRP` buffer queues for each Rx queue and + * one completion queue for each Tx queue for best performance. + * If less buffer or completion queues is available, cap the number + * of the corresponding Rx/Tx queues. + */ + max_rx_q = min(max_rx_q, max_buf_q / IDPF_MAX_BUFQS_PER_RXQ_GRP); + max_tx_q = min(max_tx_q, max_compl_q); + + max_q->max_rxq = max_rx_q; + max_q->max_txq = max_tx_q; + max_q->max_bufq = max_rx_q * IDPF_MAX_BUFQS_PER_RXQ_GRP; + max_q->max_complq = max_tx_q; if (avail_queues->avail_rxq < max_q->max_rxq || avail_queues->avail_txq < max_q->max_txq || @@ -1506,7 +1631,7 @@ int idpf_send_destroy_vport_msg(struct idpf_vport *vport) xn_params.vc_op = VIRTCHNL2_OP_DESTROY_VPORT; xn_params.send_buf.iov_base = &v_id; xn_params.send_buf.iov_len = sizeof(v_id); - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); return reply_sz < 0 ? reply_sz : 0; @@ -1554,236 +1679,368 @@ int idpf_send_disable_vport_msg(struct idpf_vport *vport) xn_params.vc_op = VIRTCHNL2_OP_DISABLE_VPORT; xn_params.send_buf.iov_base = &v_id; xn_params.send_buf.iov_len = sizeof(v_id); - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); return reply_sz < 0 ? reply_sz : 0; } /** - * idpf_send_config_tx_queues_msg - Send virtchnl config tx queues message + * idpf_fill_txq_config_chunk - fill chunk describing the Tx queue + * @vport: virtual port data structure + * @q: Tx queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_txq_config_chunk(const struct idpf_vport *vport, + const struct idpf_tx_queue *q, + struct virtchnl2_txq_info *qi) +{ + u32 val; + + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->txq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + qi->relative_queue_id = cpu_to_le16(q->rel_q_id); + + if (!idpf_is_queue_model_split(vport->txq_model)) { + qi->sched_mode = cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); + return; + } + + if (idpf_queue_has(XDP, q)) + val = q->complq->q_id; + else + val = q->txq_grp->complq->q_id; + + qi->tx_compl_queue_id = cpu_to_le16(val); + + if (idpf_queue_has(FLOW_SCH_EN, q)) + val = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; + else + val = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; + + qi->sched_mode = cpu_to_le16(val); +} + +/** + * idpf_fill_complq_config_chunk - fill chunk describing the completion queue + * @vport: virtual port data structure + * @q: completion queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_complq_config_chunk(const struct idpf_vport *vport, + const struct idpf_compl_queue *q, + struct virtchnl2_txq_info *qi) +{ + u32 val; + + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->txq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + + if (idpf_queue_has(FLOW_SCH_EN, q)) + val = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; + else + val = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; + + qi->sched_mode = cpu_to_le16(val); +} + +/** + * idpf_prepare_cfg_txqs_msg - prepare message to configure selected Tx queues * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the tx queue + * @num_chunks: number of chunks in the message * - * Send config tx queues virtchnl message. Returns 0 on success, negative on - * failure. + * Helper function for preparing the message describing configuration of + * Tx queues. + * + * Return: the total size of the prepared message. */ -static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) +static u32 idpf_prepare_cfg_txqs_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_config_tx_queues *ctq = buf; + + ctq->vport_id = cpu_to_le32(vport->vport_id); + ctq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(ctq->qinfo, pos, num_chunks * sizeof(*ctq->qinfo)); + + return struct_size(ctq, qinfo, num_chunks); +} + +/** + * idpf_send_config_tx_queue_set_msg - send virtchnl config Tx queues + * message for selected queues + * @qs: set of the Tx queues to configure + * + * Send config queues virtchnl message for queues contained in the @qs array. + * The @qs array can contain Tx queues (or completion queues) only. + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_config_tx_queue_set_msg(const struct idpf_queue_set *qs) { - struct virtchnl2_config_tx_queues *ctq __free(kfree) = NULL; struct virtchnl2_txq_info *qi __free(kfree) = NULL; - struct idpf_vc_xn_params xn_params = {}; - u32 config_sz, chunk_sz, buf_sz; - int totqs, num_msgs, num_chunks; - ssize_t reply_sz; - int i, k = 0; + struct idpf_chunked_msg_params params = { + .vc_op = VIRTCHNL2_OP_CONFIG_TX_QUEUES, + .prepare_msg = idpf_prepare_cfg_txqs_msg, + .config_sz = sizeof(struct virtchnl2_config_tx_queues), + .chunk_sz = sizeof(*qi), + }; - totqs = vport->num_txq + vport->num_complq; - qi = kcalloc(totqs, sizeof(struct virtchnl2_txq_info), GFP_KERNEL); + qi = kcalloc(qs->num, sizeof(*qi), GFP_KERNEL); if (!qi) return -ENOMEM; - /* Populate the queue info buffer with all queue context info */ - for (i = 0; i < vport->num_txq_grp; i++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - int j, sched_mode; - - for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - qi[k].queue_id = - cpu_to_le32(tx_qgrp->txqs[j]->q_id); - qi[k].model = - cpu_to_le16(vport->txq_model); - qi[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); - qi[k].ring_len = - cpu_to_le16(tx_qgrp->txqs[j]->desc_count); - qi[k].dma_ring_addr = - cpu_to_le64(tx_qgrp->txqs[j]->dma); - if (idpf_is_queue_model_split(vport->txq_model)) { - struct idpf_tx_queue *q = tx_qgrp->txqs[j]; - - qi[k].tx_compl_queue_id = - cpu_to_le16(tx_qgrp->complq->q_id); - qi[k].relative_queue_id = cpu_to_le16(j); - - if (idpf_queue_has(FLOW_SCH_EN, q)) - qi[k].sched_mode = - cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_FLOW); - else - qi[k].sched_mode = - cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); - } else { - qi[k].sched_mode = - cpu_to_le16(VIRTCHNL2_TXQ_SCHED_MODE_QUEUE); - } - } + params.chunks = qi; - if (!idpf_is_queue_model_split(vport->txq_model)) - continue; + for (u32 i = 0; i < qs->num; i++) { + if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_TX) + idpf_fill_txq_config_chunk(qs->vport, qs->qs[i].txq, + &qi[params.num_chunks++]); + else if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION) + idpf_fill_complq_config_chunk(qs->vport, + qs->qs[i].complq, + &qi[params.num_chunks++]); + } - qi[k].queue_id = cpu_to_le32(tx_qgrp->complq->q_id); - qi[k].model = cpu_to_le16(vport->txq_model); - qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); - qi[k].ring_len = cpu_to_le16(tx_qgrp->complq->desc_count); - qi[k].dma_ring_addr = cpu_to_le64(tx_qgrp->complq->dma); + return idpf_send_chunked_msg(qs->vport, ¶ms); +} - if (idpf_queue_has(FLOW_SCH_EN, tx_qgrp->complq)) - sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_FLOW; - else - sched_mode = VIRTCHNL2_TXQ_SCHED_MODE_QUEUE; - qi[k].sched_mode = cpu_to_le16(sched_mode); +/** + * idpf_send_config_tx_queues_msg - send virtchnl config Tx queues message + * @vport: virtual port data structure + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_config_tx_queues_msg(struct idpf_vport *vport) +{ + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 totqs = vport->num_txq + vport->num_complq; + u32 k = 0; + + qs = idpf_alloc_queue_set(vport, totqs); + if (!qs) + return -ENOMEM; - k++; + /* Populate the queue info buffer with all queue context info */ + for (u32 i = 0; i < vport->num_txq_grp; i++) { + const struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + + for (u32 j = 0; j < tx_qgrp->num_txq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[k++].txq = tx_qgrp->txqs[j]; + } + + if (idpf_is_queue_model_split(vport->txq_model)) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[k++].complq = tx_qgrp->complq; + } } /* Make sure accounting agrees */ if (k != totqs) return -EINVAL; - /* Chunk up the queue contexts into multiple messages to avoid - * sending a control queue message buffer that is too large - */ - config_sz = sizeof(struct virtchnl2_config_tx_queues); - chunk_sz = sizeof(struct virtchnl2_txq_info); + return idpf_send_config_tx_queue_set_msg(qs); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - totqs); - num_msgs = DIV_ROUND_UP(totqs, num_chunks); +/** + * idpf_fill_rxq_config_chunk - fill chunk describing the Rx queue + * @vport: virtual port data structure + * @q: Rx queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_rxq_config_chunk(const struct idpf_vport *vport, + struct idpf_rx_queue *q, + struct virtchnl2_rxq_info *qi) +{ + const struct idpf_bufq_set *sets; + + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->rxq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + qi->max_pkt_size = cpu_to_le32(q->rx_max_pkt_size); + qi->rx_buffer_low_watermark = cpu_to_le16(q->rx_buffer_low_watermark); + qi->qflags = cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE); + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi->qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); + + if (!idpf_is_queue_model_split(vport->rxq_model)) { + qi->data_buffer_size = cpu_to_le32(q->rx_buf_size); + qi->desc_ids = cpu_to_le64(q->rxdids); - buf_sz = struct_size(ctq, qinfo, num_chunks); - ctq = kzalloc(buf_sz, GFP_KERNEL); - if (!ctq) - return -ENOMEM; + return; + } - xn_params.vc_op = VIRTCHNL2_OP_CONFIG_TX_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + sets = q->bufq_sets; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(ctq, 0, buf_sz); - ctq->vport_id = cpu_to_le32(vport->vport_id); - ctq->num_qinfo = cpu_to_le16(num_chunks); - memcpy(ctq->qinfo, &qi[k], chunk_sz * num_chunks); + /* + * In splitq mode, RxQ buffer size should be set to that of the first + * buffer queue associated with this RxQ. + */ + q->rx_buf_size = sets[0].bufq.rx_buf_size; + qi->data_buffer_size = cpu_to_le32(q->rx_buf_size); - xn_params.send_buf.iov_base = ctq; - xn_params.send_buf.iov_len = buf_sz; - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; + qi->rx_bufq1_id = cpu_to_le16(sets[0].bufq.q_id); + if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { + qi->bufq2_ena = IDPF_BUFQ2_ENA; + qi->rx_bufq2_id = cpu_to_le16(sets[1].bufq.q_id); + } - k += num_chunks; - totqs -= num_chunks; - num_chunks = min(num_chunks, totqs); - /* Recalculate buffer size */ - buf_sz = struct_size(ctq, qinfo, num_chunks); + q->rx_hbuf_size = sets[0].bufq.rx_hbuf_size; + + if (idpf_queue_has(HSPLIT_EN, q)) { + qi->qflags |= cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi->hdr_buffer_size = cpu_to_le16(q->rx_hbuf_size); } - return 0; + qi->desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); +} + +/** + * idpf_fill_bufq_config_chunk - fill chunk describing the buffer queue + * @vport: virtual port data structure + * @q: buffer queue to be inserted into VC chunk + * @qi: pointer to the buffer containing the VC chunk + */ +static void idpf_fill_bufq_config_chunk(const struct idpf_vport *vport, + const struct idpf_buf_queue *q, + struct virtchnl2_rxq_info *qi) +{ + qi->queue_id = cpu_to_le32(q->q_id); + qi->model = cpu_to_le16(vport->rxq_model); + qi->type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); + qi->ring_len = cpu_to_le16(q->desc_count); + qi->dma_ring_addr = cpu_to_le64(q->dma); + qi->data_buffer_size = cpu_to_le32(q->rx_buf_size); + qi->rx_buffer_low_watermark = cpu_to_le16(q->rx_buffer_low_watermark); + qi->desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); + qi->buffer_notif_stride = IDPF_RX_BUF_STRIDE; + if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) + qi->qflags = cpu_to_le16(VIRTCHNL2_RXQ_RSC); + + if (idpf_queue_has(HSPLIT_EN, q)) { + qi->qflags |= cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); + qi->hdr_buffer_size = cpu_to_le16(q->rx_hbuf_size); + } } /** - * idpf_send_config_rx_queues_msg - Send virtchnl config rx queues message + * idpf_prepare_cfg_rxqs_msg - prepare message to configure selected Rx queues * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the rx queue + * @num_chunks: number of chunks in the message * - * Send config rx queues virtchnl message. Returns 0 on success, negative on - * failure. + * Helper function for preparing the message describing configuration of + * Rx queues. + * + * Return: the total size of the prepared message. */ -static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) +static u32 idpf_prepare_cfg_rxqs_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_config_rx_queues *crq = buf; + + crq->vport_id = cpu_to_le32(vport->vport_id); + crq->num_qinfo = cpu_to_le16(num_chunks); + memcpy(crq->qinfo, pos, num_chunks * sizeof(*crq->qinfo)); + + return struct_size(crq, qinfo, num_chunks); +} + +/** + * idpf_send_config_rx_queue_set_msg - send virtchnl config Rx queues message + * for selected queues. + * @qs: set of the Rx queues to configure + * + * Send config queues virtchnl message for queues contained in the @qs array. + * The @qs array can contain Rx queues (or buffer queues) only. + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_config_rx_queue_set_msg(const struct idpf_queue_set *qs) { - struct virtchnl2_config_rx_queues *crq __free(kfree) = NULL; struct virtchnl2_rxq_info *qi __free(kfree) = NULL; - struct idpf_vc_xn_params xn_params = {}; - u32 config_sz, chunk_sz, buf_sz; - int totqs, num_msgs, num_chunks; - ssize_t reply_sz; - int i, k = 0; + struct idpf_chunked_msg_params params = { + .vc_op = VIRTCHNL2_OP_CONFIG_RX_QUEUES, + .prepare_msg = idpf_prepare_cfg_rxqs_msg, + .config_sz = sizeof(struct virtchnl2_config_rx_queues), + .chunk_sz = sizeof(*qi), + }; - totqs = vport->num_rxq + vport->num_bufq; - qi = kcalloc(totqs, sizeof(struct virtchnl2_rxq_info), GFP_KERNEL); + qi = kcalloc(qs->num, sizeof(*qi), GFP_KERNEL); if (!qi) return -ENOMEM; - /* Populate the queue info buffer with all queue context info */ - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - u16 num_rxq; - int j; - - if (!idpf_is_queue_model_split(vport->rxq_model)) - goto setup_rxqs; - - for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { - struct idpf_buf_queue *bufq = - &rx_qgrp->splitq.bufq_sets[j].bufq; - - qi[k].queue_id = cpu_to_le32(bufq->q_id); - qi[k].model = cpu_to_le16(vport->rxq_model); - qi[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); - qi[k].desc_ids = cpu_to_le64(VIRTCHNL2_RXDID_2_FLEX_SPLITQ_M); - qi[k].ring_len = cpu_to_le16(bufq->desc_count); - qi[k].dma_ring_addr = cpu_to_le64(bufq->dma); - qi[k].data_buffer_size = cpu_to_le32(bufq->rx_buf_size); - qi[k].buffer_notif_stride = IDPF_RX_BUF_STRIDE; - qi[k].rx_buffer_low_watermark = - cpu_to_le16(bufq->rx_buffer_low_watermark); - if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) - qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); - } + params.chunks = qi; -setup_rxqs: - if (idpf_is_queue_model_split(vport->rxq_model)) - num_rxq = rx_qgrp->splitq.num_rxq_sets; - else - num_rxq = rx_qgrp->singleq.num_rxq; + for (u32 i = 0; i < qs->num; i++) { + if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_RX) + idpf_fill_rxq_config_chunk(qs->vport, qs->qs[i].rxq, + &qi[params.num_chunks++]); + else if (qs->qs[i].type == VIRTCHNL2_QUEUE_TYPE_RX_BUFFER) + idpf_fill_bufq_config_chunk(qs->vport, qs->qs[i].bufq, + &qi[params.num_chunks++]); + } - for (j = 0; j < num_rxq; j++, k++) { - const struct idpf_bufq_set *sets; - struct idpf_rx_queue *rxq; + return idpf_send_chunked_msg(qs->vport, ¶ms); +} - if (!idpf_is_queue_model_split(vport->rxq_model)) { - rxq = rx_qgrp->singleq.rxqs[j]; - goto common_qi_fields; - } +/** + * idpf_send_config_rx_queues_msg - send virtchnl config Rx queues message + * @vport: virtual port data structure + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_config_rx_queues_msg(struct idpf_vport *vport) +{ + bool splitq = idpf_is_queue_model_split(vport->rxq_model); + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 totqs = vport->num_rxq + vport->num_bufq; + u32 k = 0; - rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; - sets = rxq->bufq_sets; + qs = idpf_alloc_queue_set(vport, totqs); + if (!qs) + return -ENOMEM; - /* In splitq mode, RXQ buffer size should be - * set to that of the first buffer queue - * associated with this RXQ. - */ - rxq->rx_buf_size = sets[0].bufq.rx_buf_size; + /* Populate the queue info buffer with all queue context info */ + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; - qi[k].rx_bufq1_id = cpu_to_le16(sets[0].bufq.q_id); - if (vport->num_bufqs_per_qgrp > IDPF_SINGLE_BUFQ_PER_RXQ_GRP) { - qi[k].bufq2_ena = IDPF_BUFQ2_ENA; - qi[k].rx_bufq2_id = - cpu_to_le16(sets[1].bufq.q_id); - } - qi[k].rx_buffer_low_watermark = - cpu_to_le16(rxq->rx_buffer_low_watermark); - if (idpf_is_feature_ena(vport, NETIF_F_GRO_HW)) - qi[k].qflags |= cpu_to_le16(VIRTCHNL2_RXQ_RSC); - - rxq->rx_hbuf_size = sets[0].bufq.rx_hbuf_size; - - if (idpf_queue_has(HSPLIT_EN, rxq)) { - qi[k].qflags |= - cpu_to_le16(VIRTCHNL2_RXQ_HDR_SPLIT); - qi[k].hdr_buffer_size = - cpu_to_le16(rxq->rx_hbuf_size); - } + if (!splitq) { + num_rxq = rx_qgrp->singleq.num_rxq; + goto rxq; + } -common_qi_fields: - qi[k].queue_id = cpu_to_le32(rxq->q_id); - qi[k].model = cpu_to_le16(vport->rxq_model); - qi[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - qi[k].ring_len = cpu_to_le16(rxq->desc_count); - qi[k].dma_ring_addr = cpu_to_le64(rxq->dma); - qi[k].max_pkt_size = cpu_to_le32(rxq->rx_max_pkt_size); - qi[k].data_buffer_size = cpu_to_le32(rxq->rx_buf_size); - qi[k].qflags |= - cpu_to_le16(VIRTCHNL2_RX_DESC_SIZE_32BYTE); - qi[k].desc_ids = cpu_to_le64(rxq->rxdids); + for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + qs->qs[k++].bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; + } + + num_rxq = rx_qgrp->splitq.num_rxq_sets; + +rxq: + for (u32 j = 0; j < num_rxq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX; + + if (splitq) + qs->qs[k++].rxq = + &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + qs->qs[k++].rxq = rx_qgrp->singleq.rxqs[j]; } } @@ -1791,317 +2048,395 @@ common_qi_fields: if (k != totqs) return -EINVAL; - /* Chunk up the queue contexts into multiple messages to avoid - * sending a control queue message buffer that is too large - */ - config_sz = sizeof(struct virtchnl2_config_rx_queues); - chunk_sz = sizeof(struct virtchnl2_rxq_info); + return idpf_send_config_rx_queue_set_msg(qs); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - totqs); - num_msgs = DIV_ROUND_UP(totqs, num_chunks); +/** + * idpf_prepare_ena_dis_qs_msg - prepare message to enable/disable selected + * queues + * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the queue + * @num_chunks: number of chunks in the message + * + * Helper function for preparing the message describing queues to be enabled + * or disabled. + * + * Return: the total size of the prepared message. + */ +static u32 idpf_prepare_ena_dis_qs_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_del_ena_dis_queues *eq = buf; + + eq->vport_id = cpu_to_le32(vport->vport_id); + eq->chunks.num_chunks = cpu_to_le16(num_chunks); + memcpy(eq->chunks.chunks, pos, + num_chunks * sizeof(*eq->chunks.chunks)); + + return struct_size(eq, chunks.chunks, num_chunks); +} + +/** + * idpf_send_ena_dis_queue_set_msg - send virtchnl enable or disable queues + * message for selected queues + * @qs: set of the queues to enable or disable + * @en: whether to enable or disable queues + * + * Send enable or disable queues virtchnl message for queues contained + * in the @qs array. + * The @qs array can contain pointers to both Rx and Tx queues. + * + * Return: 0 on success, -errno on failure. + */ +static int idpf_send_ena_dis_queue_set_msg(const struct idpf_queue_set *qs, + bool en) +{ + struct virtchnl2_queue_chunk *qc __free(kfree) = NULL; + struct idpf_chunked_msg_params params = { + .vc_op = en ? VIRTCHNL2_OP_ENABLE_QUEUES : + VIRTCHNL2_OP_DISABLE_QUEUES, + .prepare_msg = idpf_prepare_ena_dis_qs_msg, + .config_sz = sizeof(struct virtchnl2_del_ena_dis_queues), + .chunk_sz = sizeof(*qc), + .num_chunks = qs->num, + }; - buf_sz = struct_size(crq, qinfo, num_chunks); - crq = kzalloc(buf_sz, GFP_KERNEL); - if (!crq) + qc = kcalloc(qs->num, sizeof(*qc), GFP_KERNEL); + if (!qc) return -ENOMEM; - xn_params.vc_op = VIRTCHNL2_OP_CONFIG_RX_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; + params.chunks = qc; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(crq, 0, buf_sz); - crq->vport_id = cpu_to_le32(vport->vport_id); - crq->num_qinfo = cpu_to_le16(num_chunks); - memcpy(crq->qinfo, &qi[k], chunk_sz * num_chunks); + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + u32 qid; - xn_params.send_buf.iov_base = crq; - xn_params.send_buf.iov_len = buf_sz; - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; + qc[i].type = cpu_to_le32(q->type); + qc[i].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); - k += num_chunks; - totqs -= num_chunks; - num_chunks = min(num_chunks, totqs); - /* Recalculate buffer size */ - buf_sz = struct_size(crq, qinfo, num_chunks); + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + qid = q->rxq->q_id; + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + qid = q->txq->q_id; + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + qid = q->bufq->q_id; + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + qid = q->complq->q_id; + break; + default: + return -EINVAL; + } + + qc[i].start_queue_id = cpu_to_le32(qid); } - return 0; + return idpf_send_chunked_msg(qs->vport, ¶ms); } /** - * idpf_send_ena_dis_queues_msg - Send virtchnl enable or disable - * queues message + * idpf_send_ena_dis_queues_msg - send virtchnl enable or disable queues + * message * @vport: virtual port data structure - * @ena: if true enable, false disable + * @en: whether to enable or disable queues * - * Send enable or disable queues virtchnl message. Returns 0 on success, - * negative on failure. + * Return: 0 on success, -errno on failure. */ -static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool ena) +static int idpf_send_ena_dis_queues_msg(struct idpf_vport *vport, bool en) { - struct virtchnl2_del_ena_dis_queues *eq __free(kfree) = NULL; - struct virtchnl2_queue_chunk *qc __free(kfree) = NULL; - u32 num_msgs, num_chunks, num_txq, num_rxq, num_q; - struct idpf_vc_xn_params xn_params = {}; - struct virtchnl2_queue_chunks *qcs; - u32 config_sz, chunk_sz, buf_sz; - ssize_t reply_sz; - int i, j, k = 0; + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 num_txq, num_q, k = 0; + bool split; num_txq = vport->num_txq + vport->num_complq; - num_rxq = vport->num_rxq + vport->num_bufq; - num_q = num_txq + num_rxq; - buf_sz = sizeof(struct virtchnl2_queue_chunk) * num_q; - qc = kzalloc(buf_sz, GFP_KERNEL); - if (!qc) + num_q = num_txq + vport->num_rxq + vport->num_bufq; + + qs = idpf_alloc_queue_set(vport, num_q); + if (!qs) return -ENOMEM; - for (i = 0; i < vport->num_txq_grp; i++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + split = idpf_is_queue_model_split(vport->txq_model); - for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); - qc[k].start_queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); - } - } - if (vport->num_txq != k) - return -EINVAL; + for (u32 i = 0; i < vport->num_txq_grp; i++) { + const struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - if (!idpf_is_queue_model_split(vport->txq_model)) - goto setup_rx; + for (u32 j = 0; j < tx_qgrp->num_txq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[k++].txq = tx_qgrp->txqs[j]; + } - for (i = 0; i < vport->num_txq_grp; i++, k++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; + if (!split) + continue; - qc[k].type = cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION); - qc[k].start_queue_id = cpu_to_le32(tx_qgrp->complq->q_id); - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION; + qs->qs[k++].complq = tx_qgrp->complq; } - if (vport->num_complq != (k - vport->num_txq)) + + if (k != num_txq) return -EINVAL; -setup_rx: - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + split = idpf_is_queue_model_split(vport->rxq_model); - if (idpf_is_queue_model_split(vport->rxq_model)) + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; + + if (split) num_rxq = rx_qgrp->splitq.num_rxq_sets; else num_rxq = rx_qgrp->singleq.num_rxq; - for (j = 0; j < num_rxq; j++, k++) { - if (idpf_is_queue_model_split(vport->rxq_model)) { - qc[k].start_queue_id = - cpu_to_le32(rx_qgrp->splitq.rxq_sets[j]->rxq.q_id); - qc[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - } else { - qc[k].start_queue_id = - cpu_to_le32(rx_qgrp->singleq.rxqs[j]->q_id); - qc[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - } - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); - } - } - if (vport->num_rxq != k - (vport->num_txq + vport->num_complq)) - return -EINVAL; - - if (!idpf_is_queue_model_split(vport->rxq_model)) - goto send_msg; + for (u32 j = 0; j < num_rxq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX; - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + if (split) + qs->qs[k++].rxq = + &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + qs->qs[k++].rxq = rx_qgrp->singleq.rxqs[j]; + } - for (j = 0; j < vport->num_bufqs_per_qgrp; j++, k++) { - const struct idpf_buf_queue *q; + if (!split) + continue; - q = &rx_qgrp->splitq.bufq_sets[j].bufq; - qc[k].type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX_BUFFER); - qc[k].start_queue_id = cpu_to_le32(q->q_id); - qc[k].num_queues = cpu_to_le32(IDPF_NUMQ_PER_CHUNK); + for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX_BUFFER; + qs->qs[k++].bufq = &rx_qgrp->splitq.bufq_sets[j].bufq; } } - if (vport->num_bufq != k - (vport->num_txq + - vport->num_complq + - vport->num_rxq)) + + if (k != num_q) return -EINVAL; -send_msg: - /* Chunk up the queue info into multiple messages */ - config_sz = sizeof(struct virtchnl2_del_ena_dis_queues); - chunk_sz = sizeof(struct virtchnl2_queue_chunk); + return idpf_send_ena_dis_queue_set_msg(qs, en); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - num_q); - num_msgs = DIV_ROUND_UP(num_q, num_chunks); +/** + * idpf_prep_map_unmap_queue_set_vector_msg - prepare message to map or unmap + * queue set to the interrupt vector + * @vport: virtual port data structure + * @buf: buffer containing the message + * @pos: pointer to the first chunk describing the vector mapping + * @num_chunks: number of chunks in the message + * + * Helper function for preparing the message describing mapping queues to + * q_vectors. + * + * Return: the total size of the prepared message. + */ +static u32 +idpf_prep_map_unmap_queue_set_vector_msg(const struct idpf_vport *vport, + void *buf, const void *pos, + u32 num_chunks) +{ + struct virtchnl2_queue_vector_maps *vqvm = buf; - buf_sz = struct_size(eq, chunks.chunks, num_chunks); - eq = kzalloc(buf_sz, GFP_KERNEL); - if (!eq) + vqvm->vport_id = cpu_to_le32(vport->vport_id); + vqvm->num_qv_maps = cpu_to_le16(num_chunks); + memcpy(vqvm->qv_maps, pos, num_chunks * sizeof(*vqvm->qv_maps)); + + return struct_size(vqvm, qv_maps, num_chunks); +} + +/** + * idpf_send_map_unmap_queue_set_vector_msg - send virtchnl map or unmap + * queue set vector message + * @qs: set of the queues to map or unmap + * @map: true for map and false for unmap + * + * Return: 0 on success, -errno on failure. + */ +static int +idpf_send_map_unmap_queue_set_vector_msg(const struct idpf_queue_set *qs, + bool map) +{ + struct virtchnl2_queue_vector *vqv __free(kfree) = NULL; + struct idpf_chunked_msg_params params = { + .vc_op = map ? VIRTCHNL2_OP_MAP_QUEUE_VECTOR : + VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR, + .prepare_msg = idpf_prep_map_unmap_queue_set_vector_msg, + .config_sz = sizeof(struct virtchnl2_queue_vector_maps), + .chunk_sz = sizeof(*vqv), + .num_chunks = qs->num, + }; + bool split; + + vqv = kcalloc(qs->num, sizeof(*vqv), GFP_KERNEL); + if (!vqv) return -ENOMEM; - if (ena) { - xn_params.vc_op = VIRTCHNL2_OP_ENABLE_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; - } else { - xn_params.vc_op = VIRTCHNL2_OP_DISABLE_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; - } + params.chunks = vqv; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(eq, 0, buf_sz); - eq->vport_id = cpu_to_le32(vport->vport_id); - eq->chunks.num_chunks = cpu_to_le16(num_chunks); - qcs = &eq->chunks; - memcpy(qcs->chunks, &qc[k], chunk_sz * num_chunks); + split = idpf_is_queue_model_split(qs->vport->txq_model); - xn_params.send_buf.iov_base = eq; - xn_params.send_buf.iov_len = buf_sz; - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; + for (u32 i = 0; i < qs->num; i++) { + const struct idpf_queue_ptr *q = &qs->qs[i]; + const struct idpf_q_vector *vec; + u32 qid, v_idx, itr_idx; + + vqv[i].queue_type = cpu_to_le32(q->type); + + switch (q->type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + qid = q->rxq->q_id; + + if (idpf_queue_has(NOIRQ, q->rxq)) + vec = NULL; + else + vec = q->rxq->q_vector; + + if (vec) { + v_idx = vec->v_idx; + itr_idx = vec->rx_itr_idx; + } else { + v_idx = qs->vport->noirq_v_idx; + itr_idx = VIRTCHNL2_ITR_IDX_0; + } + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + qid = q->txq->q_id; + + if (idpf_queue_has(NOIRQ, q->txq)) + vec = NULL; + else if (idpf_queue_has(XDP, q->txq)) + vec = q->txq->complq->q_vector; + else if (split) + vec = q->txq->txq_grp->complq->q_vector; + else + vec = q->txq->q_vector; + + if (vec) { + v_idx = vec->v_idx; + itr_idx = vec->tx_itr_idx; + } else { + v_idx = qs->vport->noirq_v_idx; + itr_idx = VIRTCHNL2_ITR_IDX_1; + } + break; + default: + return -EINVAL; + } - k += num_chunks; - num_q -= num_chunks; - num_chunks = min(num_chunks, num_q); - /* Recalculate buffer size */ - buf_sz = struct_size(eq, chunks.chunks, num_chunks); + vqv[i].queue_id = cpu_to_le32(qid); + vqv[i].vector_id = cpu_to_le16(v_idx); + vqv[i].itr_idx = cpu_to_le32(itr_idx); } - return 0; + return idpf_send_chunked_msg(qs->vport, ¶ms); } /** - * idpf_send_map_unmap_queue_vector_msg - Send virtchnl map or unmap queue - * vector message + * idpf_send_map_unmap_queue_vector_msg - send virtchnl map or unmap queue + * vector message * @vport: virtual port data structure * @map: true for map and false for unmap * - * Send map or unmap queue vector virtchnl message. Returns 0 on success, - * negative on failure. + * Return: 0 on success, -errno on failure. */ int idpf_send_map_unmap_queue_vector_msg(struct idpf_vport *vport, bool map) { - struct virtchnl2_queue_vector_maps *vqvm __free(kfree) = NULL; - struct virtchnl2_queue_vector *vqv __free(kfree) = NULL; - struct idpf_vc_xn_params xn_params = {}; - u32 config_sz, chunk_sz, buf_sz; - u32 num_msgs, num_chunks, num_q; - ssize_t reply_sz; - int i, j, k = 0; + struct idpf_queue_set *qs __free(kfree) = NULL; + u32 num_q = vport->num_txq + vport->num_rxq; + u32 k = 0; - num_q = vport->num_txq + vport->num_rxq; - - buf_sz = sizeof(struct virtchnl2_queue_vector) * num_q; - vqv = kzalloc(buf_sz, GFP_KERNEL); - if (!vqv) + qs = idpf_alloc_queue_set(vport, num_q); + if (!qs) return -ENOMEM; - for (i = 0; i < vport->num_txq_grp; i++) { - struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - - for (j = 0; j < tx_qgrp->num_txq; j++, k++) { - vqv[k].queue_type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_TX); - vqv[k].queue_id = cpu_to_le32(tx_qgrp->txqs[j]->q_id); + for (u32 i = 0; i < vport->num_txq_grp; i++) { + const struct idpf_txq_group *tx_qgrp = &vport->txq_grps[i]; - if (idpf_is_queue_model_split(vport->txq_model)) { - vqv[k].vector_id = - cpu_to_le16(tx_qgrp->complq->q_vector->v_idx); - vqv[k].itr_idx = - cpu_to_le32(tx_qgrp->complq->q_vector->tx_itr_idx); - } else { - vqv[k].vector_id = - cpu_to_le16(tx_qgrp->txqs[j]->q_vector->v_idx); - vqv[k].itr_idx = - cpu_to_le32(tx_qgrp->txqs[j]->q_vector->tx_itr_idx); - } + for (u32 j = 0; j < tx_qgrp->num_txq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_TX; + qs->qs[k++].txq = tx_qgrp->txqs[j]; } } - if (vport->num_txq != k) + if (k != vport->num_txq) return -EINVAL; - for (i = 0; i < vport->num_rxq_grp; i++) { - struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; - u16 num_rxq; + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; if (idpf_is_queue_model_split(vport->rxq_model)) num_rxq = rx_qgrp->splitq.num_rxq_sets; else num_rxq = rx_qgrp->singleq.num_rxq; - for (j = 0; j < num_rxq; j++, k++) { - struct idpf_rx_queue *rxq; + for (u32 j = 0; j < num_rxq; j++) { + qs->qs[k].type = VIRTCHNL2_QUEUE_TYPE_RX; if (idpf_is_queue_model_split(vport->rxq_model)) - rxq = &rx_qgrp->splitq.rxq_sets[j]->rxq; + qs->qs[k++].rxq = + &rx_qgrp->splitq.rxq_sets[j]->rxq; else - rxq = rx_qgrp->singleq.rxqs[j]; - - vqv[k].queue_type = - cpu_to_le32(VIRTCHNL2_QUEUE_TYPE_RX); - vqv[k].queue_id = cpu_to_le32(rxq->q_id); - vqv[k].vector_id = cpu_to_le16(rxq->q_vector->v_idx); - vqv[k].itr_idx = cpu_to_le32(rxq->q_vector->rx_itr_idx); + qs->qs[k++].rxq = rx_qgrp->singleq.rxqs[j]; } } - if (idpf_is_queue_model_split(vport->txq_model)) { - if (vport->num_rxq != k - vport->num_complq) - return -EINVAL; - } else { - if (vport->num_rxq != k - vport->num_txq) - return -EINVAL; - } + if (k != num_q) + return -EINVAL; - /* Chunk up the vector info into multiple messages */ - config_sz = sizeof(struct virtchnl2_queue_vector_maps); - chunk_sz = sizeof(struct virtchnl2_queue_vector); + return idpf_send_map_unmap_queue_set_vector_msg(qs, map); +} - num_chunks = min_t(u32, IDPF_NUM_CHUNKS_PER_MSG(config_sz, chunk_sz), - num_q); - num_msgs = DIV_ROUND_UP(num_q, num_chunks); +/** + * idpf_send_enable_queue_set_msg - send enable queues virtchnl message for + * selected queues + * @qs: set of the queues + * + * Send enable queues virtchnl message for queues contained in the @qs array. + * + * Return: 0 on success, -errno on failure. + */ +int idpf_send_enable_queue_set_msg(const struct idpf_queue_set *qs) +{ + return idpf_send_ena_dis_queue_set_msg(qs, true); +} - buf_sz = struct_size(vqvm, qv_maps, num_chunks); - vqvm = kzalloc(buf_sz, GFP_KERNEL); - if (!vqvm) - return -ENOMEM; +/** + * idpf_send_disable_queue_set_msg - send disable queues virtchnl message for + * selected queues + * @qs: set of the queues + * + * Return: 0 on success, -errno on failure. + */ +int idpf_send_disable_queue_set_msg(const struct idpf_queue_set *qs) +{ + int err; - if (map) { - xn_params.vc_op = VIRTCHNL2_OP_MAP_QUEUE_VECTOR; - xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; - } else { - xn_params.vc_op = VIRTCHNL2_OP_UNMAP_QUEUE_VECTOR; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; - } + err = idpf_send_ena_dis_queue_set_msg(qs, false); + if (err) + return err; - for (i = 0, k = 0; i < num_msgs; i++) { - memset(vqvm, 0, buf_sz); - xn_params.send_buf.iov_base = vqvm; - xn_params.send_buf.iov_len = buf_sz; - vqvm->vport_id = cpu_to_le32(vport->vport_id); - vqvm->num_qv_maps = cpu_to_le16(num_chunks); - memcpy(vqvm->qv_maps, &vqv[k], chunk_sz * num_chunks); + return idpf_wait_for_marker_event_set(qs); +} - reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); - if (reply_sz < 0) - return reply_sz; +/** + * idpf_send_config_queue_set_msg - send virtchnl config queues message for + * selected queues + * @qs: set of the queues + * + * Send config queues virtchnl message for queues contained in the @qs array. + * The @qs array can contain both Rx or Tx queues. + * + * Return: 0 on success, -errno on failure. + */ +int idpf_send_config_queue_set_msg(const struct idpf_queue_set *qs) +{ + int err; - k += num_chunks; - num_q -= num_chunks; - num_chunks = min(num_chunks, num_q); - /* Recalculate buffer size */ - buf_sz = struct_size(vqvm, qv_maps, num_chunks); - } + err = idpf_send_config_tx_queue_set_msg(qs); + if (err) + return err; - return 0; + return idpf_send_config_rx_queue_set_msg(qs); } /** @@ -2125,24 +2460,12 @@ int idpf_send_enable_queues_msg(struct idpf_vport *vport) */ int idpf_send_disable_queues_msg(struct idpf_vport *vport) { - int err, i; + int err; err = idpf_send_ena_dis_queues_msg(vport, false); if (err) return err; - /* switch to poll mode as interrupts will be disabled after disable - * queues virtchnl message is sent - */ - for (i = 0; i < vport->num_txq; i++) - idpf_queue_set(POLL_MODE, vport->txqs[i]); - - /* schedule the napi to receive all the marker packets */ - local_bh_disable(); - for (i = 0; i < vport->num_q_vectors; i++) - napi_schedule(&vport->q_vectors[i].napi); - local_bh_enable(); - return idpf_wait_for_marker_event(vport); } @@ -2207,7 +2530,7 @@ int idpf_send_delete_queues_msg(struct idpf_vport *vport) num_chunks); xn_params.vc_op = VIRTCHNL2_OP_DEL_QUEUES; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; xn_params.send_buf.iov_base = eq; xn_params.send_buf.iov_len = buf_size; reply_sz = idpf_vc_xn_exec(vport->adapter, &xn_params); @@ -2371,7 +2694,7 @@ int idpf_send_dealloc_vectors_msg(struct idpf_adapter *adapter) xn_params.vc_op = VIRTCHNL2_OP_DEALLOC_VECTORS; xn_params.send_buf.iov_base = vcs; xn_params.send_buf.iov_len = buf_size; - xn_params.timeout_ms = IDPF_VC_XN_MIN_TIMEOUT_MSEC; + xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC; reply_sz = idpf_vc_xn_exec(adapter, &xn_params); if (reply_sz < 0) return reply_sz; @@ -3285,9 +3608,17 @@ int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport) { struct idpf_vector_info vec_info; int num_alloc_vecs; + u32 req; vec_info.num_curr_vecs = vport->num_q_vectors; - vec_info.num_req_vecs = max(vport->num_txq, vport->num_rxq); + if (vec_info.num_curr_vecs) + vec_info.num_curr_vecs += IDPF_RESERVED_VECS; + + /* XDPSQs are all bound to the NOIRQ vector from IDPF_RESERVED_VECS */ + req = max(vport->num_txq - vport->num_xdp_txq, vport->num_rxq) + + IDPF_RESERVED_VECS; + vec_info.num_req_vecs = req; + vec_info.default_vport = vport->default_vport; vec_info.index = vport->idx; @@ -3300,7 +3631,7 @@ int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport) return -EINVAL; } - vport->num_q_vectors = num_alloc_vecs; + vport->num_q_vectors = num_alloc_vecs - IDPF_RESERVED_VECS; return 0; } @@ -3765,6 +4096,16 @@ u32 idpf_get_vport_id(struct idpf_vport *vport) return le32_to_cpu(vport_msg->vport_id); } +static void idpf_set_mac_type(struct idpf_vport *vport, + struct virtchnl2_mac_addr *mac_addr) +{ + bool is_primary; + + is_primary = ether_addr_equal(vport->default_mac_addr, mac_addr->addr); + mac_addr->type = is_primary ? VIRTCHNL2_MAC_ADDR_PRIMARY : + VIRTCHNL2_MAC_ADDR_EXTRA; +} + /** * idpf_mac_filter_async_handler - Async callback for mac filters * @adapter: private data struct @@ -3894,6 +4235,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, list) { if (add && f->add) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->add = false; if (i == total_filters) @@ -3901,6 +4243,7 @@ int idpf_add_del_mac_filters(struct idpf_vport *vport, } if (!add && f->remove) { ether_addr_copy(mac_addr[i].addr, f->macaddr); + idpf_set_mac_type(vport, &mac_addr[i]); i++; f->remove = false; if (i == total_filters) diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h index 86f30f0db07a..eac3d15daa42 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl.h @@ -4,7 +4,8 @@ #ifndef _IDPF_VIRTCHNL_H_ #define _IDPF_VIRTCHNL_H_ -#define IDPF_VC_XN_MIN_TIMEOUT_MSEC 2000 +#include "virtchnl2.h" + #define IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC (60 * 1000) #define IDPF_VC_XN_IDX_M GENMASK(7, 0) #define IDPF_VC_XN_SALT_M GENMASK(15, 8) @@ -115,6 +116,33 @@ int idpf_recv_mb_msg(struct idpf_adapter *adapter); int idpf_send_mb_msg(struct idpf_adapter *adapter, u32 op, u16 msg_size, u8 *msg, u16 cookie); +struct idpf_queue_ptr { + enum virtchnl2_queue_type type; + union { + struct idpf_rx_queue *rxq; + struct idpf_tx_queue *txq; + struct idpf_buf_queue *bufq; + struct idpf_compl_queue *complq; + }; +}; + +struct idpf_queue_set { + struct idpf_vport *vport; + + u32 num; + struct idpf_queue_ptr qs[] __counted_by(num); +}; + +struct idpf_queue_set *idpf_alloc_queue_set(struct idpf_vport *vport, u32 num); + +int idpf_send_enable_queue_set_msg(const struct idpf_queue_set *qs); +int idpf_send_disable_queue_set_msg(const struct idpf_queue_set *qs); +int idpf_send_config_queue_set_msg(const struct idpf_queue_set *qs); + +int idpf_send_disable_queues_msg(struct idpf_vport *vport); +int idpf_send_config_queues_msg(struct idpf_vport *vport); +int idpf_send_enable_queues_msg(struct idpf_vport *vport); + void idpf_vport_init(struct idpf_vport *vport, struct idpf_vport_max_q *max_q); u32 idpf_get_vport_id(struct idpf_vport *vport); int idpf_send_create_vport_msg(struct idpf_adapter *adapter, @@ -131,9 +159,6 @@ void idpf_vport_dealloc_max_qs(struct idpf_adapter *adapter, int idpf_send_add_queues_msg(const struct idpf_vport *vport, u16 num_tx_q, u16 num_complq, u16 num_rx_q, u16 num_rx_bufq); int idpf_send_delete_queues_msg(struct idpf_vport *vport); -int idpf_send_enable_queues_msg(struct idpf_vport *vport); -int idpf_send_disable_queues_msg(struct idpf_vport *vport); -int idpf_send_config_queues_msg(struct idpf_vport *vport); int idpf_vport_alloc_vec_indexes(struct idpf_vport *vport); int idpf_get_vec_ids(struct idpf_adapter *adapter, diff --git a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c index 4f1fb0cefe51..8a2e0f8c5e36 100644 --- a/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c +++ b/drivers/net/ethernet/intel/idpf/idpf_virtchnl_ptp.c @@ -521,6 +521,10 @@ idpf_ptp_get_tstamp_value(struct idpf_vport *vport, list_add(&ptp_tx_tstamp->list_member, &tx_tstamp_caps->latches_free); + u64_stats_update_begin(&vport->tstamp_stats.stats_sync); + u64_stats_inc(&vport->tstamp_stats.packets); + u64_stats_update_end(&vport->tstamp_stats.stats_sync); + return 0; } diff --git a/drivers/net/ethernet/intel/idpf/xdp.c b/drivers/net/ethernet/intel/idpf/xdp.c new file mode 100644 index 000000000000..21ce25b0567f --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xdp.c @@ -0,0 +1,486 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2025 Intel Corporation */ + +#include "idpf.h" +#include "idpf_virtchnl.h" +#include "xdp.h" +#include "xsk.h" + +static int idpf_rxq_for_each(const struct idpf_vport *vport, + int (*fn)(struct idpf_rx_queue *rxq, void *arg), + void *arg) +{ + bool splitq = idpf_is_queue_model_split(vport->rxq_model); + + if (!vport->rxq_grps) + return -ENETDOWN; + + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *rx_qgrp = &vport->rxq_grps[i]; + u32 num_rxq; + + if (splitq) + num_rxq = rx_qgrp->splitq.num_rxq_sets; + else + num_rxq = rx_qgrp->singleq.num_rxq; + + for (u32 j = 0; j < num_rxq; j++) { + struct idpf_rx_queue *q; + int err; + + if (splitq) + q = &rx_qgrp->splitq.rxq_sets[j]->rxq; + else + q = rx_qgrp->singleq.rxqs[j]; + + err = fn(q, arg); + if (err) + return err; + } + } + + return 0; +} + +static int __idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq, void *arg) +{ + const struct idpf_vport *vport = rxq->q_vector->vport; + bool split = idpf_is_queue_model_split(vport->rxq_model); + int err; + + err = __xdp_rxq_info_reg(&rxq->xdp_rxq, vport->netdev, rxq->idx, + rxq->q_vector->napi.napi_id, + rxq->rx_buf_size); + if (err) + return err; + + if (idpf_queue_has(XSK, rxq)) { + err = xdp_rxq_info_reg_mem_model(&rxq->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + rxq->pool); + if (err) + goto unreg; + } else { + const struct page_pool *pp; + + pp = split ? rxq->bufq_sets[0].bufq.pp : rxq->pp; + xdp_rxq_info_attach_page_pool(&rxq->xdp_rxq, pp); + } + + if (!split) + return 0; + + rxq->xdpsqs = &vport->txqs[vport->xdp_txq_offset]; + rxq->num_xdp_txq = vport->num_xdp_txq; + + return 0; + +unreg: + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + return err; +} + +int idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq) +{ + return __idpf_xdp_rxq_info_init(rxq, NULL); +} + +int idpf_xdp_rxq_info_init_all(const struct idpf_vport *vport) +{ + return idpf_rxq_for_each(vport, __idpf_xdp_rxq_info_init, NULL); +} + +static int __idpf_xdp_rxq_info_deinit(struct idpf_rx_queue *rxq, void *arg) +{ + if (idpf_is_queue_model_split((size_t)arg)) { + rxq->xdpsqs = NULL; + rxq->num_xdp_txq = 0; + } + + if (!idpf_queue_has(XSK, rxq)) + xdp_rxq_info_detach_mem_model(&rxq->xdp_rxq); + + xdp_rxq_info_unreg(&rxq->xdp_rxq); + + return 0; +} + +void idpf_xdp_rxq_info_deinit(struct idpf_rx_queue *rxq, u32 model) +{ + __idpf_xdp_rxq_info_deinit(rxq, (void *)(size_t)model); +} + +void idpf_xdp_rxq_info_deinit_all(const struct idpf_vport *vport) +{ + idpf_rxq_for_each(vport, __idpf_xdp_rxq_info_deinit, + (void *)(size_t)vport->rxq_model); +} + +static int idpf_xdp_rxq_assign_prog(struct idpf_rx_queue *rxq, void *arg) +{ + struct bpf_prog *prog = arg; + struct bpf_prog *old; + + if (prog) + bpf_prog_inc(prog); + + old = rcu_replace_pointer(rxq->xdp_prog, prog, lockdep_rtnl_is_held()); + if (old) + bpf_prog_put(old); + + return 0; +} + +void idpf_xdp_copy_prog_to_rqs(const struct idpf_vport *vport, + struct bpf_prog *xdp_prog) +{ + idpf_rxq_for_each(vport, idpf_xdp_rxq_assign_prog, xdp_prog); +} + +static void idpf_xdp_tx_timer(struct work_struct *work); + +int idpf_xdpsqs_get(const struct idpf_vport *vport) +{ + struct libeth_xdpsq_timer **timers __free(kvfree) = NULL; + struct net_device *dev; + u32 sqs; + + if (!idpf_xdp_enabled(vport)) + return 0; + + timers = kvcalloc(vport->num_xdp_txq, sizeof(*timers), GFP_KERNEL); + if (!timers) + return -ENOMEM; + + for (u32 i = 0; i < vport->num_xdp_txq; i++) { + timers[i] = kzalloc_node(sizeof(*timers[i]), GFP_KERNEL, + cpu_to_mem(i)); + if (!timers[i]) { + for (int j = i - 1; j >= 0; j--) + kfree(timers[j]); + + return -ENOMEM; + } + } + + dev = vport->netdev; + sqs = vport->xdp_txq_offset; + + for (u32 i = sqs; i < vport->num_txq; i++) { + struct idpf_tx_queue *xdpsq = vport->txqs[i]; + + xdpsq->complq = xdpsq->txq_grp->complq; + kfree(xdpsq->refillq); + xdpsq->refillq = NULL; + + idpf_queue_clear(FLOW_SCH_EN, xdpsq); + idpf_queue_clear(FLOW_SCH_EN, xdpsq->complq); + idpf_queue_set(NOIRQ, xdpsq); + idpf_queue_set(XDP, xdpsq); + idpf_queue_set(XDP, xdpsq->complq); + + xdpsq->timer = timers[i - sqs]; + libeth_xdpsq_get(&xdpsq->xdp_lock, dev, vport->xdpsq_share); + libeth_xdpsq_init_timer(xdpsq->timer, xdpsq, &xdpsq->xdp_lock, + idpf_xdp_tx_timer); + + xdpsq->pending = 0; + xdpsq->xdp_tx = 0; + xdpsq->thresh = libeth_xdp_queue_threshold(xdpsq->desc_count); + } + + return 0; +} + +void idpf_xdpsqs_put(const struct idpf_vport *vport) +{ + struct net_device *dev; + u32 sqs; + + if (!idpf_xdp_enabled(vport)) + return; + + dev = vport->netdev; + sqs = vport->xdp_txq_offset; + + for (u32 i = sqs; i < vport->num_txq; i++) { + struct idpf_tx_queue *xdpsq = vport->txqs[i]; + + if (!idpf_queue_has_clear(XDP, xdpsq)) + continue; + + libeth_xdpsq_deinit_timer(xdpsq->timer); + libeth_xdpsq_put(&xdpsq->xdp_lock, dev); + + kfree(xdpsq->timer); + xdpsq->refillq = NULL; + idpf_queue_clear(NOIRQ, xdpsq); + } +} + +static int idpf_xdp_parse_cqe(const struct idpf_splitq_4b_tx_compl_desc *desc, + bool gen) +{ + u32 val; + +#ifdef __LIBETH_WORD_ACCESS + val = *(const u32 *)desc; +#else + val = ((u32)le16_to_cpu(desc->q_head_compl_tag.q_head) << 16) | + le16_to_cpu(desc->qid_comptype_gen); +#endif + if (!!(val & IDPF_TXD_COMPLQ_GEN_M) != gen) + return -ENODATA; + + if (unlikely((val & GENMASK(IDPF_TXD_COMPLQ_GEN_S - 1, 0)) != + FIELD_PREP(IDPF_TXD_COMPLQ_COMPL_TYPE_M, + IDPF_TXD_COMPLT_RS))) + return -EINVAL; + + return upper_16_bits(val); +} + +u32 idpf_xdpsq_poll(struct idpf_tx_queue *xdpsq, u32 budget) +{ + struct idpf_compl_queue *cq = xdpsq->complq; + u32 tx_ntc = xdpsq->next_to_clean; + u32 tx_cnt = xdpsq->desc_count; + u32 ntc = cq->next_to_clean; + u32 cnt = cq->desc_count; + u32 done_frames; + bool gen; + + gen = idpf_queue_has(GEN_CHK, cq); + + for (done_frames = 0; done_frames < budget; ) { + int ret; + + ret = idpf_xdp_parse_cqe(&cq->comp_4b[ntc], gen); + if (ret >= 0) { + done_frames = ret > tx_ntc ? ret - tx_ntc : + ret + tx_cnt - tx_ntc; + goto next; + } + + switch (ret) { + case -ENODATA: + goto out; + case -EINVAL: + break; + } + +next: + if (unlikely(++ntc == cnt)) { + ntc = 0; + gen = !gen; + idpf_queue_change(GEN_CHK, cq); + } + } + +out: + cq->next_to_clean = ntc; + + return done_frames; +} + +static u32 idpf_xdpsq_complete(void *_xdpsq, u32 budget) +{ + struct libeth_xdpsq_napi_stats ss = { }; + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 tx_ntc = xdpsq->next_to_clean; + u32 tx_cnt = xdpsq->desc_count; + struct xdp_frame_bulk bq; + struct libeth_cq_pp cp = { + .dev = xdpsq->dev, + .bq = &bq, + .xss = &ss, + .napi = true, + }; + u32 done_frames; + + done_frames = idpf_xdpsq_poll(xdpsq, budget); + if (unlikely(!done_frames)) + return 0; + + xdp_frame_bulk_init(&bq); + + for (u32 i = 0; likely(i < done_frames); i++) { + libeth_xdp_complete_tx(&xdpsq->tx_buf[tx_ntc], &cp); + + if (unlikely(++tx_ntc == tx_cnt)) + tx_ntc = 0; + } + + xdp_flush_frame_bulk(&bq); + + xdpsq->next_to_clean = tx_ntc; + xdpsq->pending -= done_frames; + xdpsq->xdp_tx -= cp.xdp_tx; + + return done_frames; +} + +static u32 idpf_xdp_tx_prep(void *_xdpsq, struct libeth_xdpsq *sq) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 free; + + libeth_xdpsq_lock(&xdpsq->xdp_lock); + + free = xdpsq->desc_count - xdpsq->pending; + if (free < xdpsq->thresh) + free += idpf_xdpsq_complete(xdpsq, xdpsq->thresh); + + *sq = (struct libeth_xdpsq){ + .sqes = xdpsq->tx_buf, + .descs = xdpsq->desc_ring, + .count = xdpsq->desc_count, + .lock = &xdpsq->xdp_lock, + .ntu = &xdpsq->next_to_use, + .pending = &xdpsq->pending, + .xdp_tx = &xdpsq->xdp_tx, + }; + + return free; +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_TIMER(static idpf_xdp_tx_timer, idpf_xdpsq_complete); +LIBETH_XDP_DEFINE_FLUSH_TX(idpf_xdp_tx_flush_bulk, idpf_xdp_tx_prep, + idpf_xdp_tx_xmit); +LIBETH_XDP_DEFINE_FLUSH_XMIT(static idpf_xdp_xmit_flush_bulk, idpf_xdp_tx_prep, + idpf_xdp_tx_xmit); +LIBETH_XDP_DEFINE_END(); + +int idpf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags) +{ + const struct idpf_netdev_priv *np = netdev_priv(dev); + const struct idpf_vport *vport = np->vport; + + if (unlikely(!netif_carrier_ok(dev) || !vport->link_up)) + return -ENETDOWN; + + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, + &vport->txqs[vport->xdp_txq_offset], + vport->num_xdp_txq, + idpf_xdp_xmit_flush_bulk, + idpf_xdp_tx_finalize); +} + +static int idpf_xdpmo_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct libeth_xdp_buff *xdp = (typeof(xdp))ctx; + struct idpf_xdp_rx_desc desc __uninitialized; + const struct idpf_rx_queue *rxq; + struct libeth_rx_pt pt; + + rxq = libeth_xdp_buff_to_rq(xdp, typeof(*rxq), xdp_rxq); + + idpf_xdp_get_qw0(&desc, xdp->desc); + + pt = rxq->rx_ptype_lkup[idpf_xdp_rx_pt(&desc)]; + if (!libeth_rx_pt_has_hash(rxq->xdp_rxq.dev, pt)) + return -ENODATA; + + idpf_xdp_get_qw2(&desc, xdp->desc); + + return libeth_xdpmo_rx_hash(hash, rss_type, idpf_xdp_rx_hash(&desc), + pt); +} + +static const struct xdp_metadata_ops idpf_xdpmo = { + .xmo_rx_hash = idpf_xdpmo_rx_hash, +}; + +void idpf_xdp_set_features(const struct idpf_vport *vport) +{ + if (!idpf_is_queue_model_split(vport->rxq_model)) + return; + + libeth_xdp_set_features_noredir(vport->netdev, &idpf_xdpmo, + idpf_get_max_tx_bufs(vport->adapter), + libeth_xsktmo); +} + +static int idpf_xdp_setup_prog(struct idpf_vport *vport, + const struct netdev_bpf *xdp) +{ + const struct idpf_netdev_priv *np = netdev_priv(vport->netdev); + struct bpf_prog *old, *prog = xdp->prog; + struct idpf_vport_config *cfg; + int ret; + + cfg = vport->adapter->vport_config[vport->idx]; + + if (test_bit(IDPF_REMOVE_IN_PROG, vport->adapter->flags) || + !test_bit(IDPF_VPORT_REG_NETDEV, cfg->flags) || + !!vport->xdp_prog == !!prog) { + if (np->state == __IDPF_VPORT_UP) + idpf_xdp_copy_prog_to_rqs(vport, prog); + + old = xchg(&vport->xdp_prog, prog); + if (old) + bpf_prog_put(old); + + cfg->user_config.xdp_prog = prog; + + return 0; + } + + if (!vport->num_xdp_txq && vport->num_txq == cfg->max_q.max_txq) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "No Tx queues available for XDP, please decrease the number of regular SQs"); + return -ENOSPC; + } + + old = cfg->user_config.xdp_prog; + cfg->user_config.xdp_prog = prog; + + ret = idpf_initiate_soft_reset(vport, IDPF_SR_Q_CHANGE); + if (ret) { + NL_SET_ERR_MSG_MOD(xdp->extack, + "Could not reopen the vport after XDP setup"); + + cfg->user_config.xdp_prog = old; + old = prog; + } + + if (old) + bpf_prog_put(old); + + libeth_xdp_set_redirect(vport->netdev, vport->xdp_prog); + + return ret; +} + +int idpf_xdp(struct net_device *dev, struct netdev_bpf *xdp) +{ + struct idpf_vport *vport; + int ret; + + idpf_vport_ctrl_lock(dev); + vport = idpf_netdev_to_vport(dev); + + if (!idpf_is_queue_model_split(vport->txq_model)) + goto notsupp; + + switch (xdp->command) { + case XDP_SETUP_PROG: + ret = idpf_xdp_setup_prog(vport, xdp); + break; + case XDP_SETUP_XSK_POOL: + ret = idpf_xsk_pool_setup(vport, xdp); + break; + default: +notsupp: + ret = -EOPNOTSUPP; + break; + } + + idpf_vport_ctrl_unlock(dev); + + return ret; +} diff --git a/drivers/net/ethernet/intel/idpf/xdp.h b/drivers/net/ethernet/intel/idpf/xdp.h new file mode 100644 index 000000000000..479f5ef3c604 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xdp.h @@ -0,0 +1,175 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef _IDPF_XDP_H_ +#define _IDPF_XDP_H_ + +#include <net/libeth/xdp.h> + +#include "idpf_txrx.h" + +int idpf_xdp_rxq_info_init(struct idpf_rx_queue *rxq); +int idpf_xdp_rxq_info_init_all(const struct idpf_vport *vport); +void idpf_xdp_rxq_info_deinit(struct idpf_rx_queue *rxq, u32 model); +void idpf_xdp_rxq_info_deinit_all(const struct idpf_vport *vport); +void idpf_xdp_copy_prog_to_rqs(const struct idpf_vport *vport, + struct bpf_prog *xdp_prog); + +int idpf_xdpsqs_get(const struct idpf_vport *vport); +void idpf_xdpsqs_put(const struct idpf_vport *vport); + +u32 idpf_xdpsq_poll(struct idpf_tx_queue *xdpsq, u32 budget); +bool idpf_xdp_tx_flush_bulk(struct libeth_xdp_tx_bulk *bq, u32 flags); + +/** + * idpf_xdp_tx_xmit - produce a single HW Tx descriptor out of XDP desc + * @desc: XDP descriptor to pull the DMA address and length from + * @i: descriptor index on the queue to fill + * @sq: XDP queue to produce the HW Tx descriptor on + * @priv: &xsk_tx_metadata_ops on XSk xmit or %NULL + */ +static inline void idpf_xdp_tx_xmit(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + struct idpf_flex_tx_desc *tx_desc = sq->descs; + u32 cmd; + + cmd = FIELD_PREP(IDPF_FLEX_TXD_QW1_DTYPE_M, + IDPF_TX_DESC_DTYPE_FLEX_L2TAG1_L2TAG2); + if (desc.flags & LIBETH_XDP_TX_LAST) + cmd |= FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M, + IDPF_TX_DESC_CMD_EOP); + if (priv && (desc.flags & LIBETH_XDP_TX_CSUM)) + cmd |= FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M, + IDPF_TX_FLEX_DESC_CMD_CS_EN); + + tx_desc = &tx_desc[i]; + tx_desc->buf_addr = cpu_to_le64(desc.addr); +#ifdef __LIBETH_WORD_ACCESS + *(u64 *)&tx_desc->qw1 = ((u64)desc.len << 48) | cmd; +#else + tx_desc->qw1.buf_size = cpu_to_le16(desc.len); + tx_desc->qw1.cmd_dtype = cpu_to_le16(cmd); +#endif +} + +static inline void idpf_xdpsq_set_rs(const struct idpf_tx_queue *xdpsq) +{ + u32 ntu, cmd; + + ntu = xdpsq->next_to_use; + if (unlikely(!ntu)) + ntu = xdpsq->desc_count; + + cmd = FIELD_PREP(IDPF_FLEX_TXD_QW1_CMD_M, IDPF_TX_DESC_CMD_RS); +#ifdef __LIBETH_WORD_ACCESS + *(u64 *)&xdpsq->flex_tx[ntu - 1].q.qw1 |= cmd; +#else + xdpsq->flex_tx[ntu - 1].q.qw1.cmd_dtype |= cpu_to_le16(cmd); +#endif +} + +static inline void idpf_xdpsq_update_tail(const struct idpf_tx_queue *xdpsq) +{ + dma_wmb(); + writel_relaxed(xdpsq->next_to_use, xdpsq->tail); +} + +/** + * idpf_xdp_tx_finalize - finalize sending over XDPSQ + * @_xdpsq: XDP Tx queue + * @sent: whether any frames were sent + * @flush: whether to update RS bit and the tail register + * + * Set the RS bit ("end of batch"), bump the tail, and queue the cleanup timer. + * To be called after a NAPI polling loop, at the end of .ndo_xdp_xmit() etc. + */ +static inline void idpf_xdp_tx_finalize(void *_xdpsq, bool sent, bool flush) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + + if ((!flush || unlikely(!sent)) && + likely(xdpsq->desc_count - 1 != xdpsq->pending)) + return; + + libeth_xdpsq_lock(&xdpsq->xdp_lock); + + idpf_xdpsq_set_rs(xdpsq); + idpf_xdpsq_update_tail(xdpsq); + + libeth_xdpsq_queue_timer(xdpsq->timer); + + libeth_xdpsq_unlock(&xdpsq->xdp_lock); +} + +struct idpf_xdp_rx_desc { + aligned_u64 qw0; +#define IDPF_XDP_RX_BUFQ BIT_ULL(47) +#define IDPF_XDP_RX_GEN BIT_ULL(46) +#define IDPF_XDP_RX_LEN GENMASK_ULL(45, 32) +#define IDPF_XDP_RX_PT GENMASK_ULL(25, 16) + + aligned_u64 qw1; +#define IDPF_XDP_RX_BUF GENMASK_ULL(47, 32) +#define IDPF_XDP_RX_EOP BIT_ULL(1) + + aligned_u64 qw2; +#define IDPF_XDP_RX_HASH GENMASK_ULL(31, 0) + + aligned_u64 qw3; +} __aligned(4 * sizeof(u64)); +static_assert(sizeof(struct idpf_xdp_rx_desc) == + sizeof(struct virtchnl2_rx_flex_desc_adv_nic_3)); + +#define idpf_xdp_rx_bufq(desc) !!((desc)->qw0 & IDPF_XDP_RX_BUFQ) +#define idpf_xdp_rx_gen(desc) !!((desc)->qw0 & IDPF_XDP_RX_GEN) +#define idpf_xdp_rx_len(desc) FIELD_GET(IDPF_XDP_RX_LEN, (desc)->qw0) +#define idpf_xdp_rx_pt(desc) FIELD_GET(IDPF_XDP_RX_PT, (desc)->qw0) +#define idpf_xdp_rx_buf(desc) FIELD_GET(IDPF_XDP_RX_BUF, (desc)->qw1) +#define idpf_xdp_rx_eop(desc) !!((desc)->qw1 & IDPF_XDP_RX_EOP) +#define idpf_xdp_rx_hash(desc) FIELD_GET(IDPF_XDP_RX_HASH, (desc)->qw2) + +static inline void +idpf_xdp_get_qw0(struct idpf_xdp_rx_desc *desc, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) +{ +#ifdef __LIBETH_WORD_ACCESS + desc->qw0 = ((const typeof(desc))rxd)->qw0; +#else + desc->qw0 = ((u64)le16_to_cpu(rxd->pktlen_gen_bufq_id) << 32) | + ((u64)le16_to_cpu(rxd->ptype_err_fflags0) << 16); +#endif +} + +static inline void +idpf_xdp_get_qw1(struct idpf_xdp_rx_desc *desc, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) +{ +#ifdef __LIBETH_WORD_ACCESS + desc->qw1 = ((const typeof(desc))rxd)->qw1; +#else + desc->qw1 = ((u64)le16_to_cpu(rxd->buf_id) << 32) | + rxd->status_err0_qw1; +#endif +} + +static inline void +idpf_xdp_get_qw2(struct idpf_xdp_rx_desc *desc, + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rxd) +{ +#ifdef __LIBETH_WORD_ACCESS + desc->qw2 = ((const typeof(desc))rxd)->qw2; +#else + desc->qw2 = ((u64)rxd->hash3 << 24) | + ((u64)rxd->ff2_mirrid_hash2.hash2 << 16) | + le16_to_cpu(rxd->hash1); +#endif +} + +void idpf_xdp_set_features(const struct idpf_vport *vport); + +int idpf_xdp(struct net_device *dev, struct netdev_bpf *xdp); +int idpf_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, + u32 flags); + +#endif /* _IDPF_XDP_H_ */ diff --git a/drivers/net/ethernet/intel/idpf/xsk.c b/drivers/net/ethernet/intel/idpf/xsk.c new file mode 100644 index 000000000000..fd2cc43ab43c --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xsk.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2025 Intel Corporation */ + +#include <net/libeth/xsk.h> + +#include "idpf.h" +#include "xdp.h" +#include "xsk.h" + +static void idpf_xsk_tx_timer(struct work_struct *work); + +static void idpf_xsk_setup_rxq(const struct idpf_vport *vport, + struct idpf_rx_queue *rxq) +{ + struct xsk_buff_pool *pool; + + pool = xsk_get_pool_from_qid(vport->netdev, rxq->idx); + if (!pool || !pool->dev || !xsk_buff_can_alloc(pool, 1)) + return; + + rxq->pool = pool; + + idpf_queue_set(XSK, rxq); +} + +static void idpf_xsk_setup_bufq(const struct idpf_vport *vport, + struct idpf_buf_queue *bufq) +{ + struct xsk_buff_pool *pool; + u32 qid = U32_MAX; + + for (u32 i = 0; i < vport->num_rxq_grp; i++) { + const struct idpf_rxq_group *grp = &vport->rxq_grps[i]; + + for (u32 j = 0; j < vport->num_bufqs_per_qgrp; j++) { + if (&grp->splitq.bufq_sets[j].bufq == bufq) { + qid = grp->splitq.rxq_sets[0]->rxq.idx; + goto setup; + } + } + } + +setup: + pool = xsk_get_pool_from_qid(vport->netdev, qid); + if (!pool || !pool->dev || !xsk_buff_can_alloc(pool, 1)) + return; + + bufq->pool = pool; + + idpf_queue_set(XSK, bufq); +} + +static void idpf_xsk_setup_txq(const struct idpf_vport *vport, + struct idpf_tx_queue *txq) +{ + struct xsk_buff_pool *pool; + u32 qid; + + idpf_queue_clear(XSK, txq); + + if (!idpf_queue_has(XDP, txq)) + return; + + qid = txq->idx - vport->xdp_txq_offset; + + pool = xsk_get_pool_from_qid(vport->netdev, qid); + if (!pool || !pool->dev) + return; + + txq->pool = pool; + libeth_xdpsq_init_timer(txq->timer, txq, &txq->xdp_lock, + idpf_xsk_tx_timer); + + idpf_queue_assign(NOIRQ, txq, xsk_uses_need_wakeup(pool)); + idpf_queue_set(XSK, txq); +} + +static void idpf_xsk_setup_complq(const struct idpf_vport *vport, + struct idpf_compl_queue *complq) +{ + const struct xsk_buff_pool *pool; + u32 qid; + + idpf_queue_clear(XSK, complq); + + if (!idpf_queue_has(XDP, complq)) + return; + + qid = complq->txq_grp->txqs[0]->idx - vport->xdp_txq_offset; + + pool = xsk_get_pool_from_qid(vport->netdev, qid); + if (!pool || !pool->dev) + return; + + idpf_queue_set(XSK, complq); +} + +void idpf_xsk_setup_queue(const struct idpf_vport *vport, void *q, + enum virtchnl2_queue_type type) +{ + if (!idpf_xdp_enabled(vport)) + return; + + switch (type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + idpf_xsk_setup_rxq(vport, q); + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + idpf_xsk_setup_bufq(vport, q); + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + idpf_xsk_setup_txq(vport, q); + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + idpf_xsk_setup_complq(vport, q); + break; + default: + break; + } +} + +void idpf_xsk_clear_queue(void *q, enum virtchnl2_queue_type type) +{ + struct idpf_compl_queue *complq; + struct idpf_buf_queue *bufq; + struct idpf_rx_queue *rxq; + struct idpf_tx_queue *txq; + + switch (type) { + case VIRTCHNL2_QUEUE_TYPE_RX: + rxq = q; + if (!idpf_queue_has_clear(XSK, rxq)) + return; + + rxq->pool = NULL; + break; + case VIRTCHNL2_QUEUE_TYPE_RX_BUFFER: + bufq = q; + if (!idpf_queue_has_clear(XSK, bufq)) + return; + + bufq->pool = NULL; + break; + case VIRTCHNL2_QUEUE_TYPE_TX: + txq = q; + if (!idpf_queue_has_clear(XSK, txq)) + return; + + idpf_queue_set(NOIRQ, txq); + txq->dev = txq->netdev->dev.parent; + break; + case VIRTCHNL2_QUEUE_TYPE_TX_COMPLETION: + complq = q; + idpf_queue_clear(XSK, complq); + break; + default: + break; + } +} + +void idpf_xsk_init_wakeup(struct idpf_q_vector *qv) +{ + libeth_xsk_init_wakeup(&qv->csd, &qv->napi); +} + +void idpf_xsksq_clean(struct idpf_tx_queue *xdpsq) +{ + struct libeth_xdpsq_napi_stats ss = { }; + u32 ntc = xdpsq->next_to_clean; + struct xdp_frame_bulk bq; + struct libeth_cq_pp cp = { + .dev = xdpsq->pool->dev, + .bq = &bq, + .xss = &ss, + }; + u32 xsk_frames = 0; + + xdp_frame_bulk_init(&bq); + + while (ntc != xdpsq->next_to_use) { + struct libeth_sqe *sqe = &xdpsq->tx_buf[ntc]; + + if (sqe->type) + libeth_xdp_complete_tx(sqe, &cp); + else + xsk_frames++; + + if (unlikely(++ntc == xdpsq->desc_count)) + ntc = 0; + } + + xdp_flush_frame_bulk(&bq); + + if (xsk_frames) + xsk_tx_completed(xdpsq->pool, xsk_frames); +} + +static noinline u32 idpf_xsksq_complete_slow(struct idpf_tx_queue *xdpsq, + u32 done) +{ + struct libeth_xdpsq_napi_stats ss = { }; + u32 ntc = xdpsq->next_to_clean; + u32 cnt = xdpsq->desc_count; + struct xdp_frame_bulk bq; + struct libeth_cq_pp cp = { + .dev = xdpsq->pool->dev, + .bq = &bq, + .xss = &ss, + .napi = true, + }; + u32 xsk_frames = 0; + + xdp_frame_bulk_init(&bq); + + for (u32 i = 0; likely(i < done); i++) { + struct libeth_sqe *sqe = &xdpsq->tx_buf[ntc]; + + if (sqe->type) + libeth_xdp_complete_tx(sqe, &cp); + else + xsk_frames++; + + if (unlikely(++ntc == cnt)) + ntc = 0; + } + + xdp_flush_frame_bulk(&bq); + + xdpsq->next_to_clean = ntc; + xdpsq->xdp_tx -= cp.xdp_tx; + + return xsk_frames; +} + +static __always_inline u32 idpf_xsksq_complete(void *_xdpsq, u32 budget) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 tx_ntc = xdpsq->next_to_clean; + u32 tx_cnt = xdpsq->desc_count; + u32 done_frames; + u32 xsk_frames; + + done_frames = idpf_xdpsq_poll(xdpsq, budget); + if (unlikely(!done_frames)) + return 0; + + if (likely(!xdpsq->xdp_tx)) { + tx_ntc += done_frames; + if (tx_ntc >= tx_cnt) + tx_ntc -= tx_cnt; + + xdpsq->next_to_clean = tx_ntc; + xsk_frames = done_frames; + + goto finalize; + } + + xsk_frames = idpf_xsksq_complete_slow(xdpsq, done_frames); + if (xsk_frames) +finalize: + xsk_tx_completed(xdpsq->pool, xsk_frames); + + xdpsq->pending -= done_frames; + + return done_frames; +} + +static u32 idpf_xsk_tx_prep(void *_xdpsq, struct libeth_xdpsq *sq) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + u32 free; + + libeth_xdpsq_lock(&xdpsq->xdp_lock); + + free = xdpsq->desc_count - xdpsq->pending; + if (free < xdpsq->thresh) + free += idpf_xsksq_complete(xdpsq, xdpsq->thresh); + + *sq = (struct libeth_xdpsq){ + .pool = xdpsq->pool, + .sqes = xdpsq->tx_buf, + .descs = xdpsq->desc_ring, + .count = xdpsq->desc_count, + .lock = &xdpsq->xdp_lock, + .ntu = &xdpsq->next_to_use, + .pending = &xdpsq->pending, + .xdp_tx = &xdpsq->xdp_tx, + }; + + return free; +} + +static u32 idpf_xsk_xmit_prep(void *_xdpsq, struct libeth_xdpsq *sq) +{ + struct idpf_tx_queue *xdpsq = _xdpsq; + + *sq = (struct libeth_xdpsq){ + .pool = xdpsq->pool, + .sqes = xdpsq->tx_buf, + .descs = xdpsq->desc_ring, + .count = xdpsq->desc_count, + .lock = &xdpsq->xdp_lock, + .ntu = &xdpsq->next_to_use, + .pending = &xdpsq->pending, + }; + + /* + * The queue is cleaned, the budget is already known, optimize out + * the second min() by passing the type limit. + */ + return U32_MAX; +} + +bool idpf_xsk_xmit(struct idpf_tx_queue *xsksq) +{ + u32 free; + + libeth_xdpsq_lock(&xsksq->xdp_lock); + + free = xsksq->desc_count - xsksq->pending; + if (free < xsksq->thresh) + free += idpf_xsksq_complete(xsksq, xsksq->thresh); + + return libeth_xsk_xmit_do_bulk(xsksq->pool, xsksq, + min(free - 1, xsksq->thresh), + libeth_xsktmo, idpf_xsk_xmit_prep, + idpf_xdp_tx_xmit, idpf_xdp_tx_finalize); +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_TIMER(static idpf_xsk_tx_timer, idpf_xsksq_complete); +LIBETH_XSK_DEFINE_FLUSH_TX(static idpf_xsk_tx_flush_bulk, idpf_xsk_tx_prep, + idpf_xdp_tx_xmit); +LIBETH_XSK_DEFINE_RUN(static idpf_xsk_run_pass, idpf_xsk_run_prog, + idpf_xsk_tx_flush_bulk, idpf_rx_process_skb_fields); +LIBETH_XSK_DEFINE_FINALIZE(static idpf_xsk_finalize_rx, idpf_xsk_tx_flush_bulk, + idpf_xdp_tx_finalize); +LIBETH_XDP_DEFINE_END(); + +static void idpf_xskfqe_init(const struct libeth_xskfq_fp *fq, u32 i) +{ + struct virtchnl2_splitq_rx_buf_desc *desc = fq->descs; + + desc = &desc[i]; +#ifdef __LIBETH_WORD_ACCESS + *(u64 *)&desc->qword0 = i; +#else + desc->qword0.buf_id = cpu_to_le16(i); +#endif + desc->pkt_addr = cpu_to_le64(libeth_xsk_buff_xdp_get_dma(fq->fqes[i])); +} + +static bool idpf_xskfq_refill_thresh(struct idpf_buf_queue *bufq, u32 count) +{ + struct libeth_xskfq_fp fq = { + .pool = bufq->pool, + .fqes = bufq->xsk_buf, + .descs = bufq->split_buf, + .ntu = bufq->next_to_use, + .count = bufq->desc_count, + }; + u32 done; + + done = libeth_xskfqe_alloc(&fq, count, idpf_xskfqe_init); + writel(fq.ntu, bufq->tail); + + bufq->next_to_use = fq.ntu; + bufq->pending -= done; + + return done == count; +} + +static bool idpf_xskfq_refill(struct idpf_buf_queue *bufq) +{ + u32 count, rx_thresh = bufq->thresh; + + count = ALIGN_DOWN(bufq->pending - 1, rx_thresh); + + for (u32 i = 0; i < count; i += rx_thresh) { + if (unlikely(!idpf_xskfq_refill_thresh(bufq, rx_thresh))) + return false; + } + + return true; +} + +int idpf_xskfq_init(struct idpf_buf_queue *bufq) +{ + struct libeth_xskfq fq = { + .pool = bufq->pool, + .count = bufq->desc_count, + .nid = idpf_q_vector_to_mem(bufq->q_vector), + }; + int ret; + + ret = libeth_xskfq_create(&fq); + if (ret) + return ret; + + bufq->xsk_buf = fq.fqes; + bufq->pending = fq.pending; + bufq->thresh = fq.thresh; + bufq->rx_buf_size = fq.buf_len; + + if (!idpf_xskfq_refill(bufq)) + netdev_err(bufq->pool->netdev, + "failed to allocate XSk buffers for qid %d\n", + bufq->pool->queue_id); + + bufq->next_to_alloc = bufq->next_to_use; + + idpf_queue_clear(HSPLIT_EN, bufq); + bufq->rx_hbuf_size = 0; + + return 0; +} + +void idpf_xskfq_rel(struct idpf_buf_queue *bufq) +{ + struct libeth_xskfq fq = { + .fqes = bufq->xsk_buf, + }; + + libeth_xskfq_destroy(&fq); + + bufq->rx_buf_size = fq.buf_len; + bufq->thresh = fq.thresh; + bufq->pending = fq.pending; +} + +struct idpf_xskfq_refill_set { + struct { + struct idpf_buf_queue *q; + u32 buf_id; + u32 pending; + } bufqs[IDPF_MAX_BUFQS_PER_RXQ_GRP]; +}; + +static bool idpf_xskfq_refill_set(const struct idpf_xskfq_refill_set *set) +{ + bool ret = true; + + for (u32 i = 0; i < ARRAY_SIZE(set->bufqs); i++) { + struct idpf_buf_queue *bufq = set->bufqs[i].q; + u32 ntc; + + if (!bufq) + continue; + + ntc = set->bufqs[i].buf_id; + if (unlikely(++ntc == bufq->desc_count)) + ntc = 0; + + bufq->next_to_clean = ntc; + bufq->pending += set->bufqs[i].pending; + + if (bufq->pending > bufq->thresh) + ret &= idpf_xskfq_refill(bufq); + } + + return ret; +} + +int idpf_xskrq_poll(struct idpf_rx_queue *rxq, u32 budget) +{ + struct idpf_xskfq_refill_set set = { }; + struct libeth_rq_napi_stats rs = { }; + bool wake, gen, fail = false; + u32 ntc = rxq->next_to_clean; + struct libeth_xdp_buff *xdp; + LIBETH_XDP_ONSTACK_BULK(bq); + u32 cnt = rxq->desc_count; + + wake = xsk_uses_need_wakeup(rxq->pool); + if (wake) + xsk_clear_rx_need_wakeup(rxq->pool); + + gen = idpf_queue_has(GEN_CHK, rxq); + + libeth_xsk_tx_init_bulk(&bq, rxq->xdp_prog, rxq->xdp_rxq.dev, + rxq->xdpsqs, rxq->num_xdp_txq); + xdp = rxq->xsk; + + while (likely(rs.packets < budget)) { + const struct virtchnl2_rx_flex_desc_adv_nic_3 *rx_desc; + struct idpf_xdp_rx_desc desc __uninitialized; + struct idpf_buf_queue *bufq; + u32 bufq_id, buf_id; + + rx_desc = &rxq->rx[ntc].flex_adv_nic_3_wb; + + idpf_xdp_get_qw0(&desc, rx_desc); + if (idpf_xdp_rx_gen(&desc) != gen) + break; + + dma_rmb(); + + bufq_id = idpf_xdp_rx_bufq(&desc); + bufq = set.bufqs[bufq_id].q; + if (!bufq) { + bufq = &rxq->bufq_sets[bufq_id].bufq; + set.bufqs[bufq_id].q = bufq; + } + + idpf_xdp_get_qw1(&desc, rx_desc); + buf_id = idpf_xdp_rx_buf(&desc); + + set.bufqs[bufq_id].buf_id = buf_id; + set.bufqs[bufq_id].pending++; + + xdp = libeth_xsk_process_buff(xdp, bufq->xsk_buf[buf_id], + idpf_xdp_rx_len(&desc)); + + if (unlikely(++ntc == cnt)) { + ntc = 0; + gen = !gen; + idpf_queue_change(GEN_CHK, rxq); + } + + if (!idpf_xdp_rx_eop(&desc) || unlikely(!xdp)) + continue; + + fail = !idpf_xsk_run_pass(xdp, &bq, rxq->napi, &rs, rx_desc); + xdp = NULL; + + if (fail) + break; + } + + idpf_xsk_finalize_rx(&bq); + + rxq->next_to_clean = ntc; + rxq->xsk = xdp; + + fail |= !idpf_xskfq_refill_set(&set); + + u64_stats_update_begin(&rxq->stats_sync); + u64_stats_add(&rxq->q_stats.packets, rs.packets); + u64_stats_add(&rxq->q_stats.bytes, rs.bytes); + u64_stats_update_end(&rxq->stats_sync); + + if (!wake) + return unlikely(fail) ? budget : rs.packets; + + if (unlikely(fail)) + xsk_set_rx_need_wakeup(rxq->pool); + + return rs.packets; +} + +int idpf_xsk_pool_setup(struct idpf_vport *vport, struct netdev_bpf *bpf) +{ + struct xsk_buff_pool *pool = bpf->xsk.pool; + u32 qid = bpf->xsk.queue_id; + bool restart; + int ret; + + if (pool && !IS_ALIGNED(xsk_pool_get_rx_frame_size(pool), + LIBETH_RX_BUF_STRIDE)) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: HW doesn't support frames sizes not aligned to %u (qid %u: %u)", + netdev_name(vport->netdev), + LIBETH_RX_BUF_STRIDE, qid, + xsk_pool_get_rx_frame_size(pool)); + return -EINVAL; + } + + restart = idpf_xdp_enabled(vport) && netif_running(vport->netdev); + if (!restart) + goto pool; + + ret = idpf_qp_switch(vport, qid, false); + if (ret) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: failed to disable queue pair %u: %pe", + netdev_name(vport->netdev), qid, + ERR_PTR(ret)); + return ret; + } + +pool: + ret = libeth_xsk_setup_pool(vport->netdev, qid, pool); + if (ret) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: failed to configure XSk pool for pair %u: %pe", + netdev_name(vport->netdev), qid, + ERR_PTR(ret)); + return ret; + } + + if (!restart) + return 0; + + ret = idpf_qp_switch(vport, qid, true); + if (ret) { + NL_SET_ERR_MSG_FMT_MOD(bpf->extack, + "%s: failed to enable queue pair %u: %pe", + netdev_name(vport->netdev), qid, + ERR_PTR(ret)); + goto err_dis; + } + + return 0; + +err_dis: + libeth_xsk_setup_pool(vport->netdev, qid, false); + + return ret; +} + +int idpf_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags) +{ + const struct idpf_netdev_priv *np = netdev_priv(dev); + const struct idpf_vport *vport = np->vport; + struct idpf_q_vector *q_vector; + + if (unlikely(idpf_vport_ctrl_is_locked(dev))) + return -EBUSY; + + if (unlikely(!vport->link_up)) + return -ENETDOWN; + + if (unlikely(!vport->num_xdp_txq)) + return -ENXIO; + + q_vector = idpf_find_rxq_vec(vport, qid); + if (unlikely(!q_vector->xsksq)) + return -ENXIO; + + libeth_xsk_wakeup(&q_vector->csd, qid); + + return 0; +} diff --git a/drivers/net/ethernet/intel/idpf/xsk.h b/drivers/net/ethernet/intel/idpf/xsk.h new file mode 100644 index 000000000000..b622d08c03e8 --- /dev/null +++ b/drivers/net/ethernet/intel/idpf/xsk.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* Copyright (C) 2025 Intel Corporation */ + +#ifndef _IDPF_XSK_H_ +#define _IDPF_XSK_H_ + +#include <linux/types.h> + +enum virtchnl2_queue_type; +struct idpf_buf_queue; +struct idpf_q_vector; +struct idpf_rx_queue; +struct idpf_tx_queue; +struct idpf_vport; +struct net_device; +struct netdev_bpf; + +void idpf_xsk_setup_queue(const struct idpf_vport *vport, void *q, + enum virtchnl2_queue_type type); +void idpf_xsk_clear_queue(void *q, enum virtchnl2_queue_type type); +void idpf_xsk_init_wakeup(struct idpf_q_vector *qv); + +int idpf_xskfq_init(struct idpf_buf_queue *bufq); +void idpf_xskfq_rel(struct idpf_buf_queue *bufq); +void idpf_xsksq_clean(struct idpf_tx_queue *xdpq); + +int idpf_xskrq_poll(struct idpf_rx_queue *rxq, u32 budget); +bool idpf_xsk_xmit(struct idpf_tx_queue *xsksq); + +int idpf_xsk_pool_setup(struct idpf_vport *vport, struct netdev_bpf *xdp); +int idpf_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags); + +#endif /* !_IDPF_XSK_H_ */ |