From abddafd4585cc825d454da3cf308ad1226f6c554 Mon Sep 17 00:00:00 2001 From: Grzegorz Siwik Date: Fri, 12 Aug 2022 15:25:49 +0200 Subject: ice: Fix clearing of promisc mode with bridge over bond When at least two interfaces are bonded and a bridge is enabled on the bond, an error can occur when the bridge is removed and re-added. The reason for the error is because promiscuous mode was not fully cleared from the VLAN VSI in the hardware. With this change, promiscuous mode is properly removed when the bridge disconnects from bonding. [ 1033.676359] bond1: link status definitely down for interface enp95s0f0, disabling it [ 1033.676366] bond1: making interface enp175s0f0 the new active one [ 1033.676369] device enp95s0f0 left promiscuous mode [ 1033.676522] device enp175s0f0 entered promiscuous mode [ 1033.676901] ice 0000:af:00.0 enp175s0f0: Error setting Multicast promiscuous mode on VSI 6 [ 1041.795662] ice 0000:af:00.0 enp175s0f0: Error setting Multicast promiscuous mode on VSI 6 [ 1041.944826] bond1: link status definitely down for interface enp175s0f0, disabling it [ 1041.944874] device enp175s0f0 left promiscuous mode [ 1041.944918] bond1: now running without any active interface! Fixes: c31af68a1b94 ("ice: Add outer_vlan_ops and VSI specific VLAN ops implementations") Co-developed-by: Jesse Brandeburg Signed-off-by: Jesse Brandeburg Signed-off-by: Grzegorz Siwik Link: https://lore.kernel.org/all/CAK8fFZ7m-KR57M_rYX6xZN39K89O=LGooYkKsu6HKt0Bs+x6xQ@mail.gmail.com/ Tested-by: Jaroslav Pulchart Tested-by: Igor Raits Tested-by: Gurucharan (A Contingent worker at Intel) Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice_main.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index eb40526ee179..4ecaf40cf946 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -267,8 +267,10 @@ static int ice_set_promisc(struct ice_vsi *vsi, u8 promisc_m) status = ice_fltr_set_vsi_promisc(&vsi->back->hw, vsi->idx, promisc_m, 0); } + if (status && status != -EEXIST) + return status; - return status; + return 0; } /** @@ -3573,6 +3575,14 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) while (test_and_set_bit(ICE_CFG_BUSY, vsi->state)) usleep_range(1000, 2000); + ret = ice_clear_vsi_promisc(&vsi->back->hw, vsi->idx, + ICE_MCAST_VLAN_PROMISC_BITS, vid); + if (ret) { + netdev_err(netdev, "Error clearing multicast promiscuous mode on VSI %i\n", + vsi->vsi_num); + vsi->current_netdev_flags |= IFF_ALLMULTI; + } + vlan_ops = ice_get_compat_vsi_vlan_ops(vsi); /* Make sure VLAN delete is successful before updating VLAN -- cgit v1.2.3 From 9ead7e74bfd6dd54db12ef133b8604add72511de Mon Sep 17 00:00:00 2001 From: Maciej Fijalkowski Date: Thu, 11 Aug 2022 20:21:49 +0200 Subject: ice: xsk: use Rx ring's XDP ring when picking NAPI context Ice driver allocates per cpu XDP queues so that redirect path can safely use smp_processor_id() as an index to the array. At the same time though, XDP rings are used to pick NAPI context to call napi_schedule() or set NAPIF_STATE_MISSED. When user reduces queue count, say to 8, and num_possible_cpus() of underlying platform is 44, then this means queue vectors with correlated NAPI contexts will carry several XDP queues. This in turn can result in a broken behavior where NAPI context of interest will never be scheduled and AF_XDP socket will not process any traffic. To fix this, let us change the way how XDP rings are assigned to Rx rings and use this information later on when setting ice_tx_ring::xsk_pool pointer. For each Rx ring, grab the associated queue vector and walk through Tx ring's linked list. Once we stumble upon XDP ring in it, assign this ring to ice_rx_ring::xdp_ring. Previous [0] approach of fixing this issue was for txonly scenario because of the described grouping of XDP rings across queue vectors. So, relying on Rx ring meant that NAPI context could be scheduled with a queue vector without XDP ring with associated XSK pool. [0]: https://lore.kernel.org/netdev/20220707161128.54215-1-maciej.fijalkowski@intel.com/ Fixes: 2d4238f55697 ("ice: Add support for AF_XDP") Fixes: 22bf877e528f ("ice: introduce XDP_TX fallback path") Signed-off-by: Maciej Fijalkowski Tested-by: George Kuruvinakunnel Signed-off-by: Tony Nguyen --- drivers/net/ethernet/intel/ice/ice.h | 36 ++++++++++++++++++++----------- drivers/net/ethernet/intel/ice/ice_lib.c | 4 ++-- drivers/net/ethernet/intel/ice/ice_main.c | 25 ++++++++++++++------- drivers/net/ethernet/intel/ice/ice_xsk.c | 12 +++++------ 4 files changed, 48 insertions(+), 29 deletions(-) (limited to 'drivers/net/ethernet/intel/ice/ice_main.c') diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index cc5b85afd437..841fa149c407 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -684,8 +684,8 @@ static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) * ice_xsk_pool - get XSK buffer pool bound to a ring * @ring: Rx ring to use * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. + * Returns a pointer to xsk_buff_pool structure if there is a buffer pool + * present, NULL otherwise. */ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) { @@ -699,23 +699,33 @@ static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) } /** - * ice_tx_xsk_pool - get XSK buffer pool bound to a ring - * @ring: Tx ring to use + * ice_tx_xsk_pool - assign XSK buff pool to XDP ring + * @vsi: pointer to VSI + * @qid: index of a queue to look at XSK buff pool presence * - * Returns a pointer to xdp_umem structure if there is a buffer pool present, - * NULL otherwise. Tx equivalent of ice_xsk_pool. + * Sets XSK buff pool pointer on XDP ring. + * + * XDP ring is picked from Rx ring, whereas Rx ring is picked based on provided + * queue id. Reason for doing so is that queue vectors might have assigned more + * than one XDP ring, e.g. when user reduced the queue count on netdev; Rx ring + * carries a pointer to one of these XDP rings for its own purposes, such as + * handling XDP_TX action, therefore we can piggyback here on the + * rx_ring->xdp_ring assignment that was done during XDP rings initialization. */ -static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) +static inline void ice_tx_xsk_pool(struct ice_vsi *vsi, u16 qid) { - struct ice_vsi *vsi = ring->vsi; - u16 qid; + struct ice_tx_ring *ring; - qid = ring->q_index - vsi->alloc_txq; + ring = vsi->rx_rings[qid]->xdp_ring; + if (!ring) + return; - if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) - return NULL; + if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) { + ring->xsk_pool = NULL; + return; + } - return xsk_get_pool_from_qid(vsi->netdev, qid); + ring->xsk_pool = xsk_get_pool_from_qid(vsi->netdev, qid); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 733c455f6574..0c4ec9264071 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1986,8 +1986,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) if (ret) return ret; - ice_for_each_xdp_txq(vsi, i) - vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); + ice_for_each_rxq(vsi, i) + ice_tx_xsk_pool(vsi, i); return ret; } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 4ecaf40cf946..173fe6c31341 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -2581,7 +2581,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); spin_lock_init(&xdp_ring->tx_lock); for (j = 0; j < xdp_ring->count; j++) { tx_desc = ICE_TX_DESC(xdp_ring, j); @@ -2589,13 +2588,6 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) } } - ice_for_each_rxq(vsi, i) { - if (static_key_enabled(&ice_xdp_locking_key)) - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; - else - vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; - } - return 0; free_xdp_rings: @@ -2685,6 +2677,23 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) xdp_rings_rem -= xdp_rings_per_v; } + ice_for_each_rxq(vsi, i) { + if (static_key_enabled(&ice_xdp_locking_key)) { + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; + } else { + struct ice_q_vector *q_vector = vsi->rx_rings[i]->q_vector; + struct ice_tx_ring *ring; + + ice_for_each_tx_ring(ring, q_vector->tx) { + if (ice_ring_is_xdp(ring)) { + vsi->rx_rings[i]->xdp_ring = ring; + break; + } + } + } + ice_tx_xsk_pool(vsi, i); + } + /* omit the scheduler update if in reset path; XDP queues will be * taken into account at the end of ice_vsi_rebuild, where * ice_cfg_vsi_lan is being called diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 45f88e6ec25e..e48e29258450 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -243,7 +243,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); + ice_tx_xsk_pool(vsi, q_idx); } err = ice_vsi_cfg_rxq(rx_ring); @@ -359,7 +359,7 @@ xsk_pool_if_up: if (if_running) { ret = ice_qp_ena(vsi, qid); if (!ret && pool_present) - napi_schedule(&vsi->xdp_rings[qid]->q_vector->napi); + napi_schedule(&vsi->rx_rings[qid]->xdp_ring->q_vector->napi); else if (ret) netdev_err(vsi->netdev, "ice_qp_ena error = %d\n", ret); } @@ -950,13 +950,13 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, if (!ice_is_xdp_ena_vsi(vsi)) return -EINVAL; - if (queue_id >= vsi->num_txq) + if (queue_id >= vsi->num_txq || queue_id >= vsi->num_rxq) return -EINVAL; - if (!vsi->xdp_rings[queue_id]->xsk_pool) - return -EINVAL; + ring = vsi->rx_rings[queue_id]->xdp_ring; - ring = vsi->xdp_rings[queue_id]; + if (!ring->xsk_pool) + return -EINVAL; /* The idea here is that if NAPI is running, mark a miss, so * it will run again. If not, trigger an interrupt and -- cgit v1.2.3