From d2b57063e4aba51d3c49ec957607d2e4c9d5f29a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:37 +0000 Subject: IB/core: Reserve bits in enum ib_qp_create_flags for low-level driver use Reserve bits 26-31 for internal use by low-level drivers. Two such bits are used in the mlx4_b driver SR-IOV implementation. These enum additions guarantee that the core layer will never use these bits, so that low level drivers may safely make use of them. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- include/rdma/ib_verbs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 07996af8265a..46bc045bbe15 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -614,6 +614,9 @@ enum ib_qp_type { enum ib_qp_create_flags { IB_QP_CREATE_IPOIB_UD_LSO = 1 << 0, IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + /* reserve bits 26-31 for low level drivers' internal use */ + IB_QP_CREATE_RESERVED_START = 1 << 26, + IB_QP_CREATE_RESERVED_END = 1 << 31, }; struct ib_qp_init_attr { -- cgit v1.2.3 From 73aaa7418f8069103ca56fc620b3cd16c5a37d6e Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:39 +0000 Subject: IB/core: Add ib_find_exact_cached_pkey() When P_Key tables potentially contain both full and partial membership copies for the same P_Key, we need a function to find the index for an exact (16-bit) P_Key. This is necessary when the master forwards QP1 MADs sent by guests. If the guest has sent the MAD with a limited membership P_Key, we need to to forward the MAD using the same limited membership P_Key. Since the master may have both the limited and the full member P_Keys in its table, we must make sure to retrieve the limited membership P_Key in this case. Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: Roland Dreier --- drivers/infiniband/core/cache.c | 32 ++++++++++++++++++++++++++++++++ include/rdma/ib_cache.h | 16 ++++++++++++++++ 2 files changed, 48 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index 4da381b74f54..80f6cf2449fb 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -199,6 +199,38 @@ int ib_find_cached_pkey(struct ib_device *device, } EXPORT_SYMBOL(ib_find_cached_pkey); +int ib_find_exact_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index) +{ + struct ib_pkey_cache *cache; + unsigned long flags; + int i; + int ret = -ENOENT; + + if (port_num < start_port(device) || port_num > end_port(device)) + return -EINVAL; + + read_lock_irqsave(&device->cache.lock, flags); + + cache = device->cache.pkey_cache[port_num - start_port(device)]; + + *index = -1; + + for (i = 0; i < cache->table_len; ++i) + if (cache->table[i] == pkey) { + *index = i; + ret = 0; + break; + } + + read_unlock_irqrestore(&device->cache.lock, flags); + + return ret; +} +EXPORT_SYMBOL(ib_find_exact_cached_pkey); + int ib_get_cached_lmc(struct ib_device *device, u8 port_num, u8 *lmc) diff --git a/include/rdma/ib_cache.h b/include/rdma/ib_cache.h index 00a2b8ec327f..ad9a3c280944 100644 --- a/include/rdma/ib_cache.h +++ b/include/rdma/ib_cache.h @@ -100,6 +100,22 @@ int ib_find_cached_pkey(struct ib_device *device, u16 pkey, u16 *index); +/** + * ib_find_exact_cached_pkey - Returns the PKey table index where a specified + * PKey value occurs. Comparison uses the FULL 16 bits (incl membership bit) + * @device: The device to query. + * @port_num: The port number of the device to search for the PKey. + * @pkey: The PKey value to search for. + * @index: The index into the cached PKey table where the PKey was found. + * + * ib_find_exact_cached_pkey() searches the specified PKey table in + * the local software cache. + */ +int ib_find_exact_cached_pkey(struct ib_device *device, + u8 port_num, + u16 pkey, + u16 *index); + /** * ib_get_cached_lmc - Returns a cached lmc table entry * @device: The device to query. -- cgit v1.2.3 From 1ffeb2eb8be9936e9dc1f9af2d5f4c14d69a0d36 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:40 +0000 Subject: IB/mlx4: SR-IOV IB context objects and proxy/tunnel SQP support 1. Introduce the basic SR-IOV parvirtualization context objects for multiplexing and demultiplexing MADs. 2. Introduce support for the new proxy and tunnel QP types. This patch introduces the objects required by the master for managing QP paravirtualization for guests. struct mlx4_ib_sriov is created by the master only. It is a container for the following: 1. All the info required by the PPF to multiplex and de-multiplex MADs (including those from the PF). (struct mlx4_ib_demux_ctx demux) 2. All the info required to manage alias GUIDs (i.e., the GUID at index 0 that each guest perceives. In fact, this is not the GUID which is actually at index 0, but is, in fact, the GUID which is at index[] in the physical table. 3. structures which are used to manage CM paravirtualization 4. structures for managing the real special QPs when running in SR-IOV mode. The real SQPs are controlled by the PPF in this case. All SQPs created and controlled by the ib core layer are proxy SQP. struct mlx4_ib_demux_ctx contains the information per port needed to manage paravirtualization: 1. All multicast paravirt info 2. All tunnel-qp paravirt info for the port. 3. GUID-table and GUID-prefix for the port 4. work queues. struct mlx4_ib_demux_pv_ctx contains all the info for managing the paravirtualized QPs for one slave/port. struct mlx4_ib_demux_pv_qp contains the info need to run an individual QP (either tunnel qp or real SQP). Note: We made use of the 2 most significant bits in enum mlx4_ib_qp_flags (based on enum ib_qp_create_flags in ib_verbs.h). We need these bits in the low-level driver for internal purposes. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/cq.c | 31 +- drivers/infiniband/hw/mlx4/mlx4_ib.h | 128 +++++++- drivers/infiniband/hw/mlx4/qp.c | 616 ++++++++++++++++++++++++++++++----- include/linux/mlx4/device.h | 1 + include/linux/mlx4/qp.h | 3 +- 5 files changed, 702 insertions(+), 77 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index 6d4ef71cbcdf..c9eb6a6815ce 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -547,6 +547,26 @@ static int mlx4_ib_ipoib_csum_ok(__be16 status, __be16 checksum) checksum == cpu_to_be16(0xffff); } +static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct ib_wc *wc, + unsigned tail, struct mlx4_cqe *cqe) +{ + struct mlx4_ib_proxy_sqp_hdr *hdr; + + ib_dma_sync_single_for_cpu(qp->ibqp.device, + qp->sqp_proxy_rcv[tail].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + hdr = (struct mlx4_ib_proxy_sqp_hdr *) (qp->sqp_proxy_rcv[tail].addr); + wc->pkey_index = be16_to_cpu(hdr->tun.pkey_index); + wc->slid = be16_to_cpu(hdr->tun.slid_mac_47_32); + wc->sl = (u8) (be16_to_cpu(hdr->tun.sl_vid) >> 12); + wc->src_qp = be32_to_cpu(hdr->tun.flags_src_qp) & 0xFFFFFF; + wc->wc_flags |= (hdr->tun.g_ml_path & 0x80) ? (IB_WC_GRH) : 0; + wc->dlid_path_bits = 0; + + return 0; +} + static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_qp **cur_qp, struct ib_wc *wc) @@ -559,6 +579,7 @@ static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, int is_error; u32 g_mlpath_rqpn; u16 wqe_ctr; + unsigned tail = 0; repoll: cqe = next_cqe_sw(cq); @@ -634,7 +655,8 @@ repoll: mlx4_ib_free_srq_wqe(srq, wqe_ctr); } else { wq = &(*cur_qp)->rq; - wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + tail = wq->tail & (wq->wqe_cnt - 1); + wc->wr_id = wq->wrid[tail]; ++wq->tail; } @@ -717,6 +739,13 @@ repoll: break; } + if (mlx4_is_mfunc(to_mdev(cq->ibcq.device)->dev)) { + if ((*cur_qp)->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) + return use_tunnel_data(*cur_qp, cq, wc, tail, cqe); + } + wc->slid = be16_to_cpu(cqe->rlid); g_mlpath_rqpn = be32_to_cpu(cqe->g_mlpath_rqpn); wc->src_qp = g_mlpath_rqpn & 0xffffff; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index c136bb618e29..1248d576b031 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -133,8 +133,10 @@ struct mlx4_ib_wq { }; enum mlx4_ib_qp_flags { - MLX4_IB_QP_LSO = 1 << 0, - MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = 1 << 1, + MLX4_IB_QP_LSO = IB_QP_CREATE_IPOIB_UD_LSO, + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK = IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK, + MLX4_IB_SRIOV_TUNNEL_QP = 1 << 30, + MLX4_IB_SRIOV_SQP = 1 << 31, }; struct mlx4_ib_gid_entry { @@ -144,6 +146,68 @@ struct mlx4_ib_gid_entry { u8 port; }; +enum mlx4_ib_qp_type { + /* + * IB_QPT_SMI and IB_QPT_GSI have to be the first two entries + * here (and in that order) since the MAD layer uses them as + * indices into a 2-entry table. + */ + MLX4_IB_QPT_SMI = IB_QPT_SMI, + MLX4_IB_QPT_GSI = IB_QPT_GSI, + + MLX4_IB_QPT_RC = IB_QPT_RC, + MLX4_IB_QPT_UC = IB_QPT_UC, + MLX4_IB_QPT_UD = IB_QPT_UD, + MLX4_IB_QPT_RAW_IPV6 = IB_QPT_RAW_IPV6, + MLX4_IB_QPT_RAW_ETHERTYPE = IB_QPT_RAW_ETHERTYPE, + MLX4_IB_QPT_RAW_PACKET = IB_QPT_RAW_PACKET, + MLX4_IB_QPT_XRC_INI = IB_QPT_XRC_INI, + MLX4_IB_QPT_XRC_TGT = IB_QPT_XRC_TGT, + + MLX4_IB_QPT_PROXY_SMI_OWNER = 1 << 16, + MLX4_IB_QPT_PROXY_SMI = 1 << 17, + MLX4_IB_QPT_PROXY_GSI = 1 << 18, + MLX4_IB_QPT_TUN_SMI_OWNER = 1 << 19, + MLX4_IB_QPT_TUN_SMI = 1 << 20, + MLX4_IB_QPT_TUN_GSI = 1 << 21, +}; + +#define MLX4_IB_QPT_ANY_SRIOV (MLX4_IB_QPT_PROXY_SMI_OWNER | \ + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \ + MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI) + +struct mlx4_ib_tunnel_header { + struct mlx4_av av; + __be32 remote_qpn; + __be32 qkey; + __be16 vlan; + u8 mac[6]; + __be16 pkey_index; + u8 reserved[6]; +}; + +struct mlx4_ib_buf { + void *addr; + dma_addr_t map; +}; + +struct mlx4_rcv_tunnel_hdr { + __be32 flags_src_qp; /* flags[6:5] is defined for VLANs: + * 0x0 - no vlan was in the packet + * 0x01 - C-VLAN was in the packet */ + u8 g_ml_path; /* gid bit stands for ipv6/4 header in RoCE */ + u8 reserved; + __be16 pkey_index; + __be16 sl_vid; + __be16 slid_mac_47_32; + __be32 mac_31_0; +}; + +struct mlx4_ib_proxy_sqp_hdr { + struct ib_grh grh; + struct mlx4_rcv_tunnel_hdr tun; +} __packed; + struct mlx4_ib_qp { struct ib_qp ibqp; struct mlx4_qp mqp; @@ -159,6 +223,7 @@ struct mlx4_ib_qp { int sq_spare_wqes; struct mlx4_ib_wq sq; + enum mlx4_ib_qp_type mlx4_ib_qp_type; struct ib_umem *umem; struct mlx4_mtt mtt; int buf_size; @@ -174,6 +239,8 @@ struct mlx4_ib_qp { int mlx_type; struct list_head gid_list; struct list_head steering_rules; + struct mlx4_ib_buf *sqp_proxy_rcv; + }; struct mlx4_ib_srq { @@ -196,6 +263,55 @@ struct mlx4_ib_ah { union mlx4_ext_av av; }; +struct mlx4_ib_tun_tx_buf { + struct mlx4_ib_buf buf; + struct ib_ah *ah; +}; + +struct mlx4_ib_demux_pv_qp { + struct ib_qp *qp; + enum ib_qp_type proxy_qpt; + struct mlx4_ib_buf *ring; + struct mlx4_ib_tun_tx_buf *tx_ring; + spinlock_t tx_lock; + unsigned tx_ix_head; + unsigned tx_ix_tail; +}; + +struct mlx4_ib_demux_pv_ctx { + int port; + int slave; + int has_smi; + struct ib_device *ib_dev; + struct ib_cq *cq; + struct ib_pd *pd; + struct ib_mr *mr; + struct work_struct work; + struct workqueue_struct *wq; + struct mlx4_ib_demux_pv_qp qp[2]; +}; + +struct mlx4_ib_demux_ctx { + struct ib_device *ib_dev; + int port; + struct workqueue_struct *wq; + struct workqueue_struct *ud_wq; + spinlock_t ud_lock; + __be64 subnet_prefix; + __be64 guid_cache[128]; + struct mlx4_ib_dev *dev; + struct mlx4_ib_demux_pv_ctx **tun; +}; + +struct mlx4_ib_sriov { + struct mlx4_ib_demux_ctx demux[MLX4_MAX_PORTS]; + struct mlx4_ib_demux_pv_ctx *sqps[MLX4_MAX_PORTS]; + /* when using this spinlock you should use "irq" because + * it may be called from interrupt context.*/ + spinlock_t going_down_lock; + int is_going_down; +}; + struct mlx4_ib_iboe { spinlock_t lock; struct net_device *netdevs[MLX4_MAX_PORTS]; @@ -216,6 +332,7 @@ struct mlx4_ib_dev { struct ib_mad_agent *send_agent[MLX4_MAX_PORTS][2]; struct ib_ah *sm_ah[MLX4_MAX_PORTS]; spinlock_t sm_lock; + struct mlx4_ib_sriov sriov; struct mutex cap_mask_mutex; bool ib_active; @@ -231,6 +348,13 @@ struct ib_event_work { struct mlx4_eqe ib_eqe; }; +struct mlx4_ib_qp_tunnel_init_attr { + struct ib_qp_init_attr init_attr; + int slave; + enum ib_qp_type proxy_qp_type; + u8 port; +}; + static inline struct mlx4_ib_dev *to_mdev(struct ib_device *ibdev) { return container_of(ibdev, struct mlx4_ib_dev, ib_dev); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index f585eddef4b7..a8622510de42 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -110,16 +111,38 @@ static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp) return container_of(mqp, struct mlx4_ib_sqp, qp); } +static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) +{ + if (!mlx4_is_master(dev->dev)) + return 0; + + return qp->mqp.qpn >= dev->dev->caps.base_sqpn && + qp->mqp.qpn < dev->dev->caps.base_sqpn + + 8 + 16 * MLX4_MFUNC_MAX; +} + static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - return qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 3; + return ((mlx4_is_master(dev->dev) && + qp->mqp.qpn >= dev->dev->caps.base_sqpn && + qp->mqp.qpn <= dev->dev->caps.base_sqpn + 3) || + (qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 3)); } +/* used for INIT/CLOSE port logic */ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - return qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 1; + int qp0; + + /* qp0 is either the proxy qp0, or the real qp0 */ + qp0 = (qp->mqp.qpn >= dev->dev->caps.sqp_start && + qp->mqp.qpn <= dev->dev->caps.sqp_start + 1) || + (mlx4_is_master(dev->dev) && + qp->mqp.qpn >= dev->dev->caps.base_sqpn && + qp->mqp.qpn <= dev->dev->caps.base_sqpn + 1); + + return qp0; } static void *get_wqe(struct mlx4_ib_qp *qp, int offset) @@ -270,7 +293,7 @@ static void mlx4_ib_qp_event(struct mlx4_qp *qp, enum mlx4_event type) } } -static int send_wqe_overhead(enum ib_qp_type type, u32 flags) +static int send_wqe_overhead(enum mlx4_ib_qp_type type, u32 flags) { /* * UD WQEs must have a datagram segment. @@ -279,19 +302,29 @@ static int send_wqe_overhead(enum ib_qp_type type, u32 flags) * header and space for the ICRC). */ switch (type) { - case IB_QPT_UD: + case MLX4_IB_QPT_UD: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_datagram_seg) + ((flags & MLX4_IB_QP_LSO) ? MLX4_IB_LSO_HEADER_SPARE : 0); - case IB_QPT_UC: + case MLX4_IB_QPT_PROXY_SMI_OWNER: + case MLX4_IB_QPT_PROXY_SMI: + case MLX4_IB_QPT_PROXY_GSI: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_datagram_seg) + 64; + case MLX4_IB_QPT_TUN_SMI_OWNER: + case MLX4_IB_QPT_TUN_GSI: + return sizeof (struct mlx4_wqe_ctrl_seg) + + sizeof (struct mlx4_wqe_datagram_seg); + + case MLX4_IB_QPT_UC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_raddr_seg); - case IB_QPT_RC: + case MLX4_IB_QPT_RC: return sizeof (struct mlx4_wqe_ctrl_seg) + sizeof (struct mlx4_wqe_atomic_seg) + sizeof (struct mlx4_wqe_raddr_seg); - case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX4_IB_QPT_SMI: + case MLX4_IB_QPT_GSI: return sizeof (struct mlx4_wqe_ctrl_seg) + ALIGN(MLX4_IB_UD_HEADER_SIZE + DIV_ROUND_UP(MLX4_IB_UD_HEADER_SIZE, @@ -345,7 +378,7 @@ static int set_rq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, } static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, - enum ib_qp_type type, struct mlx4_ib_qp *qp) + enum mlx4_ib_qp_type type, struct mlx4_ib_qp *qp) { int s; @@ -360,7 +393,8 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, * For MLX transport we need 2 extra S/G entries: * one for the header and one for the checksum at the end */ - if ((type == IB_QPT_SMI || type == IB_QPT_GSI) && + if ((type == MLX4_IB_QPT_SMI || type == MLX4_IB_QPT_GSI || + type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) && cap->max_send_sge + 2 > dev->dev->caps.max_sq_sg) return -EINVAL; @@ -404,7 +438,9 @@ static int set_kernel_sq_size(struct mlx4_ib_dev *dev, struct ib_qp_cap *cap, */ if (dev->dev->caps.fw_ver >= MLX4_FW_VER_WQE_CTRL_NEC && qp->sq_signal_bits && BITS_PER_LONG == 64 && - type != IB_QPT_SMI && type != IB_QPT_GSI) + type != MLX4_IB_QPT_SMI && type != MLX4_IB_QPT_GSI && + !(type & (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_PROXY_SMI | + MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) qp->sq.wqe_shift = ilog2(64); else qp->sq.wqe_shift = ilog2(roundup_pow_of_two(s)); @@ -476,6 +512,54 @@ static int set_user_sq_size(struct mlx4_ib_dev *dev, return 0; } +static int alloc_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) +{ + int i; + + qp->sqp_proxy_rcv = + kmalloc(sizeof (struct mlx4_ib_buf) * qp->rq.wqe_cnt, + GFP_KERNEL); + if (!qp->sqp_proxy_rcv) + return -ENOMEM; + for (i = 0; i < qp->rq.wqe_cnt; i++) { + qp->sqp_proxy_rcv[i].addr = + kmalloc(sizeof (struct mlx4_ib_proxy_sqp_hdr), + GFP_KERNEL); + if (!qp->sqp_proxy_rcv[i].addr) + goto err; + qp->sqp_proxy_rcv[i].map = + ib_dma_map_single(dev, qp->sqp_proxy_rcv[i].addr, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + } + return 0; + +err: + while (i > 0) { + --i; + ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + kfree(qp->sqp_proxy_rcv[i].addr); + } + kfree(qp->sqp_proxy_rcv); + qp->sqp_proxy_rcv = NULL; + return -ENOMEM; +} + +static void free_proxy_bufs(struct ib_device *dev, struct mlx4_ib_qp *qp) +{ + int i; + + for (i = 0; i < qp->rq.wqe_cnt; i++) { + ib_dma_unmap_single(dev, qp->sqp_proxy_rcv[i].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + kfree(qp->sqp_proxy_rcv[i].addr); + } + kfree(qp->sqp_proxy_rcv); +} + static int qp_has_rq(struct ib_qp_init_attr *attr) { if (attr->qp_type == IB_QPT_XRC_INI || attr->qp_type == IB_QPT_XRC_TGT) @@ -486,10 +570,71 @@ static int qp_has_rq(struct ib_qp_init_attr *attr) static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct ib_qp_init_attr *init_attr, - struct ib_udata *udata, int sqpn, struct mlx4_ib_qp *qp) + struct ib_udata *udata, int sqpn, struct mlx4_ib_qp **caller_qp) { int qpn; int err; + struct mlx4_ib_sqp *sqp; + struct mlx4_ib_qp *qp; + enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; + + /* When tunneling special qps, we use a plain UD qp */ + if (sqpn) { + if (mlx4_is_mfunc(dev->dev) && + (!mlx4_is_master(dev->dev) || + !(init_attr->create_flags & MLX4_IB_SRIOV_SQP))) { + if (init_attr->qp_type == IB_QPT_GSI) + qp_type = MLX4_IB_QPT_PROXY_GSI; + else if (mlx4_is_master(dev->dev)) + qp_type = MLX4_IB_QPT_PROXY_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_PROXY_SMI; + } + qpn = sqpn; + /* add extra sg entry for tunneling */ + init_attr->cap.max_recv_sge++; + } else if (init_attr->create_flags & MLX4_IB_SRIOV_TUNNEL_QP) { + struct mlx4_ib_qp_tunnel_init_attr *tnl_init = + container_of(init_attr, + struct mlx4_ib_qp_tunnel_init_attr, init_attr); + if ((tnl_init->proxy_qp_type != IB_QPT_SMI && + tnl_init->proxy_qp_type != IB_QPT_GSI) || + !mlx4_is_master(dev->dev)) + return -EINVAL; + if (tnl_init->proxy_qp_type == IB_QPT_GSI) + qp_type = MLX4_IB_QPT_TUN_GSI; + else if (tnl_init->slave == mlx4_master_func_num(dev->dev)) + qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; + else + qp_type = MLX4_IB_QPT_TUN_SMI; + qpn = dev->dev->caps.base_tunnel_sqpn + 8 * tnl_init->slave + + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; + sqpn = qpn; + } + + if (!*caller_qp) { + if (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI || + (qp_type & (MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER))) { + sqp = kzalloc(sizeof (struct mlx4_ib_sqp), GFP_KERNEL); + if (!sqp) + return -ENOMEM; + qp = &sqp->qp; + } else { + qp = kzalloc(sizeof (struct mlx4_ib_qp), GFP_KERNEL); + if (!qp) + return -ENOMEM; + } + } else + qp = *caller_qp; + + qp->mlx4_ib_qp_type = qp_type; + + if (mlx4_is_mfunc(dev->dev) && + (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI)) { + qpn -= 8; + sqpn -= 8; + } mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); @@ -550,7 +695,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, if (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO) qp->flags |= MLX4_IB_QP_LSO; - err = set_kernel_sq_size(dev, &init_attr->cap, init_attr->qp_type, qp); + err = set_kernel_sq_size(dev, &init_attr->cap, qp_type, qp); if (err) goto err; @@ -586,7 +731,13 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, } if (sqpn) { - qpn = sqpn; + if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + if (alloc_proxy_bufs(pd->device, qp)) { + err = -ENOMEM; + goto err_wrid; + } + } } else { /* Raw packet QPNs must be aligned to 8 bits. If not, the WQE * BlueFlame setup flow wrongly causes VLAN insertion. */ @@ -595,7 +746,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, else err = mlx4_qp_reserve_range(dev->dev, 1, 1, &qpn); if (err) - goto err_wrid; + goto err_proxy; } err = mlx4_qp_alloc(dev->dev, qpn, &qp->mqp); @@ -613,13 +764,16 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->doorbell_qpn = swab32(qp->mqp.qpn << 8); qp->mqp.event = mlx4_ib_qp_event; - + if (!*caller_qp) + *caller_qp = qp; return 0; err_qpn: if (!sqpn) mlx4_qp_release_range(dev->dev, qpn, 1); - +err_proxy: + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI) + free_proxy_bufs(pd->device, qp); err_wrid: if (pd->uobject) { if (qp_has_rq(init_attr)) @@ -643,6 +797,8 @@ err_db: mlx4_db_free(dev->dev, &qp->db); err: + if (!*caller_qp) + kfree(qp); return err; } @@ -755,7 +911,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_qp_free(dev->dev, &qp->mqp); - if (!is_sqp(dev, qp)) + if (!is_sqp(dev, qp) && !is_tunnel_qp(dev, qp)) mlx4_qp_release_range(dev->dev, qp->mqp.qpn, 1); mlx4_mtt_cleanup(dev->dev, &qp->mtt); @@ -768,6 +924,9 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, } else { kfree(qp->sq.wrid); kfree(qp->rq.wrid); + if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) + free_proxy_bufs(&dev->ib_dev, qp); mlx4_buf_free(dev->dev, qp->buf_size, &qp->buf); if (qp->rq.wqe_cnt) mlx4_db_free(dev->dev, &qp->db); @@ -780,21 +939,25 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) { - struct mlx4_ib_sqp *sqp; - struct mlx4_ib_qp *qp; + struct mlx4_ib_qp *qp = NULL; int err; u16 xrcdn = 0; /* - * We only support LSO and multicast loopback blocking, and - * only for kernel UD QPs. + * We only support LSO, vendor flag1, and multicast loopback blocking, + * and only for kernel UD QPs. */ - if (init_attr->create_flags & ~(IB_QP_CREATE_IPOIB_UD_LSO | - IB_QP_CREATE_BLOCK_MULTICAST_LOOPBACK)) + if (init_attr->create_flags & ~(MLX4_IB_QP_LSO | + MLX4_IB_QP_BLOCK_MULTICAST_LOOPBACK | + MLX4_IB_SRIOV_TUNNEL_QP | MLX4_IB_SRIOV_SQP)) return ERR_PTR(-EINVAL); if (init_attr->create_flags && - (udata || init_attr->qp_type != IB_QPT_UD)) + (udata || + ((init_attr->create_flags & ~MLX4_IB_SRIOV_SQP) && + init_attr->qp_type != IB_QPT_UD) || + ((init_attr->create_flags & MLX4_IB_SRIOV_SQP) && + init_attr->qp_type > IB_QPT_GSI))) return ERR_PTR(-EINVAL); switch (init_attr->qp_type) { @@ -810,18 +973,17 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, /* fall through */ case IB_QPT_RC: case IB_QPT_UC: - case IB_QPT_UD: case IB_QPT_RAW_PACKET: - { qp = kzalloc(sizeof *qp, GFP_KERNEL); if (!qp) return ERR_PTR(-ENOMEM); - - err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, 0, qp); - if (err) { - kfree(qp); + /* fall through */ + case IB_QPT_UD: + { + err = create_qp_common(to_mdev(pd->device), pd, init_attr, + udata, 0, &qp); + if (err) return ERR_PTR(err); - } qp->ibqp.qp_num = qp->mqp.qpn; qp->xrcdn = xrcdn; @@ -835,21 +997,13 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, if (udata) return ERR_PTR(-EINVAL); - sqp = kzalloc(sizeof *sqp, GFP_KERNEL); - if (!sqp) - return ERR_PTR(-ENOMEM); - - qp = &sqp->qp; - err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, to_mdev(pd->device)->dev->caps.sqp_start + (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + init_attr->port_num - 1, - qp); - if (err) { - kfree(sqp); + &qp); + if (err) return ERR_PTR(err); - } qp->port = init_attr->port_num; qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1; @@ -884,18 +1038,27 @@ int mlx4_ib_destroy_qp(struct ib_qp *qp) return 0; } -static int to_mlx4_st(enum ib_qp_type type) +static int to_mlx4_st(struct mlx4_ib_dev *dev, enum mlx4_ib_qp_type type) { switch (type) { - case IB_QPT_RC: return MLX4_QP_ST_RC; - case IB_QPT_UC: return MLX4_QP_ST_UC; - case IB_QPT_UD: return MLX4_QP_ST_UD; - case IB_QPT_XRC_INI: - case IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; - case IB_QPT_SMI: - case IB_QPT_GSI: - case IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; - default: return -1; + case MLX4_IB_QPT_RC: return MLX4_QP_ST_RC; + case MLX4_IB_QPT_UC: return MLX4_QP_ST_UC; + case MLX4_IB_QPT_UD: return MLX4_QP_ST_UD; + case MLX4_IB_QPT_XRC_INI: + case MLX4_IB_QPT_XRC_TGT: return MLX4_QP_ST_XRC; + case MLX4_IB_QPT_SMI: + case MLX4_IB_QPT_GSI: + case MLX4_IB_QPT_RAW_PACKET: return MLX4_QP_ST_MLX; + + case MLX4_IB_QPT_PROXY_SMI_OWNER: + case MLX4_IB_QPT_TUN_SMI_OWNER: return (mlx4_is_mfunc(dev->dev) ? + MLX4_QP_ST_MLX : -1); + case MLX4_IB_QPT_PROXY_SMI: + case MLX4_IB_QPT_TUN_SMI: + case MLX4_IB_QPT_PROXY_GSI: + case MLX4_IB_QPT_TUN_GSI: return (mlx4_is_mfunc(dev->dev) ? + MLX4_QP_ST_UD : -1); + default: return -1; } } @@ -1043,7 +1206,7 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, return -ENOMEM; context->flags = cpu_to_be32((to_mlx4_state(new_state) << 28) | - (to_mlx4_st(ibqp->qp_type) << 16)); + (to_mlx4_st(dev, qp->mlx4_ib_qp_type) << 16)); if (!(attr_mask & IB_QP_PATH_MIG_STATE)) context->flags |= cpu_to_be32(MLX4_QP_PM_MIGRATED << 11); @@ -1121,13 +1284,16 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, } if (attr_mask & IB_QP_PKEY_INDEX) { + if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) + context->pri_path.disable_pkey_check = 0x40; context->pri_path.pkey_index = attr->pkey_index; optpar |= MLX4_QP_OPTPAR_PKEY_INDEX; } if (attr_mask & IB_QP_AV) { if (mlx4_set_path(dev, &attr->ah_attr, &context->pri_path, - attr_mask & IB_QP_PORT ? attr->port_num : qp->port)) + attr_mask & IB_QP_PORT ? + attr->port_num : qp->port)) goto out; optpar |= (MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH | @@ -1210,8 +1376,24 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, if (attr_mask & IB_QP_RQ_PSN) context->rnr_nextrecvpsn |= cpu_to_be32(attr->rq_psn); + /* proxy and tunnel qp qkeys will be changed in modify-qp wrappers */ if (attr_mask & IB_QP_QKEY) { - context->qkey = cpu_to_be32(attr->qkey); + if (qp->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) + context->qkey = cpu_to_be32(IB_QP_SET_QKEY); + else { + if (mlx4_is_mfunc(dev->dev) && + !(qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) && + (attr->qkey & MLX4_RESERVED_QKEY_MASK) == + MLX4_RESERVED_QKEY_BASE) { + pr_err("Cannot use reserved QKEY" + " 0x%x (range 0xffff0000..0xffffffff" + " is reserved)\n", attr->qkey); + err = -EINVAL; + goto out; + } + context->qkey = cpu_to_be32(attr->qkey); + } optpar |= MLX4_QP_OPTPAR_Q_KEY; } @@ -1227,10 +1409,17 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_RAW_PACKET)) { context->pri_path.sched_queue = (qp->port - 1) << 6; - if (is_qp0(dev, qp)) + if (qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || + qp->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER)) { context->pri_path.sched_queue |= MLX4_IB_DEFAULT_QP0_SCHED_QUEUE; - else + if (qp->mlx4_ib_qp_type != MLX4_IB_QPT_SMI) + context->pri_path.fl = 0x80; + } else { + if (qp->mlx4_ib_qp_type & MLX4_IB_QPT_ANY_SRIOV) + context->pri_path.fl = 0x80; context->pri_path.sched_queue |= MLX4_IB_DEFAULT_SCHED_QUEUE; + } } if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && @@ -1346,7 +1535,7 @@ int mlx4_ib_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, } if ((attr_mask & IB_QP_PORT) && - (attr->port_num == 0 || attr->port_num > dev->dev->caps.num_ports)) { + (attr->port_num == 0 || attr->port_num > dev->num_ports)) { pr_debug("qpn 0x%x: invalid port number (%d) specified " "for transition %d to %d. qp_type %d\n", ibqp->qp_num, attr->port_num, cur_state, @@ -1400,6 +1589,115 @@ out: return err; } +static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, + struct ib_send_wr *wr, + void *wqe, unsigned *mlx_seg_len) +{ + struct mlx4_ib_dev *mdev = to_mdev(sqp->qp.ibqp.device); + struct ib_device *ib_dev = &mdev->ib_dev; + struct mlx4_wqe_mlx_seg *mlx = wqe; + struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx; + struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + u16 pkey; + u32 qkey; + int send_size; + int header_size; + int spc; + int i; + + if (wr->opcode != IB_WR_SEND) + return -EINVAL; + + send_size = 0; + + for (i = 0; i < wr->num_sge; ++i) + send_size += wr->sg_list[i].length; + + /* for proxy-qp0 sends, need to add in size of tunnel header */ + /* for tunnel-qp0 sends, tunnel header is already in s/g list */ + if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) + send_size += sizeof (struct mlx4_ib_tunnel_header); + + ib_ud_header_init(send_size, 1, 0, 0, 0, 0, &sqp->ud_header); + + if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_SMI_OWNER) { + sqp->ud_header.lrh.service_level = + be32_to_cpu(ah->av.ib.sl_tclass_flowlabel) >> 28; + sqp->ud_header.lrh.destination_lid = + cpu_to_be16(ah->av.ib.g_slid & 0x7f); + sqp->ud_header.lrh.source_lid = + cpu_to_be16(ah->av.ib.g_slid & 0x7f); + } + + mlx->flags &= cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE); + + /* force loopback */ + mlx->flags |= cpu_to_be32(MLX4_WQE_MLX_VL15 | 0x1 | MLX4_WQE_MLX_SLR); + mlx->rlid = sqp->ud_header.lrh.destination_lid; + + sqp->ud_header.lrh.virtual_lane = 0; + sqp->ud_header.bth.solicited_event = !!(wr->send_flags & IB_SEND_SOLICITED); + ib_get_cached_pkey(ib_dev, sqp->qp.port, 0, &pkey); + sqp->ud_header.bth.pkey = cpu_to_be16(pkey); + if (sqp->qp.mlx4_ib_qp_type == MLX4_IB_QPT_TUN_SMI_OWNER) + sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + else + sqp->ud_header.bth.destination_qpn = + cpu_to_be32(mdev->dev->caps.base_tunnel_sqpn + + sqp->qp.port - 1); + + sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); + if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) + return -EINVAL; + sqp->ud_header.deth.qkey = cpu_to_be32(qkey); + sqp->ud_header.deth.source_qpn = cpu_to_be32(sqp->qp.mqp.qpn); + + sqp->ud_header.bth.opcode = IB_OPCODE_UD_SEND_ONLY; + sqp->ud_header.immediate_present = 0; + + header_size = ib_ud_header_pack(&sqp->ud_header, sqp->header_buf); + + /* + * Inline data segments may not cross a 64 byte boundary. If + * our UD header is bigger than the space available up to the + * next 64 byte boundary in the WQE, use two inline data + * segments to hold the UD header. + */ + spc = MLX4_INLINE_ALIGN - + ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (header_size <= spc) { + inl->byte_count = cpu_to_be32(1 << 31 | header_size); + memcpy(inl + 1, sqp->header_buf, header_size); + i = 1; + } else { + inl->byte_count = cpu_to_be32(1 << 31 | spc); + memcpy(inl + 1, sqp->header_buf, spc); + + inl = (void *) (inl + 1) + spc; + memcpy(inl + 1, sqp->header_buf + spc, header_size - spc); + /* + * Need a barrier here to make sure all the data is + * visible before the byte_count field is set. + * Otherwise the HCA prefetcher could grab the 64-byte + * chunk with this inline segment and get a valid (!= + * 0xffffffff) byte count but stale data, and end up + * generating a packet with bad headers. + * + * The first inline segment's byte_count field doesn't + * need a barrier, because it comes after a + * control/MLX segment and therefore is at an offset + * of 16 mod 64. + */ + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (header_size - spc)); + i = 2; + } + + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + header_size, 16); + return 0; +} + static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) { @@ -1418,6 +1716,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, int is_vlan = 0; int is_grh; u16 vlan; + int err = 0; send_size = 0; for (i = 0; i < wr->num_sge; ++i) @@ -1426,8 +1725,24 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET; is_grh = mlx4_ib_ah_grh_present(ah); if (is_eth) { - ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &sgid); + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache */ + sgid.global.subnet_prefix = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + subnet_prefix; + sgid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else { + err = ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, &sgid); + if (err) + return err; + } + vlan = rdma_get_vlan_id(&sgid); is_vlan = vlan < 0x1000; } @@ -1446,8 +1761,21 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, sqp->ud_header.grh.flow_label = ah->av.ib.sl_tclass_flowlabel & cpu_to_be32(0xfffff); sqp->ud_header.grh.hop_limit = ah->av.ib.hop_limit; - ib_get_cached_gid(ib_dev, be32_to_cpu(ah->av.ib.port_pd) >> 24, - ah->av.ib.gid_index, &sqp->ud_header.grh.source_gid); + if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) { + /* When multi-function is enabled, the ib_core gid + * indexes don't necessarily match the hw ones, so + * we must use our own cache */ + sqp->ud_header.grh.source_gid.global.subnet_prefix = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + subnet_prefix; + sqp->ud_header.grh.source_gid.global.interface_id = + to_mdev(ib_dev)->sriov.demux[sqp->qp.port - 1]. + guid_cache[ah->av.ib.gid_index]; + } else + ib_get_cached_gid(ib_dev, + be32_to_cpu(ah->av.ib.port_pd) >> 24, + ah->av.ib.gid_index, + &sqp->ud_header.grh.source_gid); memcpy(sqp->ud_header.grh.destination_gid.raw, ah->av.ib.dgid, 16); } @@ -1459,6 +1787,8 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_send_wr *wr, (sqp->ud_header.lrh.destination_lid == IB_LID_PERMISSIVE ? MLX4_WQE_MLX_SLR : 0) | (sqp->ud_header.lrh.service_level << 8)); + if (ah->av.ib.port_pd & cpu_to_be32(0x80000000)) + mlx->flags |= cpu_to_be32(0x1); /* force loopback */ mlx->rlid = sqp->ud_header.lrh.destination_lid; } @@ -1667,6 +1997,63 @@ static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg, memcpy(dseg->mac, to_mah(wr->wr.ud.ah)->av.eth.mac, 6); } +static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, + struct mlx4_wqe_datagram_seg *dseg, + struct ib_send_wr *wr, enum ib_qp_type qpt) +{ + union mlx4_ext_av *av = &to_mah(wr->wr.ud.ah)->av; + struct mlx4_av sqp_av = {0}; + int port = *((u8 *) &av->ib.port_pd) & 0x3; + + /* force loopback */ + sqp_av.port_pd = av->ib.port_pd | cpu_to_be32(0x80000000); + sqp_av.g_slid = av->ib.g_slid & 0x7f; /* no GRH */ + sqp_av.sl_tclass_flowlabel = av->ib.sl_tclass_flowlabel & + cpu_to_be32(0xf0000000); + + memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); + dseg->dqpn = cpu_to_be32(dev->dev->caps.base_tunnel_sqpn + + qpt * 2 + port - 1); + /* use well-known qkey from the QPC */ + dseg->qkey = cpu_to_be32(0x80000000); +} + +static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) +{ + struct mlx4_wqe_inline_seg *inl = wqe; + struct mlx4_ib_tunnel_header hdr; + struct mlx4_ib_ah *ah = to_mah(wr->wr.ud.ah); + int spc; + int i; + + memcpy(&hdr.av, &ah->av, sizeof hdr.av); + hdr.remote_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); + hdr.pkey_index = cpu_to_be16(wr->wr.ud.pkey_index); + hdr.qkey = cpu_to_be32(wr->wr.ud.remote_qkey); + + spc = MLX4_INLINE_ALIGN - + ((unsigned long) (inl + 1) & (MLX4_INLINE_ALIGN - 1)); + if (sizeof (hdr) <= spc) { + memcpy(inl + 1, &hdr, sizeof (hdr)); + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | sizeof (hdr)); + i = 1; + } else { + memcpy(inl + 1, &hdr, spc); + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | spc); + + inl = (void *) (inl + 1) + spc; + memcpy(inl + 1, (void *) &hdr + spc, sizeof (hdr) - spc); + wmb(); + inl->byte_count = cpu_to_be32(1 << 31 | (sizeof (hdr) - spc)); + i = 2; + } + + *mlx_seg_len = + ALIGN(i * sizeof (struct mlx4_wqe_inline_seg) + sizeof (hdr), 16); +} + static void set_mlx_icrc_seg(void *dseg) { u32 *t = dseg; @@ -1748,6 +2135,13 @@ static __be32 send_ieth(struct ib_send_wr *wr) } } +static void add_zero_len_inline(void *wqe) +{ + struct mlx4_wqe_inline_seg *inl = wqe; + memset(wqe, 0, 16); + inl->byte_count = cpu_to_be32(1 << 31); +} + int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr) { @@ -1806,9 +2200,9 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wqe += sizeof *ctrl; size = sizeof *ctrl / 16; - switch (ibqp->qp_type) { - case IB_QPT_RC: - case IB_QPT_UC: + switch (qp->mlx4_ib_qp_type) { + case MLX4_IB_QPT_RC: + case MLX4_IB_QPT_UC: switch (wr->opcode) { case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: @@ -1869,7 +2263,25 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } break; - case IB_QPT_UD: + case MLX4_IB_QPT_TUN_SMI_OWNER: + err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + wqe += seglen; + size += seglen / 16; + break; + case MLX4_IB_QPT_TUN_SMI: + case MLX4_IB_QPT_TUN_GSI: + /* this is a UD qp used in MAD responses to slaves. */ + set_datagram_seg(wqe, wr); + /* set the forced-loopback bit in the data seg av */ + *(__be32 *) wqe |= cpu_to_be32(0x80000000); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + break; + case MLX4_IB_QPT_UD: set_datagram_seg(wqe, wr); wqe += sizeof (struct mlx4_wqe_datagram_seg); size += sizeof (struct mlx4_wqe_datagram_seg) / 16; @@ -1886,8 +2298,47 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } break; - case IB_QPT_SMI: - case IB_QPT_GSI: + case MLX4_IB_QPT_PROXY_SMI_OWNER: + if (unlikely(!mlx4_is_master(to_mdev(ibqp->device)->dev))) { + err = -ENOSYS; + *bad_wr = wr; + goto out; + } + err = build_sriov_qp0_header(to_msqp(qp), wr, ctrl, &seglen); + if (unlikely(err)) { + *bad_wr = wr; + goto out; + } + wqe += seglen; + size += seglen / 16; + /* to start tunnel header on a cache-line boundary */ + add_zero_len_inline(wqe); + wqe += 16; + size++; + build_tunnel_header(wr, wqe, &seglen); + wqe += seglen; + size += seglen / 16; + break; + case MLX4_IB_QPT_PROXY_SMI: + /* don't allow QP0 sends on guests */ + err = -ENOSYS; + *bad_wr = wr; + goto out; + case MLX4_IB_QPT_PROXY_GSI: + /* If we are tunneling special qps, this is a UD qp. + * In this case we first add a UD segment targeting + * the tunnel qp, and then add a header with address + * information */ + set_tunnel_datagram_seg(to_mdev(ibqp->device), wqe, wr, ibqp->qp_type); + wqe += sizeof (struct mlx4_wqe_datagram_seg); + size += sizeof (struct mlx4_wqe_datagram_seg) / 16; + build_tunnel_header(wr, wqe, &seglen); + wqe += seglen; + size += seglen / 16; + break; + + case MLX4_IB_QPT_SMI: + case MLX4_IB_QPT_GSI: err = build_mlx_header(to_msqp(qp), wr, ctrl, &seglen); if (unlikely(err)) { *bad_wr = wr; @@ -1913,8 +2364,10 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, size += wr->num_sge * (sizeof (struct mlx4_wqe_data_seg) / 16); /* Add one more inline data segment for ICRC for MLX sends */ - if (unlikely(qp->ibqp.qp_type == IB_QPT_SMI || - qp->ibqp.qp_type == IB_QPT_GSI)) { + if (unlikely(qp->mlx4_ib_qp_type == MLX4_IB_QPT_SMI || + qp->mlx4_ib_qp_type == MLX4_IB_QPT_GSI || + qp->mlx4_ib_qp_type & + (MLX4_IB_QPT_PROXY_SMI_OWNER | MLX4_IB_QPT_TUN_SMI_OWNER))) { set_mlx_icrc_seg(dseg + 1); size += sizeof (struct mlx4_wqe_data_seg) / 16; } @@ -2006,8 +2459,10 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int err = 0; int nreq; int ind; + int max_gs; int i; + max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); ind = qp->rq.head & (qp->rq.wqe_cnt - 1); @@ -2027,10 +2482,25 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, scat = get_recv_wqe(qp, ind); + if (qp->mlx4_ib_qp_type & (MLX4_IB_QPT_PROXY_SMI_OWNER | + MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI)) { + ib_dma_sync_single_for_device(ibqp->device, + qp->sqp_proxy_rcv[ind].map, + sizeof (struct mlx4_ib_proxy_sqp_hdr), + DMA_FROM_DEVICE); + scat->byte_count = + cpu_to_be32(sizeof (struct mlx4_ib_proxy_sqp_hdr)); + /* use dma lkey from upper layer entry */ + scat->lkey = cpu_to_be32(wr->sg_list->lkey); + scat->addr = cpu_to_be64(qp->sqp_proxy_rcv[ind].map); + scat++; + max_gs--; + } + for (i = 0; i < wr->num_sge; ++i) __set_data_seg(scat + i, wr->sg_list + i); - if (i < qp->rq.max_gs) { + if (i < max_gs) { scat[i].byte_count = 0; scat[i].lkey = cpu_to_be32(MLX4_INVALID_LKEY); scat[i].addr = 0; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 6e1b0f973a03..07aa8232e631 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -389,6 +389,7 @@ struct mlx4_caps { enum mlx4_port_type possible_type[MLX4_MAX_PORTS + 1]; u32 max_counters; u8 port_ib_mtu[MLX4_MAX_PORTS + 1]; + u16 sqp_demux; }; struct mlx4_buf_list { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 338388ba260a..4b4ad6ffef92 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -126,7 +126,8 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 reserved1[2]; + u8 reserved1[1]; + u8 disable_pkey_check; u8 pkey_index; u8 counter_index; u8 grh_mylmc; -- cgit v1.2.3 From e2c76824ca16a3e8443cc7b26abcb21af7c27b10 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:41 +0000 Subject: mlx4_core: Add proxy and tunnel QPs to the reserved QP area In addition, pass the proxy and tunnel QP numbers to slaves so the driver can perform special QP paravirtualization. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/fw.c | 14 +++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.h | 3 +++ drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++++ drivers/net/ethernet/mellanox/mlx4/qp.c | 29 +++++++++++++++++++--- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 3 ++- include/linux/mlx4/device.h | 13 +++++++++- 6 files changed, 62 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index c69648487321..35be90178bef 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -184,6 +184,8 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 +#define QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET 0x44 +#define QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET 0x48 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 @@ -247,6 +249,12 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + size = dev->caps.base_tunnel_sqpn + 8 * slave; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); + + size = dev->caps.sqp_start + 8 * slave; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); + } else err = -EINVAL; @@ -312,6 +320,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); func_cap->mcg_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); + func_cap->base_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); + func_cap->base_proxy_qpn = size & 0xFFFFFF; + for (i = 1; i <= func_cap->num_ports; ++i) { err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1, MLX4_CMD_QUERY_FUNC_CAP, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index 83fcbbf1b169..ced1de57c818 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,6 +134,9 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; + u32 base_qpn; + u32 base_tunnel_qpn; + u32 base_proxy_qpn; u8 physical_port[MLX4_MAX_PORTS + 1]; u8 port_flags[MLX4_MAX_PORTS + 1]; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2f816c6aed72..06ef3afbc49a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -384,6 +384,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_ADDR] + dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FC_EXCH]; + dev->caps.sqp_demux = (mlx4_is_master(dev)) ? MLX4_MAX_NUM_SLAVES : 0; return 0; } /*The function checks if there are live vf, return the num of them*/ @@ -541,6 +542,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + /* Calculate our sqp_start */ + dev->caps.sqp_start = func_cap.base_proxy_qpn; + dev->caps.base_tunnel_sqpn = func_cap.base_tunnel_qpn; + return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index fb2b36759cbf..b8da72b29cf7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -406,7 +406,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ - dev->caps.sqp_start = + dev->caps.base_sqpn = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); { @@ -437,13 +437,36 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) } + /* Reserve 8 real SQPs in both native and SRIOV modes. + * In addition, in SRIOV mode, reserve 8 proxy SQPs per function + * (for all PFs and VFs), and 8 corresponding tunnel QPs. + * Each proxy SQP works opposite its own tunnel QP. + * + * The QPs are arranged as follows: + * a. 8 real SQPs + * b. All the proxy SQPs (8 per function) + * c. All the tunnel QPs (8 per function) + */ + err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->caps.sqp_start + 8, + (1 << 23) - 1, dev->caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), reserved_from_top); + + /* In mfunc, sqp_start is the base of the proxy SQPs, since the PF also + * uses paravirtualized SQPs. + * In native mode, sqp_start is the base of the real SQPs. */ + if (mlx4_is_mfunc(dev)) { + dev->caps.sqp_start = dev->caps.base_sqpn + + 8 * (mlx4_master_func_num(dev) + 1); + dev->caps.base_tunnel_sqpn = dev->caps.sqp_start + 8 * MLX4_MFUNC_MAX; + } else + dev->caps.sqp_start = dev->caps.base_sqpn; + if (err) return err; - return mlx4_CONF_SPECIAL_QP(dev, dev->caps.sqp_start); + return mlx4_CONF_SPECIAL_QP(dev, dev->caps.base_sqpn); } void mlx4_cleanup_qp_table(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 293c9e820c49..3c57a83e6287 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -1105,7 +1105,8 @@ static void res_end_move(struct mlx4_dev *dev, int slave, static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) { - return mlx4_is_qp_reserved(dev, qpn); + return mlx4_is_qp_reserved(dev, qpn) && + (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); } static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 07aa8232e631..d5c82b7216de 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -693,7 +693,18 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { - return (qpn < dev->caps.sqp_start + 8); + return (qpn < dev->caps.base_sqpn + 8 + + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); +} + +static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) +{ + int base = dev->caps.sqp_start + slave * 8; + + if (qpn >= base && qpn < base + 8) + return 1; + + return 0; } static inline int mlx4_is_mfunc(struct mlx4_dev *dev) -- cgit v1.2.3 From fc06573dfaf8a33bc0533bb70c49de13fa5232a4 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:42 +0000 Subject: IB/mlx4: Initialize SR-IOV IB support for slaves in master context Allocate SR-IOV paravirtualization resources and MAD demuxing contexts on the master. This has two parts. The first part is to initialize the structures to contain the contexts. This is done at master startup time in mlx4_ib_init_sriov(). The second part is to actually create the tunneling resources required on the master to support a slave. This is performed the master detects that a slave has started up (MLX4_DEV_EVENT_SLAVE_INIT event generated when a slave initializes its comm channel). For the master, there is no such startup event, so it creates its own tunneling resources when it starts up. In addition, the master also creates the real special QPs. The ib_core layer on the master causes creation of proxy special QPs, since the master is also paravirtualized at the ib_core layer. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 684 +++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx4/main.c | 80 +++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 34 ++ drivers/net/ethernet/mellanox/mlx4/cmd.c | 3 + include/linux/mlx4/device.h | 3 +- include/linux/mlx4/driver.h | 2 + 6 files changed, 798 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2ae7efd00f..e98849338a94 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -44,6 +44,35 @@ enum { MLX4_IB_VENDOR_CLASS2 = 0xa }; +#define MLX4_TUN_SEND_WRID_SHIFT 34 +#define MLX4_TUN_QPN_SHIFT 32 +#define MLX4_TUN_WRID_RECV (((u64) 1) << MLX4_TUN_SEND_WRID_SHIFT) +#define MLX4_TUN_SET_WRID_QPN(a) (((u64) ((a) & 0x3)) << MLX4_TUN_QPN_SHIFT) + +#define MLX4_TUN_IS_RECV(a) (((a) >> MLX4_TUN_SEND_WRID_SHIFT) & 0x1) +#define MLX4_TUN_WRID_QPN(a) (((a) >> MLX4_TUN_QPN_SHIFT) & 0x3) + +struct mlx4_mad_rcv_buf { + struct ib_grh grh; + u8 payload[256]; +} __packed; + +struct mlx4_mad_snd_buf { + u8 payload[256]; +} __packed; + +struct mlx4_tunnel_mad { + struct ib_grh grh; + struct mlx4_ib_tunnel_header hdr; + struct ib_mad mad; +} __packed; + +struct mlx4_rcv_tunnel_mad { + struct mlx4_rcv_tunnel_hdr hdr; + struct ib_grh grh; + struct ib_mad mad; +} __packed; + int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey, int port, struct ib_wc *in_wc, struct ib_grh *in_grh, void *in_mad, void *response_mad) @@ -516,3 +545,658 @@ void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, ib_dispatch_event(&event); } + +static void mlx4_ib_tunnel_comp_handler(struct ib_cq *cq, void *arg) +{ + unsigned long flags; + struct mlx4_ib_demux_pv_ctx *ctx = cq->cq_context; + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + if (!dev->sriov.is_going_down && ctx->state == DEMUX_PV_STATE_ACTIVE) + queue_work(ctx->wq, &ctx->work); + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); +} + +static int mlx4_ib_post_pv_qp_buf(struct mlx4_ib_demux_pv_ctx *ctx, + struct mlx4_ib_demux_pv_qp *tun_qp, + int index) +{ + struct ib_sge sg_list; + struct ib_recv_wr recv_wr, *bad_recv_wr; + int size; + + size = (tun_qp->qp->qp_type == IB_QPT_UD) ? + sizeof (struct mlx4_tunnel_mad) : sizeof (struct mlx4_mad_rcv_buf); + + sg_list.addr = tun_qp->ring[index].map; + sg_list.length = size; + sg_list.lkey = ctx->mr->lkey; + + recv_wr.next = NULL; + recv_wr.sg_list = &sg_list; + recv_wr.num_sge = 1; + recv_wr.wr_id = (u64) index | MLX4_TUN_WRID_RECV | + MLX4_TUN_SET_WRID_QPN(tun_qp->proxy_qpt); + ib_dma_sync_single_for_device(ctx->ib_dev, tun_qp->ring[index].map, + size, DMA_FROM_DEVICE); + return ib_post_recv(tun_qp->qp, &recv_wr, &bad_recv_wr); +} + +static int mlx4_ib_alloc_pv_bufs(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int is_tun) +{ + int i; + struct mlx4_ib_demux_pv_qp *tun_qp; + int rx_buf_size, tx_buf_size; + + if (qp_type > IB_QPT_GSI) + return -EINVAL; + + tun_qp = &ctx->qp[qp_type]; + + tun_qp->ring = kzalloc(sizeof (struct mlx4_ib_buf) * MLX4_NUM_TUNNEL_BUFS, + GFP_KERNEL); + if (!tun_qp->ring) + return -ENOMEM; + + tun_qp->tx_ring = kcalloc(MLX4_NUM_TUNNEL_BUFS, + sizeof (struct mlx4_ib_tun_tx_buf), + GFP_KERNEL); + if (!tun_qp->tx_ring) { + kfree(tun_qp->ring); + tun_qp->ring = NULL; + return -ENOMEM; + } + + if (is_tun) { + rx_buf_size = sizeof (struct mlx4_tunnel_mad); + tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); + } else { + rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); + tx_buf_size = sizeof (struct mlx4_mad_snd_buf); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + tun_qp->ring[i].addr = kmalloc(rx_buf_size, GFP_KERNEL); + if (!tun_qp->ring[i].addr) + goto err; + tun_qp->ring[i].map = ib_dma_map_single(ctx->ib_dev, + tun_qp->ring[i].addr, + rx_buf_size, + DMA_FROM_DEVICE); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + tun_qp->tx_ring[i].buf.addr = + kmalloc(tx_buf_size, GFP_KERNEL); + if (!tun_qp->tx_ring[i].buf.addr) + goto tx_err; + tun_qp->tx_ring[i].buf.map = + ib_dma_map_single(ctx->ib_dev, + tun_qp->tx_ring[i].buf.addr, + tx_buf_size, + DMA_TO_DEVICE); + tun_qp->tx_ring[i].ah = NULL; + } + spin_lock_init(&tun_qp->tx_lock); + tun_qp->tx_ix_head = 0; + tun_qp->tx_ix_tail = 0; + tun_qp->proxy_qpt = qp_type; + + return 0; + +tx_err: + while (i > 0) { + --i; + ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, + tx_buf_size, DMA_TO_DEVICE); + kfree(tun_qp->tx_ring[i].buf.addr); + } + kfree(tun_qp->tx_ring); + tun_qp->tx_ring = NULL; + i = MLX4_NUM_TUNNEL_BUFS; +err: + while (i > 0) { + --i; + ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, + rx_buf_size, DMA_FROM_DEVICE); + kfree(tun_qp->ring[i].addr); + } + kfree(tun_qp->ring); + tun_qp->ring = NULL; + return -ENOMEM; +} + +static void mlx4_ib_free_pv_qp_bufs(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int is_tun) +{ + int i; + struct mlx4_ib_demux_pv_qp *tun_qp; + int rx_buf_size, tx_buf_size; + + if (qp_type > IB_QPT_GSI) + return; + + tun_qp = &ctx->qp[qp_type]; + if (is_tun) { + rx_buf_size = sizeof (struct mlx4_tunnel_mad); + tx_buf_size = sizeof (struct mlx4_rcv_tunnel_mad); + } else { + rx_buf_size = sizeof (struct mlx4_mad_rcv_buf); + tx_buf_size = sizeof (struct mlx4_mad_snd_buf); + } + + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ib_dma_unmap_single(ctx->ib_dev, tun_qp->ring[i].map, + rx_buf_size, DMA_FROM_DEVICE); + kfree(tun_qp->ring[i].addr); + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ib_dma_unmap_single(ctx->ib_dev, tun_qp->tx_ring[i].buf.map, + tx_buf_size, DMA_TO_DEVICE); + kfree(tun_qp->tx_ring[i].buf.addr); + if (tun_qp->tx_ring[i].ah) + ib_destroy_ah(tun_qp->tx_ring[i].ah); + } + kfree(tun_qp->tx_ring); + kfree(tun_qp->ring); +} + +static void mlx4_ib_tunnel_comp_worker(struct work_struct *work) +{ + /* dummy until next patch in series */ +} + +static void pv_qp_event_handler(struct ib_event *event, void *qp_context) +{ + struct mlx4_ib_demux_pv_ctx *sqp = qp_context; + + /* It's worse than that! He's dead, Jim! */ + pr_err("Fatal error (%d) on a MAD QP on port %d\n", + event->event, sqp->port); +} + +static int create_pv_sqp(struct mlx4_ib_demux_pv_ctx *ctx, + enum ib_qp_type qp_type, int create_tun) +{ + int i, ret; + struct mlx4_ib_demux_pv_qp *tun_qp; + struct mlx4_ib_qp_tunnel_init_attr qp_init_attr; + struct ib_qp_attr attr; + int qp_attr_mask_INIT; + + if (qp_type > IB_QPT_GSI) + return -EINVAL; + + tun_qp = &ctx->qp[qp_type]; + + memset(&qp_init_attr, 0, sizeof qp_init_attr); + qp_init_attr.init_attr.send_cq = ctx->cq; + qp_init_attr.init_attr.recv_cq = ctx->cq; + qp_init_attr.init_attr.sq_sig_type = IB_SIGNAL_ALL_WR; + qp_init_attr.init_attr.cap.max_send_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_recv_wr = MLX4_NUM_TUNNEL_BUFS; + qp_init_attr.init_attr.cap.max_send_sge = 1; + qp_init_attr.init_attr.cap.max_recv_sge = 1; + if (create_tun) { + qp_init_attr.init_attr.qp_type = IB_QPT_UD; + qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_TUNNEL_QP; + qp_init_attr.port = ctx->port; + qp_init_attr.slave = ctx->slave; + qp_init_attr.proxy_qp_type = qp_type; + qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_QKEY | IB_QP_PORT; + } else { + qp_init_attr.init_attr.qp_type = qp_type; + qp_init_attr.init_attr.create_flags = MLX4_IB_SRIOV_SQP; + qp_attr_mask_INIT = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_QKEY; + } + qp_init_attr.init_attr.port_num = ctx->port; + qp_init_attr.init_attr.qp_context = ctx; + qp_init_attr.init_attr.event_handler = pv_qp_event_handler; + tun_qp->qp = ib_create_qp(ctx->pd, &qp_init_attr.init_attr); + if (IS_ERR(tun_qp->qp)) { + ret = PTR_ERR(tun_qp->qp); + tun_qp->qp = NULL; + pr_err("Couldn't create %s QP (%d)\n", + create_tun ? "tunnel" : "special", ret); + return ret; + } + + memset(&attr, 0, sizeof attr); + attr.qp_state = IB_QPS_INIT; + attr.pkey_index = + to_mdev(ctx->ib_dev)->pkeys.virt2phys_pkey[ctx->slave][ctx->port - 1][0]; + attr.qkey = IB_QP1_QKEY; + attr.port_num = ctx->port; + ret = ib_modify_qp(tun_qp->qp, &attr, qp_attr_mask_INIT); + if (ret) { + pr_err("Couldn't change %s qp state to INIT (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + attr.qp_state = IB_QPS_RTR; + ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE); + if (ret) { + pr_err("Couldn't change %s qp state to RTR (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + attr.qp_state = IB_QPS_RTS; + attr.sq_psn = 0; + ret = ib_modify_qp(tun_qp->qp, &attr, IB_QP_STATE | IB_QP_SQ_PSN); + if (ret) { + pr_err("Couldn't change %s qp state to RTS (%d)\n", + create_tun ? "tunnel" : "special", ret); + goto err_qp; + } + + for (i = 0; i < MLX4_NUM_TUNNEL_BUFS; i++) { + ret = mlx4_ib_post_pv_qp_buf(ctx, tun_qp, i); + if (ret) { + pr_err(" mlx4_ib_post_pv_buf error" + " (err = %d, i = %d)\n", ret, i); + goto err_qp; + } + } + return 0; + +err_qp: + ib_destroy_qp(tun_qp->qp); + tun_qp->qp = NULL; + return ret; +} + +/* + * IB MAD completion callback for real SQPs + */ +static void mlx4_ib_sqp_comp_worker(struct work_struct *work) +{ + /* dummy until next patch in series */ +} + +static int alloc_pv_object(struct mlx4_ib_dev *dev, int slave, int port, + struct mlx4_ib_demux_pv_ctx **ret_ctx) +{ + struct mlx4_ib_demux_pv_ctx *ctx; + + *ret_ctx = NULL; + ctx = kzalloc(sizeof (struct mlx4_ib_demux_pv_ctx), GFP_KERNEL); + if (!ctx) { + pr_err("failed allocating pv resource context " + "for port %d, slave %d\n", port, slave); + return -ENOMEM; + } + + ctx->ib_dev = &dev->ib_dev; + ctx->port = port; + ctx->slave = slave; + *ret_ctx = ctx; + return 0; +} + +static void free_pv_object(struct mlx4_ib_dev *dev, int slave, int port) +{ + if (dev->sriov.demux[port - 1].tun[slave]) { + kfree(dev->sriov.demux[port - 1].tun[slave]); + dev->sriov.demux[port - 1].tun[slave] = NULL; + } +} + +static int create_pv_resources(struct ib_device *ibdev, int slave, int port, + int create_tun, struct mlx4_ib_demux_pv_ctx *ctx) +{ + int ret, cq_size; + + ctx->state = DEMUX_PV_STATE_STARTING; + /* have QP0 only on port owner, and only if link layer is IB */ + if (ctx->slave == mlx4_master_func_num(to_mdev(ctx->ib_dev)->dev) && + rdma_port_get_link_layer(ibdev, ctx->port) == IB_LINK_LAYER_INFINIBAND) + ctx->has_smi = 1; + + if (ctx->has_smi) { + ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_SMI, create_tun); + if (ret) { + pr_err("Failed allocating qp0 tunnel bufs (%d)\n", ret); + goto err_out; + } + } + + ret = mlx4_ib_alloc_pv_bufs(ctx, IB_QPT_GSI, create_tun); + if (ret) { + pr_err("Failed allocating qp1 tunnel bufs (%d)\n", ret); + goto err_out_qp0; + } + + cq_size = 2 * MLX4_NUM_TUNNEL_BUFS; + if (ctx->has_smi) + cq_size *= 2; + + ctx->cq = ib_create_cq(ctx->ib_dev, mlx4_ib_tunnel_comp_handler, + NULL, ctx, cq_size, 0); + if (IS_ERR(ctx->cq)) { + ret = PTR_ERR(ctx->cq); + pr_err("Couldn't create tunnel CQ (%d)\n", ret); + goto err_buf; + } + + ctx->pd = ib_alloc_pd(ctx->ib_dev); + if (IS_ERR(ctx->pd)) { + ret = PTR_ERR(ctx->pd); + pr_err("Couldn't create tunnel PD (%d)\n", ret); + goto err_cq; + } + + ctx->mr = ib_get_dma_mr(ctx->pd, IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(ctx->mr)) { + ret = PTR_ERR(ctx->mr); + pr_err("Couldn't get tunnel DMA MR (%d)\n", ret); + goto err_pd; + } + + if (ctx->has_smi) { + ret = create_pv_sqp(ctx, IB_QPT_SMI, create_tun); + if (ret) { + pr_err("Couldn't create %s QP0 (%d)\n", + create_tun ? "tunnel for" : "", ret); + goto err_mr; + } + } + + ret = create_pv_sqp(ctx, IB_QPT_GSI, create_tun); + if (ret) { + pr_err("Couldn't create %s QP1 (%d)\n", + create_tun ? "tunnel for" : "", ret); + goto err_qp0; + } + + if (create_tun) + INIT_WORK(&ctx->work, mlx4_ib_tunnel_comp_worker); + else + INIT_WORK(&ctx->work, mlx4_ib_sqp_comp_worker); + + ctx->wq = to_mdev(ibdev)->sriov.demux[port - 1].wq; + + ret = ib_req_notify_cq(ctx->cq, IB_CQ_NEXT_COMP); + if (ret) { + pr_err("Couldn't arm tunnel cq (%d)\n", ret); + goto err_wq; + } + ctx->state = DEMUX_PV_STATE_ACTIVE; + return 0; + +err_wq: + ctx->wq = NULL; + ib_destroy_qp(ctx->qp[1].qp); + ctx->qp[1].qp = NULL; + + +err_qp0: + if (ctx->has_smi) + ib_destroy_qp(ctx->qp[0].qp); + ctx->qp[0].qp = NULL; + +err_mr: + ib_dereg_mr(ctx->mr); + ctx->mr = NULL; + +err_pd: + ib_dealloc_pd(ctx->pd); + ctx->pd = NULL; + +err_cq: + ib_destroy_cq(ctx->cq); + ctx->cq = NULL; + +err_buf: + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, create_tun); + +err_out_qp0: + if (ctx->has_smi) + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, create_tun); +err_out: + ctx->state = DEMUX_PV_STATE_DOWN; + return ret; +} + +static void destroy_pv_resources(struct mlx4_ib_dev *dev, int slave, int port, + struct mlx4_ib_demux_pv_ctx *ctx, int flush) +{ + if (!ctx) + return; + if (ctx->state > DEMUX_PV_STATE_DOWN) { + ctx->state = DEMUX_PV_STATE_DOWNING; + if (flush) + flush_workqueue(ctx->wq); + if (ctx->has_smi) { + ib_destroy_qp(ctx->qp[0].qp); + ctx->qp[0].qp = NULL; + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_SMI, 1); + } + ib_destroy_qp(ctx->qp[1].qp); + ctx->qp[1].qp = NULL; + mlx4_ib_free_pv_qp_bufs(ctx, IB_QPT_GSI, 1); + ib_dereg_mr(ctx->mr); + ctx->mr = NULL; + ib_dealloc_pd(ctx->pd); + ctx->pd = NULL; + ib_destroy_cq(ctx->cq); + ctx->cq = NULL; + ctx->state = DEMUX_PV_STATE_DOWN; + } +} + +static int mlx4_ib_tunnels_update(struct mlx4_ib_dev *dev, int slave, + int port, int do_init) +{ + int ret = 0; + + if (!do_init) { + /* for master, destroy real sqp resources */ + if (slave == mlx4_master_func_num(dev->dev)) + destroy_pv_resources(dev, slave, port, + dev->sriov.sqps[port - 1], 1); + /* destroy the tunnel qp resources */ + destroy_pv_resources(dev, slave, port, + dev->sriov.demux[port - 1].tun[slave], 1); + return 0; + } + + /* create the tunnel qp resources */ + ret = create_pv_resources(&dev->ib_dev, slave, port, 1, + dev->sriov.demux[port - 1].tun[slave]); + + /* for master, create the real sqp resources */ + if (!ret && slave == mlx4_master_func_num(dev->dev)) + ret = create_pv_resources(&dev->ib_dev, slave, port, 0, + dev->sriov.sqps[port - 1]); + return ret; +} + +void mlx4_ib_tunnels_update_work(struct work_struct *work) +{ + struct mlx4_ib_demux_work *dmxw; + + dmxw = container_of(work, struct mlx4_ib_demux_work, work); + mlx4_ib_tunnels_update(dmxw->dev, dmxw->slave, (int) dmxw->port, + dmxw->do_init); + kfree(dmxw); + return; +} + +static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, + struct mlx4_ib_demux_ctx *ctx, + int port) +{ + char name[12]; + int ret = 0; + int i; + + ctx->tun = kcalloc(dev->dev->caps.sqp_demux, + sizeof (struct mlx4_ib_demux_pv_ctx *), GFP_KERNEL); + if (!ctx->tun) + return -ENOMEM; + + ctx->dev = dev; + ctx->port = port; + ctx->ib_dev = &dev->ib_dev; + + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + ret = alloc_pv_object(dev, i, port, &ctx->tun[i]); + if (ret) { + ret = -ENOMEM; + goto err_wq; + } + } + + snprintf(name, sizeof name, "mlx4_ibt%d", port); + ctx->wq = create_singlethread_workqueue(name); + if (!ctx->wq) { + pr_err("Failed to create tunnelling WQ for port %d\n", port); + ret = -ENOMEM; + goto err_wq; + } + + snprintf(name, sizeof name, "mlx4_ibud%d", port); + ctx->ud_wq = create_singlethread_workqueue(name); + if (!ctx->ud_wq) { + pr_err("Failed to create up/down WQ for port %d\n", port); + ret = -ENOMEM; + goto err_udwq; + } + + return 0; + +err_udwq: + destroy_workqueue(ctx->wq); + ctx->wq = NULL; + +err_wq: + for (i = 0; i < dev->dev->caps.sqp_demux; i++) + free_pv_object(dev, i, port); + kfree(ctx->tun); + ctx->tun = NULL; + return ret; +} + +static void mlx4_ib_free_sqp_ctx(struct mlx4_ib_demux_pv_ctx *sqp_ctx) +{ + if (sqp_ctx->state > DEMUX_PV_STATE_DOWN) { + sqp_ctx->state = DEMUX_PV_STATE_DOWNING; + flush_workqueue(sqp_ctx->wq); + if (sqp_ctx->has_smi) { + ib_destroy_qp(sqp_ctx->qp[0].qp); + sqp_ctx->qp[0].qp = NULL; + mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_SMI, 0); + } + ib_destroy_qp(sqp_ctx->qp[1].qp); + sqp_ctx->qp[1].qp = NULL; + mlx4_ib_free_pv_qp_bufs(sqp_ctx, IB_QPT_GSI, 0); + ib_dereg_mr(sqp_ctx->mr); + sqp_ctx->mr = NULL; + ib_dealloc_pd(sqp_ctx->pd); + sqp_ctx->pd = NULL; + ib_destroy_cq(sqp_ctx->cq); + sqp_ctx->cq = NULL; + sqp_ctx->state = DEMUX_PV_STATE_DOWN; + } +} + +static void mlx4_ib_free_demux_ctx(struct mlx4_ib_demux_ctx *ctx) +{ + int i; + if (ctx) { + struct mlx4_ib_dev *dev = to_mdev(ctx->ib_dev); + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + if (!ctx->tun[i]) + continue; + if (ctx->tun[i]->state > DEMUX_PV_STATE_DOWN) + ctx->tun[i]->state = DEMUX_PV_STATE_DOWNING; + } + flush_workqueue(ctx->wq); + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + destroy_pv_resources(dev, i, ctx->port, ctx->tun[i], 0); + free_pv_object(dev, i, ctx->port); + } + kfree(ctx->tun); + destroy_workqueue(ctx->ud_wq); + destroy_workqueue(ctx->wq); + } +} + +static void mlx4_ib_master_tunnels(struct mlx4_ib_dev *dev, int do_init) +{ + int i; + + if (!mlx4_is_master(dev->dev)) + return; + /* initialize or tear down tunnel QPs for the master */ + for (i = 0; i < dev->dev->caps.num_ports; i++) + mlx4_ib_tunnels_update(dev, mlx4_master_func_num(dev->dev), i + 1, do_init); + return; +} + +int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) +{ + int i = 0; + int err; + + if (!mlx4_is_mfunc(dev->dev)) + return 0; + + dev->sriov.is_going_down = 0; + spin_lock_init(&dev->sriov.going_down_lock); + + mlx4_ib_warn(&dev->ib_dev, "multi-function enabled\n"); + + if (mlx4_is_slave(dev->dev)) { + mlx4_ib_warn(&dev->ib_dev, "operating in qp1 tunnel mode\n"); + return 0; + } + + mlx4_ib_warn(&dev->ib_dev, "initializing demux service for %d qp1 clients\n", + dev->dev->caps.sqp_demux); + for (i = 0; i < dev->num_ports; i++) { + err = alloc_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1, + &dev->sriov.sqps[i]); + if (err) + goto demux_err; + err = mlx4_ib_alloc_demux_ctx(dev, &dev->sriov.demux[i], i + 1); + if (err) + goto demux_err; + } + mlx4_ib_master_tunnels(dev, 1); + return 0; + +demux_err: + while (i > 0) { + free_pv_object(dev, mlx4_master_func_num(dev->dev), i + 1); + mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); + --i; + } + + return err; +} + +void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev) +{ + int i; + unsigned long flags; + + if (!mlx4_is_mfunc(dev->dev)) + return; + + spin_lock_irqsave(&dev->sriov.going_down_lock, flags); + dev->sriov.is_going_down = 1; + spin_unlock_irqrestore(&dev->sriov.going_down_lock, flags); + if (mlx4_is_master(dev->dev)) + for (i = 0; i < dev->num_ports; i++) { + flush_workqueue(dev->sriov.demux[i].ud_wq); + mlx4_ib_free_sqp_ctx(dev->sriov.sqps[i]); + kfree(dev->sriov.sqps[i]); + dev->sriov.sqps[i] = NULL; + mlx4_ib_free_demux_ctx(&dev->sriov.demux[i]); + } +} diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index cc05579ebce7..3f7f77f93a1c 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1357,11 +1357,14 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) if (mlx4_ib_mad_init(ibdev)) goto err_reg; + if (mlx4_ib_init_sriov(ibdev)) + goto err_mad; + if (dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE && !iboe->nb.notifier_call) { iboe->nb.notifier_call = mlx4_ib_netdev_event; err = register_netdevice_notifier(&iboe->nb); if (err) - goto err_reg; + goto err_sriov; } for (j = 0; j < ARRAY_SIZE(mlx4_class_attributes); ++j) { @@ -1379,6 +1382,12 @@ err_notif: pr_warn("failure unregistering notifier\n"); flush_workqueue(wq); +err_sriov: + mlx4_ib_close_sriov(ibdev); + +err_mad: + mlx4_ib_mad_cleanup(ibdev); + err_reg: ib_unregister_device(&ibdev->ib_dev); @@ -1407,6 +1416,7 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) struct mlx4_ib_dev *ibdev = ibdev_ptr; int p; + mlx4_ib_close_sriov(ibdev); mlx4_ib_mad_cleanup(ibdev); ib_unregister_device(&ibdev->ib_dev); if (ibdev->iboe.nb.notifier_call) { @@ -1428,6 +1438,51 @@ static void mlx4_ib_remove(struct mlx4_dev *dev, void *ibdev_ptr) ib_dealloc_device(&ibdev->ib_dev); } +static void do_slave_init(struct mlx4_ib_dev *ibdev, int slave, int do_init) +{ + struct mlx4_ib_demux_work **dm = NULL; + struct mlx4_dev *dev = ibdev->dev; + int i; + unsigned long flags; + + if (!mlx4_is_master(dev)) + return; + + dm = kcalloc(dev->caps.num_ports, sizeof *dm, GFP_ATOMIC); + if (!dm) { + pr_err("failed to allocate memory for tunneling qp update\n"); + goto out; + } + + for (i = 0; i < dev->caps.num_ports; i++) { + dm[i] = kmalloc(sizeof (struct mlx4_ib_demux_work), GFP_ATOMIC); + if (!dm[i]) { + pr_err("failed to allocate memory for tunneling qp update work struct\n"); + for (i = 0; i < dev->caps.num_ports; i++) { + if (dm[i]) + kfree(dm[i]); + } + goto out; + } + } + /* initialize or tear down tunnel QPs for the slave */ + for (i = 0; i < dev->caps.num_ports; i++) { + INIT_WORK(&dm[i]->work, mlx4_ib_tunnels_update_work); + dm[i]->port = i + 1; + dm[i]->slave = slave; + dm[i]->do_init = do_init; + dm[i]->dev = ibdev; + spin_lock_irqsave(&ibdev->sriov.going_down_lock, flags); + if (!ibdev->sriov.is_going_down) + queue_work(ibdev->sriov.demux[i].ud_wq, &dm[i]->work); + spin_unlock_irqrestore(&ibdev->sriov.going_down_lock, flags); + } +out: + if (dm) + kfree(dm); + return; +} + static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, enum mlx4_dev_event event, unsigned long param) { @@ -1435,22 +1490,23 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, struct mlx4_ib_dev *ibdev = to_mdev((struct ib_device *) ibdev_ptr); struct mlx4_eqe *eqe = NULL; struct ib_event_work *ew; - int port = 0; + int p = 0; if (event == MLX4_DEV_EVENT_PORT_MGMT_CHANGE) eqe = (struct mlx4_eqe *)param; else - port = (u8)param; - - if (port > ibdev->num_ports) - return; + p = (int) param; switch (event) { case MLX4_DEV_EVENT_PORT_UP: + if (p > ibdev->num_ports) + return; ibev.event = IB_EVENT_PORT_ACTIVE; break; case MLX4_DEV_EVENT_PORT_DOWN: + if (p > ibdev->num_ports) + return; ibev.event = IB_EVENT_PORT_ERR; break; @@ -1472,12 +1528,22 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, handle_port_mgmt_change_event(&ew->work); return; + case MLX4_DEV_EVENT_SLAVE_INIT: + /* here, p is the slave id */ + do_slave_init(ibdev, p, 1); + return; + + case MLX4_DEV_EVENT_SLAVE_SHUTDOWN: + /* here, p is the slave id */ + do_slave_init(ibdev, p, 0); + return; + default: return; } ibev.device = ibdev_ptr; - ibev.element.port_num = port; + ibev.element.port_num = (u8) p; ib_dispatch_event(&ibev); } diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 1248d576b031..137941d79870 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -176,6 +176,10 @@ enum mlx4_ib_qp_type { MLX4_IB_QPT_PROXY_SMI | MLX4_IB_QPT_PROXY_GSI | MLX4_IB_QPT_TUN_SMI_OWNER | \ MLX4_IB_QPT_TUN_SMI | MLX4_IB_QPT_TUN_GSI) +enum { + MLX4_NUM_TUNNEL_BUFS = 256, +}; + struct mlx4_ib_tunnel_header { struct mlx4_av av; __be32 remote_qpn; @@ -263,6 +267,15 @@ struct mlx4_ib_ah { union mlx4_ext_av av; }; +struct mlx4_ib_demux_work { + struct work_struct work; + struct mlx4_ib_dev *dev; + int slave; + int do_init; + u8 port; + +}; + struct mlx4_ib_tun_tx_buf { struct mlx4_ib_buf buf; struct ib_ah *ah; @@ -278,9 +291,17 @@ struct mlx4_ib_demux_pv_qp { unsigned tx_ix_tail; }; +enum mlx4_ib_demux_pv_state { + DEMUX_PV_STATE_DOWN, + DEMUX_PV_STATE_STARTING, + DEMUX_PV_STATE_ACTIVE, + DEMUX_PV_STATE_DOWNING, +}; + struct mlx4_ib_demux_pv_ctx { int port; int slave; + enum mlx4_ib_demux_pv_state state; int has_smi; struct ib_device *ib_dev; struct ib_cq *cq; @@ -319,6 +340,13 @@ struct mlx4_ib_iboe { union ib_gid gid_table[MLX4_MAX_PORTS][128]; }; +struct pkey_mgt { + u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + u16 phys_pkey_cache[MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct list_head pkey_port_list[MLX4_MFUNC_MAX]; + struct kobject *device_parent[MLX4_MFUNC_MAX]; +}; + struct mlx4_ib_dev { struct ib_device ib_dev; struct mlx4_dev *dev; @@ -340,6 +368,7 @@ struct mlx4_ib_dev { int counters[MLX4_MAX_PORTS]; int *eq_table; int eq_added; + struct pkey_mgt pkeys; }; struct ib_event_work { @@ -424,6 +453,9 @@ static inline struct mlx4_ib_ah *to_mah(struct ib_ah *ibah) return container_of(ibah, struct mlx4_ib_ah, ibah); } +int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev); +void mlx4_ib_close_sriov(struct mlx4_ib_dev *dev); + int mlx4_ib_db_map_user(struct mlx4_ib_ucontext *context, unsigned long virt, struct mlx4_db *db); void mlx4_ib_db_unmap_user(struct mlx4_ib_ucontext *context, struct mlx4_db *db); @@ -515,4 +547,6 @@ int mlx4_ib_add_mc(struct mlx4_ib_dev *mdev, struct mlx4_ib_qp *mqp, void mlx4_ib_dispatch_event(struct mlx4_ib_dev *dev, u8 port_num, enum ib_event_type type); +void mlx4_ib_tunnels_update_work(struct work_struct *work); + #endif /* MLX4_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index c8fef4353021..cb9bebe28276 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1340,6 +1340,8 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, if (MLX4_COMM_CMD_FLR == slave_state[slave].last_cmd) goto inform_slave_state; + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_SHUTDOWN, slave); + /* write the version in the event field */ reply |= mlx4_comm_get_version(); @@ -1376,6 +1378,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, goto reset_slave; slave_state[slave].vhcr_dma |= param; slave_state[slave].active = true; + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_SLAVE_INIT, slave); break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d5c82b7216de..b6b8d341b6c8 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -54,7 +54,8 @@ enum { }; enum { - MLX4_MAX_PORTS = 2 + MLX4_MAX_PORTS = 2, + MLX4_MAX_PORT_PKEYS = 128 }; /* base qkey for use in sriov tunnel-qp/proxy-qp communication. diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index d813704b963b..c257e1b211be 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -45,6 +45,8 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_PORT_DOWN, MLX4_DEV_EVENT_PORT_REINIT, MLX4_DEV_EVENT_PORT_MGMT_CHANGE, + MLX4_DEV_EVENT_SLAVE_INIT, + MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; struct mlx4_interface { -- cgit v1.2.3 From 54679e148287f0ca1bdd09264c908bacb9f19b3f Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:43 +0000 Subject: mlx4: Implement QP paravirtualization and maintain phys_pkey_cache for smp_snoop This requires: 1. Replacing the paravirtualized P_Key index (inserted by the guest) with the real P_Key index. 2. For UD QPs, placing the guest's true source GID index in the address path structure mgid field, and setting the ud_force_mgid bit so that the mgid is taken from the QP context and not from the WQE when posting sends. 3. For UC and RC QPs, placing the guest's true source GID index in the address path structure mgid field. 4. For tunnel and proxy QPs, setting the Q_Key value reserved for that proxy/tunnel pair. Since not all the above adjustments occur in all the QP transitions, the QP transitions require separate wrapper functions. Secondly, initialize the P_Key virtualization table to its default values: Master virtualized table is 1-1 with the real P_Key table, guest virtualized table has P_Key index 0 mapped to the real P_Key index 0, and all the other P_Key indices mapped to the reserved (invalid) P_Key at index 127. Finally, add logic in smp_snoop for maintaining the phys_P_Key_cache. and generating events on the master only if a P_Key actually changed. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 33 +++- drivers/infiniband/hw/mlx4/main.c | 35 ++++ drivers/net/ethernet/mellanox/mlx4/cmd.c | 12 +- drivers/net/ethernet/mellanox/mlx4/main.c | 11 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 47 +++++ .../net/ethernet/mellanox/mlx4/resource_tracker.c | 217 ++++++++++++++++++++- include/linux/mlx4/device.h | 3 + 7 files changed, 344 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index e98849338a94..318d5bcf821b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -185,6 +185,10 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, { struct ib_port_info *pinfo; u16 lid; + __be16 *base; + u32 bn, pkey_change_bitmap; + int i; + struct mlx4_ib_dev *dev = to_mdev(ibdev); if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED || @@ -209,8 +213,33 @@ static void smp_snoop(struct ib_device *ibdev, u8 port_num, struct ib_mad *mad, break; case IB_SMP_ATTR_PKEY_TABLE: - mlx4_ib_dispatch_event(dev, port_num, - IB_EVENT_PKEY_CHANGE); + if (!mlx4_is_mfunc(dev->dev)) { + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; + } + + bn = be32_to_cpu(((struct ib_smp *)mad)->attr_mod) & 0xFFFF; + base = (__be16 *) &(((struct ib_smp *)mad)->data[0]); + pkey_change_bitmap = 0; + for (i = 0; i < 32; i++) { + pr_debug("PKEY[%d] = x%x\n", + i + bn*32, be16_to_cpu(base[i])); + if (be16_to_cpu(base[i]) != + dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32]) { + pkey_change_bitmap |= (1 << i); + dev->pkeys.phys_pkey_cache[port_num - 1][i + bn*32] = + be16_to_cpu(base[i]); + } + } + pr_debug("PKEY Change event: port=%d, " + "block=0x%x, change_bitmap=0x%x\n", + port_num, bn, pkey_change_bitmap); + + if (pkey_change_bitmap) + mlx4_ib_dispatch_event(dev, port_num, + IB_EVENT_PKEY_CHANGE); + break; case IB_SMP_ATTR_GUID_INFO: diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3f7f77f93a1c..8e10ec2af7b6 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1121,6 +1121,38 @@ static int mlx4_ib_netdev_event(struct notifier_block *this, unsigned long event return NOTIFY_DONE; } +static void init_pkeys(struct mlx4_ib_dev *ibdev) +{ + int port; + int slave; + int i; + + if (mlx4_is_master(ibdev->dev)) { + for (slave = 0; slave <= ibdev->dev->num_vfs; ++slave) { + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { + for (i = 0; + i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; + ++i) { + ibdev->pkeys.virt2phys_pkey[slave][port - 1][i] = + /* master has the identity virt2phys pkey mapping */ + (slave == mlx4_master_func_num(ibdev->dev) || !i) ? i : + ibdev->dev->phys_caps.pkey_phys_table_len[port] - 1; + mlx4_sync_pkey_table(ibdev->dev, slave, port, i, + ibdev->pkeys.virt2phys_pkey[slave][port - 1][i]); + } + } + } + /* initialize pkey cache */ + for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { + for (i = 0; + i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; + ++i) + ibdev->pkeys.phys_pkey_cache[port-1][i] = + (i) ? 0 : 0xFFFF; + } + } +} + static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) { char name[32]; @@ -1375,6 +1407,9 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->ib_active = true; + if (mlx4_is_mfunc(ibdev->dev)) + init_pkeys(ibdev); + return ibdev; err_notif: diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index cb9bebe28276..662a3c5da739 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -950,7 +950,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_INIT2INIT_QP_wrapper }, { .opcode = MLX4_CMD_INIT2RTR_QP, @@ -968,7 +968,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_RTR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_RTS2RTS_QP, @@ -977,7 +977,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_RTS2RTS_QP_wrapper }, { .opcode = MLX4_CMD_SQERR2RTS_QP, @@ -986,7 +986,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQERR2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2ERR_QP, @@ -1013,7 +1013,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQD2SQD_QP_wrapper }, { .opcode = MLX4_CMD_SQD2RTS_QP, @@ -1022,7 +1022,7 @@ static struct mlx4_cmd_info cmd_info[] = { .out_is_imm = false, .encode_slave_id = false, .verify = NULL, - .wrapper = mlx4_GEN_QP_wrapper + .wrapper = mlx4_SQD2RTS_QP_wrapper }, { .opcode = MLX4_CMD_2RST_QP, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 06ef3afbc49a..2294b7173180 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -424,6 +424,17 @@ int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) } EXPORT_SYMBOL(mlx4_get_parav_qkey); +void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int val) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->virt2phys_pkey[slave][port - 1][i] = val; +} +EXPORT_SYMBOL(mlx4_sync_pkey_table); + int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index dba69d98734a..7d27c3158d0c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -807,6 +807,8 @@ struct mlx4_priv { struct io_mapping *bf_mapping; int reserved_mtts; int fs_hash_mode; + u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + }; static inline struct mlx4_priv *mlx4_priv(struct mlx4_dev *dev) @@ -1011,16 +1013,61 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_2ERR_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_RTS2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd); +int mlx4_QUERY_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd); int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 3c57a83e6287..49e9de725d0a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -242,6 +242,15 @@ static int res_tracker_insert(struct rb_root *root, struct res_common *res) return 0; } +enum qp_transition { + QP_TRANS_INIT2RTR, + QP_TRANS_RTR2RTS, + QP_TRANS_RTS2RTS, + QP_TRANS_SQERR2RTS, + QP_TRANS_SQD2SQD, + QP_TRANS_SQD2RTS +}; + /* For Debug uses */ static const char *ResourceType(enum mlx4_resource rt) { @@ -308,14 +317,41 @@ void mlx4_free_resource_tracker(struct mlx4_dev *dev, } } -static void update_ud_gid(struct mlx4_dev *dev, - struct mlx4_qp_context *qp_ctx, u8 slave) +static void update_pkey_index(struct mlx4_dev *dev, int slave, + struct mlx4_cmd_mailbox *inbox) { - u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + u8 sched = *(u8 *)(inbox->buf + 64); + u8 orig_index = *(u8 *)(inbox->buf + 35); + u8 new_index; + struct mlx4_priv *priv = mlx4_priv(dev); + int port; + + port = (sched >> 6 & 1) + 1; + + new_index = priv->virt2phys_pkey[slave][port - 1][orig_index]; + *(u8 *)(inbox->buf + 35) = new_index; + + mlx4_dbg(dev, "port = %d, orig pkey index = %d, " + "new pkey index = %d\n", port, orig_index, new_index); +} + +static void update_gid(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *inbox, + u8 slave) +{ + struct mlx4_qp_context *qp_ctx = inbox->buf + 8; + enum mlx4_qp_optpar optpar = be32_to_cpu(*(__be32 *) inbox->buf); + u32 ts = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; if (MLX4_QP_ST_UD == ts) qp_ctx->pri_path.mgid_index = 0x80 | slave; + if (MLX4_QP_ST_RC == ts || MLX4_QP_ST_UC == ts) { + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) + qp_ctx->pri_path.mgid_index = slave & 0x7F; + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) + qp_ctx->alt_path.mgid_index = slave & 0x7F; + } + mlx4_dbg(dev, "slave %d, new gid index: 0x%x ", slave, qp_ctx->pri_path.mgid_index); } @@ -1109,6 +1145,11 @@ static int valid_reserved(struct mlx4_dev *dev, int slave, int qpn) (mlx4_is_master(dev) || mlx4_is_guest_proxy(dev, slave, qpn)); } +static int fw_reserved(struct mlx4_dev *dev, int qpn) +{ + return qpn < dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW]; +} + static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, u64 in_param, u64 *out_param) { @@ -1146,7 +1187,7 @@ static int qp_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!valid_reserved(dev, slave, qpn)) { + if (!fw_reserved(dev, qpn)) { err = __mlx4_qp_alloc_icm(dev, qpn); if (err) { res_abort_move(dev, slave, RES_QP, qpn); @@ -1499,7 +1540,7 @@ static int qp_free_res(struct mlx4_dev *dev, int slave, int op, int cmd, if (err) return err; - if (!valid_reserved(dev, slave, qpn)) + if (!fw_reserved(dev, qpn)) __mlx4_qp_free_icm(dev, qpn); res_end_move(dev, slave, RES_QP, qpn); @@ -1939,6 +1980,19 @@ static u32 qp_get_srqn(struct mlx4_qp_context *qpc) return be32_to_cpu(qpc->srqn) & 0x1ffffff; } +static void adjust_proxy_tun_qkey(struct mlx4_dev *dev, struct mlx4_vhcr *vhcr, + struct mlx4_qp_context *context) +{ + u32 qpn = vhcr->in_modifier & 0xffffff; + u32 qkey = 0; + + if (mlx4_get_parav_qkey(dev, qpn, &qkey)) + return; + + /* adjust qkey in qp context */ + context->qkey = cpu_to_be32(qkey); +} + int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -1991,6 +2045,8 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, goto ex_put_scq; } + adjust_proxy_tun_qkey(dev, vhcr, qpc); + update_pkey_index(dev, slave, inbox); err = mlx4_DMA_wrapper(dev, slave, vhcr, inbox, outbox, cmd); if (err) goto ex_put_srq; @@ -2136,6 +2192,48 @@ static int get_containing_mtt(struct mlx4_dev *dev, int slave, int start, return err; } +static int verify_qp_parameters(struct mlx4_dev *dev, + struct mlx4_cmd_mailbox *inbox, + enum qp_transition transition, u8 slave) +{ + u32 qp_type; + struct mlx4_qp_context *qp_ctx; + enum mlx4_qp_optpar optpar; + + qp_ctx = inbox->buf + 8; + qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; + optpar = be32_to_cpu(*(__be32 *) inbox->buf); + + switch (qp_type) { + case MLX4_QP_ST_RC: + case MLX4_QP_ST_UC: + switch (transition) { + case QP_TRANS_INIT2RTR: + case QP_TRANS_RTR2RTS: + case QP_TRANS_RTS2RTS: + case QP_TRANS_SQD2SQD: + case QP_TRANS_SQD2RTS: + if (slave != mlx4_master_func_num(dev)) + /* slaves have only gid index 0 */ + if (optpar & MLX4_QP_OPTPAR_PRIMARY_ADDR_PATH) + if (qp_ctx->pri_path.mgid_index) + return -EINVAL; + if (optpar & MLX4_QP_OPTPAR_ALT_ADDR_PATH) + if (qp_ctx->alt_path.mgid_index) + return -EINVAL; + break; + default: + break; + } + + break; + default: + break; + } + + return 0; +} + int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -2623,16 +2721,123 @@ out: return err; } +int mlx4_INIT2INIT_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + adjust_proxy_tun_qkey(dev, vhcr, context); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, struct mlx4_cmd_mailbox *outbox, struct mlx4_cmd_info *cmd) { + int err; struct mlx4_qp_context *qpc = inbox->buf + 8; - update_ud_gid(dev, qpc, (u8)slave); + err = verify_qp_parameters(dev, inbox, QP_TRANS_INIT2RTR, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, qpc); + + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_RTR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_RTR2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_RTS2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_RTS2RTS, slave); + if (err) + return err; + + update_pkey_index(dev, slave, inbox); + update_gid(dev, inbox, (u8)slave); + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + + +int mlx4_SQERR2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + struct mlx4_qp_context *context = inbox->buf + 8; + adjust_proxy_tun_qkey(dev, vhcr, context); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2SQD_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2SQD, slave); + if (err) + return err; + + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); + return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); +} + +int mlx4_SQD2RTS_QP_wrapper(struct mlx4_dev *dev, int slave, + struct mlx4_vhcr *vhcr, + struct mlx4_cmd_mailbox *inbox, + struct mlx4_cmd_mailbox *outbox, + struct mlx4_cmd_info *cmd) +{ + int err; + struct mlx4_qp_context *context = inbox->buf + 8; + + err = verify_qp_parameters(dev, inbox, QP_TRANS_SQD2RTS, slave); + if (err) + return err; + adjust_proxy_tun_qkey(dev, vhcr, context); + update_gid(dev, inbox, (u8)slave); + update_pkey_index(dev, slave, inbox); return mlx4_GEN_QP_wrapper(dev, slave, vhcr, inbox, outbox, cmd); } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b6b8d341b6c8..9803fd5d3dba 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -940,6 +940,9 @@ int mlx4_flow_attach(struct mlx4_dev *dev, struct mlx4_net_trans_rule *rule, u64 *reg_id); int mlx4_flow_detach(struct mlx4_dev *dev, u64 reg_id); +void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, + int i, int val); + int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 993c401e207946fa56f69c51e39f015e7108497b Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:48 +0000 Subject: mlx4_core: Add IB port-state machine and port mgmt event propagation For an IB port, a slave should not show port active until that slave has a valid alias-guid (provided by the subnet manager). Therefore the port-up event should be passed to a slave only after both the port is up, and the slave's alias-guid has been set. Also, provide the infrastructure for propagating port-management events (client-reregister, etc) to slaves. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/eq.c | 237 ++++++++++++++++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/device.h | 28 ++++ 3 files changed, 250 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 99a04648fab0..c425826b1fc0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -200,6 +200,196 @@ static void mlx4_slave_event(struct mlx4_dev *dev, int slave, slave_event(dev, slave, eqe); } +int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_slave = &priv->mfunc.master.slave_state[slave]; + + if (!s_slave->active) + return 0; + + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PKEY_TABLE; + eqe.event.port_mgmt_change.port = port; + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_pkey_eqe); + +int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_eqe eqe; + + /*don't send if we don't have the that slave */ + if (dev->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_GUID_INFO; + eqe.event.port_mgmt_change.port = port; + + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_guid_change_eqe); + +int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, + u8 port_subtype_change) +{ + struct mlx4_eqe eqe; + + /*don't send if we don't have the that slave */ + if (dev->num_vfs < slave) + return 0; + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_CHANGE; + eqe.subtype = port_subtype_change; + eqe.event.port_change.port = cpu_to_be32(port << 28); + + mlx4_dbg(dev, "%s: sending: %d to slave: %d on port: %d\n", __func__, + port_subtype_change, slave, port); + return mlx4_GEN_EQE(dev, slave, &eqe); +} +EXPORT_SYMBOL(mlx4_gen_port_state_change_eqe); + +enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return SLAVE_PORT_DOWN; + } + return s_state[slave].port_state[port]; +} +EXPORT_SYMBOL(mlx4_get_slave_port_state); + +static int mlx4_set_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, + enum slave_port_state state) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *s_state = priv->mfunc.master.slave_state; + + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return -1; + } + s_state[slave].port_state[port] = state; + + return 0; +} + +static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) +{ + int i; + enum slave_port_gen_event gen_event; + + for (i = 0; i < dev->num_slaves; i++) + set_and_calc_slave_port_state(dev, i, port, event, &gen_event); +} +/************************************************************************** + The function get as input the new event to that port, + and according to the prev state change the slave's port state. + The events are: + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + MLX4_PORT_STATE_DEV_EVENT_PORT_UP + MLX4_PORT_STATE_IB_EVENT_GID_VALID + MLX4_PORT_STATE_IB_EVENT_GID_INVALID +***************************************************************************/ +int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, + u8 port, int event, + enum slave_port_gen_event *gen_event) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_slave_state *ctx = NULL; + unsigned long flags; + int ret = -1; + enum slave_port_state cur_state = + mlx4_get_slave_port_state(dev, slave, port); + + *gen_event = SLAVE_PORT_GEN_EVENT_NONE; + + if (slave >= dev->num_slaves || port > MLX4_MAX_PORTS || port == 0) { + pr_err("%s: Error: asking for slave:%d, port:%d\n", + __func__, slave, port); + return ret; + } + + ctx = &priv->mfunc.master.slave_state[slave]; + spin_lock_irqsave(&ctx->lock, flags); + + mlx4_dbg(dev, "%s: slave: %d, current state: %d new event :%d\n", + __func__, slave, cur_state, event); + + switch (cur_state) { + case SLAVE_PORT_DOWN: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_UP == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + break; + case SLAVE_PENDING_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + else if (MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_UP; + } + break; + case SLAVE_PORT_UP: + if (MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN == event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PORT_DOWN); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } else if (MLX4_PORT_STATE_IB_EVENT_GID_INVALID == + event) { + mlx4_set_slave_port_state(dev, slave, port, + SLAVE_PENDING_UP); + *gen_event = SLAVE_PORT_GEN_EVENT_DOWN; + } + break; + default: + pr_err("%s: BUG!!! UNKNOWN state: " + "slave:%d, port:%d\n", __func__, slave, port); + goto out; + } + ret = mlx4_get_slave_port_state(dev, slave, port); + mlx4_dbg(dev, "%s: slave: %d, current state: %d new event" + " :%d gen_event: %d\n", + __func__, slave, cur_state, event, *gen_event); + +out: + spin_unlock_irqrestore(&ctx->lock, flags); + return ret; +} + +EXPORT_SYMBOL(set_and_calc_slave_port_state); + +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr) +{ + struct mlx4_eqe eqe; + + memset(&eqe, 0, sizeof eqe); + + eqe.type = MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT; + eqe.subtype = MLX4_DEV_PMC_SUBTYPE_PORT_INFO; + eqe.event.port_mgmt_change.port = port; + eqe.event.port_mgmt_change.params.port_info.changed_attr = + cpu_to_be32((u32) attr); + + slave_event(dev, ALL_SLAVES, &eqe); + return 0; +} +EXPORT_SYMBOL(mlx4_gen_slaves_port_mgt_ev); + void mlx4_master_handle_slave_flr(struct work_struct *work) { struct mlx4_mfunc_master_ctx *master = @@ -251,6 +441,7 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) u32 flr_slave; u8 update_slave_state; int i; + enum slave_port_gen_event gen_event; while ((eqe = next_eqe_sw(eq))) { /* @@ -347,35 +538,49 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) case MLX4_EVENT_TYPE_PORT_CHANGE: port = be32_to_cpu(eqe->event.port_change.port) >> 28; if (eqe->subtype == MLX4_PORT_CHANGE_SUBTYPE_DOWN) { - mlx4_dispatch_event(dev, - MLX4_DEV_EVENT_PORT_DOWN, + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_DOWN, port); mlx4_priv(dev)->sense.do_sense_port[port] = 1; - if (mlx4_is_master(dev)) - /*change the state of all slave's port - * to down:*/ - for (i = 0; i < dev->num_slaves; i++) { - mlx4_dbg(dev, "%s: Sending " - "MLX4_PORT_CHANGE_SUBTYPE_DOWN" + if (!mlx4_is_master(dev)) + break; + for (i = 0; i < dev->num_slaves; i++) { + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { + if (i == mlx4_master_func_num(dev)) + continue; + mlx4_dbg(dev, "%s: Sending MLX4_PORT_CHANGE_SUBTYPE_DOWN" " to slave: %d, port:%d\n", __func__, i, port); - if (i == dev->caps.function) - continue; mlx4_slave_event(dev, i, eqe); + } else { /* IB port */ + set_and_calc_slave_port_state(dev, i, port, + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + &gen_event); + /*we can be in pending state, then do not send port_down event*/ + if (SLAVE_PORT_GEN_EVENT_DOWN == gen_event) { + if (i == mlx4_master_func_num(dev)) + continue; + mlx4_slave_event(dev, i, eqe); + } } + } } else { - mlx4_dispatch_event(dev, - MLX4_DEV_EVENT_PORT_UP, - port); + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_PORT_UP, port); + mlx4_priv(dev)->sense.do_sense_port[port] = 0; - if (mlx4_is_master(dev)) { + if (!mlx4_is_master(dev)) + break; + if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) for (i = 0; i < dev->num_slaves; i++) { - if (i == dev->caps.function) + if (i == mlx4_master_func_num(dev)) continue; mlx4_slave_event(dev, i, eqe); } - } + else /* IB port */ + /* port-up event will be sent to a slave when the + * slave's alias-guid is set. This is done in alias_GUID.c + */ + set_all_slave_state(dev, port, MLX4_DEV_EVENT_PORT_UP); } break; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 7d27c3158d0c..23f74759f403 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -452,6 +452,7 @@ struct mlx4_slave_state { /*initialized via the kzalloc*/ u8 is_slave_going_down; u32 cookie; + enum slave_port_state port_state[MLX4_MAX_PORTS + 1]; }; struct slave_list { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9803fd5d3dba..380e01671d1e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -192,6 +192,25 @@ enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; +enum slave_port_state { + SLAVE_PORT_DOWN = 0, + SLAVE_PENDING_UP, + SLAVE_PORT_UP, +}; + +enum slave_port_gen_event { + SLAVE_PORT_GEN_EVENT_DOWN = 0, + SLAVE_PORT_GEN_EVENT_UP, + SLAVE_PORT_GEN_EVENT_NONE, +}; + +enum slave_port_state_event { + MLX4_PORT_STATE_DEV_EVENT_PORT_DOWN, + MLX4_PORT_STATE_DEV_EVENT_PORT_UP, + MLX4_PORT_STATE_IB_PORT_STATE_EVENT_GID_VALID, + MLX4_PORT_STATE_IB_EVENT_GID_INVALID, +}; + enum { MLX4_PERM_LOCAL_READ = 1 << 10, MLX4_PERM_LOCAL_WRITE = 1 << 11, @@ -945,4 +964,13 @@ void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey); +int mlx4_is_slave_active(struct mlx4_dev *dev, int slave); +int mlx4_gen_pkey_eqe(struct mlx4_dev *dev, int slave, u8 port); +int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port); +int mlx4_gen_slaves_port_mgt_ev(struct mlx4_dev *dev, u8 port, int attr); +int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 port_subtype_change); +enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); +int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); + + #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From efcd235d736ab05ef2b29d7fe1493a2f52b07b66 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:52 +0000 Subject: net/mlx4_core: Adjustments to SET_PORT for IB SR-IOV 1. Slaves may not set the IS_SM capability for the port. 2. DEV_MGMT may not be set in multifunction mode. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/net/ethernet/mellanox/mlx4/port.c | 10 ++++++++++ include/linux/mlx4/device.h | 5 +++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index e36dd0f2fa73..4c51b05efa28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -732,6 +732,16 @@ static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod, new_cap_mask = ((__be32 *) inbox->buf)[1]; } + /* slave may not set the IS_SM capability for the port */ + if (slave != mlx4_master_func_num(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_IS_SM)) + return -EINVAL; + + /* No DEV_MGMT in multifunc mode */ + if (mlx4_is_mfunc(dev) && + (be32_to_cpu(new_cap_mask) & MLX4_PORT_CAP_DEV_MGMT_SUP)) + return -EINVAL; + agg_cap_mask = 0; slave_cap_mask = priv->mfunc.master.slave_state[slave].ib_cap_mask[port]; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 380e01671d1e..b3f9b4500b9b 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -53,6 +53,11 @@ enum { MLX4_FLAG_SRIOV = 1 << 4, }; +enum { + MLX4_PORT_CAP_IS_SM = 1 << 1, + MLX4_PORT_CAP_DEV_MGMT_SUP = 1 << 19, +}; + enum { MLX4_MAX_PORTS = 2, MLX4_MAX_PORT_PKEYS = 128 -- cgit v1.2.3 From 026149cbaada391d98f1cbec47c488cb548f753a Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:55 +0000 Subject: mlx4: Activate SR-IOV mode for IB Remove the error returns for IB ports from mlx4_ib_add, mlx4_INIT_PORT_wrapper, and mlx4_CLOSE_PORT_wrapper. Currently, SRIOV is supported only for devices for which the link layer is IB on all ports; RoCE support will be added later. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/main.c | 8 ++++++-- drivers/net/ethernet/mellanox/mlx4/fw.c | 6 ------ include/linux/mlx4/device.h | 4 ++++ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 46303b209ce6..e849347ef99a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1293,11 +1293,15 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) pr_info_once("%s", mlx4_ib_version); - if (mlx4_is_mfunc(dev)) { - pr_warn("IB not yet supported in SRIOV\n"); + mlx4_foreach_non_ib_transport_port(i, dev) + num_ports++; + + if (mlx4_is_mfunc(dev) && num_ports) { + dev_err(&dev->pdev->dev, "RoCE is not supported over SRIOV as yet\n"); return NULL; } + num_ports = 0; mlx4_foreach_ib_transport_port(i, dev) num_ports++; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index bf159c0fe82b..1d74e8541146 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1391,9 +1391,6 @@ int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave, if (priv->mfunc.master.slave_state[slave].init_port_mask & (1 << port)) return 0; - if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) - return -ENODEV; - if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { /* Enable port only if it was previously disabled */ if (!priv->mfunc.master.init_port_ref[port]) { @@ -1489,9 +1486,6 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, (1 << port))) return 0; - if (dev->caps.port_mask[port] == MLX4_PORT_TYPE_IB) - return -ENODEV; - if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { if (priv->mfunc.master.init_port_ref[port] == 1) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b3f9b4500b9b..9b243df789ef 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -697,6 +697,10 @@ struct mlx4_init_port_param { for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if ((type) == (dev)->caps.port_mask[(port)]) +#define mlx4_foreach_non_ib_transport_port(port, dev) \ + for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ + if (((dev)->caps.port_mask[port] != MLX4_PORT_TYPE_IB)) + #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ -- cgit v1.2.3 From afa8fd1db9f295a0c4130bc6d87bf8b05bdd0523 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:56 +0000 Subject: mlx4: Paravirtualize Node Guids for slaves This is necessary in order to support > 1 VF/PF in a VM for software that uses the node guid as a discriminator, such as librdmacm. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 14 ++++++++++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 +++ drivers/net/ethernet/mellanox/mlx4/cmd.c | 11 +++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 22 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 1 + include/linux/mlx4/device.h | 2 ++ 6 files changed, 53 insertions(+) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index b91b4865d635..603a114b3dfe 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -89,6 +90,12 @@ static void handle_lid_change_event(struct mlx4_ib_dev *dev, u8 port_num); static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num, int block, u32 change_bitmap); +__be64 mlx4_ib_gen_node_guid(void) +{ +#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40)) + return cpu_to_be64(NODE_GUID_HI | random32()); +} + __be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx) { return cpu_to_be64(atomic_inc_return(&ctx->tid)) | @@ -1962,6 +1969,13 @@ int mlx4_ib_init_sriov(struct mlx4_ib_dev *dev) return 0; } + for (i = 0; i < dev->dev->caps.sqp_demux; i++) { + if (i == mlx4_master_func_num(dev->dev)) + mlx4_put_slave_node_guid(dev->dev, i, dev->ib_dev.node_guid); + else + mlx4_put_slave_node_guid(dev->dev, i, mlx4_ib_gen_node_guid()); + } + err = mlx4_ib_init_alias_guid_service(dev); if (err) { mlx4_ib_warn(&dev->ib_dev, "Failed init alias guid process.\n"); diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index e57a220a4d55..e04cbc9a54a5 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -723,4 +723,7 @@ int mlx4_ib_device_register_sysfs(struct mlx4_ib_dev *device) ; void mlx4_ib_device_unregister_sysfs(struct mlx4_ib_dev *device); +__be64 mlx4_ib_gen_node_guid(void); + + #endif /* MLX4_IB_H */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 78fdbd653fa9..90774b7e47a4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -696,6 +696,7 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, struct ib_smp *outsmp = outbox->buf; __be16 *outtab = (__be16 *)(outsmp->data); __be32 slave_cap_mask; + __be64 slave_node_guid; port = vhcr->in_modifier; if (smp->base_version == 1 && @@ -755,6 +756,16 @@ static int mlx4_MAD_IFC_wrapper(struct mlx4_dev *dev, int slave, } return err; } + if (smp->attr_id == IB_SMP_ATTR_NODE_INFO) { + err = mlx4_cmd_box(dev, inbox->dma, outbox->dma, + vhcr->in_modifier, vhcr->op_modifier, + vhcr->op, MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); + if (!err) { + slave_node_guid = mlx4_get_slave_node_guid(dev, slave); + memcpy(outsmp->data + 12, &slave_node_guid, 8); + } + return err; + } } } if (slave != mlx4_master_func_num(dev) && diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2294b7173180..76f69fdd01d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -435,6 +435,28 @@ void mlx4_sync_pkey_table(struct mlx4_dev *dev, int slave, int port, int i, int } EXPORT_SYMBOL(mlx4_sync_pkey_table); +void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return; + + priv->slave_node_guids[slave] = guid; +} +EXPORT_SYMBOL(mlx4_put_slave_node_guid); + +__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave) +{ + struct mlx4_priv *priv = container_of(dev, struct mlx4_priv, dev); + + if (!mlx4_is_master(dev)) + return 0; + + return priv->slave_node_guids[slave]; +} +EXPORT_SYMBOL(mlx4_get_slave_node_guid); + int mlx4_is_slave_active(struct mlx4_dev *dev, int slave) { struct mlx4_priv *priv = mlx4_priv(dev); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 399793a23457..4b1fbd84a68f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -810,6 +810,7 @@ struct mlx4_priv { int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + __be64 slave_node_guids[MLX4_MFUNC_MAX]; }; diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 9b243df789ef..c0f8f74a0a5e 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -981,5 +981,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, u8 enum slave_port_state mlx4_get_slave_port_state(struct mlx4_dev *dev, int slave, u8 port); int set_and_calc_slave_port_state(struct mlx4_dev *dev, int slave, u8 port, int event, enum slave_port_gen_event *gen_event); +void mlx4_put_slave_node_guid(struct mlx4_dev *dev, int slave, __be64 guid); +__be64 mlx4_get_slave_node_guid(struct mlx4_dev *dev, int slave); #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 47605df953985c2b792ac9f3ddf70d270b89adb8 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Fri, 3 Aug 2012 08:40:57 +0000 Subject: mlx4: Modify proxy/tunnel QP mechanism so that guests do no calculations Previously, the structure of a guest's proxy QPs followed the structure of the PPF special qps (qp0 port 1, qp0 port 2, qp1 port 1, qp1 port 2, ...). The guest then did offset calculations on the sqp_base qp number that the PPF passed to it in QUERY_FUNC_CAP(). This is now changed so that the guest does no offset calculations regarding proxy or tunnel QPs to use. This change frees the PPF from needing to adhere to a specific order in allocating proxy and tunnel QPs. Now QUERY_FUNC_CAP provides each port individually with its proxy qp0, proxy qp1, tunnel qp0, and tunnel qp1 QP numbers, and these are used directly where required (with no offset calculations). To accomplish this change, several fields were added to the phys_caps structure for use by the PPF and by non-SR-IOV mode: base_sqpn -- in non-sriov mode, this was formerly sqp_start. base_proxy_sqpn -- the first physical proxy qp number -- used by PPF base_tunnel_sqpn -- the first physical tunnel qp number -- used by PPF. The current code in the PPF still adheres to the previous layout of sqps, proxy-sqps and tunnel-sqps. However, the PPF can change this layout without affecting VF or (paravirtualized) PF code. Signed-off-by: Jack Morgenstein Signed-off-by: Roland Dreier --- drivers/infiniband/hw/mlx4/mad.c | 12 +-- drivers/infiniband/hw/mlx4/qp.c | 104 ++++++++++++------ drivers/net/ethernet/mellanox/mlx4/fw.c | 173 +++++++++++++++++------------- drivers/net/ethernet/mellanox/mlx4/fw.h | 14 +-- drivers/net/ethernet/mellanox/mlx4/main.c | 61 ++++++++--- drivers/net/ethernet/mellanox/mlx4/qp.c | 71 ++++++++---- include/linux/mlx4/device.h | 16 +-- 7 files changed, 290 insertions(+), 161 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 603a114b3dfe..658a622791fb 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -505,7 +505,7 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, } else tun_pkey_ix = dev->pkeys.virt2phys_pkey[slave][port - 1][0]; - dqpn = dev->dev->caps.sqp_start + 8 * slave + port + (dest_qpt * 2) - 1; + dqpn = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave + port + (dest_qpt * 2) - 1; /* get tunnel tx data buf for slave */ src_qp = tun_qp->qp; @@ -1074,9 +1074,9 @@ static int mlx4_ib_multiplex_sa_handler(struct ib_device *ibdev, int port, static int is_proxy_qp0(struct mlx4_ib_dev *dev, int qpn, int slave) { - int slave_start = dev->dev->caps.sqp_start + 8 * slave; + int proxy_start = dev->dev->phys_caps.base_proxy_sqpn + 8 * slave; - return (qpn >= slave_start && qpn <= slave_start + 1); + return (qpn >= proxy_start && qpn <= proxy_start + 1); } @@ -1191,14 +1191,14 @@ static void mlx4_ib_multiplex_mad(struct mlx4_ib_demux_pv_ctx *ctx, struct ib_wc int slave; /* Get slave that sent this packet */ - if (wc->src_qp < dev->dev->caps.sqp_start || - wc->src_qp >= dev->dev->caps.base_tunnel_sqpn || + if (wc->src_qp < dev->dev->phys_caps.base_proxy_sqpn || + wc->src_qp >= dev->dev->phys_caps.base_proxy_sqpn + 8 * MLX4_MFUNC_MAX || (wc->src_qp & 0x1) != ctx->port - 1 || wc->src_qp & 0x4) { mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d\n", wc->src_qp); return; } - slave = ((wc->src_qp & ~0x7) - dev->dev->caps.sqp_start) / 8; + slave = ((wc->src_qp & ~0x7) - dev->dev->phys_caps.base_proxy_sqpn) / 8; if (slave != ctx->slave) { mlx4_ib_warn(ctx->ib_dev, "can't multiplex bad sqp:%d: " "belongs to another slave\n", wc->src_qp); diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index a8622510de42..96fe103f5973 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -116,33 +116,57 @@ static int is_tunnel_qp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) if (!mlx4_is_master(dev->dev)) return 0; - return qp->mqp.qpn >= dev->dev->caps.base_sqpn && - qp->mqp.qpn < dev->dev->caps.base_sqpn + - 8 + 16 * MLX4_MFUNC_MAX; + return qp->mqp.qpn >= dev->dev->phys_caps.base_tunnel_sqpn && + qp->mqp.qpn < dev->dev->phys_caps.base_tunnel_sqpn + + 8 * MLX4_MFUNC_MAX; } static int is_sqp(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - return ((mlx4_is_master(dev->dev) && - qp->mqp.qpn >= dev->dev->caps.base_sqpn && - qp->mqp.qpn <= dev->dev->caps.base_sqpn + 3) || - (qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 3)); + int proxy_sqp = 0; + int real_sqp = 0; + int i; + /* PPF or Native -- real SQP */ + real_sqp = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && + qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && + qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 3); + if (real_sqp) + return 1; + /* VF or PF -- proxy SQP */ + if (mlx4_is_mfunc(dev->dev)) { + for (i = 0; i < dev->dev->caps.num_ports; i++) { + if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i] || + qp->mqp.qpn == dev->dev->caps.qp1_proxy[i]) { + proxy_sqp = 1; + break; + } + } + } + return proxy_sqp; } /* used for INIT/CLOSE port logic */ static int is_qp0(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp) { - int qp0; - - /* qp0 is either the proxy qp0, or the real qp0 */ - qp0 = (qp->mqp.qpn >= dev->dev->caps.sqp_start && - qp->mqp.qpn <= dev->dev->caps.sqp_start + 1) || - (mlx4_is_master(dev->dev) && - qp->mqp.qpn >= dev->dev->caps.base_sqpn && - qp->mqp.qpn <= dev->dev->caps.base_sqpn + 1); - - return qp0; + int proxy_qp0 = 0; + int real_qp0 = 0; + int i; + /* PPF or Native -- real QP0 */ + real_qp0 = ((mlx4_is_master(dev->dev) || !mlx4_is_mfunc(dev->dev)) && + qp->mqp.qpn >= dev->dev->phys_caps.base_sqpn && + qp->mqp.qpn <= dev->dev->phys_caps.base_sqpn + 1); + if (real_qp0) + return 1; + /* VF or PF -- proxy QP0 */ + if (mlx4_is_mfunc(dev->dev)) { + for (i = 0; i < dev->dev->caps.num_ports; i++) { + if (qp->mqp.qpn == dev->dev->caps.qp0_proxy[i]) { + proxy_qp0 = 1; + break; + } + } + } + return proxy_qp0; } static void *get_wqe(struct mlx4_ib_qp *qp, int offset) @@ -607,8 +631,10 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp_type = MLX4_IB_QPT_TUN_SMI_OWNER; else qp_type = MLX4_IB_QPT_TUN_SMI; - qpn = dev->dev->caps.base_tunnel_sqpn + 8 * tnl_init->slave + - tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; + /* we are definitely in the PPF here, since we are creating + * tunnel QPs. base_tunnel_sqpn is therefore valid. */ + qpn = dev->dev->phys_caps.base_tunnel_sqpn + 8 * tnl_init->slave + + tnl_init->proxy_qp_type * 2 + tnl_init->port - 1; sqpn = qpn; } @@ -630,12 +656,6 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->mlx4_ib_qp_type = qp_type; - if (mlx4_is_mfunc(dev->dev) && - (qp_type == MLX4_IB_QPT_SMI || qp_type == MLX4_IB_QPT_GSI)) { - qpn -= 8; - sqpn -= 8; - } - mutex_init(&qp->mutex); spin_lock_init(&qp->sq.lock); spin_lock_init(&qp->rq.lock); @@ -935,6 +955,23 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, del_gid_entries(qp); } +static u32 get_sqp_num(struct mlx4_ib_dev *dev, struct ib_qp_init_attr *attr) +{ + /* Native or PPF */ + if (!mlx4_is_mfunc(dev->dev) || + (mlx4_is_master(dev->dev) && + attr->create_flags & MLX4_IB_SRIOV_SQP)) { + return dev->dev->phys_caps.base_sqpn + + (attr->qp_type == IB_QPT_SMI ? 0 : 2) + + attr->port_num - 1; + } + /* PF or VF -- creating proxies */ + if (attr->qp_type == IB_QPT_SMI) + return dev->dev->caps.qp0_proxy[attr->port_num - 1]; + else + return dev->dev->caps.qp1_proxy[attr->port_num - 1]; +} + struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, struct ib_qp_init_attr *init_attr, struct ib_udata *udata) @@ -998,9 +1035,7 @@ struct ib_qp *mlx4_ib_create_qp(struct ib_pd *pd, return ERR_PTR(-EINVAL); err = create_qp_common(to_mdev(pd->device), pd, init_attr, udata, - to_mdev(pd->device)->dev->caps.sqp_start + - (init_attr->qp_type == IB_QPT_SMI ? 0 : 2) + - init_attr->port_num - 1, + get_sqp_num(to_mdev(pd->device), init_attr), &qp); if (err) return ERR_PTR(err); @@ -1643,8 +1678,7 @@ static int build_sriov_qp0_header(struct mlx4_ib_sqp *sqp, sqp->ud_header.bth.destination_qpn = cpu_to_be32(wr->wr.ud.remote_qpn); else sqp->ud_header.bth.destination_qpn = - cpu_to_be32(mdev->dev->caps.base_tunnel_sqpn + - sqp->qp.port - 1); + cpu_to_be32(mdev->dev->caps.qp0_tunnel[sqp->qp.port - 1]); sqp->ud_header.bth.psn = cpu_to_be32((sqp->send_psn++) & ((1 << 24) - 1)); if (mlx4_get_parav_qkey(mdev->dev, sqp->qp.mqp.qpn, &qkey)) @@ -2012,10 +2046,10 @@ static void set_tunnel_datagram_seg(struct mlx4_ib_dev *dev, cpu_to_be32(0xf0000000); memcpy(dseg->av, &sqp_av, sizeof (struct mlx4_av)); - dseg->dqpn = cpu_to_be32(dev->dev->caps.base_tunnel_sqpn + - qpt * 2 + port - 1); - /* use well-known qkey from the QPC */ - dseg->qkey = cpu_to_be32(0x80000000); + /* This function used only for sending on QP1 proxies */ + dseg->dqpn = cpu_to_be32(dev->dev->caps.qp1_tunnel[port - 1]); + /* Use QKEY from the QP context, which is set by master */ + dseg->qkey = cpu_to_be32(IB_QP_SET_QKEY); } static void build_tunnel_header(struct ib_send_wr *wr, void *wqe, unsigned *mlx_seg_len) diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 1d74e8541146..e2ed36e2fb29 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -184,8 +184,6 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_MCG_QUOTA_OFFSET 0x28 #define QUERY_FUNC_CAP_MAX_EQ_OFFSET 0x2c #define QUERY_FUNC_CAP_RESERVED_EQ_OFFSET 0X30 -#define QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET 0x44 -#define QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET 0x48 #define QUERY_FUNC_CAP_FMR_FLAG 0x80 #define QUERY_FUNC_CAP_FLAG_RDMA 0x40 @@ -196,21 +194,39 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_RDMA_PROPS_OFFSET 0x8 #define QUERY_FUNC_CAP_ETH_PROPS_OFFSET 0xc +#define QUERY_FUNC_CAP_QP0_TUNNEL 0x10 +#define QUERY_FUNC_CAP_QP0_PROXY 0x14 +#define QUERY_FUNC_CAP_QP1_TUNNEL 0x18 +#define QUERY_FUNC_CAP_QP1_PROXY 0x1c + #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC 0x40 #define QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN 0x80 #define QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID 0x80 if (vhcr->op_modifier == 1) { - field = vhcr->in_modifier; - MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - field = 0; /* ensure force vlan and force mac bits are not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); /* ensure that phy_wqe_gid bit is not set */ MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + field = vhcr->in_modifier; /* phys-port = logical-port */ + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + + /* size is now the QP number */ + size = dev->phys_caps.base_tunnel_sqpn + 8 * slave + field - 1; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_TUNNEL); + + size += 2; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_TUNNEL); + + size = dev->phys_caps.base_proxy_sqpn + 8 * slave + field - 1; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP0_PROXY); + + size += 2; + MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP1_PROXY); + } else if (vhcr->op_modifier == 0) { /* enable rdma and ethernet interfaces */ field = (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA); @@ -249,117 +265,124 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.num_mgms + dev->caps.num_amgms; MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - size = dev->caps.base_tunnel_sqpn + 8 * slave; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); - - size = dev->caps.sqp_start + 8 * slave; - MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); - } else err = -EINVAL; return err; } -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap) +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, + struct mlx4_func_cap *func_cap) { struct mlx4_cmd_mailbox *mailbox; u32 *outbox; - u8 field; + u8 field, op_modifier; u32 size; - int i; int err = 0; + op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) return PTR_ERR(mailbox); - err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FUNC_CAP, + err = mlx4_cmd_box(dev, 0, mailbox->dma, gen_or_port, op_modifier, + MLX4_CMD_QUERY_FUNC_CAP, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); if (err) goto out; outbox = mailbox->buf; - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); - if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { - mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); - err = -EPROTONOSUPPORT; - goto out; - } - func_cap->flags = field; - - MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); - func_cap->num_ports = field; + if (!op_modifier) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS_OFFSET); + if (!(field & (QUERY_FUNC_CAP_FLAG_ETH | QUERY_FUNC_CAP_FLAG_RDMA))) { + mlx4_err(dev, "The host supports neither eth nor rdma interfaces\n"); + err = -EPROTONOSUPPORT; + goto out; + } + func_cap->flags = field; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); - func_cap->pf_context_behaviour = size; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_NUM_PORTS_OFFSET); + func_cap->num_ports = field; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); - func_cap->qp_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_PF_BHVR_OFFSET); + func_cap->pf_context_behaviour = size; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); - func_cap->srq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP_QUOTA_OFFSET); + func_cap->qp_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); - func_cap->cq_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_SRQ_QUOTA_OFFSET); + func_cap->srq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); - func_cap->max_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_CQ_QUOTA_OFFSET); + func_cap->cq_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); - func_cap->reserved_eq = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MAX_EQ_OFFSET); + func_cap->max_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); - func_cap->mpt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_RESERVED_EQ_OFFSET); + func_cap->reserved_eq = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); - func_cap->mtt_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET); + func_cap->mpt_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); - func_cap->mcg_quota = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET); + func_cap->mtt_quota = size & 0xFFFFFF; - MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_TUNNEL_QPN_OFFSET); - func_cap->base_tunnel_qpn = size & 0xFFFFFF; + MLX4_GET(size, outbox, QUERY_FUNC_CAP_MCG_QUOTA_OFFSET); + func_cap->mcg_quota = size & 0xFFFFFF; + goto out; + } - MLX4_GET(size, outbox, QUERY_FUNC_CAP_BASE_PROXY_QPN_OFFSET); - func_cap->base_proxy_qpn = size & 0xFFFFFF; + /* logical port query */ + if (gen_or_port > dev->caps.num_ports) { + err = -EINVAL; + goto out; + } - for (i = 1; i <= func_cap->num_ports; ++i) { - err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 1, - MLX4_CMD_QUERY_FUNC_CAP, - MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); - if (err) + if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_ETH) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { + mlx4_err(dev, "VLAN is enforced on this port\n"); + err = -EPROTONOSUPPORT; goto out; + } - if (dev->caps.port_type[i] == MLX4_PORT_TYPE_ETH) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_ETH_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_VLAN) { - mlx4_err(dev, "VLAN is enforced on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } - - if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { - mlx4_err(dev, "Force mac is enabled on this port\n"); - err = -EPROTONOSUPPORT; - goto out; - } - } else if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB) { - MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); - if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { - mlx4_err(dev, "phy_wqe_gid is " - "enforced on this ib port\n"); - err = -EPROTONOSUPPORT; - goto out; - } + if (field & QUERY_FUNC_CAP_ETH_PROPS_FORCE_MAC) { + mlx4_err(dev, "Force mac is enabled on this port\n"); + err = -EPROTONOSUPPORT; + goto out; + } + } else if (dev->caps.port_type[gen_or_port] == MLX4_PORT_TYPE_IB) { + MLX4_GET(field, outbox, QUERY_FUNC_CAP_RDMA_PROPS_OFFSET); + if (field & QUERY_FUNC_CAP_RDMA_PROPS_FORCE_PHY_WQE_GID) { + mlx4_err(dev, "phy_wqe_gid is " + "enforced on this ib port\n"); + err = -EPROTONOSUPPORT; + goto out; } + } - MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); - func_cap->physical_port[i] = field; + MLX4_GET(field, outbox, QUERY_FUNC_CAP_PHYS_PORT_OFFSET); + func_cap->physical_port = field; + if (func_cap->physical_port != gen_or_port) { + err = -ENOSYS; + goto out; } + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_TUNNEL); + func_cap->qp0_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP0_PROXY); + func_cap->qp0_proxy_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_TUNNEL); + func_cap->qp1_tunnel_qpn = size & 0xFFFFFF; + + MLX4_GET(size, outbox, QUERY_FUNC_CAP_QP1_PROXY); + func_cap->qp1_proxy_qpn = size & 0xFFFFFF; + /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: * - num remains the maximum resource index diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index ced1de57c818..85abe9c11a22 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -134,11 +134,12 @@ struct mlx4_func_cap { int max_eq; int reserved_eq; int mcg_quota; - u32 base_qpn; - u32 base_tunnel_qpn; - u32 base_proxy_qpn; - u8 physical_port[MLX4_MAX_PORTS + 1]; - u8 port_flags[MLX4_MAX_PORTS + 1]; + u32 qp0_tunnel_qpn; + u32 qp0_proxy_qpn; + u32 qp1_tunnel_qpn; + u32 qp1_proxy_qpn; + u8 physical_port; + u8 port_flags; }; struct mlx4_adapter { @@ -195,7 +196,8 @@ struct mlx4_set_ib_param { }; int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap); -int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, struct mlx4_func_cap *func_cap); +int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u32 gen_or_port, + struct mlx4_func_cap *func_cap); int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 76f69fdd01d5..1e2ab9d00f1c 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -410,15 +410,16 @@ static int mlx4_how_many_lives_vf(struct mlx4_dev *dev) int mlx4_get_parav_qkey(struct mlx4_dev *dev, u32 qpn, u32 *qkey) { u32 qk = MLX4_RESERVED_QKEY_BASE; - if (qpn >= dev->caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || - qpn < dev->caps.sqp_start) + + if (qpn >= dev->phys_caps.base_tunnel_sqpn + 8 * MLX4_MFUNC_MAX || + qpn < dev->phys_caps.base_proxy_sqpn) return -EINVAL; - if (qpn >= dev->caps.base_tunnel_sqpn) + if (qpn >= dev->phys_caps.base_tunnel_sqpn) /* tunnel qp */ - qk += qpn - dev->caps.base_tunnel_sqpn; + qk += qpn - dev->phys_caps.base_tunnel_sqpn; else - qk += qpn - dev->caps.sqp_start; + qk += qpn - dev->phys_caps.base_proxy_sqpn; *qkey = qk; return 0; } @@ -527,9 +528,10 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) } memset(&func_cap, 0, sizeof(func_cap)); - err = mlx4_QUERY_FUNC_CAP(dev, &func_cap); + err = mlx4_QUERY_FUNC_CAP(dev, 0, &func_cap); if (err) { - mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n"); + mlx4_err(dev, "QUERY_FUNC_CAP general command failed, aborting (%d).\n", + err); return err; } @@ -557,12 +559,33 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) return -ENODEV; } + dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + + if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + err = -ENOMEM; + goto err_mem; + } + for (i = 1; i <= dev->caps.num_ports; ++i) { + err = mlx4_QUERY_FUNC_CAP(dev, (u32) i, &func_cap); + if (err) { + mlx4_err(dev, "QUERY_FUNC_CAP port command failed for" + " port %d, aborting (%d).\n", i, err); + goto err_mem; + } + dev->caps.qp0_tunnel[i - 1] = func_cap.qp0_tunnel_qpn; + dev->caps.qp0_proxy[i - 1] = func_cap.qp0_proxy_qpn; + dev->caps.qp1_tunnel[i - 1] = func_cap.qp1_tunnel_qpn; + dev->caps.qp1_proxy[i - 1] = func_cap.qp1_proxy_qpn; dev->caps.port_mask[i] = dev->caps.port_type[i]; if (mlx4_get_slave_pkey_gid_tbl_len(dev, i, &dev->caps.gid_table_len[i], &dev->caps.pkey_table_len[i])) - return -ENODEV; + goto err_mem; } if (dev->caps.uar_page_size * (dev->caps.num_uars - @@ -572,14 +595,20 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) "PCI resource 2 size of 0x%llx, aborting.\n", dev->caps.uar_page_size * dev->caps.num_uars, (unsigned long long) pci_resource_len(dev->pdev, 2)); - return -ENODEV; + goto err_mem; } - /* Calculate our sqp_start */ - dev->caps.sqp_start = func_cap.base_proxy_qpn; - dev->caps.base_tunnel_sqpn = func_cap.base_tunnel_qpn; - return 0; + +err_mem: + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + dev->caps.qp0_tunnel = dev->caps.qp0_proxy = + dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + + return err; } /* @@ -2261,6 +2290,12 @@ static void mlx4_remove_one(struct pci_dev *pdev) if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); + + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + kfree(priv); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 436ef6c69add..81e2abe07bbb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -68,15 +68,15 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type) } /* used for INIT/CLOSE port logic */ -static int is_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) +static int is_master_qp0(struct mlx4_dev *dev, struct mlx4_qp *qp, int *real_qp0, int *proxy_qp0) { + /* this procedure is called after we already know we are on the master */ /* qp0 is either the proxy qp0, or the real qp0 */ - *proxy_qp0 = qp->qpn >= dev->caps.sqp_start && - qp->qpn <= dev->caps.sqp_start + 1; + u32 pf_proxy_offset = dev->phys_caps.base_proxy_sqpn + 8 * mlx4_master_func_num(dev); + *proxy_qp0 = qp->qpn >= pf_proxy_offset && qp->qpn <= pf_proxy_offset + 1; - *real_qp0 = mlx4_is_master(dev) && - qp->qpn >= dev->caps.base_sqpn && - qp->qpn <= dev->caps.base_sqpn + 1; + *real_qp0 = qp->qpn >= dev->phys_caps.base_sqpn && + qp->qpn <= dev->phys_caps.base_sqpn + 1; return *real_qp0 || *proxy_qp0; } @@ -143,7 +143,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A, native); if (mlx4_is_master(dev) && cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && - is_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; if (proxy_qp0) priv->mfunc.master.qp0_state[port].proxy_qp0_active = 0; @@ -175,7 +175,7 @@ static int __mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt, new_state == MLX4_QP_STATE_RST ? 2 : 0, op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C, native); - if (mlx4_is_master(dev) && is_qp0(dev, qp, &real_qp0, &proxy_qp0)) { + if (mlx4_is_master(dev) && is_master_qp0(dev, qp, &real_qp0, &proxy_qp0)) { port = (qp->qpn & 1) + 1; if (cur_state != MLX4_QP_STATE_ERR && cur_state != MLX4_QP_STATE_RST && @@ -422,6 +422,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) struct mlx4_qp_table *qp_table = &mlx4_priv(dev)->qp_table; int err; int reserved_from_top = 0; + int k; spin_lock_init(&qp_table->lock); INIT_RADIX_TREE(&dev->qp_table_tree, GFP_ATOMIC); @@ -436,7 +437,7 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) * We also reserve the MSB of the 24-bit QP number to indicate * that a QP is an XRC QP. */ - dev->caps.base_sqpn = + dev->phys_caps.base_sqpn = ALIGN(dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW], 8); { @@ -479,24 +480,54 @@ int mlx4_init_qp_table(struct mlx4_dev *dev) */ err = mlx4_bitmap_init(&qp_table->bitmap, dev->caps.num_qps, - (1 << 23) - 1, dev->caps.base_sqpn + 8 + + (1 << 23) - 1, dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev), reserved_from_top); + if (err) + return err; - /* In mfunc, sqp_start is the base of the proxy SQPs, since the PF also - * uses paravirtualized SQPs. - * In native mode, sqp_start is the base of the real SQPs. */ if (mlx4_is_mfunc(dev)) { - dev->caps.sqp_start = dev->caps.base_sqpn + - 8 * (mlx4_master_func_num(dev) + 1); - dev->caps.base_tunnel_sqpn = dev->caps.sqp_start + 8 * MLX4_MFUNC_MAX; - } else - dev->caps.sqp_start = dev->caps.base_sqpn; + /* for PPF use */ + dev->phys_caps.base_proxy_sqpn = dev->phys_caps.base_sqpn + 8; + dev->phys_caps.base_tunnel_sqpn = dev->phys_caps.base_sqpn + 8 + 8 * MLX4_MFUNC_MAX; + + /* In mfunc, calculate proxy and tunnel qp offsets for the PF here, + * since the PF does not call mlx4_slave_caps */ + dev->caps.qp0_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp0_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_tunnel = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + dev->caps.qp1_proxy = kcalloc(dev->caps.num_ports, sizeof (u32), GFP_KERNEL); + + if (!dev->caps.qp0_tunnel || !dev->caps.qp0_proxy || + !dev->caps.qp1_tunnel || !dev->caps.qp1_proxy) { + err = -ENOMEM; + goto err_mem; + } + for (k = 0; k < dev->caps.num_ports; k++) { + dev->caps.qp0_proxy[k] = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + k; + dev->caps.qp0_tunnel[k] = dev->caps.qp0_proxy[k] + 8 * MLX4_MFUNC_MAX; + dev->caps.qp1_proxy[k] = dev->phys_caps.base_proxy_sqpn + + 8 * mlx4_master_func_num(dev) + MLX4_MAX_PORTS + k; + dev->caps.qp1_tunnel[k] = dev->caps.qp1_proxy[k] + 8 * MLX4_MFUNC_MAX; + } + } + + + err = mlx4_CONF_SPECIAL_QP(dev, dev->phys_caps.base_sqpn); if (err) - return err; + goto err_mem; + return 0; - return mlx4_CONF_SPECIAL_QP(dev, dev->caps.base_sqpn); +err_mem: + kfree(dev->caps.qp0_tunnel); + kfree(dev->caps.qp0_proxy); + kfree(dev->caps.qp1_tunnel); + kfree(dev->caps.qp1_proxy); + dev->caps.qp0_tunnel = dev->caps.qp0_proxy = + dev->caps.qp1_tunnel = dev->caps.qp1_proxy = NULL; + return err; } void mlx4_cleanup_qp_table(struct mlx4_dev *dev) diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c0f8f74a0a5e..6d1acb04cd17 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -328,6 +328,9 @@ struct mlx4_phys_caps { u32 gid_phys_table_len[MLX4_MAX_PORTS + 1]; u32 pkey_phys_table_len[MLX4_MAX_PORTS + 1]; u32 num_phys_eqs; + u32 base_sqpn; + u32 base_proxy_sqpn; + u32 base_tunnel_sqpn; }; struct mlx4_caps { @@ -358,9 +361,10 @@ struct mlx4_caps { int max_rq_desc_sz; int max_qp_init_rdma; int max_qp_dest_rdma; - int sqp_start; - u32 base_sqpn; - u32 base_tunnel_sqpn; + u32 *qp0_proxy; + u32 *qp1_proxy; + u32 *qp0_tunnel; + u32 *qp1_tunnel; int num_srqs; int max_srq_wqes; int max_srq_sge; @@ -722,15 +726,15 @@ static inline int mlx4_is_master(struct mlx4_dev *dev) static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn) { - return (qpn < dev->caps.base_sqpn + 8 + + return (qpn < dev->phys_caps.base_sqpn + 8 + 16 * MLX4_MFUNC_MAX * !!mlx4_is_master(dev)); } static inline int mlx4_is_guest_proxy(struct mlx4_dev *dev, int slave, u32 qpn) { - int base = dev->caps.sqp_start + slave * 8; + int guest_proxy_base = dev->phys_caps.base_proxy_sqpn + slave * 8; - if (qpn >= base && qpn < base + 8) + if (qpn >= guest_proxy_base && qpn < guest_proxy_base + 8) return 1; return 0; -- cgit v1.2.3