From 31a78a5a7983141c17852d31eb3a1f70d8161225 Mon Sep 17 00:00:00 2001
From: Yishai Hadas
Date: Sun, 24 Dec 2017 16:31:34 +0200
Subject: IB/mlx5: Extend UAR stuff to support dynamic allocation

This patch extends the alloc context flow to prepare for working with
dynamic UAR allocations. Currently, upon alloc context, a fixed number
of UARs is allocated (named 'static allocation'), and a user
application has no way to ask for more UARs or to control which UAR
will be used by which QP. In this patch the driver prepares its data
structures to manage both the static and the dynamic allocations, and
lets the user driver know the maximum number of dynamic blue-flame
registers that are allowed.

Downstream patches from this series will enable the dynamic allocation
and the association as part of QP creation.

Signed-off-by: Yishai Hadas
Signed-off-by: Leon Romanovsky
Signed-off-by: Jason Gunthorpe
---
 include/linux/mlx5/driver.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux/mlx5/driver.h')

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 8846919356ca..2fe263f69751 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -230,6 +230,9 @@ struct mlx5_bfreg_info {
 	u32	ver;
 	bool	lib_uar_4k;
 	u32	num_sys_pages;
+	u32	num_static_sys_pages;
+	u32	total_num_bfregs;
+	u32	num_dyn_bfregs;
 };
 
 struct mlx5_cmd_first {
--
cgit v1.2.3


From 57cda166bbe045151d46b2d1133fdf4afccb90ed Mon Sep 17 00:00:00 2001
From: Moni Shoua
Date: Tue, 2 Jan 2018 16:19:28 +0200
Subject: net/mlx5: Add DCT command interface

Add the missing command interface for working with a DCT. It includes
creating and destroying a DCT, querying it, and getting its events.

Signed-off-by: Moni Shoua
Reviewed-by: Yishai Hadas
Signed-off-by: Leon Romanovsky
Signed-off-by: Jason Gunthorpe
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c |   9 +-
 drivers/net/ethernet/mellanox/mlx5/core/qp.c | 125 ++++++++++++++++++++++++---
 include/linux/mlx5/device.h                  |   9 ++
 include/linux/mlx5/driver.h                  |   8 ++
 include/linux/mlx5/qp.h                      |  12 +++
 5 files changed, 150 insertions(+), 13 deletions(-)

(limited to 'include/linux/mlx5/driver.h')
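For orientation, a minimal caller sketch built from the functions this
patch exports. This is hedged: the patch itself does not show how the
DCT context is filled, and the dctc field names used here (cqn, pd,
srqn_xrqn) follow the usual mlx5_ifc conventions but are assumptions.

/* Hypothetical caller of the new DCT interface; an illustrative
 * sketch, not part of this patch.
 */
static int example_create_and_destroy_dct(struct mlx5_core_dev *mdev,
					  struct mlx5_core_dct *dct,
					  u32 cqn, u32 pdn, u32 srqn)
{
	u32 in[MLX5_ST_SZ_DW(create_dct_in)] = {0};
	void *dctc = MLX5_ADDR_OF(create_dct_in, in, dct_context_entry);
	int err;

	/* Assumed dctc fields; real callers set many more attributes */
	MLX5_SET(dctc, dctc, cqn, cqn);
	MLX5_SET(dctc, dctc, pd, pdn);
	MLX5_SET(dctc, dctc, srqn_xrqn, srqn);

	err = mlx5_core_create_dct(mdev, dct, in, sizeof(in));
	if (err)
		return err;

	/* ... use dct->mqp.qpn as the DCT number ... */

	/* Tear-down drains the DCT and waits for the DCT_DRAINED
	 * event before issuing DESTROY_DCT.
	 */
	return mlx5_core_destroy_dct(mdev, dct);
}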
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 60771865c99c..7d3d503fa675 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -417,7 +417,11 @@ static irqreturn_t mlx5_eq_int(int irq, void *eq_ptr)
 			cqn = be32_to_cpu(eqe->data.comp.cqn) & 0xffffff;
 			mlx5_cq_completion(dev, cqn);
 			break;
-
+		case MLX5_EVENT_TYPE_DCT_DRAINED:
+			rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+			rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+			mlx5_rsc_event(dev, rsn, eqe->type);
+			break;
 		case MLX5_EVENT_TYPE_PATH_MIG:
 		case MLX5_EVENT_TYPE_COMM_EST:
 		case MLX5_EVENT_TYPE_SQ_DRAINED:
@@ -715,6 +719,9 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	if (MLX5_CAP_GEN(dev, fpga))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_FPGA_ERROR);
 
+	if (MLX5_CAP_GEN_MAX(dev, dct))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_DCT_DRAINED);
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index db9e665ab104..0f5ddd22927d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -98,6 +98,11 @@ static u64 sq_allowed_event_types(void)
 {
 	return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
 }
 
+static u64 dct_allowed_event_types(void)
+{
+	return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
+}
+
 static bool is_event_type_allowed(int rsc_type, int event_type)
 {
 	switch (rsc_type) {
@@ -107,6 +112,8 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
 		return BIT(event_type) & rq_allowed_event_types();
 	case MLX5_EVENT_QUEUE_TYPE_SQ:
 		return BIT(event_type) & sq_allowed_event_types();
+	case MLX5_EVENT_QUEUE_TYPE_DCT:
+		return BIT(event_type) & dct_allowed_event_types();
 	default:
 		WARN(1, "Event arrived for unknown resource type");
 		return false;
@@ -116,6 +123,7 @@ static bool is_event_type_allowed(int rsc_type, int event_type)
 void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
 {
 	struct mlx5_core_rsc_common *common = mlx5_get_rsc(dev, rsn);
+	struct mlx5_core_dct *dct;
 	struct mlx5_core_qp *qp;
 
 	if (!common)
@@ -134,7 +142,11 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
 		qp = (struct mlx5_core_qp *)common;
 		qp->event(qp, event_type);
 		break;
-
+	case MLX5_RES_DCT:
+		dct = (struct mlx5_core_dct *)common;
+		if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
+			complete(&dct->drained);
+		break;
 	default:
 		mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
 	}
@@ -142,9 +154,9 @@ void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type)
 	mlx5_core_put_rsc(common);
 }
 
-static int create_qprqsq_common(struct mlx5_core_dev *dev,
-				struct mlx5_core_qp *qp,
-				int rsc_type)
+static int create_resource_common(struct mlx5_core_dev *dev,
+				  struct mlx5_core_qp *qp,
+				  int rsc_type)
 {
 	struct mlx5_qp_table *table = &dev->priv.qp_table;
 	int err;
@@ -165,8 +177,8 @@ static int create_qprqsq_common(struct mlx5_core_dev *dev,
 	return 0;
 }
 
-static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
-				  struct mlx5_core_qp *qp)
+static void destroy_resource_common(struct mlx5_core_dev *dev,
+				    struct mlx5_core_qp *qp)
 {
 	struct mlx5_qp_table *table = &dev->priv.qp_table;
 	unsigned long flags;
@@ -179,6 +191,40 @@ static void destroy_qprqsq_common(struct mlx5_core_dev *dev,
 	wait_for_completion(&qp->common.free);
 }
 
+int mlx5_core_create_dct(struct mlx5_core_dev *dev,
+			 struct mlx5_core_dct *dct,
+			 u32 *in, int inlen)
+{
+	u32 out[MLX5_ST_SZ_DW(create_dct_out)] = {0};
+	u32 din[MLX5_ST_SZ_DW(destroy_dct_in)] = {0};
+	u32 dout[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
+	struct mlx5_core_qp *qp = &dct->mqp;
+	int err;
+
+	init_completion(&dct->drained);
+	MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
+
+	err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out));
+	if (err) {
+		mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
+		return err;
+	}
+
+	qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+	err = create_resource_common(dev, qp, MLX5_RES_DCT);
+	if (err)
+		goto err_cmd;
+
+	return 0;
+err_cmd:
+	MLX5_SET(destroy_dct_in, din, opcode, MLX5_CMD_OP_DESTROY_DCT);
+	MLX5_SET(destroy_dct_in, din, dctn, qp->qpn);
+	mlx5_cmd_exec(dev, (void *)&din, sizeof(din),
+		      (void *)&dout, sizeof(dout));
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
+
 int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 			struct mlx5_core_qp *qp,
 			u32 *in, int inlen)
@@ -197,7 +243,7 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev,
 	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
 	mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
 
-	err = create_qprqsq_common(dev, qp, MLX5_RES_QP);
+	err = create_resource_common(dev, qp, MLX5_RES_QP);
 	if (err)
 		goto err_cmd;
 
@@ -220,6 +266,47 @@ err_cmd:
 }
 EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
 
+static int mlx5_core_drain_dct(struct mlx5_core_dev 
*dev, + struct mlx5_core_dct *dct) +{ + u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + + MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT); + MLX5_SET(drain_dct_in, in, dctn, qp->qpn); + return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); +} + +int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct) +{ + u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + int err; + + err = mlx5_core_drain_dct(dev, dct); + if (err) { + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + goto destroy; + } else { + mlx5_core_warn(dev, "failed drain DCT 0x%x with error 0x%x\n", qp->qpn, err); + return err; + } + } + wait_for_completion(&dct->drained); +destroy: + destroy_resource_common(dev, &dct->mqp); + MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT); + MLX5_SET(destroy_dct_in, in, dctn, qp->qpn); + err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)&out, sizeof(out)); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct); + int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) { @@ -229,7 +316,7 @@ int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, mlx5_debug_qp_remove(dev, qp); - destroy_qprqsq_common(dev, qp); + destroy_resource_common(dev, qp); MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); @@ -405,6 +492,20 @@ int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, } EXPORT_SYMBOL_GPL(mlx5_core_qp_query); +int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, + u32 *out, int outlen) +{ + u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0}; + struct mlx5_core_qp *qp = &dct->mqp; + + MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT); + MLX5_SET(query_dct_in, in, dctn, qp->qpn); + + return mlx5_cmd_exec(dev, (void *)&in, sizeof(in), + (void *)out, outlen); +} +EXPORT_SYMBOL_GPL(mlx5_core_dct_query); + int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) { u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0}; @@ -441,7 +542,7 @@ int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; rq->qpn = rqn; - err = create_qprqsq_common(dev, rq, MLX5_RES_RQ); + err = create_resource_common(dev, rq, MLX5_RES_RQ); if (err) goto err_destroy_rq; @@ -457,7 +558,7 @@ EXPORT_SYMBOL(mlx5_core_create_rq_tracked); void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *rq) { - destroy_qprqsq_common(dev, rq); + destroy_resource_common(dev, rq); mlx5_core_destroy_rq(dev, rq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked); @@ -473,7 +574,7 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen, return err; sq->qpn = sqn; - err = create_qprqsq_common(dev, sq, MLX5_RES_SQ); + err = create_resource_common(dev, sq, MLX5_RES_SQ); if (err) goto err_destroy_sq; @@ -489,7 +590,7 @@ EXPORT_SYMBOL(mlx5_core_create_sq_tracked); void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev, struct mlx5_core_qp *sq) { - destroy_qprqsq_common(dev, sq); + destroy_resource_common(dev, sq); mlx5_core_destroy_sq(dev, sq->qpn); } EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 52b8ea423dd2..9aee835b7393 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -286,6 +286,7 @@ enum { 
MLX5_EVENT_QUEUE_TYPE_QP = 0, MLX5_EVENT_QUEUE_TYPE_RQ = 1, MLX5_EVENT_QUEUE_TYPE_SQ = 2, + MLX5_EVENT_QUEUE_TYPE_DCT = 6, }; enum mlx5_event { @@ -321,6 +322,8 @@ enum mlx5_event { MLX5_EVENT_TYPE_PAGE_FAULT = 0xc, MLX5_EVENT_TYPE_NIC_VPORT_CHANGE = 0xd, + MLX5_EVENT_TYPE_DCT_DRAINED = 0x1c, + MLX5_EVENT_TYPE_FPGA_ERROR = 0x20, }; @@ -613,6 +616,11 @@ struct mlx5_eqe_pps { u8 rsvd2[12]; } __packed; +struct mlx5_eqe_dct { + __be32 reserved[6]; + __be32 dctn; +}; + union ev_data { __be32 raw[7]; struct mlx5_eqe_cmd cmd; @@ -628,6 +636,7 @@ union ev_data { struct mlx5_eqe_vport_change vport_change; struct mlx5_eqe_port_module port_module; struct mlx5_eqe_pps pps; + struct mlx5_eqe_dct dct; } __packed; struct mlx5_eqe { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2fe263f69751..0776554f18dc 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -154,6 +154,13 @@ enum mlx5_dcbx_oper_mode { MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, }; +enum mlx5_dct_atomic_mode { + MLX5_ATOMIC_MODE_DCT_OFF = 20, + MLX5_ATOMIC_MODE_DCT_NONE = 0 << MLX5_ATOMIC_MODE_DCT_OFF, + MLX5_ATOMIC_MODE_DCT_IB_COMP = 1 << MLX5_ATOMIC_MODE_DCT_OFF, + MLX5_ATOMIC_MODE_DCT_CX = 2 << MLX5_ATOMIC_MODE_DCT_OFF, +}; + enum { MLX5_ATOMIC_OPS_CMP_SWAP = 1 << 0, MLX5_ATOMIC_OPS_FETCH_ADD = 1 << 1, @@ -432,6 +439,7 @@ enum mlx5_res_type { MLX5_RES_SRQ = 3, MLX5_RES_XSRQ = 4, MLX5_RES_XRQ = 5, + MLX5_RES_DCT = MLX5_EVENT_QUEUE_TYPE_DCT, }; struct mlx5_core_rsc_common { diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 62af7512dabb..4778d41085d4 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -473,6 +473,11 @@ struct mlx5_core_qp { int pid; }; +struct mlx5_core_dct { + struct mlx5_core_qp mqp; + struct completion drained; +}; + struct mlx5_qp_path { u8 fl_free_ar; u8 rsvd3; @@ -549,6 +554,9 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, return radix_tree_lookup(&dev->priv.mkey_table.tree, key); } +int mlx5_core_create_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *qp, + u32 *in, int inlen); int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *in, @@ -558,8 +566,12 @@ int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); +int mlx5_core_destroy_dct(struct mlx5_core_dev *dev, + struct mlx5_core_dct *dct); int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, u32 *out, int outlen); +int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct, + u32 *out, int outlen); int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev, u32 timeout_usec); -- cgit v1.2.3 From 734dc065fc41f6143ff88225aa5d335cb1e0f6aa Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 4 Jan 2018 17:25:31 +0200 Subject: net/mlx5: Fix race for multiple RoCE enable There are two potential problems with the existing implementation. 1. Enable and disable can race after the atomic operations. 2. If a command fails the refcount is left in an inconsistent state. Introduce a lock and perform error checking. 
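The race is easiest to see as an interleaving. The sketch below is
illustrative only: the example_ names are hypothetical, and
example_update_roce_state() is a forward-declared stand-in for the
static helper in vport.c. It shows the pattern the patch moves to,
where the counter and the firmware command are updated under one
mutex and the counter is bumped only when the command succeeds.

#include <linux/mutex.h>

/*
 * The problem with the atomic counter, as an interleaving:
 *
 *   CPU0: atomic_inc_return(&roce_en) == 1    (decides to enable)
 *   CPU1: atomic_dec_return(&roce_en) == 0    (decides to disable)
 *   CPU1: update_roce_state(DISABLED)         (RoCE goes down)
 *   CPU0: update_roce_state(ENABLED)          (RoCE up, count == 0)
 *
 * The counter and the command are not one atomic unit, and a failed
 * command leaves the counter wrong.  Holding a mutex across both,
 * and only bumping the counter on success, avoids both problems:
 */
static int example_update_roce_state(struct mlx5_core_dev *mdev, u8 state);

static DEFINE_MUTEX(example_lock);
static u32 example_users;

static int example_enable(struct mlx5_core_dev *mdev)
{
	int err = 0;

	mutex_lock(&example_lock);
	if (!example_users)
		err = example_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED);
	if (!err)
		example_users++;	/* bump only if the command worked */
	mutex_unlock(&example_lock);
	return err;
}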
Fixes: a6f7d2aff623 ("net/mlx5: Add support for multiple RoCE enable") Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 33 ++++++++++++++++++++----- include/linux/mlx5/driver.h | 2 +- 2 files changed, 28 insertions(+), 7 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index d653b0025b13..916523103f16 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -36,6 +36,9 @@ #include #include "mlx5_core.h" +/* Mutex to hold while enabling or disabling RoCE */ +static DEFINE_MUTEX(mlx5_roce_en_lock); + static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u32 *out, int outlen) { @@ -988,17 +991,35 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev, int mlx5_nic_vport_enable_roce(struct mlx5_core_dev *mdev) { - if (atomic_inc_return(&mdev->roce.roce_en) != 1) - return 0; - return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (!mdev->roce.roce_en) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_ENABLED); + + if (!err) + mdev->roce.roce_en++; + mutex_unlock(&mlx5_roce_en_lock); + + return err; } EXPORT_SYMBOL_GPL(mlx5_nic_vport_enable_roce); int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev) { - if (atomic_dec_return(&mdev->roce.roce_en) != 0) - return 0; - return mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); + int err = 0; + + mutex_lock(&mlx5_roce_en_lock); + if (mdev->roce.roce_en) { + mdev->roce.roce_en--; + if (mdev->roce.roce_en == 0) + err = mlx5_nic_vport_update_roce_state(mdev, MLX5_VPORT_ROCE_DISABLED); + + if (err) + mdev->roce.roce_en++; + } + mutex_unlock(&mlx5_roce_en_lock); + return err; } EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 0776554f18dc..5b0443c9d337 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -835,7 +835,7 @@ struct mlx5_core_dev { struct mlx5e_resources mlx5e_res; struct { struct mlx5_rsvd_gids reserved_gids; - atomic_t roce_en; + u32 roce_en; } roce; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; -- cgit v1.2.3 From 7fd8aefb7ce202dd9d97f752bf249be6215f1004 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Thu, 4 Jan 2018 17:25:35 +0200 Subject: IB/mlx5: Make netdev notifications multiport capable When multiple RoCE ports are supported registration for events on multiple netdevs is required. Refactor the event registration and handling to support multiple ports. 
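The core of the refactor is a standard kernel pattern: embed the
notifier_block in the per-port state so the callback can recover that
state with container_of() instead of assuming a single global port. A
minimal sketch of the pattern, with hypothetical example_ names:

#include <linux/notifier.h>

struct example_roce {
	struct notifier_block	nb;	/* one notifier per port */
	u8			native_port_num;
};

static int example_netdev_event(struct notifier_block *this,
				unsigned long event, void *ptr)
{
	/* Recover the per-port state from the embedded notifier_block */
	struct example_roce *roce = container_of(this, struct example_roce, nb);

	pr_debug("netdev event %lu on port %u\n", event,
		 roce->native_port_num);
	return NOTIFY_DONE;
}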
Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 85 +++++++++++++++++++++--------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 4 +- drivers/infiniband/hw/mlx5/qp.c | 3 +- include/linux/mlx5/driver.h | 5 +++ 4 files changed, 60 insertions(+), 37 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0c24cd42b411..5fcb2ed94c11 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -113,24 +113,30 @@ static int get_port_state(struct ib_device *ibdev, static int mlx5_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb); struct net_device *ndev = netdev_notifier_info_to_dev(ptr); - struct mlx5_ib_dev *ibdev = container_of(this, struct mlx5_ib_dev, - roce.nb); + u8 port_num = roce->native_port_num; + struct mlx5_core_dev *mdev; + struct mlx5_ib_dev *ibdev; + + ibdev = roce->dev; + mdev = ibdev->mdev; switch (event) { case NETDEV_REGISTER: case NETDEV_UNREGISTER: - write_lock(&ibdev->roce.netdev_lock); - if (ndev->dev.parent == &ibdev->mdev->pdev->dev) - ibdev->roce.netdev = (event == NETDEV_UNREGISTER) ? - NULL : ndev; - write_unlock(&ibdev->roce.netdev_lock); + write_lock(&roce->netdev_lock); + + if (ndev->dev.parent == &mdev->pdev->dev) + roce->netdev = (event == NETDEV_UNREGISTER) ? + NULL : ndev; + write_unlock(&roce->netdev_lock); break; case NETDEV_CHANGE: case NETDEV_UP: case NETDEV_DOWN: { - struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev); struct net_device *upper = NULL; if (lag_ndev) { @@ -138,27 +144,28 @@ static int mlx5_netdev_event(struct notifier_block *this, dev_put(lag_ndev); } - if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) + if ((upper == ndev || (!upper && ndev == roce->netdev)) && ibdev->ib_active) { struct ib_event ibev = { }; enum ib_port_state port_state; - if (get_port_state(&ibdev->ib_dev, 1, &port_state)) - return NOTIFY_DONE; + if (get_port_state(&ibdev->ib_dev, port_num, + &port_state)) + goto done; - if (ibdev->roce.last_port_state == port_state) - return NOTIFY_DONE; + if (roce->last_port_state == port_state) + goto done; - ibdev->roce.last_port_state = port_state; + roce->last_port_state = port_state; ibev.device = &ibdev->ib_dev; if (port_state == IB_PORT_DOWN) ibev.event = IB_EVENT_PORT_ERR; else if (port_state == IB_PORT_ACTIVE) ibev.event = IB_EVENT_PORT_ACTIVE; else - return NOTIFY_DONE; + goto done; - ibev.element.port_num = 1; + ibev.element.port_num = port_num; ib_dispatch_event(&ibev); } break; @@ -167,7 +174,7 @@ static int mlx5_netdev_event(struct notifier_block *this, default: break; } - +done: return NOTIFY_DONE; } @@ -183,11 +190,11 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, /* Ensure ndev does not disappear before we invoke dev_hold() */ - read_lock(&ibdev->roce.netdev_lock); - ndev = ibdev->roce.netdev; + read_lock(&ibdev->roce[port_num - 1].netdev_lock); + ndev = ibdev->roce[port_num - 1].netdev; if (ndev) dev_hold(ndev); - read_unlock(&ibdev->roce.netdev_lock); + read_unlock(&ibdev->roce[port_num - 1].netdev_lock); return ndev; } @@ -3579,33 +3586,33 @@ static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) } } -static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev) +static int 
mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { int err; - dev->roce.nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier(&dev->roce.nb); + dev->roce[port_num].nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->roce[port_num].nb); if (err) { - dev->roce.nb.notifier_call = NULL; + dev->roce[port_num].nb.notifier_call = NULL; return err; } return 0; } -static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev) +static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev, u8 port_num) { - if (dev->roce.nb.notifier_call) { - unregister_netdevice_notifier(&dev->roce.nb); - dev->roce.nb.notifier_call = NULL; + if (dev->roce[port_num].nb.notifier_call) { + unregister_netdevice_notifier(&dev->roce[port_num].nb); + dev->roce[port_num].nb.notifier_call = NULL; } } -static int mlx5_enable_eth(struct mlx5_ib_dev *dev) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev, u8 port_num) { int err; - err = mlx5_add_netdev_notifier(dev); + err = mlx5_add_netdev_notifier(dev, port_num); if (err) return err; @@ -3626,7 +3633,7 @@ err_disable_roce: mlx5_nic_vport_disable_roce(dev->mdev); err_unregister_netdevice_notifier: - mlx5_remove_netdev_notifier(dev); + mlx5_remove_netdev_notifier(dev, port_num); return err; } @@ -4066,7 +4073,6 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) if (!dev->port) return -ENOMEM; - rwlock_init(&dev->roce.netdev_lock); err = get_port_caps(dev); if (err) goto err_free_port; @@ -4246,12 +4252,21 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num = 0; int err; + int i; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { + for (i = 0; i < dev->num_ports; i++) { + rwlock_init(&dev->roce[i].netdev_lock); + dev->roce[i].dev = dev; + dev->roce[i].native_port_num = i + 1; + dev->roce[i].last_port_state = IB_PORT_DOWN; + } + dev->ib_dev.get_netdev = mlx5_ib_get_netdev; dev->ib_dev.create_wq = mlx5_ib_create_wq; dev->ib_dev.modify_wq = mlx5_ib_modify_wq; @@ -4264,10 +4279,9 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) (1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) | (1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL); - err = mlx5_enable_eth(dev); + err = mlx5_enable_eth(dev, port_num); if (err) return err; - dev->roce.last_port_state = IB_PORT_DOWN; } return 0; @@ -4278,13 +4292,14 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; + u8 port_num = 0; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_eth(dev); - mlx5_remove_netdev_notifier(dev); + mlx5_remove_netdev_notifier(dev, port_num); } } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 57405cf8a5c5..6106dde35144 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -667,6 +667,8 @@ struct mlx5_roce { struct notifier_block nb; atomic_t next_port; enum ib_port_state last_port_state; + struct mlx5_ib_dev *dev; + u8 native_port_num; }; struct mlx5_ib_dbg_param { @@ -757,7 +759,7 @@ struct mlx5_ib_profile { struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; - struct mlx5_roce roce; + struct 
mlx5_roce roce[MLX5_MAX_PORTS];
 	int				num_ports;
 	/* serialize update of capability mask */
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 4b08472cf0ba..ae36db3d0deb 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -2962,8 +2962,9 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 	    (ibqp->qp_type == IB_QPT_XRC_INI) ||
 	    (ibqp->qp_type == IB_QPT_XRC_TGT)) {
 		if (mlx5_lag_is_active(dev->mdev)) {
+			u8 p = mlx5_core_native_port_num(dev->mdev);
 			tx_affinity = (unsigned int)atomic_add_return(1,
-					&dev->roce.next_port) %
+					&dev->roce[p].next_port) %
 					MLX5_MAX_PORTS + 1;
 			context->flags |= cpu_to_be32(tx_affinity << 24);
 		}
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 5b0443c9d337..28733529f6ff 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1234,6 +1234,11 @@ static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev)
 	return !!(dev->priv.rl_table.max_size);
 }
 
+static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev)
+{
+	return 1;
+}
+
 enum {
 	MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32,
 };
--
cgit v1.2.3


From 32f69e4be269739c3850cd20f1a3322e95c1145f Mon Sep 17 00:00:00 2001
From: Daniel Jurgens
Date: Thu, 4 Jan 2018 17:25:36 +0200
Subject: {net, IB}/mlx5: Manage port association for multiport RoCE

When mlx5_ib_add is called, determine whether the mlx5 core device
being added is capable of dual port RoCE operation. If it is,
determine whether it is a master device or a slave device using the
num_vhca_ports and affiliate_nic_vport_criteria capabilities.

If the device is a slave, attempt to find a master device to affiliate
it with. Devices that can be affiliated will share a system image
guid. If none is found, place it on a list of unaffiliated ports. If a
master is found, bind the port to it by configuring the port
affiliation in the NIC vport context.

Similarly, when mlx5_ib_remove is called, determine the port type. If
it is a slave port, unaffiliate it from the master device; otherwise
just remove it from the unaffiliated port list.

The IB device is registered as a multiport device, even if a 2nd port
is not available for affiliation. When the 2nd port is affiliated
later, the GID cache must be refreshed in order to get the default
GIDs for the 2nd port into the cache. Export roce_rescan_device to
provide a mechanism to refresh the cache after a new port is bound.

In a multiport configuration all IB object (QP, MR, PD, etc.) related
commands should flow through the master mlx5_core_dev, while other
commands must be sent to the slave port's mlx5_core_dev; an interface
is provided to get the correct mdev for non IB object commands.
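The interface mentioned in the last paragraph is the get/put pair
added below, mlx5_ib_get_native_port_mdev() and
mlx5_ib_put_native_port_mdev(). A hedged usage sketch, with a
hypothetical example_ caller:

static int example_cmd_on_port(struct mlx5_ib_dev *ibdev, u8 port_num)
{
	struct mlx5_core_dev *mdev;
	u8 native_port;

	/* For a slave port this takes a reference that blocks
	 * unaffiliation until the matching put below.
	 */
	mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, &native_port);
	if (!mdev)
		return -ENODEV;	/* port is currently unaffiliated */

	/* ... issue the non IB-object command through mdev ... */

	mlx5_ib_put_native_port_mdev(ibdev, port_num);
	return 0;
}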
Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/cache.c | 2 +- drivers/infiniband/core/core_priv.h | 1 - drivers/infiniband/core/roce_gid_mgmt.c | 11 +- drivers/infiniband/hw/mlx5/main.c | 421 +++++++++++++++++++-- drivers/infiniband/hw/mlx5/mlx5_ib.h | 28 ++ .../net/ethernet/mellanox/mlx5/core/fpga/conn.c | 4 +- .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/vport.c | 58 +++ include/linux/mlx5/driver.h | 22 +- include/linux/mlx5/mlx5_ifc.h | 31 +- include/linux/mlx5/vport.h | 4 + include/rdma/ib_verbs.h | 8 + 12 files changed, 550 insertions(+), 42 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/infiniband/core/cache.c b/drivers/infiniband/core/cache.c index fc4022884dbb..e9a409d7f4e2 100644 --- a/drivers/infiniband/core/cache.c +++ b/drivers/infiniband/core/cache.c @@ -821,7 +821,7 @@ static int gid_table_setup_one(struct ib_device *ib_dev) if (err) return err; - roce_rescan_device(ib_dev); + rdma_roce_rescan_device(ib_dev); return err; } diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 39e3c1d02613..39e4acdb025e 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -137,7 +137,6 @@ int ib_cache_gid_del_all_netdev_gids(struct ib_device *ib_dev, u8 port, int roce_gid_mgmt_init(void); void roce_gid_mgmt_cleanup(void); -void roce_rescan_device(struct ib_device *ib_dev); unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port); int ib_cache_setup_one(struct ib_device *device); diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c index ebfe45739ca7..5a52ec77940a 100644 --- a/drivers/infiniband/core/roce_gid_mgmt.c +++ b/drivers/infiniband/core/roce_gid_mgmt.c @@ -410,13 +410,18 @@ static void enum_all_gids_of_dev_cb(struct ib_device *ib_dev, rtnl_unlock(); } -/* This function will rescan all of the network devices in the system - * and add their gids, as needed, to the relevant RoCE devices. */ -void roce_rescan_device(struct ib_device *ib_dev) +/** + * rdma_roce_rescan_device - Rescan all of the network devices in the system + * and add their gids, as needed, to the relevant RoCE devices. 
+ * + * @device: the rdma device + */ +void rdma_roce_rescan_device(struct ib_device *ib_dev) { ib_enum_roce_netdev(ib_dev, pass_all_filter, NULL, enum_all_gids_of_dev_cb, NULL); } +EXPORT_SYMBOL(rdma_roce_rescan_device); static void callback_for_addr_gid_device_scan(struct ib_device *device, u8 port, diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 5fcb2ed94c11..4fbbe4c7a99b 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -74,6 +74,23 @@ enum { MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3, }; +static LIST_HEAD(mlx5_ib_unaffiliated_port_list); +static LIST_HEAD(mlx5_ib_dev_list); +/* + * This mutex should be held when accessing either of the above lists + */ +static DEFINE_MUTEX(mlx5_ib_multiport_mutex); + +struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi) +{ + struct mlx5_ib_dev *dev; + + mutex_lock(&mlx5_ib_multiport_mutex); + dev = mpi->ibdev; + mutex_unlock(&mlx5_ib_multiport_mutex); + return dev; +} + static enum rdma_link_layer mlx5_port_type_cap_to_rdma_ll(int port_type_cap) { @@ -120,7 +137,9 @@ static int mlx5_netdev_event(struct notifier_block *this, struct mlx5_ib_dev *ibdev; ibdev = roce->dev; - mdev = ibdev->mdev; + mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL); + if (!mdev) + return NOTIFY_DONE; switch (event) { case NETDEV_REGISTER: @@ -175,6 +194,7 @@ static int mlx5_netdev_event(struct notifier_block *this, break; } done: + mlx5_ib_put_native_port_mdev(ibdev, port_num); return NOTIFY_DONE; } @@ -183,10 +203,15 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, { struct mlx5_ib_dev *ibdev = to_mdev(device); struct net_device *ndev; + struct mlx5_core_dev *mdev; + + mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL); + if (!mdev) + return NULL; - ndev = mlx5_lag_get_roce_netdev(ibdev->mdev); + ndev = mlx5_lag_get_roce_netdev(mdev); if (ndev) - return ndev; + goto out; /* Ensure ndev does not disappear before we invoke dev_hold() */ @@ -196,9 +221,70 @@ static struct net_device *mlx5_ib_get_netdev(struct ib_device *device, dev_hold(ndev); read_unlock(&ibdev->roce[port_num - 1].netdev_lock); +out: + mlx5_ib_put_native_port_mdev(ibdev, port_num); return ndev; } +struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev, + u8 ib_port_num, + u8 *native_port_num) +{ + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, + ib_port_num); + struct mlx5_core_dev *mdev = NULL; + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_port *port; + + if (native_port_num) + *native_port_num = 1; + + if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return ibdev->mdev; + + port = &ibdev->port[ib_port_num - 1]; + if (!port) + return NULL; + + spin_lock(&port->mp.mpi_lock); + mpi = ibdev->port[ib_port_num - 1].mp.mpi; + if (mpi && !mpi->unaffiliate) { + mdev = mpi->mdev; + /* If it's the master no need to refcount, it'll exist + * as long as the ib_dev exists. 
+ */ + if (!mpi->is_master) + mpi->mdev_refcnt++; + } + spin_unlock(&port->mp.mpi_lock); + + return mdev; +} + +void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num) +{ + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev, + port_num); + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_port *port; + + if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return; + + port = &ibdev->port[port_num - 1]; + + spin_lock(&port->mp.mpi_lock); + mpi = ibdev->port[port_num - 1].mp.mpi; + if (mpi->is_master) + goto out; + + mpi->mdev_refcnt--; + if (mpi->unaffiliate) + complete(&mpi->unref_comp); +out: + spin_unlock(&port->mp.mpi_lock); +} + static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed, u8 *active_width) { @@ -3160,12 +3246,11 @@ static void get_ext_port_caps(struct mlx5_ib_dev *dev) mlx5_query_ext_port_caps(dev, port); } -static int get_port_caps(struct mlx5_ib_dev *dev) +static int get_port_caps(struct mlx5_ib_dev *dev, u8 port) { struct ib_device_attr *dprops = NULL; struct ib_port_attr *pprops = NULL; int err = -ENOMEM; - int port; struct ib_udata uhw = {.inlen = 0, .outlen = 0}; pprops = kmalloc(sizeof(*pprops), GFP_KERNEL); @@ -3186,22 +3271,21 @@ static int get_port_caps(struct mlx5_ib_dev *dev) goto out; } - for (port = 1; port <= dev->num_ports; port++) { - memset(pprops, 0, sizeof(*pprops)); - err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); - if (err) { - mlx5_ib_warn(dev, "query_port %d failed %d\n", - port, err); - break; - } - dev->mdev->port_caps[port - 1].pkey_table_len = - dprops->max_pkeys; - dev->mdev->port_caps[port - 1].gid_table_len = - pprops->gid_tbl_len; - mlx5_ib_dbg(dev, "pkey_table_len %d, gid_table_len %d\n", - dprops->max_pkeys, pprops->gid_tbl_len); + memset(pprops, 0, sizeof(*pprops)); + err = mlx5_ib_query_port(&dev->ib_dev, port, pprops); + if (err) { + mlx5_ib_warn(dev, "query_port %d failed %d\n", + port, err); + goto out; } + dev->mdev->port_caps[port - 1].pkey_table_len = + dprops->max_pkeys; + dev->mdev->port_caps[port - 1].gid_table_len = + pprops->gid_tbl_len; + mlx5_ib_dbg(dev, "port %d: pkey_table_len %d, gid_table_len %d\n", + port, dprops->max_pkeys, pprops->gid_tbl_len); + out: kfree(pprops); kfree(dprops); @@ -4054,8 +4138,203 @@ mlx5_ib_get_vector_affinity(struct ib_device *ibdev, int comp_vector) return mlx5_get_vector_affinity(dev->mdev, comp_vector); } +/* The mlx5_ib_multiport_mutex should be held when calling this function */ +static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev, + struct mlx5_ib_multiport_info *mpi) +{ + u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + struct mlx5_ib_port *port = &ibdev->port[port_num]; + int comps; + int err; + int i; + + spin_lock(&port->mp.mpi_lock); + if (!mpi->ibdev) { + spin_unlock(&port->mp.mpi_lock); + return; + } + mpi->ibdev = NULL; + + spin_unlock(&port->mp.mpi_lock); + mlx5_remove_netdev_notifier(ibdev, port_num); + spin_lock(&port->mp.mpi_lock); + + comps = mpi->mdev_refcnt; + if (comps) { + mpi->unaffiliate = true; + init_completion(&mpi->unref_comp); + spin_unlock(&port->mp.mpi_lock); + + for (i = 0; i < comps; i++) + wait_for_completion(&mpi->unref_comp); + + spin_lock(&port->mp.mpi_lock); + mpi->unaffiliate = false; + } + + port->mp.mpi = NULL; + + list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); + + spin_unlock(&port->mp.mpi_lock); + + err = mlx5_nic_vport_unaffiliate_multiport(mpi->mdev); + + mlx5_ib_dbg(ibdev, "unaffiliated port %d\n", port_num + 1); + /* Log an error, still 
needed to cleanup the pointers and add + * it back to the list. + */ + if (err) + mlx5_ib_err(ibdev, "Failed to unaffiliate port %u\n", + port_num + 1); + + ibdev->roce[port_num].last_port_state = IB_PORT_DOWN; +} + +/* The mlx5_ib_multiport_mutex should be held when calling this function */ +static bool mlx5_ib_bind_slave_port(struct mlx5_ib_dev *ibdev, + struct mlx5_ib_multiport_info *mpi) +{ + u8 port_num = mlx5_core_native_port_num(mpi->mdev) - 1; + int err; + + spin_lock(&ibdev->port[port_num].mp.mpi_lock); + if (ibdev->port[port_num].mp.mpi) { + mlx5_ib_warn(ibdev, "port %d already affiliated.\n", + port_num + 1); + spin_unlock(&ibdev->port[port_num].mp.mpi_lock); + return false; + } + + ibdev->port[port_num].mp.mpi = mpi; + mpi->ibdev = ibdev; + spin_unlock(&ibdev->port[port_num].mp.mpi_lock); + + err = mlx5_nic_vport_affiliate_multiport(ibdev->mdev, mpi->mdev); + if (err) + goto unbind; + + err = get_port_caps(ibdev, mlx5_core_native_port_num(mpi->mdev)); + if (err) + goto unbind; + + err = mlx5_add_netdev_notifier(ibdev, port_num); + if (err) { + mlx5_ib_err(ibdev, "failed adding netdev notifier for port %u\n", + port_num + 1); + goto unbind; + } + + return true; + +unbind: + mlx5_ib_unbind_slave_port(ibdev, mpi); + return false; +} + +static int mlx5_ib_init_multiport_master(struct mlx5_ib_dev *dev) +{ + int port_num = mlx5_core_native_port_num(dev->mdev) - 1; + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, + port_num + 1); + struct mlx5_ib_multiport_info *mpi; + int err; + int i; + + if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return 0; + + err = mlx5_query_nic_vport_system_image_guid(dev->mdev, + &dev->sys_image_guid); + if (err) + return err; + + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + return err; + + mutex_lock(&mlx5_ib_multiport_mutex); + for (i = 0; i < dev->num_ports; i++) { + bool bound = false; + + /* build a stub multiport info struct for the native port. 
*/ + if (i == port_num) { + mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); + if (!mpi) { + mutex_unlock(&mlx5_ib_multiport_mutex); + mlx5_nic_vport_disable_roce(dev->mdev); + return -ENOMEM; + } + + mpi->is_master = true; + mpi->mdev = dev->mdev; + mpi->sys_image_guid = dev->sys_image_guid; + dev->port[i].mp.mpi = mpi; + mpi->ibdev = dev; + mpi = NULL; + continue; + } + + list_for_each_entry(mpi, &mlx5_ib_unaffiliated_port_list, + list) { + if (dev->sys_image_guid == mpi->sys_image_guid && + (mlx5_core_native_port_num(mpi->mdev) - 1) == i) { + bound = mlx5_ib_bind_slave_port(dev, mpi); + } + + if (bound) { + dev_dbg(&mpi->mdev->pdev->dev, "removing port from unaffiliated list.\n"); + mlx5_ib_dbg(dev, "port %d bound\n", i + 1); + list_del(&mpi->list); + break; + } + } + if (!bound) { + get_port_caps(dev, i + 1); + mlx5_ib_dbg(dev, "no free port found for port %d\n", + i + 1); + } + } + + list_add_tail(&dev->ib_dev_list, &mlx5_ib_dev_list); + mutex_unlock(&mlx5_ib_multiport_mutex); + return err; +} + +static void mlx5_ib_cleanup_multiport_master(struct mlx5_ib_dev *dev) +{ + int port_num = mlx5_core_native_port_num(dev->mdev) - 1; + enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, + port_num + 1); + int i; + + if (!mlx5_core_is_mp_master(dev->mdev) || ll != IB_LINK_LAYER_ETHERNET) + return; + + mutex_lock(&mlx5_ib_multiport_mutex); + for (i = 0; i < dev->num_ports; i++) { + if (dev->port[i].mp.mpi) { + /* Destroy the native port stub */ + if (i == port_num) { + kfree(dev->port[i].mp.mpi); + dev->port[i].mp.mpi = NULL; + } else { + mlx5_ib_dbg(dev, "unbinding port_num: %d\n", i + 1); + mlx5_ib_unbind_slave_port(dev, dev->port[i].mp.mpi); + } + } + } + + mlx5_ib_dbg(dev, "removing from devlist\n"); + list_del(&dev->ib_dev_list); + mutex_unlock(&mlx5_ib_multiport_mutex); + + mlx5_nic_vport_disable_roce(dev->mdev); +} + static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev) { + mlx5_ib_cleanup_multiport_master(dev); #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING cleanup_srcu_struct(&dev->mr_srcu); #endif @@ -4067,16 +4346,36 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; const char *name; int err; + int i; dev->port = kcalloc(dev->num_ports, sizeof(*dev->port), GFP_KERNEL); if (!dev->port) return -ENOMEM; - err = get_port_caps(dev); + for (i = 0; i < dev->num_ports; i++) { + spin_lock_init(&dev->port[i].mp.mpi_lock); + rwlock_init(&dev->roce[i].netdev_lock); + } + + err = mlx5_ib_init_multiport_master(dev); if (err) goto err_free_port; + if (!mlx5_core_mp_enabled(mdev)) { + int i; + + for (i = 1; i <= dev->num_ports; i++) { + err = get_port_caps(dev, i); + if (err) + break; + } + } else { + err = get_port_caps(dev, mlx5_core_native_port_num(mdev)); + } + if (err) + goto err_mp; + if (mlx5_use_mad_ifc(dev)) get_ext_port_caps(dev); @@ -4106,6 +4405,8 @@ static int mlx5_ib_stage_init_init(struct mlx5_ib_dev *dev) #endif return 0; +err_mp: + mlx5_ib_cleanup_multiport_master(dev); err_free_port: kfree(dev->port); @@ -4252,16 +4553,16 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num = 0; + u8 port_num; int err; int i; + port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); if (ll == IB_LINK_LAYER_ETHERNET) { for (i = 0; i < dev->num_ports; i++) { - rwlock_init(&dev->roce[i].netdev_lock); dev->roce[i].dev = dev; 
dev->roce[i].native_port_num = i + 1; dev->roce[i].last_port_state = IB_PORT_DOWN; @@ -4292,8 +4593,9 @@ static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev) struct mlx5_core_dev *mdev = dev->mdev; enum rdma_link_layer ll; int port_type_cap; - u8 port_num = 0; + u8 port_num; + port_num = mlx5_core_native_port_num(dev->mdev) - 1; port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); @@ -4443,6 +4745,8 @@ static void __mlx5_ib_remove(struct mlx5_ib_dev *dev, ib_dealloc_device((struct ib_device *)dev); } +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num); + static void *__mlx5_ib_add(struct mlx5_core_dev *mdev, const struct mlx5_ib_profile *profile) { @@ -4457,7 +4761,8 @@ static void *__mlx5_ib_add(struct mlx5_core_dev *mdev, return NULL; dev->mdev = mdev; - dev->num_ports = MLX5_CAP_GEN(mdev, num_ports); + dev->num_ports = max(MLX5_CAP_GEN(mdev, num_ports), + MLX5_CAP_GEN(mdev, num_vhca_ports)); for (i = 0; i < MLX5_IB_STAGE_MAX; i++) { if (profile->stage[i].init) { @@ -4520,15 +4825,81 @@ static const struct mlx5_ib_profile pf_profile = { NULL), }; +static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev, u8 port_num) +{ + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_dev *dev; + bool bound = false; + int err; + + mpi = kzalloc(sizeof(*mpi), GFP_KERNEL); + if (!mpi) + return NULL; + + mpi->mdev = mdev; + + err = mlx5_query_nic_vport_system_image_guid(mdev, + &mpi->sys_image_guid); + if (err) { + kfree(mpi); + return NULL; + } + + mutex_lock(&mlx5_ib_multiport_mutex); + list_for_each_entry(dev, &mlx5_ib_dev_list, ib_dev_list) { + if (dev->sys_image_guid == mpi->sys_image_guid) + bound = mlx5_ib_bind_slave_port(dev, mpi); + + if (bound) { + rdma_roce_rescan_device(&dev->ib_dev); + break; + } + } + + if (!bound) { + list_add_tail(&mpi->list, &mlx5_ib_unaffiliated_port_list); + dev_dbg(&mdev->pdev->dev, "no suitable IB device found to bind to, added to unaffiliated list.\n"); + } else { + mlx5_ib_dbg(dev, "bound port %u\n", port_num + 1); + } + mutex_unlock(&mlx5_ib_multiport_mutex); + + return mpi; +} + static void *mlx5_ib_add(struct mlx5_core_dev *mdev) { + enum rdma_link_layer ll; + int port_type_cap; + + port_type_cap = MLX5_CAP_GEN(mdev, port_type); + ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); + + if (mlx5_core_is_mp_slave(mdev) && ll == IB_LINK_LAYER_ETHERNET) { + u8 port_num = mlx5_core_native_port_num(mdev) - 1; + + return mlx5_ib_add_slave_port(mdev, port_num); + } + return __mlx5_ib_add(mdev, &pf_profile); } static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) { - struct mlx5_ib_dev *dev = context; + struct mlx5_ib_multiport_info *mpi; + struct mlx5_ib_dev *dev; + + if (mlx5_core_is_mp_slave(mdev)) { + mpi = context; + mutex_lock(&mlx5_ib_multiport_mutex); + if (mpi->ibdev) + mlx5_ib_unbind_slave_port(mpi->ibdev, mpi); + list_del(&mpi->list); + mutex_unlock(&mlx5_ib_multiport_mutex); + return; + } + dev = context; __mlx5_ib_remove(dev, dev->profile, MLX5_IB_STAGE_MAX); } diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 6106dde35144..a70a4c02e396 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -654,8 +654,17 @@ struct mlx5_ib_counters { u16 set_id; }; +struct mlx5_ib_multiport_info; + +struct mlx5_ib_multiport { + struct mlx5_ib_multiport_info *mpi; + /* To be held when accessing the multiport info */ + spinlock_t mpi_lock; +}; + struct mlx5_ib_port { struct 
mlx5_ib_counters cnts; + struct mlx5_ib_multiport mp; }; struct mlx5_roce { @@ -756,6 +765,17 @@ struct mlx5_ib_profile { struct mlx5_ib_stage stage[MLX5_IB_STAGE_MAX]; }; +struct mlx5_ib_multiport_info { + struct list_head list; + struct mlx5_ib_dev *ibdev; + struct mlx5_core_dev *mdev; + struct completion unref_comp; + u64 sys_image_guid; + u32 mdev_refcnt; + bool is_master; + bool unaffiliate; +}; + struct mlx5_ib_dev { struct ib_device ib_dev; struct mlx5_core_dev *mdev; @@ -800,6 +820,8 @@ struct mlx5_ib_dev { struct mutex lb_mutex; u32 user_td; u8 umr_fence; + struct list_head ib_dev_list; + u64 sys_image_guid; }; static inline struct mlx5_ib_cq *to_mibcq(struct mlx5_core_cq *mcq) @@ -1071,6 +1093,12 @@ int mlx5_ib_generate_wc(struct ib_cq *ibcq, struct ib_wc *wc); void mlx5_ib_free_bfreg(struct mlx5_ib_dev *dev, struct mlx5_bfreg_info *bfregi, int bfregn); +struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi); +struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *dev, + u8 ib_port_num, + u8 *native_port_num); +void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *dev, + u8 port_num); static inline void init_query_mad(struct ib_smp *mad) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index c4392f741c5f..c841b03c3e48 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -688,7 +688,7 @@ static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn) MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC); MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); - MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); MLX5_SET(qpc, qpc, pd, conn->fdev->conn_res.pdn); MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn); MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn); @@ -727,7 +727,7 @@ static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn) MLX5_SET(qpc, qpc, next_rcv_psn, MLX5_GET(fpga_qpc, conn->fpga_qpc, next_send_psn)); MLX5_SET(qpc, qpc, primary_address_path.pkey_index, MLX5_FPGA_PKEY_INDEX); - MLX5_SET(qpc, qpc, primary_address_path.port, MLX5_FPGA_PORT_NUM); + MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, MLX5_FPGA_PORT_NUM); ether_addr_copy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32), MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_mac_47_32)); MLX5_SET(qpc, qpc, primary_address_path.udp_sport, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c index d2a66dc4adc6..261b95d014a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c @@ -187,7 +187,7 @@ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp MLX5_QP_ENHANCED_ULP_STATELESS_MODE); addr_path = MLX5_ADDR_OF(qpc, qpc, primary_address_path); - MLX5_SET(ads, addr_path, port, 1); + MLX5_SET(ads, addr_path, vhca_port_num, 1); MLX5_SET(ads, addr_path, grh, 1); ret = mlx5_core_create_qp(mdev, qp, in, inlen); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 916523103f16..9cb939b6a859 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -1121,3 +1121,61 @@ ex: return err; } 
EXPORT_SYMBOL_GPL(mlx5_core_modify_hca_vport_context); + +int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev, + struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + err = mlx5_nic_vport_enable_roce(port_mdev); + if (err) + goto free; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, + MLX5_CAP_GEN(master_mdev, vhca_id)); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, + MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria)); + + err = mlx5_modify_nic_vport_context(port_mdev, in, inlen); + if (err) + mlx5_nic_vport_disable_roce(port_mdev); + +free: + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_affiliate_multiport); + +int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev) +{ + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *in; + int err; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(modify_nic_vport_context_in, in, field_select.affiliation, 1); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliated_vhca_id, 0); + MLX5_SET(modify_nic_vport_context_in, in, + nic_vport_context.affiliation_criteria, 0); + + err = mlx5_modify_nic_vport_context(port_mdev, in, inlen); + if (!err) + mlx5_nic_vport_disable_roce(port_mdev); + + kvfree(in); + return err; +} +EXPORT_SYMBOL_GPL(mlx5_nic_vport_unaffiliate_multiport); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 28733529f6ff..d5c787519e06 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1234,9 +1234,29 @@ static inline bool mlx5_rl_is_supported(struct mlx5_core_dev *dev) return !!(dev->priv.rl_table.max_size); } +static inline int mlx5_core_is_mp_slave(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, affiliate_nic_vport_criteria) && + MLX5_CAP_GEN(dev, num_vhca_ports) <= 1; +} + +static inline int mlx5_core_is_mp_master(struct mlx5_core_dev *dev) +{ + return MLX5_CAP_GEN(dev, num_vhca_ports) > 1; +} + +static inline int mlx5_core_mp_enabled(struct mlx5_core_dev *dev) +{ + return mlx5_core_is_mp_slave(dev) || + mlx5_core_is_mp_master(dev); +} + static inline int mlx5_core_native_port_num(struct mlx5_core_dev *dev) { - return 1; + if (!mlx5_core_mp_enabled(dev)) + return 1; + + return MLX5_CAP_GEN(dev, native_port_num); } enum { diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b1c81d7a86cb..7e88c8e7f374 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -502,7 +502,7 @@ struct mlx5_ifc_ads_bits { u8 dei_cfi[0x1]; u8 eth_prio[0x3]; u8 sl[0x4]; - u8 port[0x8]; + u8 vhca_port_num[0x8]; u8 rmac_47_32[0x10]; u8 rmac_31_0[0x20]; @@ -794,7 +794,10 @@ enum { }; struct mlx5_ifc_cmd_hca_cap_bits { - u8 reserved_at_0[0x80]; + u8 reserved_at_0[0x30]; + u8 vhca_id[0x10]; + + u8 reserved_at_40[0x40]; u8 log_max_srq_sz[0x8]; u8 log_max_qp_sz[0x8]; @@ -1066,8 +1069,11 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 reserved_at_5f8[0x3]; u8 log_max_xrq[0x5]; - u8 reserved_at_600[0x1e]; - u8 sw_owner_id; + u8 affiliate_nic_vport_criteria[0x8]; + u8 native_port_num[0x8]; + u8 num_vhca_ports[0x8]; + u8 reserved_at_618[0x6]; + u8 sw_owner_id[0x1]; u8 reserved_at_61f[0x1e1]; }; @@ -2617,7 +2623,12 @@ struct mlx5_ifc_nic_vport_context_bits { u8 
event_on_mc_address_change[0x1];
 	u8	   event_on_uc_address_change[0x1];
 
-	u8	   reserved_at_40[0xf0];
+	u8	   reserved_at_40[0xc];
+
+	u8	   affiliation_criteria[0x4];
+	u8	   affiliated_vhca_id[0x10];
+
+	u8	   reserved_at_60[0xd0];
 
 	u8	   mtu[0x10];
 
@@ -3260,7 +3271,8 @@ struct mlx5_ifc_set_roce_address_in_bits {
 	u8	   op_mod[0x10];
 
 	u8	   roce_address_index[0x10];
-	u8	   reserved_at_50[0x10];
+	u8	   reserved_at_50[0xc];
+	u8	   vhca_port_num[0x4];
 
 	u8	   reserved_at_60[0x20];
 
@@ -3880,7 +3892,8 @@ struct mlx5_ifc_query_roce_address_in_bits {
 	u8	   op_mod[0x10];
 
 	u8	   roce_address_index[0x10];
-	u8	   reserved_at_50[0x10];
+	u8	   reserved_at_50[0xc];
+	u8	   vhca_port_num[0x4];
 
 	u8	   reserved_at_60[0x20];
 };
@@ -5312,7 +5325,9 @@ struct mlx5_ifc_modify_nic_vport_context_out_bits {
 };
 
 struct mlx5_ifc_modify_nic_vport_field_select_bits {
-	u8	   reserved_at_0[0x14];
+	u8	   reserved_at_0[0x12];
+	u8	   affiliation[0x1];
+	u8	   reserved_at_13[0x1];
 	u8	   disable_uc_local_lb[0x1];
 	u8	   disable_mc_local_lb[0x1];
 	u8	   node_guid[0x1];
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index aaa0bb9e7655..64e193e87394 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -116,4 +116,8 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 				       struct mlx5_hca_vport_context *req);
 int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable);
 int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status);
+
+int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
+				       struct mlx5_core_dev *port_mdev);
+int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev);
 #endif /* __MLX5_VPORT_H__ */
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index e44a8adac677..f25c03687ee9 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -3850,4 +3850,12 @@ ib_get_vector_affinity(struct ib_device *device, int comp_vector)
 
 }
 
+/**
+ * rdma_roce_rescan_device - Rescan all of the network devices in the system
+ * and add their gids, as needed, to the relevant RoCE devices.
+ *
+ * @device: the rdma device
+ */
+void rdma_roce_rescan_device(struct ib_device *ibdev);
+
 #endif /* IB_VERBS_H */
--
cgit v1.2.3


From cfe4e37fdcacbc33176cfc2430df96355ee14489 Mon Sep 17 00:00:00 2001
From: Daniel Jurgens
Date: Thu, 4 Jan 2018 17:25:41 +0200
Subject: {net, IB}/mlx5: Change set_roce_gid to take a port number

When in dual port mode, setting a RoCE GID for any port flows through
the master port's mlx5_core_dev. Provide an interface to set the port
when sending this command.
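With the extended signature below, a caller names the vHCA port
explicitly. A minimal sketch: the example_ wrapper is hypothetical,
and the RoCE-v2/IPv6/no-VLAN parameters are illustrative choices
mirroring the FPGA-conn call sites in the diff.

static int example_set_gid(struct mlx5_core_dev *master_mdev,
			   unsigned int index, const u8 *gid_raw,
			   const u8 *mac, u8 vhca_port_num)
{
	/* On a dual-port device the master's command interface can now
	 * program a GID for either port; the port number is only set
	 * into the command when num_vhca_ports > 0.
	 */
	return mlx5_core_roce_gid_set(master_mdev, index,
				      MLX5_ROCE_VERSION_2,
				      MLX5_ROCE_L3_TYPE_IPV6,
				      gid_raw, mac, false, 0,
				      vhca_port_num);
}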
Signed-off-by: Daniel Jurgens Reviewed-by: Parav Pandit Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c | 7 ++++--- drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c | 5 ++++- include/linux/mlx5/driver.h | 2 +- 4 files changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 4791d747cc57..653b56377e69 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -475,7 +475,7 @@ static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num, return mlx5_core_roce_gid_set(dev->mdev, index, roce_version, roce_l3_type, gid->raw, mac, vlan, - vlan_id); + vlan_id, port_num); } static int mlx5_ib_add_gid(struct ib_device *device, u8 port_num, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c index c841b03c3e48..e6175f8ac0e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c @@ -888,7 +888,8 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev, err = mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, MLX5_ROCE_VERSION_2, MLX5_ROCE_L3_TYPE_IPV6, - remote_ip, remote_mac, true, 0); + remote_ip, remote_mac, true, 0, + MLX5_FPGA_PORT_NUM); if (err) { mlx5_fpga_err(fdev, "Failed to set SGID: %d\n", err); ret = ERR_PTR(err); @@ -954,7 +955,7 @@ err_cq: mlx5_fpga_conn_destroy_cq(conn); err_gid: mlx5_core_roce_gid_set(fdev->mdev, conn->qp.sgid_index, 0, 0, NULL, - NULL, false, 0); + NULL, false, 0, MLX5_FPGA_PORT_NUM); err_rsvd_gid: mlx5_core_reserved_gid_free(fdev->mdev, conn->qp.sgid_index); err: @@ -982,7 +983,7 @@ void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn) mlx5_fpga_conn_destroy_cq(conn); mlx5_core_roce_gid_set(conn->fdev->mdev, conn->qp.sgid_index, 0, 0, - NULL, NULL, false, 0); + NULL, NULL, false, 0, MLX5_FPGA_PORT_NUM); mlx5_core_reserved_gid_free(conn->fdev->mdev, conn->qp.sgid_index); kfree(conn); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c index 573f59f46d41..7722a3f9bb68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c @@ -121,7 +121,7 @@ EXPORT_SYMBOL_GPL(mlx5_core_reserved_gids_count); int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, u8 roce_version, u8 roce_l3_type, const u8 *gid, - const u8 *mac, bool vlan, u16 vlan_id) + const u8 *mac, bool vlan, u16 vlan_id, u8 port_num) { #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v) u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; @@ -148,6 +148,9 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index, memcpy(addr_l3_addr, gid, gidsz); } + if (MLX5_CAP_GEN(dev, num_vhca_ports) > 0) + MLX5_SET(set_roce_address_in, in, vhca_port_num, port_num); + MLX5_SET(set_roce_address_in, in, roce_address_index, index); MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d5c787519e06..9136e35f2f7e 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1112,7 +1112,7 @@ void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg); 
 unsigned int mlx5_core_reserved_gids_count(struct mlx5_core_dev *dev);
 int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
 			   u8 roce_version, u8 roce_l3_type, const u8 *gid,
-			   const u8 *mac, bool vlan, u16 vlan_id);
+			   const u8 *mac, bool vlan, u16 vlan_id, u8 port_num);
 
 static inline int fw_initializing(struct mlx5_core_dev *dev)
 {
--
cgit v1.2.3

From 24d33d2c8e92abffe1f0653d42fc65b8f164a6d9 Mon Sep 17 00:00:00 2001
From: Feras Daoud
Date: Tue, 16 Jan 2018 20:08:40 +0200
Subject: net/mlx5e: Add clock info page to mlx5 core devices

Add a new page to the mlx5 core device that holds clock info data and
lets user-level applications translate a CQE timestamp into
nanoseconds. The information stored in this page is described by
struct mlx5_ib_clock_info.

To synchronize between kernel and user space, a sequence number is
incremented at the beginning and at the end of each update. An odd
value means an update is in progress; an even value means the update
has completed. To guarantee that the data structure is read
atomically, user space will do:

repeat:
        seq1 = read clock_info->sign
        wait while seq1 is odd
        read the clock info fields
        seq2 = read clock_info->sign
        if seq1 != seq2 goto repeat

Reviewed-by: Yishai Hadas
Reviewed-by: Jason Gunthorpe
Reviewed-by: Alex Vesker
Signed-off-by: Feras Daoud
Signed-off-by: Eitan Rabin
Signed-off-by: Leon Romanovsky
Signed-off-by: Doug Ledford
---
 .../net/ethernet/mellanox/mlx5/core/lib/clock.c | 55 ++++++++++++++++++++++
 include/linux/mlx5/driver.h                     |  3 ++
 include/uapi/rdma/mlx5-abi.h                    | 16 +++++++
 3 files changed, 74 insertions(+)

(limited to 'include/linux/mlx5/driver.h')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index fa8aed62b231..4b6cb9b38686 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -31,6 +31,8 @@
  */
 
 #include <linux/clocksource.h>
+#include <linux/highmem.h>
+#include <rdma/mlx5-abi.h>
 #include "en.h"
 
 enum {
@@ -71,6 +73,28 @@ static u64 read_internal_timer(const struct cyclecounter *cc)
 	return mlx5_read_internal_timer(mdev) & cc->mask;
 }
 
+static void mlx5_update_clock_info_page(struct mlx5_core_dev *mdev)
+{
+	struct mlx5_ib_clock_info *clock_info = mdev->clock_info;
+	struct mlx5_clock *clock = &mdev->clock;
+	u32 sign;
+
+	if (!clock_info)
+		return;
+
+	sign = smp_load_acquire(&clock_info->sign);
+	smp_store_mb(clock_info->sign,
+		     sign | MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);
+
+	clock_info->cycles = clock->tc.cycle_last;
+	clock_info->mult   = clock->cycles.mult;
+	clock_info->nsec   = clock->tc.nsec;
+	clock_info->frac   = clock->tc.frac;
+
+	smp_store_release(&clock_info->sign,
+			  sign + MLX5_IB_CLOCK_INFO_KERNEL_UPDATING * 2);
+}
+
 static void mlx5_pps_out(struct work_struct *work)
 {
 	struct mlx5_pps *pps_info = container_of(work, struct mlx5_pps,
@@ -109,6 +133,7 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
 
 	write_lock_irqsave(&clock->lock, flags);
 	timecounter_read(&clock->tc);
+	mlx5_update_clock_info_page(clock->mdev);
 	write_unlock_irqrestore(&clock->lock, flags);
 	schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
 }
@@ -123,6 +148,7 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
 
 	write_lock_irqsave(&clock->lock, flags);
 	timecounter_init(&clock->tc, &clock->cycles, ns);
+	mlx5_update_clock_info_page(clock->mdev);
 	write_unlock_irqrestore(&clock->lock, flags);
 
 	return 0;
@@ -152,6 +178,7 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 
 	write_lock_irqsave(&clock->lock, flags);
 	timecounter_adjtime(&clock->tc, delta);
+	mlx5_update_clock_info_page(clock->mdev);
 	write_unlock_irqrestore(&clock->lock, flags);
 
 	return 0;
@@ -179,6 +206,7 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
 	timecounter_read(&clock->tc);
 	clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
 				       clock->nominal_c_mult + diff;
+	mlx5_update_clock_info_page(clock->mdev);
 	write_unlock_irqrestore(&clock->lock, flags);
 
 	return 0;
@@ -470,6 +498,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
 					clock->cycles.shift);
 	clock->nominal_c_mult = clock->cycles.mult;
 	clock->cycles.mask = CLOCKSOURCE_MASK(41);
+	clock->mdev = mdev;
 
 	timecounter_init(&clock->tc, &clock->cycles,
 			 ktime_to_ns(ktime_get_real()));
@@ -482,6 +511,25 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
 	do_div(ns, NSEC_PER_SEC / 2 / HZ);
 	clock->overflow_period = ns;
 
+	mdev->clock_info_page = alloc_page(GFP_KERNEL);
+	if (mdev->clock_info_page) {
+		mdev->clock_info = kmap(mdev->clock_info_page);
+		if (!mdev->clock_info) {
+			__free_page(mdev->clock_info_page);
+			mlx5_core_warn(mdev, "failed to map clock page\n");
+		} else {
+			mdev->clock_info->sign   = 0;
+			mdev->clock_info->nsec   = clock->tc.nsec;
+			mdev->clock_info->cycles = clock->tc.cycle_last;
+			mdev->clock_info->mask   = clock->cycles.mask;
+			mdev->clock_info->mult   = clock->nominal_c_mult;
+			mdev->clock_info->shift  = clock->cycles.shift;
+			mdev->clock_info->frac   = clock->tc.frac;
+			mdev->clock_info->overflow_period =
+						clock->overflow_period;
+		}
+	}
+
 	INIT_WORK(&clock->pps_info.out_work, mlx5_pps_out);
 	INIT_DELAYED_WORK(&clock->overflow_work, mlx5_timestamp_overflow);
 	if (clock->overflow_period)
@@ -521,5 +569,12 @@ void mlx5_cleanup_clock(struct mlx5_core_dev *mdev)
 
 	cancel_work_sync(&clock->pps_info.out_work);
 	cancel_delayed_work_sync(&clock->overflow_work);
+
+	if (mdev->clock_info) {
+		kunmap(mdev->clock_info_page);
+		__free_page(mdev->clock_info_page);
+		mdev->clock_info = NULL;
+	}
+
 	kfree(clock->ptp_info.pin_config);
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 9136e35f2f7e..c403151133e9 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -797,6 +797,7 @@ struct mlx5_clock {
 	u32                        nominal_c_mult;
 	unsigned long              overflow_period;
 	struct delayed_work        overflow_work;
+	struct mlx5_core_dev      *mdev;
 	struct ptp_clock          *ptp;
 	struct ptp_clock_info      ptp_info;
 	struct mlx5_pps            pps_info;
@@ -844,6 +845,8 @@ struct mlx5_core_dev {
 	struct cpu_rmap         *rmap;
 #endif
 	struct mlx5_clock        clock;
+	struct mlx5_ib_clock_info  *clock_info;
+	struct page             *clock_info_page;
 };
 
 struct mlx5_db {
diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h
index f6d319dfc7bf..0299deed71a2 100644
--- a/include/uapi/rdma/mlx5-abi.h
+++ b/include/uapi/rdma/mlx5-abi.h
@@ -381,4 +381,20 @@ struct mlx5_ib_modify_wq {
 	__u32 comp_mask;
 	__u32 reserved;
 };
+
+struct mlx5_ib_clock_info {
+	__u32 sign;
+	__u32 resv;
+	__u64 nsec;
+	__u64 cycles;
+	__u64 frac;
+	__u32 mult;
+	__u32 shift;
+	__u64 mask;
+	__u64 overflow_period;
+};
+
+enum {
+	MLX5_IB_CLOCK_INFO_KERNEL_UPDATING = 1,
+};
 #endif /* MLX5_ABI_USER_H */
--
cgit v1.2.3
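As a companion to the sequence protocol in the commit message above,
here is a hypothetical user-space reader. It is not part of this
series: it assumes the clock info page has already been mmap'ed into
the process (enabled by follow-on patches), uses the struct
mlx5_ib_clock_info layout added above, assumes the timestamp lies
within one overflow_period of the kernel snapshot, and keeps memory
ordering deliberately simple (volatile reads instead of proper
acquire/release atomics).

#include <stdint.h>
#include <rdma/mlx5-abi.h>	/* struct mlx5_ib_clock_info, added above */

/* Convert a raw CQE timestamp to nanoseconds using the kernel's
 * snapshot, retrying whenever the sequence number is odd (update in
 * progress) or changed between the two reads.
 */
static uint64_t cqe_ts_to_ns(volatile struct mlx5_ib_clock_info *info,
			     uint64_t cqe_ts)
{
	uint64_t nsec, cycles, frac, mask, delta;
	uint32_t seq1, seq2, mult, shift;

	do {
		do {
			seq1 = info->sign;	/* odd: update in progress */
		} while (seq1 & MLX5_IB_CLOCK_INFO_KERNEL_UPDATING);

		nsec   = info->nsec;
		cycles = info->cycles;
		frac   = info->frac;
		mult   = info->mult;
		shift  = info->shift;
		mask   = info->mask;

		seq2 = info->sign;
	} while (seq1 != seq2);		/* kernel updated meanwhile: retry */

	/* Device cycles elapsed since the snapshot, then the usual
	 * timecounter conversion: ns = nsec + ((delta * mult + frac) >> shift).
	 */
	delta = (cqe_ts - cycles) & mask;
	return nsec + ((delta * mult + frac) >> shift);
}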