From 13ad1125b941a5f257d9d3ae70485773abd34792 Mon Sep 17 00:00:00 2001 From: Aharon Landau Date: Sun, 31 Jul 2022 11:26:36 +0300 Subject: RDMA/mlx5: Don't compare mkey tags in DEVX indirect mkey According to the ib spec: If the CI supports the Base Memory Management Extensions defined in this specification, the L_Key format must consist of: 24 bit index in the most significant bits of the R_Key, and 8 bit key in the least significant bits of the R_Key Through a successful Allocate L_Key verb invocation, the CI must let the consumer own the key portion of the returned R_Key Therefore, when creating a mkey using DEVX, the consumer is allowed to change the key part. The kernel should compare only the index part of a R_Key to determine equality with another R_Key. Adding capability in order not to break backward compatibility. Fixes: 534fd7aac56a ("IB/mlx5: Manage indirection mkey upon DEVX flow for ODP") Link: https://lore.kernel.org/r/3d669aacea85a3a15c3b3b953b3eaba3f80ef9be.1659255945.git.leonro@nvidia.com Signed-off-by: Aharon Landau Signed-off-by: Leon Romanovsky --- include/uapi/rdma/mlx5-abi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 86be4a92b67b..a96b7d2770e1 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -104,6 +104,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE = 1UL << 2, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_SQD2RTS = 1UL << 3, MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_REAL_TIME_TS = 1UL << 4, + MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_MKEY_UPDATE_TAG = 1UL << 5, }; enum mlx5_user_cmds_supp_uhw { -- cgit v1.2.3 From dc13fbf79ec8f983fc398cd200ed12973f390957 Mon Sep 17 00:00:00 2001 From: Michael Margolin Date: Thu, 18 Aug 2022 17:04:49 +0300 Subject: RDMA/efa: Support CQ receive entries with source GID Add a parameter for create CQ admin command to set source address on receive completion descriptors. Report capability for this feature through query device verb. Link: https://lore.kernel.org/r/20220818140449.414-1-mrgolin@amazon.com Reviewed-by: Firas Jahjah Reviewed-by: Yossi Leybovich Signed-off-by: Daniel Kranzdorf Signed-off-by: Michael Margolin Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/efa/efa_admin_cmds_defs.h | 6 +- drivers/infiniband/hw/efa/efa_com_cmd.c | 5 +- drivers/infiniband/hw/efa/efa_com_cmd.h | 3 +- drivers/infiniband/hw/efa/efa_io_defs.h | 289 ++++++++++++++++++++++++ drivers/infiniband/hw/efa/efa_verbs.c | 11 +- include/uapi/rdma/efa-abi.h | 4 +- 6 files changed, 312 insertions(+), 6 deletions(-) create mode 100644 drivers/infiniband/hw/efa/efa_io_defs.h (limited to 'include') diff --git a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h index 0b0b93b529f3..d4b9226088bd 100644 --- a/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h +++ b/drivers/infiniband/hw/efa/efa_admin_cmds_defs.h @@ -444,7 +444,10 @@ struct efa_admin_create_cq_cmd { /* * 4:0 : cq_entry_size_words - size of CQ entry in * 32-bit words, valid values: 4, 8. - * 7:5 : reserved7 - MBZ + * 5 : set_src_addr - If set, source address will be + * filled on RX completions from unknown senders. + * Requires 8 words CQ entry size. 
+ * 7:6 : reserved7 - MBZ */ u8 cq_caps_2; @@ -980,6 +983,7 @@ struct efa_admin_host_info { #define EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) #define EFA_ADMIN_CREATE_CQ_CMD_VIRT_MASK BIT(6) #define EFA_ADMIN_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) +#define EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR_MASK BIT(5) /* create_cq_resp */ #define EFA_ADMIN_CREATE_CQ_RESP_DB_VALID_MASK BIT(0) diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.c b/drivers/infiniband/hw/efa/efa_com_cmd.c index fb405da4e1db..8f8885e002ba 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.c +++ b/drivers/infiniband/hw/efa/efa_com_cmd.c @@ -168,7 +168,10 @@ int efa_com_create_cq(struct efa_com_dev *edev, EFA_ADMIN_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED, 1); create_cmd.eqn = params->eqn; } - + if (params->set_src_addr) { + EFA_SET(&create_cmd.cq_caps_2, + EFA_ADMIN_CREATE_CQ_CMD_SET_SRC_ADDR, 1); + } efa_com_set_dma_addr(params->dma_addr, &create_cmd.cq_ba.mem_addr_high, &create_cmd.cq_ba.mem_addr_low); diff --git a/drivers/infiniband/hw/efa/efa_com_cmd.h b/drivers/infiniband/hw/efa/efa_com_cmd.h index c33010bbf9e8..0898ad5bc340 100644 --- a/drivers/infiniband/hw/efa/efa_com_cmd.h +++ b/drivers/infiniband/hw/efa/efa_com_cmd.h @@ -75,7 +75,8 @@ struct efa_com_create_cq_params { u16 uarn; u16 eqn; u8 entry_size_in_bytes; - bool interrupt_mode_enabled; + u8 interrupt_mode_enabled : 1; + u8 set_src_addr : 1; }; struct efa_com_create_cq_result { diff --git a/drivers/infiniband/hw/efa/efa_io_defs.h b/drivers/infiniband/hw/efa/efa_io_defs.h new file mode 100644 index 000000000000..17ba8984b11e --- /dev/null +++ b/drivers/infiniband/hw/efa/efa_io_defs.h @@ -0,0 +1,289 @@ +/* SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause */ +/* + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. 
+ */ + +#ifndef _EFA_IO_H_ +#define _EFA_IO_H_ + +#define EFA_IO_TX_DESC_NUM_BUFS 2 +#define EFA_IO_TX_DESC_NUM_RDMA_BUFS 1 +#define EFA_IO_TX_DESC_INLINE_MAX_SIZE 32 +#define EFA_IO_TX_DESC_IMM_DATA_SIZE 4 + +enum efa_io_queue_type { + /* send queue (of a QP) */ + EFA_IO_SEND_QUEUE = 1, + /* recv queue (of a QP) */ + EFA_IO_RECV_QUEUE = 2, +}; + +enum efa_io_send_op_type { + /* send message */ + EFA_IO_SEND = 0, + /* RDMA read */ + EFA_IO_RDMA_READ = 1, +}; + +enum efa_io_comp_status { + /* Successful completion */ + EFA_IO_COMP_STATUS_OK = 0, + /* Flushed during QP destroy */ + EFA_IO_COMP_STATUS_FLUSHED = 1, + /* Internal QP error */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_QP_INTERNAL_ERROR = 2, + /* Bad operation type */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_OP_TYPE = 3, + /* Bad AH */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_AH = 4, + /* LKEY not registered or does not match IOVA */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_INVALID_LKEY = 5, + /* Message too long */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_BAD_LENGTH = 6, + /* Destination ENI is down or does not run EFA */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_ADDRESS = 7, + /* Connection was reset by remote side */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_ABORT = 8, + /* Bad dest QP number (QP does not exist or is in error state) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_DEST_QPN = 9, + /* Destination resource not ready (no WQEs posted on RQ) */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_RNR = 10, + /* Receiver SGL too short */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_LENGTH = 11, + /* Unexpected status returned by responder */ + EFA_IO_COMP_STATUS_REMOTE_ERROR_BAD_STATUS = 12, + /* Unresponsive remote - detected locally */ + EFA_IO_COMP_STATUS_LOCAL_ERROR_UNRESP_REMOTE = 13, +}; + +struct efa_io_tx_meta_desc { + /* Verbs-generated Request ID */ + u16 req_id; + + /* + * control flags + * 3:0 : op_type - operation type: send/rdma/fast mem + * ops/etc + * 4 : has_imm - immediate_data field carries valid + * data. + * 5 : inline_msg - inline mode - inline message data + * follows this descriptor (no buffer descriptors). + * Note that it is different from immediate data + * 6 : meta_extension - Extended metadata. MBZ + * 7 : meta_desc - Indicates metadata descriptor. + * Must be set. + */ + u8 ctrl1; + + /* + * control flags + * 0 : phase + * 1 : reserved25 - MBZ + * 2 : first - Indicates first descriptor in + * transaction. Must be set. + * 3 : last - Indicates last descriptor in + * transaction. Must be set. + * 4 : comp_req - Indicates whether completion should + * be posted, after packet is transmitted. Valid only + * for the first descriptor + * 7:5 : reserved29 - MBZ + */ + u8 ctrl2; + + u16 dest_qp_num; + + /* + * If inline_msg bit is set, length of inline message in bytes, + * otherwise length of SGL (number of buffers). + */ + u16 length; + + /* + * immediate data: if has_imm is set, then this field is included + * within Tx message and reported in remote Rx completion. + */ + u32 immediate_data; + + u16 ah; + + u16 reserved; + + /* Queue key */ + u32 qkey; + + u8 reserved2[12]; +}; + +/* + * Tx queue buffer descriptor, for any transport type. Preceded by metadata + * descriptor. 
+ */ +struct efa_io_tx_buf_desc { + /* length in bytes */ + u32 length; + + /* + * 23:0 : lkey - local memory translation key + * 31:24 : reserved - MBZ + */ + u32 lkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_remote_mem_addr { + /* length in bytes */ + u32 length; + + /* remote memory translation key */ + u32 rkey; + + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer address bits[63:32] */ + u32 buf_addr_hi; +}; + +struct efa_io_rdma_req { + /* Remote memory address */ + struct efa_io_remote_mem_addr remote_mem; + + /* Local memory address */ + struct efa_io_tx_buf_desc local_mem[1]; +}; + +/* + * Tx WQE, composed of tx meta descriptors followed by either tx buffer + * descriptors or inline data + */ +struct efa_io_tx_wqe { + /* TX meta */ + struct efa_io_tx_meta_desc meta; + + union { + /* Send buffer descriptors */ + struct efa_io_tx_buf_desc sgl[2]; + + u8 inline_data[32]; + + /* RDMA local and remote memory addresses */ + struct efa_io_rdma_req rdma_req; + } data; +}; + +/* + * Rx buffer descriptor; RX WQE is composed of one or more RX buffer + * descriptors. + */ +struct efa_io_rx_desc { + /* Buffer address bits[31:0] */ + u32 buf_addr_lo; + + /* Buffer Pointer[63:32] */ + u32 buf_addr_hi; + + /* Verbs-generated request id. */ + u16 req_id; + + /* Length in bytes. */ + u16 length; + + /* + * LKey and control flags + * 23:0 : lkey + * 29:24 : reserved - MBZ + * 30 : first - Indicates first descriptor in WQE + * 31 : last - Indicates last descriptor in WQE + */ + u32 lkey_ctrl; +}; + +/* Common IO completion descriptor */ +struct efa_io_cdesc_common { + /* + * verbs-generated request ID, as provided in the completed tx or rx + * descriptor. + */ + u16 req_id; + + u8 status; + + /* + * flags + * 0 : phase - Phase bit + * 2:1 : q_type - enum efa_io_queue_type: send/recv + * 3 : has_imm - indicates that immediate data is + * present - for RX completions only + * 7:4 : reserved28 - MBZ + */ + u8 flags; + + /* local QP number */ + u16 qp_num; + + /* Transferred length */ + u16 length; +}; + +/* Tx completion descriptor */ +struct efa_io_tx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; +}; + +/* Rx Completion Descriptor */ +struct efa_io_rx_cdesc { + /* Common completion info */ + struct efa_io_cdesc_common common; + + /* Remote Address Handle FW index, 0xFFFF indicates invalid ah */ + u16 ah; + + u16 src_qp_num; + + /* Immediate data */ + u32 imm; +}; + +/* Extended Rx Completion Descriptor */ +struct efa_io_rx_cdesc_ex { + /* Base RX completion info */ + struct efa_io_rx_cdesc rx_cdesc_base; + + /* + * Valid only in case of unknown AH (0xFFFF) and CQ set_src_addr is + * enabled. 
+ */ + u8 src_addr[16]; +}; + +/* tx_meta_desc */ +#define EFA_IO_TX_META_DESC_OP_TYPE_MASK GENMASK(3, 0) +#define EFA_IO_TX_META_DESC_HAS_IMM_MASK BIT(4) +#define EFA_IO_TX_META_DESC_INLINE_MSG_MASK BIT(5) +#define EFA_IO_TX_META_DESC_META_EXTENSION_MASK BIT(6) +#define EFA_IO_TX_META_DESC_META_DESC_MASK BIT(7) +#define EFA_IO_TX_META_DESC_PHASE_MASK BIT(0) +#define EFA_IO_TX_META_DESC_FIRST_MASK BIT(2) +#define EFA_IO_TX_META_DESC_LAST_MASK BIT(3) +#define EFA_IO_TX_META_DESC_COMP_REQ_MASK BIT(4) + +/* tx_buf_desc */ +#define EFA_IO_TX_BUF_DESC_LKEY_MASK GENMASK(23, 0) + +/* rx_desc */ +#define EFA_IO_RX_DESC_LKEY_MASK GENMASK(23, 0) +#define EFA_IO_RX_DESC_FIRST_MASK BIT(30) +#define EFA_IO_RX_DESC_LAST_MASK BIT(31) + +/* cdesc_common */ +#define EFA_IO_CDESC_COMMON_PHASE_MASK BIT(0) +#define EFA_IO_CDESC_COMMON_Q_TYPE_MASK GENMASK(2, 1) +#define EFA_IO_CDESC_COMMON_HAS_IMM_MASK BIT(3) + +#endif /* _EFA_IO_H_ */ diff --git a/drivers/infiniband/hw/efa/efa_verbs.c b/drivers/infiniband/hw/efa/efa_verbs.c index ecfe70eb5efb..31454643f8c5 100644 --- a/drivers/infiniband/hw/efa/efa_verbs.c +++ b/drivers/infiniband/hw/efa/efa_verbs.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. */ #include @@ -15,6 +15,7 @@ #include #include "efa.h" +#include "efa_io_defs.h" enum { EFA_MMAP_DMA_PAGE = 0, @@ -242,6 +243,7 @@ int efa_query_device(struct ib_device *ibdev, resp.max_rq_wr = dev_attr->max_rq_depth; resp.max_rdma_size = dev_attr->max_rdma_size; + resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID; if (EFA_DEV_CAP(dev, RDMA_READ)) resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ; @@ -1064,6 +1066,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct efa_ibv_create_cq cmd = {}; struct efa_cq *cq = to_ecq(ibcq); int entries = attr->cqe; + bool set_src_addr; int err; ibdev_dbg(ibdev, "create_cq entries %d\n", entries); @@ -1109,7 +1112,10 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, goto err_out; } - if (!cmd.cq_entry_size) { + set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID); + if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) && + (set_src_addr || + cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) { ibdev_dbg(ibdev, "Invalid entry size [%u]\n", cmd.cq_entry_size); err = -EINVAL; @@ -1138,6 +1144,7 @@ int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, params.dma_addr = cq->dma_addr; params.entry_size_in_bytes = cmd.cq_entry_size; params.num_sub_cqs = cmd.num_sub_cqs; + params.set_src_addr = set_src_addr; if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) { cq->eq = efa_vec2eq(dev, attr->comp_vector); params.eqn = cq->eq->eeq.eqn; diff --git a/include/uapi/rdma/efa-abi.h b/include/uapi/rdma/efa-abi.h index 08035ccf1fff..163ac79556d6 100644 --- a/include/uapi/rdma/efa-abi.h +++ b/include/uapi/rdma/efa-abi.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-2-Clause) */ /* - * Copyright 2018-2021 Amazon.com, Inc. or its affiliates. All rights reserved. + * Copyright 2018-2022 Amazon.com, Inc. or its affiliates. All rights reserved. 
*/ #ifndef EFA_ABI_USER_H @@ -54,6 +54,7 @@ struct efa_ibv_alloc_pd_resp { enum { EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL = 1 << 0, + EFA_CREATE_CQ_WITH_SGID = 1 << 1, }; struct efa_ibv_create_cq { @@ -118,6 +119,7 @@ enum { EFA_QUERY_DEVICE_CAPS_RDMA_READ = 1 << 0, EFA_QUERY_DEVICE_CAPS_RNR_RETRY = 1 << 1, EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS = 1 << 2, + EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID = 1 << 3, }; struct efa_ibv_ex_query_device_resp { -- cgit v1.2.3 From 2c34bb6dea481fa11048e26ffd1ce7400dbc2105 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 18 Aug 2022 23:00:18 +0200 Subject: IB: move from strlcpy with unused retval to strscpy Follow the advice of the below link and prefer 'strscpy' in this subsystem. Conversion is 1:1 because the return value is not used. Generated by a coccinelle script. Link: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw@mail.gmail.com/ Link: https://lore.kernel.org/r/20220818210018.6841-1-wsa+renesas@sang-engineering.com Signed-off-by: Wolfram Sang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma_configfs.c | 2 +- drivers/infiniband/core/device.c | 4 ++-- drivers/infiniband/hw/bnxt_re/main.c | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- drivers/infiniband/hw/hfi1/verbs.c | 2 +- drivers/infiniband/hw/mthca/mthca_cmd.c | 2 +- drivers/infiniband/hw/ocrdma/ocrdma_hw.c | 2 +- drivers/infiniband/hw/qib/qib_iba7322.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_ethtool.c | 4 ++-- drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c | 4 ++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- include/rdma/rdma_vt.h | 2 +- 12 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cma_configfs.c b/drivers/infiniband/core/cma_configfs.c index de8a2d5d741c..7b68b3ea979f 100644 --- a/drivers/infiniband/core/cma_configfs.c +++ b/drivers/infiniband/core/cma_configfs.c @@ -292,7 +292,7 @@ static struct config_group *make_cma_dev(struct config_group *group, goto fail; } - strlcpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); + strscpy(cma_dev_group->name, name, sizeof(cma_dev_group->name)); config_group_init_type_name(&cma_dev_group->ports_group, "ports", &cma_ports_group_type); diff --git a/drivers/infiniband/core/device.c b/drivers/infiniband/core/device.c index d275db195f1a..ae60c73babcc 100644 --- a/drivers/infiniband/core/device.c +++ b/drivers/infiniband/core/device.c @@ -422,7 +422,7 @@ int ib_device_rename(struct ib_device *ibdev, const char *name) return ret; } - strlcpy(ibdev->name, name, IB_DEVICE_NAME_MAX); + strscpy(ibdev->name, name, IB_DEVICE_NAME_MAX); ret = rename_compat_devs(ibdev); downgrade_write(&devices_rwsem); @@ -1217,7 +1217,7 @@ static int assign_name(struct ib_device *device, const char *name) ret = -ENFILE; goto out; } - strlcpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); + strscpy(device->name, dev_name(&device->dev), IB_DEVICE_NAME_MAX); ret = xa_alloc_cyclic(&devices, &device->index, device, xa_limit_31b, &last_id, GFP_KERNEL); diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 3d6834d3d4fb..8c0c80a8d338 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -725,7 +725,7 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) /* ib device init */ ibdev->node_type = RDMA_NODE_IB_CA; - strlcpy(ibdev->node_desc, BNXT_RE_DESC " HCA", + strscpy(ibdev->node_desc, BNXT_RE_DESC " HCA", strlen(BNXT_RE_DESC) + 5); ibdev->phys_port_cnt = 1; diff 
--git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index 629beff053ad..f5f9269fdc16 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -965,7 +965,7 @@ static int allocate_ctxt(struct hfi1_filedata *fd, struct hfi1_devdata *dd, uctxt->userversion = uinfo->userversion; uctxt->flags = hfi1_cap_mask; /* save current flag state */ init_waitqueue_head(&uctxt->wait); - strlcpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); + strscpy(uctxt->comm, current->comm, sizeof(uctxt->comm)); memcpy(uctxt->uuid, uinfo->uuid, sizeof(uctxt->uuid)); uctxt->jkey = generate_jkey(current_uid()); hfi1_stats.sps_ctxts++; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 6988f6f21bde..ec4f316a28e1 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1801,7 +1801,7 @@ int hfi1_register_ib_device(struct hfi1_devdata *dd) ib_set_device_ops(ibdev, &hfi1_dev_ops); - strlcpy(ibdev->node_desc, init_utsname()->nodename, + strscpy(ibdev->node_desc, init_utsname()->nodename, sizeof(ibdev->node_desc)); /* diff --git a/drivers/infiniband/hw/mthca/mthca_cmd.c b/drivers/infiniband/hw/mthca/mthca_cmd.c index bdf5ed38de22..f330ce895d88 100644 --- a/drivers/infiniband/hw/mthca/mthca_cmd.c +++ b/drivers/infiniband/hw/mthca/mthca_cmd.c @@ -1252,7 +1252,7 @@ static void get_board_id(void *vsd, char *board_id) if (be16_to_cpup(vsd + VSD_OFFSET_SIG1) == VSD_SIGNATURE_TOPSPIN && be16_to_cpup(vsd + VSD_OFFSET_SIG2) == VSD_SIGNATURE_TOPSPIN) { - strlcpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); + strscpy(board_id, vsd + VSD_OFFSET_TS_BOARD_ID, MTHCA_BOARD_ID_LEN); } else { /* * The board ID is a string but the firmware byte diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c index 265a581133dc..56f06c68f31a 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_hw.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_hw.c @@ -1363,7 +1363,7 @@ static int ocrdma_mbx_get_ctrl_attribs(struct ocrdma_dev *dev) dev->hba_port_num = (hba_attribs->ptpnum_maxdoms_hbast_cv & OCRDMA_HBA_ATTRB_PTNUM_MASK) >> OCRDMA_HBA_ATTRB_PTNUM_SHIFT; - strlcpy(dev->model_number, + strscpy(dev->model_number, hba_attribs->controller_model_number, sizeof(dev->model_number)); } diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c index 6861c6384f18..9d2dd135b784 100644 --- a/drivers/infiniband/hw/qib/qib_iba7322.c +++ b/drivers/infiniband/hw/qib/qib_iba7322.c @@ -2124,7 +2124,7 @@ static void qib_7322_handle_hwerrors(struct qib_devdata *dd, char *msg, if (hwerrs & HWE_MASK(PowerOnBISTFailed)) { isfatal = 1; - strlcpy(msg, + strscpy(msg, "[Memory BIST test failed, InfiniPath hardware unusable]", msgl); /* ignore from now on, so disable until driver reloaded */ diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c index a09ca21f7dff..8af99b18d361 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ethtool.c @@ -65,10 +65,10 @@ static void ipoib_get_drvinfo(struct net_device *netdev, ib_get_device_fw_str(priv->ca, drvinfo->fw_version); - strlcpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), + strscpy(drvinfo->bus_info, dev_name(priv->ca->dev.parent), sizeof(drvinfo->bus_info)); - strlcpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); + strscpy(drvinfo->driver, "ib_ipoib", sizeof(drvinfo->driver)); } static int 
ipoib_get_coalesce(struct net_device *dev, diff --git a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c index 42d557dff19d..29b3d8fce3f5 100644 --- a/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c +++ b/drivers/infiniband/ulp/opa_vnic/opa_vnic_ethtool.c @@ -124,8 +124,8 @@ static struct vnic_stats vnic_gstrings_stats[] = { static void vnic_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) { - strlcpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); - strlcpy(drvinfo->bus_info, dev_name(netdev->dev.parent), + strscpy(drvinfo->driver, opa_vnic_driver_name, sizeof(drvinfo->driver)); + strscpy(drvinfo->bus_info, dev_name(netdev->dev.parent), sizeof(drvinfo->bus_info)); } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index 21cbe30d526f..c1f0566bf6a0 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2300,7 +2300,7 @@ static int srpt_cm_req_recv(struct srpt_device *const sdev, goto free_recv_ring; } - strlcpy(ch->sess_name, src_addr, sizeof(ch->sess_name)); + strscpy(ch->sess_name, src_addr, sizeof(ch->sess_name)); snprintf(i_port_id, sizeof(i_port_id), "0x%016llx%016llx", be64_to_cpu(*(__be64 *)nexus->i_port_id), be64_to_cpu(*(__be64 *)(nexus->i_port_id + 8))); diff --git a/include/rdma/rdma_vt.h b/include/rdma/rdma_vt.h index 2dafd7dbe893..c429d6ddb129 100644 --- a/include/rdma/rdma_vt.h +++ b/include/rdma/rdma_vt.h @@ -445,7 +445,7 @@ static inline void rvt_set_ibdev_name(struct rvt_dev_info *rdi, * to work by setting the name manually here. */ dev_set_name(&rdi->ibdev.dev, fmt, name, unit); - strlcpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); + strscpy(rdi->ibdev.name, dev_name(&rdi->ibdev.dev), IB_DEVICE_NAME_MAX); } /** -- cgit v1.2.3 From 91a3f14ec953f3224215dc867001b9a201785740 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 19 Aug 2022 12:08:57 +0300 Subject: IB/cm: Remove the service_mask parameter from ib_cm_listen() Remove the service_mask parameter of ib_cm_listen(), as all callers use 0. Link: https://lore.kernel.org/r/20220819090859.957943-2-markzhang@nvidia.com Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 8 ++------ drivers/infiniband/ulp/ipoib/ipoib_cm.c | 4 ++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- include/rdma/ib_cm.h | 7 +------ 4 files changed, 6 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b985e0d9bc05..b59f864b3d79 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -1185,12 +1185,8 @@ static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. - * @service_mask: Mask applied to service ID used to listen across a - * range of service IDs. If set to 0, the service ID is matched - * exactly. This parameter is ignored if %service_id is set to - * IB_CM_ASSIGN_SERVICE_ID. 
*/ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id) { struct cm_id_private *cm_id_priv = container_of(cm_id, struct cm_id_private, id); @@ -1203,7 +1199,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask) goto out; } - ret = cm_init_listen(cm_id_priv, service_id, service_mask); + ret = cm_init_listen(cm_id_priv, service_id, 0); if (ret) goto out; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index fd9d7f2c4d64..ebb35b809f26 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -884,8 +884,8 @@ int ipoib_cm_dev_open(struct net_device *dev) goto err_cm; } - ret = ib_cm_listen(priv->cm.id, cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num), - 0); + ret = ib_cm_listen(priv->cm.id, + cpu_to_be64(IPOIB_CM_IETF_ID | priv->qp->qp_num)); if (ret) { pr_warn("%s: failed to listen on ID 0x%llx\n", priv->ca->name, IPOIB_CM_IETF_ID | priv->qp->qp_num); diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index c1f0566bf6a0..9450c609bf3b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -3191,7 +3191,7 @@ static int srpt_add_one(struct ib_device *device) * if this HCA is gone bad and replaced by different HCA */ ret = sdev->cm_id ? - ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid), 0) : + ib_cm_listen(sdev->cm_id, cpu_to_be64(srpt_service_guid)) : 0; if (ret < 0) { pr_err("ib_cm_listen() failed: %d (cm_id state = %d)\n", ret, diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index e23eb357b761..fbf260c1b1df 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -340,13 +340,8 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id); * and service ID resolution requests. The service ID should be specified * network-byte order. If set to IB_CM_ASSIGN_SERVICE_ID, the CM will * assign a service ID to the caller. - * @service_mask: Mask applied to service ID used to listen across a - * range of service IDs. If set to 0, the service ID is matched - * exactly. This parameter is ignored if %service_id is set to - * IB_CM_ASSIGN_SERVICE_ID. */ -int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, - __be64 service_mask); +int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id); struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, ib_cm_handler cm_handler, -- cgit v1.2.3 From a461b746c5768b9b3001045cff2d508346f5f789 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Fri, 19 Aug 2022 12:08:58 +0300 Subject: IB/cm: remove cm_id_priv->id.service_mask and service_mask parameter of cm_init_listen() The service_mask is always ~cpu_to_be64(0), so the result is always a NOP when it is &'d with a service_id. Remove it for simplicity. 
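For illustration, a minimal sketch of what an exact-match listen looks like for a kernel consumer once these two cleanups are applied (MY_SERVICE_ID is a hypothetical constant; the real ipoib conversion appears in the diff above):

	/* Hypothetical ULP: listen on one exact service ID, no mask argument */
	ret = ib_cm_listen(cm_id, cpu_to_be64(MY_SERVICE_ID));
	if (ret)
		pr_warn("ib_cm_listen failed: %d\n", ret);
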
Link: https://lore.kernel.org/r/20220819090859.957943-3-markzhang@nvidia.com Signed-off-by: Mark Zhang Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 28 ++++++++-------------------- include/rdma/ib_cm.h | 1 - 2 files changed, 8 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index b59f864b3d79..84bb10799467 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -617,7 +617,6 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, struct rb_node *parent = NULL; struct cm_id_private *cur_cm_id_priv; __be64 service_id = cm_id_priv->id.service_id; - __be64 service_mask = cm_id_priv->id.service_mask; unsigned long flags; spin_lock_irqsave(&cm.lock, flags); @@ -625,8 +624,7 @@ static struct cm_id_private *cm_insert_listen(struct cm_id_private *cm_id_priv, parent = *link; cur_cm_id_priv = rb_entry(parent, struct cm_id_private, service_node); - if ((cur_cm_id_priv->id.service_mask & service_id) == - (service_mask & cur_cm_id_priv->id.service_id) && + if ((service_id == cur_cm_id_priv->id.service_id) && (cm_id_priv->id.device == cur_cm_id_priv->id.device)) { /* * Sharing an ib_cm_id with different handlers is not @@ -670,8 +668,7 @@ static struct cm_id_private *cm_find_listen(struct ib_device *device, while (node) { cm_id_priv = rb_entry(node, struct cm_id_private, service_node); - if ((cm_id_priv->id.service_mask & service_id) == - cm_id_priv->id.service_id && + if ((service_id == cm_id_priv->id.service_id) && (cm_id_priv->id.device == device)) { refcount_inc(&cm_id_priv->refcount); return cm_id_priv; @@ -1158,22 +1155,17 @@ void ib_destroy_cm_id(struct ib_cm_id *cm_id) } EXPORT_SYMBOL(ib_destroy_cm_id); -static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id, - __be64 service_mask) +static int cm_init_listen(struct cm_id_private *cm_id_priv, __be64 service_id) { - service_mask = service_mask ? 
service_mask : ~cpu_to_be64(0); - service_id &= service_mask; if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID && (service_id != IB_CM_ASSIGN_SERVICE_ID)) return -EINVAL; - if (service_id == IB_CM_ASSIGN_SERVICE_ID) { + if (service_id == IB_CM_ASSIGN_SERVICE_ID) cm_id_priv->id.service_id = cpu_to_be64(cm.listen_service_id++); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); - } else { + else cm_id_priv->id.service_id = service_id; - cm_id_priv->id.service_mask = service_mask; - } + return 0; } @@ -1199,7 +1191,7 @@ int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id) goto out; } - ret = cm_init_listen(cm_id_priv, service_id, 0); + ret = cm_init_listen(cm_id_priv, service_id); if (ret) goto out; @@ -1247,7 +1239,7 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device, if (IS_ERR(cm_id_priv)) return ERR_CAST(cm_id_priv); - err = cm_init_listen(cm_id_priv, service_id, 0); + err = cm_init_listen(cm_id_priv, service_id); if (err) { ib_destroy_cm_id(&cm_id_priv->id); return ERR_PTR(err); } @@ -1518,7 +1510,6 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, } } cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = cm_convert_to_ms( param->primary_path->packet_life_time) * 2 + cm_convert_to_ms( @@ -2075,7 +2066,6 @@ static int cm_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_REQ_LOCAL_COMM_ID, req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_REQ_SERVICE_ID, req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = req_msg->hdr.tid; cm_id_priv->timeout_ms = cm_convert_to_ms( IBA_GET(CM_REQ_LOCAL_CM_RESPONSE_TIMEOUT, req_msg)); @@ -3482,7 +3472,6 @@ int ib_send_cm_sidr_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); cm_id->service_id = param->service_id; - cm_id->service_mask = ~cpu_to_be64(0); cm_id_priv->timeout_ms = param->timeout_ms; cm_id_priv->max_cm_retries = param->max_cm_retries; if (cm_id->state != IB_CM_IDLE) { @@ -3557,7 +3546,6 @@ static int cm_sidr_req_handler(struct cm_work *work) cpu_to_be32(IBA_GET(CM_SIDR_REQ_REQUESTID, sidr_req_msg)); cm_id_priv->id.service_id = cpu_to_be64(IBA_GET(CM_SIDR_REQ_SERVICEID, sidr_req_msg)); - cm_id_priv->id.service_mask = ~cpu_to_be64(0); cm_id_priv->tid = sidr_req_msg->hdr.tid; wc = work->mad_recv_wc->wc; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index fbf260c1b1df..8dae5847020a 100644 --- a/include/rdma/ib_cm.h +++ b/include/rdma/ib_cm.h @@ -294,7 +294,6 @@ struct ib_cm_id { void *context; struct ib_device *device; __be64 service_id; - __be64 service_mask; enum ib_cm_state state; /* internal CM/debug use */ enum ib_cm_lap_state lap_state; /* internal CM/debug use */ __be32 local_id; -- cgit v1.2.3 From bf9a9928510a03e445fa4f54bdc0b8e71f4c0067 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:00 +0300 Subject: RDMA/core: Rename rdma_route.num_paths field to num_pri_alt_paths This field means the total number of primary and alternative paths, i.e.: 0 - No primary nor alternate path is available; 1 - Only primary path is available; 2 - Both primary and alternate path are available. Rename it to avoid confusion, as with follow-up patches the primary path will support multiple path records.
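A small sketch of how a consumer can read the renamed field under this encoding (the helper name is invented; cma_connect_ib() in the diff below applies the same test inline):

	/* Invented helper: true when an alternate path record was stored */
	static bool rdma_route_has_alt_path(const struct rdma_route *route)
	{
		/* 0 = none, 1 = primary only, 2 = primary + alternate */
		return route->num_pri_alt_paths == 2;
	}
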
Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/cbe424de63a56207870d70c5edce7c68e45f429e.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 18 +++++++++--------- drivers/infiniband/core/ucma.c | 10 +++++----- include/rdma/rdma_cm.h | 7 ++++++- 3 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 46d06678dfbe..91e72a76d95e 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2241,14 +2241,14 @@ cma_ib_new_conn_id(const struct rdma_cm_id *listen_id, goto err; rt = &id->route; - rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; - rt->path_rec = kmalloc_array(rt->num_paths, sizeof(*rt->path_rec), - GFP_KERNEL); + rt->num_pri_alt_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1; + rt->path_rec = kmalloc_array(rt->num_pri_alt_paths, + sizeof(*rt->path_rec), GFP_KERNEL); if (!rt->path_rec) goto err; rt->path_rec[0] = *path; - if (rt->num_paths == 2) + if (rt->num_pri_alt_paths == 2) rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path; if (net_dev) { @@ -2826,7 +2826,7 @@ static void cma_query_handler(int status, struct sa_path_rec *path_rec, route = &work->id->id.route; if (!status) { - route->num_paths = 1; + route->num_pri_alt_paths = 1; *route->path_rec = *path_rec; } else { work->old_state = RDMA_CM_ROUTE_QUERY; @@ -3081,7 +3081,7 @@ int rdma_set_ib_path(struct rdma_cm_id *id, dev_put(ndev); } - id->route.num_paths = 1; + id->route.num_pri_alt_paths = 1; return 0; err_free: @@ -3214,7 +3214,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) goto err1; } - route->num_paths = 1; + route->num_pri_alt_paths = 1; ndev = cma_iboe_set_path_rec_l2_fields(id_priv); if (!ndev) { @@ -3274,7 +3274,7 @@ static int cma_resolve_iboe_route(struct rdma_id_private *id_priv) err2: kfree(route->path_rec); route->path_rec = NULL; - route->num_paths = 0; + route->num_pri_alt_paths = 0; err1: kfree(work); return ret; @@ -4265,7 +4265,7 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, } req.primary_path = &route->path_rec[0]; - if (route->num_paths == 2) + if (route->num_pri_alt_paths == 2) req.alternate_path = &route->path_rec[1]; req.ppath_sgid_attr = id_priv->id.route.addr.dev_addr.sgid_attr; diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 9d6ac9dff39a..bf42650f125b 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -754,8 +754,8 @@ static void ucma_copy_ib_route(struct rdma_ucm_query_route_resp *resp, { struct rdma_dev_addr *dev_addr; - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: dev_addr = &route->addr.dev_addr; rdma_addr_get_dgid(dev_addr, @@ -781,8 +781,8 @@ static void ucma_copy_iboe_route(struct rdma_ucm_query_route_resp *resp, struct rdma_route *route) { - resp->num_paths = route->num_paths; - switch (route->num_paths) { + resp->num_paths = route->num_pri_alt_paths; + switch (route->num_pri_alt_paths) { case 0: rdma_ip2gid((struct sockaddr *)&route->addr.dst_addr, (union ib_gid *)&resp->ib_route[0].dgid); @@ -921,7 +921,7 @@ static ssize_t ucma_query_path(struct ucma_context *ctx, if (!resp) return -ENOMEM; - resp->num_paths = ctx->cm_id->route.num_paths; + resp->num_paths = ctx->cm_id->route.num_pri_alt_paths; for (i = 0, out_len -= sizeof(*resp); i < 
resp->num_paths && out_len > sizeof(struct ib_path_rec_data); i++, out_len -= sizeof(struct ib_path_rec_data)) { diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 5b18e2e36ee6..81916039ee24 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -52,7 +52,12 @@ struct rdma_addr { struct rdma_route { struct rdma_addr addr; struct sa_path_rec *path_rec; - int num_paths; + /* + * 0 - No primary nor alternate path is available + * 1 - Only primary path is available + * 2 - Both primary and alternate path are available + */ + int num_pri_alt_paths; }; struct rdma_conn_param { -- cgit v1.2.3 From 5a3749493394276449cfc4efb417ed267edbd480 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:01 +0300 Subject: RDMA/cma: Multiple path records support with netlink channel Support receiving inbound and outbound IB path records (along with the GMP PathRecord) from a user-space service through the RDMA netlink channel. The LIDs in these 3 PRs can be used in this way: 1. GMP PR: used as the standard local/remote LIDs; 2. DLID of outbound PR: used as the "dlid" field for outbound traffic; 3. DLID of inbound PR: used as the "dlid" field for outbound traffic on the responder side. This is aimed at supporting adaptive routing. With the current IB routing solution, when a packet goes out it is assigned a fixed DLID per target, meaning a fixed router will be used. The LIDs in the inbound/outbound path records can be used to identify a group of routers that allow communication with another subnet's entity. With them, packets from an inter-subnet connection may travel through any router in the set to reach the target. As confirmed with Jason, when sending a netlink request the kernel uses LS_RESOLVE_PATH_USE_ALL so that the service knows the kernel supports multiple PRs.
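For ULP authors, the visible change is the extra num_prs argument in the ib_sa_path_rec_get() completion callback; a minimal sketch of a handler that tells the record types apart via the new flags field (the function name is invented, the flag values come from this patch):

	static void my_path_rec_completion(int status, struct sa_path_rec *resp,
					   int num_prs, void *context)
	{
		int i;

		if (status) {
			pr_warn("path query failed: %d\n", status);
			return;
		}

		for (i = 0; i < num_prs; i++) {
			if (!resp[i].flags || (resp[i].flags & IB_PATH_GMP))
				pr_info("PR %d: GMP path record\n", i);
			else if (resp[i].flags & IB_PATH_INBOUND)
				pr_info("PR %d: inbound path record\n", i);
			else if (resp[i].flags & IB_PATH_OUTBOUND)
				pr_info("PR %d: outbound path record\n", i);
		}
	}
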
Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/2fa2b6c93c4c16c8915bac3cfc4f27be1d60519d.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 70 +++++++-- drivers/infiniband/core/sa_query.c | 235 ++++++++++++++++++++---------- drivers/infiniband/ulp/ipoib/ipoib_main.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- include/rdma/ib_sa.h | 3 +- include/rdma/rdma_cm.h | 6 + 6 files changed, 231 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index 91e72a76d95e..a3efc462305d 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -2026,6 +2026,8 @@ static void _destroy_id(struct rdma_id_private *id_priv, cma_id_put(id_priv->id.context); kfree(id_priv->id.route.path_rec); + kfree(id_priv->id.route.path_rec_inbound); + kfree(id_priv->id.route.path_rec_outbound); put_net(id_priv->id.route.addr.dev_addr.net); kfree(id_priv); @@ -2817,26 +2819,72 @@ int rdma_set_min_rnr_timer(struct rdma_cm_id *id, u8 min_rnr_timer) } EXPORT_SYMBOL(rdma_set_min_rnr_timer); +static void route_set_path_rec_inbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_inbound) { + route->path_rec_inbound = + kzalloc(sizeof(*route->path_rec_inbound), GFP_KERNEL); + if (!route->path_rec_inbound) + return; + } + + *route->path_rec_inbound = *path_rec; +} + +static void route_set_path_rec_outbound(struct cma_work *work, + struct sa_path_rec *path_rec) +{ + struct rdma_route *route = &work->id->id.route; + + if (!route->path_rec_outbound) { + route->path_rec_outbound = + kzalloc(sizeof(*route->path_rec_outbound), GFP_KERNEL); + if (!route->path_rec_outbound) + return; + } + + *route->path_rec_outbound = *path_rec; +} + static void cma_query_handler(int status, struct sa_path_rec *path_rec, - void *context) + int num_prs, void *context) { struct cma_work *work = context; struct rdma_route *route; + int i; route = &work->id->id.route; - if (!status) { - route->num_pri_alt_paths = 1; - *route->path_rec = *path_rec; - } else { - work->old_state = RDMA_CM_ROUTE_QUERY; - work->new_state = RDMA_CM_ADDR_RESOLVED; - work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; - work->event.status = status; - pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. status %d\n", - status); + if (status) + goto fail; + + for (i = 0; i < num_prs; i++) { + if (!path_rec[i].flags || (path_rec[i].flags & IB_PATH_GMP)) + *route->path_rec = path_rec[i]; + else if (path_rec[i].flags & IB_PATH_INBOUND) + route_set_path_rec_inbound(work, &path_rec[i]); + else if (path_rec[i].flags & IB_PATH_OUTBOUND) + route_set_path_rec_outbound(work, &path_rec[i]); + } + if (!route->path_rec) { + status = -EINVAL; + goto fail; } + route->num_pri_alt_paths = 1; + queue_work(cma_wq, &work->work); + return; + +fail: + work->old_state = RDMA_CM_ROUTE_QUERY; + work->new_state = RDMA_CM_ADDR_RESOLVED; + work->event.event = RDMA_CM_EVENT_ROUTE_ERROR; + work->event.status = status; + pr_debug_ratelimited("RDMA CM: ROUTE_ERROR: failed to query path. 
status %d\n", + status); queue_work(cma_wq, &work->work); } diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index 003e504feca2..0de83d9a4985 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -50,6 +50,7 @@ #include #include #include +#include #include "sa.h" #include "core_priv.h" @@ -104,7 +105,8 @@ struct ib_sa_device { }; struct ib_sa_query { - void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *); + void (*callback)(struct ib_sa_query *sa_query, int status, + int num_prs, struct ib_sa_mad *mad); void (*release)(struct ib_sa_query *); struct ib_sa_client *client; struct ib_sa_port *port; @@ -116,6 +118,12 @@ struct ib_sa_query { u32 seq; /* Local svc request sequence number */ unsigned long timeout; /* Local svc timeout */ u8 path_use; /* How will the pathrecord be used */ + + /* A separate buffer to save pathrecords of a response, as in cases + * like IB/netlink, mulptiple pathrecords are supported, so that + * mad->data is not large enough to hold them + */ + void *resp_pr_data; }; #define IB_SA_ENABLE_LOCAL_SERVICE 0x00000001 @@ -123,7 +131,8 @@ struct ib_sa_query { #define IB_SA_QUERY_OPA 0x00000004 struct ib_sa_path_query { - void (*callback)(int, struct sa_path_rec *, void *); + void (*callback)(int status, struct sa_path_rec *rec, + int num_paths, void *context); void *context; struct ib_sa_query sa_query; struct sa_path_rec *conv_pr; @@ -712,7 +721,7 @@ static void ib_nl_set_path_rec_attrs(struct sk_buff *skb, if ((comp_mask & IB_SA_PATH_REC_REVERSIBLE) && sa_rec->reversible != 0) - query->path_use = LS_RESOLVE_PATH_USE_GMP; + query->path_use = LS_RESOLVE_PATH_USE_ALL; else query->path_use = LS_RESOLVE_PATH_USE_UNIDIRECTIONAL; header->path_use = query->path_use; @@ -865,50 +874,81 @@ static void send_handler(struct ib_mad_agent *agent, static void ib_nl_process_good_resolve_rsp(struct ib_sa_query *query, const struct nlmsghdr *nlh) { + struct ib_path_rec_data *srec, *drec; + struct ib_sa_path_query *path_query; struct ib_mad_send_wc mad_send_wc; - struct ib_sa_mad *mad = NULL; const struct nlattr *head, *curr; - struct ib_path_rec_data *rec; - int len, rem; + struct ib_sa_mad *mad = NULL; + int len, rem, num_prs = 0; u32 mask = 0; int status = -EIO; - if (query->callback) { - head = (const struct nlattr *) nlmsg_data(nlh); - len = nlmsg_len(nlh); - switch (query->path_use) { - case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: - mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; - break; + if (!query->callback) + goto out; - case LS_RESOLVE_PATH_USE_ALL: - case LS_RESOLVE_PATH_USE_GMP: - default: - mask = IB_PATH_PRIMARY | IB_PATH_GMP | - IB_PATH_BIDIRECTIONAL; - break; + path_query = container_of(query, struct ib_sa_path_query, sa_query); + mad = query->mad_buf->mad; + if (!path_query->conv_pr && + (be16_to_cpu(mad->mad_hdr.attr_id) == IB_SA_ATTR_PATH_REC)) { + /* Need a larger buffer for possible multiple PRs */ + query->resp_pr_data = kvcalloc(RDMA_PRIMARY_PATH_MAX_REC_NUM, + sizeof(*drec), GFP_KERNEL); + if (!query->resp_pr_data) { + query->callback(query, -ENOMEM, 0, NULL); + return; } - nla_for_each_attr(curr, head, len, rem) { - if (curr->nla_type == LS_NLA_TYPE_PATH_RECORD) { - rec = nla_data(curr); - /* - * Get the first one. In the future, we may - * need to get up to 6 pathrecords. 
- */ - if ((rec->flags & mask) == mask) { - mad = query->mad_buf->mad; - mad->mad_hdr.method |= - IB_MGMT_METHOD_RESP; - memcpy(mad->data, rec->path_rec, - sizeof(rec->path_rec)); - status = 0; - break; - } - } + } + + head = (const struct nlattr *) nlmsg_data(nlh); + len = nlmsg_len(nlh); + switch (query->path_use) { + case LS_RESOLVE_PATH_USE_UNIDIRECTIONAL: + mask = IB_PATH_PRIMARY | IB_PATH_OUTBOUND; + break; + + case LS_RESOLVE_PATH_USE_ALL: + mask = IB_PATH_PRIMARY; + break; + + case LS_RESOLVE_PATH_USE_GMP: + default: + mask = IB_PATH_PRIMARY | IB_PATH_GMP | + IB_PATH_BIDIRECTIONAL; + break; + } + + drec = (struct ib_path_rec_data *)query->resp_pr_data; + nla_for_each_attr(curr, head, len, rem) { + if (curr->nla_type != LS_NLA_TYPE_PATH_RECORD) + continue; + + srec = nla_data(curr); + if ((srec->flags & mask) != mask) + continue; + + status = 0; + if (!drec) { + memcpy(mad->data, srec->path_rec, + sizeof(srec->path_rec)); + num_prs = 1; + break; } - query->callback(query, status, mad); + + memcpy(drec, srec, sizeof(*drec)); + drec++; + num_prs++; + if (num_prs >= RDMA_PRIMARY_PATH_MAX_REC_NUM) + break; } + if (!status) + mad->mad_hdr.method |= IB_MGMT_METHOD_RESP; + + query->callback(query, status, num_prs, mad); + kvfree(query->resp_pr_data); + query->resp_pr_data = NULL; + +out: mad_send_wc.send_buf = query->mad_buf; mad_send_wc.status = IB_WC_SUCCESS; send_handler(query->mad_buf->mad_agent, &mad_send_wc); @@ -1411,41 +1451,90 @@ static int opa_pr_query_possible(struct ib_sa_client *client, return PR_IB_SUPPORTED; } +static void ib_sa_pr_callback_single(struct ib_sa_path_query *query, + int status, struct ib_sa_mad *mad) +{ + struct sa_path_rec rec = {}; + + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(&rec); + + if (query->conv_pr) { + struct sa_path_rec opa; + + memset(&opa, 0, sizeof(struct sa_path_rec)); + sa_convert_path_ib_to_opa(&opa, &rec); + query->callback(status, &opa, 1, query->context); + } else { + query->callback(status, &rec, 1, query->context); + } +} + +/** + * ib_sa_pr_callback_multiple() - Parse path records then do callback. + * + * In a multiple-PR case the PRs are saved in "query->resp_pr_data" + * (instead of"mad->data") and with "ib_path_rec_data" structure format, + * so that rec->flags can be set to indicate the type of PR. + * This is valid only in IB fabric. 
+ */ +static void ib_sa_pr_callback_multiple(struct ib_sa_path_query *query, + int status, int num_prs, + struct ib_path_rec_data *rec_data) +{ + struct sa_path_rec *rec; + int i; + + rec = kvcalloc(num_prs, sizeof(*rec), GFP_KERNEL); + if (!rec) { + query->callback(-ENOMEM, NULL, 0, query->context); + return; + } + + for (i = 0; i < num_prs; i++) { + ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table), + rec_data[i].path_rec, rec + i); + rec[i].rec_type = SA_PATH_REC_TYPE_IB; + sa_path_set_dmac_zero(rec + i); + rec[i].flags = rec_data[i].flags; + } + + query->callback(status, rec, num_prs, query->context); + kvfree(rec); +} + static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_path_query *query = container_of(sa_query, struct ib_sa_path_query, sa_query); + struct sa_path_rec rec; - if (mad) { - struct sa_path_rec rec; - - if (sa_query->flags & IB_SA_QUERY_OPA) { - ib_unpack(opa_path_rec_table, - ARRAY_SIZE(opa_path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_OPA; - query->callback(status, &rec, query->context); - } else { - ib_unpack(path_rec_table, - ARRAY_SIZE(path_rec_table), - mad->data, &rec); - rec.rec_type = SA_PATH_REC_TYPE_IB; - sa_path_set_dmac_zero(&rec); - - if (query->conv_pr) { - struct sa_path_rec opa; + if (!mad || !num_prs) { + query->callback(status, NULL, 0, query->context); + return; + } - memset(&opa, 0, sizeof(struct sa_path_rec)); - sa_convert_path_ib_to_opa(&opa, &rec); - query->callback(status, &opa, query->context); - } else { - query->callback(status, &rec, query->context); - } + if (sa_query->flags & IB_SA_QUERY_OPA) { + if (num_prs != 1) { + query->callback(-EINVAL, NULL, 0, query->context); + return; } - } else - query->callback(status, NULL, query->context); + + ib_unpack(opa_path_rec_table, ARRAY_SIZE(opa_path_rec_table), + mad->data, &rec); + rec.rec_type = SA_PATH_REC_TYPE_OPA; + query->callback(status, &rec, num_prs, query->context); + } else { + if (!sa_query->resp_pr_data) + ib_sa_pr_callback_single(query, status, mad); + else + ib_sa_pr_callback_multiple(query, status, num_prs, + sa_query->resp_pr_data); + } } static void ib_sa_path_rec_release(struct ib_sa_query *sa_query) @@ -1489,7 +1578,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, - void *context), + int num_paths, void *context), void *context, struct ib_sa_query **sa_query) { @@ -1588,7 +1677,7 @@ err1: EXPORT_SYMBOL(ib_sa_path_rec_get); static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { struct ib_sa_mcmember_query *query = @@ -1680,7 +1769,7 @@ err1: /* Support GuidInfoRecord */ static void ib_sa_guidinfo_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_paths, struct ib_sa_mad *mad) { struct ib_sa_guidinfo_query *query = @@ -1790,7 +1879,7 @@ static void ib_classportinfo_cb(void *context) } static void ib_sa_classport_info_rec_callback(struct ib_sa_query *sa_query, - int status, + int status, int num_prs, struct ib_sa_mad *mad) { unsigned long flags; @@ -1966,13 +2055,13 @@ static void send_handler(struct ib_mad_agent *agent, /* No callback -- already got recv */ break; case IB_WC_RESP_TIMEOUT_ERR: - query->callback(query, -ETIMEDOUT, NULL); + query->callback(query, -ETIMEDOUT, 0, NULL); break; case IB_WC_WR_FLUSH_ERR: - query->callback(query, -EINTR, NULL); + 
query->callback(query, -EINTR, 0, NULL); break; default: - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); break; } @@ -2000,10 +2089,10 @@ static void recv_handler(struct ib_mad_agent *mad_agent, if (mad_recv_wc->wc->status == IB_WC_SUCCESS) query->callback(query, mad_recv_wc->recv_buf.mad->mad_hdr.status ? - -EINVAL : 0, + -EINVAL : 0, 1, (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad); else - query->callback(query, -EIO, NULL); + query->callback(query, -EIO, 0, NULL); } ib_free_recv_mad(mad_recv_wc); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index a4904371e2db..ac25fc80fb33 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -742,7 +742,7 @@ void ipoib_flush_paths(struct net_device *dev) static void path_rec_completion(int status, struct sa_path_rec *pathrec, - void *path_ptr) + int num_prs, void *path_ptr) { struct ipoib_path *path = path_ptr; struct net_device *dev = path->dev; diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 9d593445d436..d01102db4fd4 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -699,7 +699,7 @@ static void srp_free_ch_ib(struct srp_target_port *target, static void srp_path_rec_completion(int status, struct sa_path_rec *pathrec, - void *ch_ptr) + int num_paths, void *ch_ptr) { struct srp_rdma_ch *ch = ch_ptr; struct srp_target_port *target = ch->target; diff --git a/include/rdma/ib_sa.h b/include/rdma/ib_sa.h index 3634d4cc7a56..e930bec33b31 100644 --- a/include/rdma/ib_sa.h +++ b/include/rdma/ib_sa.h @@ -186,6 +186,7 @@ struct sa_path_rec { struct sa_path_rec_opa opa; }; enum sa_path_rec_type rec_type; + u32 flags; }; static inline enum ib_gid_type @@ -413,7 +414,7 @@ int ib_sa_path_rec_get(struct ib_sa_client *client, struct ib_device *device, ib_sa_comp_mask comp_mask, unsigned long timeout_ms, gfp_t gfp_mask, void (*callback)(int status, struct sa_path_rec *resp, - void *context), + int num_prs, void *context), void *context, struct ib_sa_query **query); struct ib_sa_multicast { diff --git a/include/rdma/rdma_cm.h b/include/rdma/rdma_cm.h index 81916039ee24..cdc7cafab572 100644 --- a/include/rdma/rdma_cm.h +++ b/include/rdma/rdma_cm.h @@ -49,9 +49,15 @@ struct rdma_addr { struct rdma_dev_addr dev_addr; }; +#define RDMA_PRIMARY_PATH_MAX_REC_NUM 3 struct rdma_route { struct rdma_addr addr; struct sa_path_rec *path_rec; + + /* Optional path records of primary path */ + struct sa_path_rec *path_rec_inbound; + struct sa_path_rec *path_rec_outbound; + /* * 0 - No primary nor alternate path is available * 1 - Only primary path is available -- cgit v1.2.3 From eb8336dbe373edd1ad6061c543e4ba6ea60f6cc9 Mon Sep 17 00:00:00 2001 From: Mark Zhang Date: Thu, 8 Sep 2022 13:09:03 +0300 Subject: RDMA/cm: Use DLID from inbound/outbound PathRecords as the datapath DLID In inter-subnet cases, when inbound/outbound PRs are available, outbound_PR.dlid is used as the requestor's datapath DLID and inbound_PR.dlid is used as the responder's DLID. The inbound_PR.dlid is passed to responder side with the "ConnectReq.Primary_Local_Port_LID" field. With this solution the PERMISSIVE_LID is no longer used in Primary Local LID field. 
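Condensed from the diff below, the requestor-side selection and the later QP-attribute override read roughly as follows (context stripped; see cm_format_req() and cm_init_qp_rtr_attr() for the full logic):

	/* Requestor: remember the outbound PR's DLID for the datapath */
	if (param->primary_path_outbound)
		cm_id_priv->av.dlid_datapath =
			be16_to_cpu(param->primary_path_outbound->ib.dlid);

	/* When moving the QP to RTR, prefer it over the GMP PR's DLID */
	if (qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB &&
	    cm_id_priv->av.dlid_datapath &&
	    cm_id_priv->av.dlid_datapath != 0xffff)
		qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath;
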
Signed-off-by: Mark Zhang Reviewed-by: Mark Bloch Link: https://lore.kernel.org/r/b3f6cac685bce9dde37c610be82e2c19d9e51d9e.1662631201.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cm.c | 25 +++++++++++++++++++++++-- drivers/infiniband/core/cma.c | 2 ++ include/rdma/ib_cm.h | 2 ++ 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/cm.c b/drivers/infiniband/core/cm.c index ade82752f9f7..1f9938a2c475 100644 --- a/drivers/infiniband/core/cm.c +++ b/drivers/infiniband/core/cm.c @@ -175,6 +175,7 @@ struct cm_device { struct cm_av { struct cm_port *port; struct rdma_ah_attr ah_attr; + u16 dlid_datapath; u16 pkey_index; u8 timeout; }; @@ -1304,6 +1305,7 @@ static void cm_format_req(struct cm_req_msg *req_msg, struct sa_path_rec *pri_path = param->primary_path; struct sa_path_rec *alt_path = param->alternate_path; bool pri_ext = false; + __be16 lid; if (pri_path->rec_type == SA_PATH_REC_TYPE_OPA) pri_ext = opa_is_extended_lid(pri_path->opa.dlid, @@ -1363,9 +1365,16 @@ static void cm_format_req(struct cm_req_msg *req_msg, htons(ntohl(sa_path_get_dlid( pri_path))))); } else { + + if (param->primary_path_inbound) { + lid = param->primary_path_inbound->ib.dlid; + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(lid)); + } else + IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, + be16_to_cpu(IB_LID_PERMISSIVE)); + /* Work-around until there's a way to obtain remote LID info */ - IBA_SET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg, - be16_to_cpu(IB_LID_PERMISSIVE)); IBA_SET(CM_REQ_PRIMARY_REMOTE_PORT_LID, req_msg, be16_to_cpu(IB_LID_PERMISSIVE)); } @@ -1520,6 +1529,10 @@ int ib_send_cm_req(struct ib_cm_id *cm_id, spin_lock_irqsave(&cm_id_priv->lock, flags); cm_move_av_from_path(&cm_id_priv->av, &av); + if (param->primary_path_outbound) + cm_id_priv->av.dlid_datapath = + be16_to_cpu(param->primary_path_outbound->ib.dlid); + if (param->alternate_path) cm_move_av_from_path(&cm_id_priv->alt_av, &alt_av); @@ -2154,6 +2167,10 @@ static int cm_req_handler(struct cm_work *work) NULL, 0); goto rejected; } + if (cm_id_priv->av.ah_attr.type == RDMA_AH_ATTR_TYPE_IB) + cm_id_priv->av.dlid_datapath = + IBA_GET(CM_REQ_PRIMARY_LOCAL_PORT_LID, req_msg); + if (cm_req_has_alt_path(req_msg)) { ret = cm_init_av_by_path(&work->path[1], NULL, &cm_id_priv->alt_av); @@ -4113,6 +4130,10 @@ static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv, *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU | IB_QP_DEST_QPN | IB_QP_RQ_PSN; qp_attr->ah_attr = cm_id_priv->av.ah_attr; + if ((qp_attr->ah_attr.type == RDMA_AH_ATTR_TYPE_IB) && + cm_id_priv->av.dlid_datapath && + (cm_id_priv->av.dlid_datapath != 0xffff)) + qp_attr->ah_attr.ib.dlid = cm_id_priv->av.dlid_datapath; qp_attr->path_mtu = cm_id_priv->path_mtu; qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn); qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn); diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index a3efc462305d..7eacb23165fc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4313,6 +4313,8 @@ static int cma_connect_ib(struct rdma_id_private *id_priv, } req.primary_path = &route->path_rec[0]; + req.primary_path_inbound = route->path_rec_inbound; + req.primary_path_outbound = route->path_rec_outbound; if (route->num_pri_alt_paths == 2) req.alternate_path = &route->path_rec[1]; diff --git a/include/rdma/ib_cm.h b/include/rdma/ib_cm.h index 8dae5847020a..a2ac62b4a6cf 100644 --- 
a/include/rdma/ib_cm.h
+++ b/include/rdma/ib_cm.h
@@ -348,6 +348,8 @@ struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
 struct ib_cm_req_param {
 	struct sa_path_rec *primary_path;
+	struct sa_path_rec *primary_path_inbound;
+	struct sa_path_rec *primary_path_outbound;
 	struct sa_path_rec *alternate_path;
 	const struct ib_gid_attr *ppath_sgid_attr;
 	__be64 service_id;
-- cgit v1.2.3

From 4bf207d7a54d49637da94dbc00d2c025b74764d1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Thu, 1 Sep 2022 11:20:53 -0300
Subject: net/mlx5: Add IFC bits for mkey ATS

Allows telling a mkey to use PCI ATS for DMA that flows through it.

Link: https://lore.kernel.org/r/1-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe
---
 include/linux/mlx5/mlx5_ifc.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 4acd5610e96b..92602e33a82c 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1707,7 +1707,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8 steering_format_version[0x4];
 	u8 create_qp_start_hint[0x18];
-	u8 reserved_at_460[0x3];
+	u8 reserved_at_460[0x1];
+	u8 ats[0x1];
+	u8 reserved_at_462[0x1];
 	u8 log_max_uctx[0x5];
 	u8 reserved_at_468[0x2];
 	u8 ipsec_offload[0x1];
@@ -3873,7 +3875,9 @@ struct mlx5_ifc_mkc_bits {
 	u8 lw[0x1];
 	u8 lr[0x1];
 	u8 access_mode_1_0[0x2];
-	u8 reserved_at_18[0x8];
+	u8 reserved_at_18[0x2];
+	u8 ma_translation_mode[0x2];
+	u8 reserved_at_1c[0x4];
 	u8 qpn[0x18];
 	u8 mkey_7_0[0x8];
@@ -11134,7 +11138,8 @@ struct mlx5_ifc_dealloc_memic_out_bits {
 struct mlx5_ifc_umem_bits {
 	u8 reserved_at_0[0x80];
-	u8 reserved_at_80[0x1b];
+	u8 ats[0x1];
+	u8 reserved_at_81[0x1a];
 	u8 log_page_size[0x5];
 	u8 page_offset[0x20];
-- cgit v1.2.3

From 015bda8abd3a6a77656e60b36d499c43a2c0f0a1 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Thu, 1 Sep 2022 11:20:54 -0300
Subject: RDMA/core: Add UVERBS_ATTR_RAW_FD

This uses the same passing protocol as UVERBS_ATTR_FD (e.g., len = 0,
data_s64 = fd), except that the FD is not required to be a uverbs object
and the core code does not convert the FD to an object handle
automatically.

Access to the int fd is provided by uverbs_get_raw_fd().
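As a usage sketch (EXAMPLE_ATTR_DMABUF_FD is a hypothetical attribute ID here;
the real consumer is the devx umem patch later in this series), a driver
declares the attribute in its method definition and fetches the plain int in
the handler:

	/* In the method definition: an optional raw FD attribute */
	UVERBS_ATTR_RAW_FD(EXAMPLE_ATTR_DMABUF_FD, UA_OPTIONAL),

	/* In the handler: fetch the int fd only if userspace supplied it */
	int fd, err;

	if (uverbs_attr_is_valid(attrs, EXAMPLE_ATTR_DMABUF_FD)) {
		err = uverbs_get_raw_fd(&fd, attrs, EXAMPLE_ATTR_DMABUF_FD);
		if (err)
			return err;
		/* fd is a plain file descriptor number, not a uverbs object */
	}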
Link: https://lore.kernel.org/r/2-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/core/uverbs_ioctl.c | 8 ++++++++
 include/rdma/uverbs_ioctl.h | 13 +++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include')

diff --git a/drivers/infiniband/core/uverbs_ioctl.c b/drivers/infiniband/core/uverbs_ioctl.c
index 990f0724acc6..d9799706c58e 100644
--- a/drivers/infiniband/core/uverbs_ioctl.c
+++ b/drivers/infiniband/core/uverbs_ioctl.c
@@ -337,6 +337,14 @@ static int uverbs_process_attr(struct bundle_priv *pbundle,
 		break;
+	case UVERBS_ATTR_TYPE_RAW_FD:
+		if (uattr->attr_data.reserved || uattr->len != 0 ||
+		    uattr->data_s64 < INT_MIN || uattr->data_s64 > INT_MAX)
+			return -EINVAL;
+		/* _uverbs_get_const_signed() is the accessor */
+		e->ptr_attr.data = uattr->data_s64;
+		break;
+
 	case UVERBS_ATTR_TYPE_IDRS_ARRAY:
 		return uverbs_process_idrs_array(pbundle, attr_uapi,
 						 &e->objs_arr_attr, uattr,
diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h
index 23bb404aba12..9d45a5b20316 100644
--- a/include/rdma/uverbs_ioctl.h
+++ b/include/rdma/uverbs_ioctl.h
@@ -24,6 +24,7 @@ enum uverbs_attr_type {
 	UVERBS_ATTR_TYPE_PTR_OUT,
 	UVERBS_ATTR_TYPE_IDR,
 	UVERBS_ATTR_TYPE_FD,
+	UVERBS_ATTR_TYPE_RAW_FD,
 	UVERBS_ATTR_TYPE_ENUM_IN,
 	UVERBS_ATTR_TYPE_IDRS_ARRAY,
 };
@@ -521,6 +522,11 @@ struct uapi_definition {
 			.u.obj.access = _access, \
 			__VA_ARGS__ } })
+#define UVERBS_ATTR_RAW_FD(_attr_id, ...) \
+	(&(const struct uverbs_attr_def){ \
+		.id = (_attr_id), \
+		.attr = { .type = UVERBS_ATTR_TYPE_RAW_FD, __VA_ARGS__ } })
+
 #define UVERBS_ATTR_PTR_IN(_attr_id, _type, ...) \
 	(&(const struct uverbs_attr_def){ \
 		.id = _attr_id, \
@@ -999,4 +1005,11 @@ _uverbs_get_const_unsigned(u64 *to,
 		uverbs_get_const_default_unsigned(_to, _attrs_bundle, _idx, \
 						  _default))
+static inline int
+uverbs_get_raw_fd(int *to, const struct uverbs_attr_bundle *attrs_bundle,
+		  size_t idx)
+{
+	return uverbs_get_const_signed(to, attrs_bundle, idx);
+}
+
 #endif
-- cgit v1.2.3

From 9af859c58d0f169ead0ed95204cdd891b0ee623a Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe
Date: Thu, 1 Sep 2022 11:20:55 -0300
Subject: RDMA/mlx5: Add support for dmabuf to devx umem

This is modeled after the similar EFA enablement in commit 66f4817b5712
("RDMA/efa: Add support for dmabuf memory regions").

Like EFA, there is no support for revocation, so we simply call
ib_umem_dmabuf_get_pinned() to obtain a umem instead of the normal
ib_umem_get(). Everything else stays the same.
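From userspace, the new attribute follows the RAW_FD passing protocol from the
previous patch. A minimal sketch, assuming the ioctl attribute is built by
hand from the uapi struct rather than through an rdma-core wrapper (dmabuf_fd
is an fd obtained from a dmabuf exporter):

	/* len == 0 and data_s64 carries the dmabuf fd directly */
	struct ib_uverbs_attr fd_attr = {
		.attr_id = MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD,
		.len = 0,
		.data_s64 = dmabuf_fd,
	};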
Link: https://lore.kernel.org/r/3-v1-bd147097458e+ede-umem_dmabuf_jgg@nvidia.com
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/mlx5/devx.c | 24 +++++++++++++++++++++---
 include/uapi/rdma/mlx5_user_ioctl_cmds.h | 1 +
 2 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index adefff89fb39..a41e8d582f5b 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -2183,9 +2183,25 @@ static int devx_umem_get(struct mlx5_ib_dev *dev, struct ib_ucontext *ucontext,
 	if (err)
 		return err;
-	obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
-	if (IS_ERR(obj->umem))
-		return PTR_ERR(obj->umem);
+	if (uverbs_attr_is_valid(attrs, MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD)) {
+		struct ib_umem_dmabuf *umem_dmabuf;
+		int dmabuf_fd;
+
+		err = uverbs_get_raw_fd(&dmabuf_fd, attrs,
+					MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD);
+		if (err)
+			return -EFAULT;
+
+		umem_dmabuf = ib_umem_dmabuf_get_pinned(
+			&dev->ib_dev, addr, size, dmabuf_fd, access);
+		if (IS_ERR(umem_dmabuf))
+			return PTR_ERR(umem_dmabuf);
+		obj->umem = &umem_dmabuf->umem;
+	} else {
+		obj->umem = ib_umem_get(&dev->ib_dev, addr, size, access);
+		if (IS_ERR(obj->umem))
+			return PTR_ERR(obj->umem);
+	}
 	return 0;
 }
@@ -2835,6 +2851,8 @@ DECLARE_UVERBS_NAMED_METHOD(
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_LEN,
 			   UVERBS_ATTR_TYPE(u64),
 			   UA_MANDATORY),
+	UVERBS_ATTR_RAW_FD(MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD,
+			   UA_OPTIONAL),
 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
 			     enum ib_access_flags),
 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
diff --git a/include/uapi/rdma/mlx5_user_ioctl_cmds.h b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
index 3bee490eb585..595edad03dfe 100644
--- a/include/uapi/rdma/mlx5_user_ioctl_cmds.h
+++ b/include/uapi/rdma/mlx5_user_ioctl_cmds.h
@@ -174,6 +174,7 @@ enum mlx5_ib_devx_umem_reg_attrs {
 	MLX5_IB_ATTR_DEVX_UMEM_REG_ACCESS,
 	MLX5_IB_ATTR_DEVX_UMEM_REG_OUT_ID,
 	MLX5_IB_ATTR_DEVX_UMEM_REG_PGSZ_BITMAP,
+	MLX5_IB_ATTR_DEVX_UMEM_REG_DMABUF_FD,
 };
 
 enum mlx5_ib_devx_umem_dereg_attrs {
-- cgit v1.2.3

From 6c5e683925cf19d36033f3e9e9d90755f034614e Mon Sep 17 00:00:00 2001
From: Bob Pearson
Date: Tue, 13 Sep 2022 17:27:17 -0500
Subject: RDMA/rxe: Remove redundant num_sge fields

In include/uapi/rdma/rdma_user_rxe.h there are redundant copies of
num_sge in the rxe_send_wr, rxe_recv_wqe, and rxe_dma_info. Only the
ones in rxe_dma_info are actually used by the rxe kernel driver.
Userspace would set these values, but the kernel never read them.

This change has no effect on the current ABI, and new or old versions
of rdma-core operate correctly with new or old versions of the kernel
rxe driver.
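To make the "never read" claim concrete, a condensed view of post_one_recv()
after this change (abridged; the dma.* assignments come from the pre-existing
function body around the hunk below). The SGE count lives only in the dma
info, and the removed top-level copy was write-only:

	recv_wqe->wr_id = ibwr->wr_id;
	memcpy(recv_wqe->dma.sge, ibwr->sg_list,
	       num_sge * sizeof(struct ib_sge));
	recv_wqe->dma.num_sge = num_sge;	/* the copy the driver reads */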
Link: https://lore.kernel.org/r/20220913222716.18335-1-rpearsonhpe@gmail.com Signed-off-by: Bob Pearson Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_verbs.c | 2 -- include/uapi/rdma/rdma_user_rxe.h | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 3d37216609e4..88825edc7dce 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -262,7 +262,6 @@ static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) recv_wqe = queue_producer_addr(rq->queue, QUEUE_TYPE_TO_DRIVER); recv_wqe->wr_id = ibwr->wr_id; - recv_wqe->num_sge = num_sge; memcpy(recv_wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); @@ -526,7 +525,6 @@ static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; - wr->num_sge = ibwr->num_sge; wr->opcode = ibwr->opcode; wr->send_flags = ibwr->send_flags; diff --git a/include/uapi/rdma/rdma_user_rxe.h b/include/uapi/rdma/rdma_user_rxe.h index f09c5c9e3dd5..73f679dfd2df 100644 --- a/include/uapi/rdma/rdma_user_rxe.h +++ b/include/uapi/rdma/rdma_user_rxe.h @@ -74,7 +74,7 @@ struct rxe_av { struct rxe_send_wr { __aligned_u64 wr_id; - __u32 num_sge; + __u32 reserved; __u32 opcode; __u32 send_flags; union { @@ -166,7 +166,7 @@ struct rxe_send_wqe { struct rxe_recv_wqe { __aligned_u64 wr_id; - __u32 num_sge; + __u32 reserved; __u32 padding; struct rxe_dma_info dma; }; -- cgit v1.2.3