From e549f6f9c098067a99e9de8ac84f5cc2c07ae5c6 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Thu, 22 Feb 2018 11:57:10 -0600 Subject: net/dcb: Add dcbnl buffer attribute In this patch, we add a dcbnl buffer attribute to allow the user to change the NIC's buffer configuration, such as the priority-to-buffer mapping and the size of each individual buffer. This attribute, combined with the pfc attribute, allows an advanced user to fine-tune the QoS settings for a specific priority queue. For example, the user can dedicate a buffer to one or more priorities, or give a larger buffer to certain priorities. The dcb buffer configuration will be controlled by lldptool. lldptool -T -i eth2 -V BUFFER prio 0,2,5,7,1,2,3,6 maps priorities 0,1,2,3,4,5,6,7 to receive buffers 0,2,5,7,1,2,3,6 lldptool -T -i eth2 -V BUFFER size 87296,87296,0,87296,0,0,0,0 sets the receive buffer size for buffers 0,1,2,3,4,5,6,7 respectively After discussion on the mailing list with Jakub, Jiri, Ido and John, we agreed to choose dcbnl over the devlink interface, since this feature is intended to set port attributes which are governed by the netdev instance of that port, whereas the devlink API is more suitable for global ASIC configurations. We present a use case scenario where the dcbnl buffer attribute, configured by an advanced user, helps reduce the latency of messages of different sizes. Scenario description: On ConnectX-5, we run latency-sensitive traffic with small/medium message sizes ranging from 64B to 256KB and bandwidth-sensitive traffic with large message sizes of 512KB and 1MB. We group small, medium, and large message sizes into their own PFC-enabled priorities as follows. Priorities 1 & 2 (64B, 256B and 1KB) Priorities 3 & 4 (4KB, 8KB, 16KB, 64KB, 128KB and 256KB) Priorities 5 & 6 (512KB and 1MB) By default, ConnectX-5 maps all PFC-enabled priorities to a single lossless buffer with a fixed size of 50% of the total available buffer space. The other 50% is assigned to the lossy buffer. 
Using the dcbnl buffer attribute, we create three equal-size lossless buffers. Each buffer has 25% of the total available buffer space. Thus, the lossy buffer size is reduced to 25%. Priority to lossless buffer mappings are set as follows. Priorities 1 & 2 on lossless buffer #1 Priorities 3 & 4 on lossless buffer #2 Priorities 5 & 6 on lossless buffer #3 We observe improvements in latency for small and medium message sizes as follows. Please note that the bandwidth performance for large message sizes is reduced, but the total bandwidth remains the same. 256B message size (42% latency reduction) 4K message size (21% latency reduction) 64K message size (16% latency reduction) CC: Ido Schimmel CC: Jakub Kicinski CC: Jiri Pirko CC: Or Gerlitz CC: Parav Pandit CC: Aron Silverton Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/net/dcbnl.h | 4 ++++ include/uapi/linux/dcbnl.h | 11 +++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h index 207d9ba1f92c..0e5e91be2d30 100644 --- a/include/net/dcbnl.h +++ b/include/net/dcbnl.h @@ -101,6 +101,10 @@ struct dcbnl_rtnl_ops { /* CEE peer */ int (*cee_peer_getpg) (struct net_device *, struct cee_pg *); int (*cee_peer_getpfc) (struct net_device *, struct cee_pfc *); + + /* buffer settings */ + int (*dcbnl_getbuffer)(struct net_device *, struct dcbnl_buffer *); + int (*dcbnl_setbuffer)(struct net_device *, struct dcbnl_buffer *); }; #endif /* __NET_DCBNL_H__ */ diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h index 2c0c6453c3f4..60aa2e446698 100644 --- a/include/uapi/linux/dcbnl.h +++ b/include/uapi/linux/dcbnl.h @@ -163,6 +163,16 @@ struct ieee_pfc { __u64 indications[IEEE_8021QAZ_MAX_TCS]; }; +#define IEEE_8021Q_MAX_PRIORITIES 8 +#define DCBX_MAX_BUFFERS 8 +struct dcbnl_buffer { + /* priority to buffer mapping */ + __u8 prio2buffer[IEEE_8021Q_MAX_PRIORITIES]; + /* buffer size in Bytes */ + __u32 
buffer_size[DCBX_MAX_BUFFERS]; + __u32 total_size; +}; + /* CEE DCBX std supported values */ #define CEE_DCBX_MAX_PGS 8 #define CEE_DCBX_MAX_PRIO 8 @@ -406,6 +416,7 @@ enum ieee_attrs { DCB_ATTR_IEEE_MAXRATE, DCB_ATTR_IEEE_QCN, DCB_ATTR_IEEE_QCN_STATS, + DCB_ATTR_DCB_BUFFER, __DCB_ATTR_IEEE_MAX }; #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1) -- cgit v1.2.3 From df5f1361cc080877013f7838b61d31ad31307c2b Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 28 Feb 2018 14:16:47 -0600 Subject: net/mlx5: Add pbmc and pptb in the port_access_reg_cap_mask Add pbmc and pptb in the port_access_reg_cap_mask. These two bits determine if device supports receive buffer configuration. Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 3 +++ include/linux/mlx5/mlx5_ifc.h | 12 ++++++++++++ 2 files changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2bc27f8c5b87..db0332a6d23c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1152,6 +1152,9 @@ enum mlx5_qcam_feature_groups { #define MLX5_CAP_PCAM_FEATURE(mdev, fld) \ MLX5_GET(pcam_reg, (mdev)->caps.pcam, feature_cap_mask.enhanced_features.fld) +#define MLX5_CAP_PCAM_REG(mdev, reg) \ + MLX5_GET(pcam_reg, (mdev)->caps.pcam, port_access_reg_cap_mask.regs_5000_to_507f.reg) + #define MLX5_CAP_MCAM_REG(mdev, reg) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_access_reg_cap_mask.access_regs.reg) diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index b4ea8a9914c4..f687989d336b 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8003,6 +8003,17 @@ struct mlx5_ifc_pcam_enhanced_features_bits { u8 ppcnt_statistical_group[0x1]; }; +struct mlx5_ifc_pcam_regs_5000_to_507f_bits { + u8 port_access_reg_cap_mask_127_to_96[0x20]; + u8 port_access_reg_cap_mask_95_to_64[0x20]; + u8 port_access_reg_cap_mask_63_to_32[0x20]; + + u8 
port_access_reg_cap_mask_31_to_13[0x13]; + u8 pbmc[0x1]; + u8 pptb[0x1]; + u8 port_access_reg_cap_mask_10_to_0[0xb]; +}; + struct mlx5_ifc_pcam_reg_bits { u8 reserved_at_0[0x8]; u8 feature_group[0x8]; @@ -8012,6 +8023,7 @@ struct mlx5_ifc_pcam_reg_bits { u8 reserved_at_20[0x20]; union { + struct mlx5_ifc_pcam_regs_5000_to_507f_bits regs_5000_to_507f; u8 reserved_at_0[0x80]; } port_access_reg_cap_mask; -- cgit v1.2.3 From 50b4a3c23646254c7345f3663ff1e0a6cbcd9abb Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Fri, 2 Mar 2018 15:47:01 -0600 Subject: net/mlx5: PPTB and PBMC register firmware command support Add a firmware command interface to read and write the PPTB and PBMC registers. The PPTB register enables mapping a priority to a specific receive buffer. The PBMC register enables changing the receive buffer's configuration, such as buffer size, xon/xoff thresholds, the buffer's lossy property and the buffer's shared property. Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/port.c | 108 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx5/core/en/port.h | 5 + include/linux/mlx5/driver.h | 2 + include/linux/mlx5/mlx5_ifc.h | 35 +++++++ 4 files changed, 150 insertions(+) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c index 9f04542f3661..24e3b564964f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.c @@ -127,3 +127,111 @@ u32 mlx5e_port_speed2linkmodes(u32 speed) return link_modes; } + +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *in; + int err; + + in = kzalloc(sz, GFP_KERNEL); + if (!in) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 0); + + kfree(in); + return err; +} + +int 
mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in) +{ + int sz = MLX5_ST_SZ_BYTES(pbmc_reg); + void *out; + int err; + + out = kzalloc(sz, GFP_KERNEL); + if (!out) + return -ENOMEM; + + MLX5_SET(pbmc_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PBMC, 0, 1); + + kfree(out); + return err; +} + +/* buffer[i]: buffer that priority i mapped to */ +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + prio_x_buff = MLX5_GET(pptb_reg, out, prio_x_buff); + for (prio = 0; prio < 8; prio++) { + buffer[prio] = (u8)(prio_x_buff >> (4 * prio)) & 0xF; + mlx5_core_dbg(mdev, "prio %d, buffer %d\n", prio, buffer[prio]); + } +out: + kfree(in); + kfree(out); + return err; +} + +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer) +{ + int sz = MLX5_ST_SZ_BYTES(pptb_reg); + u32 prio_x_buff; + void *out; + void *in; + int prio; + int err; + + in = kzalloc(sz, GFP_KERNEL); + out = kzalloc(sz, GFP_KERNEL); + if (!in || !out) { + err = -ENOMEM; + goto out; + } + + /* First query the pptb register */ + MLX5_SET(pptb_reg, in, local_port, 1); + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 0); + if (err) + goto out; + + memcpy(in, out, sz); + MLX5_SET(pptb_reg, in, local_port, 1); + + /* Update the pm and prio_x_buff */ + MLX5_SET(pptb_reg, in, pm, 0xFF); + + prio_x_buff = 0; + for (prio = 0; prio < 8; prio++) + prio_x_buff |= (buffer[prio] << (4 * prio)); + MLX5_SET(pptb_reg, in, prio_x_buff, prio_x_buff); + + err = mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_PPTB, 0, 1); + +out: + kfree(in); + 
kfree(out); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h index 7aae38e98a65..f8cbd8194179 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/port.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/port.h @@ -40,4 +40,9 @@ u32 mlx5e_port_ptys2speed(u32 eth_proto_oper); int mlx5e_port_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); int mlx5e_port_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed); u32 mlx5e_port_speed2linkmodes(u32 speed); + +int mlx5e_port_query_pbmc(struct mlx5_core_dev *mdev, void *out); +int mlx5e_port_set_pbmc(struct mlx5_core_dev *mdev, void *in); +int mlx5e_port_query_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); +int mlx5e_port_set_priority2buffer(struct mlx5_core_dev *mdev, u8 *buffer); #endif diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index d703774982ca..92d292454351 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -124,6 +124,8 @@ enum { MLX5_REG_PAOS = 0x5006, MLX5_REG_PFCC = 0x5007, MLX5_REG_PPCNT = 0x5008, + MLX5_REG_PPTB = 0x500b, + MLX5_REG_PBMC = 0x500c, MLX5_REG_PMAOS = 0x5012, MLX5_REG_PUDE = 0x5009, MLX5_REG_PMPE = 0x5010, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f687989d336b..edbddeaacc88 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8788,6 +8788,41 @@ struct mlx5_ifc_qpts_reg_bits { u8 trust_state[0x3]; }; +struct mlx5_ifc_pptb_reg_bits { + u8 reserved_at_0[0x2]; + u8 mm[0x2]; + u8 reserved_at_4[0x4]; + u8 local_port[0x8]; + u8 reserved_at_10[0x6]; + u8 cm[0x1]; + u8 um[0x1]; + u8 pm[0x8]; + + u8 prio_x_buff[0x20]; + + u8 pm_msb[0x8]; + u8 reserved_at_48[0x10]; + u8 ctrl_buff[0x4]; + u8 untagged_buff[0x4]; +}; + +struct mlx5_ifc_pbmc_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; + + u8 xoff_timer_value[0x10]; + u8 xoff_refresh[0x10]; + + u8 
reserved_at_40[0x9]; + u8 fullness_threshold[0x7]; + u8 port_buffer_size[0x10]; + + struct mlx5_ifc_bufferx_reg_bits buffer[10]; + + u8 reserved_at_2e0[0x40]; +}; + struct mlx5_ifc_qtct_reg_bits { u8 reserved_at_0[0x8]; u8 port_number[0x8]; -- cgit v1.2.3