Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r--   drivers/infiniband/hw/mlx5/main.c | 511
1 file changed, 441 insertions(+), 70 deletions(-)
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 69716a7ea993..e52dd21519b4 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -982,13 +982,21 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 	}
 
 	if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
-		resp.cqe_comp_caps.max_num =
-			MLX5_CAP_GEN(dev->mdev, cqe_compression) ?
-			MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0;
-		resp.cqe_comp_caps.supported_format =
-			MLX5_IB_CQE_RES_FORMAT_HASH |
-			MLX5_IB_CQE_RES_FORMAT_CSUM;
 		resp.response_length += sizeof(resp.cqe_comp_caps);
+
+		if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
+			resp.cqe_comp_caps.max_num =
+				MLX5_CAP_GEN(dev->mdev,
+					     cqe_compression_max_num);
+
+			resp.cqe_comp_caps.supported_format =
+				MLX5_IB_CQE_RES_FORMAT_HASH |
+				MLX5_IB_CQE_RES_FORMAT_CSUM;
+
+			if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
+				resp.cqe_comp_caps.supported_format |=
+					MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX;
+		}
 	}
 
 	if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
@@ -1084,6 +1092,14 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
 		if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
 			resp.tunnel_offloads_caps |=
 				MLX5_IB_TUNNELED_OFFLOADS_GRE;
+		if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+		    MLX5_FLEX_PROTO_CW_MPLS_GRE)
+			resp.tunnel_offloads_caps |=
+				MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
+		if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
+		    MLX5_FLEX_PROTO_CW_MPLS_UDP)
+			resp.tunnel_offloads_caps |=
+				MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
 	}
 
 	if (uhw->outlen) {
@@ -1953,49 +1969,15 @@ static int mlx5_ib_set_vma_data(struct vm_area_struct *vma,
 
 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 {
-	int ret;
 	struct vm_area_struct *vma;
 	struct mlx5_ib_vma_private_data *vma_private, *n;
 	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
-	struct task_struct *owning_process  = NULL;
-	struct mm_struct   *owning_mm       = NULL;
 
-	owning_process = get_pid_task(ibcontext->tgid, PIDTYPE_PID);
-	if (!owning_process)
-		return;
-
-	owning_mm = get_task_mm(owning_process);
-	if (!owning_mm) {
-		pr_info("no mm, disassociate ucontext is pending task termination\n");
-		while (1) {
-			put_task_struct(owning_process);
-			usleep_range(1000, 2000);
-			owning_process = get_pid_task(ibcontext->tgid,
-						      PIDTYPE_PID);
-			if (!owning_process ||
-			    owning_process->state == TASK_DEAD) {
-				pr_info("disassociate ucontext done, task was terminated\n");
-				/* in case task was dead need to release the
-				 * task struct.
-				 */
-				if (owning_process)
-					put_task_struct(owning_process);
-				return;
-			}
-		}
-	}
-
-	/* need to protect from a race on closing the vma as part of
-	 * mlx5_ib_vma_close.
-	 */
-	down_write(&owning_mm->mmap_sem);
 	mutex_lock(&context->vma_private_list_mutex);
 	list_for_each_entry_safe(vma_private, n, &context->vma_private_list,
 				 list) {
 		vma = vma_private->vma;
-		ret = zap_vma_ptes(vma, vma->vm_start,
-				   PAGE_SIZE);
-		WARN_ONCE(ret, "%s: zap_vma_ptes failed", __func__);
+		zap_vma_ptes(vma, vma->vm_start, PAGE_SIZE);
 		/* context going to be destroyed, should
 		 * not access ops any more.
 		 */
@@ -2005,9 +1987,6 @@ static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
 		kfree(vma_private);
 	}
 	mutex_unlock(&context->vma_private_list_mutex);
-	up_write(&owning_mm->mmap_sem);
-	mmput(owning_mm);
-	put_task_struct(owning_process);
 }
 
 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
@@ -2051,10 +2030,6 @@ static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
 	if (err)
 		return err;
 
-	mlx5_ib_dbg(dev, "mapped clock info at 0x%lx, PA 0x%llx\n",
-		    vma->vm_start,
-		    (unsigned long long)pfn << PAGE_SHIFT);
-
 	return mlx5_ib_set_vma_data(vma, context);
 }
 
@@ -2149,15 +2124,14 @@ static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
 	err = io_remap_pfn_range(vma, vma->vm_start, pfn,
 				 PAGE_SIZE, vma->vm_page_prot);
 	if (err) {
-		mlx5_ib_err(dev, "io_remap_pfn_range failed with error=%d, vm_start=0x%lx, pfn=%pa, mmap_cmd=%s\n",
-			    err, vma->vm_start, &pfn, mmap_cmd2str(cmd));
+		mlx5_ib_err(dev,
+			    "io_remap_pfn_range failed with error=%d, mmap_cmd=%s\n",
+			    err, mmap_cmd2str(cmd));
 		err = -EAGAIN;
 		goto err;
 	}
 
 	pa = pfn << PAGE_SHIFT;
-	mlx5_ib_dbg(dev, "mapped %s at 0x%lx, PA %pa\n", mmap_cmd2str(cmd),
-		    vma->vm_start, &pa);
 
 	err = mlx5_ib_set_vma_data(vma, context);
 	if (err)
@@ -2243,10 +2217,6 @@ static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
 		if (io_remap_pfn_range(vma, vma->vm_start, pfn,
 				       PAGE_SIZE, vma->vm_page_prot))
 			return -EAGAIN;
-
-		mlx5_ib_dbg(dev, "mapped internal timer at 0x%lx, PA 0x%llx\n",
-			    vma->vm_start,
-			    (unsigned long long)pfn << PAGE_SHIFT);
 		break;
 	case MLX5_IB_MMAP_CLOCK_INFO:
 		return mlx5_ib_mmap_clock_info_page(dev, vma, context);
@@ -2386,7 +2356,8 @@ static int mlx5_ib_dealloc_pd(struct ib_pd *pd)
 enum {
 	MATCH_CRITERIA_ENABLE_OUTER_BIT,
 	MATCH_CRITERIA_ENABLE_MISC_BIT,
-	MATCH_CRITERIA_ENABLE_INNER_BIT
+	MATCH_CRITERIA_ENABLE_INNER_BIT,
+	MATCH_CRITERIA_ENABLE_MISC2_BIT
 };
 
 #define HEADER_IS_ZERO(match_criteria, headers)			           \
@@ -2406,6 +2377,9 @@ static u8 get_match_criteria_enable(u32 *match_criteria)
 	match_criteria_enable |=
 		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
 		MATCH_CRITERIA_ENABLE_INNER_BIT;
+	match_criteria_enable |=
+		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
+		MATCH_CRITERIA_ENABLE_MISC2_BIT;
 
 	return match_criteria_enable;
 }
@@ -2440,6 +2414,27 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
 }
 
+static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
+{
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
+		return -EOPNOTSUPP;
+
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
+		return -EOPNOTSUPP;
+
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
+		return -EOPNOTSUPP;
+
+	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
+	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 #define LAST_ETH_FIELD vlan_tag
 #define LAST_IB_FIELD sl
 #define LAST_IPV4_FIELD tos
@@ -2448,6 +2443,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 #define LAST_TUNNEL_FIELD tunnel_id
 #define LAST_FLOW_TAG_FIELD tag_id
 #define LAST_DROP_FIELD size
+#define LAST_COUNTERS_FIELD counters
 
 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -2479,12 +2475,16 @@ static int parse_flow_flow_action(const union ib_flow_spec *ib_spec,
 static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 			   u32 *match_v, const union ib_flow_spec *ib_spec,
 			   const struct ib_flow_attr *flow_attr,
-			   struct mlx5_flow_act *action)
+			   struct mlx5_flow_act *action, u32 prev_type)
 {
 	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
 					   misc_parameters);
 	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
 					   misc_parameters);
+	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
+					    misc_parameters_2);
+	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
+					    misc_parameters_2);
 	void *headers_c;
 	void *headers_v;
 	int match_ipv;
@@ -2689,6 +2689,93 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
 			 ntohs(ib_spec->tcp_udp.val.dst_port));
 		break;
+	case IB_FLOW_SPEC_GRE:
+		if (ib_spec->gre.mask.c_ks_res0_ver)
+			return -EOPNOTSUPP;
+
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
+			 0xff);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
+			 IPPROTO_GRE);
+
+		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
+			 0xffff);
+		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
+			 ntohs(ib_spec->gre.val.protocol));
+
+		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
+				    gre_key_h),
+		       &ib_spec->gre.mask.key,
+		       sizeof(ib_spec->gre.mask.key));
+		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
+				    gre_key_h),
+		       &ib_spec->gre.val.key,
+		       sizeof(ib_spec->gre.val.key));
+		break;
+	case IB_FLOW_SPEC_MPLS:
+		switch (prev_type) {
+		case IB_FLOW_SPEC_UDP:
+			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+						   ft_field_support.outer_first_mpls_over_udp),
+						   &ib_spec->mpls.mask.tag))
+				return -EOPNOTSUPP;
+
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+					    outer_first_mpls_over_udp),
+			       &ib_spec->mpls.val.tag,
+			       sizeof(ib_spec->mpls.val.tag));
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+					    outer_first_mpls_over_udp),
+			       &ib_spec->mpls.mask.tag,
+			       sizeof(ib_spec->mpls.mask.tag));
+			break;
+		case IB_FLOW_SPEC_GRE:
+			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+						   ft_field_support.outer_first_mpls_over_gre),
+						   &ib_spec->mpls.mask.tag))
+				return -EOPNOTSUPP;
+
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+					    outer_first_mpls_over_gre),
+			       &ib_spec->mpls.val.tag,
+			       sizeof(ib_spec->mpls.val.tag));
+			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+					    outer_first_mpls_over_gre),
+			       &ib_spec->mpls.mask.tag,
+			       sizeof(ib_spec->mpls.mask.tag));
+			break;
+		default:
+			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
+				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+							   ft_field_support.inner_first_mpls),
+							   &ib_spec->mpls.mask.tag))
+					return -EOPNOTSUPP;
+
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+						    inner_first_mpls),
+				       &ib_spec->mpls.val.tag,
+				       sizeof(ib_spec->mpls.val.tag));
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+						    inner_first_mpls),
+				       &ib_spec->mpls.mask.tag,
+				       sizeof(ib_spec->mpls.mask.tag));
+			} else {
+				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+							   ft_field_support.outer_first_mpls),
+							   &ib_spec->mpls.mask.tag))
+					return -EOPNOTSUPP;
+
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
+						    outer_first_mpls),
+				       &ib_spec->mpls.val.tag,
+				       sizeof(ib_spec->mpls.val.tag));
+				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
+						    outer_first_mpls),
+				       &ib_spec->mpls.mask.tag,
+				       sizeof(ib_spec->mpls.mask.tag));
+			}
+		}
+		break;
 	case IB_FLOW_SPEC_VXLAN_TUNNEL:
 		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
 					 LAST_TUNNEL_FIELD))
@@ -2720,6 +2807,18 @@ static int parse_flow_attr(struct mlx5_core_dev *mdev, u32 *match_c,
 		if (ret)
 			return ret;
 		break;
+	case IB_FLOW_SPEC_ACTION_COUNT:
+		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
+					 LAST_COUNTERS_FIELD))
+			return -EOPNOTSUPP;
+
+		/* for now support only one counters spec per flow */
+		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
+			return -EINVAL;
+
+		action->counters = ib_spec->flow_count.counters;
+		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -2867,6 +2966,17 @@ static void put_flow_table(struct mlx5_ib_dev *dev,
 	}
 }
 
+static void counters_clear_description(struct ib_counters *counters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	kfree(mcounters->counters_data);
+	mcounters->counters_data = NULL;
+	mcounters->cntrs_max_index = 0;
+	mutex_unlock(&mcounters->mcntrs_mutex);
+}
+
 static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 {
 	struct mlx5_ib_dev *dev = to_mdev(flow_id->qp->device);
@@ -2886,8 +2996,11 @@ static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
 
 	mlx5_del_flow_rules(handler->rule);
 	put_flow_table(dev, handler->prio, true);
-	mutex_unlock(&dev->flow_db->lock);
+	if (handler->ibcounters &&
+	    atomic_read(&handler->ibcounters->usecnt) == 1)
+		counters_clear_description(handler->ibcounters);
 
+	mutex_unlock(&dev->flow_db->lock);
 	kfree(handler);
 
 	return 0;
@@ -3007,21 +3120,143 @@ static void set_underlay_qp(struct mlx5_ib_dev *dev,
 	}
 }
 
+static int read_flow_counters(struct ib_device *ibdev,
+			      struct mlx5_read_counters_attr *read_attr)
+{
+	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+
+	return mlx5_fc_query(dev->mdev, fc,
+			     &read_attr->out[IB_COUNTER_PACKETS],
+			     &read_attr->out[IB_COUNTER_BYTES]);
+}
+
+/* flow counters currently expose two counters packets and bytes */
+#define FLOW_COUNTERS_NUM 2
+static int counters_set_description(struct ib_counters *counters,
+				    enum mlx5_ib_counters_type counters_type,
+				    struct mlx5_ib_flow_counters_desc *desc_data,
+				    u32 ncounters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+	u32 cntrs_max_index = 0;
+	int i;
+
+	if (counters_type != MLX5_IB_COUNTERS_FLOW)
+		return -EINVAL;
+
+	/* init the fields for the object */
+	mcounters->type = counters_type;
+	mcounters->read_counters = read_flow_counters;
+	mcounters->counters_num = FLOW_COUNTERS_NUM;
+	mcounters->ncounters = ncounters;
+	/* each counter entry have both description and index pair */
+	for (i = 0; i < ncounters; i++) {
+		if (desc_data[i].description > IB_COUNTER_BYTES)
+			return -EINVAL;
+
+		if (cntrs_max_index <= desc_data[i].index)
+			cntrs_max_index = desc_data[i].index + 1;
+	}
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	mcounters->counters_data = desc_data;
+	mcounters->cntrs_max_index = cntrs_max_index;
+	mutex_unlock(&mcounters->mcntrs_mutex);
+
+	return 0;
+}
+
+#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
+static int flow_counters_set_data(struct ib_counters *ibcounters,
+				  struct mlx5_ib_create_flow *ucmd)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
+	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
+	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
+	bool hw_hndl = false;
+	int ret = 0;
+
+	if (ucmd && ucmd->ncounters_data != 0) {
+		cntrs_data = ucmd->data;
+		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
+			return -EINVAL;
+
+		desc_data = kcalloc(cntrs_data->ncounters,
+				    sizeof(*desc_data),
+				    GFP_KERNEL);
+		if (!desc_data)
+			return  -ENOMEM;
+
+		if (copy_from_user(desc_data,
+				   u64_to_user_ptr(cntrs_data->counters_data),
+				   sizeof(*desc_data) * cntrs_data->ncounters)) {
+			ret = -EFAULT;
+			goto free;
+		}
+	}
+
+	if (!mcounters->hw_cntrs_hndl) {
+		mcounters->hw_cntrs_hndl = mlx5_fc_create(
+			to_mdev(ibcounters->device)->mdev, false);
+		if (!mcounters->hw_cntrs_hndl) {
+			ret = -ENOMEM;
+			goto free;
+		}
+		hw_hndl = true;
+	}
+
+	if (desc_data) {
+		/* counters already bound to at least one flow */
+		if (mcounters->cntrs_max_index) {
+			ret = -EINVAL;
+			goto free_hndl;
+		}
+
+		ret = counters_set_description(ibcounters,
+					       MLX5_IB_COUNTERS_FLOW,
+					       desc_data,
+					       cntrs_data->ncounters);
+		if (ret)
+			goto free_hndl;
+
+	} else if (!mcounters->cntrs_max_index) {
+		/* counters not bound yet, must have udata passed */
+		ret = -EINVAL;
+		goto free_hndl;
+	}
+
+	return 0;
+
+free_hndl:
+	if (hw_hndl) {
+		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
+				mcounters->hw_cntrs_hndl);
+		mcounters->hw_cntrs_hndl = NULL;
+	}
+free:
+	kfree(desc_data);
+	return ret;
+}
+
 static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 						      struct mlx5_ib_flow_prio *ft_prio,
 						      const struct ib_flow_attr *flow_attr,
 						      struct mlx5_flow_destination *dst,
-						      u32 underlay_qpn)
+						      u32 underlay_qpn,
+						      struct mlx5_ib_create_flow *ucmd)
 {
 	struct mlx5_flow_table	*ft = ft_prio->flow_table;
 	struct mlx5_ib_flow_handler *handler;
 	struct mlx5_flow_act flow_act = {.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG};
 	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_destination *rule_dst = dst;
+	struct mlx5_flow_destination dest_arr[2] = {};
+	struct mlx5_flow_destination *rule_dst = dest_arr;
 	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
 	unsigned int spec_index;
+	u32 prev_type = 0;
 	int err = 0;
-	int dest_num = 1;
+	int dest_num = 0;
 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
 
 	if (!is_valid_attr(dev->mdev, flow_attr))
@@ -3035,14 +3270,20 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 	}
 
 	INIT_LIST_HEAD(&handler->list);
+	if (dst) {
+		memcpy(&dest_arr[0], dst, sizeof(*dst));
+		dest_num++;
+	}
 
 	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
 		err = parse_flow_attr(dev->mdev, spec->match_criteria,
 				      spec->match_value,
-				      ib_flow, flow_attr, &flow_act);
+				      ib_flow, flow_attr, &flow_act,
+				      prev_type);
 		if (err < 0)
 			goto free;
 
+		prev_type = ((union ib_flow_spec *)ib_flow)->type;
 		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
 	}
 
@@ -3069,15 +3310,30 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 		goto free;
 	}
 
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+		err = flow_counters_set_data(flow_act.counters, ucmd);
+		if (err)
+			goto free;
+
+		handler->ibcounters = flow_act.counters;
+		dest_arr[dest_num].type =
+			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest_arr[dest_num].counter =
+			to_mcounters(flow_act.counters)->hw_cntrs_hndl;
+		dest_num++;
+	}
+
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
-		rule_dst = NULL;
-		dest_num = 0;
+		if (!(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT)) {
+			rule_dst = NULL;
+			dest_num = 0;
+		}
 	} else {
 		if (is_egress)
 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 		else
 			flow_act.action |=
-				dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+				dest_num ?  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
 					MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 	}
 
@@ -3103,8 +3359,12 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 
 	ft_prio->flow_table = ft;
 free:
-	if (err)
+	if (err && handler) {
+		if (handler->ibcounters &&
+		    atomic_read(&handler->ibcounters->usecnt) == 1)
+			counters_clear_description(handler->ibcounters);
 		kfree(handler);
+	}
 	kvfree(spec);
 	return err ? ERR_PTR(err) : handler;
 }
@@ -3114,7 +3374,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 						     const struct ib_flow_attr *flow_attr,
 						     struct mlx5_flow_destination *dst)
 {
-	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0);
+	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
 }
 
 static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
@@ -3244,7 +3504,8 @@ err:
 
 static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 					   struct ib_flow_attr *flow_attr,
-					   int domain)
+					   int domain,
+					   struct ib_udata *udata)
 {
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
 	struct mlx5_ib_qp *mqp = to_mqp(qp);
@@ -3253,9 +3514,44 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
 	struct mlx5_ib_flow_prio *ft_prio;
 	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
+	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
+	size_t min_ucmd_sz, required_ucmd_sz;
 	int err;
 	int underlay_qpn;
 
+	if (udata && udata->inlen) {
+		min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
+				sizeof(ucmd_hdr.reserved);
+		if (udata->inlen < min_ucmd_sz)
+			return ERR_PTR(-EOPNOTSUPP);
+
+		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
+		if (err)
+			return ERR_PTR(err);
+
+		/* currently supports only one counters data */
+		if (ucmd_hdr.ncounters_data > 1)
+			return ERR_PTR(-EINVAL);
+
+		required_ucmd_sz = min_ucmd_sz +
+			sizeof(struct mlx5_ib_flow_counters_data) *
+			ucmd_hdr.ncounters_data;
+		if (udata->inlen > required_ucmd_sz &&
+		    !ib_is_udata_cleared(udata, required_ucmd_sz,
+					 udata->inlen - required_ucmd_sz))
+			return ERR_PTR(-EOPNOTSUPP);
+
+		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
+		if (!ucmd)
+			return ERR_PTR(-ENOMEM);
+
+		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
+		if (err) {
+			kfree(ucmd);
+			return ERR_PTR(err);
+		}
+	}
+
 	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO)
 		return ERR_PTR(-ENOMEM);
 
@@ -3309,7 +3605,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 			underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
 					mqp->underlay_qpn : 0;
 			handler = _create_flow_rule(dev, ft_prio, flow_attr,
-						    dst, underlay_qpn);
+						    dst, underlay_qpn, ucmd);
 		}
 	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
@@ -3330,6 +3626,7 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 
 	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
+	kfree(ucmd);
 
 	return &handler->ibflow;
 
@@ -3340,6 +3637,7 @@ destroy_ft:
 unlock:
 	mutex_unlock(&dev->flow_db->lock);
 	kfree(dst);
+	kfree(ucmd);
 	kfree(handler);
 	return ERR_PTR(err);
 }
@@ -5000,6 +5298,76 @@ static void depopulate_specs_root(struct mlx5_ib_dev *dev)
 	uverbs_free_spec_tree(dev->ib_dev.specs_root);
 }
 
+static int mlx5_ib_read_counters(struct ib_counters *counters,
+				 struct ib_counters_read_attr *read_attr,
+				 struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+	struct mlx5_read_counters_attr mread_attr = {};
+	struct mlx5_ib_flow_counters_desc *desc;
+	int ret, i;
+
+	mutex_lock(&mcounters->mcntrs_mutex);
+	if (mcounters->cntrs_max_index > read_attr->ncounters) {
+		ret = -EINVAL;
+		goto err_bound;
+	}
+
+	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
+				 GFP_KERNEL);
+	if (!mread_attr.out) {
+		ret = -ENOMEM;
+		goto err_bound;
+	}
+
+	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
+	mread_attr.flags = read_attr->flags;
+	ret = mcounters->read_counters(counters->device, &mread_attr);
+	if (ret)
+		goto err_read;
+
+	/* do the pass over the counters data array to assign according to the
+	 * descriptions and indexing pairs
+	 */
+	desc = mcounters->counters_data;
+	for (i = 0; i < mcounters->ncounters; i++)
+		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
+
+err_read:
+	kfree(mread_attr.out);
+err_bound:
+	mutex_unlock(&mcounters->mcntrs_mutex);
+	return ret;
+}
+
+static int mlx5_ib_destroy_counters(struct ib_counters *counters)
+{
+	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
+
+	counters_clear_description(counters);
+	if (mcounters->hw_cntrs_hndl)
+		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
+				mcounters->hw_cntrs_hndl);
+
+	kfree(mcounters);
+
+	return 0;
+}
+
+static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
+						   struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_mcounters *mcounters;
+
+	mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
+	if (!mcounters)
+		return ERR_PTR(-ENOMEM);
+
+	mutex_init(&mcounters->mcntrs_mutex);
+
+	return &mcounters->ibcntrs;
+}
+
 void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
@@ -5243,6 +5611,9 @@ int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	dev->ib_dev.destroy_flow_action = mlx5_ib_destroy_flow_action;
 	dev->ib_dev.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp;
 	dev->ib_dev.driver_id = RDMA_DRIVER_MLX5;
+	dev->ib_dev.create_counters = mlx5_ib_create_counters;
+	dev->ib_dev.destroy_counters = mlx5_ib_destroy_counters;
+	dev->ib_dev.read_counters = mlx5_ib_read_counters;
 
 	err = init_node_data(dev);
 	if (err)
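The counters plumbing added above is reachable from userspace through the uverbs counters objects that rdma-core exposes as ibv_create_counters(3), ibv_attach_counters_point_flow(3) and ibv_read_counters(3). The sketch below is a hedged illustration, not code from this commit: the helper name count_eth_flow, the match-all L2 spec and the use of port 1 on a raw-packet QP are illustrative assumptions.

#include <stdio.h>
#include <infiniband/verbs.h>

/* The flow attribute and its trailing specs must be contiguous. */
struct raw_eth_flow_attr {
	struct ibv_flow_attr                attr;
	struct ibv_flow_spec_eth            spec_eth;
	struct ibv_flow_spec_counter_action spec_count;
};

static int count_eth_flow(struct ibv_context *ctx, struct ibv_qp *qp)
{
	struct ibv_counters_init_attr init_attr = {0};
	struct ibv_counter_attach_attr attach_attr = {0};
	struct ibv_counters *counters;
	struct ibv_flow *flow;
	uint64_t vals[2];

	counters = ibv_create_counters(ctx, &init_attr);
	if (!counters)
		return -1;

	/* Bind output slot 0 to packets and slot 1 to bytes. With
	 * flow == NULL this only records description/index pairs. */
	attach_attr.counter_desc = IBV_COUNTER_PACKETS;
	attach_attr.index = 0;
	if (ibv_attach_counters_point_flow(counters, &attach_attr, NULL))
		goto err;

	attach_attr.counter_desc = IBV_COUNTER_BYTES;
	attach_attr.index = 1;
	if (ibv_attach_counters_point_flow(counters, &attach_attr, NULL))
		goto err;

	/* Reference the counters object from the flow itself; this is
	 * what the driver receives as IB_FLOW_SPEC_ACTION_COUNT. */
	struct raw_eth_flow_attr fattr = {
		.attr = {
			.type         = IBV_FLOW_ATTR_NORMAL,
			.size         = sizeof(fattr),
			.num_of_specs = 2,
			.port         = 1,	/* illustrative */
		},
		.spec_eth = {
			.type = IBV_FLOW_SPEC_ETH,
			.size = sizeof(struct ibv_flow_spec_eth),
			/* zeroed mask/val: match-all, illustrative */
		},
		.spec_count = {
			.type     = IBV_FLOW_SPEC_ACTION_COUNT,
			.size     = sizeof(struct ibv_flow_spec_counter_action),
			.counters = counters,
		},
	};

	flow = ibv_create_flow(qp, &fattr.attr);
	if (!flow)
		goto err;

	/* ... let traffic run, then read both values back ... */
	if (!ibv_read_counters(counters, vals, 2,
			       IBV_READ_COUNTERS_ATTR_PREFER_CACHED))
		printf("packets=%llu bytes=%llu\n",
		       (unsigned long long)vals[0],
		       (unsigned long long)vals[1]);

	ibv_destroy_flow(flow);
	ibv_destroy_counters(counters);
	return 0;

err:
	ibv_destroy_counters(counters);
	return -1;
}

Assuming this flow, the attach calls with a NULL flow only record the description/index pairs (the mlx5_ib_flow_counters_desc array that counters_set_description() validates in the patch), and the hardware counter is bound when ibv_create_flow() carries the ACTION_COUNT spec, which _create_flow_rule() translates into an extra MLX5_FLOW_DESTINATION_TYPE_COUNTER destination alongside the regular forwarding destination.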
