From 6989310f5d4327e8595664954edd40a7f99ddd0d Mon Sep 17 00:00:00 2001 From: Pi-Hsun Shih Date: Wed, 4 Dec 2019 16:13:07 +0800 Subject: wireless: Use offsetof instead of custom macro. Use offsetof to calculate offset of a field to take advantage of compiler built-in version when possible, and avoid UBSAN warning when compiling with Clang: ================================================================== UBSAN: Undefined behaviour in net/wireless/wext-core.c:525:14 member access within null pointer of type 'struct iw_point' CPU: 3 PID: 165 Comm: kworker/u16:3 Tainted: G S W 4.19.23 #43 Workqueue: cfg80211 __cfg80211_scan_done [cfg80211] Call trace: dump_backtrace+0x0/0x194 show_stack+0x20/0x2c __dump_stack+0x20/0x28 dump_stack+0x70/0x94 ubsan_epilogue+0x14/0x44 ubsan_type_mismatch_common+0xf4/0xfc __ubsan_handle_type_mismatch_v1+0x34/0x54 wireless_send_event+0x3cc/0x470 ___cfg80211_scan_done+0x13c/0x220 [cfg80211] __cfg80211_scan_done+0x28/0x34 [cfg80211] process_one_work+0x170/0x35c worker_thread+0x254/0x380 kthread+0x13c/0x158 ret_from_fork+0x10/0x18 =================================================================== Signed-off-by: Pi-Hsun Shih Reviewed-by: Nick Desaulniers Link: https://lore.kernel.org/r/20191204081307.138765-1-pihsun@chromium.org Signed-off-by: Johannes Berg --- include/uapi/linux/wireless.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h index 86eca3208b6b..a2c006a364e0 100644 --- a/include/uapi/linux/wireless.h +++ b/include/uapi/linux/wireless.h @@ -74,6 +74,8 @@ #include /* for "struct sockaddr" et al */ #include /* for IFNAMSIZ and co... */ +#include /* for offsetof */ + /***************************** VERSION *****************************/ /* * This constant is used to know the availability of the wireless @@ -1090,8 +1092,7 @@ struct iw_event { /* iw_point events are special. First, the payload (extra data) come at * the end of the event, so they are bigger than IW_EV_POINT_LEN. Second, * we omit the pointer, so start at an offset. */ -#define IW_EV_POINT_OFF (((char *) &(((struct iw_point *) NULL)->length)) - \ - (char *) NULL) +#define IW_EV_POINT_OFF offsetof(struct iw_point, length) #define IW_EV_POINT_LEN (IW_EV_LCP_LEN + sizeof(struct iw_point) - \ IW_EV_POINT_OFF) -- cgit v1.2.3 From 5c5e52d1bb962510fecdc1ebecdde89694d1b223 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Tue, 17 Dec 2019 15:19:18 +0100 Subject: nl80211: add handling for BSS color This patch adds the attributes, policy and parsing code to allow userland to send the info about the BSS coloring settings to the kernel. Signed-off-by: John Crispin Link: https://lore.kernel.org/r/20191217141921.8114-1-john@phrozen.org [johannes: remove the strict policy parsing, that was a misunderstanding] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 15 +++++++++++++++ include/uapi/linux/nl80211.h | 26 ++++++++++++++++++++++++++ net/wireless/nl80211.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 059524b87c4c..5c0b1b5ec77b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -259,6 +259,19 @@ struct ieee80211_he_obss_pd { u8 max_offset; }; +/** + * struct cfg80211_he_bss_color - AP settings for BSS coloring + * + * @color: the current color. + * @disabled: is the feature disabled. + * @partial: define the AID equation. + */ +struct cfg80211_he_bss_color { + u8 color; + bool disabled; + bool partial; +}; + /** * struct ieee80211_sta_ht_cap - STA's HT capabilities * @@ -990,6 +1003,7 @@ enum cfg80211_ap_settings_flags { * @twt_responder: Enable Target Wait Time * @flags: flags, as defined in enum cfg80211_ap_settings_flags * @he_obss_pd: OBSS Packet Detection settings + * @he_bss_color: BSS Color settings */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -1018,6 +1032,7 @@ struct cfg80211_ap_settings { bool twt_responder; u32 flags; struct ieee80211_he_obss_pd he_obss_pd; + struct cfg80211_he_bss_color he_bss_color; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5eab191607f8..809ef9165684 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2400,6 +2400,8 @@ enum nl80211_commands { * @NL80211_ATTR_VLAN_ID: VLAN ID (1..4094) for the station and VLAN group key * (u16). * + * @NL80211_ATTR_HE_BSS_COLOR: nested attribute for BSS Color Settings. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2864,6 +2866,8 @@ enum nl80211_attrs { NL80211_ATTR_VLAN_ID, + NL80211_ATTR_HE_BSS_COLOR, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -6587,5 +6591,27 @@ enum nl80211_obss_pd_attributes { NL80211_HE_OBSS_PD_ATTR_MAX = __NL80211_HE_OBSS_PD_ATTR_LAST - 1, }; +/** + * enum nl80211_bss_color_attributes - BSS Color attributes + * @__NL80211_HE_BSS_COLOR_ATTR_INVALID: Invalid + * + * @NL80211_HE_BSS_COLOR_ATTR_COLOR: the current BSS Color. + * @NL80211_HE_BSS_COLOR_ATTR_DISABLED: is BSS coloring disabled. + * @NL80211_HE_BSS_COLOR_ATTR_PARTIAL: the AID equation to be used.. + * + * @__NL80211_HE_BSS_COLOR_ATTR_LAST: Internal + * @NL80211_HE_BSS_COLOR_ATTR_MAX: highest BSS Color attribute. + */ +enum nl80211_bss_color_attributes { + __NL80211_HE_BSS_COLOR_ATTR_INVALID, + + NL80211_HE_BSS_COLOR_ATTR_COLOR, + NL80211_HE_BSS_COLOR_ATTR_DISABLED, + NL80211_HE_BSS_COLOR_ATTR_PARTIAL, + + /* keep last */ + __NL80211_HE_BSS_COLOR_ATTR_LAST, + NL80211_HE_BSS_COLOR_ATTR_MAX = __NL80211_HE_BSS_COLOR_ATTR_LAST - 1, +}; #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index fa3526592c51..00f24d4c623e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -321,6 +321,13 @@ he_obss_pd_policy[NL80211_HE_OBSS_PD_ATTR_MAX + 1] = { NLA_POLICY_RANGE(NLA_U8, 1, 20), }; +static const struct nla_policy +he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { + [NL80211_HE_BSS_COLOR_ATTR_COLOR] = NLA_POLICY_RANGE(NLA_U8, 1, 63), + [NL80211_HE_BSS_COLOR_ATTR_DISABLED] = { .type = NLA_FLAG }, + [NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG }, +}; + const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -625,6 +632,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_TWT_RESPONDER] = { .type = NLA_FLAG }, [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), + [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), }; /* policy for the key attributes */ @@ -4511,6 +4519,30 @@ static int nl80211_parse_he_obss_pd(struct nlattr *attrs, return 0; } +static int nl80211_parse_he_bss_color(struct nlattr *attrs, + struct cfg80211_he_bss_color *he_bss_color) +{ + struct nlattr *tb[NL80211_HE_BSS_COLOR_ATTR_MAX + 1]; + int err; + + err = nla_parse_nested(tb, NL80211_HE_BSS_COLOR_ATTR_MAX, attrs, + he_bss_color_policy, NULL); + if (err) + return err; + + if (!tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]) + return -EINVAL; + + he_bss_color->color = + nla_get_u8(tb[NL80211_HE_BSS_COLOR_ATTR_COLOR]); + he_bss_color->disabled = + nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_DISABLED]); + he_bss_color->partial = + nla_get_flag(tb[NL80211_HE_BSS_COLOR_ATTR_PARTIAL]); + + return 0; +} + static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, const u8 *rates) { @@ -4803,6 +4835,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_HE_BSS_COLOR]) { + err = nl80211_parse_he_bss_color( + info->attrs[NL80211_ATTR_HE_BSS_COLOR], + ¶ms.he_bss_color); + if (err) + return err; + } + nl80211_calculate_ap_params(¶ms); if (info->attrs[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT]) -- cgit v1.2.3 From 1e61d82cca170465902003bfe445f0461e28208b Mon Sep 17 00:00:00 2001 From: Haim Dreyfuss Date: Tue, 21 Jan 2020 10:12:13 +0200 Subject: cfg80211: add no HE indication to the channel flag The regulatory domain might forbid HE operation. Certain regulatory domains may restrict it for specific channels whereas others may do it for the whole regulatory domain. Add an option to indicate it in the channel flag. Signed-off-by: Haim Dreyfuss Signed-off-by: Luca Coelho Link: https://lore.kernel.org/r/20200121081213.733757-1-luca@coelho.fi Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 3 +++ net/wireless/reg.c | 2 ++ 4 files changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index fa027d0d031b..40f2a3a30e3d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -95,6 +95,7 @@ struct wiphy; * on this channel. * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted * on this channel. + * @IEEE80211_CHAN_NO_HE: HE operation is not permitted on this channel. * */ enum ieee80211_channel_flags { @@ -111,6 +112,7 @@ enum ieee80211_channel_flags { IEEE80211_CHAN_IR_CONCURRENT = 1<<10, IEEE80211_CHAN_NO_20MHZ = 1<<11, IEEE80211_CHAN_NO_10MHZ = 1<<12, + IEEE80211_CHAN_NO_HE = 1<<13, }; #define IEEE80211_CHAN_NO_HT40 \ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 809ef9165684..d996bac97e9d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3587,6 +3587,8 @@ enum nl80211_wmm_rule { * @NL80211_FREQUENCY_ATTR_WMM: this channel has wmm limitations. * This is a nested attribute that contains the wmm limitation per AC. * (see &enum nl80211_wmm_rule) + * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel + * in current regulatory domain. * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number * currently defined * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use @@ -3616,6 +3618,7 @@ enum nl80211_frequency_attr { NL80211_FREQUENCY_ATTR_NO_20MHZ, NL80211_FREQUENCY_ATTR_NO_10MHZ, NL80211_FREQUENCY_ATTR_WMM, + NL80211_FREQUENCY_ATTR_NO_HE, /* keep last */ __NL80211_FREQUENCY_ATTR_AFTER_LAST, @@ -3813,6 +3816,7 @@ enum nl80211_sched_scan_match_attr { * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed * @NL80211_RRF_NO_160MHZ: 160MHz operation not allowed + * @NL80211_RRF_NO_HE: HE operation not allowed */ enum nl80211_reg_rule_flags { NL80211_RRF_NO_OFDM = 1<<0, @@ -3830,6 +3834,7 @@ enum nl80211_reg_rule_flags { NL80211_RRF_NO_HT40PLUS = 1<<14, NL80211_RRF_NO_80MHZ = 1<<15, NL80211_RRF_NO_160MHZ = 1<<16, + NL80211_RRF_NO_HE = 1<<17, }; #define NL80211_RRF_PASSIVE_SCAN NL80211_RRF_NO_IR diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 00f24d4c623e..d8cdbf07aeec 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -972,6 +972,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_NO_10MHZ) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_10MHZ)) goto nla_put_failure; + if ((chan->flags & IEEE80211_CHAN_NO_HE) && + nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_HE)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 446c76d44e65..ea7bc5652a41 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1569,6 +1569,8 @@ static u32 map_regdom_flags(u32 rd_flags) channel_flags |= IEEE80211_CHAN_NO_80MHZ; if (rd_flags & NL80211_RRF_NO_160MHZ) channel_flags |= IEEE80211_CHAN_NO_160MHZ; + if (rd_flags & NL80211_RRF_NO_HE) + channel_flags |= IEEE80211_CHAN_NO_HE; return channel_flags; } -- cgit v1.2.3 From d6039a3416f7af37c04f22c411f120ad46f51663 Mon Sep 17 00:00:00 2001 From: Veerendranath Jakkam Date: Mon, 27 Jan 2020 02:00:32 +0530 Subject: cfg80211: Enhance the AKM advertizement to support per interface. Commit ab4dfa20534e ("cfg80211: Allow drivers to advertise supported AKM suites") introduces the support to advertize supported AKMs to userspace. This needs an enhancement to advertize the AKM support per interface type, specifically for the cfg80211-based drivers that implement SME and use different mechanisms to support the AKM's for each interface type (e.g., the support for SAE, OWE AKM's take different paths for such drivers on STA/AP mode). This commit aims the same and enhances the earlier mechanism of advertizing the AKMs per wiphy. Add new nl80211 attributes and data structure to provide supported AKMs per interface type to userspace. the AKMs advertized in akm_suites are default capabilities if not advertized for a specific interface type in iftype_akm_suites. Signed-off-by: Veerendranath Jakkam Link: https://lore.kernel.org/r/20200126203032.21934-1-vjakkam@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 28 +++++++++++++++++++++++++++- include/uapi/linux/nl80211.h | 33 +++++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 40f2a3a30e3d..c3a9b375d3ca 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4400,6 +4400,21 @@ struct cfg80211_pmsr_capabilities { } ftm; }; +/** + * struct wiphy_iftype_akm_suites - This structure encapsulates supported akm + * suites for interface types defined in @iftypes_mask. Each type in the + * @iftypes_mask must be unique across all instances of iftype_akm_suites. + * + * @iftypes_mask: bitmask of interfaces types + * @akm_suites: points to an array of supported akm suites + * @n_akm_suites: number of supported AKM suites + */ +struct wiphy_iftype_akm_suites { + u16 iftypes_mask; + const u32 *akm_suites; + int n_akm_suites; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback, @@ -4412,8 +4427,16 @@ struct cfg80211_pmsr_capabilities { * @signal_type: signal type reported in &struct cfg80211_bss. * @cipher_suites: supported cipher suites * @n_cipher_suites: number of supported cipher suites - * @akm_suites: supported AKM suites + * @akm_suites: supported AKM suites. These are the default AKMs supported if + * the supported AKMs not advertized for a specific interface type in + * iftype_akm_suites. * @n_akm_suites: number of supported AKM suites + * @iftype_akm_suites: array of supported akm suites info per interface type. + * Note that the bits in @iftypes_mask inside this structure cannot + * overlap (i.e. only one occurrence of each type is allowed across all + * instances of iftype_akm_suites). + * @num_iftype_akm_suites: number of interface types for which supported akm + * suites are specified separately. * @retry_short: Retry limit for short frames (dot11ShortRetryLimit) * @retry_long: Retry limit for long frames (dot11LongRetryLimit) * @frag_threshold: Fragmentation threshold (dot11FragmentationThreshold); @@ -4620,6 +4643,9 @@ struct wiphy { int n_akm_suites; const u32 *akm_suites; + const struct wiphy_iftype_akm_suites *iftype_akm_suites; + unsigned int num_iftype_akm_suites; + u8 retry_short; u8 retry_long; u32 frag_threshold; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index d996bac97e9d..350ab86fe20e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2402,6 +2402,13 @@ enum nl80211_commands { * * @NL80211_ATTR_HE_BSS_COLOR: nested attribute for BSS Color Settings. * + * @NL80211_ATTR_IFTYPE_AKM_SUITES: nested array attribute, with each entry + * using attributes from &enum nl80211_iftype_akm_attributes. This + * attribute is sent in a response to %NL80211_CMD_GET_WIPHY indicating + * supported AKM suites capability per interface. AKMs advertised in + * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not + * advertised for a specific interface type. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2868,6 +2875,8 @@ enum nl80211_attrs { NL80211_ATTR_HE_BSS_COLOR, + NL80211_ATTR_IFTYPE_AKM_SUITES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -6619,4 +6628,28 @@ enum nl80211_bss_color_attributes { NL80211_HE_BSS_COLOR_ATTR_MAX = __NL80211_HE_BSS_COLOR_ATTR_LAST - 1, }; +/** + * enum nl80211_iftype_akm_attributes - interface type AKM attributes + * @__NL80211_IFTYPE_AKM_ATTR_INVALID: Invalid + * + * @NL80211_IFTYPE_AKM_ATTR_IFTYPES: nested attribute containing a flag + * attribute for each interface type that supports AKM suites specified in + * %NL80211_IFTYPE_AKM_ATTR_SUITES + * @NL80211_IFTYPE_AKM_ATTR_SUITES: an array of u32. Used to indicate supported + * AKM suites for the specified interface types. + * + * @__NL80211_IFTYPE_AKM_ATTR_LAST: Internal + * @NL80211_IFTYPE_AKM_ATTR_MAX: highest interface type AKM attribute. + */ +enum nl80211_iftype_akm_attributes { + __NL80211_IFTYPE_AKM_ATTR_INVALID, + + NL80211_IFTYPE_AKM_ATTR_IFTYPES, + NL80211_IFTYPE_AKM_ATTR_SUITES, + + /* keep last */ + __NL80211_IFTYPE_AKM_ATTR_LAST, + NL80211_IFTYPE_AKM_ATTR_MAX = __NL80211_IFTYPE_AKM_ATTR_LAST - 1, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index d8cdbf07aeec..3ad937d0a256 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1896,6 +1896,46 @@ static int nl80211_send_pmsr_capa(struct cfg80211_registered_device *rdev, return 0; } +static int +nl80211_put_iftype_akm_suites(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + int i; + struct nlattr *nested, *nested_akms; + const struct wiphy_iftype_akm_suites *iftype_akms; + + if (!rdev->wiphy.num_iftype_akm_suites || + !rdev->wiphy.iftype_akm_suites) + return 0; + + nested = nla_nest_start(msg, NL80211_ATTR_IFTYPE_AKM_SUITES); + if (!nested) + return -ENOBUFS; + + for (i = 0; i < rdev->wiphy.num_iftype_akm_suites; i++) { + nested_akms = nla_nest_start(msg, i + 1); + if (!nested_akms) + return -ENOBUFS; + + iftype_akms = &rdev->wiphy.iftype_akm_suites[i]; + + if (nl80211_put_iftypes(msg, NL80211_IFTYPE_AKM_ATTR_IFTYPES, + iftype_akms->iftypes_mask)) + return -ENOBUFS; + + if (nla_put(msg, NL80211_IFTYPE_AKM_ATTR_SUITES, + sizeof(u32) * iftype_akms->n_akm_suites, + iftype_akms->akm_suites)) { + return -ENOBUFS; + } + nla_nest_end(msg, nested_akms); + } + + nla_nest_end(msg, nested); + + return 0; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2454,6 +2494,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.akm_suites)) goto nla_put_failure; + if (nl80211_put_iftype_akm_suites(rdev, msg)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; -- cgit v1.2.3 From 8c3ed7aa2b9ef666195b789e9b02e28383243fa8 Mon Sep 17 00:00:00 2001 From: Markus Theil Date: Wed, 15 Jan 2020 13:55:22 +0100 Subject: nl80211: add src and dst addr attributes for control port tx/rx When using control port over nl80211 in AP mode with pre-authentication, APs need to forward frames to other APs defined by their MAC address. Before this patch, pre-auth frames reaching user space over nl80211 control port have no longer any information about the dest attached, which can be used for forwarding to a controller or injecting the frame back to a ethernet interface over a AF_PACKET socket. Analog problems exist, when forwarding pre-auth frames from AP -> STA. This patch therefore adds the NL80211_ATTR_DST_MAC and NL80211_ATTR_SRC_MAC attributes to provide more context information when forwarding. The respective arguments are optional on tx and included on rx. Therefore unaware existing software is not affected. Software which wants to detect this feature, can do so by checking against: NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS Signed-off-by: Markus Theil Link: https://lore.kernel.org/r/20200115125522.3755-1-markus.theil@tu-ilmenau.de [split into separate cfg80211/mac80211 patches] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- include/uapi/linux/nl80211.h | 16 +++++++++++++++- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/tx.c | 3 ++- net/wireless/nl80211.c | 18 +++++++++++++++--- net/wireless/rdev-ops.h | 8 ++++---- net/wireless/trace.h | 27 +++++++++++++++++---------- 7 files changed, 57 insertions(+), 21 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c3a9b375d3ca..ec0de3c43c61 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3969,7 +3969,8 @@ struct cfg80211_ops { int (*tx_control_port)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const __be16 proto, + const u8 *dest, const u8 *src, + const __be16 proto, const bool noencrypt); int (*get_ftm_responder_stats)(struct wiphy *wiphy, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 350ab86fe20e..158bccb4a47b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1039,11 +1039,14 @@ * a control port frame and as a notification that a control port frame * has been received. %NL80211_ATTR_FRAME is used to specify the * frame contents. The frame is the raw EAPoL data, without ethernet or - * 802.11 headers. + * 802.11 headers. An optional %NL80211_ATTR_SRC_MAC can be used to send + * pre-auth frames to STAs on behalf of other APs. * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added * indicating the protocol type of the received frame; whether the frame * was received unencrypted and the MAC address of the peer respectively. + * %NL80211_ATTR_DST_MAC can be used to forward pre-auth frames in + * userspace while using AP mode. * * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. * @@ -2409,6 +2412,9 @@ enum nl80211_commands { * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not * advertised for a specific interface type. * + * @NL80211_ATTR_SRC_MAC: MAC address used in control port over nl80211 transmit + * @NL80211_ATTR_DST_MAC: MAC address used in control port over nl80211 receive + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2877,6 +2883,9 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_AKM_SUITES, + NL80211_ATTR_SRC_MAC, + NL80211_ATTR_DST_MAC, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5539,6 +5548,10 @@ enum nl80211_feature_flags { * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. * + * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS: The driver + * can use src and dst MAC addresses with control port over nl80211 rx + * and tx operations. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5586,6 +5599,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SAE_OFFLOAD, NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, + NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 8a49d78ad7c9..da9eaa9ee37e 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1792,7 +1792,8 @@ void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_xmit(struct sta_info *sta); int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, __be16 proto, bool unencrypted); + const u8 *dest, const u8 *src, __be16 proto, + bool unencrypted); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4296d9d71311..059c66b6bb5c 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5285,7 +5285,8 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, __be16 proto, bool unencrypted) + const u8 *dest, const u8 *src, __be16 proto, + bool unencrypted) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4c0ea54e0f59..33fe6ac1c242 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -633,6 +633,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), + [NL80211_ATTR_SRC_MAC] = NLA_POLICY_ETH_ADDR, + [NL80211_ATTR_DST_MAC] = NLA_POLICY_ETH_ADDR, }; /* policy for the key attributes */ @@ -13694,6 +13696,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) const u8 *buf; size_t len; u8 *dest; + u8 src[ETH_ALEN]; u16 proto; bool noencrypt; int err; @@ -13731,6 +13734,13 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) goto out; } + /* copy src address under wdev_lock, as we may copy wdev_address */ + if (info->attrs[NL80211_ATTR_SRC_MAC]) + ether_addr_copy(src, + nla_data(info->attrs[NL80211_ATTR_SRC_MAC])); + else + ether_addr_copy(src, wdev_address(wdev)); + wdev_unlock(wdev); buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); @@ -13741,7 +13751,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); return rdev_tx_control_port(rdev, dev, buf, len, - dest, cpu_to_be16(proto), noencrypt); + dest, src, cpu_to_be16(proto), noencrypt); out: wdev_unlock(wdev); @@ -15996,7 +16006,8 @@ static int __nl80211_rx_control_port(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ethhdr *ehdr = eth_hdr(skb); - const u8 *addr = ehdr->h_source; + const u8 *daddr = ehdr->h_dest; + const u8 *saddr = ehdr->h_source; u16 proto = be16_to_cpu(skb->protocol); struct sk_buff *msg; void *hdr; @@ -16021,7 +16032,8 @@ static int __nl80211_rx_control_port(struct net_device *dev, nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, saddr) || + nla_put(msg, NL80211_ATTR_DST_MAC, ETH_ALEN, daddr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e853a4fe6f97..39e6c1db3092 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -730,14 +730,14 @@ static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev, struct net_device *dev, const void *buf, size_t len, - const u8 *dest, __be16 proto, - const bool noencrypt) + const u8 *dest, const u8 *src, + __be16 proto, const bool noencrypt) { int ret; trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len, - dest, proto, noencrypt); + dest, src, proto, noencrypt); ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len, - dest, proto, noencrypt); + dest, src, proto, noencrypt); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index d98ad2b3143b..fefa255fd062 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1923,27 +1923,31 @@ TRACE_EVENT(rdev_mgmt_tx, TRACE_EVENT(rdev_tx_control_port, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - const u8 *buf, size_t len, const u8 *dest, __be16 proto, + const u8 *buf, size_t len, + const u8 *dest, const u8 *src, __be16 proto, bool unencrypted), - TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted), + TP_ARGS(wiphy, netdev, buf, len, dest, src, proto, unencrypted), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) - __field(__be16, proto) + MAC_ENTRY(src) + __field(u16, proto) __field(bool, unencrypted) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); - __entry->proto = proto; + MAC_ASSIGN(src, src); + __entry->proto = be16_to_cpu(proto); __entry->unencrypted = unencrypted; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT "," - " proto: 0x%x, unencrypted: %s", - WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest), - be16_to_cpu(__entry->proto), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", dest: " MAC_PR_FMT + ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, + MAC_PR_ARG(dest), MAC_PR_ARG(src), + __entry->proto, BOOL_TO_STR(__entry->unencrypted)) ); @@ -2835,6 +2839,7 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) + MAC_ENTRY(to) MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) @@ -2842,12 +2847,14 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_fast_assign( NETDEV_ASSIGN; __entry->len = skb->len; + MAC_ASSIGN(to, eth_hdr(skb)->h_dest); MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; ), - TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", - NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from), + TP_printk(NETDEV_PR_FMT ", len=%d, dest: " MAC_PR_FMT + ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", + NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(to), MAC_PR_ARG(from), __entry->proto, BOOL_TO_STR(__entry->unencrypted)) ); -- cgit v1.2.3 From c8856c051454909e5059df4e81c77b9c366c5515 Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 14 Feb 2020 15:30:49 -0800 Subject: tcp-zerocopy: Return inq along with tcp receive zerocopy. This patchset is intended to reduce the number of extra system calls imposed by TCP receive zerocopy. For ping-pong RPC style workloads, this patchset has demonstrated a system call reduction of about 30% when coupled with userspace changes. For applications using edge-triggered epoll, returning inq along with the result of tcp receive zerocopy could remove the need to call recvmsg()=-EAGAIN after a successful zerocopy. Generally speaking, since normally we would need to perform a recvmsg() call for every successful small RPC read via TCP receive zerocopy, returning inq can reduce the number of system calls performed by approximately half. Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index fd9eb8f6bcae..548f480b9c66 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -345,5 +345,6 @@ struct tcp_zerocopy_receive { __u64 address; /* in: address of mapping */ __u32 length; /* in/out: number of bytes to map/mapped */ __u32 recv_skip_hint; /* out: amount of bytes to skip */ + __u32 inq; /* out: amount of bytes in read queue */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index eb2d80519f8e..a697f1455f8d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3667,13 +3667,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - if (len != sizeof(zc)) + if (len < offsetofend(struct tcp_zerocopy_receive, length)) return -EINVAL; + if (len > sizeof(zc)) + len = sizeof(zc); if (copy_from_user(&zc, optval, len)) return -EFAULT; lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); release_sock(sk); + switch (len) { + case sizeof(zc): + case offsetofend(struct tcp_zerocopy_receive, inq): + goto zerocopy_rcv_inq; + case offsetofend(struct tcp_zerocopy_receive, length): + default: + goto zerocopy_rcv_out; + } +zerocopy_rcv_inq: + zc.inq = tcp_inq_hint(sk); +zerocopy_rcv_out: if (!err && copy_to_user(optval, &zc, len)) err = -EFAULT; return err; -- cgit v1.2.3 From 33946518d493cdf10aedb4a483f1aa41948a3dab Mon Sep 17 00:00:00 2001 From: Arjun Roy Date: Fri, 14 Feb 2020 15:30:50 -0800 Subject: tcp-zerocopy: Return sk_err (if set) along with tcp receive zerocopy. This patchset is intended to reduce the number of extra system calls imposed by TCP receive zerocopy. For ping-pong RPC style workloads, this patchset has demonstrated a system call reduction of about 30% when coupled with userspace changes. For applications using epoll, returning sk_err along with the result of tcp receive zerocopy could remove the need to call recvmsg()=-EAGAIN after a spurious wakeup. Consider a multi-threaded application using epoll. A thread may awaken with EPOLLIN but another thread may already be reading. The spuriously-awoken thread does not necessarily know that another thread 'won'; rather, it may be possible that it was woken up due to the presence of an error if there is no data. A zerocopy read receiving 0 bytes thus would need to be followed up by recvmsg to be sure. Instead, we return sk_err directly with zerocopy, so the application can avoid this extra system call. Signed-off-by: Arjun Roy Signed-off-by: Eric Dumazet Signed-off-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 548f480b9c66..1a7fc856e237 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -346,5 +346,6 @@ struct tcp_zerocopy_receive { __u32 length; /* in/out: number of bytes to map/mapped */ __u32 recv_skip_hint; /* out: amount of bytes to skip */ __u32 inq; /* out: amount of bytes in read queue */ + __s32 err; /* out: socket error */ }; #endif /* _UAPI_LINUX_TCP_H */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index a697f1455f8d..1b685485a5b5 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3676,14 +3676,20 @@ static int do_tcp_getsockopt(struct sock *sk, int level, lock_sock(sk); err = tcp_zerocopy_receive(sk, &zc); release_sock(sk); + if (len == sizeof(zc)) + goto zerocopy_rcv_sk_err; switch (len) { - case sizeof(zc): + case offsetofend(struct tcp_zerocopy_receive, err): + goto zerocopy_rcv_sk_err; case offsetofend(struct tcp_zerocopy_receive, inq): goto zerocopy_rcv_inq; case offsetofend(struct tcp_zerocopy_receive, length): default: goto zerocopy_rcv_out; } +zerocopy_rcv_sk_err: + if (!err) + zc.err = sock_error(sk); zerocopy_rcv_inq: zc.inq = tcp_inq_hint(sk); zerocopy_rcv_out: -- cgit v1.2.3 From 744676e777207f4992ba4cc728a8a71352963c5b Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 15 Feb 2020 14:20:56 +0100 Subject: openvswitch: add TTL decrement action New action to decrement TTL instead of setting it to a fixed value. This action will decrement the TTL and, in case of expired TTL, drop it or execute an action passed via a nested attribute. The default TTL expired action is to drop the packet. Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively. Tested with a corresponding change in the userspace: # ovs-dpctl dump-flows in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1 in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},2 in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2 in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1 # ping -c1 192.168.0.2 -t 42 IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64 # ping -c1 192.168.0.2 -t 120 IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64 # ping -c1 192.168.0.2 -t 1 # Co-developed-by: Bindiya Kurle Signed-off-by: Bindiya Kurle Signed-off-by: Matteo Croce Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 7 ++++ net/openvswitch/actions.c | 67 ++++++++++++++++++++++++++++++++++++++ net/openvswitch/flow_netlink.c | 70 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 144 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index ae2bff14e7e1..9b14519e74d9 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -958,6 +958,7 @@ enum ovs_action_attr { OVS_ACTION_ATTR_CLONE, /* Nested OVS_CLONE_ATTR_*. */ OVS_ACTION_ATTR_CHECK_PKT_LEN, /* Nested OVS_CHECK_PKT_LEN_ATTR_*. */ OVS_ACTION_ATTR_ADD_MPLS, /* struct ovs_action_add_mpls. */ + OVS_ACTION_ATTR_DEC_TTL, /* Nested OVS_DEC_TTL_ATTR_*. */ __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted * from userspace. */ @@ -1050,4 +1051,10 @@ struct ovs_zone_limit { __u32 count; }; +enum ovs_dec_ttl_attr { + OVS_DEC_TTL_ATTR_UNSPEC, + OVS_DEC_TTL_ATTR_ACTION, /* Nested struct nlattr */ + __OVS_DEC_TTL_ATTR_MAX +}; + #endif /* _LINUX_OPENVSWITCH_H */ diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 7fbfe2adfffa..fc0efd8833c8 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -964,6 +964,25 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, key, &upcall, cutlen); } +static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *attr, bool last) +{ + /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */ + struct nlattr *dec_ttl_arg = nla_data(attr); + int rem = nla_len(attr); + + if (nla_len(dec_ttl_arg)) { + struct nlattr *actions = nla_next(dec_ttl_arg, &rem); + + if (actions) + return clone_execute(dp, skb, key, 0, actions, rem, + last, false); + } + consume_skb(skb); + return 0; +} + /* When 'last' is true, sample() should always consume the 'skb'. * Otherwise, sample() should keep 'skb' intact regardless what * actions are executed within sample(). @@ -1180,6 +1199,45 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, nla_len(actions), last, clone_flow_key); } +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) +{ + int err; + + if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *nh; + + err = skb_ensure_writable(skb, skb_network_offset(skb) + + sizeof(*nh)); + if (unlikely(err)) + return err; + + nh = ipv6_hdr(skb); + + if (nh->hop_limit <= 1) + return -EHOSTUNREACH; + + key->ip.ttl = --nh->hop_limit; + } else { + struct iphdr *nh; + u8 old_ttl; + + err = skb_ensure_writable(skb, skb_network_offset(skb) + + sizeof(*nh)); + if (unlikely(err)) + return err; + + nh = ip_hdr(skb); + if (nh->ttl <= 1) + return -EHOSTUNREACH; + + old_ttl = nh->ttl--; + csum_replace2(&nh->check, htons(old_ttl << 8), + htons(nh->ttl << 8)); + key->ip.ttl = nh->ttl; + } + return 0; +} + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1365,6 +1423,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; } + + case OVS_ACTION_ATTR_DEC_TTL: + err = execute_dec_ttl(skb, key); + if (err == -EHOSTUNREACH) { + err = dec_ttl_exception_handler(dp, skb, key, + a, true); + return err; + } + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 7da4230627f5..1ccd5e092bca 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -80,6 +80,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_METER: case OVS_ACTION_ATTR_CHECK_PKT_LEN: case OVS_ACTION_ATTR_ADD_MPLS: + case OVS_ACTION_ATTR_DEC_TTL: default: return true; } @@ -2495,6 +2496,39 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return 0; } +static int validate_and_copy_dec_ttl(struct net *net, + const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci, + u32 mpls_label_count, bool log) +{ + int start, err; + u32 nested = true; + + if (!nla_len(attr)) + return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL, + NULL, 0, log); + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); + if (start < 0) + return start; + + err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested, + sizeof(nested), log); + + if (err) + return err; + + err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type, + vlan_tci, mpls_label_count, log); + if (err) + return err; + + add_nested_action_end(*sfa, start); + return 0; +} + static int validate_and_copy_clone(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, @@ -3007,6 +3041,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_CLONE] = (u32)-1, [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), + [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3267,6 +3302,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, break; } + case OVS_ACTION_ATTR_DEC_TTL: + err = validate_and_copy_dec_ttl(net, a, key, sfa, + eth_type, vlan_tci, + mpls_label_count, log); + if (err) + return err; + skip_copy = true; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; @@ -3438,6 +3482,26 @@ out: return err; } +static int dec_ttl_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) +{ + int err = 0, rem = nla_len(attr); + struct nlattr *start; + + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL); + + if (!start) + return -EMSGSIZE; + + err = ovs_nla_put_actions(nla_data(attr), rem, skb); + if (err) + nla_nest_cancel(skb, start); + else + nla_nest_end(skb, start); + + return err; +} + static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); @@ -3538,6 +3602,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_DEC_TTL: + err = dec_ttl_action_to_attr(a, skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; -- cgit v1.2.3 From f623e5970501449b475a8dc007f0fe24743ab9d3 Mon Sep 17 00:00:00 2001 From: Aya Levin Date: Tue, 11 Feb 2020 14:32:52 -0800 Subject: ethtool: Add support for low latency RS FEC Add support for low latency Reed Solomon FEC as LLRS. The LL-FEC is defined by the 25G/50G ethernet consortium, in the document titled "Low Latency Reed Solomon Forward Error Correction" Signed-off-by: Aya Levin Reviewed-by: Eran Ben Elisha CC: Andrew Lunn Signed-off-by: Saeed Mahameed Reviewed-by: Andrew Lunn --- drivers/net/phy/phy-core.c | 2 +- include/uapi/linux/ethtool.h | 4 +++- net/ethtool/common.c | 1 + net/ethtool/linkmodes.c | 1 + 4 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c index a4d2d59fceca..e083e7a76ada 100644 --- a/drivers/net/phy/phy-core.c +++ b/drivers/net/phy/phy-core.c @@ -8,7 +8,7 @@ const char *phy_speed_to_str(int speed) { - BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 74, + BUILD_BUG_ON_MSG(__ETHTOOL_LINK_MODE_MASK_NBITS != 75, "Enum ethtool_link_mode_bit_indices and phylib are out of sync. " "If a speed or mode has been added please update phy_speed_to_str " "and the PHY settings array.\n"); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 4295ebfa2f91..d586ee5e10a1 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1330,6 +1330,7 @@ enum ethtool_fec_config_bits { ETHTOOL_FEC_OFF_BIT, ETHTOOL_FEC_RS_BIT, ETHTOOL_FEC_BASER_BIT, + ETHTOOL_FEC_LLRS_BIT, }; #define ETHTOOL_FEC_NONE (1 << ETHTOOL_FEC_NONE_BIT) @@ -1337,6 +1338,7 @@ enum ethtool_fec_config_bits { #define ETHTOOL_FEC_OFF (1 << ETHTOOL_FEC_OFF_BIT) #define ETHTOOL_FEC_RS (1 << ETHTOOL_FEC_RS_BIT) #define ETHTOOL_FEC_BASER (1 << ETHTOOL_FEC_BASER_BIT) +#define ETHTOOL_FEC_LLRS (1 << ETHTOOL_FEC_LLRS_BIT) /* CMDs currently supported */ #define ETHTOOL_GSET 0x00000001 /* DEPRECATED, Get settings. @@ -1521,7 +1523,7 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT = 71, ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT = 72, ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT = 73, - + ETHTOOL_LINK_MODE_FEC_LLRS_BIT = 74, /* must be last entry */ __ETHTOOL_LINK_MODE_MASK_NBITS }; diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 636ec6d5110e..7b6969af5ae7 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -168,6 +168,7 @@ const char link_mode_names[][ETH_GSTRING_LEN] = { __DEFINE_LINK_MODE_NAME(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_NAME(400000, DR8, Full), __DEFINE_LINK_MODE_NAME(400000, CR8, Full), + __DEFINE_SPECIAL_MODE_NAME(FEC_LLRS, "LLRS"), }; static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS); diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 96f20be64553..f049b97072fe 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -237,6 +237,7 @@ static const struct link_mode_info link_mode_params[] = { __DEFINE_LINK_MODE_PARAMS(400000, LR8_ER8_FR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, DR8, Full), __DEFINE_LINK_MODE_PARAMS(400000, CR8, Full), + __DEFINE_SPECIAL_MODE_PARAMS(FEC_LLRS), }; static const struct nla_policy -- cgit v1.2.3 From fff7b64355eac6e29b50229ad1512315bc04b44e Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Mon, 17 Feb 2020 19:04:31 -0800 Subject: bpf: Add bpf_read_branch_records() helper Branch records are a CPU feature that can be configured to record certain branches that are taken during code execution. This data is particularly interesting for profile guided optimizations. perf has had branch record support for a while but the data collection can be a bit coarse grained. We (Facebook) have seen in experiments that associating metadata with branch records can improve results (after postprocessing). We generally use bpf_probe_read_*() to get metadata out of userspace. That's why bpf support for branch records is useful. Aside from this particular use case, having branch data available to bpf progs can be useful to get stack traces out of userspace applications that omit frame pointers. Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200218030432.4600-2-dxu@dxuuu.xyz --- include/uapi/linux/bpf.h | 25 ++++++++++++++++++++++++- kernel/trace/bpf_trace.c | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index f1d74a2bd234..a7e59756853f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2892,6 +2892,25 @@ union bpf_attr { * Obtain the 64bit jiffies * Return * The 64 bit jiffies + * + * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * Description + * For an eBPF program attached to a perf event, retrieve the + * branch records (struct perf_branch_entry) associated to *ctx* + * and store it in the buffer pointed by *buf* up to size + * *size* bytes. + * Return + * On success, number of bytes written to *buf*. On error, a + * negative value. + * + * The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to + * instead return the number of bytes required to store all the + * branch entries. If this flag is set, *buf* may be NULL. + * + * **-EINVAL** if arguments invalid or **size** not a multiple + * of sizeof(struct perf_branch_entry). + * + * **-ENOENT** if architecture does not support branch records. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3012,7 +3031,8 @@ union bpf_attr { FN(probe_read_kernel_str), \ FN(tcp_send_ack), \ FN(send_signal_thread), \ - FN(jiffies64), + FN(jiffies64), \ + FN(read_branch_records), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3091,6 +3111,9 @@ enum bpf_func_id { /* BPF_FUNC_sk_storage_get flags */ #define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +/* BPF_FUNC_read_branch_records flags. */ +#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 4ddd5ac46094..b8661bd0d028 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1028,6 +1028,45 @@ static const struct bpf_func_proto bpf_perf_prog_read_value_proto = { .arg3_type = ARG_CONST_SIZE, }; +BPF_CALL_4(bpf_read_branch_records, struct bpf_perf_event_data_kern *, ctx, + void *, buf, u32, size, u64, flags) +{ +#ifndef CONFIG_X86 + return -ENOENT; +#else + static const u32 br_entry_size = sizeof(struct perf_branch_entry); + struct perf_branch_stack *br_stack = ctx->data->br_stack; + u32 to_copy; + + if (unlikely(flags & ~BPF_F_GET_BRANCH_RECORDS_SIZE)) + return -EINVAL; + + if (unlikely(!br_stack)) + return -EINVAL; + + if (flags & BPF_F_GET_BRANCH_RECORDS_SIZE) + return br_stack->nr * br_entry_size; + + if (!buf || (size % br_entry_size != 0)) + return -EINVAL; + + to_copy = min_t(u32, br_stack->nr * br_entry_size, size); + memcpy(buf, br_stack->entries, to_copy); + + return to_copy; +#endif +} + +static const struct bpf_func_proto bpf_read_branch_records_proto = { + .func = bpf_read_branch_records, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM_OR_NULL, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { @@ -1040,6 +1079,8 @@ pe_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_get_stack_proto_tp; case BPF_FUNC_perf_prog_read_value: return &bpf_perf_prog_read_value_proto; + case BPF_FUNC_read_branch_records: + return &bpf_read_branch_records_proto; default: return tracing_func_proto(func_id, prog); } -- cgit v1.2.3 From 8d74a623cc3cecda89da628b8f3d115d8cf1ee8f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Feb 2020 10:19:12 +0100 Subject: Revert "nl80211: add src and dst addr attributes for control port tx/rx" This reverts commit 8c3ed7aa2b9ef666195b789e9b02e28383243fa8. As Jouni points out, there's really no need for this, since the RSN pre-authentication frames are normal data frames, not port control frames (locally). We can still revert this now since it hasn't actually gone beyond -next. Fixes: 8c3ed7aa2b9e ("nl80211: add src and dst addr attributes for control port tx/rx") Signed-off-by: Johannes Berg Link: https://lore.kernel.org/r/20200224101910.b746e263287a.I9eb15d6895515179d50964dec3550c9dc784bb93@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +-- include/uapi/linux/nl80211.h | 16 +--------------- net/mac80211/ieee80211_i.h | 3 +-- net/mac80211/tx.c | 3 +-- net/wireless/nl80211.c | 18 +++--------------- net/wireless/rdev-ops.h | 8 ++++---- net/wireless/trace.h | 27 ++++++++++----------------- 7 files changed, 21 insertions(+), 57 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7ea23caa7301..089ffdd6dba5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3974,8 +3974,7 @@ struct cfg80211_ops { int (*tx_control_port)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const u8 *src, - const __be16 proto, + const u8 *dest, const __be16 proto, const bool noencrypt); int (*get_ftm_responder_stats)(struct wiphy *wiphy, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 158bccb4a47b..350ab86fe20e 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1039,14 +1039,11 @@ * a control port frame and as a notification that a control port frame * has been received. %NL80211_ATTR_FRAME is used to specify the * frame contents. The frame is the raw EAPoL data, without ethernet or - * 802.11 headers. An optional %NL80211_ATTR_SRC_MAC can be used to send - * pre-auth frames to STAs on behalf of other APs. + * 802.11 headers. * When used as an event indication %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT and %NL80211_ATTR_MAC are added * indicating the protocol type of the received frame; whether the frame * was received unencrypted and the MAC address of the peer respectively. - * %NL80211_ATTR_DST_MAC can be used to forward pre-auth frames in - * userspace while using AP mode. * * @NL80211_CMD_RELOAD_REGDB: Request that the regdb firmware file is reloaded. * @@ -2412,9 +2409,6 @@ enum nl80211_commands { * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not * advertised for a specific interface type. * - * @NL80211_ATTR_SRC_MAC: MAC address used in control port over nl80211 transmit - * @NL80211_ATTR_DST_MAC: MAC address used in control port over nl80211 receive - * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2883,9 +2877,6 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_AKM_SUITES, - NL80211_ATTR_SRC_MAC, - NL80211_ATTR_DST_MAC, - /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -5548,10 +5539,6 @@ enum nl80211_feature_flags { * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. * - * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS: The driver - * can use src and dst MAC addresses with control port over nl80211 rx - * and tx operations. - * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5599,7 +5586,6 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SAE_OFFLOAD, NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, - NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_MAC_ADDRS, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index da9eaa9ee37e..8a49d78ad7c9 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1792,8 +1792,7 @@ void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata); void ieee80211_clear_fast_xmit(struct sta_info *sta); int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const u8 *src, __be16 proto, - bool unencrypted); + const u8 *dest, __be16 proto, bool unencrypted); int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 2645a39cce88..cddaacaa31a3 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -5283,8 +5283,7 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len, - const u8 *dest, const u8 *src, __be16 proto, - bool unencrypted) + const u8 *dest, __be16 proto, bool unencrypted) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f0112dabe21e..d795552f97b2 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -634,8 +634,6 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), - [NL80211_ATTR_SRC_MAC] = NLA_POLICY_ETH_ADDR, - [NL80211_ATTR_DST_MAC] = NLA_POLICY_ETH_ADDR, }; /* policy for the key attributes */ @@ -13698,7 +13696,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) const u8 *buf; size_t len; u8 *dest; - u8 src[ETH_ALEN]; u16 proto; bool noencrypt; int err; @@ -13736,13 +13733,6 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) goto out; } - /* copy src address under wdev_lock, as we may copy wdev_address */ - if (info->attrs[NL80211_ATTR_SRC_MAC]) - ether_addr_copy(src, - nla_data(info->attrs[NL80211_ATTR_SRC_MAC])); - else - ether_addr_copy(src, wdev_address(wdev)); - wdev_unlock(wdev); buf = nla_data(info->attrs[NL80211_ATTR_FRAME]); @@ -13753,7 +13743,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info) nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]); return rdev_tx_control_port(rdev, dev, buf, len, - dest, src, cpu_to_be16(proto), noencrypt); + dest, cpu_to_be16(proto), noencrypt); out: wdev_unlock(wdev); @@ -16010,8 +16000,7 @@ static int __nl80211_rx_control_port(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct ethhdr *ehdr = eth_hdr(skb); - const u8 *daddr = ehdr->h_dest; - const u8 *saddr = ehdr->h_source; + const u8 *addr = ehdr->h_source; u16 proto = be16_to_cpu(skb->protocol); struct sk_buff *msg; void *hdr; @@ -16036,8 +16025,7 @@ static int __nl80211_rx_control_port(struct net_device *dev, nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), NL80211_ATTR_PAD) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, saddr) || - nla_put(msg, NL80211_ATTR_DST_MAC, ETH_ALEN, daddr) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, addr) || nla_put_u16(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE, proto) || (unencrypted && nla_put_flag(msg, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT))) diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 5ea34c1b60fe..e0d34f796d0b 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -734,14 +734,14 @@ static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev, struct net_device *dev, const void *buf, size_t len, - const u8 *dest, const u8 *src, - __be16 proto, const bool noencrypt) + const u8 *dest, __be16 proto, + const bool noencrypt) { int ret; trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len, - dest, src, proto, noencrypt); + dest, proto, noencrypt); ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len, - dest, src, proto, noencrypt); + dest, proto, noencrypt); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index b6b60e3aea41..3ef1679b0e66 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1928,31 +1928,27 @@ TRACE_EVENT(rdev_mgmt_tx, TRACE_EVENT(rdev_tx_control_port, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - const u8 *buf, size_t len, - const u8 *dest, const u8 *src, __be16 proto, + const u8 *buf, size_t len, const u8 *dest, __be16 proto, bool unencrypted), - TP_ARGS(wiphy, netdev, buf, len, dest, src, proto, unencrypted), + TP_ARGS(wiphy, netdev, buf, len, dest, proto, unencrypted), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(dest) - MAC_ENTRY(src) - __field(u16, proto) + __field(__be16, proto) __field(bool, unencrypted) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(dest, dest); - MAC_ASSIGN(src, src); - __entry->proto = be16_to_cpu(proto); + __entry->proto = proto; __entry->unencrypted = unencrypted; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", dest: " MAC_PR_FMT - ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", - WIPHY_PR_ARG, NETDEV_PR_ARG, - MAC_PR_ARG(dest), MAC_PR_ARG(src), - __entry->proto, + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT "," + " proto: 0x%x, unencrypted: %s", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest), + be16_to_cpu(__entry->proto), BOOL_TO_STR(__entry->unencrypted)) ); @@ -2844,7 +2840,6 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_STRUCT__entry( NETDEV_ENTRY __field(int, len) - MAC_ENTRY(to) MAC_ENTRY(from) __field(u16, proto) __field(bool, unencrypted) @@ -2852,14 +2847,12 @@ TRACE_EVENT(cfg80211_rx_control_port, TP_fast_assign( NETDEV_ASSIGN; __entry->len = skb->len; - MAC_ASSIGN(to, eth_hdr(skb)->h_dest); MAC_ASSIGN(from, eth_hdr(skb)->h_source); __entry->proto = be16_to_cpu(skb->protocol); __entry->unencrypted = unencrypted; ), - TP_printk(NETDEV_PR_FMT ", len=%d, dest: " MAC_PR_FMT - ", src: " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", - NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(to), MAC_PR_ARG(from), + TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s", + NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from), __entry->proto, BOOL_TO_STR(__entry->unencrypted)) ); -- cgit v1.2.3 From 56be393fa8b40db2d4f54f97614f645eb8d3c32e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 22 Feb 2020 15:25:43 +0200 Subject: cfg80211: Support key configuration for Beacon protection (BIGTK) IEEE P802.11-REVmd/D3.0 adds support for protecting Beacon frames using a new set of keys (BIGTK; key index 6..7) similarly to the way group-addressed Robust Management frames are protected (IGTK; key index 4..5). Extend cfg80211 and nl80211 to allow the new BIGTK to be configured. Add an extended feature flag to indicate driver support for the new key index values to avoid array overflows in driver implementations and also to indicate to user space when this functionality is available. Signed-off-by: Jouni Malinen Link: https://lore.kernel.org/r/20200222132548.20835-2-jouni@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 ++++ include/uapi/linux/nl80211.h | 6 +++++ net/wireless/nl80211.c | 56 +++++++++++++++++++++++++++++++++++--------- net/wireless/rdev-ops.h | 13 ++++++++++ net/wireless/sme.c | 11 +++++++-- net/wireless/trace.h | 17 ++++++++++++++ net/wireless/util.c | 7 +++++- 7 files changed, 101 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 682ee4dd6963..04b6df15706c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3369,6 +3369,8 @@ struct cfg80211_update_owe_info { * @set_default_key: set the default key on an interface * * @set_default_mgmt_key: set the default management frame key on an interface + + * @set_default_beacon_key: set the default Beacon frame key on an interface * * @set_rekey_data: give the data necessary for GTK rekeying to the driver * @@ -3702,6 +3704,9 @@ struct cfg80211_ops { int (*set_default_mgmt_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); + int (*set_default_beacon_key)(struct wiphy *wiphy, + struct net_device *netdev, + u8 key_index); int (*start_ap)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_ap_settings *settings); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 350ab86fe20e..934e62fe8705 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4550,6 +4550,7 @@ enum nl80211_key_default_types { * See &enum nl80211_key_default_types. * @NL80211_KEY_MODE: the mode from enum nl80211_key_mode. * Defaults to @NL80211_KEY_RX_TX. + * @NL80211_KEY_DEFAULT_BEACON: flag indicating default Beacon frame key * * @__NL80211_KEY_AFTER_LAST: internal * @NL80211_KEY_MAX: highest key attribute @@ -4565,6 +4566,7 @@ enum nl80211_key_attributes { NL80211_KEY_TYPE, NL80211_KEY_DEFAULT_TYPES, NL80211_KEY_MODE, + NL80211_KEY_DEFAULT_BEACON, /* keep last */ __NL80211_KEY_AFTER_LAST, @@ -5539,6 +5541,9 @@ enum nl80211_feature_flags { * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. * + * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection + * and can receive key configuration for BIGTK using key indexes 6 and 7. + * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. */ @@ -5586,6 +5591,7 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SAE_OFFLOAD, NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, + NL80211_EXT_FEATURE_BEACON_PROTECTION, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ce55a2c05fe9..a75f72288139 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -368,7 +368,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_KEY] = { .type = NLA_NESTED, }, [NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, - [NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 5), + [NL80211_ATTR_KEY_IDX] = NLA_POLICY_MAX(NLA_U8, 7), [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 16 }, @@ -1037,7 +1037,7 @@ struct key_parse { struct key_params p; int idx; int type; - bool def, defmgmt; + bool def, defmgmt, defbeacon; bool def_uni, def_multi; }; @@ -1053,12 +1053,13 @@ static int nl80211_parse_key_new(struct genl_info *info, struct nlattr *key, k->def = !!tb[NL80211_KEY_DEFAULT]; k->defmgmt = !!tb[NL80211_KEY_DEFAULT_MGMT]; + k->defbeacon = !!tb[NL80211_KEY_DEFAULT_BEACON]; if (k->def) { k->def_uni = true; k->def_multi = true; } - if (k->defmgmt) + if (k->defmgmt || k->defbeacon) k->def_multi = true; if (tb[NL80211_KEY_IDX]) @@ -1165,14 +1166,17 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) if (err) return err; - if (k->def && k->defmgmt) { - GENL_SET_ERR_MSG(info, "key with def && defmgmt is invalid"); + if ((k->def ? 1 : 0) + (k->defmgmt ? 1 : 0) + + (k->defbeacon ? 1 : 0) > 1) { + GENL_SET_ERR_MSG(info, + "key with multiple default flags is invalid"); return -EINVAL; } - if (k->defmgmt) { + if (k->defmgmt || k->defbeacon) { if (k->def_uni || !k->def_multi) { - GENL_SET_ERR_MSG(info, "defmgmt key must be mcast"); + GENL_SET_ERR_MSG(info, + "defmgmt/defbeacon key must be mcast"); return -EINVAL; } } @@ -1184,14 +1188,20 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) "defmgmt key idx not 4 or 5"); return -EINVAL; } + } else if (k->defbeacon) { + if (k->idx < 6 || k->idx > 7) { + GENL_SET_ERR_MSG(info, + "defbeacon key idx not 6 or 7"); + return -EINVAL; + } } else if (k->def) { if (k->idx < 0 || k->idx > 3) { GENL_SET_ERR_MSG(info, "def key idx not 0-3"); return -EINVAL; } } else { - if (k->idx < 0 || k->idx > 5) { - GENL_SET_ERR_MSG(info, "key idx not 0-5"); + if (k->idx < 0 || k->idx > 7) { + GENL_SET_ERR_MSG(info, "key idx not 0-7"); return -EINVAL; } } @@ -3817,8 +3827,14 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) void *hdr; struct sk_buff *msg; - if (info->attrs[NL80211_ATTR_KEY_IDX]) + if (info->attrs[NL80211_ATTR_KEY_IDX]) { key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]); + if (key_idx > 5 && + !wiphy_ext_feature_isset( + &rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION)) + return -EINVAL; + } if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -3894,7 +3910,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) /* Only support setting default key and * Extended Key ID action NL80211_KEY_SET_TX. */ - if (!key.def && !key.defmgmt && + if (!key.def && !key.defmgmt && !key.defbeacon && !(key.p.mode == NL80211_KEY_SET_TX)) return -EINVAL; @@ -3941,6 +3957,24 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) #ifdef CONFIG_CFG80211_WEXT dev->ieee80211_ptr->wext.default_mgmt_key = key.idx; #endif + } else if (key.defbeacon) { + if (key.def_uni || !key.def_multi) { + err = -EINVAL; + goto out; + } + + if (!rdev->ops->set_default_beacon_key) { + err = -EOPNOTSUPP; + goto out; + } + + err = nl80211_key_allowed(dev->ieee80211_ptr); + if (err) + goto out; + + err = rdev_set_default_beacon_key(rdev, dev, key.idx); + if (err) + goto out; } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index e0d34f796d0b..af7fcf2a3b4a 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -136,6 +136,19 @@ rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_set_default_beacon_key(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u8 key_index) +{ + int ret; + + trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, key_index); + ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, + key_index); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_start_ap(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_ap_settings *settings) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index d32a2ec4d96a..ac3e60aa1fc8 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1111,9 +1111,16 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, * Delete all the keys ... pairwise keys can't really * exist any more anyway, but default keys might. */ - if (rdev->ops->del_key) - for (i = 0; i < 6; i++) + if (rdev->ops->del_key) { + int max_key_idx = 5; + + if (wiphy_ext_feature_isset( + wdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION)) + max_key_idx = 7; + for (i = 0; i <= max_key_idx; i++) rdev_del_key(rdev, dev, i, false, NULL); + } rdev_set_qos_map(rdev, dev, NULL); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 3ef1679b0e66..56b78222746c 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -510,6 +510,23 @@ TRACE_EVENT(rdev_set_default_mgmt_key, WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index) ); +TRACE_EVENT(rdev_set_default_beacon_key, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 key_index), + TP_ARGS(wiphy, netdev, key_index), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + __field(u8, key_index) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + __entry->key_index = key_index; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", key index: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->key_index) +); + TRACE_EVENT(rdev_start_ap, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_ap_settings *settings), diff --git a/net/wireless/util.c b/net/wireless/util.c index 8481e9ac33da..72926f87c913 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -231,7 +231,12 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr) { - if (key_idx < 0 || key_idx > 5) + int max_key_idx = 5; + + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_PROTECTION)) + max_key_idx = 7; + if (key_idx < 0 || key_idx > max_key_idx) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) -- cgit v1.2.3 From 77f576deaa393b54a0f2ca8ab1ab5b2d3c6b971b Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:22 +0530 Subject: nl80211: Add NL command to support TID speicific configurations Add the new NL80211_CMD_SET_TID_CONFIG command to support data TID specific configuration. Per TID configuration is passed in the nested NL80211_ATTR_TID_CONFIG attribute. This patch adds support to configure per TID noack policy through the NL80211_TID_CONFIG_ATTR_NOACK attribute. Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-2-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 40 ++++++++++++ include/uapi/linux/nl80211.h | 71 +++++++++++++++++++++ net/wireless/nl80211.c | 148 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 24 +++++++ net/wireless/trace.h | 37 +++++++++++ 5 files changed, 320 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 04b6df15706c..65a8bf73d21e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,6 +626,38 @@ struct cfg80211_chan_def { struct ieee80211_edmg edmg; }; +enum ieee80211_tid_conf_mask { + IEEE80211_TID_CONF_NOACK = BIT(NL80211_TID_CONFIG_ATTR_NOACK), +}; + +/** + * struct ieee80211_tid_cfg - TID specific configuration + * @config_override: Flag to notify driver to reset TID configuration + * of the peer. + * @tid: TID number + * @tid_conf_mask: bitmap indicating which parameter changed + * see &enum ieee80211_tid_conf_mask + * @noack: noack configuration value for the TID + */ +struct ieee80211_tid_cfg { + bool config_override; + u8 tid; + u32 tid_conf_mask; + enum nl80211_tid_config noack; +}; + +/** + * struct ieee80211_tid_config - TID configuration + * @peer: Station's MAC address + * @n_tid_conf: Number of TID specific configurations to be applied + * @tid_conf: Configuration change info + */ +struct ieee80211_tid_config { + const u8 *peer; + u32 n_tid_conf; + struct ieee80211_tid_cfg tid_conf[]; +}; + /** * cfg80211_get_chandef_type - return old channel type from chandef * @chandef: the channel definition @@ -3671,6 +3703,10 @@ struct cfg80211_update_owe_info { * * @probe_mesh_link: Probe direct Mesh peer's link quality by sending data frame * and overrule HWMP path selection algorithm. + * @set_tid_config: TID specific configuration, this can be peer or BSS specific + * This callback may sleep. + * @reset_tid_config: Reset TID specific configuration for the peer. + * This callback may sleep. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -3994,6 +4030,10 @@ struct cfg80211_ops { struct cfg80211_update_owe_info *owe_info); int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); + int (*set_tid_config)(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_tid_config *tid_conf); + int (*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev, + const u8 *peer, u8 tid); }; /* diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 934e62fe8705..0b12fcffb518 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -264,6 +264,27 @@ * %NL80211_ATTR_VLAN_ID. */ +/** + * DOC: TID configuration + * + * TID configuration support can be advertised by drivers by setting + * @NL80211_EXT_FEATURE_PER_TID_* and/or @NL80211_EXT_FEATURE_PER_STA_* config + * mentioned in &enum nl80211_tid_config_attr. + * Needed configuration parameters are mentioned in + * &enum nl80211_tid_config_attr and it will be passed using + * %NL80211_CMD_SET_TID_CONFIG through %NL80211_ATTR_TID_CONFIG. + * If the configuration needs to be applied for specific peer then MAC address + * of the peer needs to be passed in %NL80211_ATT_MAC, otherwise the + * configuration will be applied for all the connected peers in the vif except + * the peer which has peer specific configuration for the TID. + * And the peer specific configuration will be overridden if + * %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set. + * All this configurations are valid only for STA's current connection + * i.e. the configurations will be reset to default when the STA connects back + * after disconnection/roaming, and this configuration will be cleared when + * the interface goes down. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -1125,6 +1146,9 @@ * peer MAC address and %NL80211_ATTR_FRAME is used to specify the frame * content. The frame is ethernet data. * + * @NL80211_CMD_SET_TID_CONFIG: Data frame TID specific configuration + * is passed using %NL80211_ATTR_TID_CONFIG attribute. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1349,6 +1373,8 @@ enum nl80211_commands { NL80211_CMD_PROBE_MESH_LINK, + NL80211_CMD_SET_TID_CONFIG, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -2409,6 +2435,9 @@ enum nl80211_commands { * %NL80211_ATTR_AKM_SUITES are default capabilities if AKM suites not * advertised for a specific interface type. * + * @NL80211_ATTR_TID_CONFIG: TID specific configuration in a + * nested attribute with &enum nl80211_tid_config_attr sub-attributes. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2877,6 +2906,8 @@ enum nl80211_attrs { NL80211_ATTR_IFTYPE_AKM_SUITES, + NL80211_ATTR_TID_CONFIG, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4722,6 +4753,40 @@ enum nl80211_tx_power_setting { NL80211_TX_POWER_FIXED, }; +/** + * enum nl80211_tid_config - TID config state + * @NL80211_TID_CONFIG_ENABLE: Enable config for the TID + * @NL80211_TID_CONFIG_DISABLE: Disable config for the TID + */ +enum nl80211_tid_config { + NL80211_TID_CONFIG_ENABLE, + NL80211_TID_CONFIG_DISABLE, +}; + +/* enum nl80211_tid_config_attr - TID specific configuration. + * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if no peer + * is selected, if set indicates that the new configuration overrides + * all previous peer configurations, otherwise previous peer specific + * configurations should be left untouched. If peer is selected then + * it will reset particular TID configuration of that peer and it will + * not accept other TID config attributes along with peer. + * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs(bit 0 to 7) + * Its type is u8. + * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. + * specified in %NL80211_TID_CONFIG_ATTR_TID. see %enum nl80211_tid_config. + * Its type is u8. + */ +enum nl80211_tid_config_attr { + __NL80211_TID_CONFIG_ATTR_INVALID, + NL80211_TID_CONFIG_ATTR_OVERRIDE, + NL80211_TID_CONFIG_ATTR_TIDS, + NL80211_TID_CONFIG_ATTR_NOACK, + + /* keep last */ + __NL80211_TID_CONFIG_ATTR_AFTER_LAST, + NL80211_TID_CONFIG_ATTR_MAX = __NL80211_TID_CONFIG_ATTR_AFTER_LAST - 1 +}; + /** * enum nl80211_packet_pattern_attr - packet pattern attribute * @__NL80211_PKTPAT_INVALID: invalid number for nested attribute @@ -5540,6 +5605,10 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL) * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. + * @NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG: Driver supports per TID NoAck + * policy functionality. + * @NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG: Driver supports STA specific NoAck + * policy functionality. * * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection * and can receive key configuration for BIGTK using key indexes 6 and 7. @@ -5592,6 +5661,8 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, NL80211_EXT_FEATURE_BEACON_PROTECTION, + NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG, + NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a75f72288139..a0839fae6555 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -328,6 +328,14 @@ he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { [NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG }, }; +static const struct nla_policy +nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { + [NL80211_TID_CONFIG_ATTR_OVERRIDE] = { .type = NLA_FLAG }, + [NL80211_TID_CONFIG_ATTR_TIDS] = { .type = NLA_U8 }, + [NL80211_TID_CONFIG_ATTR_NOACK] = + NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), +}; + const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD }, [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, @@ -634,6 +642,8 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_HE_OBSS_PD] = NLA_POLICY_NESTED(he_obss_pd_policy), [NL80211_ATTR_VLAN_ID] = NLA_POLICY_RANGE(NLA_U16, 1, VLAN_N_VID - 2), [NL80211_ATTR_HE_BSS_COLOR] = NLA_POLICY_NESTED(he_bss_color_policy), + [NL80211_ATTR_TID_CONFIG] = + NLA_POLICY_NESTED_ARRAY(nl80211_tid_config_attr_policy), }; /* policy for the key attributes */ @@ -13934,6 +13944,137 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) return rdev_probe_mesh_link(rdev, dev, dest, buf, len); } +static int +__nl80211_check_tid_conf_support(struct cfg80211_registered_device *rdev, + struct netlink_ext_ack *extack, + const u8 *peer, struct nlattr *attrs[], + struct ieee80211_tid_cfg *tid_conf, + enum nl80211_tid_config_attr attr, + enum nl80211_ext_feature_index per_tid_config, + enum nl80211_ext_feature_index per_sta_config) +{ + if (!wiphy_ext_feature_isset(&rdev->wiphy, per_tid_config)) { + NL_SET_ERR_MSG_ATTR(extack, attrs[attr], + "TID specific configuration not supported"); + return -ENOTSUPP; + } + + if (peer && !wiphy_ext_feature_isset(&rdev->wiphy, per_sta_config)) { + NL_SET_ERR_MSG_ATTR(extack, attrs[attr], + "peer specific TID configuration not supported"); + return -ENOTSUPP; + } + + tid_conf->tid_conf_mask |= BIT(attr); + return 0; +} + +#define nl80211_check_tid_config_support(rdev, extack, peer, attrs, tid_conf, \ + conf) \ + __nl80211_check_tid_conf_support(rdev, extack, peer, attrs, tid_conf, \ + NL80211_TID_CONFIG_ATTR_##conf, \ + NL80211_EXT_FEATURE_PER_TID_##conf##_CONFIG, \ + NL80211_EXT_FEATURE_PER_STA_##conf##_CONFIG) + +static int parse_tid_conf(struct cfg80211_registered_device *rdev, + struct nlattr *attrs[], struct net_device *dev, + struct ieee80211_tid_cfg *tid_conf, + struct genl_info *info, const u8 *peer) +{ + struct netlink_ext_ack *extack = info->extack; + int err; + + if (!attrs[NL80211_TID_CONFIG_ATTR_TIDS]) + return -EINVAL; + + tid_conf->config_override = + nla_get_flag(attrs[NL80211_TID_CONFIG_ATTR_OVERRIDE]); + tid_conf->tid = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_TIDS]); + + if (tid_conf->config_override) { + if (rdev->ops->reset_tid_config) { + err = rdev_reset_tid_config(rdev, dev, peer, + tid_conf->tid); + /* If peer is there no other configuration will be + * allowed + */ + if (err || peer) + return err; + } else { + return -EINVAL; + } + } + + if (attrs[NL80211_TID_CONFIG_ATTR_NOACK]) { + err = nl80211_check_tid_config_support(rdev, extack, peer, + attrs, tid_conf, + NOACK); + if (err) + return err; + + tid_conf->noack = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]); + } + + return 0; +} + +static int nl80211_set_tid_config(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1]; + struct net_device *dev = info->user_ptr[1]; + struct ieee80211_tid_config *tid_config; + struct nlattr *tid; + int conf_idx = 0, rem_conf; + int ret = -EINVAL; + u32 num_conf = 0; + + if (!info->attrs[NL80211_ATTR_TID_CONFIG]) + return -EINVAL; + + if (!rdev->ops->set_tid_config) + return -EOPNOTSUPP; + + nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], + rem_conf) + num_conf++; + + tid_config = kzalloc(struct_size(tid_config, tid_conf, num_conf), + GFP_KERNEL); + if (!tid_config) + return -ENOMEM; + + tid_config->n_tid_conf = num_conf; + + if (info->attrs[NL80211_ATTR_MAC]) + tid_config->peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + nla_for_each_nested(tid, info->attrs[NL80211_ATTR_TID_CONFIG], + rem_conf) { + ret = nla_parse_nested(attrs, NL80211_TID_CONFIG_ATTR_MAX, + tid, NULL, NULL); + + if (ret) + goto bad_tid_conf; + + ret = parse_tid_conf(rdev, attrs, dev, + &tid_config->tid_conf[conf_idx], + info, tid_config->peer); + if (ret) + goto bad_tid_conf; + + conf_idx++; + } + + ret = rdev_set_tid_config(rdev, dev, tid_config); + +bad_tid_conf: + kfree(tid_config); + return ret; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -14888,6 +15029,13 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_TID_CONFIG, + .doit = nl80211_set_tid_config, + .flags = GENL_UNS_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_family nl80211_fam __ro_after_init = { diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index af7fcf2a3b4a..a754e0496b6c 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1326,4 +1326,28 @@ rdev_probe_mesh_link(struct cfg80211_registered_device *rdev, return ret; } +static inline int rdev_set_tid_config(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_tid_config *tid_conf) +{ + int ret; + + trace_rdev_set_tid_config(&rdev->wiphy, dev, tid_conf); + ret = rdev->ops->set_tid_config(&rdev->wiphy, dev, tid_conf); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int rdev_reset_tid_config(struct cfg80211_registered_device *rdev, + struct net_device *dev, const u8 *peer, + u8 tid) +{ + int ret; + + trace_rdev_reset_tid_config(&rdev->wiphy, dev, peer, tid); + ret = rdev->ops->reset_tid_config(&rdev->wiphy, dev, peer, tid); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 56b78222746c..167b18896cd6 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3480,6 +3480,43 @@ TRACE_EVENT(rdev_probe_mesh_link, WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest)) ); +TRACE_EVENT(rdev_set_tid_config, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + struct ieee80211_tid_config *tid_conf), + TP_ARGS(wiphy, netdev, tid_conf), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, tid_conf->peer); + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT, + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer)) +); + +TRACE_EVENT(rdev_reset_tid_config, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + const u8 *peer, u8 tid), + TP_ARGS(wiphy, netdev, peer, tid), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(u8, tid) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tid = tid; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tid: %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tid) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 3710a8a6284f58a78ba4fe9c4b6672207636a223 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 24 Feb 2020 11:34:25 +0100 Subject: nl80211: modify TID-config API Make some changes to the TID-config API: * use u16 in nl80211 (only, and restrict to using 8 bits for now), to avoid issues in the future if we ever want to use higher TIDs. * reject empty TIDs mask (via netlink policy) * change feature advertising to not use extended feature flags but have own mechanism for this, which simplifies the code * fix all variable names from 'tid' to 'tids' since it's a mask * change to cfg80211_ name prefixes, not ieee80211_ * fix some minor docs/spelling things. Change-Id: Ia234d464b3f914cdeab82f540e018855be580dce Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 43 +++++++++++-------- include/uapi/linux/nl80211.h | 51 +++++++++++++---------- net/wireless/nl80211.c | 99 +++++++++++++++++++++++++------------------- net/wireless/rdev-ops.h | 8 ++-- net/wireless/trace.h | 14 +++---- 5 files changed, 121 insertions(+), 94 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 65a8bf73d21e..bbe4acef729d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,36 +626,32 @@ struct cfg80211_chan_def { struct ieee80211_edmg edmg; }; -enum ieee80211_tid_conf_mask { - IEEE80211_TID_CONF_NOACK = BIT(NL80211_TID_CONFIG_ATTR_NOACK), -}; - /** - * struct ieee80211_tid_cfg - TID specific configuration + * struct cfg80211_tid_cfg - TID specific configuration * @config_override: Flag to notify driver to reset TID configuration * of the peer. - * @tid: TID number - * @tid_conf_mask: bitmap indicating which parameter changed - * see &enum ieee80211_tid_conf_mask + * @tids: bitmap of TIDs to modify + * @mask: bitmap of attributes indicating which parameter changed, + * similar to &nl80211_tid_config_supp. * @noack: noack configuration value for the TID */ -struct ieee80211_tid_cfg { +struct cfg80211_tid_cfg { bool config_override; - u8 tid; - u32 tid_conf_mask; + u8 tids; + u32 mask; enum nl80211_tid_config noack; }; /** - * struct ieee80211_tid_config - TID configuration + * struct cfg80211_tid_config - TID configuration * @peer: Station's MAC address * @n_tid_conf: Number of TID specific configurations to be applied * @tid_conf: Configuration change info */ -struct ieee80211_tid_config { +struct cfg80211_tid_config { const u8 *peer; u32 n_tid_conf; - struct ieee80211_tid_cfg tid_conf[]; + struct cfg80211_tid_cfg tid_conf[]; }; /** @@ -3705,8 +3701,8 @@ struct cfg80211_update_owe_info { * and overrule HWMP path selection algorithm. * @set_tid_config: TID specific configuration, this can be peer or BSS specific * This callback may sleep. - * @reset_tid_config: Reset TID specific configuration for the peer. - * This callback may sleep. + * @reset_tid_config: Reset TID specific configuration for the peer, for the + * given TIDs. This callback may sleep. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -4031,9 +4027,9 @@ struct cfg80211_ops { int (*probe_mesh_link)(struct wiphy *wiphy, struct net_device *dev, const u8 *buf, size_t len); int (*set_tid_config)(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_tid_config *tid_conf); + struct cfg80211_tid_config *tid_conf); int (*reset_tid_config)(struct wiphy *wiphy, struct net_device *dev, - const u8 *peer, u8 tid); + const u8 *peer, u8 tids); }; /* @@ -4641,6 +4637,13 @@ struct wiphy_iftype_akm_suites { * @support_mbssid must be set for this to have any effect. * * @pmsr_capa: peer measurement capabilities + * + * @tid_config_support: describes the per-TID config support that the + * device has + * @tid_config_support.vif: bitmap of attributes (configurations) + * supported by the driver for each vif + * @tid_config_support.peer: bitmap of attributes (configurations) + * supported by the driver for each peer */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -4772,6 +4775,10 @@ struct wiphy { const struct cfg80211_pmsr_capabilities *pmsr_capa; + struct { + u64 peer, vif; + } tid_config_support; + char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0b12fcffb518..591d843eda72 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -267,20 +267,22 @@ /** * DOC: TID configuration * - * TID configuration support can be advertised by drivers by setting - * @NL80211_EXT_FEATURE_PER_TID_* and/or @NL80211_EXT_FEATURE_PER_STA_* config - * mentioned in &enum nl80211_tid_config_attr. - * Needed configuration parameters are mentioned in - * &enum nl80211_tid_config_attr and it will be passed using - * %NL80211_CMD_SET_TID_CONFIG through %NL80211_ATTR_TID_CONFIG. - * If the configuration needs to be applied for specific peer then MAC address - * of the peer needs to be passed in %NL80211_ATT_MAC, otherwise the + * TID config support can be checked in the %NL80211_ATTR_TID_CONFIG + * attribute given in wiphy capabilities. + * + * The necessary configuration parameters are mentioned in + * &enum nl80211_tid_config_attr and it will be passed to the + * %NL80211_CMD_SET_TID_CONFIG command in %NL80211_ATTR_TID_CONFIG. + * + * If the configuration needs to be applied for specific peer then the MAC + * address of the peer needs to be passed in %NL80211_ATTR_MAC, otherwise the * configuration will be applied for all the connected peers in the vif except - * the peer which has peer specific configuration for the TID. - * And the peer specific configuration will be overridden if - * %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set. - * All this configurations are valid only for STA's current connection - * i.e. the configurations will be reset to default when the STA connects back + * any peers that have peer specific configuration for the TID by default; if + * the %NL80211_TID_CONFIG_ATTR_OVERRIDE flag is set, peer specific values + * will be overwritten. + * + * All this configuration is valid only for STA's current connection + * i.e. the configuration will be reset to default when the STA connects back * after disconnection/roaming, and this configuration will be cleared when * the interface goes down. */ @@ -2436,7 +2438,9 @@ enum nl80211_commands { * advertised for a specific interface type. * * @NL80211_ATTR_TID_CONFIG: TID specific configuration in a - * nested attribute with &enum nl80211_tid_config_attr sub-attributes. + * nested attribute with &enum nl80211_tid_config_attr sub-attributes; + * on output (in wiphy attributes) it contains only the feature sub- + * attributes. * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined @@ -4764,20 +4768,29 @@ enum nl80211_tid_config { }; /* enum nl80211_tid_config_attr - TID specific configuration. + * @NL80211_TID_CONFIG_ATTR_PAD: pad attribute for 64-bit values + * @NL80211_TID_CONFIG_ATTR_VIF_SUPP: a bitmap (u64) of attributes supported + * for per-vif configuration; doesn't list the ones that are generic + * (%NL80211_TID_CONFIG_ATTR_TIDS, %NL80211_TID_CONFIG_ATTR_OVERRIDE). + * @NL80211_TID_CONFIG_ATTR_PEER_SUPP: same as the previous per-vif one, but + * per peer instead. * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if no peer * is selected, if set indicates that the new configuration overrides * all previous peer configurations, otherwise previous peer specific * configurations should be left untouched. If peer is selected then * it will reset particular TID configuration of that peer and it will * not accept other TID config attributes along with peer. - * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs(bit 0 to 7) - * Its type is u8. + * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs (bit 0 to 7) + * Its type is u16. * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. * specified in %NL80211_TID_CONFIG_ATTR_TID. see %enum nl80211_tid_config. * Its type is u8. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, + NL80211_TID_CONFIG_ATTR_PAD, + NL80211_TID_CONFIG_ATTR_VIF_SUPP, + NL80211_TID_CONFIG_ATTR_PEER_SUPP, NL80211_TID_CONFIG_ATTR_OVERRIDE, NL80211_TID_CONFIG_ATTR_TIDS, NL80211_TID_CONFIG_ATTR_NOACK, @@ -5605,10 +5618,6 @@ enum nl80211_feature_flags { * @NL80211_EXT_FEATURE_AQL: The driver supports the Airtime Queue Limit (AQL) * feature, which prevents bufferbloat by using the expected transmission * time to limit the amount of data buffered in the hardware. - * @NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG: Driver supports per TID NoAck - * policy functionality. - * @NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG: Driver supports STA specific NoAck - * policy functionality. * * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection * and can receive key configuration for BIGTK using key indexes 6 and 7. @@ -5661,8 +5670,6 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_VLAN_OFFLOAD, NL80211_EXT_FEATURE_AQL, NL80211_EXT_FEATURE_BEACON_PROTECTION, - NL80211_EXT_FEATURE_PER_TID_NOACK_CONFIG, - NL80211_EXT_FEATURE_PER_STA_NOACK_CONFIG, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a0839fae6555..56ac851ccee1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -330,8 +330,10 @@ he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = { static const struct nla_policy nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { + [NL80211_TID_CONFIG_ATTR_VIF_SUPP] = { .type = NLA_U64 }, + [NL80211_TID_CONFIG_ATTR_PEER_SUPP] = { .type = NLA_U64 }, [NL80211_TID_CONFIG_ATTR_OVERRIDE] = { .type = NLA_FLAG }, - [NL80211_TID_CONFIG_ATTR_TIDS] = { .type = NLA_U8 }, + [NL80211_TID_CONFIG_ATTR_TIDS] = NLA_POLICY_RANGE(NLA_U16, 1, 0xff), [NL80211_TID_CONFIG_ATTR_NOACK] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), }; @@ -1957,6 +1959,40 @@ nl80211_put_iftype_akm_suites(struct cfg80211_registered_device *rdev, return 0; } +static int +nl80211_put_tid_config_support(struct cfg80211_registered_device *rdev, + struct sk_buff *msg) +{ + struct nlattr *supp; + + if (!rdev->wiphy.tid_config_support.vif && + !rdev->wiphy.tid_config_support.peer) + return 0; + + supp = nla_nest_start(msg, NL80211_ATTR_TID_CONFIG); + if (!supp) + return -ENOSPC; + + if (rdev->wiphy.tid_config_support.vif && + nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_VIF_SUPP, + rdev->wiphy.tid_config_support.vif, + NL80211_TID_CONFIG_ATTR_PAD)) + goto fail; + + if (rdev->wiphy.tid_config_support.peer && + nla_put_u64_64bit(msg, NL80211_TID_CONFIG_ATTR_PEER_SUPP, + rdev->wiphy.tid_config_support.peer, + NL80211_TID_CONFIG_ATTR_PAD)) + goto fail; + + nla_nest_end(msg, supp); + + return 0; +fail: + nla_nest_cancel(msg, supp); + return -ENOBUFS; +} + struct nl80211_dump_wiphy_state { s64 filter_wiphy; long start; @@ -2518,6 +2554,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (nl80211_put_iftype_akm_suites(rdev, msg)) goto nla_put_failure; + if (nl80211_put_tid_config_support(rdev, msg)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -13944,44 +13983,13 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) return rdev_probe_mesh_link(rdev, dev, dest, buf, len); } -static int -__nl80211_check_tid_conf_support(struct cfg80211_registered_device *rdev, - struct netlink_ext_ack *extack, - const u8 *peer, struct nlattr *attrs[], - struct ieee80211_tid_cfg *tid_conf, - enum nl80211_tid_config_attr attr, - enum nl80211_ext_feature_index per_tid_config, - enum nl80211_ext_feature_index per_sta_config) -{ - if (!wiphy_ext_feature_isset(&rdev->wiphy, per_tid_config)) { - NL_SET_ERR_MSG_ATTR(extack, attrs[attr], - "TID specific configuration not supported"); - return -ENOTSUPP; - } - - if (peer && !wiphy_ext_feature_isset(&rdev->wiphy, per_sta_config)) { - NL_SET_ERR_MSG_ATTR(extack, attrs[attr], - "peer specific TID configuration not supported"); - return -ENOTSUPP; - } - - tid_conf->tid_conf_mask |= BIT(attr); - return 0; -} - -#define nl80211_check_tid_config_support(rdev, extack, peer, attrs, tid_conf, \ - conf) \ - __nl80211_check_tid_conf_support(rdev, extack, peer, attrs, tid_conf, \ - NL80211_TID_CONFIG_ATTR_##conf, \ - NL80211_EXT_FEATURE_PER_TID_##conf##_CONFIG, \ - NL80211_EXT_FEATURE_PER_STA_##conf##_CONFIG) - static int parse_tid_conf(struct cfg80211_registered_device *rdev, struct nlattr *attrs[], struct net_device *dev, - struct ieee80211_tid_cfg *tid_conf, + struct cfg80211_tid_cfg *tid_conf, struct genl_info *info, const u8 *peer) { struct netlink_ext_ack *extack = info->extack; + u64 mask; int err; if (!attrs[NL80211_TID_CONFIG_ATTR_TIDS]) @@ -13989,12 +13997,12 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, tid_conf->config_override = nla_get_flag(attrs[NL80211_TID_CONFIG_ATTR_OVERRIDE]); - tid_conf->tid = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_TIDS]); + tid_conf->tids = nla_get_u16(attrs[NL80211_TID_CONFIG_ATTR_TIDS]); if (tid_conf->config_override) { if (rdev->ops->reset_tid_config) { err = rdev_reset_tid_config(rdev, dev, peer, - tid_conf->tid); + tid_conf->tids); /* If peer is there no other configuration will be * allowed */ @@ -14006,16 +14014,21 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, } if (attrs[NL80211_TID_CONFIG_ATTR_NOACK]) { - err = nl80211_check_tid_config_support(rdev, extack, peer, - attrs, tid_conf, - NOACK); - if (err) - return err; - + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_NOACK); tid_conf->noack = nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]); } + if (peer) + mask = rdev->wiphy.tid_config_support.peer; + else + mask = rdev->wiphy.tid_config_support.vif; + + if (tid_conf->mask & ~mask) { + NL_SET_ERR_MSG(extack, "unsupported TID configuration"); + return -ENOTSUPP; + } + return 0; } @@ -14025,7 +14038,7 @@ static int nl80211_set_tid_config(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct nlattr *attrs[NL80211_TID_CONFIG_ATTR_MAX + 1]; struct net_device *dev = info->user_ptr[1]; - struct ieee80211_tid_config *tid_config; + struct cfg80211_tid_config *tid_config; struct nlattr *tid; int conf_idx = 0, rem_conf; int ret = -EINVAL; diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index a754e0496b6c..99462f0c4e08 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1328,7 +1328,7 @@ rdev_probe_mesh_link(struct cfg80211_registered_device *rdev, static inline int rdev_set_tid_config(struct cfg80211_registered_device *rdev, struct net_device *dev, - struct ieee80211_tid_config *tid_conf) + struct cfg80211_tid_config *tid_conf) { int ret; @@ -1340,12 +1340,12 @@ static inline int rdev_set_tid_config(struct cfg80211_registered_device *rdev, static inline int rdev_reset_tid_config(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *peer, - u8 tid) + u8 tids) { int ret; - trace_rdev_reset_tid_config(&rdev->wiphy, dev, peer, tid); - ret = rdev->ops->reset_tid_config(&rdev->wiphy, dev, peer, tid); + trace_rdev_reset_tid_config(&rdev->wiphy, dev, peer, tids); + ret = rdev->ops->reset_tid_config(&rdev->wiphy, dev, peer, tids); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 167b18896cd6..839df54cee21 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -3482,7 +3482,7 @@ TRACE_EVENT(rdev_probe_mesh_link, TRACE_EVENT(rdev_set_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - struct ieee80211_tid_config *tid_conf), + struct cfg80211_tid_config *tid_conf), TP_ARGS(wiphy, netdev, tid_conf), TP_STRUCT__entry( WIPHY_ENTRY @@ -3500,22 +3500,22 @@ TRACE_EVENT(rdev_set_tid_config, TRACE_EVENT(rdev_reset_tid_config, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, - const u8 *peer, u8 tid), - TP_ARGS(wiphy, netdev, peer, tid), + const u8 *peer, u8 tids), + TP_ARGS(wiphy, netdev, peer, tids), TP_STRUCT__entry( WIPHY_ENTRY NETDEV_ENTRY MAC_ENTRY(peer) - __field(u8, tid) + __field(u8, tids) ), TP_fast_assign( WIPHY_ASSIGN; NETDEV_ASSIGN; MAC_ASSIGN(peer, peer); - __entry->tid = tid; + __entry->tids = tids; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tid: %u", - WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tid) + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tids: 0x%x", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tids) ); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ -- cgit v1.2.3 From 6a21d16c4db08398c737e0ffd03e4eca7131ac78 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:23 +0530 Subject: nl80211: Add support to configure TID specific retry configuration This patch adds support to configure per TID retry configuration through the NL80211_TID_CONFIG_ATTR_RETRY_SHORT and NL80211_TID_CONFIG_ATTR_RETRY_LONG attributes. This TID specific retry configuration will have more precedence than phy level configuration. Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-3-git-send-email-tamizhr@codeaurora.org [rebase completely on top of my previous API changes] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 ++++++++ include/uapi/linux/nl80211.h | 12 ++++++++++++ net/wireless/nl80211.c | 28 ++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bbe4acef729d..98981d1a026b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -634,12 +634,15 @@ struct cfg80211_chan_def { * @mask: bitmap of attributes indicating which parameter changed, * similar to &nl80211_tid_config_supp. * @noack: noack configuration value for the TID + * @retry_long: retry count value + * @retry_short: retry count value */ struct cfg80211_tid_cfg { bool config_override; u8 tids; u32 mask; enum nl80211_tid_config noack; + u8 retry_long, retry_short; }; /** @@ -4644,6 +4647,8 @@ struct wiphy_iftype_akm_suites { * supported by the driver for each vif * @tid_config_support.peer: bitmap of attributes (configurations) * supported by the driver for each peer + * @tid_config_support.max_retry: maximum supported retry count for + * long/short retry configuration */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -4777,8 +4782,11 @@ struct wiphy { struct { u64 peer, vif; + u8 max_retry; } tid_config_support; + u8 max_data_retry_count; + char priv[0] __aligned(NETDEV_ALIGN); }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 591d843eda72..c3481e1feebe 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4785,6 +4785,16 @@ enum nl80211_tid_config { * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID. * specified in %NL80211_TID_CONFIG_ATTR_TID. see %enum nl80211_tid_config. * Its type is u8. + * @NL80211_TID_CONFIG_ATTR_RETRY_SHORT: Number of retries used with data frame + * transmission, user-space sets this configuration in + * &NL80211_CMD_SET_TID_CONFIG. It is u8 type, min value is 1 and + * the max value is advertised by the driver in this attribute on + * output in wiphy capabilities. + * @NL80211_TID_CONFIG_ATTR_RETRY_LONG: Number of retries used with data frame + * transmission, user-space sets this configuration in + * &NL80211_CMD_SET_TID_CONFIG. Its type is u8, min value is 1 and + * the max value is advertised by the driver in this attribute on + * output in wiphy capabilities. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4794,6 +4804,8 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_OVERRIDE, NL80211_TID_CONFIG_ATTR_TIDS, NL80211_TID_CONFIG_ATTR_NOACK, + NL80211_TID_CONFIG_ATTR_RETRY_SHORT, + NL80211_TID_CONFIG_ATTR_RETRY_LONG, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 56ac851ccee1..4c79ba685992 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -336,6 +336,8 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { [NL80211_TID_CONFIG_ATTR_TIDS] = NLA_POLICY_RANGE(NLA_U16, 1, 0xff), [NL80211_TID_CONFIG_ATTR_NOACK] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), + [NL80211_TID_CONFIG_ATTR_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), + [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), }; const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -1985,6 +1987,14 @@ nl80211_put_tid_config_support(struct cfg80211_registered_device *rdev, NL80211_TID_CONFIG_ATTR_PAD)) goto fail; + /* for now we just use the same value ... makes more sense */ + if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_SHORT, + rdev->wiphy.tid_config_support.max_retry)) + goto fail; + if (nla_put_u8(msg, NL80211_TID_CONFIG_ATTR_RETRY_LONG, + rdev->wiphy.tid_config_support.max_retry)) + goto fail; + nla_nest_end(msg, supp); return 0; @@ -14019,6 +14029,24 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_NOACK]); } + if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_SHORT); + tid_conf->retry_short = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_SHORT]); + + if (tid_conf->retry_short > rdev->wiphy.max_data_retry_count) + return -EINVAL; + } + + if (attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RETRY_LONG); + tid_conf->retry_long = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RETRY_LONG]); + + if (tid_conf->retry_long > rdev->wiphy.max_data_retry_count) + return -EINVAL; + } + if (peer) mask = rdev->wiphy.tid_config_support.peer; else -- cgit v1.2.3 From ade274b23e41886091a7e105ab3de71baef112e7 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:24 +0530 Subject: nl80211: Add support to configure TID specific AMPDU configuration This patch adds support to configure per TID AMPDU control configuration to enable/disable aggregation through the NL80211_TID_CONFIG_ATTR_AMPDU_CTRL attribute. Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-4-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 8 ++++++++ 3 files changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 98981d1a026b..78171ae01406 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -636,6 +636,7 @@ struct cfg80211_chan_def { * @noack: noack configuration value for the TID * @retry_long: retry count value * @retry_short: retry count value + * @ampdu: Enable/Disable aggregation */ struct cfg80211_tid_cfg { bool config_override; @@ -643,6 +644,7 @@ struct cfg80211_tid_cfg { u32 mask; enum nl80211_tid_config noack; u8 retry_long, retry_short; + enum nl80211_tid_config ampdu; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c3481e1feebe..71cae33e6679 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4795,6 +4795,9 @@ enum nl80211_tid_config { * &NL80211_CMD_SET_TID_CONFIG. Its type is u8, min value is 1 and * the max value is advertised by the driver in this attribute on * output in wiphy capabilities. + * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable aggregation for the TIDs + * specified in %NL80211_TID_CONFIG_ATTR_TIDS. Its type is u8, using + * the values from &nl80211_tid_config. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4806,6 +4809,7 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_NOACK, NL80211_TID_CONFIG_ATTR_RETRY_SHORT, NL80211_TID_CONFIG_ATTR_RETRY_LONG, + NL80211_TID_CONFIG_ATTR_AMPDU_CTRL, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4c79ba685992..078d30756b3e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -338,6 +338,8 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), [NL80211_TID_CONFIG_ATTR_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), + [NL80211_TID_CONFIG_ATTR_AMPDU_CTRL] = + NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), }; const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -14047,6 +14049,12 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, return -EINVAL; } + if (attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMPDU_CTRL); + tid_conf->ampdu = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]); + } + if (peer) mask = rdev->wiphy.tid_config_support.peer; else -- cgit v1.2.3 From 04f7d142f51c6019a695cfd70c09bb60233472c5 Mon Sep 17 00:00:00 2001 From: Tamizh chelvam Date: Mon, 20 Jan 2020 13:21:25 +0530 Subject: nl80211: Add support to configure TID specific RTSCTS configuration This patch adds support to configure per TID RTSCTS control configuration to enable/disable through the NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL attribute. Signed-off-by: Tamizh chelvam Link: https://lore.kernel.org/r/1579506687-18296-5-git-send-email-tamizhr@codeaurora.org Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 4 ++++ net/wireless/nl80211.c | 8 ++++++++ 3 files changed, 14 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 78171ae01406..f7c84c32ba39 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -637,6 +637,7 @@ struct cfg80211_chan_def { * @retry_long: retry count value * @retry_short: retry count value * @ampdu: Enable/Disable aggregation + * @rtscts: Enable/Disable RTS/CTS */ struct cfg80211_tid_cfg { bool config_override; @@ -645,6 +646,7 @@ struct cfg80211_tid_cfg { enum nl80211_tid_config noack; u8 retry_long, retry_short; enum nl80211_tid_config ampdu; + enum nl80211_tid_config rtscts; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 71cae33e6679..b002ef2060fa 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -4798,6 +4798,9 @@ enum nl80211_tid_config { * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable aggregation for the TIDs * specified in %NL80211_TID_CONFIG_ATTR_TIDS. Its type is u8, using * the values from &nl80211_tid_config. + * @NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL: Enable/Disable RTS_CTS for the TIDs + * specified in %NL80211_TID_CONFIG_ATTR_TIDS. It is u8 type, using + * the values from &nl80211_tid_config. */ enum nl80211_tid_config_attr { __NL80211_TID_CONFIG_ATTR_INVALID, @@ -4810,6 +4813,7 @@ enum nl80211_tid_config_attr { NL80211_TID_CONFIG_ATTR_RETRY_SHORT, NL80211_TID_CONFIG_ATTR_RETRY_LONG, NL80211_TID_CONFIG_ATTR_AMPDU_CTRL, + NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL, /* keep last */ __NL80211_TID_CONFIG_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 078d30756b3e..ae5e10fe1196 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -340,6 +340,8 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = { [NL80211_TID_CONFIG_ATTR_RETRY_LONG] = NLA_POLICY_MIN(NLA_U8, 1), [NL80211_TID_CONFIG_ATTR_AMPDU_CTRL] = NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), + [NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL] = + NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE), }; const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { @@ -14055,6 +14057,12 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev, nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMPDU_CTRL]); } + if (attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]) { + tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL); + tid_conf->rtscts = + nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]); + } + if (peer) mask = rdev->wiphy.tid_config_support.peer; else -- cgit v1.2.3 From 571912c69f0ed731bd1e071ade9dc7ca4aa52065 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Mon, 24 Feb 2020 10:57:50 +0530 Subject: net: UDP tunnel encapsulation module for tunnelling different protocols like MPLS, IP, NSH etc. The Bareudp tunnel module provides a generic L3 encapsulation tunnelling module for tunnelling different protocols like MPLS, IP,NSH etc inside a UDP tunnel. Signed-off-by: Martin Varghese Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/bareudp.rst | 34 ++ Documentation/networking/index.rst | 1 + drivers/net/Kconfig | 13 + drivers/net/Makefile | 1 + drivers/net/bareudp.c | 743 +++++++++++++++++++++++++++++++++++ include/net/bareudp.h | 19 + include/net/ipv6.h | 6 + include/net/route.h | 6 + include/uapi/linux/if_link.h | 11 + net/ipv4/route.c | 48 +++ net/ipv6/ip6_output.c | 70 ++++ 11 files changed, 952 insertions(+) create mode 100644 Documentation/networking/bareudp.rst create mode 100644 drivers/net/bareudp.c create mode 100644 include/net/bareudp.h (limited to 'include/uapi/linux') diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst new file mode 100644 index 000000000000..6ee68c16cf5b --- /dev/null +++ b/Documentation/networking/bareudp.rst @@ -0,0 +1,34 @@ +.. SPDX-License-Identifier: GPL-2.0 + +======================================== +Bare UDP Tunnelling Module Documentation +======================================== + +There are various L3 encapsulation standards using UDP being discussed to +leverage the UDP based load balancing capability of different networks. +MPLSoUDP (__ https://tools.ietf.org/html/rfc7510) is one among them. + +The Bareudp tunnel module provides a generic L3 encapsulation tunnelling +support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside +a UDP tunnel. + +Usage +------ + +1) Device creation & deletion + + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847. + + This creates a bareudp tunnel device which tunnels L3 traffic with ethertype + 0x8847 (MPLS traffic). The destination port of the UDP header will be set to + 6635.The device will listen on UDP port 6635 to receive traffic. + + b) ip link delete bareudp0 + +2) Device Usage + +The bareudp device could be used along with OVS or flower filter in TC. +The OVS or TC flower layer must set the tunnel information in SKB dst field before +sending packet buffer to the bareudp device for transmission. On reception the +bareudp device extracts and stores the tunnel information in SKB dst field before +passing the packet buffer to the network stack. diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst index d07d9855dcd3..3a83cfb66704 100644 --- a/Documentation/networking/index.rst +++ b/Documentation/networking/index.rst @@ -8,6 +8,7 @@ Contents: netdev-FAQ af_xdp + bareudp batman-adv can can_ucan_protocol diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 25a8f9387d5a..66e410e58c8e 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -258,6 +258,19 @@ config GENEVE To compile this driver as a module, choose M here: the module will be called geneve. +config BAREUDP + tristate "Bare UDP Encapsulation" + depends on INET + depends on IPV6 || !IPV6 + select NET_UDP_TUNNEL + select GRO_CELLS + help + This adds a bare UDP tunnel module for tunnelling different + kinds of traffic like MPLS, IP, etc. inside a UDP tunnel. + + To compile this driver as a module, choose M here: the module + will be called bareudp. + config GTP tristate "GPRS Tunneling Protocol datapath (GTP-U)" depends on INET diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 71b88ffc5587..65967246f240 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_VETH) += veth.o obj-$(CONFIG_VIRTIO_NET) += virtio_net.o obj-$(CONFIG_VXLAN) += vxlan.o obj-$(CONFIG_GENEVE) += geneve.o +obj-$(CONFIG_BAREUDP) += bareudp.o obj-$(CONFIG_GTP) += gtp.o obj-$(CONFIG_NLMON) += nlmon.o obj-$(CONFIG_NET_VRF) += vrf.o diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c new file mode 100644 index 000000000000..32518969139e --- /dev/null +++ b/drivers/net/bareudp.c @@ -0,0 +1,743 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Bareudp: UDP tunnel encasulation for different Payload types like + * MPLS, NSH, IP, etc. + * Copyright (c) 2019 Nokia, Inc. + * Authors: Martin Varghese, + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define BAREUDP_BASE_HLEN sizeof(struct udphdr) +#define BAREUDP_IPV4_HLEN (sizeof(struct iphdr) + \ + sizeof(struct udphdr)) +#define BAREUDP_IPV6_HLEN (sizeof(struct ipv6hdr) + \ + sizeof(struct udphdr)) + +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + +/* per-network namespace private data for this module */ + +static unsigned int bareudp_net_id; + +struct bareudp_net { + struct list_head bareudp_list; +}; + +/* Pseudo network device */ +struct bareudp_dev { + struct net *net; /* netns for packet i/o */ + struct net_device *dev; /* netdev for bareudp tunnel */ + __be16 ethertype; + __be16 port; + u16 sport_min; + struct socket __rcu *sock; + struct list_head next; /* bareudp node on namespace list */ + struct gro_cells gro_cells; +}; + +static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) +{ + struct metadata_dst *tun_dst = NULL; + struct pcpu_sw_netstats *stats; + struct bareudp_dev *bareudp; + unsigned short family; + unsigned int len; + __be16 proto; + void *oiph; + int err; + + bareudp = rcu_dereference_sk_user_data(sk); + if (!bareudp) + goto drop; + + if (skb->protocol == htons(ETH_P_IP)) + family = AF_INET; + else + family = AF_INET6; + + proto = bareudp->ethertype; + + if (iptunnel_pull_header(skb, BAREUDP_BASE_HLEN, + proto, + !net_eq(bareudp->net, + dev_net(bareudp->dev)))) { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + + tun_dst = udp_tun_rx_dst(skb, family, TUNNEL_KEY, 0, 0); + if (!tun_dst) { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + skb_dst_set(skb, &tun_dst->dst); + skb->dev = bareudp->dev; + oiph = skb_network_header(skb); + skb_reset_network_header(skb); + + if (family == AF_INET) + err = IP_ECN_decapsulate(oiph, skb); +#if IS_ENABLED(CONFIG_IPV6) + else + err = IP6_ECN_decapsulate(oiph, skb); +#endif + + if (unlikely(err)) { + if (log_ecn_error) { + if (family == AF_INET) + net_info_ratelimited("non-ECT from %pI4 " + "with TOS=%#x\n", + &((struct iphdr *)oiph)->saddr, + ((struct iphdr *)oiph)->tos); +#if IS_ENABLED(CONFIG_IPV6) + else + net_info_ratelimited("non-ECT from %pI6\n", + &((struct ipv6hdr *)oiph)->saddr); +#endif + } + if (err > 1) { + ++bareudp->dev->stats.rx_frame_errors; + ++bareudp->dev->stats.rx_errors; + goto drop; + } + } + + len = skb->len; + err = gro_cells_receive(&bareudp->gro_cells, skb); + if (likely(err == NET_RX_SUCCESS)) { + stats = this_cpu_ptr(bareudp->dev->tstats); + u64_stats_update_begin(&stats->syncp); + stats->rx_packets++; + stats->rx_bytes += len; + u64_stats_update_end(&stats->syncp); + } + return 0; +drop: + /* Consume bad packet */ + kfree_skb(skb); + + return 0; +} + +static int bareudp_err_lookup(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + +static int bareudp_init(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + int err; + + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!dev->tstats) + return -ENOMEM; + + err = gro_cells_init(&bareudp->gro_cells, dev); + if (err) { + free_percpu(dev->tstats); + return err; + } + return 0; +} + +static void bareudp_uninit(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + gro_cells_destroy(&bareudp->gro_cells); + free_percpu(dev->tstats); +} + +static struct socket *bareudp_create_sock(struct net *net, __be16 port) +{ + struct udp_port_cfg udp_conf; + struct socket *sock; + int err; + + memset(&udp_conf, 0, sizeof(udp_conf)); +#if IS_ENABLED(CONFIG_IPV6) + udp_conf.family = AF_INET6; +#else + udp_conf.family = AF_INET; +#endif + udp_conf.local_udp_port = port; + /* Open UDP socket */ + err = udp_sock_create(net, &udp_conf, &sock); + if (err < 0) + return ERR_PTR(err); + + return sock; +} + +/* Create new listen socket if needed */ +static int bareudp_socket_create(struct bareudp_dev *bareudp, __be16 port) +{ + struct udp_tunnel_sock_cfg tunnel_cfg; + struct socket *sock; + + sock = bareudp_create_sock(bareudp->net, port); + if (IS_ERR(sock)) + return PTR_ERR(sock); + + /* Mark socket as an encapsulation socket */ + memset(&tunnel_cfg, 0, sizeof(tunnel_cfg)); + tunnel_cfg.sk_user_data = bareudp; + tunnel_cfg.encap_type = 1; + tunnel_cfg.encap_rcv = bareudp_udp_encap_recv; + tunnel_cfg.encap_err_lookup = bareudp_err_lookup; + tunnel_cfg.encap_destroy = NULL; + setup_udp_tunnel_sock(bareudp->net, sock, &tunnel_cfg); + + if (sock->sk->sk_family == AF_INET6) + udp_encap_enable(); + + rcu_assign_pointer(bareudp->sock, sock); + return 0; +} + +static int bareudp_open(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + int ret = 0; + + ret = bareudp_socket_create(bareudp, bareudp->port); + return ret; +} + +static void bareudp_sock_release(struct bareudp_dev *bareudp) +{ + struct socket *sock; + + sock = bareudp->sock; + rcu_assign_pointer(bareudp->sock, NULL); + synchronize_net(); + udp_tunnel_sock_release(sock); +} + +static int bareudp_stop(struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + bareudp_sock_release(bareudp); + return 0; +} + +static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct bareudp_dev *bareudp, + const struct ip_tunnel_info *info) +{ + bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); + struct socket *sock = rcu_dereference(bareudp->sock); + bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + const struct ip_tunnel_key *key = &info->key; + struct rtable *rt; + __be16 sport, df; + int min_headroom; + __u8 tos, ttl; + __be32 saddr; + int err; + + if (!sock) + return -ESHUTDOWN; + + rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, info, + IPPROTO_UDP, use_cache); + + if (IS_ERR(rt)) + return PTR_ERR(rt); + + skb_tunnel_check_pmtu(skb, &rt->dst, + BAREUDP_IPV4_HLEN + info->options_len); + + sport = udp_flow_src_port(bareudp->net, skb, + bareudp->sport_min, USHRT_MAX, + true); + tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); + ttl = key->ttl; + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + skb_scrub_packet(skb, xnet); + + if (!skb_pull(skb, skb_network_offset(skb))) + goto free_dst; + + min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); + + err = skb_cow_head(skb, min_headroom); + if (unlikely(err)) + goto free_dst; + + err = udp_tunnel_handle_offloads(skb, udp_sum); + if (err) + goto free_dst; + + skb_set_inner_protocol(skb, bareudp->ethertype); + udp_tunnel_xmit_skb(rt, sock->sk, skb, saddr, info->key.u.ipv4.dst, + tos, ttl, df, sport, bareudp->port, + !net_eq(bareudp->net, dev_net(bareudp->dev)), + !(info->key.tun_flags & TUNNEL_CSUM)); + return 0; + +free_dst: + dst_release(&rt->dst); + return err; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev, + struct bareudp_dev *bareudp, + const struct ip_tunnel_info *info) +{ + bool xnet = !net_eq(bareudp->net, dev_net(bareudp->dev)); + bool use_cache = ip_tunnel_dst_cache_usable(skb, info); + struct socket *sock = rcu_dereference(bareudp->sock); + bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM); + const struct ip_tunnel_key *key = &info->key; + struct dst_entry *dst = NULL; + struct in6_addr saddr, daddr; + int min_headroom; + __u8 prio, ttl; + __be16 sport; + int err; + + if (!sock) + return -ESHUTDOWN; + + dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, &saddr, info, + IPPROTO_UDP, use_cache); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len); + + sport = udp_flow_src_port(bareudp->net, skb, + bareudp->sport_min, USHRT_MAX, + true); + prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb); + ttl = key->ttl; + + skb_scrub_packet(skb, xnet); + + if (!skb_pull(skb, skb_network_offset(skb))) + goto free_dst; + + min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + + BAREUDP_BASE_HLEN + info->options_len + sizeof(struct iphdr); + + err = skb_cow_head(skb, min_headroom); + if (unlikely(err)) + goto free_dst; + + err = udp_tunnel_handle_offloads(skb, udp_sum); + if (err) + goto free_dst; + + daddr = info->key.u.ipv6.dst; + udp_tunnel6_xmit_skb(dst, sock->sk, skb, dev, + &saddr, &daddr, prio, ttl, + info->key.label, sport, bareudp->port, + !(info->key.tun_flags & TUNNEL_CSUM)); + return 0; + +free_dst: + dst_release(dst); + return err; +} +#endif + +static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + struct ip_tunnel_info *info = NULL; + int err; + + if (skb->protocol != bareudp->ethertype) { + err = -EINVAL; + goto tx_error; + } + + info = skb_tunnel_info(skb); + if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) { + err = -EINVAL; + goto tx_error; + } + + rcu_read_lock(); +#if IS_ENABLED(CONFIG_IPV6) + if (info->mode & IP_TUNNEL_INFO_IPV6) + err = bareudp6_xmit_skb(skb, dev, bareudp, info); + else +#endif + err = bareudp_xmit_skb(skb, dev, bareudp, info); + + rcu_read_unlock(); + + if (likely(!err)) + return NETDEV_TX_OK; +tx_error: + dev_kfree_skb(skb); + + if (err == -ELOOP) + dev->stats.collisions++; + else if (err == -ENETUNREACH) + dev->stats.tx_carrier_errors++; + + dev->stats.tx_errors++; + return NETDEV_TX_OK; +} + +static int bareudp_fill_metadata_dst(struct net_device *dev, + struct sk_buff *skb) +{ + struct ip_tunnel_info *info = skb_tunnel_info(skb); + struct bareudp_dev *bareudp = netdev_priv(dev); + bool use_cache; + + use_cache = ip_tunnel_dst_cache_usable(skb, info); + + if (ip_tunnel_info_af(info) == AF_INET) { + struct rtable *rt; + __be32 saddr; + + rt = ip_route_output_tunnel(skb, dev, bareudp->net, &saddr, + info, IPPROTO_UDP, use_cache); + if (IS_ERR(rt)) + return PTR_ERR(rt); + + ip_rt_put(rt); + info->key.u.ipv4.src = saddr; +#if IS_ENABLED(CONFIG_IPV6) + } else if (ip_tunnel_info_af(info) == AF_INET6) { + struct dst_entry *dst; + struct in6_addr saddr; + struct socket *sock = rcu_dereference(bareudp->sock); + + dst = ip6_dst_lookup_tunnel(skb, dev, bareudp->net, sock, + &saddr, info, IPPROTO_UDP, + use_cache); + if (IS_ERR(dst)) + return PTR_ERR(dst); + + dst_release(dst); + info->key.u.ipv6.src = saddr; +#endif + } else { + return -EINVAL; + } + + info->key.tp_src = udp_flow_src_port(bareudp->net, skb, + bareudp->sport_min, + USHRT_MAX, true); + info->key.tp_dst = bareudp->port; + return 0; +} + +static const struct net_device_ops bareudp_netdev_ops = { + .ndo_init = bareudp_init, + .ndo_uninit = bareudp_uninit, + .ndo_open = bareudp_open, + .ndo_stop = bareudp_stop, + .ndo_start_xmit = bareudp_xmit, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_fill_metadata_dst = bareudp_fill_metadata_dst, +}; + +static const struct nla_policy bareudp_policy[IFLA_BAREUDP_MAX + 1] = { + [IFLA_BAREUDP_PORT] = { .type = NLA_U16 }, + [IFLA_BAREUDP_ETHERTYPE] = { .type = NLA_U16 }, + [IFLA_BAREUDP_SRCPORT_MIN] = { .type = NLA_U16 }, +}; + +/* Info for udev, that this is a virtual tunnel endpoint */ +static struct device_type bareudp_type = { + .name = "bareudp", +}; + +/* Initialize the device structure. */ +static void bareudp_setup(struct net_device *dev) +{ + dev->netdev_ops = &bareudp_netdev_ops; + dev->needs_free_netdev = true; + SET_NETDEV_DEVTYPE(dev, &bareudp_type); + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; + dev->features |= NETIF_F_RXCSUM; + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->mtu = ETH_DATA_LEN; + dev->min_mtu = IPV4_MIN_MTU; + dev->max_mtu = IP_MAX_MTU - BAREUDP_BASE_HLEN; + dev->type = ARPHRD_NONE; + netif_keep_dst(dev); + dev->priv_flags |= IFF_NO_QUEUE; + dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST; +} + +static int bareudp_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + if (!data) { + NL_SET_ERR_MSG(extack, + "Not enough attributes provided to perform the operation"); + return -EINVAL; + } + return 0; +} + +static int bareudp2info(struct nlattr *data[], struct bareudp_conf *conf) +{ + if (!data[IFLA_BAREUDP_PORT] || !data[IFLA_BAREUDP_ETHERTYPE]) + return -EINVAL; + + if (data[IFLA_BAREUDP_PORT]) + conf->port = nla_get_u16(data[IFLA_BAREUDP_PORT]); + + if (data[IFLA_BAREUDP_ETHERTYPE]) + conf->ethertype = nla_get_u16(data[IFLA_BAREUDP_ETHERTYPE]); + + if (data[IFLA_BAREUDP_SRCPORT_MIN]) + conf->sport_min = nla_get_u16(data[IFLA_BAREUDP_SRCPORT_MIN]); + + return 0; +} + +static struct bareudp_dev *bareudp_find_dev(struct bareudp_net *bn, + const struct bareudp_conf *conf) +{ + struct bareudp_dev *bareudp, *t = NULL; + + list_for_each_entry(bareudp, &bn->bareudp_list, next) { + if (conf->port == bareudp->port) + t = bareudp; + } + return t; +} + +static int bareudp_configure(struct net *net, struct net_device *dev, + struct bareudp_conf *conf) +{ + struct bareudp_net *bn = net_generic(net, bareudp_net_id); + struct bareudp_dev *t, *bareudp = netdev_priv(dev); + int err; + + bareudp->net = net; + bareudp->dev = dev; + t = bareudp_find_dev(bn, conf); + if (t) + return -EBUSY; + + bareudp->port = conf->port; + bareudp->ethertype = conf->ethertype; + bareudp->sport_min = conf->sport_min; + err = register_netdevice(dev); + if (err) + return err; + + list_add(&bareudp->next, &bn->bareudp_list); + return 0; +} + +static int bareudp_link_config(struct net_device *dev, + struct nlattr *tb[]) +{ + int err; + + if (tb[IFLA_MTU]) { + err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); + if (err) + return err; + } + return 0; +} + +static int bareudp_newlink(struct net *net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct bareudp_conf conf; + int err; + + err = bareudp2info(data, &conf); + if (err) + return err; + + err = bareudp_configure(net, dev, &conf); + if (err) + return err; + + err = bareudp_link_config(dev, tb); + if (err) + return err; + + return 0; +} + +static void bareudp_dellink(struct net_device *dev, struct list_head *head) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + list_del(&bareudp->next); + unregister_netdevice_queue(dev, head); +} + +static size_t bareudp_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_PORT */ + nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_ETHERTYPE */ + nla_total_size(sizeof(__u16)) + /* IFLA_BAREUDP_SRCPORT_MIN */ + 0; +} + +static int bareudp_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct bareudp_dev *bareudp = netdev_priv(dev); + + if (nla_put_be16(skb, IFLA_BAREUDP_PORT, bareudp->port)) + goto nla_put_failure; + if (nla_put_be16(skb, IFLA_BAREUDP_ETHERTYPE, bareudp->ethertype)) + goto nla_put_failure; + if (nla_put_u16(skb, IFLA_BAREUDP_SRCPORT_MIN, bareudp->sport_min)) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops bareudp_link_ops __read_mostly = { + .kind = "bareudp", + .maxtype = IFLA_BAREUDP_MAX, + .policy = bareudp_policy, + .priv_size = sizeof(struct bareudp_dev), + .setup = bareudp_setup, + .validate = bareudp_validate, + .newlink = bareudp_newlink, + .dellink = bareudp_dellink, + .get_size = bareudp_get_size, + .fill_info = bareudp_fill_info, +}; + +struct net_device *bareudp_dev_create(struct net *net, const char *name, + u8 name_assign_type, + struct bareudp_conf *conf) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + LIST_HEAD(list_kill); + int err; + + memset(tb, 0, sizeof(tb)); + dev = rtnl_create_link(net, name, name_assign_type, + &bareudp_link_ops, tb, NULL); + if (IS_ERR(dev)) + return dev; + + err = bareudp_configure(net, dev, conf); + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } + err = dev_set_mtu(dev, IP_MAX_MTU - BAREUDP_BASE_HLEN); + if (err) + goto err; + + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto err; + + return dev; +err: + bareudp_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(bareudp_dev_create); + +static __net_init int bareudp_init_net(struct net *net) +{ + struct bareudp_net *bn = net_generic(net, bareudp_net_id); + + INIT_LIST_HEAD(&bn->bareudp_list); + return 0; +} + +static void bareudp_destroy_tunnels(struct net *net, struct list_head *head) +{ + struct bareudp_net *bn = net_generic(net, bareudp_net_id); + struct bareudp_dev *bareudp, *next; + + list_for_each_entry_safe(bareudp, next, &bn->bareudp_list, next) + unregister_netdevice_queue(bareudp->dev, head); +} + +static void __net_exit bareudp_exit_batch_net(struct list_head *net_list) +{ + struct net *net; + LIST_HEAD(list); + + rtnl_lock(); + list_for_each_entry(net, net_list, exit_list) + bareudp_destroy_tunnels(net, &list); + + /* unregister the devices gathered above */ + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations bareudp_net_ops = { + .init = bareudp_init_net, + .exit_batch = bareudp_exit_batch_net, + .id = &bareudp_net_id, + .size = sizeof(struct bareudp_net), +}; + +static int __init bareudp_init_module(void) +{ + int rc; + + rc = register_pernet_subsys(&bareudp_net_ops); + if (rc) + goto out1; + + rc = rtnl_link_register(&bareudp_link_ops); + if (rc) + goto out2; + + return 0; +out2: + unregister_pernet_subsys(&bareudp_net_ops); +out1: + return rc; +} +late_initcall(bareudp_init_module); + +static void __exit bareudp_cleanup_module(void) +{ + rtnl_link_unregister(&bareudp_link_ops); + unregister_pernet_subsys(&bareudp_net_ops); +} +module_exit(bareudp_cleanup_module); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Martin Varghese "); +MODULE_DESCRIPTION("Interface driver for UDP encapsulated traffic"); diff --git a/include/net/bareudp.h b/include/net/bareudp.h new file mode 100644 index 000000000000..513fae6f6ba1 --- /dev/null +++ b/include/net/bareudp.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __NET_BAREUDP_H +#define __NET_BAREUDP_H + +#include +#include + +struct bareudp_conf { + __be16 ethertype; + __be16 port; + u16 sport_min; +}; + +struct net_device *bareudp_dev_create(struct net *net, const char *name, + u8 name_assign_type, + struct bareudp_conf *info); + +#endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index cec1a54401f2..1bf8065fe871 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1027,6 +1027,12 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst, bool connected); +struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, struct socket *sock, + struct in6_addr *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache); struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *orig_dst); diff --git a/include/net/route.h b/include/net/route.h index a9c60fc68e36..81750ae50833 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -128,6 +128,12 @@ static inline struct rtable *__ip_route_output_key(struct net *net, struct rtable *ip_route_output_flow(struct net *, struct flowi4 *flp, const struct sock *sk); +struct rtable *ip_route_output_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, __be32 *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache); + struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 024af2d1d0af..fb4b33af23d5 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -590,6 +590,17 @@ enum ifla_geneve_df { GENEVE_DF_MAX = __GENEVE_DF_END - 1, }; +/* Bareudp section */ +enum { + IFLA_BAREUDP_UNSPEC, + IFLA_BAREUDP_PORT, + IFLA_BAREUDP_ETHERTYPE, + IFLA_BAREUDP_SRCPORT_MIN, + __IFLA_BAREUDP_MAX +}; + +#define IFLA_BAREUDP_MAX (__IFLA_BAREUDP_MAX - 1) + /* PPP section */ enum { IFLA_PPP_UNSPEC, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ebe7060d0fc9..042599cc691d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2774,6 +2774,54 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, } EXPORT_SYMBOL_GPL(ip_route_output_flow); +struct rtable *ip_route_output_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, __be32 *saddr, + const struct ip_tunnel_info *info, + u8 protocol, bool use_cache) +{ +#ifdef CONFIG_DST_CACHE + struct dst_cache *dst_cache; +#endif + struct rtable *rt = NULL; + struct flowi4 fl4; + __u8 tos; + +#ifdef CONFIG_DST_CACHE + dst_cache = (struct dst_cache *)&info->dst_cache; + if (use_cache) { + rt = dst_cache_get_ip4(dst_cache, saddr); + if (rt) + return rt; + } +#endif + memset(&fl4, 0, sizeof(fl4)); + fl4.flowi4_mark = skb->mark; + fl4.flowi4_proto = protocol; + fl4.daddr = info->key.u.ipv4.dst; + fl4.saddr = info->key.u.ipv4.src; + tos = info->key.tos; + fl4.flowi4_tos = RT_TOS(tos); + + rt = ip_route_output_key(net, &fl4); + if (IS_ERR(rt)) { + netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (rt->dst.dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr); + ip_rt_put(rt); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (use_cache) + dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr); +#endif + *saddr = fl4.saddr; + return rt; +} +EXPORT_SYMBOL_GPL(ip_route_output_tunnel); + /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 087304427bbb..8a8c2d0cfcc8 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -54,6 +54,7 @@ #include #include #include +#include static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -1196,6 +1197,75 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); +/** + * ip6_dst_lookup_tunnel - perform route lookup on tunnel + * @skb: Packet for which lookup is done + * @dev: Tunnel device + * @net: Network namespace of tunnel device + * @sk: Socket which provides route info + * @saddr: Memory to store the src ip address + * @info: Tunnel information + * @protocol: IP protocol + * @use_cahce: Flag to enable cache usage + * This function performs a route lookup on a tunnel + * + * It returns a valid dst pointer and stores src address to be used in + * tunnel in param saddr on success, else a pointer encoded error code. + */ + +struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb, + struct net_device *dev, + struct net *net, + struct socket *sock, + struct in6_addr *saddr, + const struct ip_tunnel_info *info, + u8 protocol, + bool use_cache) +{ + struct dst_entry *dst = NULL; +#ifdef CONFIG_DST_CACHE + struct dst_cache *dst_cache; +#endif + struct flowi6 fl6; + __u8 prio; + +#ifdef CONFIG_DST_CACHE + dst_cache = (struct dst_cache *)&info->dst_cache; + if (use_cache) { + dst = dst_cache_get_ip6(dst_cache, saddr); + if (dst) + return dst; + } +#endif + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = protocol; + fl6.daddr = info->key.u.ipv6.dst; + fl6.saddr = info->key.u.ipv6.src; + prio = info->key.tos; + fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio), + info->key.label); + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, + NULL); + if (IS_ERR(dst)) { + netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); + return ERR_PTR(-ENETUNREACH); + } + if (dst->dev == dev) { /* is this necessary? */ + netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr); + dst_release(dst); + return ERR_PTR(-ELOOP); + } +#ifdef CONFIG_DST_CACHE + if (use_cache) + dst_cache_set_ip6(dst_cache, dst, &fl6.saddr); +#endif + *saddr = fl6.saddr; + return dst; +} +EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel); + static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { -- cgit v1.2.3 From 4b5f67232d956a5f837082578ba682410ccab498 Mon Sep 17 00:00:00 2001 From: Martin Varghese Date: Mon, 24 Feb 2020 10:58:35 +0530 Subject: net: Special handling for IP & MPLS. Special handling is needed in bareudp module for IP & MPLS as they support more than one ethertypes. MPLS has 2 ethertypes. 0x8847 for MPLS unicast and 0x8848 for MPLS multicast. While decapsulating MPLS packet from UDP packet the tunnel destination IP address is checked to determine the ethertype. The ethertype of the packet will be set to 0x8848 if the tunnel destination IP address is a multicast IP address. The ethertype of the packet will be set to 0x8847 if the tunnel destination IP address is a unicast IP address. IP has 2 ethertypes.0x0800 for IPV4 and 0x86dd for IPv6. The version field of the IP header tunnelled will be checked to determine the ethertype. This special handling to tunnel additional ethertypes will be disabled by default and can be enabled using a flag called multiproto. This flag can be used only with ethertypes 0x8847 and 0x0800. Signed-off-by: Martin Varghese Acked-by: Willem de Bruijn Signed-off-by: David S. Miller --- Documentation/networking/bareudp.rst | 20 ++++++++++- drivers/net/bareudp.c | 67 ++++++++++++++++++++++++++++++++++-- include/net/bareudp.h | 1 + include/uapi/linux/if_link.h | 1 + 4 files changed, 85 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/bareudp.rst b/Documentation/networking/bareudp.rst index 6ee68c16cf5b..465a8b251bfe 100644 --- a/Documentation/networking/bareudp.rst +++ b/Documentation/networking/bareudp.rst @@ -12,6 +12,15 @@ The Bareudp tunnel module provides a generic L3 encapsulation tunnelling support for tunnelling different L3 protocols like MPLS, IP, NSH etc. inside a UDP tunnel. +Special Handling +---------------- +The bareudp device supports special handling for MPLS & IP as they can have +multiple ethertypes. +MPLS procotcol can have ethertypes ETH_P_MPLS_UC (unicast) & ETH_P_MPLS_MC (multicast). +IP protocol can have ethertypes ETH_P_IP (v4) & ETH_P_IPV6 (v6). +This special handling can be enabled only for ethertypes ETH_P_IP & ETH_P_MPLS_UC +with a flag called multiproto mode. + Usage ------ @@ -25,7 +34,16 @@ Usage b) ip link delete bareudp0 -2) Device Usage +2) Device creation with multiple proto mode enabled + +There are two ways to create a bareudp device for MPLS & IP with multiproto mode +enabled. + + a) ip link add dev bareudp0 type bareudp dstport 6635 ethertype 0x8847 multiproto + + b) ip link add dev bareudp0 type bareudp dstport 6635 ethertype mpls + +3) Device Usage The bareudp device could be used along with OVS or flower filter in TC. The OVS or TC flower layer must set the tunnel information in SKB dst field before diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c index 32518969139e..77e72477499d 100644 --- a/drivers/net/bareudp.c +++ b/drivers/net/bareudp.c @@ -45,6 +45,7 @@ struct bareudp_dev { __be16 ethertype; __be16 port; u16 sport_min; + bool multi_proto_mode; struct socket __rcu *sock; struct list_head next; /* bareudp node on namespace list */ struct gro_cells gro_cells; @@ -70,7 +71,52 @@ static int bareudp_udp_encap_recv(struct sock *sk, struct sk_buff *skb) else family = AF_INET6; - proto = bareudp->ethertype; + if (bareudp->ethertype == htons(ETH_P_IP)) { + struct iphdr *iphdr; + + iphdr = (struct iphdr *)(skb->data + BAREUDP_BASE_HLEN); + if (iphdr->version == 4) { + proto = bareudp->ethertype; + } else if (bareudp->multi_proto_mode && (iphdr->version == 6)) { + proto = htons(ETH_P_IPV6); + } else { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + } else if (bareudp->ethertype == htons(ETH_P_MPLS_UC)) { + struct iphdr *tunnel_hdr; + + tunnel_hdr = (struct iphdr *)skb_network_header(skb); + if (tunnel_hdr->version == 4) { + if (!ipv4_is_multicast(tunnel_hdr->daddr)) { + proto = bareudp->ethertype; + } else if (bareudp->multi_proto_mode && + ipv4_is_multicast(tunnel_hdr->daddr)) { + proto = htons(ETH_P_MPLS_MC); + } else { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + } else { + int addr_type; + struct ipv6hdr *tunnel_hdr_v6; + + tunnel_hdr_v6 = (struct ipv6hdr *)skb_network_header(skb); + addr_type = + ipv6_addr_type((struct in6_addr *)&tunnel_hdr_v6->daddr); + if (!(addr_type & IPV6_ADDR_MULTICAST)) { + proto = bareudp->ethertype; + } else if (bareudp->multi_proto_mode && + (addr_type & IPV6_ADDR_MULTICAST)) { + proto = htons(ETH_P_MPLS_MC); + } else { + bareudp->dev->stats.rx_dropped++; + goto drop; + } + } + } else { + proto = bareudp->ethertype; + } if (iptunnel_pull_header(skb, BAREUDP_BASE_HLEN, proto, @@ -369,8 +415,12 @@ static netdev_tx_t bareudp_xmit(struct sk_buff *skb, struct net_device *dev) int err; if (skb->protocol != bareudp->ethertype) { - err = -EINVAL; - goto tx_error; + if (!bareudp->multi_proto_mode || + (skb->protocol != htons(ETH_P_MPLS_MC) && + skb->protocol != htons(ETH_P_IPV6))) { + err = -EINVAL; + goto tx_error; + } } info = skb_tunnel_info(skb); @@ -463,6 +513,7 @@ static const struct nla_policy bareudp_policy[IFLA_BAREUDP_MAX + 1] = { [IFLA_BAREUDP_PORT] = { .type = NLA_U16 }, [IFLA_BAREUDP_ETHERTYPE] = { .type = NLA_U16 }, [IFLA_BAREUDP_SRCPORT_MIN] = { .type = NLA_U16 }, + [IFLA_BAREUDP_MULTIPROTO_MODE] = { .type = NLA_FLAG }, }; /* Info for udev, that this is a virtual tunnel endpoint */ @@ -545,9 +596,15 @@ static int bareudp_configure(struct net *net, struct net_device *dev, if (t) return -EBUSY; + if (conf->multi_proto_mode && + (conf->ethertype != htons(ETH_P_MPLS_UC) && + conf->ethertype != htons(ETH_P_IP))) + return -EINVAL; + bareudp->port = conf->port; bareudp->ethertype = conf->ethertype; bareudp->sport_min = conf->sport_min; + bareudp->multi_proto_mode = conf->multi_proto_mode; err = register_netdevice(dev); if (err) return err; @@ -604,6 +661,7 @@ static size_t bareudp_get_size(const struct net_device *dev) return nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_PORT */ nla_total_size(sizeof(__be16)) + /* IFLA_BAREUDP_ETHERTYPE */ nla_total_size(sizeof(__u16)) + /* IFLA_BAREUDP_SRCPORT_MIN */ + nla_total_size(0) + /* IFLA_BAREUDP_MULTIPROTO_MODE */ 0; } @@ -617,6 +675,9 @@ static int bareudp_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; if (nla_put_u16(skb, IFLA_BAREUDP_SRCPORT_MIN, bareudp->sport_min)) goto nla_put_failure; + if (bareudp->multi_proto_mode && + nla_put_flag(skb, IFLA_BAREUDP_MULTIPROTO_MODE)) + goto nla_put_failure; return 0; diff --git a/include/net/bareudp.h b/include/net/bareudp.h index 513fae6f6ba1..cb03f6f15956 100644 --- a/include/net/bareudp.h +++ b/include/net/bareudp.h @@ -10,6 +10,7 @@ struct bareudp_conf { __be16 ethertype; __be16 port; u16 sport_min; + bool multi_proto_mode; }; struct net_device *bareudp_dev_create(struct net *net, const char *name, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index fb4b33af23d5..61e0801c82df 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -596,6 +596,7 @@ enum { IFLA_BAREUDP_PORT, IFLA_BAREUDP_ETHERTYPE, IFLA_BAREUDP_SRCPORT_MIN, + IFLA_BAREUDP_MULTIPROTO_MODE, __IFLA_BAREUDP_MAX }; -- cgit v1.2.3 From 85b0589ede83d7b4aeb2bc3cb8910183876cd5ee Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 11:45:19 +0100 Subject: devlink: add trap metadata type for cookie Allow driver to indicate cookie metadata for registered traps. Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/devlink.h | 1 + include/uapi/linux/devlink.h | 2 ++ net/core/devlink.c | 3 +++ 3 files changed, 6 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/devlink.h b/include/net/devlink.h index 07923e619206..014a8b3d1499 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -541,6 +541,7 @@ struct devlink_trap_group { }; #define DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT BIT(0) +#define DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE BIT(1) /** * struct devlink_trap - Immutable packet trap attributes. diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index ae37fd4d194a..be2a2948f452 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -252,6 +252,8 @@ enum devlink_trap_type { enum { /* Trap can report input port as metadata */ DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT, + /* Trap can report flow action cookie as metadata */ + DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE, }; enum devlink_attr { diff --git a/net/core/devlink.c b/net/core/devlink.c index 0d7c5d3443d2..12e6ef749b8a 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -5540,6 +5540,9 @@ static int devlink_trap_metadata_put(struct sk_buff *msg, if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_IN_PORT) && nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_IN_PORT)) goto nla_put_failure; + if ((trap->metadata_cap & DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE) && + nla_put_flag(msg, DEVLINK_ATTR_TRAP_METADATA_TYPE_FA_COOKIE)) + goto nla_put_failure; nla_nest_end(msg, attr); -- cgit v1.2.3 From 742b8cceaabc3ed09d4c4d527d9c6311c4925545 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 25 Feb 2020 11:45:20 +0100 Subject: drop_monitor: extend by passing cookie from driver If driver passed along the cookie, push it through Netlink. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/drop_monitor.h | 3 +++ include/uapi/linux/net_dropmon.h | 1 + net/core/drop_monitor.c | 33 ++++++++++++++++++++++++++++++++- 3 files changed, 36 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/net/drop_monitor.h b/include/net/drop_monitor.h index 2ab668461463..ddd441a60e03 100644 --- a/include/net/drop_monitor.h +++ b/include/net/drop_monitor.h @@ -6,17 +6,20 @@ #include #include #include +#include /** * struct net_dm_hw_metadata - Hardware-supplied packet metadata. * @trap_group_name: Hardware trap group name. * @trap_name: Hardware trap name. * @input_dev: Input netdevice. + * @fa_cookie: Flow action user cookie. */ struct net_dm_hw_metadata { const char *trap_group_name; const char *trap_name; struct net_device *input_dev; + const struct flow_action_cookie *fa_cookie; }; #if IS_ENABLED(CONFIG_NET_DROP_MONITOR) diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 8bf79a9eb234..66048cc5d7b3 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -92,6 +92,7 @@ enum net_dm_attr { NET_DM_ATTR_HW_TRAP_COUNT, /* u32 */ NET_DM_ATTR_SW_DROPS, /* flag */ NET_DM_ATTR_HW_DROPS, /* flag */ + NET_DM_ATTR_FLOW_ACTION_COOKIE, /* binary */ __NET_DM_ATTR_MAX, NET_DM_ATTR_MAX = __NET_DM_ATTR_MAX - 1 diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 31700e0c3928..d58c1c45a895 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -700,6 +701,13 @@ static void net_dm_packet_work(struct work_struct *work) net_dm_packet_report(skb); } +static size_t +net_dm_flow_action_cookie_size(const struct net_dm_hw_metadata *hw_metadata) +{ + return hw_metadata->fa_cookie ? + nla_total_size(hw_metadata->fa_cookie->cookie_len) : 0; +} + static size_t net_dm_hw_packet_report_size(size_t payload_len, const struct net_dm_hw_metadata *hw_metadata) @@ -717,6 +725,8 @@ net_dm_hw_packet_report_size(size_t payload_len, nla_total_size(strlen(hw_metadata->trap_name) + 1) + /* NET_DM_ATTR_IN_PORT */ net_dm_in_port_size() + + /* NET_DM_ATTR_FLOW_ACTION_COOKIE */ + net_dm_flow_action_cookie_size(hw_metadata) + /* NET_DM_ATTR_TIMESTAMP */ nla_total_size(sizeof(u64)) + /* NET_DM_ATTR_ORIG_LEN */ @@ -762,6 +772,12 @@ static int net_dm_hw_packet_report_fill(struct sk_buff *msg, goto nla_put_failure; } + if (hw_metadata->fa_cookie && + nla_put(msg, NET_DM_ATTR_FLOW_ACTION_COOKIE, + hw_metadata->fa_cookie->cookie_len, + hw_metadata->fa_cookie->cookie)) + goto nla_put_failure; + if (nla_put_u64_64bit(msg, NET_DM_ATTR_TIMESTAMP, ktime_to_ns(skb->tstamp), NET_DM_ATTR_PAD)) goto nla_put_failure; @@ -794,11 +810,12 @@ nla_put_failure: static struct net_dm_hw_metadata * net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) { + const struct flow_action_cookie *fa_cookie; struct net_dm_hw_metadata *n_hw_metadata; const char *trap_group_name; const char *trap_name; - n_hw_metadata = kmalloc(sizeof(*hw_metadata), GFP_ATOMIC); + n_hw_metadata = kzalloc(sizeof(*hw_metadata), GFP_ATOMIC); if (!n_hw_metadata) return NULL; @@ -812,12 +829,25 @@ net_dm_hw_metadata_clone(const struct net_dm_hw_metadata *hw_metadata) goto free_trap_group; n_hw_metadata->trap_name = trap_name; + if (hw_metadata->fa_cookie) { + size_t cookie_size = sizeof(*fa_cookie) + + hw_metadata->fa_cookie->cookie_len; + + fa_cookie = kmemdup(hw_metadata->fa_cookie, cookie_size, + GFP_ATOMIC); + if (!fa_cookie) + goto free_trap_name; + n_hw_metadata->fa_cookie = fa_cookie; + } + n_hw_metadata->input_dev = hw_metadata->input_dev; if (n_hw_metadata->input_dev) dev_hold(n_hw_metadata->input_dev); return n_hw_metadata; +free_trap_name: + kfree(trap_name); free_trap_group: kfree(trap_group_name); free_hw_metadata: @@ -830,6 +860,7 @@ net_dm_hw_metadata_free(const struct net_dm_hw_metadata *hw_metadata) { if (hw_metadata->input_dev) dev_put(hw_metadata->input_dev); + kfree(hw_metadata->fa_cookie); kfree(hw_metadata->trap_name); kfree(hw_metadata->trap_group_name); kfree(hw_metadata); -- cgit v1.2.3 From d7f10df86202273155a9d8f8553bc2ad28e0dd46 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Wed, 26 Feb 2020 18:17:44 -0600 Subject: bpf: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Daniel Borkmann Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200227001744.GA3317@embeddedor --- include/linux/bpf-cgroup.h | 2 +- include/linux/bpf.h | 2 +- include/uapi/linux/bpf.h | 2 +- kernel/bpf/bpf_struct_ops.c | 2 +- kernel/bpf/hashtab.c | 2 +- kernel/bpf/lpm_trie.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a11d5b7dbbf3..a7cd5c7a2509 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -36,7 +36,7 @@ struct bpf_cgroup_storage_map; struct bpf_storage_buffer { struct rcu_head rcu; - char data[0]; + char data[]; }; struct bpf_cgroup_storage { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1acd5bf70350..9aa33b8f3d55 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -859,7 +859,7 @@ struct bpf_prog_array_item { struct bpf_prog_array { struct rcu_head rcu; - struct bpf_prog_array_item items[0]; + struct bpf_prog_array_item items[]; }; struct bpf_prog_array *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 906e9f2752db..8e98ced0963b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -73,7 +73,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ + __u8 data[]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c index 042f95534f86..c498f0fffb40 100644 --- a/kernel/bpf/bpf_struct_ops.c +++ b/kernel/bpf/bpf_struct_ops.c @@ -23,7 +23,7 @@ enum bpf_struct_ops_state { struct bpf_struct_ops_value { BPF_STRUCT_OPS_COMMON_VALUE; - char data[0] ____cacheline_aligned_in_smp; + char data[] ____cacheline_aligned_in_smp; }; struct bpf_struct_ops_map { diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 53d9483fee10..d541c8486c95 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -118,7 +118,7 @@ struct htab_elem { struct bpf_lru_node lru_node; }; u32 hash; - char key[0] __aligned(8); + char key[] __aligned(8); }; static inline bool htab_is_prealloc(const struct bpf_htab *htab) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 3b3c420bc8ed..65c236cf341e 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -25,7 +25,7 @@ struct lpm_trie_node { struct lpm_trie_node __rcu *child[2]; u32 prefixlen; u32 flags; - u8 data[0]; + u8 data[]; }; struct lpm_trie { -- cgit v1.2.3 From 0df6d32842b9a5f97a29ea90c8adc5cfac38341d Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:15 -0800 Subject: inet_diag: Move the INET_DIAG_REQ_BYTECODE nlattr to cb->data The INET_DIAG_REQ_BYTECODE nlattr is currently re-found every time when the "dump()" is re-started. In a latter patch, it will also need to parse the new INET_DIAG_REQ_SK_BPF_STORAGES nlattr to learn the map_fds. Thus, this patch takes this chance to store the parsed nlattr in cb->data during the "start" time of a dump. By doing this, the "bc" argument also becomes unnecessary and is removed. Also, the two copies of the INET_DIAG_REQ_BYTECODE parsing-audit logic between compat/current version can be consolidated to one. Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230415.1975555-1-kafai@fb.com --- include/linux/inet_diag.h | 11 ++-- include/uapi/linux/inet_diag.h | 3 +- net/dccp/diag.c | 4 +- net/ipv4/inet_diag.c | 117 ++++++++++++++++++++++++----------------- net/ipv4/raw_diag.c | 6 ++- net/ipv4/tcp_diag.c | 4 +- net/ipv4/udp_diag.c | 15 +++--- net/sctp/diag.c | 2 +- 8 files changed, 98 insertions(+), 64 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 6b157ce07d74..1bb94cac265f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -15,8 +15,7 @@ struct netlink_callback; struct inet_diag_handler { void (*dump)(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int (*dump_one)(struct netlink_callback *cb, const struct inet_diag_req_v2 *req); @@ -39,6 +38,11 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct inet_diag_dump_data { + struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; +#define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +}; + struct inet_connection_sock; int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, @@ -46,8 +50,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, u16 nlmsg_flags, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc); + const struct inet_diag_req_v2 *r); int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct netlink_callback *cb, const struct inet_diag_req_v2 *req); diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index a1ff345b3f33..bab9a9f8da12 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -64,9 +64,10 @@ struct inet_diag_req_raw { enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, + __INET_DIAG_REQ_MAX, }; -#define INET_DIAG_REQ_MAX INET_DIAG_REQ_BYTECODE +#define INET_DIAG_REQ_MAX (__INET_DIAG_REQ_MAX - 1) /* Bytecode is sequence of 4 byte commands followed by variable arguments. * All the commands identified by "code" are conditional jumps forward: diff --git a/net/dccp/diag.c b/net/dccp/diag.c index 8f1e2a653f6d..8a82c5a2c5a8 100644 --- a/net/dccp/diag.c +++ b/net/dccp/diag.c @@ -46,9 +46,9 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc); + inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r); } static int dccp_diag_dump_one(struct netlink_callback *cb, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index d2ecff3195ba..4bce8a477699 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -495,9 +495,11 @@ static int inet_diag_cmd_exact(int cmd, struct sk_buff *in_skb, if (IS_ERR(handler)) { err = PTR_ERR(handler); } else if (cmd == SOCK_DIAG_BY_FAMILY) { + struct inet_diag_dump_data empty_dump_data = {}; struct netlink_callback cb = { .nlh = nlh, .skb = in_skb, + .data = &empty_dump_data, }; err = handler->dump_one(&cb, req); } else if (cmd == SOCK_DESTROY && handler->destroy) { @@ -863,14 +865,17 @@ static void twsk_build_assert(void) void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + struct inet_diag_dump_data *cb_data = cb->data; struct net *net = sock_net(skb->sk); u32 idiag_states = r->idiag_states; int i, num, s_i, s_num; + struct nlattr *bc; struct sock *sk; + bc = cb_data->inet_diag_nla_bc; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; s_i = cb->args[1]; @@ -1014,15 +1019,14 @@ out: EXPORT_SYMBOL_GPL(inet_diag_dump_icsk); static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc) + const struct inet_diag_req_v2 *r) { const struct inet_diag_handler *handler; int err = 0; handler = inet_diag_lock_handler(r->sdiag_protocol); if (!IS_ERR(handler)) - handler->dump(skb, cb, r, bc); + handler->dump(skb, cb, r); else err = PTR_ERR(handler); inet_diag_unlock_handler(handler); @@ -1032,13 +1036,57 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int hdrlen = sizeof(struct inet_diag_req_v2); - struct nlattr *bc = NULL; + return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh)); +} + +static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) +{ + const struct nlmsghdr *nlh = cb->nlh; + struct inet_diag_dump_data *cb_data; + struct sk_buff *skb = cb->skb; + struct nlattr *nla; + int rem, err; + + cb_data = kzalloc(sizeof(*cb_data), GFP_KERNEL); + if (!cb_data) + return -ENOMEM; + + nla_for_each_attr(nla, nlmsg_attrdata(nlh, hdrlen), + nlmsg_attrlen(nlh, hdrlen), rem) { + int type = nla_type(nla); + + if (type < __INET_DIAG_REQ_MAX) + cb_data->req_nlas[type] = nla; + } + + nla = cb_data->inet_diag_nla_bc; + if (nla) { + err = inet_diag_bc_audit(nla, skb); + if (err) { + kfree(cb_data); + return err; + } + } + + cb->data = cb_data; + return 0; +} + +static int inet_diag_dump_start(struct netlink_callback *cb) +{ + return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req_v2)); +} + +static int inet_diag_dump_start_compat(struct netlink_callback *cb) +{ + return __inet_diag_dump_start(cb, sizeof(struct inet_diag_req)); +} - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); +static int inet_diag_dump_done(struct netlink_callback *cb) +{ + kfree(cb->data); - return __inet_diag_dump(skb, cb, nlmsg_data(cb->nlh), bc); + return 0; } static int inet_diag_type2proto(int type) @@ -1057,9 +1105,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, struct netlink_callback *cb) { struct inet_diag_req *rc = nlmsg_data(cb->nlh); - int hdrlen = sizeof(struct inet_diag_req); struct inet_diag_req_v2 req; - struct nlattr *bc = NULL; req.sdiag_family = AF_UNSPEC; /* compatibility */ req.sdiag_protocol = inet_diag_type2proto(cb->nlh->nlmsg_type); @@ -1067,10 +1113,7 @@ static int inet_diag_dump_compat(struct sk_buff *skb, req.idiag_states = rc->idiag_states; req.id = rc->id; - if (nlmsg_attrlen(cb->nlh, hdrlen)) - bc = nlmsg_find_attr(cb->nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - - return __inet_diag_dump(skb, cb, &req, bc); + return __inet_diag_dump(skb, cb, &req); } static int inet_diag_get_exact_compat(struct sk_buff *in_skb, @@ -1098,22 +1141,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) return -EINVAL; if (nlh->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(nlh, hdrlen)) { - struct nlattr *attr; - int err; - - attr = nlmsg_find_attr(nlh, hdrlen, - INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr, skb); - if (err) - return err; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump_compat, - }; - return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); - } + struct netlink_dump_control c = { + .start = inet_diag_dump_start_compat, + .done = inet_diag_dump_done, + .dump = inet_diag_dump_compat, + }; + return netlink_dump_start(net->diag_nlsk, skb, nlh, &c); } return inet_diag_get_exact_compat(skb, nlh); @@ -1129,22 +1162,12 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_type == SOCK_DIAG_BY_FAMILY && h->nlmsg_flags & NLM_F_DUMP) { - if (nlmsg_attrlen(h, hdrlen)) { - struct nlattr *attr; - int err; - - attr = nlmsg_find_attr(h, hdrlen, - INET_DIAG_REQ_BYTECODE); - err = inet_diag_bc_audit(attr, skb); - if (err) - return err; - } - { - struct netlink_dump_control c = { - .dump = inet_diag_dump, - }; - return netlink_dump_start(net->diag_nlsk, skb, h, &c); - } + struct netlink_dump_control c = { + .start = inet_diag_dump_start, + .done = inet_diag_dump_done, + .dump = inet_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); } return inet_diag_cmd_exact(h->nlmsg_type, skb, h, nlmsg_data(h)); diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index a2933eeabd91..d19cce39be1b 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -138,17 +138,21 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, } static void raw_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct raw_hashinfo *hashinfo = raw_get_hashinfo(r); struct net *net = sock_net(skb->sk); + struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; struct sock *sk = NULL; + struct nlattr *bc; if (IS_ERR(hashinfo)) return; + cb_data = cb->data; + bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index bcd3a26efff1..75a1c985f49a 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -179,9 +179,9 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) } static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc); + inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r); } static int tcp_diag_dump_one(struct netlink_callback *cb, diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 7d65a6a5cd51..93884696abdd 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -89,12 +89,16 @@ out_nosk: static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); + struct inet_diag_dump_data *cb_data; int num, s_num, slot, s_slot; + struct nlattr *bc; + cb_data = cb->data; + bc = cb_data->inet_diag_nla_bc; s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -142,9 +146,9 @@ done: } static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - udp_dump(&udp_table, skb, cb, r, bc); + udp_dump(&udp_table, skb, cb, r); } static int udp_diag_dump_one(struct netlink_callback *cb, @@ -245,10 +249,9 @@ static const struct inet_diag_handler udp_diag_handler = { }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, - struct nlattr *bc) + const struct inet_diag_req_v2 *r) { - udp_dump(&udplite_table, skb, cb, r, bc); + udp_dump(&udplite_table, skb, cb, r); } static int udplite_diag_dump_one(struct netlink_callback *cb, diff --git a/net/sctp/diag.c b/net/sctp/diag.c index bed6436cd0af..69743a6aaf6f 100644 --- a/net/sctp/diag.c +++ b/net/sctp/diag.c @@ -471,7 +471,7 @@ static int sctp_diag_dump_one(struct netlink_callback *cb, } static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r, struct nlattr *bc) + const struct inet_diag_req_v2 *r) { u32 idiag_states = r->idiag_states; struct net *net = sock_net(skb->sk); -- cgit v1.2.3 From 1ed4d92458a969e71e7914550b6f0c730c14d84e Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:21 -0800 Subject: bpf: INET_DIAG support in bpf_sk_storage This patch adds INET_DIAG support to bpf_sk_storage. 1. Although this series adds bpf_sk_storage diag capability to inet sk, bpf_sk_storage is in general applicable to all fullsock. Hence, the bpf_sk_storage logic will operate on SK_DIAG_* nlattr. The caller will pass in its specific nesting nlattr (e.g. INET_DIAG_*) as the argument. 2. The request will be like: INET_DIAG_REQ_SK_BPF_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) SK_DIAG_BPF_STORAGE_REQ_MAP_FD (nla_put_u32) ...... Considering there could have multiple bpf_sk_storages in a sk, instead of reusing INET_DIAG_INFO ("ss -i"), the user can select some specific bpf_sk_storage to dump by specifying an array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD. If no SK_DIAG_BPF_STORAGE_REQ_MAP_FD is specified (i.e. an empty INET_DIAG_REQ_SK_BPF_STORAGES), it will dump all bpf_sk_storages of a sk. 3. The reply will be like: INET_DIAG_BPF_SK_STORAGES (nla_nest) (defined in latter patch) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) SK_DIAG_BPF_STORAGE (nla_nest) SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) ...... 4. Unlike other INET_DIAG info of a sk which is pretty static, the size required to dump the bpf_sk_storage(s) of a sk is dynamic as the system adding more bpf_sk_storage_map. It is hard to set a static min_dump_alloc size. Hence, this series learns it at the runtime and adjust the cb->min_dump_alloc as it iterates all sk(s) of a system. The "unsigned int *res_diag_size" in bpf_sk_storage_diag_put() is for this purpose. The next patch will update the cb->min_dump_alloc as it iterates the sk(s). Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230421.1975729-1-kafai@fb.com --- include/linux/bpf.h | 1 + include/net/bpf_sk_storage.h | 27 ++++ include/uapi/linux/sock_diag.h | 26 ++++ kernel/bpf/syscall.c | 15 +++ net/core/bpf_sk_storage.c | 283 ++++++++++++++++++++++++++++++++++++++++- 5 files changed, 346 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 9aa33b8f3d55..6015a4daf118 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1023,6 +1023,7 @@ void __bpf_free_used_maps(struct bpf_prog_aux *aux, void bpf_prog_free_id(struct bpf_prog *prog, bool do_idr_lock); void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock); +struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_inc(struct bpf_map *map); diff --git a/include/net/bpf_sk_storage.h b/include/net/bpf_sk_storage.h index 8e4f831d2e52..5036c94c0503 100644 --- a/include/net/bpf_sk_storage.h +++ b/include/net/bpf_sk_storage.h @@ -10,14 +10,41 @@ void bpf_sk_storage_free(struct sock *sk); extern const struct bpf_func_proto bpf_sk_storage_get_proto; extern const struct bpf_func_proto bpf_sk_storage_delete_proto; +struct bpf_sk_storage_diag; +struct sk_buff; +struct nlattr; +struct sock; + #ifdef CONFIG_BPF_SYSCALL int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk); +struct bpf_sk_storage_diag * +bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs); +void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag); +int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, + struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size); #else static inline int bpf_sk_storage_clone(const struct sock *sk, struct sock *newsk) { return 0; } +static inline struct bpf_sk_storage_diag * +bpf_sk_storage_diag_alloc(const struct nlattr *nla) +{ + return NULL; +} +static inline void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag) +{ +} +static inline int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, + struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size) +{ + return 0; +} #endif #endif /* _BPF_SK_STORAGE_H */ diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h index e5925009a652..5f74a5f6091d 100644 --- a/include/uapi/linux/sock_diag.h +++ b/include/uapi/linux/sock_diag.h @@ -36,4 +36,30 @@ enum sknetlink_groups { }; #define SKNLGRP_MAX (__SKNLGRP_MAX - 1) +enum { + SK_DIAG_BPF_STORAGE_REQ_NONE, + SK_DIAG_BPF_STORAGE_REQ_MAP_FD, + __SK_DIAG_BPF_STORAGE_REQ_MAX, +}; + +#define SK_DIAG_BPF_STORAGE_REQ_MAX (__SK_DIAG_BPF_STORAGE_REQ_MAX - 1) + +enum { + SK_DIAG_BPF_STORAGE_REP_NONE, + SK_DIAG_BPF_STORAGE, + __SK_DIAG_BPF_STORAGE_REP_MAX, +}; + +#define SK_DIAB_BPF_STORAGE_REP_MAX (__SK_DIAG_BPF_STORAGE_REP_MAX - 1) + +enum { + SK_DIAG_BPF_STORAGE_NONE, + SK_DIAG_BPF_STORAGE_PAD, + SK_DIAG_BPF_STORAGE_MAP_ID, + SK_DIAG_BPF_STORAGE_MAP_VALUE, + __SK_DIAG_BPF_STORAGE_MAX, +}; + +#define SK_DIAG_BPF_STORAGE_MAX (__SK_DIAG_BPF_STORAGE_MAX - 1) + #endif /* _UAPI__SOCK_DIAG_H__ */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index a79743a89815..c536c65256ad 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -902,6 +902,21 @@ void bpf_map_inc_with_uref(struct bpf_map *map) } EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); +struct bpf_map *bpf_map_get(u32 ufd) +{ + struct fd f = fdget(ufd); + struct bpf_map *map; + + map = __bpf_map_get(f); + if (IS_ERR(map)) + return map; + + bpf_map_inc(map); + fdput(f); + + return map; +} + struct bpf_map *bpf_map_get_with_uref(u32 ufd) { struct fd f = fdget(ufd); diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 3ab23f698221..3415a4896c59 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -8,6 +8,7 @@ #include #include #include +#include #include static atomic_t cache_idx; @@ -606,6 +607,14 @@ static void bpf_sk_storage_map_free(struct bpf_map *map) kfree(map); } +/* U16_MAX is much more than enough for sk local storage + * considering a tcp_sock is ~2k. + */ +#define MAX_VALUE_SIZE \ + min_t(u32, \ + (KMALLOC_MAX_SIZE - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem)), \ + (U16_MAX - sizeof(struct bpf_sk_storage_elem))) + static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) { if (attr->map_flags & ~SK_STORAGE_CREATE_FLAG_MASK || @@ -619,12 +628,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr) if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (attr->value_size >= KMALLOC_MAX_SIZE - - MAX_BPF_STACK - sizeof(struct bpf_sk_storage_elem) || - /* U16_MAX is much more than enough for sk local storage - * considering a tcp_sock is ~2k. - */ - attr->value_size > U16_MAX - sizeof(struct bpf_sk_storage_elem)) + if (attr->value_size > MAX_VALUE_SIZE) return -E2BIG; return 0; @@ -910,3 +914,270 @@ const struct bpf_func_proto bpf_sk_storage_delete_proto = { .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_SOCKET, }; + +struct bpf_sk_storage_diag { + u32 nr_maps; + struct bpf_map *maps[]; +}; + +/* The reply will be like: + * INET_DIAG_BPF_SK_STORAGES (nla_nest) + * SK_DIAG_BPF_STORAGE (nla_nest) + * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) + * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) + * SK_DIAG_BPF_STORAGE (nla_nest) + * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) + * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) + * .... + */ +static int nla_value_size(u32 value_size) +{ + /* SK_DIAG_BPF_STORAGE (nla_nest) + * SK_DIAG_BPF_STORAGE_MAP_ID (nla_put_u32) + * SK_DIAG_BPF_STORAGE_MAP_VALUE (nla_reserve_64bit) + */ + return nla_total_size(0) + nla_total_size(sizeof(u32)) + + nla_total_size_64bit(value_size); +} + +void bpf_sk_storage_diag_free(struct bpf_sk_storage_diag *diag) +{ + u32 i; + + if (!diag) + return; + + for (i = 0; i < diag->nr_maps; i++) + bpf_map_put(diag->maps[i]); + + kfree(diag); +} +EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_free); + +static bool diag_check_dup(const struct bpf_sk_storage_diag *diag, + const struct bpf_map *map) +{ + u32 i; + + for (i = 0; i < diag->nr_maps; i++) { + if (diag->maps[i] == map) + return true; + } + + return false; +} + +struct bpf_sk_storage_diag * +bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs) +{ + struct bpf_sk_storage_diag *diag; + struct nlattr *nla; + u32 nr_maps = 0; + int rem, err; + + /* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as + * the map_alloc_check() side also does. + */ + if (!capable(CAP_SYS_ADMIN)) + return ERR_PTR(-EPERM); + + nla_for_each_nested(nla, nla_stgs, rem) { + if (nla_type(nla) == SK_DIAG_BPF_STORAGE_REQ_MAP_FD) + nr_maps++; + } + + diag = kzalloc(sizeof(*diag) + sizeof(diag->maps[0]) * nr_maps, + GFP_KERNEL); + if (!diag) + return ERR_PTR(-ENOMEM); + + nla_for_each_nested(nla, nla_stgs, rem) { + struct bpf_map *map; + int map_fd; + + if (nla_type(nla) != SK_DIAG_BPF_STORAGE_REQ_MAP_FD) + continue; + + map_fd = nla_get_u32(nla); + map = bpf_map_get(map_fd); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto err_free; + } + if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) { + bpf_map_put(map); + err = -EINVAL; + goto err_free; + } + if (diag_check_dup(diag, map)) { + bpf_map_put(map); + err = -EEXIST; + goto err_free; + } + diag->maps[diag->nr_maps++] = map; + } + + return diag; + +err_free: + bpf_sk_storage_diag_free(diag); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_alloc); + +static int diag_get(struct bpf_sk_storage_data *sdata, struct sk_buff *skb) +{ + struct nlattr *nla_stg, *nla_value; + struct bpf_sk_storage_map *smap; + + /* It cannot exceed max nlattr's payload */ + BUILD_BUG_ON(U16_MAX - NLA_HDRLEN < MAX_VALUE_SIZE); + + nla_stg = nla_nest_start(skb, SK_DIAG_BPF_STORAGE); + if (!nla_stg) + return -EMSGSIZE; + + smap = rcu_dereference(sdata->smap); + if (nla_put_u32(skb, SK_DIAG_BPF_STORAGE_MAP_ID, smap->map.id)) + goto errout; + + nla_value = nla_reserve_64bit(skb, SK_DIAG_BPF_STORAGE_MAP_VALUE, + smap->map.value_size, + SK_DIAG_BPF_STORAGE_PAD); + if (!nla_value) + goto errout; + + if (map_value_has_spin_lock(&smap->map)) + copy_map_value_locked(&smap->map, nla_data(nla_value), + sdata->data, true); + else + copy_map_value(&smap->map, nla_data(nla_value), sdata->data); + + nla_nest_end(skb, nla_stg); + return 0; + +errout: + nla_nest_cancel(skb, nla_stg); + return -EMSGSIZE; +} + +static int bpf_sk_storage_diag_put_all(struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size) +{ + /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ + unsigned int diag_size = nla_total_size(0); + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_elem *selem; + struct bpf_sk_storage_map *smap; + struct nlattr *nla_stgs; + unsigned int saved_len; + int err = 0; + + rcu_read_lock(); + + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage || hlist_empty(&sk_storage->list)) { + rcu_read_unlock(); + return 0; + } + + nla_stgs = nla_nest_start(skb, stg_array_type); + if (!nla_stgs) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + + saved_len = skb->len; + hlist_for_each_entry_rcu(selem, &sk_storage->list, snode) { + smap = rcu_dereference(SDATA(selem)->smap); + diag_size += nla_value_size(smap->map.value_size); + + if (nla_stgs && diag_get(SDATA(selem), skb)) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + } + + rcu_read_unlock(); + + if (nla_stgs) { + if (saved_len == skb->len) + nla_nest_cancel(skb, nla_stgs); + else + nla_nest_end(skb, nla_stgs); + } + + if (diag_size == nla_total_size(0)) { + *res_diag_size = 0; + return 0; + } + + *res_diag_size = diag_size; + return err; +} + +int bpf_sk_storage_diag_put(struct bpf_sk_storage_diag *diag, + struct sock *sk, struct sk_buff *skb, + int stg_array_type, + unsigned int *res_diag_size) +{ + /* stg_array_type (e.g. INET_DIAG_BPF_SK_STORAGES) */ + unsigned int diag_size = nla_total_size(0); + struct bpf_sk_storage *sk_storage; + struct bpf_sk_storage_data *sdata; + struct nlattr *nla_stgs; + unsigned int saved_len; + int err = 0; + u32 i; + + *res_diag_size = 0; + + /* No map has been specified. Dump all. */ + if (!diag->nr_maps) + return bpf_sk_storage_diag_put_all(sk, skb, stg_array_type, + res_diag_size); + + rcu_read_lock(); + sk_storage = rcu_dereference(sk->sk_bpf_storage); + if (!sk_storage || hlist_empty(&sk_storage->list)) { + rcu_read_unlock(); + return 0; + } + + nla_stgs = nla_nest_start(skb, stg_array_type); + if (!nla_stgs) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + + saved_len = skb->len; + for (i = 0; i < diag->nr_maps; i++) { + sdata = __sk_storage_lookup(sk_storage, + (struct bpf_sk_storage_map *)diag->maps[i], + false); + + if (!sdata) + continue; + + diag_size += nla_value_size(diag->maps[i]->value_size); + + if (nla_stgs && diag_get(sdata, skb)) + /* Continue to learn diag_size */ + err = -EMSGSIZE; + } + rcu_read_unlock(); + + if (nla_stgs) { + if (saved_len == skb->len) + nla_nest_cancel(skb, nla_stgs); + else + nla_nest_end(skb, nla_stgs); + } + + if (diag_size == nla_total_size(0)) { + *res_diag_size = 0; + return 0; + } + + *res_diag_size = diag_size; + return err; +} +EXPORT_SYMBOL_GPL(bpf_sk_storage_diag_put); -- cgit v1.2.3 From 085c20cacf2b72991ce1c9d99a5e2f1d9e73bb68 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 25 Feb 2020 15:04:27 -0800 Subject: bpf: inet_diag: Dump bpf_sk_storages in inet_diag_dump() This patch will dump out the bpf_sk_storages of a sk if the request has the INET_DIAG_REQ_SK_BPF_STORAGES nlattr. An array of SK_DIAG_BPF_STORAGE_REQ_MAP_FD can be specified in INET_DIAG_REQ_SK_BPF_STORAGES to select which bpf_sk_storage to dump. If no map_fd is specified, all bpf_sk_storages of a sk will be dumped. bpf_sk_storages can be added to the system at runtime. It is difficult to find a proper static value for cb->min_dump_alloc. This patch learns the nlattr size required to dump the bpf_sk_storages of a sk. If it happens to be the very first nlmsg of a dump and it cannot fit the needed bpf_sk_storages, it will try to expand the skb by "pskb_expand_head()". Instead of expanding it in inet_sk_diag_fill(), it is expanded at a sleepable context in __inet_diag_dump() so __GFP_DIRECT_RECLAIM can be used. In __inet_diag_dump(), it will retry as long as the skb is empty and the cb->min_dump_alloc becomes larger than before. cb->min_dump_alloc is bounded by KMALLOC_MAX_SIZE. The min_dump_alloc is also changed from 'u16' to 'u32' to accommodate a sk that may have a few large bpf_sk_storages. The updated cb->min_dump_alloc will also be used to allocate the skb in the next dump. This logic already exists in netlink_dump(). Here is the sample output of a locally modified 'ss' and it could be made more readable by using BTF later: [root@arch-fb-vm1 ~]# ss --bpf-map-id 14 --bpf-map-id 13 -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:PortProcess ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] [root@arch-fb-vm1 ~]# ~/devshare/github/iproute2/misc/ss --bpf-maps -t6an 'dst [::1]:8989' State Recv-Q Send-Q Local Address:Port Peer Address:Port Process ESTAB 0 0 [::1]:51072 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] ESTAB 0 0 [::1]:51070 [::1]:8989 bpf_map_id:14 value:[ 3feb ] bpf_map_id:13 value:[ 3f ] bpf_map_id:12 value:[ 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000 0000... total:65407 ] Signed-off-by: Martin KaFai Lau Signed-off-by: Alexei Starovoitov Acked-by: Song Liu Link: https://lore.kernel.org/bpf/20200225230427.1976129-1-kafai@fb.com --- include/linux/inet_diag.h | 4 +++ include/linux/netlink.h | 4 +-- include/uapi/linux/inet_diag.h | 2 ++ net/ipv4/inet_diag.c | 74 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 1bb94cac265f..e4ba25d63913 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -38,9 +38,13 @@ struct inet_diag_handler { __u16 idiag_info_size; }; +struct bpf_sk_storage_diag; struct inet_diag_dump_data { struct nlattr *req_nlas[__INET_DIAG_REQ_MAX]; #define inet_diag_nla_bc req_nlas[INET_DIAG_REQ_BYTECODE] +#define inet_diag_nla_bpf_stgs req_nlas[INET_DIAG_REQ_SK_BPF_STORAGES] + + struct bpf_sk_storage_diag *bpf_stg_diag; }; struct inet_connection_sock; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 205fa7b1f07a..788969ccbbde 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -188,10 +188,10 @@ struct netlink_callback { struct module *module; struct netlink_ext_ack *extack; u16 family; - u16 min_dump_alloc; - bool strict_check; u16 answer_flags; + u32 min_dump_alloc; unsigned int prev_seq, seq; + bool strict_check; union { u8 ctx[48]; diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index bab9a9f8da12..75dffd78363a 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -64,6 +64,7 @@ struct inet_diag_req_raw { enum { INET_DIAG_REQ_NONE, INET_DIAG_REQ_BYTECODE, + INET_DIAG_REQ_SK_BPF_STORAGES, __INET_DIAG_REQ_MAX, }; @@ -155,6 +156,7 @@ enum { INET_DIAG_CLASS_ID, /* request as INET_DIAG_TCLASS */ INET_DIAG_MD5SIG, INET_DIAG_ULP_INFO, + INET_DIAG_SK_BPF_STORAGES, __INET_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 4bce8a477699..e1cad25909df 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -156,6 +157,8 @@ errout: } EXPORT_SYMBOL_GPL(inet_diag_msg_attrs_fill); +#define MAX_DUMP_ALLOC_SIZE (KMALLOC_MAX_SIZE - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, @@ -163,12 +166,14 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; + struct inet_diag_dump_data *cb_data; int ext = req->idiag_ext; struct inet_diag_msg *r; struct nlmsghdr *nlh; struct nlattr *attr; void *info = NULL; + cb_data = cb->data; handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(!handler); @@ -302,6 +307,48 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } + /* Keep it at the end for potential retry with a larger skb, + * or else do best-effort fitting, which is only done for the + * first_nlmsg. + */ + if (cb_data->bpf_stg_diag) { + bool first_nlmsg = ((unsigned char *)nlh == skb->data); + unsigned int prev_min_dump_alloc; + unsigned int total_nla_size = 0; + unsigned int msg_len; + int err; + + msg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; + err = bpf_sk_storage_diag_put(cb_data->bpf_stg_diag, sk, skb, + INET_DIAG_SK_BPF_STORAGES, + &total_nla_size); + + if (!err) + goto out; + + total_nla_size += msg_len; + prev_min_dump_alloc = cb->min_dump_alloc; + if (total_nla_size > prev_min_dump_alloc) + cb->min_dump_alloc = min_t(u32, total_nla_size, + MAX_DUMP_ALLOC_SIZE); + + if (!first_nlmsg) + goto errout; + + if (cb->min_dump_alloc > prev_min_dump_alloc) + /* Retry with pskb_expand_head() with + * __GFP_DIRECT_RECLAIM + */ + goto errout; + + WARN_ON_ONCE(total_nla_size <= prev_min_dump_alloc); + + /* Send what we have for this sk + * and move on to the next sk in the following + * dump() + */ + } + out: nlmsg_end(skb, nlh); return 0; @@ -1022,8 +1069,11 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { const struct inet_diag_handler *handler; + u32 prev_min_dump_alloc; int err = 0; +again: + prev_min_dump_alloc = cb->min_dump_alloc; handler = inet_diag_lock_handler(r->sdiag_protocol); if (!IS_ERR(handler)) handler->dump(skb, cb, r); @@ -1031,6 +1081,15 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, err = PTR_ERR(handler); inet_diag_unlock_handler(handler); + /* The skb is not large enough to fit one sk info and + * inet_sk_diag_fill() has requested for a larger skb. + */ + if (!skb->len && cb->min_dump_alloc > prev_min_dump_alloc) { + err = pskb_expand_head(skb, 0, cb->min_dump_alloc, GFP_KERNEL); + if (!err) + goto again; + } + return err ? : skb->len; } @@ -1068,6 +1127,18 @@ static int __inet_diag_dump_start(struct netlink_callback *cb, int hdrlen) } } + nla = cb_data->inet_diag_nla_bpf_stgs; + if (nla) { + struct bpf_sk_storage_diag *bpf_stg_diag; + + bpf_stg_diag = bpf_sk_storage_diag_alloc(nla); + if (IS_ERR(bpf_stg_diag)) { + kfree(cb_data); + return PTR_ERR(bpf_stg_diag); + } + cb_data->bpf_stg_diag = bpf_stg_diag; + } + cb->data = cb_data; return 0; } @@ -1084,6 +1155,9 @@ static int inet_diag_dump_start_compat(struct netlink_callback *cb) static int inet_diag_dump_done(struct netlink_callback *cb) { + struct inet_diag_dump_data *cb_data = cb->data; + + bpf_sk_storage_diag_free(cb_data->bpf_stg_diag); kfree(cb->data); return 0; -- cgit v1.2.3 From 5a8b7c4b7f95c8936cd3dcba01169cd5840e291f Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Fri, 28 Feb 2020 19:07:01 -0600 Subject: arcnet: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/uapi/linux/if_arcnet.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_arcnet.h b/include/uapi/linux/if_arcnet.h index 683878036d76..b122cfac7128 100644 --- a/include/uapi/linux/if_arcnet.h +++ b/include/uapi/linux/if_arcnet.h @@ -60,7 +60,7 @@ struct arc_rfc1201 { __u8 proto; /* protocol ID field - varies */ __u8 split_flag; /* for use with split packets */ __be16 sequence; /* sequence number */ - __u8 payload[0]; /* space remaining in packet (504 bytes)*/ + __u8 payload[]; /* space remaining in packet (504 bytes)*/ }; #define RFC1201_HDR_SIZE 4 @@ -69,7 +69,7 @@ struct arc_rfc1201 { */ struct arc_rfc1051 { __u8 proto; /* ARC_P_RFC1051_ARP/RFC1051_IP */ - __u8 payload[0]; /* 507 bytes */ + __u8 payload[]; /* 507 bytes */ }; #define RFC1051_HDR_SIZE 1 @@ -80,7 +80,7 @@ struct arc_rfc1051 { struct arc_eth_encap { __u8 proto; /* Always ARC_P_ETHER */ struct ethhdr eth; /* standard ethernet header (yuck!) */ - __u8 payload[0]; /* 493 bytes */ + __u8 payload[]; /* 493 bytes */ }; #define ETH_ENCAP_HDR_SIZE 14 -- cgit v1.2.3 From 1776658da830eb024b63ac34167dfd4bd6c21cf9 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Mon, 2 Mar 2020 06:02:09 -0600 Subject: drop_monitor: Replace zero-length array with flexible-array member The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: David S. Miller --- include/uapi/linux/net_dropmon.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/net_dropmon.h b/include/uapi/linux/net_dropmon.h index 66048cc5d7b3..67e31f329190 100644 --- a/include/uapi/linux/net_dropmon.h +++ b/include/uapi/linux/net_dropmon.h @@ -29,12 +29,12 @@ struct net_dm_config_entry { struct net_dm_config_msg { __u32 entries; - struct net_dm_config_entry options[0]; + struct net_dm_config_entry options[]; }; struct net_dm_alert_msg { __u32 entries; - struct net_dm_drop_point points[0]; + struct net_dm_drop_point points[]; }; struct net_dm_user_msg { -- cgit v1.2.3 From acf1ee44ca5da39755d2aa9080392eae46a0eb34 Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Tue, 3 Mar 2020 08:12:42 -0600 Subject: devlink: Introduce devlink port flavour virtual Currently mlx5 PCI PF and VF devlink devices register their ports as physical port in non-representors mode. Introduce a new port flavour as virtual so that virtual devices can register 'virtual' flavour to make it more clear to users. An example of one PCI PF and 2 PCI virtual functions, each having one devlink port. $ devlink port show pci/0000:06:00.0/1: type eth netdev ens2f0 flavour physical port 0 pci/0000:06:00.2/1: type eth netdev ens2f2 flavour virtual port 0 pci/0000:06:00.3/1: type eth netdev ens2f3 flavour virtual port 0 Reviewed-by: Jiri Pirko Signed-off-by: Parav Pandit Acked-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/uapi/linux/devlink.h | 1 + net/core/devlink.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h index be2a2948f452..dfdffc42e87d 100644 --- a/include/uapi/linux/devlink.h +++ b/include/uapi/linux/devlink.h @@ -187,6 +187,7 @@ enum devlink_port_flavour { * for the PCI VF. It is an internal * port that faces the PCI VF. */ + DEVLINK_PORT_FLAVOUR_VIRTUAL, /* Any virtual port facing the user. */ }; enum devlink_param_cmode { diff --git a/net/core/devlink.c b/net/core/devlink.c index 295d761cbfb1..e8ccea9035c8 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -545,6 +545,7 @@ static int devlink_nl_port_attrs_put(struct sk_buff *msg, case DEVLINK_PORT_FLAVOUR_PHYSICAL: case DEVLINK_PORT_FLAVOUR_CPU: case DEVLINK_PORT_FLAVOUR_DSA: + case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (nla_put_u32(msg, DEVLINK_ATTR_PORT_NUMBER, attrs->phys.port_number)) return -EMSGSIZE; @@ -6806,6 +6807,7 @@ static int __devlink_port_phys_port_name_get(struct devlink_port *devlink_port, switch (attrs->flavour) { case DEVLINK_PORT_FLAVOUR_PHYSICAL: + case DEVLINK_PORT_FLAVOUR_VIRTUAL: if (!attrs->split) n = snprintf(name, len, "p%u", attrs->phys.port_number); else -- cgit v1.2.3 From cf62089b0edd7e74a1f474844b4d9f7b5697fb5c Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 3 Mar 2020 15:05:01 -0500 Subject: bpf: Add gso_size to __sk_buff BPF programs may want to know whether an skb is gso. The canonical answer is skb_is_gso(skb), which tests that gso_size != 0. Expose this field in the same manner as gso_segs. That field itself is not a sufficient signal, as the comment in skb_shared_info makes clear: gso_segs may be zero, e.g., from dodgy sources. Also prepare net/bpf/test_run for upcoming BPF_PROG_TEST_RUN tests of the feature. Signed-off-by: Willem de Bruijn Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200303200503.226217-2-willemdebruijn.kernel@gmail.com --- include/uapi/linux/bpf.h | 1 + net/bpf/test_run.c | 7 +++++++ net/core/filter.c | 44 ++++++++++++++++++++++++++++++-------------- 3 files changed, 38 insertions(+), 14 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 8e98ced0963b..180337fae97e 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3176,6 +3176,7 @@ struct __sk_buff { __u32 wire_len; __u32 gso_segs; __bpf_md_ptr(struct bpf_sock *, sk); + __u32 gso_size; }; struct bpf_tunnel_key { diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 562443f94133..1cd7a1c2f8b2 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -277,6 +277,12 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) /* gso_segs is allowed */ if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), + offsetof(struct __sk_buff, gso_size))) + return -EINVAL; + + /* gso_size is allowed */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size), sizeof(struct __sk_buff))) return -EINVAL; @@ -297,6 +303,7 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) if (__skb->gso_segs > GSO_MAX_SEGS) return -EINVAL; skb_shinfo(skb)->gso_segs = __skb->gso_segs; + skb_shinfo(skb)->gso_size = __skb->gso_size; return 0; } diff --git a/net/core/filter.c b/net/core/filter.c index 4a08c9fb2be7..cd0a532db4e7 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7139,6 +7139,27 @@ static u32 flow_dissector_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static struct bpf_insn *bpf_convert_shinfo_access(const struct bpf_insn *si, + struct bpf_insn *insn) +{ + /* si->dst_reg = skb_shinfo(SKB); */ +#ifdef NET_SKBUFF_DATA_USES_OFFSET + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + BPF_REG_AX, si->src_reg, + offsetof(struct sk_buff, end)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); + *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); +#else + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, end)); +#endif + + return insn; +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -7461,26 +7482,21 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, break; case offsetof(struct __sk_buff, gso_segs): - /* si->dst_reg = skb_shinfo(SKB); */ -#ifdef NET_SKBUFF_DATA_USES_OFFSET - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - BPF_REG_AX, si->src_reg, - offsetof(struct sk_buff, end)); - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, head)); - *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); -#else - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, end)); -#endif + insn = bpf_convert_shinfo_access(si, insn); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_segs), si->dst_reg, si->dst_reg, bpf_target_off(struct skb_shared_info, gso_segs, 2, target_size)); break; + case offsetof(struct __sk_buff, gso_size): + insn = bpf_convert_shinfo_access(si, insn); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct skb_shared_info, gso_size), + si->dst_reg, si->dst_reg, + bpf_target_off(struct skb_shared_info, + gso_size, 2, + target_size)); + break; case offsetof(struct __sk_buff, wire_len): BUILD_BUG_ON(sizeof_field(struct qdisc_skb_cb, pkt_len) != 4); -- cgit v1.2.3 From 1aae4bdd787998ea331a56f3db9d8595790fe2f9 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Mon, 2 Mar 2020 16:32:31 -0800 Subject: bpf: Switch BPF UAPI #define constants used from BPF program side to enums Switch BPF UAPI constants, previously defined as #define macro, to anonymous enum values. This preserves constants values and behavior in expressions, but has added advantaged of being captured as part of DWARF and, subsequently, BTF type info. Which, in turn, greatly improves usefulness of generated vmlinux.h for BPF applications, as it will not require BPF users to copy/paste various flags and constants, which are frequently used with BPF helpers. Only those constants that are used/useful from BPF program side are converted. Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200303003233.3496043-2-andriin@fb.com --- include/uapi/linux/bpf.h | 175 +++++++++++++++++++++++++--------------- tools/include/uapi/linux/bpf.h | 177 +++++++++++++++++++++++++---------------- 2 files changed, 219 insertions(+), 133 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 180337fae97e..d6b33ea27bcc 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -325,44 +325,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. */ @@ -391,6 +393,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -3045,72 +3049,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; /* BPF_FUNC_read_branch_records flags. */ -#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3529,13 +3561,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3614,8 +3647,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! */ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3623,12 +3658,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3644,8 +3683,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3717,9 +3758,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7c689f4552dd..d6b33ea27bcc 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -73,7 +73,7 @@ struct bpf_insn { /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */ struct bpf_lpm_trie_key { __u32 prefixlen; /* up to 32 for AF_INET, 128 for AF_INET6 */ - __u8 data[0]; /* Arbitrary size */ + __u8 data[]; /* Arbitrary size */ }; struct bpf_cgroup_storage_key { @@ -325,44 +325,46 @@ enum bpf_attach_type { #define BPF_PSEUDO_CALL 1 /* flags for BPF_MAP_UPDATE_ELEM command */ -#define BPF_ANY 0 /* create new element or update existing */ -#define BPF_NOEXIST 1 /* create new element if it didn't exist */ -#define BPF_EXIST 2 /* update existing element */ -#define BPF_F_LOCK 4 /* spin_lock-ed map_lookup/map_update */ +enum { + BPF_ANY = 0, /* create new element or update existing */ + BPF_NOEXIST = 1, /* create new element if it didn't exist */ + BPF_EXIST = 2, /* update existing element */ + BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ +}; /* flags for BPF_MAP_CREATE command */ -#define BPF_F_NO_PREALLOC (1U << 0) +enum { + BPF_F_NO_PREALLOC = (1U << 0), /* Instead of having one common LRU list in the * BPF_MAP_TYPE_LRU_[PERCPU_]HASH map, use a percpu LRU list * which can scale and perform better. * Note, the LRU nodes (including free nodes) cannot be moved * across different LRU lists. */ -#define BPF_F_NO_COMMON_LRU (1U << 1) + BPF_F_NO_COMMON_LRU = (1U << 1), /* Specify numa node during map creation */ -#define BPF_F_NUMA_NODE (1U << 2) - -#define BPF_OBJ_NAME_LEN 16U + BPF_F_NUMA_NODE = (1U << 2), /* Flags for accessing BPF object from syscall side. */ -#define BPF_F_RDONLY (1U << 3) -#define BPF_F_WRONLY (1U << 4) + BPF_F_RDONLY = (1U << 3), + BPF_F_WRONLY = (1U << 4), /* Flag for stack_map, store build_id+offset instead of pointer */ -#define BPF_F_STACK_BUILD_ID (1U << 5) + BPF_F_STACK_BUILD_ID = (1U << 5), /* Zero-initialize hash function seed. This should only be used for testing. */ -#define BPF_F_ZERO_SEED (1U << 6) + BPF_F_ZERO_SEED = (1U << 6), /* Flags for accessing BPF object from program side. */ -#define BPF_F_RDONLY_PROG (1U << 7) -#define BPF_F_WRONLY_PROG (1U << 8) + BPF_F_RDONLY_PROG = (1U << 7), + BPF_F_WRONLY_PROG = (1U << 8), /* Clone map from listener for newly accepted socket */ -#define BPF_F_CLONE (1U << 9) + BPF_F_CLONE = (1U << 9), /* Enable memory-mapping BPF map */ -#define BPF_F_MMAPABLE (1U << 10) + BPF_F_MMAPABLE = (1U << 10), +}; /* Flags for BPF_PROG_QUERY. */ @@ -391,6 +393,8 @@ struct bpf_stack_build_id { }; }; +#define BPF_OBJ_NAME_LEN 16U + union bpf_attr { struct { /* anonymous struct used by BPF_MAP_CREATE command */ __u32 map_type; /* one of enum bpf_map_type */ @@ -3045,72 +3049,100 @@ enum bpf_func_id { /* All flags used by eBPF helper functions, placed here. */ /* BPF_FUNC_skb_store_bytes flags. */ -#define BPF_F_RECOMPUTE_CSUM (1ULL << 0) -#define BPF_F_INVALIDATE_HASH (1ULL << 1) +enum { + BPF_F_RECOMPUTE_CSUM = (1ULL << 0), + BPF_F_INVALIDATE_HASH = (1ULL << 1), +}; /* BPF_FUNC_l3_csum_replace and BPF_FUNC_l4_csum_replace flags. * First 4 bits are for passing the header field size. */ -#define BPF_F_HDR_FIELD_MASK 0xfULL +enum { + BPF_F_HDR_FIELD_MASK = 0xfULL, +}; /* BPF_FUNC_l4_csum_replace flags. */ -#define BPF_F_PSEUDO_HDR (1ULL << 4) -#define BPF_F_MARK_MANGLED_0 (1ULL << 5) -#define BPF_F_MARK_ENFORCE (1ULL << 6) +enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), +}; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ -#define BPF_F_INGRESS (1ULL << 0) +enum { + BPF_F_INGRESS = (1ULL << 0), +}; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. */ -#define BPF_F_TUNINFO_IPV6 (1ULL << 0) +enum { + BPF_F_TUNINFO_IPV6 = (1ULL << 0), +}; /* flags for both BPF_FUNC_get_stackid and BPF_FUNC_get_stack. */ -#define BPF_F_SKIP_FIELD_MASK 0xffULL -#define BPF_F_USER_STACK (1ULL << 8) +enum { + BPF_F_SKIP_FIELD_MASK = 0xffULL, + BPF_F_USER_STACK = (1ULL << 8), /* flags used by BPF_FUNC_get_stackid only. */ -#define BPF_F_FAST_STACK_CMP (1ULL << 9) -#define BPF_F_REUSE_STACKID (1ULL << 10) + BPF_F_FAST_STACK_CMP = (1ULL << 9), + BPF_F_REUSE_STACKID = (1ULL << 10), /* flags used by BPF_FUNC_get_stack only. */ -#define BPF_F_USER_BUILD_ID (1ULL << 11) + BPF_F_USER_BUILD_ID = (1ULL << 11), +}; /* BPF_FUNC_skb_set_tunnel_key flags. */ -#define BPF_F_ZERO_CSUM_TX (1ULL << 1) -#define BPF_F_DONT_FRAGMENT (1ULL << 2) -#define BPF_F_SEQ_NUMBER (1ULL << 3) +enum { + BPF_F_ZERO_CSUM_TX = (1ULL << 1), + BPF_F_DONT_FRAGMENT = (1ULL << 2), + BPF_F_SEQ_NUMBER = (1ULL << 3), +}; /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and * BPF_FUNC_perf_event_read_value flags. */ -#define BPF_F_INDEX_MASK 0xffffffffULL -#define BPF_F_CURRENT_CPU BPF_F_INDEX_MASK +enum { + BPF_F_INDEX_MASK = 0xffffffffULL, + BPF_F_CURRENT_CPU = BPF_F_INDEX_MASK, /* BPF_FUNC_perf_event_output for sk_buff input context. */ -#define BPF_F_CTXLEN_MASK (0xfffffULL << 32) + BPF_F_CTXLEN_MASK = (0xfffffULL << 32), +}; /* Current network namespace */ -#define BPF_F_CURRENT_NETNS (-1L) +enum { + BPF_F_CURRENT_NETNS = (-1L), +}; /* BPF_FUNC_skb_adjust_room flags. */ -#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0) +enum { + BPF_F_ADJ_ROOM_FIXED_GSO = (1ULL << 0), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 = (1ULL << 1), + BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 = (1ULL << 2), + BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3), + BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4), +}; -#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff -#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56 +enum { + BPF_ADJ_ROOM_ENCAP_L2_MASK = 0xff, + BPF_ADJ_ROOM_ENCAP_L2_SHIFT = 56, +}; -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1) -#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2) -#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) -#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) #define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \ BPF_ADJ_ROOM_ENCAP_L2_MASK) \ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT) /* BPF_FUNC_sysctl_get_name flags. */ -#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) +enum { + BPF_F_SYSCTL_BASE_NAME = (1ULL << 0), +}; /* BPF_FUNC_sk_storage_get flags */ -#define BPF_SK_STORAGE_GET_F_CREATE (1ULL << 0) +enum { + BPF_SK_STORAGE_GET_F_CREATE = (1ULL << 0), +}; /* BPF_FUNC_read_branch_records flags. */ -#define BPF_F_GET_BRANCH_RECORDS_SIZE (1ULL << 0) +enum { + BPF_F_GET_BRANCH_RECORDS_SIZE = (1ULL << 0), +}; /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { @@ -3529,13 +3561,14 @@ struct bpf_sock_ops { }; /* Definitions for bpf_sock_ops_cb_flags */ -#define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) -#define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) -#define BPF_SOCK_OPS_RTT_CB_FLAG (1<<3) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0xF /* Mask of all currently - * supported cb flags - */ +enum { + BPF_SOCK_OPS_RTO_CB_FLAG = (1<<0), + BPF_SOCK_OPS_RETRANS_CB_FLAG = (1<<1), + BPF_SOCK_OPS_STATE_CB_FLAG = (1<<2), + BPF_SOCK_OPS_RTT_CB_FLAG = (1<<3), +/* Mask of all currently supported cb flags */ + BPF_SOCK_OPS_ALL_CB_FLAGS = 0xF, +}; /* List of known BPF sock_ops operators. * New entries can only be added at the end @@ -3614,8 +3647,10 @@ enum { BPF_TCP_MAX_STATES /* Leave at the end! */ }; -#define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ -#define TCP_BPF_SNDCWND_CLAMP 1002 /* Set sndcwnd_clamp */ +enum { + TCP_BPF_IW = 1001, /* Set TCP initial congestion window */ + TCP_BPF_SNDCWND_CLAMP = 1002, /* Set sndcwnd_clamp */ +}; struct bpf_perf_event_value { __u64 counter; @@ -3623,12 +3658,16 @@ struct bpf_perf_event_value { __u64 running; }; -#define BPF_DEVCG_ACC_MKNOD (1ULL << 0) -#define BPF_DEVCG_ACC_READ (1ULL << 1) -#define BPF_DEVCG_ACC_WRITE (1ULL << 2) +enum { + BPF_DEVCG_ACC_MKNOD = (1ULL << 0), + BPF_DEVCG_ACC_READ = (1ULL << 1), + BPF_DEVCG_ACC_WRITE = (1ULL << 2), +}; -#define BPF_DEVCG_DEV_BLOCK (1ULL << 0) -#define BPF_DEVCG_DEV_CHAR (1ULL << 1) +enum { + BPF_DEVCG_DEV_BLOCK = (1ULL << 0), + BPF_DEVCG_DEV_CHAR = (1ULL << 1), +}; struct bpf_cgroup_dev_ctx { /* access_type encoded as (BPF_DEVCG_ACC_* << 16) | BPF_DEVCG_DEV_* */ @@ -3644,8 +3683,10 @@ struct bpf_raw_tracepoint_args { /* DIRECT: Skip the FIB rules and go to FIB table associated with device * OUTPUT: Do lookup from egress perspective; default is ingress */ -#define BPF_FIB_LOOKUP_DIRECT (1U << 0) -#define BPF_FIB_LOOKUP_OUTPUT (1U << 1) +enum { + BPF_FIB_LOOKUP_DIRECT = (1U << 0), + BPF_FIB_LOOKUP_OUTPUT = (1U << 1), +}; enum { BPF_FIB_LKUP_RET_SUCCESS, /* lookup successful */ @@ -3717,9 +3758,11 @@ enum bpf_task_fd_type { BPF_FD_TYPE_URETPROBE, /* filename + offset */ }; -#define BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG (1U << 0) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL (1U << 1) -#define BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP (1U << 2) +enum { + BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG = (1U << 0), + BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL = (1U << 1), + BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP = (1U << 2), +}; struct bpf_flow_keys { __u16 nhoff; -- cgit v1.2.3 From ae24082331d9bbaae283aafbe930a8f0eb85605a Mon Sep 17 00:00:00 2001 From: KP Singh Date: Wed, 4 Mar 2020 20:18:49 +0100 Subject: bpf: Introduce BPF_MODIFY_RETURN When multiple programs are attached, each program receives the return value from the previous program on the stack and the last program provides the return value to the attached function. The fmod_ret bpf programs are run after the fentry programs and before the fexit programs. The original function is only called if all the fmod_ret programs return 0 to avoid any unintended side-effects. The success value, i.e. 0 is not currently configurable but can be made so where user-space can specify it at load time. For example: int func_to_be_attached(int a, int b) { <--- do_fentry do_fmod_ret: if (ret != 0) goto do_fexit; original_function: } <--- do_fexit The fmod_ret program attached to this function can be defined as: SEC("fmod_ret/func_to_be_attached") int BPF_PROG(func_name, int a, int b, int ret) { // This will skip the original function logic. return 1; } The first fmod_ret program is passed 0 in its return argument. Signed-off-by: KP Singh Signed-off-by: Alexei Starovoitov Acked-by: Andrii Nakryiko Acked-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20200304191853.1529-4-kpsingh@chromium.org --- arch/x86/net/bpf_jit_comp.c | 130 +++++++++++++++++++++++++++++++++++++---- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 1 + kernel/bpf/btf.c | 3 +- kernel/bpf/syscall.c | 1 + kernel/bpf/trampoline.c | 5 +- kernel/bpf/verifier.c | 1 + tools/include/uapi/linux/bpf.h | 1 + 8 files changed, 130 insertions(+), 13 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d6349e930b06..b1fd000feb89 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1362,7 +1362,7 @@ static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, - struct bpf_prog *p, int stack_size) + struct bpf_prog *p, int stack_size, bool mod_ret) { u8 *prog = *pprog; int cnt = 0; @@ -1383,6 +1383,13 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (emit_call(&prog, p->bpf_func, prog)) return -EINVAL; + /* BPF_TRAMP_MODIFY_RETURN trampolines can modify the return + * of the previous call which is then passed on the stack to + * the next BPF program. + */ + if (mod_ret) + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); @@ -1442,6 +1449,23 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) return 0; } +static int emit_mod_ret_check_imm8(u8 **pprog, int value) +{ + u8 *prog = *pprog; + int cnt = 0; + + if (!is_imm8(value)) + return -EINVAL; + + if (value == 0) + EMIT2(0x85, add_2reg(0xC0, BPF_REG_0, BPF_REG_0)); + else + EMIT3(0x83, add_1reg(0xF8, BPF_REG_0), value); + + *pprog = prog; + return 0; +} + static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, struct bpf_tramp_progs *tp, int stack_size) { @@ -1449,9 +1473,49 @@ static int invoke_bpf(const struct btf_func_model *m, u8 **pprog, u8 *prog = *pprog; for (i = 0; i < tp->nr_progs; i++) { - if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size)) + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, false)) + return -EINVAL; + } + *pprog = prog; + return 0; +} + +static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, + struct bpf_tramp_progs *tp, int stack_size, + u8 **branches) +{ + u8 *prog = *pprog; + int i; + + /* The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + emit_mov_imm32(&prog, false, BPF_REG_0, 0); + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + for (i = 0; i < tp->nr_progs; i++) { + if (invoke_bpf_prog(m, &prog, tp->progs[i], stack_size, true)) return -EINVAL; + + /* Generate a branch: + * + * if (ret != 0) + * goto do_fexit; + * + * If needed this can be extended to any integer value which can + * be passed by user-space when the program is loaded. + */ + if (emit_mod_ret_check_imm8(&prog, 0)) + return -EINVAL; + + /* Save the location of the branch and Generate 6 nops + * (4 bytes for an offset and 2 bytes for the jump) These nops + * are replaced with a conditional jump once do_fexit (i.e. the + * start of the fexit invocation) is finalized. + */ + branches[i] = prog; + emit_nops(&prog, 4 + 2); } + *pprog = prog; return 0; } @@ -1521,10 +1585,12 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, struct bpf_tramp_progs *tprogs, void *orig_call) { - int cnt = 0, nr_args = m->nr_args; + int ret, i, cnt = 0, nr_args = m->nr_args; int stack_size = nr_args * 8; struct bpf_tramp_progs *fentry = &tprogs[BPF_TRAMP_FENTRY]; struct bpf_tramp_progs *fexit = &tprogs[BPF_TRAMP_FEXIT]; + struct bpf_tramp_progs *fmod_ret = &tprogs[BPF_TRAMP_MODIFY_RETURN]; + u8 **branches = NULL; u8 *prog; /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ @@ -1557,24 +1623,60 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, if (invoke_bpf(m, &prog, fentry, stack_size)) return -EINVAL; + if (fmod_ret->nr_progs) { + branches = kcalloc(fmod_ret->nr_progs, sizeof(u8 *), + GFP_KERNEL); + if (!branches) + return -ENOMEM; + + if (invoke_bpf_mod_ret(m, &prog, fmod_ret, stack_size, + branches)) { + ret = -EINVAL; + goto cleanup; + } + } + if (flags & BPF_TRAMP_F_CALL_ORIG) { - if (fentry->nr_progs) + if (fentry->nr_progs || fmod_ret->nr_progs) restore_regs(m, &prog, nr_args, stack_size); /* call original function */ - if (emit_call(&prog, orig_call, prog)) - return -EINVAL; + if (emit_call(&prog, orig_call, prog)) { + ret = -EINVAL; + goto cleanup; + } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); } + if (fmod_ret->nr_progs) { + /* From Intel 64 and IA-32 Architectures Optimization + * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler + * Coding Rule 11: All branch targets should be 16-byte + * aligned. + */ + emit_align(&prog, 16); + /* Update the branches saved in invoke_bpf_mod_ret with the + * aligned address of do_fexit. + */ + for (i = 0; i < fmod_ret->nr_progs; i++) + emit_cond_near_jump(&branches[i], prog, branches[i], + X86_JNE); + } + if (fexit->nr_progs) - if (invoke_bpf(m, &prog, fexit, stack_size)) - return -EINVAL; + if (invoke_bpf(m, &prog, fexit, stack_size)) { + ret = -EINVAL; + goto cleanup; + } if (flags & BPF_TRAMP_F_RESTORE_REGS) restore_regs(m, &prog, nr_args, stack_size); + /* This needs to be done regardless. If there were fmod_ret programs, + * the return value is only updated on the stack and still needs to be + * restored to R0. + */ if (flags & BPF_TRAMP_F_CALL_ORIG) /* restore original return value back into RAX */ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); @@ -1586,9 +1688,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ EMIT1(0xC3); /* ret */ /* Make sure the trampoline generation logic doesn't overflow */ - if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) - return -EFAULT; - return prog - (u8 *)image; + if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; + goto cleanup; + } + ret = prog - (u8 *)image; + +cleanup: + kfree(branches); + return ret; } static int emit_fallback_jump(u8 **pprog) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98ec10b23dbb..f748b31e5888 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -474,6 +474,7 @@ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start); enum bpf_tramp_prog_type { BPF_TRAMP_FENTRY, BPF_TRAMP_FEXIT, + BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ }; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d6b33ea27bcc..40b2d9476268 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -210,6 +210,7 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 787140095e58..30841fb8b3c0 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3710,7 +3710,8 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, nr_args--; } - if (prog->expected_attach_type == BPF_TRACE_FEXIT && + if ((prog->expected_attach_type == BPF_TRACE_FEXIT || + prog->expected_attach_type == BPF_MODIFY_RETURN) && arg == nr_args) { if (!t) /* Default prog with 5 args. 6th arg is retval. */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 13de65363ba2..7ce0815793dd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2324,6 +2324,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT && + prog->expected_attach_type != BPF_MODIFY_RETURN && prog->type != BPF_PROG_TYPE_EXT) { err = -EINVAL; goto out_put_prog; diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 546198f6f307..221a17af1f81 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -232,7 +232,8 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr) goto out; } - if (tprogs[BPF_TRAMP_FEXIT].nr_progs) + if (tprogs[BPF_TRAMP_FEXIT].nr_progs || + tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs) flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; /* Though the second half of trampoline page is unused a task could be @@ -269,6 +270,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t) switch (t) { case BPF_TRACE_FENTRY: return BPF_TRAMP_FENTRY; + case BPF_MODIFY_RETURN: + return BPF_TRAMP_MODIFY_RETURN; case BPF_TRACE_FEXIT: return BPF_TRAMP_FEXIT; default: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 289383edfc8c..2460c8e6b5be 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -9950,6 +9950,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) if (!prog_extension) return -EINVAL; /* fallthrough */ + case BPF_MODIFY_RETURN: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: if (!btf_type_is_func(t)) { diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d6b33ea27bcc..40b2d9476268 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -210,6 +210,7 @@ enum bpf_attach_type { BPF_TRACE_RAW_TP, BPF_TRACE_FENTRY, BPF_TRACE_FEXIT, + BPF_MODIFY_RETURN, __MAX_BPF_ATTACH_TYPE }; -- cgit v1.2.3 From 44f8658017419dccbeefe64f30122fa191d0e173 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sat, 7 Mar 2020 12:40:20 +0100 Subject: sched: act: allow user to specify type of HW stats for a filter Currently, user who is adding an action expects HW to report stats, however it does not have exact expectations about the stats types. That is aligned with TCA_ACT_HW_STATS_TYPE_ANY. Allow user to specify the type of HW stats for an action and require it. Pass the information down to flow_offload layer. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/act_api.h | 4 ++++ include/uapi/linux/pkt_cls.h | 22 ++++++++++++++++++++++ net/sched/act_api.c | 36 ++++++++++++++++++++++++++++++++++++ net/sched/cls_api.c | 7 +++++++ 4 files changed, 69 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/act_api.h b/include/net/act_api.h index 71347a90a9d1..41337c7fc728 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -41,6 +41,7 @@ struct tc_action { struct tc_cookie __rcu *act_cookie; struct tcf_chain __rcu *goto_chain; u32 tcfa_flags; + u8 hw_stats_type; }; #define tcf_index common.tcfa_index #define tcf_refcnt common.tcfa_refcnt @@ -52,6 +53,9 @@ struct tc_action { #define tcf_rate_est common.tcfa_rate_est #define tcf_lock common.tcfa_lock +#define TCA_ACT_HW_STATS_TYPE_ANY (TCA_ACT_HW_STATS_TYPE_IMMEDIATE | \ + TCA_ACT_HW_STATS_TYPE_DELAYED) + /* Update lastuse only if needed, to avoid dirtying a cache line. * We use a temp variable to avoid fetching jiffies twice. */ diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index 449a63971451..81cc1a869588 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -17,6 +17,7 @@ enum { TCA_ACT_PAD, TCA_ACT_COOKIE, TCA_ACT_FLAGS, + TCA_ACT_HW_STATS_TYPE, __TCA_ACT_MAX }; @@ -24,6 +25,27 @@ enum { * actions stats. */ +/* tca HW stats type + * When user does not pass the attribute, he does not care. + * It is the same as if he would pass the attribute with + * all supported bits set. + * In case no bits are set, user is not interested in getting any HW statistics. + */ +#define TCA_ACT_HW_STATS_TYPE_IMMEDIATE (1 << 0) /* Means that in dump, user + * gets the current HW stats + * state from the device + * queried at the dump time. + */ +#define TCA_ACT_HW_STATS_TYPE_DELAYED (1 << 1) /* Means that in dump, user gets + * HW stats that might be out + * of date for some time, maybe + * couple of seconds. This is + * the case when driver polls + * stats updates periodically + * or when it gets async stats update + * from the device. + */ + #define TCA_ACT_MAX __TCA_ACT_MAX #define TCA_OLD_COMPAT (TCA_ACT_MAX+1) #define TCA_ACT_MAX_PRIO 32 diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 8c466a712cda..aa7b737fed2e 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -185,6 +185,7 @@ static size_t tcf_action_shared_attrs_size(const struct tc_action *act) return nla_total_size(0) /* action number nested */ + nla_total_size(IFNAMSIZ) /* TCA_ACT_KIND */ + cookie_len /* TCA_ACT_COOKIE */ + + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_HW_STATS_TYPE */ + nla_total_size(0) /* TCA_ACT_STATS nested */ + nla_total_size(sizeof(struct nla_bitfield32)) /* TCA_ACT_FLAGS */ /* TCA_STATS_BASIC */ @@ -788,6 +789,17 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref) } rcu_read_unlock(); + if (a->hw_stats_type != TCA_ACT_HW_STATS_TYPE_ANY) { + struct nla_bitfield32 hw_stats_type = { + a->hw_stats_type, + TCA_ACT_HW_STATS_TYPE_ANY, + }; + + if (nla_put(skb, TCA_ACT_HW_STATS_TYPE, sizeof(hw_stats_type), + &hw_stats_type)) + goto nla_put_failure; + } + if (a->tcfa_flags) { struct nla_bitfield32 flags = { a->tcfa_flags, a->tcfa_flags, }; @@ -854,7 +866,23 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb) return c; } +static u8 tcf_action_hw_stats_type_get(struct nlattr *hw_stats_type_attr) +{ + struct nla_bitfield32 hw_stats_type_bf; + + /* If the user did not pass the attr, that means he does + * not care about the type. Return "any" in that case + * which is setting on all supported types. + */ + if (!hw_stats_type_attr) + return TCA_ACT_HW_STATS_TYPE_ANY; + hw_stats_type_bf = nla_get_bitfield32(hw_stats_type_attr); + return hw_stats_type_bf.value; +} + static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS; +static const u32 tca_act_hw_stats_type_allowed = TCA_ACT_HW_STATS_TYPE_ANY; + static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_KIND] = { .type = NLA_STRING }, [TCA_ACT_INDEX] = { .type = NLA_U32 }, @@ -863,6 +891,8 @@ static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = { [TCA_ACT_OPTIONS] = { .type = NLA_NESTED }, [TCA_ACT_FLAGS] = { .type = NLA_BITFIELD32, .validation_data = &tca_act_flags_allowed }, + [TCA_ACT_HW_STATS_TYPE] = { .type = NLA_BITFIELD32, + .validation_data = &tca_act_hw_stats_type_allowed }, }; struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, @@ -871,6 +901,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, bool rtnl_held, struct netlink_ext_ack *extack) { + u8 hw_stats_type = TCA_ACT_HW_STATS_TYPE_ANY; struct nla_bitfield32 flags = { 0, 0 }; struct tc_action *a; struct tc_action_ops *a_o; @@ -903,6 +934,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, goto err_out; } } + hw_stats_type = + tcf_action_hw_stats_type_get(tb[TCA_ACT_HW_STATS_TYPE]); if (tb[TCA_ACT_FLAGS]) flags = nla_get_bitfield32(tb[TCA_ACT_FLAGS]); } else { @@ -953,6 +986,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp, if (!name && tb[TCA_ACT_COOKIE]) tcf_set_action_cookie(&a->act_cookie, cookie); + if (!name) + a->hw_stats_type = hw_stats_type; + /* module count goes up only when brand new policy is created * if it exists and is only bound to in a_o->init() then * ACT_P_CREATED is not returned (a zero is). diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4e766c5ab77a..e91448640a4f 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3464,6 +3464,10 @@ int tc_setup_flow_action(struct flow_action *flow_action, struct tc_action *act; int i, j, k, err = 0; + BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_ANY != FLOW_ACTION_HW_STATS_TYPE_ANY); + BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_IMMEDIATE != FLOW_ACTION_HW_STATS_TYPE_IMMEDIATE); + BUILD_BUG_ON(TCA_ACT_HW_STATS_TYPE_DELAYED != FLOW_ACTION_HW_STATS_TYPE_DELAYED); + if (!exts) return 0; @@ -3476,6 +3480,9 @@ int tc_setup_flow_action(struct flow_action *flow_action, err = tcf_act_get_cookie(entry, act); if (err) goto err_out_locked; + + entry->hw_stats_type = act->hw_stats_type; + if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; } else if (is_tcf_gact_shot(act)) { -- cgit v1.2.3 From e08ab0b377a1489760533424437c5f4be7f484a4 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Mon, 9 Mar 2020 13:16:40 -0700 Subject: tcp: add bytes not sent to SCM_TIMESTAMPING_OPT_STATS Add TCP_NLA_BYTES_NOTSENT to SCM_TIMESTAMPING_OPT_STATS that reports bytes in the write queue but not sent. This is the same metric as what is exported with tcp_info.tcpi_notsent_bytes. Signed-off-by: Yousuk Seung Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/uapi/linux/tcp.h | 1 + net/ipv4/tcp.c | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 1a7fc856e237..f2acb2566333 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -312,6 +312,7 @@ enum { TCP_NLA_REORD_SEEN, /* reordering events seen */ TCP_NLA_SRTT, /* smoothed RTT in usecs */ TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */ + TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */ }; /* for TCP_MD5SIG socket option */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 48aa457a9516..b7134f76f840 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3344,6 +3344,7 @@ static size_t tcp_opt_stats_get_size(void) nla_total_size(sizeof(u32)) + /* TCP_NLA_REORD_SEEN */ nla_total_size(sizeof(u32)) + /* TCP_NLA_SRTT */ nla_total_size(sizeof(u16)) + /* TCP_NLA_TIMEOUT_REHASH */ + nla_total_size(sizeof(u32)) + /* TCP_NLA_BYTES_NOTSENT */ 0; } @@ -3399,6 +3400,8 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) nla_put_u32(stats, TCP_NLA_REORD_SEEN, tp->reord_seen); nla_put_u32(stats, TCP_NLA_SRTT, tp->srtt_us >> 3); nla_put_u16(stats, TCP_NLA_TIMEOUT_REHASH, tp->timeout_rehash); + nla_put_u32(stats, TCP_NLA_BYTES_NOTSENT, + max_t(int, 0, tp->write_seq - tp->snd_nxt)); return stats; } -- cgit v1.2.3 From 0524399d4612f5af38b8383680dde4df4bc4eea2 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:07:48 +0100 Subject: ethtool: provide netdev features with FEATURES_GET request Implement FEATURES_GET request to get network device features. These are traditionally available via ETHTOOL_GFEATURES ioctl request. v2: - style cleanup suggested by Jakub Kicinski Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 51 +++++++++-- include/uapi/linux/ethtool_netlink.h | 17 ++++ net/ethtool/Makefile | 2 +- net/ethtool/common.h | 2 + net/ethtool/features.c | 131 +++++++++++++++++++++++++++ net/ethtool/ioctl.c | 2 - net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + 8 files changed, 202 insertions(+), 12 deletions(-) create mode 100644 net/ethtool/features.c (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index f1f868479ceb..5713abf98534 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -189,6 +189,7 @@ Userspace to kernel: ``ETHTOOL_MSG_DEBUG_SET`` set debugging settings ``ETHTOOL_MSG_WOL_GET`` get wake-on-lan settings ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings + ``ETHTOOL_MSG_FEATURES_GET`` get device features ===================================== ================================ Kernel to userspace: @@ -204,6 +205,7 @@ Kernel to userspace: ``ETHTOOL_MSG_DEBUG_NTF`` debugging settings notification ``ETHTOOL_MSG_WOL_GET_REPLY`` wake-on-lan settings ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification + ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -521,6 +523,37 @@ Request contents: ``WAKE_MAGICSECURE`` mode. +FEATURES_GET +============ + +Gets netdev features like ``ETHTOOL_GFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_HW`` bitset dev->hw_features + ``ETHTOOL_A_FEATURES_WANTED`` bitset dev->wanted_features + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset dev->features + ``ETHTOOL_A_FEATURES_NOCHANGE`` bitset NETIF_F_NEVER_CHANGE + ==================================== ====== ========================== + +Bitmaps in kernel response have the same meaning as bitmaps used in ioctl +interference but attribute names are different (they are based on +corresponding members of struct net_device). Legacy "flags" are not provided, +if userspace needs them (most likely only ethtool for backward compatibility), +it can calculate their values from related feature bits itself. +ETHA_FEATURES_HW uses mask consisting of all features recognized by kernel (to +provide all names when using verbose bitmap format), the other three use no +mask (simple bit lists). + + Request translation =================== @@ -551,30 +584,30 @@ have their netlink replacement yet. ``ETHTOOL_SRINGPARAM`` n/a ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a - ``ETHTOOL_GRXCSUM`` n/a + ``ETHTOOL_GRXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SRXCSUM`` n/a - ``ETHTOOL_GTXCSUM`` n/a + ``ETHTOOL_GTXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_STXCSUM`` n/a - ``ETHTOOL_GSG`` n/a + ``ETHTOOL_GSG`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SSG`` n/a ``ETHTOOL_TEST`` n/a ``ETHTOOL_GSTRINGS`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_PHYS_ID`` n/a ``ETHTOOL_GSTATS`` n/a - ``ETHTOOL_GTSO`` n/a + ``ETHTOOL_GTSO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_STSO`` n/a ``ETHTOOL_GPERMADDR`` rtnetlink ``RTM_GETLINK`` - ``ETHTOOL_GUFO`` n/a + ``ETHTOOL_GUFO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SUFO`` n/a - ``ETHTOOL_GGSO`` n/a + ``ETHTOOL_GGSO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SGSO`` n/a - ``ETHTOOL_GFLAGS`` n/a + ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFLAGS`` n/a ``ETHTOOL_GPFLAGS`` n/a ``ETHTOOL_SPFLAGS`` n/a ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a - ``ETHTOOL_GGRO`` n/a + ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SGRO`` n/a ``ETHTOOL_GRXRINGS`` n/a ``ETHTOOL_GRXCLSRLCNT`` n/a @@ -589,7 +622,7 @@ have their netlink replacement yet. ``ETHTOOL_GSSET_INFO`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_GRXFHINDIR`` n/a ``ETHTOOL_SRXFHINDIR`` n/a - ``ETHTOOL_GFEATURES`` n/a + ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFEATURES`` n/a ``ETHTOOL_GCHANNELS`` n/a ``ETHTOOL_SCHANNELS`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7e0b460f872c..d0cc7a0334c8 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -24,6 +24,7 @@ enum { ETHTOOL_MSG_DEBUG_SET, ETHTOOL_MSG_WOL_GET, ETHTOOL_MSG_WOL_SET, + ETHTOOL_MSG_FEATURES_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -43,6 +44,7 @@ enum { ETHTOOL_MSG_DEBUG_NTF, ETHTOOL_MSG_WOL_GET_REPLY, ETHTOOL_MSG_WOL_NTF, + ETHTOOL_MSG_FEATURES_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -228,6 +230,21 @@ enum { ETHTOOL_A_WOL_MAX = __ETHTOOL_A_WOL_CNT - 1 }; +/* FEATURES */ + +enum { + ETHTOOL_A_FEATURES_UNSPEC, + ETHTOOL_A_FEATURES_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_FEATURES_HW, /* bitset */ + ETHTOOL_A_FEATURES_WANTED, /* bitset */ + ETHTOOL_A_FEATURES_ACTIVE, /* bitset */ + ETHTOOL_A_FEATURES_NOCHANGE, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_FEATURES_CNT, + ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 424545a4aaec..5be8c9ab26d1 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,4 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o + linkstate.o debug.o wol.o features.o diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 40ba74e0b9bb..7dc1163800a7 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -6,6 +6,8 @@ #include #include +#define ETHTOOL_DEV_FEATURE_WORDS DIV_ROUND_UP(NETDEV_FEATURE_COUNT, 32) + /* compose link mode index from speed, type and duplex */ #define ETHTOOL_LINK_MODE(speed, type, duplex) \ ETHTOOL_LINK_MODE_ ## speed ## base ## type ## _ ## duplex ## _BIT diff --git a/net/ethtool/features.c b/net/ethtool/features.c new file mode 100644 index 000000000000..a0cc2b969053 --- /dev/null +++ b/net/ethtool/features.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct features_req_info { + struct ethnl_req_info base; +}; + +struct features_reply_data { + struct ethnl_reply_data base; + u32 hw[ETHTOOL_DEV_FEATURE_WORDS]; + u32 wanted[ETHTOOL_DEV_FEATURE_WORDS]; + u32 active[ETHTOOL_DEV_FEATURE_WORDS]; + u32 nochange[ETHTOOL_DEV_FEATURE_WORDS]; + u32 all[ETHTOOL_DEV_FEATURE_WORDS]; +}; + +#define FEATURES_REPDATA(__reply_base) \ + container_of(__reply_base, struct features_reply_data, base) + +static const struct nla_policy +features_get_policy[ETHTOOL_A_FEATURES_MAX + 1] = { + [ETHTOOL_A_FEATURES_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_HW] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_WANTED] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_ACTIVE] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_NOCHANGE] = { .type = NLA_REJECT }, +}; + +static void ethnl_features_to_bitmap32(u32 *dest, netdev_features_t src) +{ + unsigned int i; + + for (i = 0; i < ETHTOOL_DEV_FEATURE_WORDS; i++) + dest[i] = src >> (32 * i); +} + +static int features_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct features_reply_data *data = FEATURES_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + netdev_features_t all_features; + + ethnl_features_to_bitmap32(data->hw, dev->hw_features); + ethnl_features_to_bitmap32(data->wanted, dev->wanted_features); + ethnl_features_to_bitmap32(data->active, dev->features); + ethnl_features_to_bitmap32(data->nochange, NETIF_F_NEVER_CHANGE); + all_features = GENMASK_ULL(NETDEV_FEATURE_COUNT - 1, 0); + ethnl_features_to_bitmap32(data->all, all_features); + + return 0; +} + +static int features_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct features_reply_data *data = FEATURES_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + unsigned int len = 0; + int ret; + + ret = ethnl_bitset32_size(data->hw, data->all, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->wanted, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->active, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + ret = ethnl_bitset32_size(data->nochange, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + len += ret; + + return len; +} + +static int features_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct features_reply_data *data = FEATURES_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + int ret; + + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_HW, data->hw, + data->all, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_WANTED, data->wanted, + NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + ret = ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_ACTIVE, data->active, + NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + return ret; + return ethnl_put_bitset32(skb, ETHTOOL_A_FEATURES_NOCHANGE, + data->nochange, NULL, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); +} + +const struct ethnl_request_ops ethnl_features_request_ops = { + .request_cmd = ETHTOOL_MSG_FEATURES_GET, + .reply_cmd = ETHTOOL_MSG_FEATURES_GET_REPLY, + .hdr_attr = ETHTOOL_A_FEATURES_HEADER, + .max_attr = ETHTOOL_A_FEATURES_MAX, + .req_info_size = sizeof(struct features_req_info), + .reply_data_size = sizeof(struct features_reply_data), + .request_policy = features_get_policy, + + .prepare_data = features_prepare_data, + .reply_size = features_reply_size, + .fill_reply = features_fill_reply, +}; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index ae97c82c7052..45d1bf1764b7 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -56,8 +56,6 @@ EXPORT_SYMBOL(ethtool_op_get_ts_info); /* Handlers for each ethtool command */ -#define ETHTOOL_DEV_FEATURE_WORDS ((NETDEV_FEATURE_COUNT + 31) / 32) - static int ethtool_get_features(struct net_device *dev, void __user *useraddr) { struct ethtool_gfeatures cmd = { diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 8eca55122ef3..e451a75e9577 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -215,6 +215,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_LINKSTATE_GET] = ðnl_linkstate_request_ops, [ETHTOOL_MSG_DEBUG_GET] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops, + [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -695,6 +696,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_wol, }, + { + .cmd = ETHTOOL_MSG_FEATURES_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 961708290074..be2325ea8493 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -337,6 +337,7 @@ extern const struct ethnl_request_ops ethnl_linkmodes_request_ops; extern const struct ethnl_request_ops ethnl_linkstate_request_ops; extern const struct ethnl_request_ops ethnl_debug_request_ops; extern const struct ethnl_request_ops ethnl_wol_request_ops; +extern const struct ethnl_request_ops ethnl_features_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From 0980bfcd6954f124e40a000b85335c197764de14 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:07:58 +0100 Subject: ethtool: set netdev features with FEATURES_SET request Implement FEATURES_SET netlink request to set network device features. These are traditionally set using ETHTOOL_SFEATURES ioctl request. Actual change is subject to netdev_change_features() sanity checks so that it can differ from what was requested. Unlike with most other SET requests, in addition to error code and optional extack, kernel provides an optional reply message (ETHTOOL_MSG_FEATURES_SET_REPLY) in the same format but with different semantics: information about difference between user request and actual result and difference between old and new state of dev->features. This reply message can be suppressed by setting ETHTOOL_FLAG_OMIT_REPLY flag in request header. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 56 +++++++-- include/uapi/linux/ethtool_netlink.h | 2 + net/ethtool/features.c | 169 +++++++++++++++++++++++++++ net/ethtool/netlink.c | 5 + net/ethtool/netlink.h | 1 + 5 files changed, 224 insertions(+), 9 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 5713abf98534..d6706c4aa972 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -190,6 +190,7 @@ Userspace to kernel: ``ETHTOOL_MSG_WOL_GET`` get wake-on-lan settings ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings ``ETHTOOL_MSG_FEATURES_GET`` get device features + ``ETHTOOL_MSG_FEATURES_SET`` set device features ===================================== ================================ Kernel to userspace: @@ -206,6 +207,7 @@ Kernel to userspace: ``ETHTOOL_MSG_WOL_GET_REPLY`` wake-on-lan settings ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features + ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -554,6 +556,42 @@ provide all names when using verbose bitmap format), the other three use no mask (simple bit lists). +FEATURES_SET +============ + +Request to set netdev features like ``ETHTOOL_SFEATURES`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested request header + ``ETHTOOL_A_FEATURES_WANTED`` bitset requested features + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_FEATURES_HEADER`` nested reply header + ``ETHTOOL_A_FEATURES_WANTED`` bitset diff wanted vs. result + ``ETHTOOL_A_FEATURES_ACTIVE`` bitset diff old vs. new active + ==================================== ====== ========================== + +Request constains only one bitset which can be either value/mask pair (request +to change specific feature bits and leave the rest) or only a value (request +to set all features to specified set). + +As request is subject to netdev_change_features() sanity checks, optional +kernel reply (can be suppressed by ``ETHTOOL_FLAG_OMIT_REPLY`` flag in request +header) informs client about the actual result. ``ETHTOOL_A_FEATURES_WANTED`` +reports the difference between client request and actual result: mask consists +of bits which differ between requested features and result (dev->features +after the operation), value consists of values of these bits in the request +(i.e. negated values from resulting features). ``ETHTOOL_A_FEATURES_ACTIVE`` +reports the difference between old and new dev->features: mask consists of +bits which have changed, values are their values in new dev->features (after +the operation). + + Request translation =================== @@ -585,30 +623,30 @@ have their netlink replacement yet. ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a ``ETHTOOL_GRXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SRXCSUM`` n/a + ``ETHTOOL_SRXCSUM`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GTXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_STXCSUM`` n/a + ``ETHTOOL_STXCSUM`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GSG`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SSG`` n/a + ``ETHTOOL_SSG`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_TEST`` n/a ``ETHTOOL_GSTRINGS`` ``ETHTOOL_MSG_STRSET_GET`` ``ETHTOOL_PHYS_ID`` n/a ``ETHTOOL_GSTATS`` n/a ``ETHTOOL_GTSO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_STSO`` n/a + ``ETHTOOL_STSO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPERMADDR`` rtnetlink ``RTM_GETLINK`` ``ETHTOOL_GUFO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SUFO`` n/a + ``ETHTOOL_SUFO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GGSO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SGSO`` n/a + ``ETHTOOL_SGSO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SFLAGS`` n/a + ``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPFLAGS`` n/a ``ETHTOOL_SPFLAGS`` n/a ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SGRO`` n/a + ``ETHTOOL_SGRO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GRXRINGS`` n/a ``ETHTOOL_GRXCLSRLCNT`` n/a ``ETHTOOL_GRXCLSRULE`` n/a @@ -623,7 +661,7 @@ have their netlink replacement yet. ``ETHTOOL_GRXFHINDIR`` n/a ``ETHTOOL_SRXFHINDIR`` n/a ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` - ``ETHTOOL_SFEATURES`` n/a + ``ETHTOOL_SFEATURES`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GCHANNELS`` n/a ``ETHTOOL_SCHANNELS`` n/a ``ETHTOOL_SET_DUMP`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d0cc7a0334c8..6f7aaa6b7f42 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -25,6 +25,7 @@ enum { ETHTOOL_MSG_WOL_GET, ETHTOOL_MSG_WOL_SET, ETHTOOL_MSG_FEATURES_GET, + ETHTOOL_MSG_FEATURES_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -45,6 +46,7 @@ enum { ETHTOOL_MSG_WOL_GET_REPLY, ETHTOOL_MSG_WOL_NTF, ETHTOOL_MSG_FEATURES_GET_REPLY, + ETHTOOL_MSG_FEATURES_SET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/features.c b/net/ethtool/features.c index a0cc2b969053..4ac1e05684ce 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -129,3 +129,172 @@ const struct ethnl_request_ops ethnl_features_request_ops = { .reply_size = features_reply_size, .fill_reply = features_fill_reply, }; + +/* FEATURES_SET */ + +static const struct nla_policy +features_set_policy[ETHTOOL_A_FEATURES_MAX + 1] = { + [ETHTOOL_A_FEATURES_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_HW] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_WANTED] = { .type = NLA_NESTED }, + [ETHTOOL_A_FEATURES_ACTIVE] = { .type = NLA_REJECT }, + [ETHTOOL_A_FEATURES_NOCHANGE] = { .type = NLA_REJECT }, +}; + +static void ethnl_features_to_bitmap(unsigned long *dest, netdev_features_t val) +{ + const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); + unsigned int i; + + bitmap_zero(dest, NETDEV_FEATURE_COUNT); + for (i = 0; i < words; i++) + dest[i] = (unsigned long)(val >> (i * BITS_PER_LONG)); +} + +static netdev_features_t ethnl_bitmap_to_features(unsigned long *src) +{ + const unsigned int nft_bits = sizeof(netdev_features_t) * BITS_PER_BYTE; + const unsigned int words = BITS_TO_LONGS(NETDEV_FEATURE_COUNT); + netdev_features_t ret = 0; + unsigned int i; + + for (i = 0; i < words; i++) + ret |= (netdev_features_t)(src[i]) << (i * BITS_PER_LONG); + ret &= ~(netdev_features_t)0 >> (nft_bits - NETDEV_FEATURE_COUNT); + return ret; +} + +static int features_send_reply(struct net_device *dev, struct genl_info *info, + const unsigned long *wanted, + const unsigned long *wanted_mask, + const unsigned long *active, + const unsigned long *active_mask, bool compact) +{ + struct sk_buff *rskb; + void *reply_payload; + int reply_len = 0; + int ret; + + reply_len = ethnl_reply_header_size(); + ret = ethnl_bitset_size(wanted, wanted_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto err; + reply_len += ret; + ret = ethnl_bitset_size(active, active_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto err; + reply_len += ret; + + ret = -ENOMEM; + rskb = ethnl_reply_init(reply_len, dev, ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_A_FEATURES_HEADER, info, + &reply_payload); + if (!rskb) + goto err; + + ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_WANTED, wanted, + wanted_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto nla_put_failure; + ret = ethnl_put_bitset(rskb, ETHTOOL_A_FEATURES_ACTIVE, active, + active_mask, NETDEV_FEATURE_COUNT, + netdev_features_strings, compact); + if (ret < 0) + goto nla_put_failure; + + genlmsg_end(rskb, reply_payload); + ret = genlmsg_reply(rskb, info); + return ret; + +nla_put_failure: + nlmsg_free(rskb); + WARN_ONCE(1, "calculated message payload length (%d) not sufficient\n", + reply_len); +err: + GENL_SET_ERR_MSG(info, "failed to send reply message"); + return ret; +} + +int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) +{ + DECLARE_BITMAP(wanted_diff_mask, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(active_diff_mask, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(old_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(new_active, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(req_wanted, NETDEV_FEATURE_COUNT); + DECLARE_BITMAP(req_mask, NETDEV_FEATURE_COUNT); + struct nlattr *tb[ETHTOOL_A_FEATURES_MAX + 1]; + struct ethnl_req_info req_info = {}; + struct net_device *dev; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_FEATURES_MAX, features_set_policy, + info->extack); + if (ret < 0) + return ret; + if (!tb[ETHTOOL_A_FEATURES_WANTED]) + return -EINVAL; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_FEATURES_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + + rtnl_lock(); + ethnl_features_to_bitmap(old_active, dev->features); + ret = ethnl_parse_bitset(req_wanted, req_mask, NETDEV_FEATURE_COUNT, + tb[ETHTOOL_A_FEATURES_WANTED], + netdev_features_strings, info->extack); + if (ret < 0) + goto out_rtnl; + if (ethnl_bitmap_to_features(req_mask) & ~NETIF_F_ETHTOOL_BITS) { + GENL_SET_ERR_MSG(info, "attempt to change non-ethtool features"); + ret = -EINVAL; + goto out_rtnl; + } + + /* set req_wanted bits not in req_mask from old_active */ + bitmap_and(req_wanted, req_wanted, req_mask, NETDEV_FEATURE_COUNT); + bitmap_andnot(new_active, old_active, req_mask, NETDEV_FEATURE_COUNT); + bitmap_or(req_wanted, new_active, req_wanted, NETDEV_FEATURE_COUNT); + if (bitmap_equal(req_wanted, old_active, NETDEV_FEATURE_COUNT)) { + ret = 0; + goto out_rtnl; + } + + dev->wanted_features = ethnl_bitmap_to_features(req_wanted); + __netdev_update_features(dev); + ethnl_features_to_bitmap(new_active, dev->features); + + ret = 0; + if (!(req_info.flags & ETHTOOL_FLAG_OMIT_REPLY)) { + bool compact = req_info.flags & ETHTOOL_FLAG_COMPACT_BITSETS; + + bitmap_xor(wanted_diff_mask, req_wanted, new_active, + NETDEV_FEATURE_COUNT); + bitmap_xor(active_diff_mask, old_active, new_active, + NETDEV_FEATURE_COUNT); + bitmap_and(wanted_diff_mask, wanted_diff_mask, req_mask, + NETDEV_FEATURE_COUNT); + bitmap_and(req_wanted, req_wanted, wanted_diff_mask, + NETDEV_FEATURE_COUNT); + bitmap_and(new_active, new_active, active_diff_mask, + NETDEV_FEATURE_COUNT); + + ret = features_send_reply(dev, info, req_wanted, + wanted_diff_mask, new_active, + active_diff_mask, compact); + } + +out_rtnl: + rtnl_unlock(); + dev_put(dev); + return ret; +} diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index e451a75e9577..757ea3fc98a0 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -703,6 +703,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_FEATURES_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_features, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index be2325ea8493..135836201e89 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -343,5 +343,6 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info); int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ -- cgit v1.2.3 From 9c6451ef4881c2f528c625766e8bb9af5e40941a Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:03 +0100 Subject: ethtool: add FEATURES_NTF notification Send ETHTOOL_MSG_FEATURES_NTF notification whenever network device features are modified using ETHTOOL_MSG_FEATURES_SET netlink message, ethtool ioctl request or any other way resulting in call to netdev_update_features() or netdev_change_features() Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 6 ++++++ include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/features.c | 4 ++++ net/ethtool/netlink.c | 29 +++++++++++++++++++++++++++- 4 files changed, 39 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index d6706c4aa972..47542f042e9d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -208,6 +208,7 @@ Kernel to userspace: ``ETHTOOL_MSG_WOL_NTF`` wake-on-lan settings notification ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET + ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -591,6 +592,11 @@ reports the difference between old and new dev->features: mask consists of bits which have changed, values are their values in new dev->features (after the operation). +``ETHTOOL_MSG_FEATURES_NTF`` notification is sent not only if device features +are modified using ``ETHTOOL_MSG_FEATURES_SET`` request or on of ethtool ioctl +request but also each time features are modified with netdev_update_features() +or netdev_change_features(). + Request translation =================== diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 6f7aaa6b7f42..3d0204cf96a6 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -47,6 +47,7 @@ enum { ETHTOOL_MSG_WOL_NTF, ETHTOOL_MSG_FEATURES_GET_REPLY, ETHTOOL_MSG_FEATURES_SET_REPLY, + ETHTOOL_MSG_FEATURES_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/features.c b/net/ethtool/features.c index 4ac1e05684ce..4e632dc987d8 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -230,6 +230,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) struct nlattr *tb[ETHTOOL_A_FEATURES_MAX + 1]; struct ethnl_req_info req_info = {}; struct net_device *dev; + bool mod; int ret; ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, @@ -272,6 +273,7 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) dev->wanted_features = ethnl_bitmap_to_features(req_wanted); __netdev_update_features(dev); ethnl_features_to_bitmap(new_active, dev->features); + mod = !bitmap_equal(old_active, new_active, NETDEV_FEATURE_COUNT); ret = 0; if (!(req_info.flags & ETHTOOL_FLAG_OMIT_REPLY)) { @@ -292,6 +294,8 @@ int ethnl_set_features(struct sk_buff *skb, struct genl_info *info) wanted_diff_mask, new_active, active_diff_mask, compact); } + if (mod) + ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); out_rtnl: rtnl_unlock(); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 757ea3fc98a0..5c0e361bfd66 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -528,6 +528,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_LINKMODES_NTF] = ðnl_linkmodes_request_ops, [ETHTOOL_MSG_DEBUG_NTF] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops, + [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, }; /* default notification handler */ @@ -613,6 +614,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_LINKMODES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify, [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) @@ -630,6 +632,29 @@ void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) } EXPORT_SYMBOL(ethtool_notify); +static void ethnl_notify_features(struct netdev_notifier_info *info) +{ + struct net_device *dev = netdev_notifier_info_to_dev(info); + + ethtool_notify(dev, ETHTOOL_MSG_FEATURES_NTF, NULL); +} + +static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + switch (event) { + case NETDEV_FEAT_CHANGE: + ethnl_notify_features(ptr); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block ethnl_netdev_notifier = { + .notifier_call = ethnl_netdev_event, +}; + /* genetlink setup */ static const struct genl_ops ethtool_genl_ops[] = { @@ -736,7 +761,9 @@ static int __init ethnl_init(void) return ret; ethnl_ok = true; - return 0; + ret = register_netdevice_notifier(ðnl_netdev_notifier); + WARN(ret < 0, "ethtool: net device notifier registration failed"); + return ret; } subsys_initcall(ethnl_init); -- cgit v1.2.3 From e16c3386fc4d9611b6a2abaa639318c7ae793370 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:08 +0100 Subject: ethtool: provide private flags with PRIVFLAGS_GET request Implement PRIVFLAGS_GET request to get private flags for a network device. These are traditionally available via ETHTOOL_GPFLAGS ioctl request. Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 30 +++++- include/uapi/linux/ethtool_netlink.h | 14 +++ net/ethtool/Makefile | 2 +- net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + net/ethtool/privflags.c | 136 +++++++++++++++++++++++++++ 6 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 net/ethtool/privflags.c (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 47542f042e9d..7bba4c940ef7 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -191,6 +191,7 @@ Userspace to kernel: ``ETHTOOL_MSG_WOL_SET`` set wake-on-lan settings ``ETHTOOL_MSG_FEATURES_GET`` get device features ``ETHTOOL_MSG_FEATURES_SET`` set device features + ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags ===================================== ================================ Kernel to userspace: @@ -209,6 +210,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEATURES_GET_REPLY`` device features ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification + ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -598,6 +600,32 @@ request but also each time features are modified with netdev_update_features() or netdev_change_features(). +PRIVFLAGS_GET +============= + +Gets private flags like ``ETHTOOL_GPFLAGS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested reply header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` is a bitset with values of device private flags. +These flags are defined by driver, their number and names (and also meaning) +are device dependent. For compact bitset format, names can be retrieved as +``ETH_SS_PRIV_FLAGS`` string set. If verbose bitset format is requested, +response uses all private flags supported by the device as mask so that client +gets the full information without having to fetch the string set with names. + + Request translation =================== @@ -647,7 +675,7 @@ have their netlink replacement yet. ``ETHTOOL_SGSO`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` - ``ETHTOOL_GPFLAGS`` n/a + ``ETHTOOL_GPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_GET`` ``ETHTOOL_SPFLAGS`` n/a ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 3d0204cf96a6..d94bbf5e4d1c 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -26,6 +26,7 @@ enum { ETHTOOL_MSG_WOL_SET, ETHTOOL_MSG_FEATURES_GET, ETHTOOL_MSG_FEATURES_SET, + ETHTOOL_MSG_PRIVFLAGS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -48,6 +49,7 @@ enum { ETHTOOL_MSG_FEATURES_GET_REPLY, ETHTOOL_MSG_FEATURES_SET_REPLY, ETHTOOL_MSG_FEATURES_NTF, + ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -248,6 +250,18 @@ enum { ETHTOOL_A_FEATURES_MAX = __ETHTOOL_A_FEATURES_CNT - 1 }; +/* PRIVFLAGS */ + +enum { + ETHTOOL_A_PRIVFLAGS_UNSPEC, + ETHTOOL_A_PRIVFLAGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_PRIVFLAGS_FLAGS, /* bitset */ + + /* add new constants above here */ + __ETHTOOL_A_PRIVFLAGS_CNT, + ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 5be8c9ab26d1..58708bcc2968 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,4 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o features.o + linkstate.o debug.o wol.o features.o privflags.o diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 5c0e361bfd66..9cbb1d8b4d23 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -216,6 +216,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_DEBUG_GET] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, + [ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -733,6 +734,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_features, }, + { + .cmd = ETHTOOL_MSG_PRIVFLAGS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 135836201e89..36cf077c3085 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -338,6 +338,7 @@ extern const struct ethnl_request_ops ethnl_linkstate_request_ops; extern const struct ethnl_request_ops ethnl_debug_request_ops; extern const struct ethnl_request_ops ethnl_wol_request_ops; extern const struct ethnl_request_ops ethnl_features_request_ops; +extern const struct ethnl_request_ops ethnl_privflags_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c new file mode 100644 index 000000000000..169dd4a832f6 --- /dev/null +++ b/net/ethtool/privflags.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" +#include "bitset.h" + +struct privflags_req_info { + struct ethnl_req_info base; +}; + +struct privflags_reply_data { + struct ethnl_reply_data base; + const char (*priv_flag_names)[ETH_GSTRING_LEN]; + unsigned int n_priv_flags; + u32 priv_flags; +}; + +#define PRIVFLAGS_REPDATA(__reply_base) \ + container_of(__reply_base, struct privflags_reply_data, base) + +static const struct nla_policy +privflags_get_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { + [ETHTOOL_A_PRIVFLAGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_PRIVFLAGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_REJECT }, +}; + +static int ethnl_get_priv_flags_info(struct net_device *dev, + unsigned int *count, + const char (**names)[ETH_GSTRING_LEN]) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + int nflags; + + nflags = ops->get_sset_count(dev, ETH_SS_PRIV_FLAGS); + if (nflags < 0) + return nflags; + + if (names) { + *names = kcalloc(nflags, ETH_GSTRING_LEN, GFP_KERNEL); + if (!*names) + return -ENOMEM; + ops->get_strings(dev, ETH_SS_PRIV_FLAGS, (u8 *)*names); + } + + /* We can pass more than 32 private flags to userspace via netlink but + * we cannot get more with ethtool_ops::get_priv_flags(). Note that we + * must not adjust nflags before allocating the space for flag names + * as the buffer must be large enough for all flags. + */ + if (WARN_ONCE(nflags > 32, + "device %s reports more than 32 private flags (%d)\n", + netdev_name(dev), nflags)) + nflags = 32; + *count = nflags; + + return 0; +} + +static int privflags_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + const char (*names)[ETH_GSTRING_LEN]; + const struct ethtool_ops *ops; + unsigned int nflags; + int ret; + + ops = dev->ethtool_ops; + if (!ops->get_priv_flags || !ops->get_sset_count || !ops->get_strings) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + + ret = ethnl_get_priv_flags_info(dev, &nflags, &names); + if (ret < 0) + goto out_ops; + data->priv_flags = ops->get_priv_flags(dev); + data->priv_flag_names = names; + data->n_priv_flags = nflags; + +out_ops: + ethnl_ops_complete(dev); + return ret; +} + +static int privflags_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); + + return ethnl_bitset32_size(&data->priv_flags, &all_flags, + data->n_priv_flags, + data->priv_flag_names, compact); +} + +static int privflags_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_base); + bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS; + const u32 all_flags = ~(u32)0 >> (32 - data->n_priv_flags); + + return ethnl_put_bitset32(skb, ETHTOOL_A_PRIVFLAGS_FLAGS, + &data->priv_flags, &all_flags, + data->n_priv_flags, data->priv_flag_names, + compact); +} + +static void privflags_cleanup_data(struct ethnl_reply_data *reply_data) +{ + struct privflags_reply_data *data = PRIVFLAGS_REPDATA(reply_data); + + kfree(data->priv_flag_names); +} + +const struct ethnl_request_ops ethnl_privflags_request_ops = { + .request_cmd = ETHTOOL_MSG_PRIVFLAGS_GET, + .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER, + .max_attr = ETHTOOL_A_PRIVFLAGS_MAX, + .req_info_size = sizeof(struct privflags_req_info), + .reply_data_size = sizeof(struct privflags_reply_data), + .request_policy = privflags_get_policy, + + .prepare_data = privflags_prepare_data, + .reply_size = privflags_reply_size, + .fill_reply = privflags_fill_reply, + .cleanup_data = privflags_cleanup_data, +}; -- cgit v1.2.3 From f265d799596aff80fe19b0b3896a4abe13cdb534 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:13 +0100 Subject: ethtool: set device private flags with PRIVFLAGS_SET request Implement PRIVFLAGS_SET netlink request to set private flags of a network device. These are traditionally set with ETHTOOL_SPFLAGS ioctl request. Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 21 ++++++++- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.c | 5 ++ net/ethtool/netlink.h | 1 + net/ethtool/privflags.c | 70 ++++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 7bba4c940ef7..e553112fa2d7 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -192,6 +192,7 @@ Userspace to kernel: ``ETHTOOL_MSG_FEATURES_GET`` get device features ``ETHTOOL_MSG_FEATURES_SET`` set device features ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags + ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags ===================================== ================================ Kernel to userspace: @@ -211,6 +212,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEATURES_SET_REPLY`` optional reply to FEATURES_SET ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags + ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -626,6 +628,23 @@ response uses all private flags supported by the device as mask so that client gets the full information without having to fetch the string set with names. +PRIVFLAGS_SET +============= + +Sets or modifies values of device private flags like ``ETHTOOL_SPFLAGS`` +ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_PRIVFLAGS_HEADER`` nested request header + ``ETHTOOL_A_PRIVFLAGS_FLAGS`` bitset private flags + ==================================== ====== ========================== + +``ETHTOOL_A_PRIVFLAGS_FLAGS`` can either set the whole set of private flags or +modify only values of some of them. + + Request translation =================== @@ -676,7 +695,7 @@ have their netlink replacement yet. ``ETHTOOL_GFLAGS`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFLAGS`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_GET`` - ``ETHTOOL_SPFLAGS`` n/a + ``ETHTOOL_SPFLAGS`` ``ETHTOOL_MSG_PRIVFLAGS_SET`` ``ETHTOOL_GRXFH`` n/a ``ETHTOOL_SRXFH`` n/a ``ETHTOOL_GGRO`` ``ETHTOOL_MSG_FEATURES_GET`` diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d94bbf5e4d1c..13e631c86825 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -27,6 +27,7 @@ enum { ETHTOOL_MSG_FEATURES_GET, ETHTOOL_MSG_FEATURES_SET, ETHTOOL_MSG_PRIVFLAGS_GET, + ETHTOOL_MSG_PRIVFLAGS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 9cbb1d8b4d23..b6795aad7ccb 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -741,6 +741,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_PRIVFLAGS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_privflags, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 36cf077c3085..ca789f587ef3 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -345,5 +345,6 @@ int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info); int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c index 169dd4a832f6..dfa76d552277 100644 --- a/net/ethtool/privflags.c +++ b/net/ethtool/privflags.c @@ -134,3 +134,73 @@ const struct ethnl_request_ops ethnl_privflags_request_ops = { .fill_reply = privflags_fill_reply, .cleanup_data = privflags_cleanup_data, }; + +/* PRIVFLAGS_SET */ + +static const struct nla_policy +privflags_set_policy[ETHTOOL_A_PRIVFLAGS_MAX + 1] = { + [ETHTOOL_A_PRIVFLAGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_PRIVFLAGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_NESTED }, +}; + +int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_PRIVFLAGS_MAX + 1]; + const char (*names)[ETH_GSTRING_LEN] = NULL; + struct ethnl_req_info req_info = {}; + const struct ethtool_ops *ops; + struct net_device *dev; + unsigned int nflags; + bool mod = false; + bool compact; + u32 flags; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_PRIVFLAGS_MAX, privflags_set_policy, + info->extack); + if (ret < 0) + return ret; + if (!tb[ETHTOOL_A_PRIVFLAGS_FLAGS]) + return -EINVAL; + ret = ethnl_bitset_is_compact(tb[ETHTOOL_A_PRIVFLAGS_FLAGS], &compact); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_PRIVFLAGS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + if (!ops->get_priv_flags || !ops->set_priv_flags || + !ops->get_sset_count || !ops->get_strings) + return -EOPNOTSUPP; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ret = ethnl_get_priv_flags_info(dev, &nflags, compact ? NULL : &names); + if (ret < 0) + goto out_ops; + flags = ops->get_priv_flags(dev); + + ret = ethnl_update_bitset32(&flags, nflags, + tb[ETHTOOL_A_PRIVFLAGS_FLAGS], names, + info->extack, &mod); + if (ret < 0 || !mod) + goto out_free; + ret = ops->set_priv_flags(dev, flags); + +out_free: + kfree(names); +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); + dev_put(dev); + return ret; +} -- cgit v1.2.3 From 111dcba3c694b4ca7dc8b6abfd575745dd51be3d Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:18 +0100 Subject: ethtool: add PRIVFLAGS_NTF notification Send ETHTOOL_MSG_PRIVFLAGS_NTF notification whenever private flags of a network device are modified using ETHTOOL_MSG_PRIVFLAGS_SET netlink message or ETHTOOL_SPFLAGS ioctl request. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/ioctl.c | 2 ++ net/ethtool/netlink.c | 2 ++ net/ethtool/privflags.c | 3 +++ 4 files changed, 8 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 13e631c86825..7f23a7f0fca1 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -51,6 +51,7 @@ enum { ETHTOOL_MSG_FEATURES_SET_REPLY, ETHTOOL_MSG_FEATURES_NTF, ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, + ETHTOOL_MSG_PRIVFLAGS_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 45d1bf1764b7..298822289496 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -2716,6 +2716,8 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPFLAGS: rc = ethtool_get_value(dev, useraddr, ethcmd, dev->ethtool_ops->get_priv_flags); + if (!rc) + ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL); break; case ETHTOOL_SPFLAGS: rc = ethtool_set_value(dev, useraddr, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index b6795aad7ccb..dec82b0b80bd 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -530,6 +530,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_DEBUG_NTF] = ðnl_debug_request_ops, [ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, + [ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops, }; /* default notification handler */ @@ -616,6 +617,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_DEBUG_NTF] = ethnl_default_notify, [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c index dfa76d552277..e8f03b33db9b 100644 --- a/net/ethtool/privflags.c +++ b/net/ethtool/privflags.c @@ -194,6 +194,9 @@ int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info) if (ret < 0 || !mod) goto out_free; ret = ops->set_priv_flags(dev, flags); + if (ret < 0) + goto out_free; + ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL); out_free: kfree(names); -- cgit v1.2.3 From e4a1717b677c5cb285e9b425c569e261084a484c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:23 +0100 Subject: ethtool: provide ring sizes with RINGS_GET request Implement RINGS_GET request to get ring sizes of a network device. These are traditionally available via ETHTOOL_GRINGPARAM ioctl request. Omit attributes for ring types which are not supported by driver or device (zero reported for maximum). v2: (all suggested by Jakub Kicinski) - minor cleanup in rings_prepare_data() - more descriptive rings_reply_size() - omit attributes with zero max size Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 30 +++++++- include/uapi/linux/ethtool_netlink.h | 21 ++++++ net/ethtool/Makefile | 2 +- net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + net/ethtool/rings.c | 108 +++++++++++++++++++++++++++ 6 files changed, 168 insertions(+), 2 deletions(-) create mode 100644 net/ethtool/rings.c (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index e553112fa2d7..798c2f97d89b 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -193,6 +193,7 @@ Userspace to kernel: ``ETHTOOL_MSG_FEATURES_SET`` set device features ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags + ``ETHTOOL_MSG_RINGS_GET`` get ring sizes ===================================== ================================ Kernel to userspace: @@ -213,6 +214,7 @@ Kernel to userspace: ``ETHTOOL_MSG_FEATURES_NTF`` netdev features notification ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags + ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -645,6 +647,32 @@ Request contents: modify only values of some of them. +RINGS_GET +========= + +Gets ring sizes like ``ETHTOOL_GRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX_MAX`` u32 max size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI_MAX`` u32 max size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO_MAX`` u32 max size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX_MAX`` u32 max size of TX ring + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + + Request translation =================== @@ -671,7 +699,7 @@ have their netlink replacement yet. ``ETHTOOL_SEEPROM`` n/a ``ETHTOOL_GCOALESCE`` n/a ``ETHTOOL_SCOALESCE`` n/a - ``ETHTOOL_GRINGPARAM`` n/a + ``ETHTOOL_GRINGPARAM`` ``ETHTOOL_MSG_RINGS_GET`` ``ETHTOOL_SRINGPARAM`` n/a ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7f23a7f0fca1..7cd220f8cf73 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -28,6 +28,7 @@ enum { ETHTOOL_MSG_FEATURES_SET, ETHTOOL_MSG_PRIVFLAGS_GET, ETHTOOL_MSG_PRIVFLAGS_SET, + ETHTOOL_MSG_RINGS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -52,6 +53,7 @@ enum { ETHTOOL_MSG_FEATURES_NTF, ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, ETHTOOL_MSG_PRIVFLAGS_NTF, + ETHTOOL_MSG_RINGS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -264,6 +266,25 @@ enum { ETHTOOL_A_PRIVFLAGS_MAX = __ETHTOOL_A_PRIVFLAGS_CNT - 1 }; +/* RINGS */ + +enum { + ETHTOOL_A_RINGS_UNSPEC, + ETHTOOL_A_RINGS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_RINGS_RX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO_MAX, /* u32 */ + ETHTOOL_A_RINGS_TX_MAX, /* u32 */ + ETHTOOL_A_RINGS_RX, /* u32 */ + ETHTOOL_A_RINGS_RX_MINI, /* u32 */ + ETHTOOL_A_RINGS_RX_JUMBO, /* u32 */ + ETHTOOL_A_RINGS_TX, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_RINGS_CNT, + ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 58708bcc2968..b48b70ef4ed0 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,4 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o features.o privflags.o + linkstate.o debug.o wol.o features.o privflags.o rings.o diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index dec82b0b80bd..0dc25a490450 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -217,6 +217,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_WOL_GET] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops, + [ETHTOOL_MSG_RINGS_GET] = ðnl_rings_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -748,6 +749,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_privflags, }, + { + .cmd = ETHTOOL_MSG_RINGS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index ca789f587ef3..01761932ed15 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -339,6 +339,7 @@ extern const struct ethnl_request_ops ethnl_debug_request_ops; extern const struct ethnl_request_ops ethnl_wol_request_ops; extern const struct ethnl_request_ops ethnl_features_request_ops; extern const struct ethnl_request_ops ethnl_privflags_request_ops; +extern const struct ethnl_request_ops ethnl_rings_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c new file mode 100644 index 000000000000..d3129d8a252d --- /dev/null +++ b/net/ethtool/rings.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" + +struct rings_req_info { + struct ethnl_req_info base; +}; + +struct rings_reply_data { + struct ethnl_reply_data base; + struct ethtool_ringparam ringparam; +}; + +#define RINGS_REPDATA(__reply_base) \ + container_of(__reply_base, struct rings_reply_data, base) + +static const struct nla_policy +rings_get_policy[ETHTOOL_A_RINGS_MAX + 1] = { + [ETHTOOL_A_RINGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_RINGS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX] = { .type = NLA_REJECT }, +}; + +static int rings_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct rings_reply_data *data = RINGS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + if (!dev->ethtool_ops->get_ringparam) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + dev->ethtool_ops->get_ringparam(dev, &data->ringparam); + ethnl_ops_complete(dev); + + return 0; +} + +static int rings_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u32)) + /* _RINGS_RX_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_TX_MAX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_MINI */ + nla_total_size(sizeof(u32)) + /* _RINGS_RX_JUMBO */ + nla_total_size(sizeof(u32)); /* _RINGS_TX */ +} + +static int rings_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct rings_reply_data *data = RINGS_REPDATA(reply_base); + const struct ethtool_ringparam *ringparam = &data->ringparam; + + if ((ringparam->rx_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MAX, + ringparam->rx_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX, + ringparam->rx_pending))) || + (ringparam->rx_mini_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI_MAX, + ringparam->rx_mini_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX_MINI, + ringparam->rx_mini_pending))) || + (ringparam->rx_jumbo_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO_MAX, + ringparam->rx_jumbo_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_RX_JUMBO, + ringparam->rx_jumbo_pending))) || + (ringparam->tx_max_pending && + (nla_put_u32(skb, ETHTOOL_A_RINGS_TX_MAX, + ringparam->tx_max_pending) || + nla_put_u32(skb, ETHTOOL_A_RINGS_TX, + ringparam->tx_pending)))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_rings_request_ops = { + .request_cmd = ETHTOOL_MSG_RINGS_GET, + .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY, + .hdr_attr = ETHTOOL_A_RINGS_HEADER, + .max_attr = ETHTOOL_A_RINGS_MAX, + .req_info_size = sizeof(struct rings_req_info), + .reply_data_size = sizeof(struct rings_reply_data), + .request_policy = rings_get_policy, + + .prepare_data = rings_prepare_data, + .reply_size = rings_reply_size, + .fill_reply = rings_fill_reply, +}; -- cgit v1.2.3 From 2fc2929e807211a9535a6541f24b57fa1c469728 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:28 +0100 Subject: ethtool: set device ring sizes with RINGS_SET request Implement RINGS_SET netlink request to set ring sizes of a network device. These are traditionally set with ETHTOOL_SRINGPARAM ioctl request. Like the ioctl implementation, the generic ethtool code checks if supplied values do not exceed driver defined limits; if they do, first offending attribute is reported using extack. v2: - fix netdev reference leak in error path (found by Jakub Kicinsky) Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 23 ++++++- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/netlink.c | 5 ++ net/ethtool/netlink.h | 1 + net/ethtool/rings.c | 89 ++++++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 798c2f97d89b..ba31ae8f1feb 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -194,6 +194,7 @@ Userspace to kernel: ``ETHTOOL_MSG_PRIVFLAGS_GET`` get private flags ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags ``ETHTOOL_MSG_RINGS_GET`` get ring sizes + ``ETHTOOL_MSG_RINGS_SET`` set ring sizes ===================================== ================================ Kernel to userspace: @@ -673,6 +674,26 @@ Kernel response contents: ==================================== ====== ========================== +RINGS_SET +========= + +Sets ring sizes like ``ETHTOOL_SRINGPARAM`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_RINGS_HEADER`` nested reply header + ``ETHTOOL_A_RINGS_RX`` u32 size of RX ring + ``ETHTOOL_A_RINGS_RX_MINI`` u32 size of RX mini ring + ``ETHTOOL_A_RINGS_RX_JUMBO`` u32 size of RX jumbo ring + ``ETHTOOL_A_RINGS_TX`` u32 size of TX ring + ==================================== ====== ========================== + +Kernel checks that requested ring sizes do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + Request translation =================== @@ -700,7 +721,7 @@ have their netlink replacement yet. ``ETHTOOL_GCOALESCE`` n/a ``ETHTOOL_SCOALESCE`` n/a ``ETHTOOL_GRINGPARAM`` ``ETHTOOL_MSG_RINGS_GET`` - ``ETHTOOL_SRINGPARAM`` n/a + ``ETHTOOL_SRINGPARAM`` ``ETHTOOL_MSG_RINGS_SET`` ``ETHTOOL_GPAUSEPARAM`` n/a ``ETHTOOL_SPAUSEPARAM`` n/a ``ETHTOOL_GRXCSUM`` ``ETHTOOL_MSG_FEATURES_GET`` diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 7cd220f8cf73..ae71801b7aac 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -29,6 +29,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_GET, ETHTOOL_MSG_PRIVFLAGS_SET, ETHTOOL_MSG_RINGS_GET, + ETHTOOL_MSG_RINGS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 0dc25a490450..6a1ac8897a7e 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -756,6 +756,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_RINGS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_rings, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 01761932ed15..b30426d01890 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -347,5 +347,6 @@ int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info); int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index d3129d8a252d..93f428e9a6c2 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -106,3 +106,92 @@ const struct ethnl_request_ops ethnl_rings_request_ops = { .reply_size = rings_reply_size, .fill_reply = rings_fill_reply, }; + +/* RINGS_SET */ + +static const struct nla_policy +rings_set_policy[ETHTOOL_A_RINGS_MAX + 1] = { + [ETHTOOL_A_RINGS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_RINGS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_MINI_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX_JUMBO_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_RINGS_RX] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_RX_MINI] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_RX_JUMBO] = { .type = NLA_U32 }, + [ETHTOOL_A_RINGS_TX] = { .type = NLA_U32 }, +}; + +int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_RINGS_MAX + 1]; + struct ethtool_ringparam ringparam = {}; + struct ethnl_req_info req_info = {}; + const struct nlattr *err_attr; + const struct ethtool_ops *ops; + struct net_device *dev; + bool mod = false; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_RINGS_MAX, rings_set_policy, + info->extack); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_RINGS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_ringparam || !ops->set_ringparam) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ops->get_ringparam(dev, &ringparam); + + ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod); + ethnl_update_u32(&ringparam.rx_mini_pending, + tb[ETHTOOL_A_RINGS_RX_MINI], &mod); + ethnl_update_u32(&ringparam.rx_jumbo_pending, + tb[ETHTOOL_A_RINGS_RX_JUMBO], &mod); + ethnl_update_u32(&ringparam.tx_pending, tb[ETHTOOL_A_RINGS_TX], &mod); + ret = 0; + if (!mod) + goto out_ops; + + /* ensure new ring parameters are within limits */ + if (ringparam.rx_pending > ringparam.rx_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX]; + else if (ringparam.rx_mini_pending > ringparam.rx_mini_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX_MINI]; + else if (ringparam.rx_jumbo_pending > ringparam.rx_jumbo_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_RX_JUMBO]; + else if (ringparam.tx_pending > ringparam.tx_max_pending) + err_attr = tb[ETHTOOL_A_RINGS_TX]; + else + err_attr = NULL; + if (err_attr) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + "requested ring size exceeeds maximum"); + goto out_ops; + } + + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} -- cgit v1.2.3 From bc9d1c995ecb8cccaac8ead2ed570544c2c885eb Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:33 +0100 Subject: ethtool: add RINGS_NTF notification Send ETHTOOL_MSG_RINGS_NTF notification whenever ring sizes of a network device are modified using ETHTOOL_MSG_RINGS_SET netlink message or ETHTOOL_SRINGPARAM ioctl request. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 1 + include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/ioctl.c | 6 +++++- net/ethtool/netlink.c | 2 ++ net/ethtool/rings.c | 3 +++ 5 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index ba31ae8f1feb..026a5fd4a08b 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -216,6 +216,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PRIVFLAGS_GET_REPLY`` private flags ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes + ``ETHTOOL_MSG_RINGS_NTF`` ring sizes ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index ae71801b7aac..abfc8fd626da 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -55,6 +55,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_GET_REPLY, ETHTOOL_MSG_PRIVFLAGS_NTF, ETHTOOL_MSG_RINGS_GET_REPLY, + ETHTOOL_MSG_RINGS_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 298822289496..1d5c1b6b81a4 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1635,6 +1635,7 @@ static int ethtool_get_ringparam(struct net_device *dev, void __user *useraddr) static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) { struct ethtool_ringparam ringparam, max = { .cmd = ETHTOOL_GRINGPARAM }; + int ret; if (!dev->ethtool_ops->set_ringparam || !dev->ethtool_ops->get_ringparam) return -EOPNOTSUPP; @@ -1651,7 +1652,10 @@ static int ethtool_set_ringparam(struct net_device *dev, void __user *useraddr) ringparam.tx_pending > max.tx_max_pending) return -EINVAL; - return dev->ethtool_ops->set_ringparam(dev, &ringparam); + ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); + if (!ret) + ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); + return ret; } static noinline_for_stack int ethtool_get_channels(struct net_device *dev, diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 6a1ac8897a7e..653e009216cd 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -532,6 +532,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_WOL_NTF] = ðnl_wol_request_ops, [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops, + [ETHTOOL_MSG_RINGS_NTF] = ðnl_rings_request_ops, }; /* default notification handler */ @@ -619,6 +620,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_WOL_NTF] = ethnl_default_notify, [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_RINGS_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index 93f428e9a6c2..c2ebf72be217 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -186,6 +186,9 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info) } ret = dev->ethtool_ops->set_ringparam(dev, &ringparam); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL); out_ops: ethnl_ops_complete(dev); -- cgit v1.2.3 From 0c84979c951a85fcb5c77ac2480fd476e283a07c Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:38 +0100 Subject: ethtool: provide channel counts with CHANNELS_GET request Implement CHANNELS_GET request to get channel counts of a network device. These are traditionally available via ETHTOOL_GCHANNELS ioctl request. Omit attributes for channel types which are not supported by driver or device (zero reported for maximum). v2: (all suggested by Jakub Kicinski) - minor cleanup in channels_prepare_data() - more descriptive channels_reply_size() - omit attributes with zero max count Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 30 +++++++- include/uapi/linux/ethtool_netlink.h | 21 ++++++ net/ethtool/Makefile | 3 +- net/ethtool/channels.c | 108 +++++++++++++++++++++++++++ net/ethtool/netlink.c | 8 ++ net/ethtool/netlink.h | 1 + 6 files changed, 169 insertions(+), 2 deletions(-) create mode 100644 net/ethtool/channels.c (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 026a5fd4a08b..decbbddfd8be 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -195,6 +195,7 @@ Userspace to kernel: ``ETHTOOL_MSG_PRIVFLAGS_SET`` set private flags ``ETHTOOL_MSG_RINGS_GET`` get ring sizes ``ETHTOOL_MSG_RINGS_SET`` set ring sizes + ``ETHTOOL_MSG_CHANNELS_GET`` get channel counts ===================================== ================================ Kernel to userspace: @@ -217,6 +218,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PRIVFLAGS_NTF`` private flags ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes ``ETHTOOL_MSG_RINGS_NTF`` ring sizes + ``ETHTOOL_MSG_CHANNELS_GET_REPLY`` channel counts ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -695,6 +697,32 @@ driver. Driver may impose additional constraints and may not suspport all attributes. +CHANNELS_GET +============ + +Gets channel counts like ``ETHTOOL_GCHANNELS`` ioctl request. + +Request contents: + + ==================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ==================================== ====== ========================== + +Kernel response contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested reply header + ``ETHTOOL_A_CHANNELS_RX_MAX`` u32 max receive channels + ``ETHTOOL_A_CHANNELS_TX_MAX`` u32 max transmit channels + ``ETHTOOL_A_CHANNELS_OTHER_MAX`` u32 max other channels + ``ETHTOOL_A_CHANNELS_COMBINED_MAX`` u32 max combined channels + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + + Request translation =================== @@ -765,7 +793,7 @@ have their netlink replacement yet. ``ETHTOOL_SRXFHINDIR`` n/a ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFEATURES`` ``ETHTOOL_MSG_FEATURES_SET`` - ``ETHTOOL_GCHANNELS`` n/a + ``ETHTOOL_GCHANNELS`` ``ETHTOOL_MSG_CHANNELS_GET`` ``ETHTOOL_SCHANNELS`` n/a ``ETHTOOL_SET_DUMP`` n/a ``ETHTOOL_GET_DUMP_FLAG`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index abfc8fd626da..2270eb115eca 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -30,6 +30,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_SET, ETHTOOL_MSG_RINGS_GET, ETHTOOL_MSG_RINGS_SET, + ETHTOOL_MSG_CHANNELS_GET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -56,6 +57,7 @@ enum { ETHTOOL_MSG_PRIVFLAGS_NTF, ETHTOOL_MSG_RINGS_GET_REPLY, ETHTOOL_MSG_RINGS_NTF, + ETHTOOL_MSG_CHANNELS_GET_REPLY, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -287,6 +289,25 @@ enum { ETHTOOL_A_RINGS_MAX = (__ETHTOOL_A_RINGS_CNT - 1) }; +/* CHANNELS */ + +enum { + ETHTOOL_A_CHANNELS_UNSPEC, + ETHTOOL_A_CHANNELS_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_CHANNELS_RX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_TX_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_MAX, /* u32 */ + ETHTOOL_A_CHANNELS_RX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_TX_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_OTHER_COUNT, /* u32 */ + ETHTOOL_A_CHANNELS_COMBINED_COUNT, /* u32 */ + + /* add new constants above here */ + __ETHTOOL_A_CHANNELS_CNT, + ETHTOOL_A_CHANNELS_MAX = (__ETHTOOL_A_CHANNELS_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index b48b70ef4ed0..b0bd3decad02 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -5,4 +5,5 @@ obj-y += ioctl.o common.o obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o \ - linkstate.o debug.o wol.o features.o privflags.o rings.o + linkstate.o debug.o wol.o features.o privflags.o rings.o \ + channels.o diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c new file mode 100644 index 000000000000..500dd5ad250b --- /dev/null +++ b/net/ethtool/channels.c @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "netlink.h" +#include "common.h" + +struct channels_req_info { + struct ethnl_req_info base; +}; + +struct channels_reply_data { + struct ethnl_reply_data base; + struct ethtool_channels channels; +}; + +#define CHANNELS_REPDATA(__reply_base) \ + container_of(__reply_base, struct channels_reply_data, base) + +static const struct nla_policy +channels_get_policy[ETHTOOL_A_CHANNELS_MAX + 1] = { + [ETHTOOL_A_CHANNELS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_CHANNELS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_RX_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_REJECT }, +}; + +static int channels_prepare_data(const struct ethnl_req_info *req_base, + struct ethnl_reply_data *reply_base, + struct genl_info *info) +{ + struct channels_reply_data *data = CHANNELS_REPDATA(reply_base); + struct net_device *dev = reply_base->dev; + int ret; + + if (!dev->ethtool_ops->get_channels) + return -EOPNOTSUPP; + ret = ethnl_ops_begin(dev); + if (ret < 0) + return ret; + dev->ethtool_ops->get_channels(dev, &data->channels); + ethnl_ops_complete(dev); + + return 0; +} + +static int channels_reply_size(const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + return nla_total_size(sizeof(u32)) + /* _CHANNELS_RX_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_TX_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_OTHER_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_COMBINED_MAX */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_RX_COUNT */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_TX_COUNT */ + nla_total_size(sizeof(u32)) + /* _CHANNELS_OTHER_COUNT */ + nla_total_size(sizeof(u32)); /* _CHANNELS_COMBINED_COUNT */ +} + +static int channels_fill_reply(struct sk_buff *skb, + const struct ethnl_req_info *req_base, + const struct ethnl_reply_data *reply_base) +{ + const struct channels_reply_data *data = CHANNELS_REPDATA(reply_base); + const struct ethtool_channels *channels = &data->channels; + + if ((channels->max_rx && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_RX_MAX, + channels->max_rx) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_RX_COUNT, + channels->rx_count))) || + (channels->max_tx && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_TX_MAX, + channels->max_tx) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_TX_COUNT, + channels->tx_count))) || + (channels->max_other && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_OTHER_MAX, + channels->max_other) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_OTHER_COUNT, + channels->other_count))) || + (channels->max_combined && + (nla_put_u32(skb, ETHTOOL_A_CHANNELS_COMBINED_MAX, + channels->max_combined) || + nla_put_u32(skb, ETHTOOL_A_CHANNELS_COMBINED_COUNT, + channels->combined_count)))) + return -EMSGSIZE; + + return 0; +} + +const struct ethnl_request_ops ethnl_channels_request_ops = { + .request_cmd = ETHTOOL_MSG_CHANNELS_GET, + .reply_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY, + .hdr_attr = ETHTOOL_A_CHANNELS_HEADER, + .max_attr = ETHTOOL_A_CHANNELS_MAX, + .req_info_size = sizeof(struct channels_req_info), + .reply_data_size = sizeof(struct channels_reply_data), + .request_policy = channels_get_policy, + + .prepare_data = channels_prepare_data, + .reply_size = channels_reply_size, + .fill_reply = channels_fill_reply, +}; diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 653e009216cd..ac35513abeed 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -218,6 +218,7 @@ ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = { [ETHTOOL_MSG_FEATURES_GET] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_GET] = ðnl_privflags_request_ops, [ETHTOOL_MSG_RINGS_GET] = ðnl_rings_request_ops, + [ETHTOOL_MSG_CHANNELS_GET] = ðnl_channels_request_ops, }; static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb) @@ -763,6 +764,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .doit = ethnl_set_rings, }, + { + .cmd = ETHTOOL_MSG_CHANNELS_GET, + .doit = ethnl_default_doit, + .start = ethnl_default_start, + .dumpit = ethnl_default_dumpit, + .done = ethnl_default_done, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index b30426d01890..53bfe720ff1a 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -340,6 +340,7 @@ extern const struct ethnl_request_ops ethnl_wol_request_ops; extern const struct ethnl_request_ops ethnl_features_request_ops; extern const struct ethnl_request_ops ethnl_privflags_request_ops; extern const struct ethnl_request_ops ethnl_rings_request_ops; +extern const struct ethnl_request_ops ethnl_channels_request_ops; int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info); int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info); -- cgit v1.2.3 From e19c591eafad4d723a2eed385e253eca0506cc25 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:43 +0100 Subject: ethtool: set device channel counts with CHANNELS_SET request Implement CHANNELS_SET netlink request to set channel counts of a network device. These are traditionally set with ETHTOOL_SCHANNELS ioctl request. Like the ioctl implementation, the generic ethtool code checks if supplied values do not exceed driver defined limits; if they do, first offending attribute is reported using extack. Checks preventing removing channels used for RX indirection table or zerocopy AF_XDP socket are also implemented. Move ethtool_get_max_rxfh_channel() helper into common.c so that it can be used by both ioctl and netlink code. v2: - fix netdev reference leak in error path (found by Jakub Kicinsky) Signed-off-by: Michal Kubecek Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 23 +++++- include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/channels.c | 116 +++++++++++++++++++++++++++ net/ethtool/common.c | 31 +++++++ net/ethtool/common.h | 1 + net/ethtool/ioctl.c | 31 ------- net/ethtool/netlink.c | 5 ++ net/ethtool/netlink.h | 1 + 8 files changed, 177 insertions(+), 32 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index decbbddfd8be..7df7476cf310 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -196,6 +196,7 @@ Userspace to kernel: ``ETHTOOL_MSG_RINGS_GET`` get ring sizes ``ETHTOOL_MSG_RINGS_SET`` set ring sizes ``ETHTOOL_MSG_CHANNELS_GET`` get channel counts + ``ETHTOOL_MSG_CHANNELS_SET`` set channel counts ===================================== ================================ Kernel to userspace: @@ -723,6 +724,26 @@ Kernel response contents: ===================================== ====== ========================== +CHANNELS_SET +============ + +Sets channel counts like ``ETHTOOL_SCHANNELS`` ioctl request. + +Request contents: + + ===================================== ====== ========================== + ``ETHTOOL_A_CHANNELS_HEADER`` nested request header + ``ETHTOOL_A_CHANNELS_RX_COUNT`` u32 receive channel count + ``ETHTOOL_A_CHANNELS_TX_COUNT`` u32 transmit channel count + ``ETHTOOL_A_CHANNELS_OTHER_COUNT`` u32 other channel count + ``ETHTOOL_A_CHANNELS_COMBINED_COUNT`` u32 combined channel count + ===================================== ====== ========================== + +Kernel checks that requested channel counts do not exceed limits reported by +driver. Driver may impose additional constraints and may not suspport all +attributes. + + Request translation =================== @@ -794,7 +815,7 @@ have their netlink replacement yet. ``ETHTOOL_GFEATURES`` ``ETHTOOL_MSG_FEATURES_GET`` ``ETHTOOL_SFEATURES`` ``ETHTOOL_MSG_FEATURES_SET`` ``ETHTOOL_GCHANNELS`` ``ETHTOOL_MSG_CHANNELS_GET`` - ``ETHTOOL_SCHANNELS`` n/a + ``ETHTOOL_SCHANNELS`` ``ETHTOOL_MSG_CHANNELS_SET`` ``ETHTOOL_SET_DUMP`` n/a ``ETHTOOL_GET_DUMP_FLAG`` n/a ``ETHTOOL_GET_DUMP_DATA`` n/a diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 2270eb115eca..f1384a8f3534 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -31,6 +31,7 @@ enum { ETHTOOL_MSG_RINGS_GET, ETHTOOL_MSG_RINGS_SET, ETHTOOL_MSG_CHANNELS_GET, + ETHTOOL_MSG_CHANNELS_SET, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 500dd5ad250b..ee232c11acae 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -1,5 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only +#include + #include "netlink.h" #include "common.h" @@ -106,3 +108,117 @@ const struct ethnl_request_ops ethnl_channels_request_ops = { .reply_size = channels_reply_size, .fill_reply = channels_fill_reply, }; + +/* CHANNELS_SET */ + +static const struct nla_policy +channels_set_policy[ETHTOOL_A_CHANNELS_MAX + 1] = { + [ETHTOOL_A_CHANNELS_UNSPEC] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_HEADER] = { .type = NLA_NESTED }, + [ETHTOOL_A_CHANNELS_RX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_TX_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_OTHER_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_COMBINED_MAX] = { .type = NLA_REJECT }, + [ETHTOOL_A_CHANNELS_RX_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_TX_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_OTHER_COUNT] = { .type = NLA_U32 }, + [ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_U32 }, +}; + +int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr *tb[ETHTOOL_A_CHANNELS_MAX + 1]; + unsigned int from_channel, old_total, i; + struct ethtool_channels channels = {}; + struct ethnl_req_info req_info = {}; + const struct nlattr *err_attr; + const struct ethtool_ops *ops; + struct net_device *dev; + u32 max_rx_in_use = 0; + bool mod = false; + int ret; + + ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb, + ETHTOOL_A_CHANNELS_MAX, channels_set_policy, + info->extack); + if (ret < 0) + return ret; + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_CHANNELS_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + ops = dev->ethtool_ops; + ret = -EOPNOTSUPP; + if (!ops->get_channels || !ops->set_channels) + goto out_dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + ops->get_channels(dev, &channels); + old_total = channels.combined_count + + max(channels.rx_count, channels.tx_count); + + ethnl_update_u32(&channels.rx_count, tb[ETHTOOL_A_CHANNELS_RX_COUNT], + &mod); + ethnl_update_u32(&channels.tx_count, tb[ETHTOOL_A_CHANNELS_TX_COUNT], + &mod); + ethnl_update_u32(&channels.other_count, + tb[ETHTOOL_A_CHANNELS_OTHER_COUNT], &mod); + ethnl_update_u32(&channels.combined_count, + tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod); + ret = 0; + if (!mod) + goto out_ops; + + /* ensure new channel counts are within limits */ + if (channels.rx_count > channels.max_rx) + err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT]; + else if (channels.tx_count > channels.max_tx) + err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT]; + else if (channels.other_count > channels.max_other) + err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT]; + else if (channels.combined_count > channels.max_combined) + err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT]; + else + err_attr = NULL; + if (err_attr) { + ret = -EINVAL; + NL_SET_ERR_MSG_ATTR(info->extack, err_attr, + "requested channel count exceeeds maximum"); + goto out_ops; + } + + /* ensure the new Rx count fits within the configured Rx flow + * indirection table settings + */ + if (netif_is_rxfh_configured(dev) && + !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && + (channels.combined_count + channels.rx_count) <= max_rx_in_use) { + GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings"); + return -EINVAL; + } + + /* Disabling channels, query zero-copy AF_XDP sockets */ + from_channel = channels.combined_count + + min(channels.rx_count, channels.tx_count); + for (i = from_channel; i < old_total; i++) + if (xdp_get_umem_from_qid(dev, i)) { + GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); + return -EINVAL; + } + + ret = dev->ethtool_ops->set_channels(dev, &channels); + +out_ops: + ethnl_ops_complete(dev); +out_rtnl: + rtnl_unlock(); +out_dev: + dev_put(dev); + return ret; +} diff --git a/net/ethtool/common.c b/net/ethtool/common.c index 7b6969af5ae7..0b22741b2f8f 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -258,3 +258,34 @@ int __ethtool_get_link(struct net_device *dev) return netif_running(dev) && dev->ethtool_ops->get_link(dev); } + +int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) +{ + u32 dev_size, current_max = 0; + u32 *indir; + int ret; + + if (!dev->ethtool_ops->get_rxfh_indir_size || + !dev->ethtool_ops->get_rxfh) + return -EOPNOTSUPP; + dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); + if (dev_size == 0) + return -EOPNOTSUPP; + + indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); + if (!indir) + return -ENOMEM; + + ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); + if (ret) + goto out; + + while (dev_size--) + current_max = max(current_max, indir[dev_size]); + + *max = current_max; + +out: + kfree(indir); + return ret; +} diff --git a/net/ethtool/common.h b/net/ethtool/common.h index 7dc1163800a7..03946e16e623 100644 --- a/net/ethtool/common.h +++ b/net/ethtool/common.h @@ -29,5 +29,6 @@ int __ethtool_get_link(struct net_device *dev); bool convert_legacy_settings_to_link_ksettings( struct ethtool_link_ksettings *link_ksettings, const struct ethtool_cmd *legacy_settings); +int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max); #endif /* _ETHTOOL_COMMON_H */ diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 1d5c1b6b81a4..06224a03139e 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -929,37 +929,6 @@ void netdev_rss_key_fill(void *buffer, size_t len) } EXPORT_SYMBOL(netdev_rss_key_fill); -static int ethtool_get_max_rxfh_channel(struct net_device *dev, u32 *max) -{ - u32 dev_size, current_max = 0; - u32 *indir; - int ret; - - if (!dev->ethtool_ops->get_rxfh_indir_size || - !dev->ethtool_ops->get_rxfh) - return -EOPNOTSUPP; - dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev); - if (dev_size == 0) - return -EOPNOTSUPP; - - indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER); - if (!indir) - return -ENOMEM; - - ret = dev->ethtool_ops->get_rxfh(dev, indir, NULL, NULL); - if (ret) - goto out; - - while (dev_size--) - current_max = max(current_max, indir[dev_size]); - - *max = current_max; - -out: - kfree(indir); - return ret; -} - static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev, void __user *useraddr) { diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index ac35513abeed..f61654b8f210 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -771,6 +771,11 @@ static const struct genl_ops ethtool_genl_ops[] = { .dumpit = ethnl_default_dumpit, .done = ethnl_default_done, }, + { + .cmd = ETHTOOL_MSG_CHANNELS_SET, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_set_channels, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 53bfe720ff1a..45aad99a6021 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -349,5 +349,6 @@ int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info); int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info); int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info); +int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info); #endif /* _NET_ETHTOOL_NETLINK_H */ -- cgit v1.2.3 From 546379b9a01b6bdb5b267f8e7433944d5364cbf2 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Thu, 12 Mar 2020 21:08:48 +0100 Subject: ethtool: add CHANNELS_NTF notification Send ETHTOOL_MSG_CHANNELS_NTF notification whenever channel counts of a network device are modified using ETHTOOL_MSG_CHANNELS_SET netlink message or ETHTOOL_SCHANNELS ioctl request. Signed-off-by: Michal Kubecek Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 1 + include/uapi/linux/ethtool_netlink.h | 1 + net/ethtool/channels.c | 3 +++ net/ethtool/ioctl.c | 6 +++++- net/ethtool/netlink.c | 2 ++ 5 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 7df7476cf310..31a601cafa3f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -220,6 +220,7 @@ Kernel to userspace: ``ETHTOOL_MSG_RINGS_GET_REPLY`` ring sizes ``ETHTOOL_MSG_RINGS_NTF`` ring sizes ``ETHTOOL_MSG_CHANNELS_GET_REPLY`` channel counts + ``ETHTOOL_MSG_CHANNELS_NTF`` channel counts ===================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index f1384a8f3534..c7c7a1a550af 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -59,6 +59,7 @@ enum { ETHTOOL_MSG_RINGS_GET_REPLY, ETHTOOL_MSG_RINGS_NTF, ETHTOOL_MSG_CHANNELS_GET_REPLY, + ETHTOOL_MSG_CHANNELS_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index ee232c11acae..8dc5485333a4 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -213,6 +213,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) } ret = dev->ethtool_ops->set_channels(dev, &channels); + if (ret < 0) + goto out_ops; + ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL); out_ops: ethnl_ops_complete(dev); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 06224a03139e..258840b19fb5 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -1649,6 +1649,7 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, u16 from_channel, to_channel; u32 max_rx_in_use = 0; unsigned int i; + int ret; if (!dev->ethtool_ops->set_channels || !dev->ethtool_ops->get_channels) return -EOPNOTSUPP; @@ -1680,7 +1681,10 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (xdp_get_umem_from_qid(dev, i)) return -EINVAL; - return dev->ethtool_ops->set_channels(dev, &channels); + ret = dev->ethtool_ops->set_channels(dev, &channels); + if (!ret) + ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL); + return ret; } static int ethtool_get_pauseparam(struct net_device *dev, void __user *useraddr) diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index f61654b8f210..55c8ce4019d9 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -534,6 +534,7 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = { [ETHTOOL_MSG_FEATURES_NTF] = ðnl_features_request_ops, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ðnl_privflags_request_ops, [ETHTOOL_MSG_RINGS_NTF] = ðnl_rings_request_ops, + [ETHTOOL_MSG_CHANNELS_NTF] = ðnl_channels_request_ops, }; /* default notification handler */ @@ -622,6 +623,7 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = { [ETHTOOL_MSG_FEATURES_NTF] = ethnl_default_notify, [ETHTOOL_MSG_PRIVFLAGS_NTF] = ethnl_default_notify, [ETHTOOL_MSG_RINGS_NTF] = ethnl_default_notify, + [ETHTOOL_MSG_CHANNELS_NTF] = ethnl_default_notify, }; void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data) -- cgit v1.2.3 From b4490c5c4e023f09b7d27c9a9d3e7ad7d09ea6bf Mon Sep 17 00:00:00 2001 From: Carlos Neira Date: Wed, 4 Mar 2020 17:41:56 -0300 Subject: bpf: Added new helper bpf_get_ns_current_pid_tgid New bpf helper bpf_get_ns_current_pid_tgid, This helper will return pid and tgid from current task which namespace matches dev_t and inode number provided, this will allows us to instrument a process inside a container. Signed-off-by: Carlos Neira Signed-off-by: Alexei Starovoitov Acked-by: Yonghong Song Link: https://lore.kernel.org/bpf/20200304204157.58695-3-cneirabustos@gmail.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 20 ++++++++++++++++++- kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 45 ++++++++++++++++++++++++++++++++++++++++++ kernel/trace/bpf_trace.c | 2 ++ scripts/bpf_helpers_doc.py | 1 + tools/include/uapi/linux/bpf.h | 20 ++++++++++++++++++- 7 files changed, 88 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4fd91b7c95ea..4ec835334a1f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1497,6 +1497,7 @@ extern const struct bpf_func_proto bpf_strtol_proto; extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; +extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 40b2d9476268..15b239da775b 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2914,6 +2914,19 @@ union bpf_attr { * of sizeof(struct perf_branch_entry). * * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3035,7 +3048,8 @@ union bpf_attr { FN(tcp_send_ack), \ FN(send_signal_thread), \ FN(jiffies64), \ - FN(read_branch_records), + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3829,4 +3843,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 973a20d49749..0f9ca46e1978 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2149,6 +2149,7 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; const struct bpf_func_proto bpf_get_current_comm_proto __weak; const struct bpf_func_proto bpf_get_current_cgroup_id_proto __weak; const struct bpf_func_proto bpf_get_local_storage_proto __weak; +const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto __weak; const struct bpf_func_proto * __weak bpf_get_trace_printk_proto(void) { diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index d8b7b110a1c5..01878db15eaf 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include "../../lib/kstrtox.h" @@ -499,3 +501,46 @@ const struct bpf_func_proto bpf_strtoul_proto = { .arg4_type = ARG_PTR_TO_LONG, }; #endif + +BPF_CALL_4(bpf_get_ns_current_pid_tgid, u64, dev, u64, ino, + struct bpf_pidns_info *, nsdata, u32, size) +{ + struct task_struct *task = current; + struct pid_namespace *pidns; + int err = -EINVAL; + + if (unlikely(size != sizeof(struct bpf_pidns_info))) + goto clear; + + if (unlikely((u64)(dev_t)dev != dev)) + goto clear; + + if (unlikely(!task)) + goto clear; + + pidns = task_active_pid_ns(task); + if (unlikely(!pidns)) { + err = -ENOENT; + goto clear; + } + + if (!ns_match(&pidns->ns, (dev_t)dev, ino)) + goto clear; + + nsdata->pid = task_pid_nr_ns(task, pidns); + nsdata->tgid = task_tgid_nr_ns(task, pidns); + return 0; +clear: + memset((void *)nsdata, 0, (size_t) size); + return err; +} + +const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { + .func = bpf_get_ns_current_pid_tgid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_PTR_TO_UNINIT_MEM, + .arg4_type = ARG_CONST_SIZE, +}; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 6a490d8ce9de..b5071c7e93ca 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -843,6 +843,8 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_send_signal_thread_proto; case BPF_FUNC_perf_event_read_value: return &bpf_perf_event_read_value_proto; + case BPF_FUNC_get_ns_current_pid_tgid: + return &bpf_get_ns_current_pid_tgid_proto; default: return NULL; } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index cebed6fb5bbb..c1e2b5410faa 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -435,6 +435,7 @@ class PrinterHelpers(Printer): 'struct bpf_fib_lookup', 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', + 'struct bpf_pidns_info', 'struct bpf_sock', 'struct bpf_sock_addr', 'struct bpf_sock_ops', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 40b2d9476268..15b239da775b 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2914,6 +2914,19 @@ union bpf_attr { * of sizeof(struct perf_branch_entry). * * **-ENOENT** if architecture does not support branch records. + * + * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * Description + * Returns 0 on success, values for *pid* and *tgid* as seen from the current + * *namespace* will be returned in *nsdata*. + * + * On failure, the returned value is one of the following: + * + * **-EINVAL** if dev and inum supplied don't match dev_t and inode number + * with nsfs of current task, or if dev conversion to dev_t lost high bits. + * + * **-ENOENT** if pidns does not exists for the current task. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3035,7 +3048,8 @@ union bpf_attr { FN(tcp_send_ack), \ FN(send_signal_thread), \ FN(jiffies64), \ - FN(read_branch_records), + FN(read_branch_records), \ + FN(get_ns_current_pid_tgid), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -3829,4 +3843,8 @@ struct bpf_sockopt { __s32 retval; }; +struct bpf_pidns_info { + __u32 pid; + __u32 tgid; +}; #endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From d831ee84bfc9173eecf30dbbc2553ae81b996c60 Mon Sep 17 00:00:00 2001 From: Eelco Chaudron Date: Fri, 6 Mar 2020 08:59:23 +0000 Subject: bpf: Add bpf_xdp_output() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce new helper that reuses existing xdp perf_event output implementation, but can be called from raw_tracepoint programs that receive 'struct xdp_buff *' as a tracepoint argument. Signed-off-by: Eelco Chaudron Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/158348514556.2239.11050972434793741444.stgit@xdp-tutorial --- include/uapi/linux/bpf.h | 26 ++++++++++- kernel/bpf/verifier.c | 4 +- kernel/trace/bpf_trace.c | 3 ++ net/core/filter.c | 16 ++++++- tools/include/uapi/linux/bpf.h | 26 ++++++++++- .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c | 53 ++++++++++++++++++++++ .../testing/selftests/bpf/progs/test_xdp_bpf2bpf.c | 24 ++++++++++ 7 files changed, 148 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 15b239da775b..5d01c5c7e598 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2927,6 +2927,29 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3049,7 +3072,8 @@ union bpf_attr { FN(send_signal_thread), \ FN(jiffies64), \ FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 55d376c53f7d..745f3cfdf3b2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3650,7 +3650,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output && func_id != BPF_FUNC_skb_output && - func_id != BPF_FUNC_perf_event_read_value) + func_id != BPF_FUNC_perf_event_read_value && + func_id != BPF_FUNC_xdp_output) goto error; break; case BPF_MAP_TYPE_STACK_TRACE: @@ -3740,6 +3741,7 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_perf_event_output: case BPF_FUNC_perf_event_read_value: case BPF_FUNC_skb_output: + case BPF_FUNC_xdp_output: if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) goto error; break; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b5071c7e93ca..e619eedb5919 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1145,6 +1145,7 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_raw_tp = { }; extern const struct bpf_func_proto bpf_skb_output_proto; +extern const struct bpf_func_proto bpf_xdp_output_proto; BPF_CALL_3(bpf_get_stackid_raw_tp, struct bpf_raw_tracepoint_args *, args, struct bpf_map *, map, u64, flags) @@ -1220,6 +1221,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) #ifdef CONFIG_NET case BPF_FUNC_skb_output: return &bpf_skb_output_proto; + case BPF_FUNC_xdp_output: + return &bpf_xdp_output_proto; #endif default: return raw_tp_prog_func_proto(func_id, prog); diff --git a/net/core/filter.c b/net/core/filter.c index cd0a532db4e7..22219544410f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4061,7 +4061,8 @@ BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; - if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + if (unlikely(!xdp || + xdp_size > (unsigned long)(xdp->data_end - xdp->data))) return -EFAULT; return bpf_event_output(map, flags, meta, meta_size, xdp->data, @@ -4079,6 +4080,19 @@ static const struct bpf_func_proto bpf_xdp_event_output_proto = { .arg5_type = ARG_CONST_SIZE_OR_ZERO, }; +static int bpf_xdp_output_btf_ids[5]; +const struct bpf_func_proto bpf_xdp_output_proto = { + .func = bpf_xdp_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + .btf_id = bpf_xdp_output_btf_ids, +}; + BPF_CALL_1(bpf_get_socket_cookie, struct sk_buff *, skb) { return skb->sk ? sock_gen_cookie(skb->sk) : 0; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 15b239da775b..5d01c5c7e598 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -2927,6 +2927,29 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * + * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * Description + * Write raw *data* blob into a special BPF perf event held by + * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf + * event must have the following attributes: **PERF_SAMPLE_RAW** + * as **sample_type**, **PERF_TYPE_SOFTWARE** as **type**, and + * **PERF_COUNT_SW_BPF_OUTPUT** as **config**. + * + * The *flags* are used to indicate the index in *map* for which + * the value must be put, masked with **BPF_F_INDEX_MASK**. + * Alternatively, *flags* can be set to **BPF_F_CURRENT_CPU** + * to indicate that the index of the current CPU core should be + * used. + * + * The value to write, of *size*, is passed through eBPF stack and + * pointed by *data*. + * + * *ctx* is a pointer to in-kernel struct xdp_buff. + * + * This helper is similar to **bpf_perf_eventoutput**\ () but + * restricted to raw_tracepoint bpf programs. + * Return + * 0 on success, or a negative error in case of failure. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3049,7 +3072,8 @@ union bpf_attr { FN(send_signal_thread), \ FN(jiffies64), \ FN(read_branch_records), \ - FN(get_ns_current_pid_tgid), + FN(get_ns_current_pid_tgid), \ + FN(xdp_output), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c index 4ba011031d4c..a0f688c37023 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c @@ -4,17 +4,51 @@ #include "test_xdp.skel.h" #include "test_xdp_bpf2bpf.skel.h" +struct meta { + int ifindex; + int pkt_len; +}; + +static void on_sample(void *ctx, int cpu, void *data, __u32 size) +{ + int duration = 0; + struct meta *meta = (struct meta *)data; + struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta); + + if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta), + "check_size", "size %u < %zu\n", + size, sizeof(pkt_v4) + sizeof(*meta))) + return; + + if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex", + "meta->ifindex = %d\n", meta->ifindex)) + return; + + if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len", + "meta->pkt_len = %zd\n", sizeof(pkt_v4))) + return; + + if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), + "check_packet_content", "content not the same\n")) + return; + + *(bool *)ctx = true; +} + void test_xdp_bpf2bpf(void) { __u32 duration = 0, retval, size; char buf[128]; int err, pkt_fd, map_fd; + bool passed = false; struct iphdr *iph = (void *)buf + sizeof(struct ethhdr); struct iptnl_info value4 = {.family = AF_INET}; struct test_xdp *pkt_skel = NULL; struct test_xdp_bpf2bpf *ftrace_skel = NULL; struct vip key4 = {.protocol = 6, .family = AF_INET}; struct bpf_program *prog; + struct perf_buffer *pb = NULL; + struct perf_buffer_opts pb_opts = {}; /* Load XDP program to introspect */ pkt_skel = test_xdp__open_and_load(); @@ -50,6 +84,14 @@ void test_xdp_bpf2bpf(void) if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err)) goto out; + /* Set up perf buffer */ + pb_opts.sample_cb = on_sample; + pb_opts.ctx = &passed; + pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), + 1, &pb_opts); + if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) + goto out; + /* Run test program */ err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); @@ -60,6 +102,15 @@ void test_xdp_bpf2bpf(void) err, errno, retval, size)) goto out; + /* Make sure bpf_xdp_output() was triggered and it sent the expected + * data to the perf ring buffer. + */ + err = perf_buffer__poll(pb, 100); + if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) + goto out; + + CHECK_FAIL(!passed); + /* Verify test results */ if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"), "result", "fentry failed err %llu\n", @@ -70,6 +121,8 @@ void test_xdp_bpf2bpf(void) "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit); out: + if (pb) + perf_buffer__free(pb); test_xdp__destroy(pkt_skel); test_xdp_bpf2bpf__destroy(ftrace_skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c index 42dd2fedd588..a038e827f850 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c @@ -3,6 +3,8 @@ #include #include +char _license[] SEC("license") = "GPL"; + struct net_device { /* Structure does not need to contain all entries, * as "preserve_access_index" will use BTF to fix this... @@ -27,10 +29,32 @@ struct xdp_buff { struct xdp_rxq_info *rxq; } __attribute__((preserve_access_index)); +struct meta { + int ifindex; + int pkt_len; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} perf_buf_map SEC(".maps"); + __u64 test_result_fentry = 0; SEC("fentry/FUNC") int BPF_PROG(trace_on_entry, struct xdp_buff *xdp) { + struct meta meta; + void *data_end = (void *)(long)xdp->data_end; + void *data = (void *)(long)xdp->data; + + meta.ifindex = xdp->rxq->dev->ifindex; + meta.pkt_len = data_end - data; + bpf_xdp_output(xdp, &perf_buf_map, + ((__u64) meta.pkt_len << 32) | + BPF_F_CURRENT_CPU, + &meta, sizeof(meta)); + test_result_fentry = xdp->rxq->dev->ifindex; return 0; } -- cgit v1.2.3 From 14bc175d9c885c86239de3d730eea85ad67bfe7b Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 01:10:56 +0200 Subject: net: sched: Allow extending set of supported RED flags The qdiscs RED, GRED, SFQ and CHOKE use different subsets of the same pool of global RED flags. These are passed in tc_red_qopt.flags. However none of these qdiscs validate the flag field, and just copy it over wholesale to internal structures, and later dump it back. (An exception is GRED, which does validate for VQs -- however not for the main setup.) A broken userspace can therefore configure a qdisc with arbitrary unsupported flags, and later expect to see the flags on qdisc dump. The current ABI therefore allows storage of several bits of custom data to qdisc instances of the types mentioned above. How many bits, depends on which flags are meaningful for the qdisc in question. E.g. SFQ recognizes flags ECN and HARDDROP, and the rest is not interpreted. If SFQ ever needs to support ADAPTATIVE, it needs another way of doing it, and at the same time it needs to retain the possibility to store 6 bits of uninterpreted data. Likewise RED, which adds a new flag later in this patchset. To that end, this patch adds a new function, red_get_flags(), to split the passed flags of RED-like qdiscs to flags and user bits, and red_validate_flags() to validate the resulting configuration. It further adds a new attribute, TCA_RED_FLAGS, to pass arbitrary flags. Signed-off-by: Petr Machata Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/red.h | 33 ++++++++++++++++++++++++++++++++ include/uapi/linux/pkt_sched.h | 16 ++++++++++++++++ net/sched/sch_red.c | 43 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 89 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/red.h b/include/net/red.h index 9665582c4687..6a2aaa6c7c41 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -179,6 +179,39 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog) return true; } +static inline int red_get_flags(unsigned char qopt_flags, + unsigned char historic_mask, + struct nlattr *flags_attr, + unsigned char supported_mask, + struct nla_bitfield32 *p_flags, + unsigned char *p_userbits, + struct netlink_ext_ack *extack) +{ + struct nla_bitfield32 flags; + + if (qopt_flags && flags_attr) { + NL_SET_ERR_MSG_MOD(extack, "flags should be passed either through qopt, or through a dedicated attribute"); + return -EINVAL; + } + + if (flags_attr) { + flags = nla_get_bitfield32(flags_attr); + } else { + flags.selector = historic_mask; + flags.value = qopt_flags & historic_mask; + } + + *p_flags = flags; + *p_userbits = qopt_flags & ~historic_mask; + return 0; +} + +static inline int red_validate_flags(unsigned char flags, + struct netlink_ext_ack *extack) +{ + return 0; +} + static inline void red_set_parms(struct red_parms *p, u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog, u8 Scell_log, u8 *stab, u32 max_P) diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index bbe791b24168..6325507935ea 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -256,6 +256,7 @@ enum { TCA_RED_PARMS, TCA_RED_STAB, TCA_RED_MAX_P, + TCA_RED_FLAGS, /* bitfield32 */ __TCA_RED_MAX, }; @@ -268,12 +269,27 @@ struct tc_red_qopt { unsigned char Wlog; /* log(W) */ unsigned char Plog; /* log(P_max/(qth_max-qth_min)) */ unsigned char Scell_log; /* cell size for idle damping */ + + /* This field can be used for flags that a RED-like qdisc has + * historically supported. E.g. when configuring RED, it can be used for + * ECN, HARDDROP and ADAPTATIVE. For SFQ it can be used for ECN, + * HARDDROP. Etc. Because this field has not been validated, and is + * copied back on dump, any bits besides those to which a given qdisc + * has assigned a historical meaning need to be considered for free use + * by userspace tools. + * + * Any further flags need to be passed differently, e.g. through an + * attribute (such as TCA_RED_FLAGS above). Such attribute should allow + * passing both recent and historic flags in one value. + */ unsigned char flags; #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 }; +#define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE) + struct tc_red_xstats { __u32 early; /* Early drops */ __u32 pdrop; /* Drops due to queue limits */ diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 1695421333e3..d4ce111704dc 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -35,7 +35,11 @@ struct red_sched_data { u32 limit; /* HARD maximal queue length */ + unsigned char flags; + /* Non-flags in tc_red_qopt.flags. */ + unsigned char userbits; + struct timer_list adapt_timer; struct Qdisc *sch; struct red_parms parms; @@ -44,6 +48,8 @@ struct red_sched_data { struct Qdisc *qdisc; }; +static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS; + static inline int red_use_ecn(struct red_sched_data *q) { return q->flags & TC_RED_ECN; @@ -183,9 +189,12 @@ static void red_destroy(struct Qdisc *sch) } static const struct nla_policy red_policy[TCA_RED_MAX + 1] = { + [TCA_RED_UNSPEC] = { .strict_start_type = TCA_RED_FLAGS }, [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) }, [TCA_RED_STAB] = { .len = RED_STAB_SIZE }, [TCA_RED_MAX_P] = { .type = NLA_U32 }, + [TCA_RED_FLAGS] = { .type = NLA_BITFIELD32, + .validation_data = &red_supported_flags }, }; static int red_change(struct Qdisc *sch, struct nlattr *opt, @@ -194,7 +203,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, struct Qdisc *old_child = NULL, *child = NULL; struct red_sched_data *q = qdisc_priv(sch); struct nlattr *tb[TCA_RED_MAX + 1]; + struct nla_bitfield32 flags_bf; struct tc_red_qopt *ctl; + unsigned char userbits; + unsigned char flags; int err; u32 max_P; @@ -216,6 +228,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog)) return -EINVAL; + err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS, + tb[TCA_RED_FLAGS], red_supported_flags, + &flags_bf, &userbits, extack); + if (err) + return err; + if (ctl->limit > 0) { child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit, extack); @@ -227,7 +245,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, } sch_tree_lock(sch); - q->flags = ctl->flags; + + flags = (q->flags & ~flags_bf.selector) | flags_bf.value; + err = red_validate_flags(flags, extack); + if (err) + goto unlock_out; + + q->flags = flags; + q->userbits = userbits; q->limit = ctl->limit; if (child) { qdisc_tree_flush_backlog(q->qdisc); @@ -256,6 +281,12 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, if (old_child) qdisc_put(old_child); return 0; + +unlock_out: + sch_tree_unlock(sch); + if (child) + qdisc_put(child); + return err; } static inline void red_adaptative_timer(struct timer_list *t) @@ -299,10 +330,15 @@ static int red_dump_offload_stats(struct Qdisc *sch) static int red_dump(struct Qdisc *sch, struct sk_buff *skb) { struct red_sched_data *q = qdisc_priv(sch); + struct nla_bitfield32 flags_bf = { + .selector = red_supported_flags, + .value = q->flags, + }; struct nlattr *opts = NULL; struct tc_red_qopt opt = { .limit = q->limit, - .flags = q->flags, + .flags = (q->flags & TC_RED_HISTORIC_FLAGS) | + q->userbits, .qth_min = q->parms.qth_min >> q->parms.Wlog, .qth_max = q->parms.qth_max >> q->parms.Wlog, .Wlog = q->parms.Wlog, @@ -319,7 +355,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) if (opts == NULL) goto nla_put_failure; if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) || - nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P)) + nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) || + nla_put(skb, TCA_RED_FLAGS, sizeof(flags_bf), &flags_bf)) goto nla_put_failure; return nla_nest_end(skb, opts); -- cgit v1.2.3 From 0a7fad2376ba6b37c6b1a1072ed2a2381d82cd18 Mon Sep 17 00:00:00 2001 From: Petr Machata Date: Fri, 13 Mar 2020 01:10:57 +0200 Subject: net: sched: RED: Introduce an ECN nodrop mode When the RED Qdisc is currently configured to enable ECN, the RED algorithm is used to decide whether a certain SKB should be marked. If that SKB is not ECN-capable, it is early-dropped. It is also possible to keep all traffic in the queue, and just mark the ECN-capable subset of it, as appropriate under the RED algorithm. Some switches support this mode, and some installations make use of it. To that end, add a new RED flag, TC_RED_NODROP. When the Qdisc is configured with this flag, non-ECT traffic is enqueued instead of being early-dropped. Signed-off-by: Petr Machata Reviewed-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 1 + include/net/red.h | 5 +++++ include/uapi/linux/pkt_sched.h | 1 + net/sched/sch_red.c | 31 +++++++++++++++++++++++++------ 4 files changed, 32 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index dbc89452f90b..1db8b27d4515 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -740,6 +740,7 @@ struct tc_red_qopt_offload_params { u32 limit; bool is_ecn; bool is_harddrop; + bool is_nodrop; struct gnet_stats_queue *qstats; }; diff --git a/include/net/red.h b/include/net/red.h index 6a2aaa6c7c41..fc455445f4b2 100644 --- a/include/net/red.h +++ b/include/net/red.h @@ -209,6 +209,11 @@ static inline int red_get_flags(unsigned char qopt_flags, static inline int red_validate_flags(unsigned char flags, struct netlink_ext_ack *extack) { + if ((flags & TC_RED_NODROP) && !(flags & TC_RED_ECN)) { + NL_SET_ERR_MSG_MOD(extack, "nodrop mode is only meaningful with ECN"); + return -EINVAL; + } + return 0; } diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 6325507935ea..ea39287d59c8 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -286,6 +286,7 @@ struct tc_red_qopt { #define TC_RED_ECN 1 #define TC_RED_HARDDROP 2 #define TC_RED_ADAPTATIVE 4 +#define TC_RED_NODROP 8 }; #define TC_RED_HISTORIC_FLAGS (TC_RED_ECN | TC_RED_HARDDROP | TC_RED_ADAPTATIVE) diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index d4ce111704dc..3ef0a4f7399b 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -48,7 +48,7 @@ struct red_sched_data { struct Qdisc *qdisc; }; -static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS; +static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP; static inline int red_use_ecn(struct red_sched_data *q) { @@ -60,6 +60,11 @@ static inline int red_use_harddrop(struct red_sched_data *q) return q->flags & TC_RED_HARDDROP; } +static int red_use_nodrop(struct red_sched_data *q) +{ + return q->flags & TC_RED_NODROP; +} + static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -80,23 +85,36 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, case RED_PROB_MARK: qdisc_qstats_overlimit(sch); - if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) { + if (!red_use_ecn(q)) { q->stats.prob_drop++; goto congestion_drop; } - q->stats.prob_mark++; + if (INET_ECN_set_ce(skb)) { + q->stats.prob_mark++; + } else if (!red_use_nodrop(q)) { + q->stats.prob_drop++; + goto congestion_drop; + } + + /* Non-ECT packet in ECN nodrop mode: queue it. */ break; case RED_HARD_MARK: qdisc_qstats_overlimit(sch); - if (red_use_harddrop(q) || !red_use_ecn(q) || - !INET_ECN_set_ce(skb)) { + if (red_use_harddrop(q) || !red_use_ecn(q)) { + q->stats.forced_drop++; + goto congestion_drop; + } + + if (INET_ECN_set_ce(skb)) { + q->stats.forced_mark++; + } else if (!red_use_nodrop(q)) { q->stats.forced_drop++; goto congestion_drop; } - q->stats.forced_mark++; + /* Non-ECT packet in ECN nodrop mode: queue it. */ break; } @@ -171,6 +189,7 @@ static int red_offload(struct Qdisc *sch, bool enable) opt.set.limit = q->limit; opt.set.is_ecn = red_use_ecn(q); opt.set.is_harddrop = red_use_harddrop(q); + opt.set.is_nodrop = red_use_nodrop(q); opt.set.qstats = &sch->qstats; } else { opt.command = TC_RED_DESTROY; -- cgit v1.2.3 From 68983a354a655c35d3fb204489d383a2a051fda7 Mon Sep 17 00:00:00 2001 From: Manoj Basapathi Date: Thu, 6 Feb 2020 16:37:29 +0530 Subject: netfilter: xtables: Add snapshot of hardidletimer target This is a snapshot of hardidletimer netfilter target. This patch implements a hardidletimer Xtables target that can be used to identify when interfaces have been idle for a certain period of time. Timers are identified by labels and are created when a rule is set with a new label. The rules also take a timeout value (in seconds) as an option. If more than one rule uses the same timer label, the timer will be restarted whenever any of the rules get a hit. One entry for each timer is created in sysfs. This attribute contains the timer remaining for the timer to expire. The attributes are located under the xt_idletimer class: /sys/class/xt_idletimer/timers/