From e39e5b5e7206767a0f1be0e5cb7acbd0db87ae60 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 30 Sep 2012 19:29:39 +0300 Subject: cfg80211: Allow user space to specify non-IEs to SAE Authentication SAE extends Authentication frames with fields that are not information elements. NL80211_ATTR_IE is not suitable for these, so introduce a new attribute that can be used to specify the fields needed for SAE in station mode. Signed-off-by: Jouni Malinen [change to verify that SAE is only used with authenticate command] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7df9b500c804..179a0c2e2f61 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1273,6 +1273,9 @@ enum nl80211_commands { * the connection request from a station. nl80211_connect_failed_reason * enum has different reasons of connection failure. * + * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts + * with the Authentication transaction sequence number field. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1530,6 +1533,8 @@ enum nl80211_attrs { NL80211_ATTR_CONN_FAILED_REASON, + NL80211_ATTR_SAE_DATA, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2489,6 +2494,7 @@ enum nl80211_bss_status { * @NL80211_AUTHTYPE_SHARED_KEY: Shared Key authentication (WEP only) * @NL80211_AUTHTYPE_FT: Fast BSS Transition (IEEE 802.11r) * @NL80211_AUTHTYPE_NETWORK_EAP: Network EAP (some Cisco APs and mainly LEAP) + * @NL80211_AUTHTYPE_SAE: Simultaneous authentication of equals * @__NL80211_AUTHTYPE_NUM: internal * @NL80211_AUTHTYPE_MAX: maximum valid auth algorithm * @NL80211_AUTHTYPE_AUTOMATIC: determine automatically (if necessary by @@ -2500,6 +2506,7 @@ enum nl80211_auth_type { NL80211_AUTHTYPE_SHARED_KEY, NL80211_AUTHTYPE_FT, NL80211_AUTHTYPE_NETWORK_EAP, + NL80211_AUTHTYPE_SAE, /* keep last */ __NL80211_AUTHTYPE_NUM, @@ -3028,6 +3035,9 @@ enum nl80211_ap_sme_features { * in the interface combinations, even when it's only used for scan * and remain-on-channel. This could be due to, for example, the * remain-on-channel implementation requiring a channel context. + * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of + * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station + * mode */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3035,6 +3045,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_INACTIVITY_TIMER = 1 << 2, NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, + NL80211_FEATURE_SAE = 1 << 5, }; /** -- cgit v1.2.3 From f461be3eff662f01a177ecea8c1d7b040bb6bfbe Mon Sep 17 00:00:00 2001 From: Mahesh Palivela Date: Thu, 11 Oct 2012 08:04:52 +0000 Subject: {nl,cfg}80211: Peer STA VHT caps To save STAs VHT caps in AP mode Signed-off-by: Mahesh Palivela Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ include/uapi/linux/nl80211.h | 6 ++++++ net/mac80211/cfg.c | 5 +++++ net/wireless/nl80211.c | 5 +++++ 4 files changed, 18 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 60cebfac3e3c..607b5c02f740 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -498,6 +498,7 @@ enum station_parameters_apply_mask { * @plink_action: plink action to take * @plink_state: set the peer link state for a station * @ht_capa: HT capabilities of station + * @vht_capa: VHT capabilities of station * @uapsd_queues: bitmap of queues configured for uapsd. same format * as the AC bitmap in the QoS info field * @max_sp: max Service Period. same format as the MAX_SP in the @@ -517,6 +518,7 @@ struct station_parameters { u8 plink_action; u8 plink_state; struct ieee80211_ht_cap *ht_capa; + struct ieee80211_vht_cap *vht_capa; u8 uapsd_queues; u8 max_sp; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 179a0c2e2f61..71ab23b0356d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1276,6 +1276,9 @@ enum nl80211_commands { * @NL80211_ATTR_SAE_DATA: SAE elements in Authentication frames. This starts * with the Authentication transaction sequence number field. * + * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from + * association request when used with NL80211_CMD_NEW_STATION) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1535,6 +1538,8 @@ enum nl80211_attrs { NL80211_ATTR_SAE_DATA, + NL80211_ATTR_VHT_CAPABILITY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1578,6 +1583,7 @@ enum nl80211_attrs { #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 #define NL80211_HT_CAPABILITY_LEN 26 +#define NL80211_VHT_CAPABILITY_LEN 12 #define NL80211_MAX_NR_CIPHER_SUITES 5 #define NL80211_MAX_NR_AKM_SUITES 2 diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 03216b0408c7..ed27988f9d35 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1157,6 +1157,11 @@ static int sta_apply_parameters(struct ieee80211_local *local, params->ht_capa, &sta->sta.ht_cap); + if (params->vht_capa) + ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + params->vht_capa, + &sta->sta.vht_cap); + if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH if (sdata->u.mesh.security & IEEE80211_MESH_SEC_SECURED) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 74d8123ada77..ef170e982f91 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -355,6 +355,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WDEV] = { .type = NLA_U64 }, [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, + [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, }; /* policy for the key attributes */ @@ -3223,6 +3224,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_VHT_CAPABILITY]) + params.vht_capa = + nla_data(info->attrs[NL80211_ATTR_VHT_CAPABILITY]); + if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) params.plink_action = nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]); -- cgit v1.2.3 From ed47377154310fd2fd59d75fcdeb3d022344fb31 Mon Sep 17 00:00:00 2001 From: Sam Leffler Date: Thu, 11 Oct 2012 21:03:31 -0700 Subject: {nl,cfg}80211: add a flags word to scan requests Add a flags word to direct and scheduled scan requests; it will be used for control of optional behaviours such as flushing the bss cache prior to doing a scan. Signed-off-by: Sam Leffler Tested-by: Amitkumar Karwar Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 15 +++++++++++++++ net/wireless/nl80211.c | 12 ++++++++++++ 3 files changed, 31 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 607b5c02f740..d95da8f55f6e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1002,6 +1002,7 @@ struct cfg80211_ssid { * @n_channels: total number of channels to scan * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets + * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for * @wdev: the wireless device to scan for @@ -1014,6 +1015,7 @@ struct cfg80211_scan_request { u32 n_channels; const u8 *ie; size_t ie_len; + u32 flags; u32 rates[IEEE80211_NUM_BANDS]; @@ -1046,6 +1048,7 @@ struct cfg80211_match_set { * @interval: interval between each scheduled scan cycle * @ie: optional information element(s) to add into Probe Request or %NULL * @ie_len: length of ie in octets + * @flags: bit field of flags controlling operation * @match_sets: sets of parameters to be matched for a scan result * entry to be considered valid and to be passed to the host * (others are filtered out). @@ -1063,6 +1066,7 @@ struct cfg80211_sched_scan_request { u32 interval; const u8 *ie; size_t ie_len; + u32 flags; struct cfg80211_match_set *match_sets; int n_match_sets; s32 rssi_thold; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 71ab23b0356d..4d0b49ee4c2c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1279,6 +1279,8 @@ enum nl80211_commands { * @NL80211_ATTR_VHT_CAPABILITY: VHT Capability information element (from * association request when used with NL80211_CMD_NEW_STATION) * + * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1540,6 +1542,8 @@ enum nl80211_attrs { NL80211_ATTR_VHT_CAPABILITY, + NL80211_ATTR_SCAN_FLAGS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3086,4 +3090,15 @@ enum nl80211_connect_failed_reason { NL80211_CONN_FAIL_BLOCKED_CLIENT, }; +/** + * enum nl80211_scan_flags - scan request control flags + * + * Scan request control flags are used to control the handling + * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN + * requests. + * (will be filled) +enum nl80211_scan_flags { +}; + */ + #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ef170e982f91..dc08211c6c6b 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -356,6 +356,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, + [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -4367,6 +4368,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } } + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + request->flags = nla_get_u32( + info->attrs[NL80211_ATTR_SCAN_FLAGS]); + request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); @@ -4598,6 +4603,10 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->ie_len); } + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + request->flags = nla_get_u32( + info->attrs[NL80211_ATTR_SCAN_FLAGS]); + request->dev = dev; request->wiphy = &rdev->wiphy; request->interval = interval; @@ -7663,6 +7672,9 @@ static int nl80211_add_scan_req(struct sk_buff *msg, nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie)) goto nla_put_failure; + if (req->flags) + nla_put_u32(msg, NL80211_ATTR_SCAN_FLAGS, req->flags); + return 0; nla_put_failure: return -ENOBUFS; -- cgit v1.2.3 From 46856bbf0f0412c12e9674df68822cb531d49327 Mon Sep 17 00:00:00 2001 From: Sam Leffler Date: Thu, 11 Oct 2012 21:03:32 -0700 Subject: cfg80211: add scan flag to indicate its priority Add NL80211_SCAN_FLAG_LOW_PRIORITY flag support. It tells drivers that this is a low priority scan request, so that they can take necessary action. Drivers need to advertise low priority scan capability during registration. Signed-off-by: Sam Leffler Tested-by: Amitkumar Karwar Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++-- net/wireless/nl80211.c | 16 ++++++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4d0b49ee4c2c..c68e15e41321 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3048,6 +3048,7 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_SAE: This driver supports simultaneous authentication of * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station * mode + * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3056,6 +3057,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_CELL_BASE_REG_HINTS = 1 << 3, NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, NL80211_FEATURE_SAE = 1 << 5, + NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, }; /** @@ -3096,9 +3098,11 @@ enum nl80211_connect_failed_reason { * Scan request control flags are used to control the handling * of NL80211_CMD_TRIGGER_SCAN and NL80211_CMD_START_SCHED_SCAN * requests. - * (will be filled) + * + * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority + */ enum nl80211_scan_flags { + NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, }; - */ #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index dc08211c6c6b..aee252d65b8f 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4368,9 +4368,15 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) } } - if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + err = -EOPNOTSUPP; + goto out_free; + } + } request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); @@ -4603,9 +4609,15 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->ie_len); } - if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) + if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); + if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + err = -EOPNOTSUPP; + goto out_free; + } + } request->dev = dev; request->wiphy = &rdev->wiphy; -- cgit v1.2.3 From 15d6030b4bec618742b8b9ccae9209c8f9e4a916 Mon Sep 17 00:00:00 2001 From: Sam Leffler Date: Thu, 11 Oct 2012 21:03:34 -0700 Subject: cfg80211: add support for flushing old scan results Add an NL80211_SCAN_FLAG_FLUSH flag that causes old bss cache entries to be flushed on scan completion. This is useful for collecting guaranteed fresh scan/survey result (e.g. on resume). For normal scan, flushing only happens on successful completion of a scan; i.e. it does not happen if the scan is aborted. For scheduled scan, previous scan results are flushed everytime when we get new scan results. This feature is enabled by default. Drivers can disable it by unsetting the NL80211_FEATURE_SCAN_FLUSH flag. Signed-off-by: Sam Leffler Tested-by: Amitkumar Karwar Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao [invert polarity of feature flag to account for old kernels] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++ include/uapi/linux/nl80211.h | 4 +++ net/wireless/core.c | 2 ++ net/wireless/nl80211.c | 14 +++++++--- net/wireless/scan.c | 66 ++++++++++++++++++++++++++++++-------------- net/wireless/sme.c | 1 + 6 files changed, 66 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index d95da8f55f6e..aa0e4a12308c 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1005,6 +1005,7 @@ struct cfg80211_ssid { * @flags: bit field of flags controlling operation * @rates: bitmap of rates to advertise for each band * @wiphy: the wiphy this was for + * @scan_start: time (in jiffies) when the scan started * @wdev: the wireless device to scan for * @aborted: (internal) scan request was notified as aborted * @no_cck: used to send probe requests at non CCK rate in 2GHz band @@ -1023,6 +1024,7 @@ struct cfg80211_scan_request { /* internal */ struct wiphy *wiphy; + unsigned long scan_start; bool aborted; bool no_cck; @@ -1074,6 +1076,7 @@ struct cfg80211_sched_scan_request { /* internal */ struct wiphy *wiphy; struct net_device *dev; + unsigned long scan_start; /* keep last */ struct ieee80211_channel *channels[0]; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c68e15e41321..0e6277a06c29 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3049,6 +3049,7 @@ enum nl80211_ap_sme_features { * equals (SAE) with user space SME (NL80211_CMD_AUTHENTICATE) in station * mode * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan + * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3058,6 +3059,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_P2P_DEVICE_NEEDS_CHANNEL = 1 << 4, NL80211_FEATURE_SAE = 1 << 5, NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, + NL80211_FEATURE_SCAN_FLUSH = 1 << 7, }; /** @@ -3100,9 +3102,11 @@ enum nl80211_connect_failed_reason { * requests. * * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority + * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, + NL80211_SCAN_FLAG_FLUSH = 1<<1, }; #endif /* __LINUX_NL80211_H */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 443d4d7deea2..48c2ea4712e9 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -370,6 +370,8 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.rts_threshold = (u32) -1; rdev->wiphy.coverage_class = 0; + rdev->wiphy.features = NL80211_FEATURE_SCAN_FLUSH; + return &rdev->wiphy; } EXPORT_SYMBOL(wiphy_new); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index aee252d65b8f..9e5a7206b0b4 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -4371,8 +4371,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + if (((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || + ((request->flags & NL80211_SCAN_FLAG_FLUSH) && + !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { err = -EOPNOTSUPP; goto out_free; } @@ -4383,6 +4385,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) request->wdev = wdev; request->wiphy = &rdev->wiphy; + request->scan_start = jiffies; rdev->scan_req = request; err = rdev->ops->scan(&rdev->wiphy, request); @@ -4612,8 +4615,10 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { request->flags = nla_get_u32( info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { + if (((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) || + ((request->flags & NL80211_SCAN_FLAG_FLUSH) && + !(wiphy->features & NL80211_FEATURE_SCAN_FLUSH))) { err = -EOPNOTSUPP; goto out_free; } @@ -4622,6 +4627,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, request->dev = dev; request->wiphy = &rdev->wiphy; request->interval = interval; + request->scan_start = jiffies; err = rdev->ops->sched_scan_start(&rdev->wiphy, dev, request); if (!err) { diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 20050965abca..a8d5a9a07e49 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -47,6 +47,27 @@ static void __cfg80211_unlink_bss(struct cfg80211_registered_device *dev, kref_put(&bss->ref, bss_release); } +/* must hold dev->bss_lock! */ +static void __cfg80211_bss_expire(struct cfg80211_registered_device *dev, + unsigned long expire_time) +{ + struct cfg80211_internal_bss *bss, *tmp; + bool expired = false; + + list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { + if (atomic_read(&bss->hold)) + continue; + if (!time_after(expire_time, bss->ts)) + continue; + + __cfg80211_unlink_bss(dev, bss); + expired = true; + } + + if (expired) + dev->bss_generation++; +} + void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) { struct cfg80211_scan_request *request; @@ -72,10 +93,17 @@ void ___cfg80211_scan_done(struct cfg80211_registered_device *rdev, bool leak) if (wdev->netdev) cfg80211_sme_scan_done(wdev->netdev); - if (request->aborted) + if (request->aborted) { nl80211_send_scan_aborted(rdev, wdev); - else + } else { + if (request->flags & NL80211_SCAN_FLAG_FLUSH) { + /* flush entries from previous scans */ + spin_lock_bh(&rdev->bss_lock); + __cfg80211_bss_expire(rdev, request->scan_start); + spin_unlock_bh(&rdev->bss_lock); + } nl80211_send_scan_done(rdev, wdev); + } #ifdef CONFIG_CFG80211_WEXT if (wdev->netdev && !request->aborted) { @@ -126,16 +154,27 @@ EXPORT_SYMBOL(cfg80211_scan_done); void __cfg80211_sched_scan_results(struct work_struct *wk) { struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *request; rdev = container_of(wk, struct cfg80211_registered_device, sched_scan_results_wk); + request = rdev->sched_scan_req; + mutex_lock(&rdev->sched_scan_mtx); /* we don't have sched_scan_req anymore if the scan is stopping */ - if (rdev->sched_scan_req) - nl80211_send_sched_scan_results(rdev, - rdev->sched_scan_req->dev); + if (request) { + if (request->flags & NL80211_SCAN_FLAG_FLUSH) { + /* flush entries from previous scans */ + spin_lock_bh(&rdev->bss_lock); + __cfg80211_bss_expire(rdev, request->scan_start); + spin_unlock_bh(&rdev->bss_lock); + request->scan_start = + jiffies + msecs_to_jiffies(request->interval); + } + nl80211_send_sched_scan_results(rdev, request->dev); + } mutex_unlock(&rdev->sched_scan_mtx); } @@ -197,23 +236,9 @@ void cfg80211_bss_age(struct cfg80211_registered_device *dev, } } -/* must hold dev->bss_lock! */ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) { - struct cfg80211_internal_bss *bss, *tmp; - bool expired = false; - - list_for_each_entry_safe(bss, tmp, &dev->bss_list, list) { - if (atomic_read(&bss->hold)) - continue; - if (!time_after(jiffies, bss->ts + IEEE80211_SCAN_RESULT_EXPIRE)) - continue; - __cfg80211_unlink_bss(dev, bss); - expired = true; - } - - if (expired) - dev->bss_generation++; + __cfg80211_bss_expire(dev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) @@ -962,6 +987,7 @@ int cfg80211_wext_siwscan(struct net_device *dev, creq->ssids = (void *)&creq->channels[n_channels]; creq->n_channels = n_channels; creq->n_ssids = 1; + creq->scan_start = jiffies; /* translate "Scan on frequencies" request */ i = 0; diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 055d59643616..07d717eb9e2a 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -138,6 +138,7 @@ static int cfg80211_conn_scan(struct wireless_dev *wdev) request->wdev = wdev; request->wiphy = &rdev->wiphy; + request->scan_start = jiffies; rdev->scan_req = request; -- cgit v1.2.3 From 5c95b940bd97e744267249e3b0780e6ef04b029c Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 16 Oct 2012 08:39:22 +0200 Subject: nl/cfg80211: force scan using an AP vif if requested If the user wants to scan using a vif configured as AP, cfg80211 must give him a chance to do it, even if this will disrupt the stations performance due to off-channel scanning. To do so, this patch adds a 'force' flag to the SCAN_TRIGGER command which tells cfg80211 to perform the scanning operation even if the vif is an AP and the beaconing has already started. Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 8 ++++++++ net/mac80211/cfg.c | 11 ++++++++++- net/mac80211/main.c | 4 +++- 3 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0e6277a06c29..617d0fbfc96f 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3050,6 +3050,7 @@ enum nl80211_ap_sme_features { * mode * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported + * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3060,6 +3061,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_SAE = 1 << 5, NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, NL80211_FEATURE_SCAN_FLUSH = 1 << 7, + NL80211_FEATURE_AP_SCAN = 1 << 8, }; /** @@ -3103,10 +3105,16 @@ enum nl80211_connect_failed_reason { * * @NL80211_SCAN_FLAG_LOW_PRIORITY: scan request has low priority * @NL80211_SCAN_FLAG_FLUSH: flush cache before scanning + * @NL80211_SCAN_FLAG_AP: force a scan even if the interface is configured + * as AP and the beaconing has already been configured. This attribute is + * dangerous because will destroy stations performance as a lot of frames + * will be lost while scanning off-channel, therefore it must be used only + * when really needed */ enum nl80211_scan_flags { NL80211_SCAN_FLAG_LOW_PRIORITY = 1<<0, NL80211_SCAN_FLAG_FLUSH = 1<<1, + NL80211_SCAN_FLAG_AP = 1<<2, }; #endif /* __LINUX_NL80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 5739bfbf2999..5eab1325a0f6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1855,7 +1855,16 @@ static int ieee80211_scan(struct wiphy *wiphy, * beaconing hasn't been configured yet */ case NL80211_IFTYPE_AP: - if (sdata->u.ap.beacon) + /* + * If the scan has been forced (and the driver supports + * forcing), don't care about being beaconing already. + * This will create problems to the attached stations (e.g. all + * the frames sent while scanning on other channel will be + * lost) + */ + if (sdata->u.ap.beacon && + (!(wiphy->features & NL80211_FEATURE_AP_SCAN) || + !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; default: diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ba5a23249771..c42094be2f0b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -603,7 +603,9 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, NL80211_FEATURE_HT_IBSS; if (!ops->hw_scan) - wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN; + wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN | + NL80211_FEATURE_AP_SCAN; + if (!ops->set_key) wiphy->flags |= WIPHY_FLAG_IBSS_RSN; -- cgit v1.2.3 From e4e541a84863b6a41f2427f59cc9156c644491a8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 23 Oct 2012 22:29:56 +0400 Subject: sock-diag: Report shutdown for inet and unix sockets (v2) Make it simple -- just put new nlattr with just sk->sk_shutdown bits. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/uapi/linux/inet_diag.h | 3 ++- include/uapi/linux/unix_diag.h | 1 + net/ipv4/inet_diag.c | 3 +++ net/unix/diag.c | 3 +++ 4 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index 8c469af939aa..bbde90fa5838 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -109,9 +109,10 @@ enum { INET_DIAG_TOS, INET_DIAG_TCLASS, INET_DIAG_SKMEMINFO, + INET_DIAG_SHUTDOWN, }; -#define INET_DIAG_MAX INET_DIAG_SKMEMINFO +#define INET_DIAG_MAX INET_DIAG_SHUTDOWN /* INET_DIAG_MEM */ diff --git a/include/uapi/linux/unix_diag.h b/include/uapi/linux/unix_diag.h index b1d2bf16b33c..b8a24941db21 100644 --- a/include/uapi/linux/unix_diag.h +++ b/include/uapi/linux/unix_diag.h @@ -37,6 +37,7 @@ enum { UNIX_DIAG_ICONS, UNIX_DIAG_RQLEN, UNIX_DIAG_MEMINFO, + UNIX_DIAG_SHUTDOWN, UNIX_DIAG_MAX, }; diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 535584c00f91..e5bad82d3584 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -105,6 +105,9 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->id.idiag_src[0] = inet->inet_rcv_saddr; r->id.idiag_dst[0] = inet->inet_daddr; + if (nla_put_u8(skb, INET_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto errout; + /* IPv6 dual-stack sockets use inet->tos for IPv4 connections, * hence this needs to be included regardless of socket family. */ diff --git a/net/unix/diag.c b/net/unix/diag.c index 06748f108a57..5ac19dc1d5e4 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -151,6 +151,9 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO)) goto out_nlmsg_trim; + if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) + goto out_nlmsg_trim; + return nlmsg_end(skb, nlh); out_nlmsg_trim: -- cgit v1.2.3 From 2e74598d7f4c6d1b34da84037d9a7f8b1c8e04ae Mon Sep 17 00:00:00 2001 From: Kirill Smelkov Date: Mon, 22 Oct 2012 14:14:01 -0300 Subject: [media] v4l2: Fix typo in struct v4l2_captureparm description Judging from what drivers do and from my experience temeperframe fraction is set in seconds - look e.g. here static int bttv_g_parm(struct file *file, void *f, struct v4l2_streamparm *parm) { struct bttv_fh *fh = f; struct bttv *btv = fh->btv; v4l2_video_std_frame_period(bttv_tvnorms[btv->tvnorm].v4l2_id, &parm->parm.capture.timeperframe); ... void v4l2_video_std_frame_period(int id, struct v4l2_fract *frameperiod) { if (id & V4L2_STD_525_60) { frameperiod->numerator = 1001; frameperiod->denominator = 30000; } else { frameperiod->numerator = 1; frameperiod->denominator = 25; } and also v4l2-ctl in userspace decodes this as seconds: if (doioctl(fd, VIDIOC_G_PARM, &parm, "VIDIOC_G_PARM") == 0) { const struct v4l2_fract &tf = parm.parm.capture.timeperframe; ... printf("\tFrames per second: %.3f (%d/%d)\n", (1.0 * tf.denominator) / tf.numerator, tf.denominator, tf.numerator); The typo was there from day 1 - added in 2002 in e028b61b ([PATCH] add v4l2 api)(*) (*) found in history tree git://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git Signed-off-by: Kirill Smelkov Acked-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 57bfa59cda74..2fff7ff3e05b 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -737,7 +737,7 @@ struct v4l2_window { struct v4l2_captureparm { __u32 capability; /* Supported modes */ __u32 capturemode; /* Current mode */ - struct v4l2_fract timeperframe; /* Time per frame in .1us units */ + struct v4l2_fract timeperframe; /* Time per frame in seconds */ __u32 extendedmode; /* Driver-specific extensions */ __u32 readbuffers; /* # of buffers for read */ __u32 reserved[4]; -- cgit v1.2.3 From d900082bd9060dc955b181dae2f2adf86e27d747 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:49 +0000 Subject: rtnl: add a new type of msg to advertise protocol configuration A new type is added to allow userland to monitor protocol configuration, like IPv4 or IPv6. For example, monitoring the state of the forwarding status of an interface of the system. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/netconf.h | 22 ++++++++++++++++++++++ include/uapi/linux/rtnetlink.h | 5 +++++ 2 files changed, 27 insertions(+) create mode 100644 include/uapi/linux/netconf.h (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h new file mode 100644 index 000000000000..d0513726711f --- /dev/null +++ b/include/uapi/linux/netconf.h @@ -0,0 +1,22 @@ +#ifndef _UAPI_LINUX_NETCONF_H_ +#define _UAPI_LINUX_NETCONF_H_ + +#include +#include + +struct netconfmsg { + __u8 ncm_family; +}; + +enum { + NETCONFA_UNSPEC, + NETCONFA_IFINDEX, + NETCONFA_FORWARDING, + __NETCONFA_MAX +}; +#define NETCONFA_MAX (__NETCONFA_MAX - 1) + +#define NETCONFA_IFINDEX_ALL -1 +#define NETCONFA_IFINDEX_DEFAULT -2 + +#endif /* _UAPI_LINUX_NETCONF_H_ */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index fcd768b09f6e..0043b413b8bc 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -120,6 +120,11 @@ enum { RTM_SETDCB, #define RTM_SETDCB RTM_SETDCB + RTM_NEWNETCONF = 80, +#define RTM_NEWNETCONF RTM_NEWNETCONF + RTM_GETNETCONF = 82, +#define RTM_GETNETCONF RTM_GETNETCONF + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; -- cgit v1.2.3 From f3a1bfb11ccbc72d44f0b58c92115a40128979c3 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:50 +0000 Subject: rtnl/ipv6: use netconf msg to advertise forwarding status Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 ++ net/ipv6/addrconf.c | 77 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 0043b413b8bc..a4d75ea868ed 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -592,6 +592,8 @@ enum rtnetlink_groups { #define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE RTNLGRP_DCB, #define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV6_NETCONF, +#define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 0424e4e27414..0c57a8f67715 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -81,6 +81,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6_PRIVACY #include @@ -460,6 +461,72 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) return idev; } +static int inet6_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + if (type == NETCONFA_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv6_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET6; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + if (type == NETCONFA_FORWARDING && + nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet6_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV6_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV6_NETCONF, err); +} + #ifdef CONFIG_SYSCTL static void dev_forward_change(struct inet6_dev *idev) { @@ -486,6 +553,8 @@ static void dev_forward_change(struct inet6_dev *idev) else addrconf_leave_anycast(ifa); } + inet6_netconf_notify_devconf(dev_net(dev), NETCONFA_FORWARDING, + dev->ifindex, &idev->cnf); } @@ -518,6 +587,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) *p = newf; if (p == &net->ipv6.devconf_dflt->forwarding) { + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); rtnl_unlock(); return 0; } @@ -525,6 +598,10 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) if (p == &net->ipv6.devconf_all->forwarding) { net->ipv6.devconf_dflt->forwarding = newf; addrconf_forward_change(net, newf); + if ((!newf) ^ (!old)) + inet6_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else if ((!newf) ^ (!old)) dev_forward_change((struct inet6_dev *)table->extra1); rtnl_unlock(); -- cgit v1.2.3 From edc9e748934cf406cab708ca5dda7bd3c0f0a1db Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 25 Oct 2012 22:28:52 +0000 Subject: rtnl/ipv4: use netconf msg to advertise forwarding status Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 2 + net/ipv4/devinet.c | 93 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 91 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index a4d75ea868ed..3dee071770d5 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -592,6 +592,8 @@ enum rtnetlink_groups { #define RTNLGRP_PHONET_ROUTE RTNLGRP_PHONET_ROUTE RTNLGRP_DCB, #define RTNLGRP_DCB RTNLGRP_DCB + RTNLGRP_IPV4_NETCONF, +#define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF RTNLGRP_IPV6_NETCONF, #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF __RTNLGRP_MAX diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 2a6abc163ed2..020fdd2e6e19 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -55,6 +55,7 @@ #include #endif #include +#include #include #include @@ -1442,6 +1443,73 @@ static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) return 0; } +static int inet_netconf_msgsize_devconf(int type) +{ + int size = NLMSG_ALIGN(sizeof(struct netconfmsg)) + + nla_total_size(4); /* NETCONFA_IFINDEX */ + + if (type == NETCONFA_FORWARDING) + size += nla_total_size(4); + + return size; +} + +static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, + struct ipv4_devconf *devconf, u32 portid, + u32 seq, int event, unsigned int flags, + int type) +{ + struct nlmsghdr *nlh; + struct netconfmsg *ncm; + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), + flags); + if (nlh == NULL) + return -EMSGSIZE; + + ncm = nlmsg_data(nlh); + ncm->ncm_family = AF_INET; + + if (nla_put_s32(skb, NETCONFA_IFINDEX, ifindex) < 0) + goto nla_put_failure; + + if (type == NETCONFA_FORWARDING && + nla_put_s32(skb, NETCONFA_FORWARDING, + IPV4_DEVCONF(*devconf, FORWARDING)) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf) +{ + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(inet_netconf_msgsize_devconf(type), GFP_ATOMIC); + if (skb == NULL) + goto errout; + + err = inet_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, + RTM_NEWNETCONF, 0, type); + if (err < 0) { + /* -EMSGSIZE implies BUG in inet_netconf_msgsize_devconf() */ + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + goto errout; + } + rtnl_notify(skb, net, 0, RTNLGRP_IPV4_NETCONF, NULL, GFP_ATOMIC); + return; +errout: + if (err < 0) + rtnl_set_sk_err(net, RTNLGRP_IPV4_NETCONF, err); +} + #ifdef CONFIG_SYSCTL static void devinet_copy_dflt_conf(struct net *net, int i) @@ -1467,6 +1535,12 @@ static void inet_forward_change(struct net *net) IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on; IPV4_DEVCONF_DFLT(net, FORWARDING) = on; + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); for_each_netdev(net, dev) { struct in_device *in_dev; @@ -1474,8 +1548,11 @@ static void inet_forward_change(struct net *net) dev_disable_lro(dev); rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); - if (in_dev) + if (in_dev) { IN_DEV_CONF_SET(in_dev, FORWARDING, on); + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + dev->ifindex, &in_dev->cnf); + } rcu_read_unlock(); } } @@ -1527,15 +1604,23 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, } if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) { inet_forward_change(net); - } else if (*valp) { + } else { struct ipv4_devconf *cnf = ctl->extra1; struct in_device *idev = container_of(cnf, struct in_device, cnf); - dev_disable_lro(idev->dev); + if (*valp) + dev_disable_lro(idev->dev); + inet_netconf_notify_devconf(net, + NETCONFA_FORWARDING, + idev->dev->ifindex, + cnf); } rtnl_unlock(); rt_cache_flush(net); - } + } else + inet_netconf_notify_devconf(net, NETCONFA_FORWARDING, + NETCONFA_IFINDEX_DEFAULT, + net->ipv4.devconf_dflt); } return ret; -- cgit v1.2.3 From 52feb444a90304eb13c03115bb9758101dbb9254 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 17 Oct 2012 14:43:39 +0200 Subject: NFC: Extend netlink interface for LTO, RW, and MIUX parameters support NFC_CMD_LLC_GET_PARAMS: request LTO, RW, and MIUX parameters for a device NFC_CMD_LLC_SET_PARAMS: set one or more of LTO, RW, and MIUX parameters for a device. LTO must be set before the link is up otherwise -EINPROGRESS is returned. RW and MIUX can be set at anytime and will be passed in subsequent CONNECT and CC messages. If one of the passed parameters is wrong none is set and -EINVAL is returned. Signed-off-by: Thierry Escande Signed-off-by: Samuel Ortiz --- include/uapi/linux/nfc.h | 15 +++++ net/nfc/llcp/commands.c | 18 ++---- net/nfc/llcp/llcp.c | 14 ++--- net/nfc/llcp/llcp.h | 3 + net/nfc/netlink.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 6 ++ 6 files changed, 189 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index d908d17da56d..0e63cee8d810 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -60,6 +60,13 @@ * target mode. * @NFC_EVENT_DEVICE_DEACTIVATED: event emitted when the adapter is deactivated * from target mode. + * @NFC_CMD_LLC_GET_PARAMS: request LTO, RW, and MIUX parameters for a device + * @NFC_CMD_LLC_SET_PARAMS: set one or more of LTO, RW, and MIUX parameters for + * a device. LTO must be set before the link is up otherwise -EINPROGRESS + * is returned. RW and MIUX can be set at anytime and will be passed in + * subsequent CONNECT and CC messages. + * If one of the passed parameters is wrong none is set and -EINVAL is + * returned. */ enum nfc_commands { NFC_CMD_UNSPEC, @@ -77,6 +84,8 @@ enum nfc_commands { NFC_EVENT_TARGET_LOST, NFC_EVENT_TM_ACTIVATED, NFC_EVENT_TM_DEACTIVATED, + NFC_CMD_LLC_GET_PARAMS, + NFC_CMD_LLC_SET_PARAMS, /* private: internal use only */ __NFC_CMD_AFTER_LAST }; @@ -102,6 +111,9 @@ enum nfc_commands { * @NFC_ATTR_RF_MODE: Initiator or target * @NFC_ATTR_IM_PROTOCOLS: Initiator mode protocols to poll for * @NFC_ATTR_TM_PROTOCOLS: Target mode protocols to listen for + * @NFC_ATTR_LLC_PARAM_LTO: Link TimeOut parameter + * @NFC_ATTR_LLC_PARAM_RW: Receive Window size parameter + * @NFC_ATTR_LLC_PARAM_MIUX: MIU eXtension parameter */ enum nfc_attrs { NFC_ATTR_UNSPEC, @@ -119,6 +131,9 @@ enum nfc_attrs { NFC_ATTR_DEVICE_POWERED, NFC_ATTR_IM_PROTOCOLS, NFC_ATTR_TM_PROTOCOLS, + NFC_ATTR_LLC_PARAM_LTO, + NFC_ATTR_LLC_PARAM_RW, + NFC_ATTR_LLC_PARAM_MIUX, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c index 79415353cc28..ed2d17312d61 100644 --- a/net/nfc/llcp/commands.c +++ b/net/nfc/llcp/commands.c @@ -316,8 +316,7 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) struct sk_buff *skb; u8 *service_name_tlv = NULL, service_name_tlv_length; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; - __be16 miux; + u8 *rw_tlv = NULL, rw_tlv_length; int err; u16 size = 0; @@ -335,13 +334,11 @@ int nfc_llcp_send_connect(struct nfc_llcp_sock *sock) size += service_name_tlv_length; } - miux = cpu_to_be16(LLCP_MAX_MIUX); - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw = LLCP_MAX_RW; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); size += rw_tlv_length; pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len); @@ -378,8 +375,7 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) struct nfc_llcp_local *local; struct sk_buff *skb; u8 *miux_tlv = NULL, miux_tlv_length; - u8 *rw_tlv = NULL, rw_tlv_length, rw; - __be16 miux; + u8 *rw_tlv = NULL, rw_tlv_length; int err; u16 size = 0; @@ -389,13 +385,11 @@ int nfc_llcp_send_cc(struct nfc_llcp_sock *sock) if (local == NULL) return -ENODEV; - miux = cpu_to_be16(LLCP_MAX_MIUX); - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_tlv_length); size += miux_tlv_length; - rw = LLCP_MAX_RW; - rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &rw, 0, &rw_tlv_length); + rw_tlv = nfc_llcp_build_tlv(LLCP_TLV_RW, &local->rw, 0, &rw_tlv_length); size += rw_tlv_length; skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, size); diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c index 2e23bd348ebd..f6804532047a 100644 --- a/net/nfc/llcp/llcp.c +++ b/net/nfc/llcp/llcp.c @@ -467,10 +467,9 @@ static u8 nfc_llcp_reserve_sdp_ssap(struct nfc_llcp_local *local) static int nfc_llcp_build_gb(struct nfc_llcp_local *local) { u8 *gb_cur, *version_tlv, version, version_length; - u8 *lto_tlv, lto, lto_length; + u8 *lto_tlv, lto_length; u8 *wks_tlv, wks_length; u8 *miux_tlv, miux_length; - __be16 miux; u8 gb_len = 0; int ret = 0; @@ -479,9 +478,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) 1, &version_length); gb_len += version_length; - /* 1500 ms */ - lto = 150; - lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, <o, 1, <o_length); + lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_LTO, &local->lto, 1, <o_length); gb_len += lto_length; pr_debug("Local wks 0x%lx\n", local->local_wks); @@ -489,8 +486,7 @@ static int nfc_llcp_build_gb(struct nfc_llcp_local *local) &wks_length); gb_len += wks_length; - miux = cpu_to_be16(LLCP_MAX_MIUX); - miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&miux, 0, + miux_tlv = nfc_llcp_build_tlv(LLCP_TLV_MIUX, (u8 *)&local->miux, 0, &miux_length); gb_len += miux_length; @@ -1383,6 +1379,10 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) rwlock_init(&local->connecting_sockets.lock); rwlock_init(&local->raw_sockets.lock); + local->lto = 150; /* 1500 ms */ + local->rw = LLCP_MAX_RW; + local->miux = cpu_to_be16(LLCP_MAX_MIUX); + nfc_llcp_build_gb(local); local->remote_miu = LLCP_DEFAULT_MIU; diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h index 276da3a6a589..0d62366f8cc3 100644 --- a/net/nfc/llcp/llcp.h +++ b/net/nfc/llcp/llcp.h @@ -64,6 +64,9 @@ struct nfc_llcp_local { u32 target_idx; u8 rf_mode; u8 comm_mode; + u8 lto; + u8 rw; + __be16 miux; unsigned long local_wks; /* Well known services */ unsigned long local_sdp; /* Local services */ unsigned long local_sap; /* Local SAPs, not available for discovery */ diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 614cfd0470b7..3568ae16786d 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -29,6 +29,8 @@ #include "nfc.h" +#include "llcp/llcp.h" + static struct genl_multicast_group nfc_genl_event_mcgrp = { .name = NFC_GENL_MCAST_EVENT_NAME, }; @@ -716,6 +718,146 @@ static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info) return rc; } +static int nfc_genl_send_params(struct sk_buff *msg, + struct nfc_llcp_local *local, + u32 portid, u32 seq) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, 0, + NFC_CMD_LLC_GET_PARAMS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, local->dev->idx) || + nla_put_u8(msg, NFC_ATTR_LLC_PARAM_LTO, local->lto) || + nla_put_u8(msg, NFC_ATTR_LLC_PARAM_RW, local->rw) || + nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) + goto nla_put_failure; + + return genlmsg_end(msg, hdr); + +nla_put_failure: + + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nfc_genl_llc_get_params(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_llcp_local *local; + int rc = 0; + struct sk_buff *msg = NULL; + u32 idx; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + device_lock(&dev->dev); + + local = nfc_llcp_find_local(dev); + if (!local) { + rc = -ENODEV; + goto exit; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + rc = -ENOMEM; + goto exit; + } + + rc = nfc_genl_send_params(msg, local, info->snd_portid, info->snd_seq); + +exit: + device_unlock(&dev->dev); + + nfc_put_device(dev); + + if (rc < 0) { + if (msg) + nlmsg_free(msg); + + return rc; + } + + return genlmsg_reply(msg, info); +} + +static int nfc_genl_llc_set_params(struct sk_buff *skb, struct genl_info *info) +{ + struct nfc_dev *dev; + struct nfc_llcp_local *local; + u8 rw = 0; + u16 miux = 0; + u32 idx; + int rc = 0; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || + (!info->attrs[NFC_ATTR_LLC_PARAM_LTO] && + !info->attrs[NFC_ATTR_LLC_PARAM_RW] && + !info->attrs[NFC_ATTR_LLC_PARAM_MIUX])) + return -EINVAL; + + if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) { + rw = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_RW]); + + if (rw > LLCP_MAX_RW) + return -EINVAL; + } + + if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) { + miux = nla_get_u16(info->attrs[NFC_ATTR_LLC_PARAM_MIUX]); + + if (miux > LLCP_MAX_MIUX) + return -EINVAL; + } + + idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(idx); + if (!dev) + return -ENODEV; + + device_lock(&dev->dev); + + local = nfc_llcp_find_local(dev); + if (!local) { + nfc_put_device(dev); + rc = -ENODEV; + goto exit; + } + + if (info->attrs[NFC_ATTR_LLC_PARAM_LTO]) { + if (dev->dep_link_up) { + rc = -EINPROGRESS; + goto exit; + } + + local->lto = nla_get_u8(info->attrs[NFC_ATTR_LLC_PARAM_LTO]); + } + + if (info->attrs[NFC_ATTR_LLC_PARAM_RW]) + local->rw = rw; + + if (info->attrs[NFC_ATTR_LLC_PARAM_MIUX]) + local->miux = cpu_to_be16(miux); + +exit: + device_unlock(&dev->dev); + + nfc_put_device(dev); + + return rc; +} + static struct genl_ops nfc_genl_ops[] = { { .cmd = NFC_CMD_GET_DEVICE, @@ -760,6 +902,16 @@ static struct genl_ops nfc_genl_ops[] = { .done = nfc_genl_dump_targets_done, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_LLC_GET_PARAMS, + .doit = nfc_genl_llc_get_params, + .policy = nfc_genl_policy, + }, + { + .cmd = NFC_CMD_LLC_SET_PARAMS, + .doit = nfc_genl_llc_set_params, + .policy = nfc_genl_policy, + }, }; diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index c5e42b79a418..87d914d2876a 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -56,6 +56,7 @@ void nfc_llcp_unregister_device(struct nfc_dev *dev); int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len); u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, size_t *general_bytes_len); int nfc_llcp_data_received(struct nfc_dev *dev, struct sk_buff *skb); +struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev); int __init nfc_llcp_init(void); void nfc_llcp_exit(void); @@ -97,6 +98,11 @@ static inline int nfc_llcp_data_received(struct nfc_dev *dev, return 0; } +static inline struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev) +{ + return NULL; +} + static inline int nfc_llcp_init(void) { return 0; -- cgit v1.2.3 From c8442118ad9cd05cfe3b993f058e70ab25b1009a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Oct 2012 10:17:18 +0200 Subject: cfg80211: allow per interface TX power setting The TX power setting is currently per wiphy (hardware device) but with multi-channel capabilities that doesn't make much sense any more. Allow drivers (and mac80211) to advertise support for per-interface TX power configuration. When the TX power is configured for the wiphy, the wdev will be NULL and the driver can still handle that, but when a wdev is given the TX power can be set only for that wdev now. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 9 ++++---- .../net/wireless/brcm80211/brcmfmac/wl_cfg80211.c | 6 ++++-- drivers/net/wireless/mwifiex/cfg80211.c | 1 + drivers/net/wireless/rndis_wlan.c | 10 +++++++-- include/net/cfg80211.h | 10 ++++++--- include/uapi/linux/nl80211.h | 2 ++ net/mac80211/cfg.c | 5 ++++- net/wireless/nl80211.c | 6 +++++- net/wireless/rdev-ops.h | 11 +++++----- net/wireless/trace.h | 24 ++++++++++++---------- net/wireless/wext-compat.c | 4 ++-- 11 files changed, 56 insertions(+), 32 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 277089963eb4..d615f9f7506a 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1384,11 +1384,8 @@ static int ath6kl_cfg80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) return 0; } -/* - * The type nl80211_tx_power_setting replaces the following - * data type from 2.6.36 onwards -*/ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { @@ -1423,7 +1420,9 @@ static int ath6kl_cfg80211_set_txpower(struct wiphy *wiphy, return 0; } -static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, int *dbm) +static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm) { struct ath6kl *ar = (struct ath6kl *)wiphy_priv(wiphy); struct ath6kl_vif *vif; diff --git a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c index cb30feaa565b..904c94121c13 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/wl_cfg80211.c @@ -1721,7 +1721,7 @@ brcmf_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *ndev, } static s32 -brcmf_cfg80211_set_tx_power(struct wiphy *wiphy, +brcmf_cfg80211_set_tx_power(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, s32 mbm) { @@ -1770,7 +1770,9 @@ done: return err; } -static s32 brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, s32 *dbm) +static s32 brcmf_cfg80211_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + s32 *dbm) { struct brcmf_cfg80211_info *cfg = wiphy_to_cfg(wiphy); struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg)); diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index fdb1eb861021..8e829b251d83 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -324,6 +324,7 @@ mwifiex_cfg80211_cancel_remain_on_channel(struct wiphy *wiphy, */ static int mwifiex_cfg80211_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index bd1f0cb56085..5390af36c064 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -490,9 +490,12 @@ static int rndis_scan(struct wiphy *wiphy, static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed); static int rndis_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); -static int rndis_get_tx_power(struct wiphy *wiphy, int *dbm); +static int rndis_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm); static int rndis_connect(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_connect_params *sme); @@ -1903,6 +1906,7 @@ static int rndis_set_wiphy_params(struct wiphy *wiphy, u32 changed) } static int rndis_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { @@ -1930,7 +1934,9 @@ static int rndis_set_tx_power(struct wiphy *wiphy, return -ENOTSUPP; } -static int rndis_get_tx_power(struct wiphy *wiphy, int *dbm) +static int rndis_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c6964572890f..8034a4268fcb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1551,7 +1551,10 @@ struct cfg80211_gtk_rekey_data { * struct wiphy. If returning an error, no value should be changed. * * @set_tx_power: set the transmit power according to the parameters, - * the power passed is in mBm, to get dBm use MBM_TO_DBM(). + * the power passed is in mBm, to get dBm use MBM_TO_DBM(). The + * wdev may be %NULL if power was set for the wiphy, and will + * always be %NULL unless the driver supports per-vif TX power + * (as advertised by the nl80211 feature flag.) * @get_tx_power: store the current TX power into the dbm variable; * return 0 if successful * @@ -1748,9 +1751,10 @@ struct cfg80211_ops { int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); - int (*set_tx_power)(struct wiphy *wiphy, + int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm); - int (*get_tx_power)(struct wiphy *wiphy, int *dbm); + int (*get_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, + int *dbm); int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 617d0fbfc96f..4c5f6748ed7d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3051,6 +3051,7 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_LOW_PRIORITY_SCAN: This driver supports low priority scan * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif + * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3062,6 +3063,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_LOW_PRIORITY_SCAN = 1 << 6, NL80211_FEATURE_SCAN_FLUSH = 1 << 7, NL80211_FEATURE_AP_SCAN = 1 << 8, + NL80211_FEATURE_VIF_TXPOWER = 1 << 9, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 34fd3eba3090..a352e4d22dd9 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1992,6 +1992,7 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) } static int ieee80211_set_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { struct ieee80211_local *local = wiphy_priv(wiphy); @@ -2026,7 +2027,9 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, return 0; } -static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm) +static int ieee80211_get_tx_power(struct wiphy *wiphy, + struct wireless_dev *wdev, + int *dbm) { struct ieee80211_local *local = wiphy_priv(wiphy); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 879ca620fd6f..87d4670ee53a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1585,9 +1585,13 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_TX_POWER_SETTING]) { + struct wireless_dev *txp_wdev = wdev; enum nl80211_tx_power_setting type; int idx, mbm = 0; + if (!(rdev->wiphy.features & NL80211_FEATURE_VIF_TXPOWER)) + txp_wdev = NULL; + if (!rdev->ops->set_tx_power) { result = -EOPNOTSUPP; goto bad_res; @@ -1607,7 +1611,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) mbm = nla_get_u32(info->attrs[idx]); } - result = rdev_set_tx_power(rdev, type, mbm); + result = rdev_set_tx_power(rdev, txp_wdev, type, mbm); if (result) goto bad_res; } diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index eb5f8974e148..6e5fa659068d 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -476,21 +476,22 @@ rdev_set_wiphy_params(struct cfg80211_registered_device *rdev, u32 changed) } static inline int rdev_set_tx_power(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, enum nl80211_tx_power_setting type, int mbm) { int ret; - trace_rdev_set_tx_power(&rdev->wiphy, type, mbm); - ret = rdev->ops->set_tx_power(&rdev->wiphy, type, mbm); + trace_rdev_set_tx_power(&rdev->wiphy, wdev, type, mbm); + ret = rdev->ops->set_tx_power(&rdev->wiphy, wdev, type, mbm); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_tx_power(struct cfg80211_registered_device *rdev, - int *dbm) + struct wireless_dev *wdev, int *dbm) { int ret; - trace_rdev_get_tx_power(&rdev->wiphy); - ret = rdev->ops->get_tx_power(&rdev->wiphy, dbm); + trace_rdev_get_tx_power(&rdev->wiphy, wdev); + ret = rdev->ops->get_tx_power(&rdev->wiphy, wdev, dbm); trace_rdev_return_int_int(&rdev->wiphy, ret, *dbm); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0ca71caf85fb..8e03c6382a8a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -26,7 +26,7 @@ #define WIPHY_PR_ARG MAC_PR_ARG(wiphy_mac) #define WDEV_ENTRY __field(u32, id) -#define WDEV_ASSIGN (__entry->id) = (wdev->identifier) +#define WDEV_ASSIGN (__entry->id) = (wdev ? wdev->identifier : 0) #define WDEV_PR_FMT ", wdev id: %u" #define WDEV_PR_ARG (__entry->id) @@ -260,11 +260,6 @@ DEFINE_EVENT(wiphy_only_evt, rdev_get_antenna, TP_ARGS(wiphy) ); -DEFINE_EVENT(wiphy_only_evt, rdev_get_tx_power, - TP_PROTO(struct wiphy *wiphy), - TP_ARGS(wiphy) -); - DEFINE_EVENT(wiphy_only_evt, rdev_rfkill_poll, TP_PROTO(struct wiphy *wiphy), TP_ARGS(wiphy) @@ -1230,22 +1225,29 @@ TRACE_EVENT(rdev_set_wiphy_params, WIPHY_PR_ARG, __entry->changed) ); +DEFINE_EVENT(wiphy_wdev_evt, rdev_get_tx_power, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); + TRACE_EVENT(rdev_set_tx_power, - TP_PROTO(struct wiphy *wiphy, enum nl80211_tx_power_setting type, - int mbm), - TP_ARGS(wiphy, type, mbm), + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + enum nl80211_tx_power_setting type, int mbm), + TP_ARGS(wiphy, wdev, type, mbm), TP_STRUCT__entry( WIPHY_ENTRY + WDEV_ENTRY __field(enum nl80211_tx_power_setting, type) __field(int, mbm) ), TP_fast_assign( WIPHY_ASSIGN; + WDEV_ASSIGN; __entry->type = type; __entry->mbm = mbm; ), - TP_printk(WIPHY_PR_FMT ", type: %d, mbm: %d", - WIPHY_PR_ARG, __entry->type, __entry->mbm) + TP_printk(WIPHY_PR_FMT WDEV_PR_FMT ", type: %d, mbm: %d", + WIPHY_PR_ARG, WDEV_PR_ARG,__entry->type, __entry->mbm) ); TRACE_EVENT(rdev_return_int_int, diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 6488d2dbc1d7..742ab6ec4c9d 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -895,7 +895,7 @@ static int cfg80211_wext_siwtxpower(struct net_device *dev, return 0; } - return rdev_set_tx_power(rdev, type, DBM_TO_MBM(dbm)); + return rdev_set_tx_power(rdev, wdev, type, DBM_TO_MBM(dbm)); } static int cfg80211_wext_giwtxpower(struct net_device *dev, @@ -914,7 +914,7 @@ static int cfg80211_wext_giwtxpower(struct net_device *dev, if (!rdev->ops->get_tx_power) return -EOPNOTSUPP; - err = rdev_get_tx_power(rdev, &val); + err = rdev_get_tx_power(rdev, wdev, &val); if (err) return err; -- cgit v1.2.3 From 91716322d834cba34f4a7ed5e4a39673eb90862b Mon Sep 17 00:00:00 2001 From: Matt Fleming Date: Mon, 22 Oct 2012 15:51:45 +0100 Subject: efivarfs: Add unique magic number Using pstore's superblock magic number is no doubt going to cause problems in the future. Give efivarfs its own magic number. Acked-by: Jeremy Kerr Signed-off-by: Matt Fleming --- drivers/firmware/efivars.c | 2 +- include/uapi/linux/magic.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c index 2c044343c99b..3b0cf9acb504 100644 --- a/drivers/firmware/efivars.c +++ b/drivers/firmware/efivars.c @@ -991,7 +991,7 @@ int efivarfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; - sb->s_magic = PSTOREFS_MAGIC; + sb->s_magic = EFIVARFS_MAGIC; sb->s_op = &efivarfs_ops; sb->s_time_gran = 1; diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index e15192cb9cf4..12f68c7ceba6 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -27,6 +27,7 @@ #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 #define PSTOREFS_MAGIC 0x6165676C +#define EFIVARFS_MAGIC 0xde5e81e4 #define MINIX_SUPER_MAGIC 0x137F /* minix v1 fs, 14 char names */ #define MINIX_SUPER_MAGIC2 0x138F /* minix v1 fs, 30 char names */ -- cgit v1.2.3 From 2ac4ad2a1468123f6bb439a547880a9c0d302e0a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Sat, 27 Oct 2012 12:47:12 +0530 Subject: serial/arc-uart: Add new driver Driver for non-standard on-chip UART, instantiated in the ARC (Synopsys) FPGA Boards such as ARCAngel4/ML50x Signed-off-by: Vineet Gupta Reviewed-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/Kconfig | 23 ++ drivers/tty/serial/Makefile | 1 + drivers/tty/serial/arc_uart.c | 746 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/serial_core.h | 2 + 4 files changed, 772 insertions(+) create mode 100644 drivers/tty/serial/arc_uart.c (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig index 2a53be5f010d..b1768012ed21 100644 --- a/drivers/tty/serial/Kconfig +++ b/drivers/tty/serial/Kconfig @@ -1423,4 +1423,27 @@ config SERIAL_EFM32_UART_CONSOLE depends on SERIAL_EFM32_UART=y select SERIAL_CORE_CONSOLE +config SERIAL_ARC + tristate "ARC UART driver support" + select SERIAL_CORE + help + Driver for on-chip UART for ARC(Synopsys) for the legacy + FPGA Boards (ML50x/ARCAngel4) + +config SERIAL_ARC_CONSOLE + bool "Console on ARC UART" + depends on SERIAL_ARC=y + select SERIAL_CORE_CONSOLE + help + Enable system Console on ARC UART + +config SERIAL_ARC_NR_PORTS + int "Number of ARC UART ports" + depends on SERIAL_ARC + range 1 3 + default "1" + help + Set this to the number of serial ports you want the driver + to support. + endmenu diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile index 4f694dafa719..df1b998c436b 100644 --- a/drivers/tty/serial/Makefile +++ b/drivers/tty/serial/Makefile @@ -82,3 +82,4 @@ obj-$(CONFIG_SERIAL_XILINX_PS_UART) += xilinx_uartps.o obj-$(CONFIG_SERIAL_SIRFSOC) += sirfsoc_uart.o obj-$(CONFIG_SERIAL_AR933X) += ar933x_uart.o obj-$(CONFIG_SERIAL_EFM32_UART) += efm32-uart.o +obj-$(CONFIG_SERIAL_ARC) += arc_uart.o diff --git a/drivers/tty/serial/arc_uart.c b/drivers/tty/serial/arc_uart.c new file mode 100644 index 000000000000..e9c61d1b1c79 --- /dev/null +++ b/drivers/tty/serial/arc_uart.c @@ -0,0 +1,746 @@ +/* + * ARC On-Chip(fpga) UART Driver + * + * Copyright (C) 2010-2012 Synopsys, Inc. (www.synopsys.com) + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * vineetg: July 10th 2012 + * -Decoupled the driver from arch/arc + * +Using platform_get_resource() for irq/membase (thx to bfin_uart.c) + * +Using early_platform_xxx() for early console (thx to mach-shmobile/xxx) + * + * Vineetg: Aug 21st 2010 + * -Is uart_tx_stopped() not done in tty write path as it has already been + * taken care of, in serial core + * + * Vineetg: Aug 18th 2010 + * -New Serial Core based ARC UART driver + * -Derived largely from blackfin driver albiet with some major tweaks + * + * TODO: + * -check if sysreq works + */ + +#if defined(CONFIG_SERIAL_ARC_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ) +#define SUPPORT_SYSRQ +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/************************************* + * ARC UART Hardware Specs + ************************************/ +#define ARC_UART_TX_FIFO_SIZE 1 + +/* + * UART Register set (this is not a Standards Compliant IP) + * Also each reg is Word aligned, but only 8 bits wide + */ +#define R_ID0 0 +#define R_ID1 4 +#define R_ID2 8 +#define R_ID3 12 +#define R_DATA 16 +#define R_STS 20 +#define R_BAUDL 24 +#define R_BAUDH 28 + +/* Bits for UART Status Reg (R/W) */ +#define RXIENB 0x04 /* Receive Interrupt Enable */ +#define TXIENB 0x40 /* Transmit Interrupt Enable */ + +#define RXEMPTY 0x20 /* Receive FIFO Empty: No char receivede */ +#define TXEMPTY 0x80 /* Transmit FIFO Empty, thus char can be written into */ + +#define RXFULL 0x08 /* Receive FIFO full */ +#define RXFULL1 0x10 /* Receive FIFO has space for 1 char (tot space=4) */ + +#define RXFERR 0x01 /* Frame Error: Stop Bit not detected */ +#define RXOERR 0x02 /* OverFlow Err: Char recv but RXFULL still set */ + +/* Uart bit fiddling helpers: lowest level */ +#define RBASE(uart, reg) (uart->port.membase + reg) +#define UART_REG_SET(u, r, v) writeb((v), RBASE(u, r)) +#define UART_REG_GET(u, r) readb(RBASE(u, r)) + +#define UART_REG_OR(u, r, v) UART_REG_SET(u, r, UART_REG_GET(u, r) | (v)) +#define UART_REG_CLR(u, r, v) UART_REG_SET(u, r, UART_REG_GET(u, r) & ~(v)) + +/* Uart bit fiddling helpers: API level */ +#define UART_SET_DATA(uart, val) UART_REG_SET(uart, R_DATA, val) +#define UART_GET_DATA(uart) UART_REG_GET(uart, R_DATA) + +#define UART_SET_BAUDH(uart, val) UART_REG_SET(uart, R_BAUDH, val) +#define UART_SET_BAUDL(uart, val) UART_REG_SET(uart, R_BAUDL, val) + +#define UART_CLR_STATUS(uart, val) UART_REG_CLR(uart, R_STS, val) +#define UART_GET_STATUS(uart) UART_REG_GET(uart, R_STS) + +#define UART_ALL_IRQ_DISABLE(uart) UART_REG_CLR(uart, R_STS, RXIENB|TXIENB) +#define UART_RX_IRQ_DISABLE(uart) UART_REG_CLR(uart, R_STS, RXIENB) +#define UART_TX_IRQ_DISABLE(uart) UART_REG_CLR(uart, R_STS, TXIENB) + +#define UART_ALL_IRQ_ENABLE(uart) UART_REG_OR(uart, R_STS, RXIENB|TXIENB) +#define UART_RX_IRQ_ENABLE(uart) UART_REG_OR(uart, R_STS, RXIENB) +#define UART_TX_IRQ_ENABLE(uart) UART_REG_OR(uart, R_STS, TXIENB) + +#define ARC_SERIAL_DEV_NAME "ttyARC" + +struct arc_uart_port { + struct uart_port port; + unsigned long baud; + int is_emulated; /* H/w vs. Instruction Set Simulator */ +}; + +#define to_arc_port(uport) container_of(uport, struct arc_uart_port, port) + +static struct arc_uart_port arc_uart_ports[CONFIG_SERIAL_ARC_NR_PORTS]; + +#ifdef CONFIG_SERIAL_ARC_CONSOLE +static struct console arc_console; +#endif + +#define DRIVER_NAME "arc-uart" + +static struct uart_driver arc_uart_driver = { + .owner = THIS_MODULE, + .driver_name = DRIVER_NAME, + .dev_name = ARC_SERIAL_DEV_NAME, + .major = 0, + .minor = 0, + .nr = CONFIG_SERIAL_ARC_NR_PORTS, +#ifdef CONFIG_SERIAL_ARC_CONSOLE + .cons = &arc_console, +#endif +}; + +static void arc_serial_stop_rx(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + UART_RX_IRQ_DISABLE(uart); +} + +static void arc_serial_stop_tx(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + while (!(UART_GET_STATUS(uart) & TXEMPTY)) + cpu_relax(); + + UART_TX_IRQ_DISABLE(uart); +} + +/* + * Return TIOCSER_TEMT when transmitter is not busy. + */ +static unsigned int arc_serial_tx_empty(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + unsigned int stat; + + stat = UART_GET_STATUS(uart); + if (stat & TXEMPTY) + return TIOCSER_TEMT; + + return 0; +} + +/* + * Driver internal routine, used by both tty(serial core) as well as tx-isr + * -Called under spinlock in either cases + * -also tty->stopped / tty->hw_stopped has already been checked + * = by uart_start( ) before calling us + * = tx_ist checks that too before calling + */ +static void arc_serial_tx_chars(struct arc_uart_port *uart) +{ + struct circ_buf *xmit = &uart->port.state->xmit; + int sent = 0; + unsigned char ch; + + if (unlikely(uart->port.x_char)) { + UART_SET_DATA(uart, uart->port.x_char); + uart->port.icount.tx++; + uart->port.x_char = 0; + sent = 1; + } else if (xmit->tail != xmit->head) { /* TODO: uart_circ_empty */ + ch = xmit->buf[xmit->tail]; + xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1); + uart->port.icount.tx++; + while (!(UART_GET_STATUS(uart) & TXEMPTY)) + cpu_relax(); + UART_SET_DATA(uart, ch); + sent = 1; + } + + /* + * If num chars in xmit buffer are too few, ask tty layer for more. + * By Hard ISR to schedule processing in software interrupt part + */ + if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS) + uart_write_wakeup(&uart->port); + + if (sent) + UART_TX_IRQ_ENABLE(uart); +} + +/* + * port is locked and interrupts are disabled + * uart_start( ) calls us under the port spinlock irqsave + */ +static void arc_serial_start_tx(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + arc_serial_tx_chars(uart); +} + +static void arc_serial_rx_chars(struct arc_uart_port *uart) +{ + struct tty_struct *tty = tty_port_tty_get(&uart->port.state->port); + unsigned int status, ch, flg = 0; + + if (!tty) + return; + + /* + * UART has 4 deep RX-FIFO. Driver's recongnition of this fact + * is very subtle. Here's how ... + * Upon getting a RX-Intr, such that RX-EMPTY=0, meaning data available, + * driver reads the DATA Reg and keeps doing that in a loop, until + * RX-EMPTY=1. Multiple chars being avail, with a single Interrupt, + * before RX-EMPTY=0, implies some sort of buffering going on in the + * controller, which is indeed the Rx-FIFO. + */ + while (!((status = UART_GET_STATUS(uart)) & RXEMPTY)) { + + ch = UART_GET_DATA(uart); + uart->port.icount.rx++; + + if (unlikely(status & (RXOERR | RXFERR))) { + if (status & RXOERR) { + uart->port.icount.overrun++; + flg = TTY_OVERRUN; + UART_CLR_STATUS(uart, RXOERR); + } + + if (status & RXFERR) { + uart->port.icount.frame++; + flg = TTY_FRAME; + UART_CLR_STATUS(uart, RXFERR); + } + } else + flg = TTY_NORMAL; + + if (unlikely(uart_handle_sysrq_char(&uart->port, ch))) + goto done; + + uart_insert_char(&uart->port, status, RXOERR, ch, flg); + +done: + tty_flip_buffer_push(tty); + } + + tty_kref_put(tty); +} + +/* + * A note on the Interrupt handling state machine of this driver + * + * kernel printk writes funnel thru the console driver framework and in order + * to keep things simple as well as efficient, it writes to UART in polled + * mode, in one shot, and exits. + * + * OTOH, Userland output (via tty layer), uses interrupt based writes as there + * can be undeterministic delay between char writes. + * + * Thus Rx-interrupts are always enabled, while tx-interrupts are by default + * disabled. + * + * When tty has some data to send out, serial core calls driver's start_tx + * which + * -checks-if-tty-buffer-has-char-to-send + * -writes-data-to-uart + * -enable-tx-intr + * + * Once data bits are pushed out, controller raises the Tx-room-avail-Interrupt. + * The first thing Tx ISR does is disable further Tx interrupts (as this could + * be the last char to send, before settling down into the quiet polled mode). + * It then calls the exact routine used by tty layer write to send out any + * more char in tty buffer. In case of sending, it re-enables Tx-intr. In case + * of no data, it remains disabled. + * This is how the transmit state machine is dynamically switched on/off + */ + +static irqreturn_t arc_serial_isr(int irq, void *dev_id) +{ + struct arc_uart_port *uart = dev_id; + unsigned int status; + + status = UART_GET_STATUS(uart); + + /* + * Single IRQ for both Rx (data available) Tx (room available) Interrupt + * notifications from the UART Controller. + * To demultiplex between the two, we check the relevant bits + */ + if ((status & RXIENB) && !(status & RXEMPTY)) { + + /* already in ISR, no need of xx_irqsave */ + spin_lock(&uart->port.lock); + arc_serial_rx_chars(uart); + spin_unlock(&uart->port.lock); + } + + if ((status & TXIENB) && (status & TXEMPTY)) { + + /* Unconditionally disable further Tx-Interrupts. + * will be enabled by tx_chars() if needed. + */ + UART_TX_IRQ_DISABLE(uart); + + spin_lock(&uart->port.lock); + + if (!uart_tx_stopped(&uart->port)) + arc_serial_tx_chars(uart); + + spin_unlock(&uart->port.lock); + } + + return IRQ_HANDLED; +} + +static unsigned int arc_serial_get_mctrl(struct uart_port *port) +{ + /* + * Pretend we have a Modem status reg and following bits are + * always set, to satify the serial core state machine + * (DSR) Data Set Ready + * (CTS) Clear To Send + * (CAR) Carrier Detect + */ + return TIOCM_CTS | TIOCM_DSR | TIOCM_CAR; +} + +static void arc_serial_set_mctrl(struct uart_port *port, unsigned int mctrl) +{ + /* MCR not present */ +} + +/* Enable Modem Status Interrupts */ + +static void arc_serial_enable_ms(struct uart_port *port) +{ + /* MSR not present */ +} + +static void arc_serial_break_ctl(struct uart_port *port, int break_state) +{ + /* ARC UART doesn't support sending Break signal */ +} + +static int arc_serial_startup(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + /* Before we hook up the ISR, Disable all UART Interrupts */ + UART_ALL_IRQ_DISABLE(uart); + + if (request_irq(uart->port.irq, arc_serial_isr, 0, "arc uart rx-tx", + uart)) { + dev_warn(uart->port.dev, "Unable to attach ARC UART intr\n"); + return -EBUSY; + } + + UART_RX_IRQ_ENABLE(uart); /* Only Rx IRQ enabled to begin with */ + + return 0; +} + +/* This is not really needed */ +static void arc_serial_shutdown(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + free_irq(uart->port.irq, uart); +} + +static void +arc_serial_set_termios(struct uart_port *port, struct ktermios *new, + struct ktermios *old) +{ + struct arc_uart_port *uart = to_arc_port(port); + unsigned int baud, uartl, uarth, hw_val; + unsigned long flags; + + /* + * Use the generic handler so that any specially encoded baud rates + * such as SPD_xx flags or "%B0" can be handled + * Max Baud I suppose will not be more than current 115K * 4 + * Formula for ARC UART is: hw-val = ((CLK/(BAUD*4)) -1) + * spread over two 8-bit registers + */ + baud = uart_get_baud_rate(port, new, old, 0, 460800); + + hw_val = port->uartclk / (uart->baud * 4) - 1; + uartl = hw_val & 0xFF; + uarth = (hw_val >> 8) & 0xFF; + + /* + * UART ISS(Instruction Set simulator) emulation has a subtle bug: + * A existing value of Baudh = 0 is used as a indication to startup + * it's internal state machine. + * Thus if baudh is set to 0, 2 times, it chokes. + * This happens with BAUD=115200 and the formaula above + * Until that is fixed, when running on ISS, we will set baudh to !0 + */ + if (uart->is_emulated) + uarth = 1; + + spin_lock_irqsave(&port->lock, flags); + + UART_ALL_IRQ_DISABLE(uart); + + UART_SET_BAUDL(uart, uartl); + UART_SET_BAUDH(uart, uarth); + + UART_RX_IRQ_ENABLE(uart); + + /* + * UART doesn't support Parity/Hardware Flow Control; + * Only supports 8N1 character size + */ + new->c_cflag &= ~(CMSPAR|CRTSCTS|CSIZE); + new->c_cflag |= CS8; + + if (old) + tty_termios_copy_hw(new, old); + + /* Don't rewrite B0 */ + if (tty_termios_baud_rate(new)) + tty_termios_encode_baud_rate(new, baud, baud); + + uart_update_timeout(port, new->c_cflag, baud); + + spin_unlock_irqrestore(&port->lock, flags); +} + +static const char *arc_serial_type(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + + return uart->port.type == PORT_ARC ? DRIVER_NAME : NULL; +} + +static void arc_serial_release_port(struct uart_port *port) +{ +} + +static int arc_serial_request_port(struct uart_port *port) +{ + return 0; +} + +/* + * Verify the new serial_struct (for TIOCSSERIAL). + */ +static int +arc_serial_verify_port(struct uart_port *port, struct serial_struct *ser) +{ + if (port->type != PORT_UNKNOWN && ser->type != PORT_ARC) + return -EINVAL; + + return 0; +} + +/* + * Configure/autoconfigure the port. + */ +static void arc_serial_config_port(struct uart_port *port, int flags) +{ + struct arc_uart_port *uart = to_arc_port(port); + + if (flags & UART_CONFIG_TYPE) + uart->port.type = PORT_ARC; +} + +#if defined(CONFIG_CONSOLE_POLL) || defined(CONFIG_SERIAL_ARC_CONSOLE) + +static void arc_serial_poll_putchar(struct uart_port *port, unsigned char chr) +{ + struct arc_uart_port *uart = to_arc_port(port); + + while (!(UART_GET_STATUS(uart) & TXEMPTY)) + cpu_relax(); + + UART_SET_DATA(uart, chr); +} +#endif + +#ifdef CONFIG_CONSOLE_POLL +static int arc_serial_poll_getchar(struct uart_port *port) +{ + struct arc_uart_port *uart = to_arc_port(port); + unsigned char chr; + + while (!(UART_GET_STATUS(uart) & RXEMPTY)) + cpu_relax(); + + chr = UART_GET_DATA(uart); + return chr; +} +#endif + +static struct uart_ops arc_serial_pops = { + .tx_empty = arc_serial_tx_empty, + .set_mctrl = arc_serial_set_mctrl, + .get_mctrl = arc_serial_get_mctrl, + .stop_tx = arc_serial_stop_tx, + .start_tx = arc_serial_start_tx, + .stop_rx = arc_serial_stop_rx, + .enable_ms = arc_serial_enable_ms, + .break_ctl = arc_serial_break_ctl, + .startup = arc_serial_startup, + .shutdown = arc_serial_shutdown, + .set_termios = arc_serial_set_termios, + .type = arc_serial_type, + .release_port = arc_serial_release_port, + .request_port = arc_serial_request_port, + .config_port = arc_serial_config_port, + .verify_port = arc_serial_verify_port, +#ifdef CONFIG_CONSOLE_POLL + .poll_put_char = arc_serial_poll_putchar, + .poll_get_char = arc_serial_poll_getchar, +#endif +}; + +static int __devinit +arc_uart_init_one(struct platform_device *pdev, struct arc_uart_port *uart) +{ + struct resource *res, *res2; + unsigned long *plat_data; + + if (pdev->id < 0 || pdev->id >= CONFIG_SERIAL_ARC_NR_PORTS) { + dev_err(&pdev->dev, "Wrong uart platform device id.\n"); + return -ENOENT; + } + + plat_data = ((unsigned long *)(pdev->dev.platform_data)); + uart->baud = plat_data[0]; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + res2 = platform_get_resource(pdev, IORESOURCE_IRQ, 0); + if (!res2) + return -ENODEV; + + uart->port.mapbase = res->start; + uart->port.membase = ioremap_nocache(res->start, resource_size(res)); + if (!uart->port.membase) + /* No point of dev_err since UART itself is hosed here */ + return -ENXIO; + + uart->port.irq = res2->start; + uart->port.dev = &pdev->dev; + uart->port.iotype = UPIO_MEM; + uart->port.flags = UPF_BOOT_AUTOCONF; + uart->port.line = pdev->id; + uart->port.ops = &arc_serial_pops; + + uart->port.uartclk = plat_data[1]; + uart->port.fifosize = ARC_UART_TX_FIFO_SIZE; + + /* + * uart_insert_char( ) uses it in decideding whether to ignore a + * char or not. Explicitly setting it here, removes the subtelty + */ + uart->port.ignore_status_mask = 0; + + /* Real Hardware vs. emulated to work around a bug */ + uart->is_emulated = !!plat_data[2]; + + return 0; +} + +#ifdef CONFIG_SERIAL_ARC_CONSOLE + +static int __devinit arc_serial_console_setup(struct console *co, char *options) +{ + struct uart_port *port; + int baud = 115200; + int bits = 8; + int parity = 'n'; + int flow = 'n'; + + if (co->index < 0 || co->index >= CONFIG_SERIAL_ARC_NR_PORTS) + return -ENODEV; + + /* + * The uart port backing the console (e.g. ttyARC1) might not have been + * init yet. If so, defer the console setup to after the port. + */ + port = &arc_uart_ports[co->index].port; + if (!port->membase) + return -ENODEV; + + if (options) + uart_parse_options(options, &baud, &parity, &bits, &flow); + + /* + * Serial core will call port->ops->set_termios( ) + * which will set the baud reg + */ + return uart_set_options(port, co, baud, parity, bits, flow); +} + +static void arc_serial_console_putchar(struct uart_port *port, int ch) +{ + arc_serial_poll_putchar(port, (unsigned char)ch); +} + +/* + * Interrupts are disabled on entering + */ +static void arc_serial_console_write(struct console *co, const char *s, + unsigned int count) +{ + struct uart_port *port = &arc_uart_ports[co->index].port; + unsigned long flags; + + spin_lock_irqsave(&port->lock, flags); + uart_console_write(port, s, count, arc_serial_console_putchar); + spin_unlock_irqrestore(&port->lock, flags); +} + +static struct console arc_console = { + .name = ARC_SERIAL_DEV_NAME, + .write = arc_serial_console_write, + .device = uart_console_device, + .setup = arc_serial_console_setup, + .flags = CON_PRINTBUFFER, + .index = -1, + .data = &arc_uart_driver +}; + +static __init void early_serial_write(struct console *con, const char *s, + unsigned int n) +{ + struct uart_port *port = &arc_uart_ports[con->index].port; + unsigned int i; + + for (i = 0; i < n; i++, s++) { + if (*s == '\n') + arc_serial_poll_putchar(port, '\r'); + arc_serial_poll_putchar(port, *s); + } +} + +static struct __initdata console arc_early_serial_console = { + .name = "early_ARCuart", + .write = early_serial_write, + .flags = CON_PRINTBUFFER | CON_BOOT, + .index = -1 +}; + +static int __devinit arc_serial_probe_earlyprintk(struct platform_device *pdev) +{ + arc_early_serial_console.index = pdev->id; + + arc_uart_init_one(pdev, &arc_uart_ports[pdev->id]); + + arc_serial_console_setup(&arc_early_serial_console, NULL); + + register_console(&arc_early_serial_console); + return 0; +} +#else +static int __devinit arc_serial_probe_earlyprintk(struct platform_device *pdev) +{ + return -ENODEV; +} +#endif /* CONFIG_SERIAL_ARC_CONSOLE */ + +static int __devinit arc_serial_probe(struct platform_device *pdev) +{ + struct arc_uart_port *uart; + int rc; + + if (is_early_platform_device(pdev)) + return arc_serial_probe_earlyprintk(pdev); + + uart = &arc_uart_ports[pdev->id]; + rc = arc_uart_init_one(pdev, uart); + if (rc) + return rc; + + return uart_add_one_port(&arc_uart_driver, &uart->port); +} + +static int __devexit arc_serial_remove(struct platform_device *pdev) +{ + /* This will never be called */ + return 0; +} + +static struct platform_driver arc_platform_driver = { + .probe = arc_serial_probe, + .remove = __devexit_p(arc_serial_remove), + .driver = { + .name = DRIVER_NAME, + .owner = THIS_MODULE, + }, +}; + +#ifdef CONFIG_SERIAL_ARC_CONSOLE +/* + * Register an early platform driver of "earlyprintk" class. + * ARCH platform code installs the driver and probes the early devices + * The installation could rely on user specifying earlyprintk=xyx in cmd line + * or it could be done independently, for all "earlyprintk" class drivers. + * [see arch/arc/plat-arcfpga/platform.c] + */ +early_platform_init("earlyprintk", &arc_platform_driver); + +#endif /* CONFIG_SERIAL_ARC_CONSOLE */ + +static int __init arc_serial_init(void) +{ + int ret; + + ret = uart_register_driver(&arc_uart_driver); + if (ret) + return ret; + + ret = platform_driver_register(&arc_platform_driver); + if (ret) + uart_unregister_driver(&arc_uart_driver); + + return ret; +} + +static void __exit arc_serial_exit(void) +{ + platform_driver_unregister(&arc_platform_driver); + uart_unregister_driver(&arc_uart_driver); +} + +module_init(arc_serial_init); +module_exit(arc_serial_exit); + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("plat-arcfpga/uart"); +MODULE_AUTHOR("Vineet Gupta"); +MODULE_DESCRIPTION("ARC(Synopsys) On-Chip(fpga) serial driver"); diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index 7e1ab20adc03..ebcc73f0418a 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -215,5 +215,7 @@ /* Energy Micro efm32 SoC */ #define PORT_EFMUART 100 +/* ARC (Synopsys) on-chip UART */ +#define PORT_ARC 101 #endif /* _UAPILINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From 2469ffd723f76ac2d3ce3d4f31ee31ee0a06cd38 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Wed, 24 Oct 2012 08:13:03 +0000 Subject: net: set and query VEB/VEPA bridge mode via PF_BRIDGE Hardware switches may support enabling and disabling the loopback switch which puts the device in a VEPA mode defined in the IEEE 802.1Qbg specification. In this mode frames are not switched in the hardware but sent directly to the switch. SR-IOV capable NICs will likely support this mode I am aware of at least two such devices. Also I am told (but don't have any of this hardware available) that there are devices that only support VEPA modes. In these cases it is important at a minimum to be able to query these attributes. This patch adds an additional IFLA_BRIDGE_MODE attribute that can be set and dumped via the PF_BRIDGE:{SET|GET}LINK operations. Also anticipating bridge attributes that may be common for both embedded bridges and software bridges this adds a flags attribute IFLA_BRIDGE_FLAGS currently used to determine if the command or event is being generated to/from an embedded bridge or software bridge. Finally, the event generation is pulled out of the bridge module and into rtnetlink proper. For example using the macvlan driver in VEPA mode on top of an embedded switch requires putting the embedded switch into a VEPA mode to get the expected results. -------- -------- | VEPA | | VEPA | <-- macvlan vepa edge relays -------- -------- | | | | ------------------ | VEPA | <-- embedded switch in NIC ------------------ | | ------------------- | external switch | <-- shiny new physical ------------------- switch with VEPA support A packet sent from the macvlan VEPA at the top could be loopbacked on the embedded switch and never seen by the external switch. So in order for this to work the embedded switch needs to be set in the VEPA state via the above described commands. By making these attributes nested in IFLA_AF_SPEC we allow future extensions to be made as needed. CC: Lennert Buytenhek CC: Stephen Hemminger Signed-off-by: John Fastabend Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 18 +++++++++ net/bridge/br_netlink.c | 2 - net/bridge/br_private.h | 4 +- net/core/rtnetlink.c | 85 ++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 102 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index a8fe9549ddbc..b3885791e11e 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -97,5 +97,23 @@ struct __fdb_entry { __u16 unused; }; +/* Bridge Flags */ +#define BRIDGE_FLAGS_MASTER 1 /* Bridge command to/from master */ +#define BRIDGE_FLAGS_SELF 2 /* Bridge command to/from lowerdev */ +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ + +/* Bridge management nested attributes + * [IFLA_AF_SPEC] = { + * [IFLA_BRIDGE_FLAGS] + * [IFLA_BRIDGE_MODE] + * } + */ +enum { + IFLA_BRIDGE_FLAGS, + IFLA_BRIDGE_MODE, + __IFLA_BRIDGE_MAX, +}; +#define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 743511bb7319..14b065cbd214 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -166,8 +166,6 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) br_port_state_selection(p->br); spin_unlock_bh(&p->br->lock); - br_ifinfo_notify(RTM_NEWLINK, p); - return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fdcd5f626ca6..6f40c14a2a65 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -158,7 +158,9 @@ struct net_bridge_port static inline struct net_bridge_port *br_port_get_rcu(const struct net_device *dev) { - struct net_bridge_port *port = rcu_dereference(dev->rx_handler_data); + struct net_bridge_port *port = + rcu_dereference_rtnl(dev->rx_handler_data); + return br_port_exists(dev) ? port : NULL; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a068666b322f..8d2af0f77d36 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2295,13 +2295,60 @@ static int rtnl_bridge_getlink(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +static inline size_t bridge_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct ifinfomsg)) + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(sizeof(u32)) /* IFLA_MASTER */ + + nla_total_size(sizeof(u32)) /* IFLA_MTU */ + + nla_total_size(sizeof(u32)) /* IFLA_LINK */ + + nla_total_size(sizeof(u32)) /* IFLA_OPERSTATE */ + + nla_total_size(sizeof(u8)) /* IFLA_PROTINFO */ + + nla_total_size(sizeof(struct nlattr)) /* IFLA_AF_SPEC */ + + nla_total_size(sizeof(u16)) /* IFLA_BRIDGE_FLAGS */ + + nla_total_size(sizeof(u16)); /* IFLA_BRIDGE_MODE */ +} + +static int rtnl_bridge_notify(struct net_device *dev, u16 flags) +{ + struct net *net = dev_net(dev); + struct net_device *master = dev->master; + struct sk_buff *skb; + int err = -EOPNOTSUPP; + + skb = nlmsg_new(bridge_nlmsg_size(), GFP_ATOMIC); + if (!skb) { + err = -ENOMEM; + goto errout; + } + + if (!flags && master && master->netdev_ops->ndo_bridge_getlink) + err = master->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + else if (dev->netdev_ops->ndo_bridge_getlink) + err = dev->netdev_ops->ndo_bridge_getlink(skb, 0, 0, dev); + + if (err < 0) + goto errout; + + rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + return 0; +errout: + WARN_ON(err == -EMSGSIZE); + kfree_skb(skb); + rtnl_set_sk_err(net, RTNLGRP_LINK, err); + return err; +} + static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct net *net = sock_net(skb->sk); struct ifinfomsg *ifm; struct net_device *dev; - int err = -EINVAL; + struct nlattr *br_spec, *attr = NULL; + int rem, err = -EOPNOTSUPP; + u16 flags = 0; if (nlmsg_len(nlh) < sizeof(*ifm)) return -EINVAL; @@ -2316,15 +2363,45 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, return -ENODEV; } - if (dev->master && dev->master->netdev_ops->ndo_bridge_setlink) { + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + if (br_spec) { + nla_for_each_nested(attr, br_spec, rem) { + if (nla_type(attr) == IFLA_BRIDGE_FLAGS) { + flags = nla_get_u16(attr); + break; + } + } + } + + if (!flags || (flags & BRIDGE_FLAGS_MASTER)) { + if (!dev->master || + !dev->master->netdev_ops->ndo_bridge_setlink) { + err = -EOPNOTSUPP; + goto out; + } + err = dev->master->netdev_ops->ndo_bridge_setlink(dev, nlh); if (err) goto out; + + flags &= ~BRIDGE_FLAGS_MASTER; } - if (dev->netdev_ops->ndo_bridge_setlink) - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + if ((flags & BRIDGE_FLAGS_SELF)) { + if (!dev->netdev_ops->ndo_bridge_setlink) + err = -EOPNOTSUPP; + else + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + + if (!err) + flags &= ~BRIDGE_FLAGS_SELF; + } + if (attr && nla_type(attr) == IFLA_BRIDGE_FLAGS) + memcpy(nla_data(attr), &flags, sizeof(flags)); + /* Generate event to notify upper layer of bridge change */ + if (!err) + err = rtnl_bridge_notify(dev, flags); out: return err; } -- cgit v1.2.3 From f3335031b9452baebfe49b8b5e55d3fe0c4677d1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 27 Oct 2012 02:26:17 +0000 Subject: net: filter: add vlan tag access BPF filters lack ability to access skb->vlan_tci This patch adds two new ancillary accessors : SKF_AD_VLAN_TAG (44) mapped to vlan_tx_tag_get(skb) SKF_AD_VLAN_TAG_PRESENT (48) mapped to vlan_tx_tag_present(skb) This allows libpcap/tcpdump to use a kernel filter instead of having to fallback to accept all packets, then filter them in user space. Signed-off-by: Eric Dumazet Suggested-by: Ani Sinha Suggested-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 2 ++ include/uapi/linux/filter.h | 4 +++- net/core/filter.c | 9 +++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 24d251f3bab0..c9f0005c35e2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -123,6 +123,8 @@ enum { BPF_S_ANC_CPU, BPF_S_ANC_ALU_XOR_X, BPF_S_ANC_SECCOMP_LD_W, + BPF_S_ANC_VLAN_TAG, + BPF_S_ANC_VLAN_TAG_PRESENT, }; #endif /* __LINUX_FILTER_H__ */ diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h index 3d7922433aba..9cfde6941099 100644 --- a/include/uapi/linux/filter.h +++ b/include/uapi/linux/filter.h @@ -127,7 +127,9 @@ struct sock_fprog { /* Required for SO_ATTACH_FILTER. */ #define SKF_AD_RXHASH 32 #define SKF_AD_CPU 36 #define SKF_AD_ALU_XOR_X 40 -#define SKF_AD_MAX 44 +#define SKF_AD_VLAN_TAG 44 +#define SKF_AD_VLAN_TAG_PRESENT 48 +#define SKF_AD_MAX 52 #define SKF_NET_OFF (-0x100000) #define SKF_LL_OFF (-0x200000) diff --git a/net/core/filter.c b/net/core/filter.c index 3d92ebb7fbcf..5a114d41bf11 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -39,6 +39,7 @@ #include #include #include +#include /* No hurry in this branch * @@ -341,6 +342,12 @@ load_b: case BPF_S_ANC_CPU: A = raw_smp_processor_id(); continue; + case BPF_S_ANC_VLAN_TAG: + A = vlan_tx_tag_get(skb); + continue; + case BPF_S_ANC_VLAN_TAG_PRESENT: + A = !!vlan_tx_tag_present(skb); + continue; case BPF_S_ANC_NLATTR: { struct nlattr *nla; @@ -600,6 +607,8 @@ int sk_chk_filter(struct sock_filter *filter, unsigned int flen) ANCILLARY(RXHASH); ANCILLARY(CPU); ANCILLARY(ALU_XOR_X); + ANCILLARY(VLAN_TAG); + ANCILLARY(VLAN_TAG_PRESENT); } } ftest->code = code; -- cgit v1.2.3 From bbb009941efaece3898910a862f6d23aa55d6ba8 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 31 Oct 2012 19:45:59 +0000 Subject: tuntap: introduce multiqueue flags Add flags to be used by creating multiqueue tuntap device. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_tun.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 25a585ce23e6..8ef3a87b58a0 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -34,6 +34,7 @@ #define TUN_ONE_QUEUE 0x0080 #define TUN_PERSIST 0x0100 #define TUN_VNET_HDR 0x0200 +#define TUN_TAP_MQ 0x0400 /* Ioctl defines */ #define TUNSETNOCSUM _IOW('T', 200, int) @@ -61,6 +62,7 @@ #define IFF_ONE_QUEUE 0x2000 #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 +#define IFF_MULTI_QUEUE 0x0100 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ -- cgit v1.2.3 From cde8b15f1aabe327038ee4e0e11dd6b798572f69 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 31 Oct 2012 19:46:01 +0000 Subject: tuntap: add ioctl to attach or detach a file form tuntap device Sometimes usespace may need to active/deactive a queue, this could be done by detaching and attaching a file from tuntap device. This patch introduces a new ioctls - TUNSETQUEUE which could be used to do this. Flag IFF_ATTACH_QUEUE were introduced to do attaching while IFF_DETACH_QUEUE were introduced to do the detaching. Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/tun.c | 56 ++++++++++++++++++++++++++++++++++++++------- include/uapi/linux/if_tun.h | 3 +++ 2 files changed, 51 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2762c55aeb66..79b6f9ecc12c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -195,6 +195,15 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb) return txq; } +static inline bool tun_not_capable(struct tun_struct *tun) +{ + const struct cred *cred = current_cred(); + + return ((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || + (gid_valid(tun->group) && !in_egroup_p(tun->group))) && + !capable(CAP_NET_ADMIN); +} + static void tun_set_real_num_queues(struct tun_struct *tun) { netif_set_real_num_tx_queues(tun->dev, tun->numqueues); @@ -1310,8 +1319,6 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) dev = __dev_get_by_name(net, ifr->ifr_name); if (dev) { - const struct cred *cred = current_cred(); - if (ifr->ifr_flags & IFF_TUN_EXCL) return -EBUSY; if ((ifr->ifr_flags & IFF_TUN) && dev->netdev_ops == &tun_netdev_ops) @@ -1321,9 +1328,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else return -EINVAL; - if (((uid_valid(tun->owner) && !uid_eq(cred->euid, tun->owner)) || - (gid_valid(tun->group) && !in_egroup_p(tun->group))) && - !capable(CAP_NET_ADMIN)) + if (tun_not_capable(tun)) return -EPERM; err = security_tun_dev_attach(tfile->socket.sk); if (err < 0) @@ -1530,6 +1535,40 @@ static void tun_set_sndbuf(struct tun_struct *tun) } } +static int tun_set_queue(struct file *file, struct ifreq *ifr) +{ + struct tun_file *tfile = file->private_data; + struct tun_struct *tun; + struct net_device *dev; + int ret = 0; + + rtnl_lock(); + + if (ifr->ifr_flags & IFF_ATTACH_QUEUE) { + dev = __dev_get_by_name(tfile->net, ifr->ifr_name); + if (!dev) { + ret = -EINVAL; + goto unlock; + } + + tun = netdev_priv(dev); + if (dev->netdev_ops != &tap_netdev_ops && + dev->netdev_ops != &tun_netdev_ops) + ret = -EINVAL; + else if (tun_not_capable(tun)) + ret = -EPERM; + else + ret = tun_attach(tun, file); + } else if (ifr->ifr_flags & IFF_DETACH_QUEUE) + __tun_detach(tfile, false); + else + ret = -EINVAL; + +unlock: + rtnl_unlock(); + return ret; +} + static long __tun_chr_ioctl(struct file *file, unsigned int cmd, unsigned long arg, int ifreq_len) { @@ -1543,7 +1582,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, int vnet_hdr_sz; int ret; - if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) { + if (cmd == TUNSETIFF || cmd == TUNSETQUEUE || _IOC_TYPE(cmd) == 0x89) { if (copy_from_user(&ifr, argp, ifreq_len)) return -EFAULT; } else { @@ -1554,9 +1593,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, * This is needed because we never checked for invalid flags on * TUNSETIFF. */ return put_user(IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE | - IFF_VNET_HDR, + IFF_VNET_HDR | IFF_MULTI_QUEUE, (unsigned int __user*)argp); - } + } else if (cmd == TUNSETQUEUE) + return tun_set_queue(file, &ifr); ret = 0; rtnl_lock(); diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 8ef3a87b58a0..958497ad5bb5 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -54,6 +54,7 @@ #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog) #define TUNGETVNETHDRSZ _IOR('T', 215, int) #define TUNSETVNETHDRSZ _IOW('T', 216, int) +#define TUNSETQUEUE _IOW('T', 217, int) /* TUNSETIFF ifr flags */ #define IFF_TUN 0x0001 @@ -63,6 +64,8 @@ #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 #define IFF_MULTI_QUEUE 0x0100 +#define IFF_ATTACH_QUEUE 0x0200 +#define IFF_DETACH_QUEUE 0x0400 /* Features for GSO (TUNSETOFFLOAD). */ #define TUN_F_CSUM 0x01 /* You can hand me unchecksummed packets. */ -- cgit v1.2.3 From 215b13dd288c2e1e4461c1530a801f5f83e8cd90 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Wed, 31 Oct 2012 06:19:07 +0000 Subject: ptp: add an ioctl to compare PHC time with system time This patch adds an ioctl for PTP Hardware Clock (PHC) devices that allows user space to measure the time offset between the PHC and the system clock. Rather than hard coding any kind of estimation algorithm into the kernel, this patch takes the more flexible approach of just delivering an array of raw clock readings. In that way, the user space clock servo may be adapted to new and different hardware clocks. Signed-off-by: Richard Cochran Acked-by: Jacob Keller Signed-off-by: David S. Miller --- drivers/ptp/ptp_chardev.c | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/ptp_clock.h | 14 ++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c index e7f301da2902..4f8ae8057a7e 100644 --- a/drivers/ptp/ptp_chardev.c +++ b/drivers/ptp/ptp_chardev.c @@ -33,9 +33,13 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) { struct ptp_clock_caps caps; struct ptp_clock_request req; + struct ptp_sys_offset sysoff; struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); struct ptp_clock_info *ops = ptp->info; + struct ptp_clock_time *pct; + struct timespec ts; int enable, err = 0; + unsigned int i; switch (cmd) { @@ -88,6 +92,34 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg) err = ops->enable(ops, &req, enable); break; + case PTP_SYS_OFFSET: + if (copy_from_user(&sysoff, (void __user *)arg, + sizeof(sysoff))) { + err = -EFAULT; + break; + } + if (sysoff.n_samples > PTP_MAX_SAMPLES) { + err = -EINVAL; + break; + } + pct = &sysoff.ts[0]; + for (i = 0; i < sysoff.n_samples; i++) { + getnstimeofday(&ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + pct++; + ptp->info->gettime(ptp->info, &ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + pct++; + } + getnstimeofday(&ts); + pct->sec = ts.tv_sec; + pct->nsec = ts.tv_nsec; + if (copy_to_user((void __user *)arg, &sysoff, sizeof(sysoff))) + err = -EFAULT; + break; + default: err = -ENOTTY; break; diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h index 94e981f810a2..b65c834f83e9 100644 --- a/include/uapi/linux/ptp_clock.h +++ b/include/uapi/linux/ptp_clock.h @@ -67,12 +67,26 @@ struct ptp_perout_request { unsigned int rsv[4]; /* Reserved for future use. */ }; +#define PTP_MAX_SAMPLES 25 /* Maximum allowed offset measurement samples. */ + +struct ptp_sys_offset { + unsigned int n_samples; /* Desired number of measurements. */ + unsigned int rsv[3]; /* Reserved for future use. */ + /* + * Array of interleaved system/phc time stamps. The kernel + * will provide 2*n_samples + 1 time stamps, with the last + * one as a system time stamp. + */ + struct ptp_clock_time ts[2 * PTP_MAX_SAMPLES + 1]; +}; + #define PTP_CLK_MAGIC '=' #define PTP_CLOCK_GETCAPS _IOR(PTP_CLK_MAGIC, 1, struct ptp_clock_caps) #define PTP_EXTTS_REQUEST _IOW(PTP_CLK_MAGIC, 2, struct ptp_extts_request) #define PTP_PEROUT_REQUEST _IOW(PTP_CLK_MAGIC, 3, struct ptp_perout_request) #define PTP_ENABLE_PPS _IOW(PTP_CLK_MAGIC, 4, int) +#define PTP_SYS_OFFSET _IOW(PTP_CLK_MAGIC, 5, struct ptp_sys_offset) struct ptp_extts_event { struct ptp_clock_time t; /* Time event occured. */ -- cgit v1.2.3 From cc535dfb6a85b42218307c43f60668d7bd6f4318 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 29 Oct 2012 04:53:27 +0000 Subject: rtnl/ipv4: use netconf msg to advertise rp_filter status Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/netconf.h | 1 + net/ipv4/devinet.c | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index d0513726711f..75dcbc587fb5 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -12,6 +12,7 @@ enum { NETCONFA_UNSPEC, NETCONFA_IFINDEX, NETCONFA_FORWARDING, + NETCONFA_RP_FILTER, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f8b1e0494d75..f6db227c1fd9 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1451,6 +1451,8 @@ static int inet_netconf_msgsize_devconf(int type) /* type -1 is used for ALL */ if (type == -1 || type == NETCONFA_FORWARDING) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_RP_FILTER) + size += nla_total_size(4); return size; } @@ -1479,6 +1481,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_FORWARDING, IPV4_DEVCONF(*devconf, FORWARDING)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_RP_FILTER) && + nla_put_s32(skb, NETCONFA_RP_FILTER, + IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1515,6 +1521,7 @@ errout: static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_RP_FILTER] = { .len = sizeof(int) }, }; static int inet_netconf_get_devconf(struct sk_buff *in_skb, @@ -1647,6 +1654,23 @@ static int devinet_conf_proc(ctl_table *ctl, int write, i == IPV4_DEVCONF_ROUTE_LOCALNET - 1) if ((new_value == 0) && (old_value != 0)) rt_cache_flush(net); + if (i == IPV4_DEVCONF_RP_FILTER - 1 && + new_value != old_value) { + int ifindex; + + if (cnf == net->ipv4.devconf_dflt) + ifindex = NETCONFA_IFINDEX_DEFAULT; + else if (cnf == net->ipv4.devconf_all) + ifindex = NETCONFA_IFINDEX_ALL; + else { + struct in_device *idev = + container_of(cnf, struct in_device, + cnf); + ifindex = idev->dev->ifindex; + } + inet_netconf_notify_devconf(net, NETCONFA_RP_FILTER, + ifindex, cnf); + } } return ret; -- cgit v1.2.3 From 121d1e0941e05c64ee4223064dd83eb24e871739 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 30 Oct 2012 01:08:49 +0000 Subject: netfilter: ipv6: add getsockopt to retrieve origdst userspace can query the original ipv4 destination address of a REDIRECTed connection via getsockopt(m_sock, SOL_IP, SO_ORIGINAL_DST, &m_server_addr, &addrsize) but for ipv6 no such option existed. This adds getsockopt(..., IPPROTO_IPV6, IP6T_SO_ORIGINAL_DST, ...). Without this, userspace needs to parse /proc or use ctnetlink, which appears to be overkill. This uses option number 80 for IP6T_SO_ORIGINAL_DST, which is spare, to use the same number we use in the IPv4 socket option SO_ORIGINAL_DST. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/in6.h | 1 + include/uapi/linux/netfilter_ipv6/ip6_tables.h | 3 ++ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 61 ++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/in6.h b/include/uapi/linux/in6.h index 1e3159989958..f79c3721da6e 100644 --- a/include/uapi/linux/in6.h +++ b/include/uapi/linux/in6.h @@ -240,6 +240,7 @@ struct in6_flowlabel_req { * * IP6T_SO_GET_REVISION_MATCH 68 * IP6T_SO_GET_REVISION_TARGET 69 + * IP6T_SO_ORIGINAL_DST 80 */ /* RFC5014: Source address selection */ diff --git a/include/uapi/linux/netfilter_ipv6/ip6_tables.h b/include/uapi/linux/netfilter_ipv6/ip6_tables.h index bf1ef65cc582..649c68062dca 100644 --- a/include/uapi/linux/netfilter_ipv6/ip6_tables.h +++ b/include/uapi/linux/netfilter_ipv6/ip6_tables.h @@ -178,6 +178,9 @@ struct ip6t_error { #define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) #define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET +/* obtain original address if REDIRECT'd connection */ +#define IP6T_SO_ORIGINAL_DST 80 + /* ICMP matching stuff */ struct ip6t_icmp { __u8 type; /* type to match */ diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 8860d23e61cf..02dcafdc7a95 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -295,6 +296,50 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { }, }; +static int +ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) +{ + const struct inet_sock *inet = inet_sk(sk); + const struct ipv6_pinfo *inet6 = inet6_sk(sk); + const struct nf_conntrack_tuple_hash *h; + struct sockaddr_in6 sin6; + struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 }; + struct nf_conn *ct; + + tuple.src.u3.in6 = inet6->rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.in6 = inet6->daddr; + tuple.dst.u.tcp.port = inet->inet_dport; + tuple.dst.protonum = sk->sk_protocol; + + if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) + return -ENOPROTOOPT; + + if (*len < 0 || (unsigned int) *len < sizeof(sin6)) + return -EINVAL; + + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); + if (!h) { + pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n", + &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; + } + + ct = nf_ct_tuplehash_to_ctrack(h); + + sin6.sin6_family = AF_INET6; + sin6.sin6_port = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u.tcp.port; + sin6.sin6_flowinfo = inet6->flow_label & IPV6_FLOWINFO_MASK; + memcpy(&sin6.sin6_addr, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.in6, + sizeof(sin6.sin6_addr)); + sin6.sin6_scope_id = sk->sk_bound_dev_if; + + nf_ct_put(ct); + return copy_to_user(user, &sin6, sizeof(sin6)) ? -EFAULT : 0; +} + #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include @@ -359,6 +404,14 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET6)); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI "); +static struct nf_sockopt_ops so_getorigdst6 = { + .pf = NFPROTO_IPV6, + .get_optmin = IP6T_SO_ORIGINAL_DST, + .get_optmax = IP6T_SO_ORIGINAL_DST + 1, + .get = ipv6_getorigdst, + .owner = THIS_MODULE, +}; + static int ipv6_net_init(struct net *net) { int ret = 0; @@ -425,6 +478,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) need_conntrack(); nf_defrag_ipv6_enable(); + ret = nf_register_sockopt(&so_getorigdst6); + if (ret < 0) { + pr_err("Unable to register netfilter socket option\n"); + return ret; + } + ret = register_pernet_subsys(&ipv6_net_ops); if (ret < 0) goto cleanup_pernet; @@ -440,6 +499,7 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) cleanup_ipv6: unregister_pernet_subsys(&ipv6_net_ops); cleanup_pernet: + nf_unregister_sockopt(&so_getorigdst6); return ret; } @@ -448,6 +508,7 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) synchronize_net(); nf_unregister_hooks(ipv6_conntrack_ops, ARRAY_SIZE(ipv6_conntrack_ops)); unregister_pernet_subsys(&ipv6_net_ops); + nf_unregister_sockopt(&so_getorigdst6); } module_init(nf_conntrack_l3proto_ipv6_init); -- cgit v1.2.3 From 4f99ad51292078cc47343c17d3870764588cff73 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Tue, 30 Oct 2012 04:08:41 +0000 Subject: if_ether.h: add B.A.T.M.A.N.-Advanced Ethertype Add Ethertype 0x4305 (not an officially registered id). This Ethertype is used by every frame generated by B.A.T.M.A.N.-Advanced. Its definition is currently batman-adv local only and since it is not officially registered it is better to make its definition kernel-wide so that we avoid collisions given by future unofficial uses of the same Ethertype. Signed-off-by: Antonio Quartulli Signed-off-by: David S. Miller --- include/uapi/linux/if_ether.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 0343e1f0582c..67fb87ca1094 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -48,6 +48,7 @@ #define ETH_P_BPQ 0x08FF /* G8BPQ AX.25 Ethernet Packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_IEEEPUP 0x0a00 /* Xerox IEEE802.3 PUP packet */ #define ETH_P_IEEEPUPAT 0x0a01 /* Xerox IEEE802.3 PUP Addr Trans packet */ +#define ETH_P_BATMAN 0x4305 /* B.A.T.M.A.N.-Advanced packet [ NOT AN OFFICIALLY REGISTERED ID ] */ #define ETH_P_DEC 0x6000 /* DEC Assigned proto */ #define ETH_P_DNA_DL 0x6001 /* DEC DNA Dump/Load */ #define ETH_P_DNA_RC 0x6002 /* DEC DNA Remote Console */ -- cgit v1.2.3 From f4e583c8935c6f52f9385ee7cfbea8f65c66a737 Mon Sep 17 00:00:00 2001 From: Antonio Quartulli Date: Fri, 2 Nov 2012 13:27:48 +0100 Subject: nl/cfg80211: add the NL80211_CMD_SET_MCAST_RATE command This command triggers a new callback: set_mcast_rate(). It enables the user to change the rate used to send multicast frames for vif configured as IBSS or MESH_POINT Signed-off-by: Antonio Quartulli Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 8034a4268fcb..cee791fd4cff 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1545,6 +1545,9 @@ struct cfg80211_gtk_rekey_data { * to a merge. * @leave_ibss: Leave the IBSS. * + * @set_mcast_rate: Set the specified multicast rate (only if vif is in ADHOC or + * MESH mode) + * * @set_wiphy_params: Notify that wiphy parameters have changed; * @changed bitfield (see &enum wiphy_params_flags) describes which values * have changed. The actual parameter values are available in @@ -1749,6 +1752,9 @@ struct cfg80211_ops { struct cfg80211_ibss_params *params); int (*leave_ibss)(struct wiphy *wiphy, struct net_device *dev); + int (*set_mcast_rate)(struct wiphy *wiphy, struct net_device *dev, + int rate[IEEE80211_NUM_BANDS]); + int (*set_wiphy_params)(struct wiphy *wiphy, u32 changed); int (*set_tx_power)(struct wiphy *wiphy, struct wireless_dev *wdev, diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 4c5f6748ed7d..cbd2d6bb907a 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -578,6 +578,9 @@ * station, due to particular reason. %NL80211_ATTR_CONN_FAILED_REASON * is used for this. * + * @NL80211_CMD_SET_MCAST_RATE: Change the rate used to send multicast frames + * for IBSS or MESH vif. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -726,6 +729,8 @@ enum nl80211_commands { NL80211_CMD_CONN_FAILED, + NL80211_CMD_SET_MCAST_RATE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 87d4670ee53a..9b0a3b8fd20a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1110,6 +1110,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } CMD(start_p2p_device, START_P2P_DEVICE); + CMD(set_mcast_rate, SET_MCAST_RATE); #ifdef CONFIG_NL80211_TESTMODE CMD(testmode_cmd, TESTMODE); @@ -5448,6 +5449,36 @@ static int nl80211_leave_ibss(struct sk_buff *skb, struct genl_info *info) return cfg80211_leave_ibss(rdev, dev, false); } +static int nl80211_set_mcast_rate(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + int mcast_rate[IEEE80211_NUM_BANDS]; + u32 nla_rate; + int err; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_ADHOC && + dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT) + return -EOPNOTSUPP; + + if (!rdev->ops->set_mcast_rate) + return -EOPNOTSUPP; + + memset(mcast_rate, 0, sizeof(mcast_rate)); + + if (!info->attrs[NL80211_ATTR_MCAST_RATE]) + return -EINVAL; + + nla_rate = nla_get_u32(info->attrs[NL80211_ATTR_MCAST_RATE]); + if (!nl80211_parse_mcast_rate(rdev, mcast_rate, nla_rate)) + return -EINVAL; + + err = rdev->ops->set_mcast_rate(&rdev->wiphy, dev, mcast_rate); + + return err; +} + + #ifdef CONFIG_NL80211_TESTMODE static struct genl_multicast_group nl80211_testmode_mcgrp = { .name = "testmode", @@ -7629,6 +7660,14 @@ static struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_SET_MCAST_RATE, + .doit = nl80211_set_mcast_rate, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV | + NL80211_FLAG_NEED_RTNL, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { -- cgit v1.2.3 From 5920cd3a41f1aefc30e9ce86384fc2fe9f5fe0c0 Mon Sep 17 00:00:00 2001 From: Paul Chavent Date: Tue, 6 Nov 2012 23:10:47 +0000 Subject: packet: tx_ring: allow the user to choose tx data offset The tx data offset of packet mmap tx ring used to be : (TPACKET2_HDRLEN - sizeof(struct sockaddr_ll)) The problem is that, with SOCK_RAW socket, the payload (14 bytes after the beginning of the user data) is misaligned. This patch allows to let the user gives an offset for it's tx data if he desires. Set sock option PACKET_TX_HAS_OFF to 1, then specify in each frame of your tx ring tp_net for SOCK_DGRAM, or tp_mac for SOCK_RAW. Signed-off-by: Paul Chavent Signed-off-by: David S. Miller --- Documentation/networking/packet_mmap.txt | 13 +++++++++ include/uapi/linux/if_packet.h | 1 + net/packet/af_packet.c | 46 +++++++++++++++++++++++++++++++- net/packet/internal.h | 1 + 4 files changed, 60 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt index 1c08a4b0981f..7cd879eba5dc 100644 --- a/Documentation/networking/packet_mmap.txt +++ b/Documentation/networking/packet_mmap.txt @@ -163,6 +163,19 @@ As capture, each frame contains two parts: A complete tutorial is available at: http://wiki.gnu-log.net/ +By default, the user should put data at : + frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll) + +So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW), +the beginning of the user data will be at : + frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + +If you wish to put user data at a custom offset from the beginning of +the frame (for payload alignment with SOCK_RAW mode for instance) you +can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order +to make this work it must be enabled previously with setsockopt() +and the PACKET_TX_HAS_OFF option. + -------------------------------------------------------------------------------- + PACKET_MMAP settings -------------------------------------------------------------------------------- diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index f3799295d231..f9a60375f0d0 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -50,6 +50,7 @@ struct sockaddr_ll { #define PACKET_TX_TIMESTAMP 16 #define PACKET_TIMESTAMP 17 #define PACKET_FANOUT 18 +#define PACKET_TX_HAS_OFF 19 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9034f52659b5..f262dbfc7f06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1881,7 +1881,35 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, skb_reserve(skb, hlen); skb_reset_network_header(skb); - data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + if (po->tp_tx_has_off) { + int off_min, off_max, off; + off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); + off_max = po->tx_ring.frame_size - tp_len; + if (sock->type == SOCK_DGRAM) { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_net; + break; + default: + off = ph.h1->tp_net; + break; + } + } else { + switch (po->tp_version) { + case TPACKET_V2: + off = ph.h2->tp_mac; + break; + default: + off = ph.h1->tp_mac; + break; + } + } + if (unlikely((off < off_min) || (off_max < off))) + return -EINVAL; + data = ph.raw + off; + } else { + data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll); + } to_write = tp_len; if (sock->type == SOCK_DGRAM) { @@ -3109,6 +3137,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return fanout_add(sk, val & 0xffff, val >> 16); } + case PACKET_TX_HAS_OFF: + { + unsigned int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + po->tp_tx_has_off = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -3200,6 +3241,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->type << 16)) : 0); break; + case PACKET_TX_HAS_OFF: + val = po->tp_tx_has_off; + break; default: return -ENOPROTOOPT; } diff --git a/net/packet/internal.h b/net/packet/internal.h index 44945f6b7252..e84cab8cb7a9 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -109,6 +109,7 @@ struct packet_sock { unsigned int tp_hdrlen; unsigned int tp_reserve; unsigned int tp_loss:1; + unsigned int tp_tx_has_off:1; unsigned int tp_tstamp; struct packet_type prot_hook ____cacheline_aligned_in_smp; }; -- cgit v1.2.3 From a80a6b85b428e6ce12a8363bb1f08d44c50f3252 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 8 Nov 2012 15:53:35 -0800 Subject: revert "epoll: support for disabling items, and a self-test app" Revert commit 03a7beb55b9f ("epoll: support for disabling items, and a self-test app") pending resolution of the issues identified by Michael Kerrisk, copied below. We'll revisit this for 3.8. : I've taken a look at this patch as it currently stands in 3.7-rc1, and : done a bit of testing. (By the way, the test program : tools/testing/selftests/epoll/test_epoll.c does not compile...) : : There are one or two places where the behavior seems a little strange, : so I have a question or two at the end of this mail. But other than : that, I want to check my understanding so that the interface can be : correctly documented. : : Just to go though my understanding, the problem is the following : scenario in a multithreaded application: : : 1. Multiple threads are performing epoll_wait() operations, : and maintaining a user-space cache that contains information : corresponding to each file descriptor being monitored by : epoll_wait(). : : 2. At some point, a thread wants to delete (EPOLL_CTL_DEL) : a file descriptor from the epoll interest list, and : delete the corresponding record from the user-space cache. : : 3. The problem with (2) is that some other thread may have : previously done an epoll_wait() that retrieved information : about the fd in question, and may be in the middle of using : information in the cache that relates to that fd. Thus, : there is a potential race. : : 4. The race can't solved purely in user space, because doing : so would require applying a mutex across the epoll_wait() : call, which would of course blow thread concurrency. : : Right? : : Your solution is the EPOLL_CTL_DISABLE operation. I want to : confirm my understanding about how to use this flag, since : the description that has accompanied the patches so far : has been a bit sparse : : 0. In the scenario you're concerned about, deleting a file : descriptor means (safely) doing the following: : (a) Deleting the file descriptor from the epoll interest list : using EPOLL_CTL_DEL : (b) Deleting the corresponding record in the user-space cache : : 1. It's only meaningful to use this EPOLL_CTL_DISABLE in : conjunction with EPOLLONESHOT. : : 2. Using EPOLL_CTL_DISABLE without using EPOLLONESHOT in : conjunction is a logical error. : : 3. The correct way to code multithreaded applications using : EPOLL_CTL_DISABLE and EPOLLONESHOT is as follows: : : a. All EPOLL_CTL_ADD and EPOLL_CTL_MOD operations should : should EPOLLONESHOT. : : b. When a thread wants to delete a file descriptor, it : should do the following: : : [1] Call epoll_ctl(EPOLL_CTL_DISABLE) : [2] If the return status from epoll_ctl(EPOLL_CTL_DISABLE) : was zero, then the file descriptor can be safely : deleted by the thread that made this call. : [3] If the epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY, : then the descriptor is in use. In this case, the calling : thread should set a flag in the user-space cache to : indicate that the thread that is using the descriptor : should perform the deletion operation. : : Is all of the above correct? : : The implementation depends on checking on whether : (events & ~EP_PRIVATE_BITS) == 0 : This replies on the fact that EPOLL_CTL_AD and EPOLL_CTL_MOD always : set EPOLLHUP and EPOLLERR in the 'events' mask, and EPOLLONESHOT : causes those flags (as well as all others in ~EP_PRIVATE_BITS) to be : cleared. : : A corollary to the previous paragraph is that using EPOLL_CTL_DISABLE : is only useful in conjunction with EPOLLONESHOT. However, as things : stand, one can use EPOLL_CTL_DISABLE on a file descriptor that does : not have EPOLLONESHOT set in 'events' This results in the following : (slightly surprising) behavior: : : (a) The first call to epoll_ctl(EPOLL_CTL_DISABLE) returns 0 : (the indicator that the file descriptor can be safely deleted). : (b) The next call to epoll_ctl(EPOLL_CTL_DISABLE) fails with EBUSY. : : This doesn't seem particularly useful, and in fact is probably an : indication that the user made a logic error: they should only be using : epoll_ctl(EPOLL_CTL_DISABLE) on a file descriptor for which : EPOLLONESHOT was set in 'events'. If that is correct, then would it : not make sense to return an error to user space for this case? Cc: Michael Kerrisk Cc: "Paton J. Lewis" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/eventpoll.c | 38 +--- include/uapi/linux/eventpoll.h | 1 - tools/testing/selftests/Makefile | 2 +- tools/testing/selftests/epoll/Makefile | 11 - tools/testing/selftests/epoll/test_epoll.c | 344 ----------------------------- 5 files changed, 4 insertions(+), 392 deletions(-) delete mode 100644 tools/testing/selftests/epoll/Makefile delete mode 100644 tools/testing/selftests/epoll/test_epoll.c (limited to 'include/uapi/linux') diff --git a/fs/eventpoll.c b/fs/eventpoll.c index da72250ddc1c..cd96649bfe62 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -346,7 +346,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p) /* Tells if the epoll_ctl(2) operation needs an event copy from userspace */ static inline int ep_op_has_event(int op) { - return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD; + return op != EPOLL_CTL_DEL; } /* Initialize the poll safe wake up structure */ @@ -676,34 +676,6 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi) return 0; } -/* - * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item - * had no event flags set, indicating that another thread may be currently - * handling that item's events (in the case that EPOLLONESHOT was being - * used). Otherwise a zero result indicates that the item has been disabled - * from receiving events. A disabled item may be re-enabled via - * EPOLL_CTL_MOD. Must be called with "mtx" held. - */ -static int ep_disable(struct eventpoll *ep, struct epitem *epi) -{ - int result = 0; - unsigned long flags; - - spin_lock_irqsave(&ep->lock, flags); - if (epi->event.events & ~EP_PRIVATE_BITS) { - if (ep_is_linked(&epi->rdllink)) - list_del_init(&epi->rdllink); - /* Ensure ep_poll_callback will not add epi back onto ready - list: */ - epi->event.events &= EP_PRIVATE_BITS; - } - else - result = -EBUSY; - spin_unlock_irqrestore(&ep->lock, flags); - - return result; -} - static void ep_free(struct eventpoll *ep) { struct rb_node *rbp; @@ -1048,6 +1020,8 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi) rb_insert_color(&epi->rbn, &ep->rbr); } + + #define PATH_ARR_SIZE 5 /* * These are the number paths of length 1 to 5, that we are allowing to emanate @@ -1813,12 +1787,6 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, } else error = -ENOENT; break; - case EPOLL_CTL_DISABLE: - if (epi) - error = ep_disable(ep, epi); - else - error = -ENOENT; - break; } mutex_unlock(&ep->mtx); diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h index 8c99ce7202c5..2c267bcbb85c 100644 --- a/include/uapi/linux/eventpoll.h +++ b/include/uapi/linux/eventpoll.h @@ -25,7 +25,6 @@ #define EPOLL_CTL_ADD 1 #define EPOLL_CTL_DEL 2 #define EPOLL_CTL_MOD 3 -#define EPOLL_CTL_DISABLE 4 /* * Request the handling of system wakeup events so as to prevent system suspends diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 43480149119e..85baf11e2acd 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,4 +1,4 @@ -TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug epoll +TARGETS = breakpoints kcmp mqueue vm cpu-hotplug memory-hotplug all: for TARGET in $(TARGETS); do \ diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile deleted file mode 100644 index 19806ed62f50..000000000000 --- a/tools/testing/selftests/epoll/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -# Makefile for epoll selftests - -all: test_epoll -%: %.c - gcc -pthread -g -o $@ $^ - -run_tests: all - ./test_epoll - -clean: - $(RM) test_epoll diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c deleted file mode 100644 index f7525392ce84..000000000000 --- a/tools/testing/selftests/epoll/test_epoll.c +++ /dev/null @@ -1,344 +0,0 @@ -/* - * tools/testing/selftests/epoll/test_epoll.c - * - * Copyright 2012 Adobe Systems Incorporated - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * Paton J. Lewis - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * A pointer to an epoll_item_private structure will be stored in the epoll - * item's event structure so that we can get access to the epoll_item_private - * data after calling epoll_wait: - */ -struct epoll_item_private { - int index; /* Position of this struct within the epoll_items array. */ - int fd; - uint32_t events; - pthread_mutex_t mutex; /* Guards the following variables... */ - int stop; - int status; /* Stores any error encountered while handling item. */ - /* The following variable allows us to test whether we have encountered - a problem while attempting to cancel and delete the associated - event. When the test program exits, 'deleted' should be exactly - one. If it is greater than one, then the failed test reflects a real - world situation where we would have tried to access the epoll item's - private data after deleting it: */ - int deleted; -}; - -struct epoll_item_private *epoll_items; - -/* - * Delete the specified item from the epoll set. In a real-world secneario this - * is where we would free the associated data structure, but in this testing - * environment we retain the structure so that we can test for double-deletion: - */ -void delete_item(int index) -{ - __sync_fetch_and_add(&epoll_items[index].deleted, 1); -} - -/* - * A pointer to a read_thread_data structure will be passed as the argument to - * each read thread: - */ -struct read_thread_data { - int stop; - int status; /* Indicates any error encountered by the read thread. */ - int epoll_set; -}; - -/* - * The function executed by the read threads: - */ -void *read_thread_function(void *function_data) -{ - struct read_thread_data *thread_data = - (struct read_thread_data *)function_data; - struct epoll_event event_data; - struct epoll_item_private *item_data; - char socket_data; - - /* Handle events until we encounter an error or this thread's 'stop' - condition is set: */ - while (1) { - int result = epoll_wait(thread_data->epoll_set, - &event_data, - 1, /* Number of desired events */ - 1000); /* Timeout in ms */ - if (result < 0) { - /* Breakpoints signal all threads. Ignore that while - debugging: */ - if (errno == EINTR) - continue; - thread_data->status = errno; - return 0; - } else if (thread_data->stop) - return 0; - else if (result == 0) /* Timeout */ - continue; - - /* We need the mutex here because checking for the stop - condition and re-enabling the epoll item need to be done - together as one atomic operation when EPOLL_CTL_DISABLE is - available: */ - item_data = (struct epoll_item_private *)event_data.data.ptr; - pthread_mutex_lock(&item_data->mutex); - - /* Remove the item from the epoll set if we want to stop - handling that event: */ - if (item_data->stop) - delete_item(item_data->index); - else { - /* Clear the data that was written to the other end of - our non-blocking socket: */ - do { - if (read(item_data->fd, &socket_data, 1) < 1) { - if ((errno == EAGAIN) || - (errno == EWOULDBLOCK)) - break; - else - goto error_unlock; - } - } while (item_data->events & EPOLLET); - - /* The item was one-shot, so re-enable it: */ - event_data.events = item_data->events; - if (epoll_ctl(thread_data->epoll_set, - EPOLL_CTL_MOD, - item_data->fd, - &event_data) < 0) - goto error_unlock; - } - - pthread_mutex_unlock(&item_data->mutex); - } - -error_unlock: - thread_data->status = item_data->status = errno; - pthread_mutex_unlock(&item_data->mutex); - return 0; -} - -/* - * A pointer to a write_thread_data structure will be passed as the argument to - * the write thread: - */ -struct write_thread_data { - int stop; - int status; /* Indicates any error encountered by the write thread. */ - int n_fds; - int *fds; -}; - -/* - * The function executed by the write thread. It writes a single byte to each - * socket in turn until the stop condition for this thread is set. If writing to - * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for - * the moment and just move on to the next socket in the list. We don't care - * about the order in which we deliver events to the epoll set. In fact we don't - * care about the data we're writing to the pipes at all; we just want to - * trigger epoll events: - */ -void *write_thread_function(void *function_data) -{ - const char data = 'X'; - int index; - struct write_thread_data *thread_data = - (struct write_thread_data *)function_data; - while (!thread_data->stop) - for (index = 0; - !thread_data->stop && (index < thread_data->n_fds); - ++index) - if ((write(thread_data->fds[index], &data, 1) < 1) && - (errno != EAGAIN) && - (errno != EWOULDBLOCK)) { - thread_data->status = errno; - return; - } -} - -/* - * Arguments are currently ignored: - */ -int main(int argc, char **argv) -{ - const int n_read_threads = 100; - const int n_epoll_items = 500; - int index; - int epoll_set = epoll_create1(0); - struct write_thread_data write_thread_data = { - 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int)) - }; - struct read_thread_data *read_thread_data = - malloc(n_read_threads * sizeof(struct read_thread_data)); - pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t)); - pthread_t write_thread; - - printf("-----------------\n"); - printf("Runing test_epoll\n"); - printf("-----------------\n"); - - epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private)); - - if (epoll_set < 0 || epoll_items == 0 || write_thread_data.fds == 0 || - read_thread_data == 0 || read_threads == 0) - goto error; - - if (sysconf(_SC_NPROCESSORS_ONLN) < 2) { - printf("Error: please run this test on a multi-core system.\n"); - goto error; - } - - /* Create the socket pairs and epoll items: */ - for (index = 0; index < n_epoll_items; ++index) { - int socket_pair[2]; - struct epoll_event event_data; - if (socketpair(AF_UNIX, - SOCK_STREAM | SOCK_NONBLOCK, - 0, - socket_pair) < 0) - goto error; - write_thread_data.fds[index] = socket_pair[0]; - epoll_items[index].index = index; - epoll_items[index].fd = socket_pair[1]; - if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0) - goto error; - /* We always use EPOLLONESHOT because this test is currently - structured to demonstrate the need for EPOLL_CTL_DISABLE, - which only produces useful information in the EPOLLONESHOT - case (without EPOLLONESHOT, calling epoll_ctl with - EPOLL_CTL_DISABLE will never return EBUSY). If support for - testing events without EPOLLONESHOT is desired, it should - probably be implemented in a separate unit test. */ - epoll_items[index].events = EPOLLIN | EPOLLONESHOT; - if (index < n_epoll_items / 2) - epoll_items[index].events |= EPOLLET; - epoll_items[index].stop = 0; - epoll_items[index].status = 0; - epoll_items[index].deleted = 0; - event_data.events = epoll_items[index].events; - event_data.data.ptr = &epoll_items[index]; - if (epoll_ctl(epoll_set, - EPOLL_CTL_ADD, - epoll_items[index].fd, - &event_data) < 0) - goto error; - } - - /* Create and start the read threads: */ - for (index = 0; index < n_read_threads; ++index) { - read_thread_data[index].stop = 0; - read_thread_data[index].status = 0; - read_thread_data[index].epoll_set = epoll_set; - if (pthread_create(&read_threads[index], - NULL, - read_thread_function, - &read_thread_data[index]) != 0) - goto error; - } - - if (pthread_create(&write_thread, - NULL, - write_thread_function, - &write_thread_data) != 0) - goto error; - - /* Cancel all event pollers: */ -#ifdef EPOLL_CTL_DISABLE - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - if (epoll_ctl(epoll_set, - EPOLL_CTL_DISABLE, - epoll_items[index].fd, - NULL) == 0) - delete_item(index); - else if (errno != EBUSY) { - pthread_mutex_unlock(&epoll_items[index].mutex); - goto error; - } - /* EBUSY means events were being handled; allow the other thread - to delete the item. */ - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#else - for (index = 0; index < n_epoll_items; ++index) { - pthread_mutex_lock(&epoll_items[index].mutex); - ++epoll_items[index].stop; - pthread_mutex_unlock(&epoll_items[index].mutex); - /* Wait in case a thread running read_thread_function is - currently executing code between epoll_wait and - pthread_mutex_lock with this item. Note that a longer delay - would make double-deletion less likely (at the expense of - performance), but there is no guarantee that any delay would - ever be sufficient. Note also that we delete all event - pollers at once for testing purposes, but in a real-world - environment we are likely to want to be able to cancel event - pollers at arbitrary times. Therefore we can't improve this - situation by just splitting this loop into two loops - (i.e. signal 'stop' for all items, sleep, and then delete all - items). We also can't fix the problem via EPOLL_CTL_DEL - because that command can't prevent the case where some other - thread is executing read_thread_function within the region - mentioned above: */ - usleep(1); - pthread_mutex_lock(&epoll_items[index].mutex); - if (!epoll_items[index].deleted) - delete_item(index); - pthread_mutex_unlock(&epoll_items[index].mutex); - } -#endif - - /* Shut down the read threads: */ - for (index = 0; index < n_read_threads; ++index) - __sync_fetch_and_add(&read_thread_data[index].stop, 1); - for (index = 0; index < n_read_threads; ++index) { - if (pthread_join(read_threads[index], NULL) != 0) - goto error; - if (read_thread_data[index].status) - goto error; - } - - /* Shut down the write thread: */ - __sync_fetch_and_add(&write_thread_data.stop, 1); - if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status) - goto error; - - /* Check for final error conditions: */ - for (index = 0; index < n_epoll_items; ++index) { - if (epoll_items[index].status != 0) - goto error; - if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0) - goto error; - } - for (index = 0; index < n_epoll_items; ++index) - if (epoll_items[index].deleted != 1) { - printf("Error: item data deleted %1d times.\n", - epoll_items[index].deleted); - goto error; - } - - printf("[PASS]\n"); - return 0; - - error: - printf("[FAIL]\n"); - return errno; -} -- cgit v1.2.3 From b891b4dc1eed33543c5818dae43ce8bb55f2080c Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Fri, 9 Nov 2012 15:56:03 +0900 Subject: PCI: Fix bit definitions of PCI_EXP_LNKCAP2 register According to the PCIe 3.0 spec, PCI_EXP_LNKCAP2_SLS_2_5GB is 1st bit of PCI_EXP_LNKCAP2 register, not 0th bit. So, the bit definition of supported link speed vector should be fixed. [bhelgaas: change "Current" to "Supported"] Signed-off-by: Jingoo Han Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 20ae747ddf34..259763d2df71 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -544,9 +544,9 @@ #define PCI_EXP_OBFF_WAKE_EN 0x6000 /* OBFF using WAKE# signaling */ #define PCI_CAP_EXP_ENDPOINT_SIZEOF_V2 44 /* v2 endpoints end here */ #define PCI_EXP_LNKCAP2 44 /* Link Capability 2 */ -#define PCI_EXP_LNKCAP2_SLS_2_5GB 0x01 /* Current Link Speed 2.5GT/s */ -#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x02 /* Current Link Speed 5.0GT/s */ -#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x04 /* Current Link Speed 8.0GT/s */ +#define PCI_EXP_LNKCAP2_SLS_2_5GB 0x02 /* Supported Link Speed 2.5GT/s */ +#define PCI_EXP_LNKCAP2_SLS_5_0GB 0x04 /* Supported Link Speed 5.0GT/s */ +#define PCI_EXP_LNKCAP2_SLS_8_0GB 0x08 /* Supported Link Speed 8.0GT/s */ #define PCI_EXP_LNKCAP2_CROSSLINK 0x100 /* Crosslink supported */ #define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ #define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ -- cgit v1.2.3 From d77807230e1ef30dbdee85aa24d27073a14dd168 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Nov 2012 02:37:17 +0000 Subject: UAPI: (Scripted) Disintegrate include/linux/hdlc Signed-off-by: David Howells Acked-by: Arnd Bergmann Acked-by: Thomas Gleixner Acked-by: Michael Kerrisk Acked-by: Paul E. McKenney Acked-by: Dave Jones Acked-by: Krzysztof Halasa Signed-off-by: David S. Miller --- include/linux/hdlc/Kbuild | 1 - include/linux/hdlc/ioctl.h | 81 ----------------------------------------- include/uapi/linux/hdlc/Kbuild | 1 + include/uapi/linux/hdlc/ioctl.h | 81 +++++++++++++++++++++++++++++++++++++++++ 4 files changed, 82 insertions(+), 82 deletions(-) delete mode 100644 include/linux/hdlc/ioctl.h create mode 100644 include/uapi/linux/hdlc/ioctl.h (limited to 'include/uapi/linux') diff --git a/include/linux/hdlc/Kbuild b/include/linux/hdlc/Kbuild index 1fb26448faa9..e69de29bb2d1 100644 --- a/include/linux/hdlc/Kbuild +++ b/include/linux/hdlc/Kbuild @@ -1 +0,0 @@ -header-y += ioctl.h diff --git a/include/linux/hdlc/ioctl.h b/include/linux/hdlc/ioctl.h deleted file mode 100644 index 583972364357..000000000000 --- a/include/linux/hdlc/ioctl.h +++ /dev/null @@ -1,81 +0,0 @@ -#ifndef __HDLC_IOCTL_H__ -#define __HDLC_IOCTL_H__ - - -#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */ - -#define CLOCK_DEFAULT 0 /* Default setting */ -#define CLOCK_EXT 1 /* External TX and RX clock - DTE */ -#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */ -#define CLOCK_TXINT 3 /* Internal TX and external RX clock */ -#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */ - - -#define ENCODING_DEFAULT 0 /* Default setting */ -#define ENCODING_NRZ 1 -#define ENCODING_NRZI 2 -#define ENCODING_FM_MARK 3 -#define ENCODING_FM_SPACE 4 -#define ENCODING_MANCHESTER 5 - - -#define PARITY_DEFAULT 0 /* Default setting */ -#define PARITY_NONE 1 /* No parity */ -#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */ -#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */ -#define PARITY_CRC16_PR0_CCITT 4 /* CRC16, initial 0x0000, ITU-T version */ -#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */ -#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */ -#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */ - -#define LMI_DEFAULT 0 /* Default setting */ -#define LMI_NONE 1 /* No LMI, all PVCs are static */ -#define LMI_ANSI 2 /* ANSI Annex D */ -#define LMI_CCITT 3 /* ITU-T Annex A */ -#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ - -typedef struct { - unsigned int clock_rate; /* bits per second */ - unsigned int clock_type; /* internal, external, TX-internal etc. */ - unsigned short loopback; -} sync_serial_settings; /* V.35, V.24, X.21 */ - -typedef struct { - unsigned int clock_rate; /* bits per second */ - unsigned int clock_type; /* internal, external, TX-internal etc. */ - unsigned short loopback; - unsigned int slot_map; -} te1_settings; /* T1, E1 */ - -typedef struct { - unsigned short encoding; - unsigned short parity; -} raw_hdlc_proto; - -typedef struct { - unsigned int t391; - unsigned int t392; - unsigned int n391; - unsigned int n392; - unsigned int n393; - unsigned short lmi; - unsigned short dce; /* 1 for DCE (network side) operation */ -} fr_proto; - -typedef struct { - unsigned int dlci; -} fr_proto_pvc; /* for creating/deleting FR PVCs */ - -typedef struct { - unsigned int dlci; - char master[IFNAMSIZ]; /* Name of master FRAD device */ -}fr_proto_pvc_info; /* for returning PVC information only */ - -typedef struct { - unsigned int interval; - unsigned int timeout; -} cisco_proto; - -/* PPP doesn't need any info now - supply length = 0 to ioctl */ - -#endif /* __HDLC_IOCTL_H__ */ diff --git a/include/uapi/linux/hdlc/Kbuild b/include/uapi/linux/hdlc/Kbuild index aafaa5aa54d4..8c1d2cb75e33 100644 --- a/include/uapi/linux/hdlc/Kbuild +++ b/include/uapi/linux/hdlc/Kbuild @@ -1 +1,2 @@ # UAPI Header export list +header-y += ioctl.h diff --git a/include/uapi/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h new file mode 100644 index 000000000000..46939b24d612 --- /dev/null +++ b/include/uapi/linux/hdlc/ioctl.h @@ -0,0 +1,81 @@ +#ifndef __HDLC_IOCTL_H__ +#define __HDLC_IOCTL_H__ + + +#define GENERIC_HDLC_VERSION 4 /* For synchronization with sethdlc utility */ + +#define CLOCK_DEFAULT 0 /* Default setting */ +#define CLOCK_EXT 1 /* External TX and RX clock - DTE */ +#define CLOCK_INT 2 /* Internal TX and RX clock - DCE */ +#define CLOCK_TXINT 3 /* Internal TX and external RX clock */ +#define CLOCK_TXFROMRX 4 /* TX clock derived from external RX clock */ + + +#define ENCODING_DEFAULT 0 /* Default setting */ +#define ENCODING_NRZ 1 +#define ENCODING_NRZI 2 +#define ENCODING_FM_MARK 3 +#define ENCODING_FM_SPACE 4 +#define ENCODING_MANCHESTER 5 + + +#define PARITY_DEFAULT 0 /* Default setting */ +#define PARITY_NONE 1 /* No parity */ +#define PARITY_CRC16_PR0 2 /* CRC16, initial value 0x0000 */ +#define PARITY_CRC16_PR1 3 /* CRC16, initial value 0xFFFF */ +#define PARITY_CRC16_PR0_CCITT 4 /* CRC16, initial 0x0000, ITU-T version */ +#define PARITY_CRC16_PR1_CCITT 5 /* CRC16, initial 0xFFFF, ITU-T version */ +#define PARITY_CRC32_PR0_CCITT 6 /* CRC32, initial value 0x00000000 */ +#define PARITY_CRC32_PR1_CCITT 7 /* CRC32, initial value 0xFFFFFFFF */ + +#define LMI_DEFAULT 0 /* Default setting */ +#define LMI_NONE 1 /* No LMI, all PVCs are static */ +#define LMI_ANSI 2 /* ANSI Annex D */ +#define LMI_CCITT 3 /* ITU-T Annex A */ +#define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ + +typedef struct { + unsigned int clock_rate; /* bits per second */ + unsigned int clock_type; /* internal, external, TX-internal etc. */ + unsigned short loopback; +} sync_serial_settings; /* V.35, V.24, X.21 */ + +typedef struct { + unsigned int clock_rate; /* bits per second */ + unsigned int clock_type; /* internal, external, TX-internal etc. */ + unsigned short loopback; + unsigned int slot_map; +} te1_settings; /* T1, E1 */ + +typedef struct { + unsigned short encoding; + unsigned short parity; +} raw_hdlc_proto; + +typedef struct { + unsigned int t391; + unsigned int t392; + unsigned int n391; + unsigned int n392; + unsigned int n393; + unsigned short lmi; + unsigned short dce; /* 1 for DCE (network side) operation */ +} fr_proto; + +typedef struct { + unsigned int dlci; +} fr_proto_pvc; /* for creating/deleting FR PVCs */ + +typedef struct { + unsigned int dlci; + char master[IFNAMSIZ]; /* Name of master FRAD device */ +}fr_proto_pvc_info; /* for returning PVC information only */ + +typedef struct { + unsigned int interval; + unsigned int timeout; +} cisco_proto; + +/* PPP doesn't need any info now - supply length = 0 to ioctl */ + +#endif /* __HDLC_IOCTL_H__ */ -- cgit v1.2.3 From c48c8d51c29efba160a1b27555d97f6ee0d049a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 7 Nov 2012 02:37:24 +0000 Subject: Fix the wanxl firmware to include missing constants Fix the wanxl firmware to include missing constants such as PARITY_NONE. It should be #including the linux/hdlc/ioctl.h header. To make this work, we also have to guard parts of ioctl.h with !__ASSEMBLY__. Signed-off-by: David Howells Signed-off-by: David S. Miller --- drivers/net/wan/Makefile | 2 +- drivers/net/wan/wanxlfw.S | 1 + include/uapi/linux/hdlc/ioctl.h | 3 +++ 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wan/Makefile b/drivers/net/wan/Makefile index 4dac96b3db2a..b0a61636fc94 100644 --- a/drivers/net/wan/Makefile +++ b/drivers/net/wan/Makefile @@ -52,7 +52,7 @@ endif quiet_cmd_build_wanxlfw = BLD FW $@ cmd_build_wanxlfw = \ - $(CPP) -Wp,-MD,$(depfile) -I$(srctree)/include/uapi $< | $(AS68K) -m68360 -o $(obj)/wanxlfw.o; \ + $(CPP) -D__ASSEMBLY__ -Wp,-MD,$(depfile) -I$(srctree)/include/uapi $< | $(AS68K) -m68360 -o $(obj)/wanxlfw.o; \ $(LD68K) --oformat binary -Ttext 0x1000 $(obj)/wanxlfw.o -o $(obj)/wanxlfw.bin; \ hexdump -ve '"\n" 16/1 "0x%02X,"' $(obj)/wanxlfw.bin | sed 's/0x ,//g;1s/^/static u8 firmware[]={/;$$s/,$$/\n};\n/' >$(obj)/wanxlfw.inc; \ rm -f $(obj)/wanxlfw.bin $(obj)/wanxlfw.o diff --git a/drivers/net/wan/wanxlfw.S b/drivers/net/wan/wanxlfw.S index 73aae2bf2f1c..21565d59ec7b 100644 --- a/drivers/net/wan/wanxlfw.S +++ b/drivers/net/wan/wanxlfw.S @@ -35,6 +35,7 @@ */ #include +#include #include "wanxl.h" /* memory addresses and offsets */ diff --git a/include/uapi/linux/hdlc/ioctl.h b/include/uapi/linux/hdlc/ioctl.h index 46939b24d612..04bc0274a189 100644 --- a/include/uapi/linux/hdlc/ioctl.h +++ b/include/uapi/linux/hdlc/ioctl.h @@ -34,6 +34,8 @@ #define LMI_CCITT 3 /* ITU-T Annex A */ #define LMI_CISCO 4 /* The "original" LMI, aka Gang of Four */ +#ifndef __ASSEMBLY__ + typedef struct { unsigned int clock_rate; /* bits per second */ unsigned int clock_type; /* internal, external, TX-internal etc. */ @@ -78,4 +80,5 @@ typedef struct { /* PPP doesn't need any info now - supply length = 0 to ioctl */ +#endif /* __ASSEMBLY__ */ #endif /* __HDLC_IOCTL_H__ */ -- cgit v1.2.3 From 0974658da47cb399b76794057823bf3cd22acf37 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 9 Nov 2012 06:09:59 +0000 Subject: ipip: advertise tunnel param via rtnl It is usefull for daemons that monitor link event to have the full parameters of these interfaces when a rtnl message is sent. It allows also to dump them via rtnetlink. It is based on what is done for GRE tunnels. Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 11 ++++++++ net/ipv4/ipip.c | 57 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 5db5942575fe..ccb21d585bf4 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -37,6 +37,17 @@ struct ip_tunnel_parm { struct iphdr iph; }; +enum { + IFLA_IPTUN_UNSPEC, + IFLA_IPTUN_LINK, + IFLA_IPTUN_LOCAL, + IFLA_IPTUN_REMOTE, + IFLA_IPTUN_TTL, + IFLA_IPTUN_TOS, + __IFLA_IPTUN_MAX, +}; +#define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) + /* SIT-mode i_flags */ #define SIT_ISATAP 0x0001 diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index cc49cc1ff3b9..720855e41100 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -138,6 +138,7 @@ struct ipip_net { static int ipip_tunnel_init(struct net_device *dev); static void ipip_tunnel_setup(struct net_device *dev); static void ipip_dev_free(struct net_device *dev); +static struct rtnl_link_ops ipip_link_ops __read_mostly; /* * Locking : hash tables are protected by RCU and RTNL @@ -305,6 +306,7 @@ static struct ip_tunnel *ipip_tunnel_locate(struct net *net, goto failed_free; strcpy(nt->parms.name, dev->name); + dev->rtnl_link_ops = &ipip_link_ops; dev_hold(dev); ipip_tunnel_link(ipn, nt); @@ -841,6 +843,47 @@ static int __net_init ipip_fb_tunnel_init(struct net_device *dev) return 0; } +static size_t ipip_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(4) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(4) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_TOS */ + nla_total_size(1) + + 0; +} + +static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ipip_link_ops __read_mostly = { + .kind = "ipip", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip_tunnel), + .get_size = ipip_get_size, + .fill_info = ipip_fill_info, +}; + static struct xfrm_tunnel ipip_handler __read_mostly = { .handler = ipip_rcv, .err_handler = ipip_err, @@ -937,14 +980,26 @@ static int __init ipip_init(void) return err; err = xfrm4_tunnel_register(&ipip_handler, AF_INET); if (err < 0) { - unregister_pernet_device(&ipip_net_ops); pr_info("%s: can't register tunnel\n", __func__); + goto xfrm_tunnel_failed; } + err = rtnl_link_register(&ipip_link_ops); + if (err < 0) + goto rtnl_link_failed; + +out: return err; + +rtnl_link_failed: + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: + unregister_pernet_device(&ipip_net_ops); + goto out; } static void __exit ipip_fini(void) { + rtnl_link_unregister(&ipip_link_ops); if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) pr_info("%s: can't deregister tunnel\n", __func__); -- cgit v1.2.3 From c075b13098b399dc565b4d53f42047a8d40ed3ba Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Fri, 9 Nov 2012 06:10:01 +0000 Subject: ip6tnl: advertise tunnel param via rtnl It is usefull for daemons that monitor link event to have the full parameters of these interfaces when a rtnl message is sent. It allows also to dump them via rtnetlink. It is based on what is done for GRE tunnels. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 3 +++ net/ipv6/ip6_tunnel.c | 57 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index ccb21d585bf4..c1bf0b5a8da1 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -44,6 +44,9 @@ enum { IFLA_IPTUN_REMOTE, IFLA_IPTUN_TTL, IFLA_IPTUN_TOS, + IFLA_IPTUN_ENCAP_LIMIT, + IFLA_IPTUN_FLOWINFO, + IFLA_IPTUN_FLAGS, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 09482f723064..424ed45ef122 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -83,6 +83,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) static int ip6_tnl_dev_init(struct net_device *dev); static void ip6_tnl_dev_setup(struct net_device *dev); +static struct rtnl_link_ops ip6_link_ops __read_mostly; static int ip6_tnl_net_id __read_mostly; struct ip6_tnl_net { @@ -299,6 +300,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) goto failed_free; strcpy(t->parms.name, dev->name); + dev->rtnl_link_ops = &ip6_link_ops; dev_hold(dev); ip6_tnl_link(ip6n, t); @@ -1504,6 +1506,55 @@ static int __net_init ip6_fb_tnl_dev_init(struct net_device *dev) return 0; } +static size_t ip6_get_size(const struct net_device *dev) +{ + return + /* IFLA_IPTUN_LINK */ + nla_total_size(4) + + /* IFLA_IPTUN_LOCAL */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_REMOTE */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_TTL */ + nla_total_size(1) + + /* IFLA_IPTUN_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_IPTUN_FLOWINFO */ + nla_total_size(4) + + /* IFLA_IPTUN_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + struct __ip6_tnl_parm *parm = &tunnel->parms; + + if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || + nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), + &parm->raddr) || + nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), + &parm->laddr) || + nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || + nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || + nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static struct rtnl_link_ops ip6_link_ops __read_mostly = { + .kind = "ip6tnl", + .maxtype = IFLA_IPTUN_MAX, + .priv_size = sizeof(struct ip6_tnl), + .get_size = ip6_get_size, + .fill_info = ip6_fill_info, +}; + static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { .handler = ip4ip6_rcv, .err_handler = ip4ip6_err, @@ -1612,9 +1663,14 @@ static int __init ip6_tunnel_init(void) pr_err("%s: can't register ip6ip6\n", __func__); goto out_ip6ip6; } + err = rtnl_link_register(&ip6_link_ops); + if (err < 0) + goto rtnl_link_failed; return 0; +rtnl_link_failed: + xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6); out_ip6ip6: xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET); out_ip4ip6: @@ -1629,6 +1685,7 @@ out_pernet: static void __exit ip6_tunnel_cleanup(void) { + rtnl_link_unregister(&ip6_link_ops); if (xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET)) pr_info("%s: can't deregister ip4ip6\n", __func__); -- cgit v1.2.3 From 2a91c9f781de209d420d751e43eb43ffe6934803 Mon Sep 17 00:00:00 2001 From: Amitkumar Karwar Date: Fri, 9 Nov 2012 17:51:30 -0800 Subject: nl/cfg80211: advertise OBSS scan requirement wpa_supplicant will do OBSS scan for drivers that implement auth/assoc API. Drivers that implement nl80211 connect API (rather than auth/assoc) may need wpa_supplicant to do this as well. Add a new feature flag to inform it (wpa_s) that a driver needs wpa_supplicant to do OBSS scans. Signed-off-by: Amitkumar Karwar Signed-off-by: Bing Zhao Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index cbd2d6bb907a..06ddc89f026c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3057,6 +3057,9 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_SCAN_FLUSH: Scan flush is supported * @NL80211_FEATURE_AP_SCAN: Support scanning using an AP vif * @NL80211_FEATURE_VIF_TXPOWER: The driver supports per-vif TX power setting + * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform + * OBSS scans and generate 20/40 BSS coex reports. This flag is used only + * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied. */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3069,6 +3072,7 @@ enum nl80211_feature_flags { NL80211_FEATURE_SCAN_FLUSH = 1 << 7, NL80211_FEATURE_AP_SCAN = 1 << 8, NL80211_FEATURE_VIF_TXPOWER = 1 << 9, + NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, }; /** -- cgit v1.2.3 From 5cb04436eef62aa8f5c482f8ec8deba391dea465 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 6 Nov 2012 16:46:20 +0000 Subject: ipv6: add knob to send unsolicited ND on link-layer address change This patch introduces a new knob ndisc_notify. If enabled, the kernel will transmit an unsolicited neighbour advertisement on link-layer address change to update the neighbour tables of the corresponding hosts more quickly. This is the equivalent to arp_notify in ipv4 world. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 8 ++++++++ net/ipv6/ndisc.c | 7 +++++++ 4 files changed, 17 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index bcba48a97868..5e11905a4f01 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -47,6 +47,7 @@ struct ipv6_devconf { __s32 disable_ipv6; __s32 accept_dad; __s32 force_tllao; + __s32 ndisc_notify; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index a6d7d1c536c3..5a2991cf0251 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -157,6 +157,7 @@ enum { DEVCONF_DISABLE_IPV6, DEVCONF_ACCEPT_DAD, DEVCONF_FORCE_TLLAO, + DEVCONF_NDISC_NOTIFY, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fab23db8ee73..cb803b7bb0d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -4037,6 +4037,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_DISABLE_IPV6] = cnf->disable_ipv6; array[DEVCONF_ACCEPT_DAD] = cnf->accept_dad; array[DEVCONF_FORCE_TLLAO] = cnf->force_tllao; + array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; } static inline size_t inet6_ifla6_size(void) @@ -4704,6 +4705,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ndisc_notify", + .data = &ipv6_devconf.ndisc_notify, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 6ba4b54a550a..f41853bca428 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1572,11 +1572,18 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, { struct net_device *dev = ptr; struct net *net = dev_net(dev); + struct inet6_dev *idev; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&nd_tbl, dev); fib6_run_gc(~0UL, net); + idev = in6_dev_get(dev); + if (!idev) + break; + if (idev->cnf.ndisc_notify) + ndisc_send_unsol_na(dev); + in6_dev_put(idev); break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); -- cgit v1.2.3 From 25c71c75ac87508528db053b818944f3650dd7a6 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:05 +0000 Subject: bridge: bridge port parameters over netlink Expose bridge port parameter over netlink. By switching to a nested message, this can be used for other bridge parameters. This changes IFLA_PROTINFO attribute from one byte to a full nested set of attributes. This is safe for application interface because the old message used IFLA_PROTINFO and new one uses IFLA_PROTINFO | NLA_F_NESTED. The code adapts to old format requests, and therefore stays compatible with user mode RSTP daemon. Since the type field for nested and unnested attributes are different, and the old code in libnetlink doesn't do the mask, it is also safe to use with old versions of bridge monitor command. Note: although mode is only a boolean, treating it as a full byte since in the future someone will probably want to add more values (like macvlan has). Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 15 ++++ net/bridge/br_netlink.c | 165 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 147 insertions(+), 33 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5c80cb11518b..96f7cf49367b 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -205,6 +205,21 @@ enum { #define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) +enum { + BRIDGE_MODE_UNSPEC, + BRIDGE_MODE_HAIRPIN, +}; + +enum { + IFLA_BRPORT_UNSPEC, + IFLA_BRPORT_STATE, /* Spanning tree state */ + IFLA_BRPORT_PRIORITY, /* " priority */ + IFLA_BRPORT_COST, /* " cost */ + IFLA_BRPORT_MODE, /* mode (hairpin) */ + __IFLA_BRPORT_MAX +}; +#define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) + struct ifla_cacheinfo { __u32 max_reasm_len; __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 14b065cbd214..0188a2f706c4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -20,16 +20,39 @@ #include "br_private.h" #include "br_private_stp.h" +static inline size_t br_port_info_size(void) +{ + return nla_total_size(1) /* IFLA_BRPORT_STATE */ + + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */ + + nla_total_size(4) /* IFLA_BRPORT_COST */ + + nla_total_size(1) /* IFLA_BRPORT_MODE */ + + 0; +} + static inline size_t br_nlmsg_size(void) { return NLMSG_ALIGN(sizeof(struct ifinfomsg)) - + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ - + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ - + nla_total_size(4) /* IFLA_MASTER */ - + nla_total_size(4) /* IFLA_MTU */ - + nla_total_size(4) /* IFLA_LINK */ - + nla_total_size(1) /* IFLA_OPERSTATE */ - + nla_total_size(1); /* IFLA_PROTINFO */ + + nla_total_size(IFNAMSIZ) /* IFLA_IFNAME */ + + nla_total_size(MAX_ADDR_LEN) /* IFLA_ADDRESS */ + + nla_total_size(4) /* IFLA_MASTER */ + + nla_total_size(4) /* IFLA_MTU */ + + nla_total_size(4) /* IFLA_LINK */ + + nla_total_size(1) /* IFLA_OPERSTATE */ + + nla_total_size(br_port_info_size()); /* IFLA_PROTINFO */ +} + +static int br_port_fill_attrs(struct sk_buff *skb, + const struct net_bridge_port *p) +{ + u8 mode = !!(p->flags & BR_HAIRPIN_MODE); + + if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || + nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || + nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || + nla_put_u8(skb, IFLA_BRPORT_MODE, mode)) + return -EMSGSIZE; + + return 0; } /* @@ -67,10 +90,18 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink)) || - (event == RTM_NEWLINK && - nla_put_u8(skb, IFLA_PROTINFO, port->state))) + nla_put_u32(skb, IFLA_LINK, dev->iflink))) goto nla_put_failure; + + if (event == RTM_NEWLINK) { + struct nlattr *nest + = nla_nest_start(skb, IFLA_PROTINFO | NLA_F_NESTED); + + if (nest == NULL || br_port_fill_attrs(skb, port) < 0) + goto nla_put_failure; + nla_nest_end(skb, nest); + } + return nlmsg_end(skb, nlh); nla_put_failure: @@ -126,47 +157,115 @@ out: return err; } -/* - * Change state of port (ie from forwarding to blocking etc) - * Used by spanning tree in user space. - */ +static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { + [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, + [IFLA_BRPORT_COST] = { .type = NLA_U32 }, + [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, + [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, +}; + +/* Change the state of the port and notify spanning tree */ +static int br_set_port_state(struct net_bridge_port *p, u8 state) +{ + if (state > BR_STATE_BLOCKING) + return -EINVAL; + + /* if kernel STP is running, don't allow changes */ + if (p->br->stp_enabled == BR_KERNEL_STP) + return -EBUSY; + + if (!netif_running(p->dev) || + (!netif_carrier_ok(p->dev) && state != BR_STATE_DISABLED)) + return -ENETDOWN; + + p->state = state; + br_log_state(p); + br_port_state_selection(p->br); + return 0; +} + +/* Set/clear or port flags based on attribute */ +static void br_set_port_flag(struct net_bridge_port *p, struct nlattr *tb[], + int attrtype, unsigned long mask) +{ + if (tb[attrtype]) { + u8 flag = nla_get_u8(tb[attrtype]); + if (flag) + p->flags |= mask; + else + p->flags &= ~mask; + } +} + +/* Process bridge protocol info on port */ +static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) +{ + int err; + + br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + + if (tb[IFLA_BRPORT_COST]) { + err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_PRIORITY]) { + err = br_stp_set_port_priority(p, nla_get_u16(tb[IFLA_BRPORT_PRIORITY])); + if (err) + return err; + } + + if (tb[IFLA_BRPORT_STATE]) { + err = br_set_port_state(p, nla_get_u8(tb[IFLA_BRPORT_STATE])); + if (err) + return err; + } + return 0; +} + +/* Change state and parameters on port. */ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) { struct ifinfomsg *ifm; struct nlattr *protinfo; struct net_bridge_port *p; - u8 new_state; + struct nlattr *tb[IFLA_BRPORT_MAX]; + int err; ifm = nlmsg_data(nlh); protinfo = nlmsg_find_attr(nlh, sizeof(*ifm), IFLA_PROTINFO); - if (!protinfo || nla_len(protinfo) < sizeof(u8)) - return -EINVAL; - - new_state = nla_get_u8(protinfo); - if (new_state > BR_STATE_BLOCKING) - return -EINVAL; + if (!protinfo) + return 0; p = br_port_get_rtnl(dev); if (!p) return -EINVAL; - /* if kernel STP is running, don't allow changes */ - if (p->br->stp_enabled == BR_KERNEL_STP) - return -EBUSY; + if (protinfo->nla_type & NLA_F_NESTED) { + err = nla_parse_nested(tb, IFLA_BRPORT_MAX, + protinfo, ifla_brport_policy); + if (err) + return err; - if (!netif_running(dev) || - (!netif_carrier_ok(dev) && new_state != BR_STATE_DISABLED)) - return -ENETDOWN; + spin_lock_bh(&p->br->lock); + err = br_setport(p, tb); + spin_unlock_bh(&p->br->lock); + } else { + /* Binary compatability with old RSTP */ + if (nla_len(protinfo) < sizeof(u8)) + return -EINVAL; - p->state = new_state; - br_log_state(p); + spin_lock_bh(&p->br->lock); + err = br_set_port_state(p, nla_get_u8(protinfo)); + spin_unlock_bh(&p->br->lock); + } - spin_lock_bh(&p->br->lock); - br_port_state_selection(p->br); - spin_unlock_bh(&p->br->lock); + if (err == 0) + br_ifinfo_notify(RTM_NEWLINK, p); - return 0; + return err; } static int br_validate(struct nlattr *tb[], struct nlattr *data[]) -- cgit v1.2.3 From a2e01a65cd7135dab26d27d4b589b2e5358bec99 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:07 +0000 Subject: bridge: implement BPDU blocking This is Linux bridge implementation of STP protection (Cisco BPDU guard/Juniper BPDU block). BPDU block disables the bridge port if a STP BPDU packet is received. Why would you want to do this? If running Spanning Tree on bridge, hostile devices on the network may send BPDU and cause network failure. Enabling bpdu block will detect and stop this. How to recover the port? The port will be restarted if link is brought down, or removed and reattached. For example: # ip li set dev eth0 down; ip li set dev eth0 up Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 6 +++++- net/bridge/br_private.h | 1 + net/bridge/br_stp_bpdu.c | 7 +++++++ net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 96f7cf49367b..5e871e4d923f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -216,6 +216,7 @@ enum { IFLA_BRPORT_PRIORITY, /* " priority */ IFLA_BRPORT_COST, /* " cost */ IFLA_BRPORT_MODE, /* mode (hairpin) */ + IFLA_BRPORT_GUARD, /* bpdu guard */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0188a2f706c4..c331e28c7880 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -26,6 +26,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(2) /* IFLA_BRPORT_PRIORITY */ + nla_total_size(4) /* IFLA_BRPORT_COST */ + nla_total_size(1) /* IFLA_BRPORT_MODE */ + + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + 0; } @@ -49,7 +50,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, if (nla_put_u8(skb, IFLA_BRPORT_STATE, p->state) || nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || - nla_put_u8(skb, IFLA_BRPORT_MODE, mode)) + nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD))) return -EMSGSIZE; return 0; @@ -162,6 +164,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_COST] = { .type = NLA_U32 }, [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, + [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -203,6 +206,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) int err; br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); + br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 22111ffd68df..c92b0804ff2d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -135,6 +135,7 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 +#define BR_BPDU_GUARD 0x00000002 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c index fd30a6022dea..7f884e3fb955 100644 --- a/net/bridge/br_stp_bpdu.c +++ b/net/bridge/br_stp_bpdu.c @@ -170,6 +170,13 @@ void br_stp_rcv(const struct stp_proto *proto, struct sk_buff *skb, if (!ether_addr_equal(dest, br->group_addr)) goto out; + if (p->flags & BR_BPDU_GUARD) { + br_notice(br, "BPDU received on blocked port %u(%s)\n", + (unsigned int) p->port_no, p->dev->name); + br_stp_disable_port(p); + goto out; + } + buf = skb_pull(skb, 3); if (buf[0] == BPDU_TYPE_CONFIG) { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index f26173d33d8d..d1dfa4026185 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -156,6 +156,7 @@ static int store_flush(struct net_bridge_port *p, unsigned long v) static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); +BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -189,6 +190,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_hold_timer, &brport_attr_flush, &brport_attr_hairpin_mode, + &brport_attr_bpdu_guard, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, #endif -- cgit v1.2.3 From 1007dd1aa50b0403df370834f647abef1722925c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 13 Nov 2012 07:53:08 +0000 Subject: bridge: add root port blocking This is Linux bridge implementation of root port guard. If BPDU is received from a leaf (edge) port, it should not be elected as root port. Why would you want to do this? If using STP on a bridge and the downstream bridges are not fully trusted; this prevents a hostile guest for rerouting traffic. Why not just use netfilter? Netfilter does not track of follow spanning tree decisions. It would be difficult and error prone to try and mirror STP resolution in netfilter module. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 1 + net/bridge/br_netlink.c | 5 ++++- net/bridge/br_private.h | 1 + net/bridge/br_stp.c | 22 +++++++++++++++++++++- net/bridge/br_sysfs_if.c | 2 ++ 5 files changed, 29 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 5e871e4d923f..7aae0179ae44 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -217,6 +217,7 @@ enum { IFLA_BRPORT_COST, /* " cost */ IFLA_BRPORT_MODE, /* mode (hairpin) */ IFLA_BRPORT_GUARD, /* bpdu guard */ + IFLA_BRPORT_PROTECT, /* root port protection */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index c331e28c7880..65429b99a2a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -27,6 +27,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(4) /* IFLA_BRPORT_COST */ + nla_total_size(1) /* IFLA_BRPORT_MODE */ + nla_total_size(1) /* IFLA_BRPORT_GUARD */ + + nla_total_size(1) /* IFLA_BRPORT_PROTECT */ + 0; } @@ -51,7 +52,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u16(skb, IFLA_BRPORT_PRIORITY, p->priority) || nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || - nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD))) + nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK))) return -EMSGSIZE; return 0; @@ -165,6 +167,7 @@ static const struct nla_policy ifla_brport_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, + [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c92b0804ff2d..eb9cd42146a5 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -136,6 +136,7 @@ struct net_bridge_port unsigned long flags; #define BR_HAIRPIN_MODE 0x00000001 #define BR_BPDU_GUARD 0x00000002 +#define BR_ROOT_BLOCK 0x00000004 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index af9a12099ba4..b01849a74310 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -100,6 +100,21 @@ static int br_should_become_root_port(const struct net_bridge_port *p, return 0; } +static void br_root_port_block(const struct net_bridge *br, + struct net_bridge_port *p) +{ + + br_notice(br, "port %u(%s) tried to become root port (blocked)", + (unsigned int) p->port_no, p->dev->name); + + p->state = BR_STATE_LISTENING; + br_log_state(p); + br_ifinfo_notify(RTM_NEWLINK, p); + + if (br->forward_delay > 0) + mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); +} + /* called under bridge lock */ static void br_root_selection(struct net_bridge *br) { @@ -107,7 +122,12 @@ static void br_root_selection(struct net_bridge *br) u16 root_port = 0; list_for_each_entry(p, &br->port_list, list) { - if (br_should_become_root_port(p, root_port)) + if (!br_should_become_root_port(p, root_port)) + continue; + + if (p->flags & BR_ROOT_BLOCK) + br_root_port_block(br, p); + else root_port = p->port_no; } diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index d1dfa4026185..80a4fc5d96ab 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -157,6 +157,7 @@ static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); +BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -191,6 +192,7 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_flush, &brport_attr_hairpin_mode, &brport_attr_bpdu_guard, + &brport_attr_root_block, #ifdef CONFIG_BRIDGE_IGMP_SNOOPING &brport_attr_multicast_router, #endif -- cgit v1.2.3 From cfa323b6b98f44ddf46cc987f74a23dcab697134 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:13:58 +0000 Subject: ip6tnl/rtnl: add IFLA_IPTUN_PROTO on dump IPv6 tunnels can have three mode: 4in6, 6in6 and xin6. This information was missing in the netlink message. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 1 + net/ipv6/ip6_tunnel.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index c1bf0b5a8da1..f5ea6b7b651f 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -47,6 +47,7 @@ enum { IFLA_IPTUN_ENCAP_LIMIT, IFLA_IPTUN_FLOWINFO, IFLA_IPTUN_FLAGS, + IFLA_IPTUN_PROTO, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 8db4d9b7ab14..929ba0b5cc9b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1515,6 +1515,8 @@ static size_t ip6_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_IPTUN_FLAGS */ nla_total_size(4) + + /* IFLA_IPTUN_PROTO */ + nla_total_size(1) + 0; } @@ -1531,7 +1533,8 @@ static int ip6_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || - nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags)) + nla_put_u32(skb, IFLA_IPTUN_FLAGS, parm->flags) || + nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; return 0; -- cgit v1.2.3 From befe2aa1b2c7b9b7e20e97906f99b58475608867 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 14 Nov 2012 05:14:02 +0000 Subject: ipip/rtnl: add IFLA_IPTUN_PMTUDISC on dump This parameter was missing in the dump. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 1 + net/ipv4/ipip.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index f5ea6b7b651f..5ab0c8ddc2bc 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -48,6 +48,7 @@ enum { IFLA_IPTUN_FLOWINFO, IFLA_IPTUN_FLAGS, IFLA_IPTUN_PROTO, + IFLA_IPTUN_PMTUDISC, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4be88cc98957..1fc0ea4786b9 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -835,6 +835,8 @@ static size_t ipip_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_TOS */ nla_total_size(1) + + /* IFLA_IPTUN_PMTUDISC */ + nla_total_size(1) + 0; } @@ -847,7 +849,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || - nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos)) + nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || + nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, + !!(parm->iph.frag_off & htons(IP_DF)))) goto nla_put_failure; return 0; -- cgit v1.2.3 From af8036dd749fbf4e732161ff0f3874759b73be40 Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 14 Nov 2012 16:59:21 +0100 Subject: Input: introduce EV_MSC Timestamp Some devices provides the actual timestamp (hid_dg_scan_time in win8 ones) computed by the hardware itself. This value is global to the frame and is not specific to the multitouch protocol. Signed-off-by: Benjamin Tissoires Reviewed-by: Henrik Rydberg Acked-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- Documentation/input/event-codes.txt | 11 +++++++++++ include/uapi/linux/input.h | 1 + 2 files changed, 12 insertions(+) (limited to 'include/uapi/linux') diff --git a/Documentation/input/event-codes.txt b/Documentation/input/event-codes.txt index 53305bd08182..f1ea2c69648d 100644 --- a/Documentation/input/event-codes.txt +++ b/Documentation/input/event-codes.txt @@ -196,6 +196,17 @@ EV_MSC: EV_MSC events are used for input and output events that do not fall under other categories. +A few EV_MSC codes have special meaning: + +* MSC_TIMESTAMP: + - Used to report the number of microseconds since the last reset. This event + should be coded as an uint32 value, which is allowed to wrap around with + no special consequence. It is assumed that the time difference between two + consecutive events is reliable on a reasonable time scale (hours). + A reset to zero can happen, in which case the time since the last event is + unknown. If the device does not provide this information, the driver must + not provide it to user space. + EV_LED: ---------- EV_LED events are used for input and output to set and query the state of diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h index 558828590a69..935119c698ac 100644 --- a/include/uapi/linux/input.h +++ b/include/uapi/linux/input.h @@ -851,6 +851,7 @@ struct input_keymap_entry { #define MSC_GESTURE 0x02 #define MSC_RAW 0x03 #define MSC_SCAN 0x04 +#define MSC_TIMESTAMP 0x05 #define MSC_MAX 0x07 #define MSC_CNT (MSC_MAX+1) -- cgit v1.2.3 From 130cd273d4a46a3011b1cc739f5d2af78779d666 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 5 Nov 2012 05:28:18 +0000 Subject: ipv6: export IP6_RT_PRIO_* to userland The kernel uses some default metric when routes are managed. For example, a static route added with a metric set to 0 is inserted in the kernel with metric 1024 (IP6_RT_PRIO_USER). It is useful for routing daemons to know these values, to be able to set routes without interfering with what the kernel does. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 --- include/uapi/linux/ipv6_route.h | 3 +++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 5fa2af00634a..27d83183e615 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -1,9 +1,6 @@ #ifndef _NET_IP6_ROUTE_H #define _NET_IP6_ROUTE_H -#define IP6_RT_PRIO_USER 1024 -#define IP6_RT_PRIO_ADDRCONF 256 - struct route_info { __u8 type; __u8 length; diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h index 0459664c2636..2be7bd174751 100644 --- a/include/uapi/linux/ipv6_route.h +++ b/include/uapi/linux/ipv6_route.h @@ -55,4 +55,7 @@ struct in6_rtmsg { #define RTMSG_NEWROUTE 0x21 #define RTMSG_DELROUTE 0x22 +#define IP6_RT_PRIO_USER 1024 +#define IP6_RT_PRIO_ADDRCONF 256 + #endif /* _UAPI_LINUX_IPV6_ROUTE_H */ -- cgit v1.2.3 From fa0cbbf145aabbf29c6f28f8a11935c0b0fd86fc Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 12 Nov 2012 17:53:04 -0800 Subject: mm, oom: reintroduce /proc/pid/oom_adj This is mostly a revert of 01dc52ebdf47 ("oom: remove deprecated oom_adj") from Davidlohr Bueso. It reintroduces /proc/pid/oom_adj for backwards compatibility with earlier kernels. It simply scales the value linearly when /proc/pid/oom_score_adj is written. The major difference is that its scheduled removal is no longer included in Documentation/feature-removal-schedule.txt. We do warn users with a single printk, though, to suggest the more powerful and supported /proc/pid/oom_score_adj interface. Reported-by: Artem S. Tashkinov Signed-off-by: David Rientjes Signed-off-by: Linus Torvalds --- Documentation/filesystems/proc.txt | 16 ++++-- fs/proc/base.c | 109 +++++++++++++++++++++++++++++++++++++ include/uapi/linux/oom.h | 9 +++ 3 files changed, 130 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt index a1793d670cd0..3844d21d6ca3 100644 --- a/Documentation/filesystems/proc.txt +++ b/Documentation/filesystems/proc.txt @@ -33,7 +33,7 @@ Table of Contents 2 Modifying System Parameters 3 Per-Process Parameters - 3.1 /proc//oom_score_adj - Adjust the oom-killer + 3.1 /proc//oom_adj & /proc//oom_score_adj - Adjust the oom-killer score 3.2 /proc//oom_score - Display current oom-killer score 3.3 /proc//io - Display the IO accounting fields @@ -1320,10 +1320,10 @@ of the kernel. CHAPTER 3: PER-PROCESS PARAMETERS ------------------------------------------------------------------------------ -3.1 /proc//oom_score_adj- Adjust the oom-killer score +3.1 /proc//oom_adj & /proc//oom_score_adj- Adjust the oom-killer score -------------------------------------------------------------------------------- -This file can be used to adjust the badness heuristic used to select which +These file can be used to adjust the badness heuristic used to select which process gets killed in out of memory conditions. The badness heuristic assigns a value to each candidate task ranging from 0 @@ -1361,6 +1361,12 @@ same system, cpuset, mempolicy, or memory controller resources to use at least equivalent to discounting 50% of the task's allowed memory from being considered as scoring against the task. +For backwards compatibility with previous kernels, /proc//oom_adj may also +be used to tune the badness score. Its acceptable values range from -16 +(OOM_ADJUST_MIN) to +15 (OOM_ADJUST_MAX) and a special value of -17 +(OOM_DISABLE) to disable oom killing entirely for that task. Its value is +scaled linearly with /proc//oom_score_adj. + The value of /proc//oom_score_adj may be reduced no lower than the last value set by a CAP_SYS_RESOURCE process. To reduce the value any lower requires CAP_SYS_RESOURCE. @@ -1375,7 +1381,9 @@ minimal amount of work. ------------------------------------------------------------- This file can be used to check the current score used by the oom-killer is for -any given . +any given . Use it together with /proc//oom_score_adj to tune which +process should be killed in an out-of-memory situation. + 3.3 /proc//io - Display the IO accounting fields ------------------------------------------------------- diff --git a/fs/proc/base.c b/fs/proc/base.c index 144a96732dd7..3c231adf8450 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -873,6 +873,113 @@ static const struct file_operations proc_environ_operations = { .release = mem_release, }; +static ssize_t oom_adj_read(struct file *file, char __user *buf, size_t count, + loff_t *ppos) +{ + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode); + char buffer[PROC_NUMBUF]; + int oom_adj = OOM_ADJUST_MIN; + size_t len; + unsigned long flags; + + if (!task) + return -ESRCH; + if (lock_task_sighand(task, &flags)) { + if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MAX) + oom_adj = OOM_ADJUST_MAX; + else + oom_adj = (task->signal->oom_score_adj * -OOM_DISABLE) / + OOM_SCORE_ADJ_MAX; + unlock_task_sighand(task, &flags); + } + put_task_struct(task); + len = snprintf(buffer, sizeof(buffer), "%d\n", oom_adj); + return simple_read_from_buffer(buf, count, ppos, buffer, len); +} + +static ssize_t oom_adj_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct task_struct *task; + char buffer[PROC_NUMBUF]; + int oom_adj; + unsigned long flags; + int err; + + memset(buffer, 0, sizeof(buffer)); + if (count > sizeof(buffer) - 1) + count = sizeof(buffer) - 1; + if (copy_from_user(buffer, buf, count)) { + err = -EFAULT; + goto out; + } + + err = kstrtoint(strstrip(buffer), 0, &oom_adj); + if (err) + goto out; + if ((oom_adj < OOM_ADJUST_MIN || oom_adj > OOM_ADJUST_MAX) && + oom_adj != OOM_DISABLE) { + err = -EINVAL; + goto out; + } + + task = get_proc_task(file->f_path.dentry->d_inode); + if (!task) { + err = -ESRCH; + goto out; + } + + task_lock(task); + if (!task->mm) { + err = -EINVAL; + goto err_task_lock; + } + + if (!lock_task_sighand(task, &flags)) { + err = -ESRCH; + goto err_task_lock; + } + + /* + * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum + * value is always attainable. + */ + if (oom_adj == OOM_ADJUST_MAX) + oom_adj = OOM_SCORE_ADJ_MAX; + else + oom_adj = (oom_adj * OOM_SCORE_ADJ_MAX) / -OOM_DISABLE; + + if (oom_adj < task->signal->oom_score_adj && + !capable(CAP_SYS_RESOURCE)) { + err = -EACCES; + goto err_sighand; + } + + /* + * /proc/pid/oom_adj is provided for legacy purposes, ask users to use + * /proc/pid/oom_score_adj instead. + */ + printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); + + task->signal->oom_score_adj = oom_adj; + trace_oom_score_adj_update(task); +err_sighand: + unlock_task_sighand(task, &flags); +err_task_lock: + task_unlock(task); + put_task_struct(task); +out: + return err < 0 ? err : count; +} + +static const struct file_operations proc_oom_adj_operations = { + .read = oom_adj_read, + .write = oom_adj_write, + .llseek = generic_file_llseek, +}; + static ssize_t oom_score_adj_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { @@ -2598,6 +2705,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), @@ -2964,6 +3072,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("cgroup", S_IRUGO, proc_cgroup_operations), #endif INF("oom_score", S_IRUGO, proc_oom_score), + REG("oom_adj", S_IRUGO|S_IWUSR, proc_oom_adj_operations), REG("oom_score_adj", S_IRUGO|S_IWUSR, proc_oom_score_adj_operations), #ifdef CONFIG_AUDITSYSCALL REG("loginuid", S_IWUSR|S_IRUGO, proc_loginuid_operations), diff --git a/include/uapi/linux/oom.h b/include/uapi/linux/oom.h index a49c4afc7060..b29272d621ce 100644 --- a/include/uapi/linux/oom.h +++ b/include/uapi/linux/oom.h @@ -8,4 +8,13 @@ #define OOM_SCORE_ADJ_MIN (-1000) #define OOM_SCORE_ADJ_MAX 1000 +/* + * /proc//oom_adj set to -17 protects from the oom killer for legacy + * purposes. + */ +#define OOM_DISABLE (-17) +/* inclusive */ +#define OOM_ADJUST_MIN (-16) +#define OOM_ADJUST_MAX 15 + #endif /* _UAPI__INCLUDE_LINUX_OOM_H */ -- cgit v1.2.3 From 3475b0946bd2057497628790d4b4fce4bfdcc304 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Fri, 16 Nov 2012 22:49:57 +0200 Subject: cfg80211: Add TDLS event to allow drivers to request operations The NL80211_CMD_TDLS_OPER command was previously used only for userspace request for the kernel code to perform TDLS operations. However, there are also cases where the driver may need to request operations from userspace, e.g., when using security on the AP path. Add a new cfg80211 function for generating a TDLS operation event for drivers to request a new link to be set up (NL80211_TDLS_SETUP) or an existing link to be torn down (NL80211_TDLS_TEARDOWN). Drivers can optionally use these events, e.g., based on noticing data traffic being sent to a peer station that is seen with good signal strength. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 19 ++++++++++++++++++ include/uapi/linux/nl80211.h | 6 ++++++ net/wireless/nl80211.c | 47 ++++++++++++++++++++++++++++++++++++++++++++ net/wireless/trace.h | 23 ++++++++++++++++++++++ 4 files changed, 95 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 81d725038f97..8a1aec54e68f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3593,6 +3593,25 @@ bool cfg80211_can_beacon_sec_chan(struct wiphy *wiphy, void cfg80211_ch_switch_notify(struct net_device *dev, int freq, enum nl80211_channel_type type); +/* + * cfg80211_tdls_oper_request - request userspace to perform TDLS operation + * @dev: the device on which the operation is requested + * @peer: the MAC address of the peer device + * @oper: the requested TDLS operation (NL80211_TDLS_SETUP or + * NL80211_TDLS_TEARDOWN) + * @reason_code: the reason code for teardown request + * @gfp: allocation flags + * + * This function is used to request userspace to perform TDLS operation that + * requires knowledge of keys, i.e., link setup or teardown when the AP + * connection uses encryption. This is optional mechanism for the driver to use + * if it can automatically determine when a TDLS link could be useful (e.g., + * based on traffic and signal strength for a peer). + */ +void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, + enum nl80211_tdls_operation oper, + u16 reason_code, gfp_t gfp); + /* * cfg80211_calculate_bitrate - calculate actual bitrate (in 100Kbps units) * @rate: given rate_info to calculate bitrate from diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 06ddc89f026c..1a9a819cfab0 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -526,6 +526,12 @@ * of PMKSA caching dandidates. * * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup). + * In addition, this can be used as an event to request userspace to take + * actions on TDLS links (set up a new link or tear down an existing one). + * In such events, %NL80211_ATTR_TDLS_OPERATION indicates the requested + * operation, %NL80211_ATTR_MAC contains the peer MAC address, and + * %NL80211_ATTR_REASON_CODE the reason code to be used (only with + * %NL80211_TDLS_TEARDOWN). * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame. * * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c18b2fc9d492..4c427fa5c450 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -9027,6 +9027,53 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, } EXPORT_SYMBOL(cfg80211_report_obss_beacon); +void cfg80211_tdls_oper_request(struct net_device *dev, const u8 *peer, + enum nl80211_tdls_operation oper, + u16 reason_code, gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); + struct sk_buff *msg; + void *hdr; + int err; + + trace_cfg80211_tdls_oper_request(wdev->wiphy, dev, peer, oper, + reason_code); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_TDLS_OPER); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + nla_put_u8(msg, NL80211_ATTR_TDLS_OPERATION, oper) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, peer) || + (reason_code > 0 && + nla_put_u16(msg, NL80211_ATTR_REASON_CODE, reason_code))) + goto nla_put_failure; + + err = genlmsg_end(msg, hdr); + if (err < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0, + nl80211_mlme_mcgrp.id, gfp); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_tdls_oper_request); + static int nl80211_netlink_notify(struct notifier_block * nb, unsigned long state, void *_notify) diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 8e03c6382a8a..f264c20a7090 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2157,6 +2157,29 @@ TRACE_EVENT(cfg80211_report_obss_beacon, WIPHY_PR_ARG, __entry->freq, __entry->sig_dbm) ); +TRACE_EVENT(cfg80211_tdls_oper_request, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *peer, + enum nl80211_tdls_operation oper, u16 reason_code), + TP_ARGS(wiphy, netdev, peer, oper, reason_code), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(enum nl80211_tdls_operation, oper) + __field(u16, reason_code) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->oper = oper; + __entry->reason_code = reason_code; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", oper: %d, reason_code %u", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->oper, + __entry->reason_code) + ); + TRACE_EVENT(cfg80211_scan_done, TP_PROTO(struct cfg80211_scan_request *request, bool aborted), TP_ARGS(request, aborted), -- cgit v1.2.3 From d2709c7ce4c513ab7f4ca9a106a930621811f2d3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Nov 2012 22:21:03 +0000 Subject: perf: Make perf build for x86 with UAPI disintegration applied Make perf build for x86 once the UAPI disintegration patches for that arch have been applied by adding the appropriate -I flags - in the right order - and then converting some #includes that use ../.. notation to find main kernel headerfiles to use and instead. Note that -Iarch/foo/include/uapi is present _before_ -Iarch/foo/include. This makes sure we get the userspace version of the pt_regs struct. Ideally, we wouldn't have the latter -I flag at all, but unfortunately we want asm/svm.h and asm/vmx.h in builtin-kvm.c and these aren't part of the UAPI - at least not for x86. I wonder if the bits outside of the __KERNEL__ guards *should* be transferred there. I note also that perf seems to do its dependency handling manually by listing all the header files it might want to use in LIB_H in the Makefile. Can this be changed to use -MD? Note that to do make this work, we need to export and UAPI disintegrate linux/hw_breakpoint.h, which I think should've been exported previously so that perf can access the bits. We have to do this in the same patch to maintain bisectability. Signed-off-by: David Howells --- include/linux/hw_breakpoint.h | 31 +------------------------------ include/uapi/linux/Kbuild | 1 + include/uapi/linux/hw_breakpoint.h | 30 ++++++++++++++++++++++++++++++ tools/perf/Makefile | 29 ++++++++++++++++++++++++++++- tools/perf/arch/x86/include/perf_regs.h | 2 +- tools/perf/builtin-kvm.c | 6 +++--- tools/perf/builtin-test.c | 2 +- tools/perf/perf.h | 16 +++------------- tools/perf/util/evsel.c | 4 ++-- tools/perf/util/evsel.h | 3 ++- tools/perf/util/header.h | 2 +- tools/perf/util/parse-events-test.c | 2 +- tools/perf/util/parse-events.c | 2 +- tools/perf/util/parse-events.h | 2 +- tools/perf/util/pmu.h | 2 +- tools/perf/util/session.h | 2 +- 16 files changed, 78 insertions(+), 58 deletions(-) create mode 100644 include/uapi/linux/hw_breakpoint.h (limited to 'include/uapi/linux') diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h index 6ae9c631a1be..0464c85e63fd 100644 --- a/include/linux/hw_breakpoint.h +++ b/include/linux/hw_breakpoint.h @@ -1,35 +1,8 @@ #ifndef _LINUX_HW_BREAKPOINT_H #define _LINUX_HW_BREAKPOINT_H -enum { - HW_BREAKPOINT_LEN_1 = 1, - HW_BREAKPOINT_LEN_2 = 2, - HW_BREAKPOINT_LEN_4 = 4, - HW_BREAKPOINT_LEN_8 = 8, -}; - -enum { - HW_BREAKPOINT_EMPTY = 0, - HW_BREAKPOINT_R = 1, - HW_BREAKPOINT_W = 2, - HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, - HW_BREAKPOINT_X = 4, - HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, -}; - -enum bp_type_idx { - TYPE_INST = 0, -#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS - TYPE_DATA = 0, -#else - TYPE_DATA = 1, -#endif - TYPE_MAX -}; - -#ifdef __KERNEL__ - #include +#include #ifdef CONFIG_HAVE_HW_BREAKPOINT @@ -151,6 +124,4 @@ static inline struct arch_hw_breakpoint *counter_arch_bp(struct perf_event *bp) } #endif /* CONFIG_HAVE_HW_BREAKPOINT */ -#endif /* __KERNEL__ */ - #endif /* _LINUX_HW_BREAKPOINT_H */ diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index e194387ef784..19e765fbfef7 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -415,3 +415,4 @@ header-y += wireless.h header-y += x25.h header-y += xattr.h header-y += xfrm.h +header-y += hw_breakpoint.h diff --git a/include/uapi/linux/hw_breakpoint.h b/include/uapi/linux/hw_breakpoint.h new file mode 100644 index 000000000000..b04000a2296a --- /dev/null +++ b/include/uapi/linux/hw_breakpoint.h @@ -0,0 +1,30 @@ +#ifndef _UAPI_LINUX_HW_BREAKPOINT_H +#define _UAPI_LINUX_HW_BREAKPOINT_H + +enum { + HW_BREAKPOINT_LEN_1 = 1, + HW_BREAKPOINT_LEN_2 = 2, + HW_BREAKPOINT_LEN_4 = 4, + HW_BREAKPOINT_LEN_8 = 8, +}; + +enum { + HW_BREAKPOINT_EMPTY = 0, + HW_BREAKPOINT_R = 1, + HW_BREAKPOINT_W = 2, + HW_BREAKPOINT_RW = HW_BREAKPOINT_R | HW_BREAKPOINT_W, + HW_BREAKPOINT_X = 4, + HW_BREAKPOINT_INVALID = HW_BREAKPOINT_RW | HW_BREAKPOINT_X, +}; + +enum bp_type_idx { + TYPE_INST = 0, +#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS + TYPE_DATA = 0, +#else + TYPE_DATA = 1, +#endif + TYPE_MAX +}; + +#endif /* _UAPI_LINUX_HW_BREAKPOINT_H */ diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 00deed4d6159..0a619af5be43 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -169,7 +169,34 @@ endif ### --- END CONFIGURATION SECTION --- -BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +ifneq ($(objtree),) +#$(info Determined 'objtree' to be $(objtree)) +endif + +ifneq ($(OUTPUT),) +#$(info Determined 'OUTPUT' to be $(OUTPUT)) +endif + +BASIC_CFLAGS = \ + -Iutil/include \ + -Iarch/$(ARCH)/include \ + $(if $(objtree),-I$(objtree)/arch/$(ARCH)/include/generated/uapi) \ + -I$(srctree)/arch/$(ARCH)/include/uapi \ + -I$(srctree)/arch/$(ARCH)/include \ + $(if $(objtree),-I$(objtree)/include/generated/uapi) \ + -I$(srctree)/include/uapi \ + -I$(srctree)/include \ + -I$(OUTPUT)util \ + -Iutil \ + -I. \ + -I$(TRACE_EVENT_DIR) \ + -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE BASIC_LDFLAGS = # Guard against environment variables diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h index 46fc9f15c6b3..7fcdcdbee917 100644 --- a/tools/perf/arch/x86/include/perf_regs.h +++ b/tools/perf/arch/x86/include/perf_regs.h @@ -3,7 +3,7 @@ #include #include "../../util/types.h" -#include "../../../../../arch/x86/include/asm/perf_regs.h" +#include #ifndef ARCH_X86_64 #define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1) diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 260abc535b5b..e013bdb5e24a 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -22,9 +22,9 @@ #include #include -#include "../../arch/x86/include/asm/svm.h" -#include "../../arch/x86/include/asm/vmx.h" -#include "../../arch/x86/include/asm/kvm.h" +#include +#include +#include struct event_key { #define INVALID_KEY (~0ULL) diff --git a/tools/perf/builtin-test.c b/tools/perf/builtin-test.c index 484f26cc0c00..5acd6e8e658b 100644 --- a/tools/perf/builtin-test.c +++ b/tools/perf/builtin-test.c @@ -15,7 +15,7 @@ #include "util/thread_map.h" #include "util/pmu.h" #include "event-parse.h" -#include "../../include/linux/hw_breakpoint.h" +#include #include diff --git a/tools/perf/perf.h b/tools/perf/perf.h index e2ba8f004d32..238f923f2218 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -5,8 +5,9 @@ struct winsize; void get_term_dimensions(struct winsize *ws); +#include + #if defined(__i386__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -16,7 +17,6 @@ void get_term_dimensions(struct winsize *ws); #endif #if defined(__x86_64__) -#include "../../arch/x86/include/asm/unistd.h" #define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC "model name" @@ -26,20 +26,17 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __powerpc__ -#include "../../arch/powerpc/include/uapi/asm/unistd.h" #define rmb() asm volatile ("sync" ::: "memory") #define cpu_relax() asm volatile ("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __s390__ -#include "../../arch/s390/include/asm/unistd.h" #define rmb() asm volatile("bcr 15,0" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #endif #ifdef __sh__ -#include "../../arch/sh/include/asm/unistd.h" #if defined(__SH4A__) || defined(__SH5__) # define rmb() asm volatile("synco" ::: "memory") #else @@ -50,35 +47,30 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __hppa__ -#include "../../arch/parisc/include/asm/unistd.h" #define rmb() asm volatile("" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory"); #define CPUINFO_PROC "cpu" #endif #ifdef __sparc__ -#include "../../arch/sparc/include/uapi/asm/unistd.h" #define rmb() asm volatile("":::"memory") #define cpu_relax() asm volatile("":::"memory") #define CPUINFO_PROC "cpu" #endif #ifdef __alpha__ -#include "../../arch/alpha/include/asm/unistd.h" #define rmb() asm volatile("mb" ::: "memory") #define cpu_relax() asm volatile("" ::: "memory") #define CPUINFO_PROC "cpu model" #endif #ifdef __ia64__ -#include "../../arch/ia64/include/asm/unistd.h" #define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") #define CPUINFO_PROC "model name" #endif #ifdef __arm__ -#include "../../arch/arm/include/asm/unistd.h" /* * Use the __kuser_memory_barrier helper in the CPU helper page. See * arch/arm/kernel/entry-armv.S in the kernel source for details. @@ -89,13 +81,11 @@ void get_term_dimensions(struct winsize *ws); #endif #ifdef __aarch64__ -#include "../../arch/arm64/include/asm/unistd.h" #define rmb() asm volatile("dmb ld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif #ifdef __mips__ -#include "../../arch/mips/include/asm/unistd.h" #define rmb() asm volatile( \ ".set mips2\n\t" \ "sync\n\t" \ @@ -112,7 +102,7 @@ void get_term_dimensions(struct winsize *ws); #include #include -#include "../../include/uapi/linux/perf_event.h" +#include #include "util/types.h" #include diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 618d41140abd..d144d464ce39 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -18,8 +18,8 @@ #include "cpumap.h" #include "thread_map.h" #include "target.h" -#include "../../../include/linux/hw_breakpoint.h" -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "perf_regs.h" #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y)) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 6f94d6dea00f..d99b476ef37c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -3,7 +3,8 @@ #include #include -#include "../../../include/uapi/linux/perf_event.h" +#include +#include #include "types.h" #include "xyarray.h" #include "cgroup.h" diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 879d215cdac9..9bc00783f24f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -1,7 +1,7 @@ #ifndef __PERF_HEADER_H #define __PERF_HEADER_H -#include "../../../include/uapi/linux/perf_event.h" +#include #include #include #include "types.h" diff --git a/tools/perf/util/parse-events-test.c b/tools/perf/util/parse-events-test.c index 516ecd9ddd6e..6ef213b35ecd 100644 --- a/tools/perf/util/parse-events-test.c +++ b/tools/perf/util/parse-events-test.c @@ -3,7 +3,7 @@ #include "evsel.h" #include "evlist.h" #include "sysfs.h" -#include "../../../include/linux/hw_breakpoint.h" +#include #define TEST_ASSERT_VAL(text, cond) \ do { \ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 75c7b0fca6d9..6b6d03e93c3d 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1,4 +1,4 @@ -#include "../../../include/linux/hw_breakpoint.h" +#include #include "util.h" #include "../perf.h" #include "evlist.h" diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index 839230ceb18b..2820c407adb2 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -7,7 +7,7 @@ #include #include #include "types.h" -#include "../../../include/uapi/linux/perf_event.h" +#include #include "types.h" struct list_head; diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 39f3abac7744..fdeb8ac7c5d2 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -2,7 +2,7 @@ #define __PMU_H #include -#include "../../../include/uapi/linux/perf_event.h" +#include enum { PERF_PMU_FORMAT_VALUE_CONFIG, diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index dd6426163ba6..0eae00ad5fe7 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -7,7 +7,7 @@ #include "symbol.h" #include "thread.h" #include -#include "../../../include/uapi/linux/perf_event.h" +#include struct sample_queue; struct ip_callchain; -- cgit v1.2.3 From e4f67addf158f98f8197e08974966b18480dc751 Mon Sep 17 00:00:00 2001 From: David Stevens Date: Tue, 20 Nov 2012 02:50:14 +0000 Subject: add DOVE extensions for VXLAN This patch provides extensions to VXLAN for supporting Distributed Overlay Virtual Ethernet (DOVE) networks. The patch includes: + a dove flag per VXLAN device to enable DOVE extensions + ARP reduction, whereby a bridge-connected VXLAN tunnel endpoint answers ARP requests from the local bridge on behalf of remote DOVE clients + route short-circuiting (aka L3 switching). Known destination IP addresses use the corresponding destination MAC address for switching rather than going to a (possibly remote) router first. + netlink notification messages for forwarding table and L3 switching misses Changes since v2 - combined bools into "u32 flags" - replaced loop with !is_zero_ether_addr() Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 256 ++++++++++++++++++++++++++++++++++++++----- include/uapi/linux/if_link.h | 4 + 2 files changed, 235 insertions(+), 25 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a14df1ce99ff..ce77b8b693ae 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -29,6 +29,8 @@ #include #include #include +#include +#include #include #include #include @@ -110,7 +112,7 @@ struct vxlan_dev { __u16 port_max; __u8 tos; /* TOS override */ __u8 ttl; - bool learn; + u32 flags; /* VXLAN_F_* below */ unsigned long age_interval; struct timer_list age_timer; @@ -121,6 +123,12 @@ struct vxlan_dev { struct hlist_head fdb_head[FDB_HASH_SIZE]; }; +#define VXLAN_F_LEARN 0x01 +#define VXLAN_F_PROXY 0x02 +#define VXLAN_F_RSC 0x04 +#define VXLAN_F_L2MISS 0x08 +#define VXLAN_F_L3MISS 0x10 + /* salt for hash table */ static u32 vxlan_salt __read_mostly; @@ -154,6 +162,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; + bool send_ip, send_eth; nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) @@ -161,16 +170,24 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm = nlmsg_data(nlh); memset(ndm, 0, sizeof(*ndm)); - ndm->ndm_family = AF_BRIDGE; + + send_eth = send_ip = true; + + if (type == RTM_GETNEIGH) { + ndm->ndm_family = AF_INET; + send_ip = fdb->remote_ip != 0; + send_eth = !is_zero_ether_addr(fdb->eth_addr); + } else + ndm->ndm_family = AF_BRIDGE; ndm->ndm_state = fdb->state; ndm->ndm_ifindex = vxlan->dev->ifindex; ndm->ndm_flags = NTF_SELF; ndm->ndm_type = NDA_DST; - if (nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) + if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; - if (nla_put_be32(skb, NDA_DST, fdb->remote_ip)) + if (send_ip && nla_put_be32(skb, NDA_DST, fdb->remote_ip)) goto nla_put_failure; ci.ndm_used = jiffies_to_clock_t(now - fdb->used); @@ -222,6 +239,29 @@ errout: rtnl_set_sk_err(net, RTNLGRP_NEIGH, err); } +static void vxlan_ip_miss(struct net_device *dev, __be32 ipa) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb f; + + memset(&f, 0, sizeof f); + f.state = NUD_STALE; + f.remote_ip = ipa; /* goes to NDA_DST */ + + vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); +} + +static void vxlan_fdb_miss(struct vxlan_dev *vxlan, const u8 eth_addr[ETH_ALEN]) +{ + struct vxlan_fdb f; + + memset(&f, 0, sizeof f); + f.state = NUD_STALE; + memcpy(f.eth_addr, eth_addr, ETH_ALEN); + + vxlan_fdb_notify(vxlan, &f, RTM_GETNEIGH); +} + /* Hash Ethernet address */ static u32 eth_hash(const unsigned char *addr) { @@ -551,6 +591,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto drop; } + skb_reset_mac_header(skb); + /* Re-examine inner Ethernet packet */ oip = ip_hdr(skb); skb->protocol = eth_type_trans(skb, vxlan->dev); @@ -560,7 +602,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) vxlan->dev->dev_addr) == 0) goto drop; - if (vxlan->learn) + if (vxlan->flags & VXLAN_F_LEARN) vxlan_snoop(skb->dev, oip->saddr, eth_hdr(skb)->h_source); __skb_tunnel_rx(skb, vxlan->dev); @@ -599,6 +641,117 @@ drop: return 0; } +static int arp_reduce(struct net_device *dev, struct sk_buff *skb) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct arphdr *parp; + u8 *arpptr, *sha; + __be32 sip, tip; + struct neighbour *n; + + if (dev->flags & IFF_NOARP) + goto out; + + if (!pskb_may_pull(skb, arp_hdr_len(dev))) { + dev->stats.tx_dropped++; + goto out; + } + parp = arp_hdr(skb); + + if ((parp->ar_hrd != htons(ARPHRD_ETHER) && + parp->ar_hrd != htons(ARPHRD_IEEE802)) || + parp->ar_pro != htons(ETH_P_IP) || + parp->ar_op != htons(ARPOP_REQUEST) || + parp->ar_hln != dev->addr_len || + parp->ar_pln != 4) + goto out; + arpptr = (u8 *)parp + sizeof(struct arphdr); + sha = arpptr; + arpptr += dev->addr_len; /* sha */ + memcpy(&sip, arpptr, sizeof(sip)); + arpptr += sizeof(sip); + arpptr += dev->addr_len; /* tha */ + memcpy(&tip, arpptr, sizeof(tip)); + + if (ipv4_is_loopback(tip) || + ipv4_is_multicast(tip)) + goto out; + + n = neigh_lookup(&arp_tbl, &tip, dev); + + if (n) { + struct vxlan_dev *vxlan = netdev_priv(dev); + struct vxlan_fdb *f; + struct sk_buff *reply; + + if (!(n->nud_state & NUD_CONNECTED)) { + neigh_release(n); + goto out; + } + + f = vxlan_find_mac(vxlan, n->ha); + if (f && f->remote_ip == 0) { + /* bridge-local neighbor */ + neigh_release(n); + goto out; + } + + reply = arp_create(ARPOP_REPLY, ETH_P_ARP, sip, dev, tip, sha, + n->ha, sha); + + neigh_release(n); + + skb_reset_mac_header(reply); + __skb_pull(reply, skb_network_offset(reply)); + reply->ip_summed = CHECKSUM_UNNECESSARY; + reply->pkt_type = PACKET_HOST; + + if (netif_rx_ni(reply) == NET_RX_DROP) + dev->stats.rx_dropped++; + } else if (vxlan->flags & VXLAN_F_L3MISS) + vxlan_ip_miss(dev, tip); +out: + consume_skb(skb); + return NETDEV_TX_OK; +} + +static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + struct neighbour *n; + struct iphdr *pip; + + if (is_multicast_ether_addr(eth_hdr(skb)->h_dest)) + return false; + + n = NULL; + switch (ntohs(eth_hdr(skb)->h_proto)) { + case ETH_P_IP: + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return false; + pip = ip_hdr(skb); + n = neigh_lookup(&arp_tbl, &pip->daddr, dev); + break; + default: + return false; + } + + if (n) { + bool diff; + + diff = compare_ether_addr(eth_hdr(skb)->h_dest, n->ha) != 0; + if (diff) { + memcpy(eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest, + dev->addr_len); + memcpy(eth_hdr(skb)->h_dest, n->ha, dev->addr_len); + } + neigh_release(n); + return diff; + } else if (vxlan->flags & VXLAN_F_L3MISS) + vxlan_ip_miss(dev, pip->daddr); + return false; +} + /* Extract dsfield from inner protocol */ static inline u8 vxlan_get_dsfield(const struct iphdr *iph, const struct sk_buff *skb) @@ -621,22 +774,6 @@ static inline u8 vxlan_ecn_encap(u8 tos, return INET_ECN_encapsulate(tos, inner); } -static __be32 vxlan_find_dst(struct vxlan_dev *vxlan, struct sk_buff *skb) -{ - const struct ethhdr *eth = (struct ethhdr *) skb->data; - const struct vxlan_fdb *f; - - if (is_multicast_ether_addr(eth->h_dest)) - return vxlan->gaddr; - - f = vxlan_find_mac(vxlan, eth->h_dest); - if (f) - return f->remote_ip; - else - return vxlan->gaddr; - -} - static void vxlan_sock_free(struct sk_buff *skb) { sock_put(skb->sk); @@ -683,6 +820,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) struct vxlan_dev *vxlan = netdev_priv(dev); struct rtable *rt; const struct iphdr *old_iph; + struct ethhdr *eth; struct iphdr *iph; struct vxlanhdr *vxh; struct udphdr *uh; @@ -693,10 +831,50 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev) __be16 df = 0; __u8 tos, ttl; int err; + bool did_rsc = false; + const struct vxlan_fdb *f; + + skb_reset_mac_header(skb); + eth = eth_hdr(skb); + + if ((vxlan->flags & VXLAN_F_PROXY) && ntohs(eth->h_proto) == ETH_P_ARP) + return arp_reduce(dev, skb); + else if ((vxlan->flags&VXLAN_F_RSC) && ntohs(eth->h_proto) == ETH_P_IP) + did_rsc = route_shortcircuit(dev, skb); - dst = vxlan_find_dst(vxlan, skb); - if (!dst) + f = vxlan_find_mac(vxlan, eth->h_dest); + if (f == NULL) { + did_rsc = false; + dst = vxlan->gaddr; + if (!dst && (vxlan->flags & VXLAN_F_L2MISS) && + !is_multicast_ether_addr(eth->h_dest)) + vxlan_fdb_miss(vxlan, eth->h_dest); + } else + dst = f->remote_ip; + + if (!dst) { + if (did_rsc) { + __skb_pull(skb, skb_network_offset(skb)); + skb->ip_summed = CHECKSUM_NONE; + skb->pkt_type = PACKET_HOST; + + /* short-circuited back to local bridge */ + if (netif_rx(skb) == NET_RX_SUCCESS) { + struct vxlan_stats *stats = + this_cpu_ptr(vxlan->stats); + + u64_stats_update_begin(&stats->syncp); + stats->tx_packets++; + stats->tx_bytes += pkt_len; + u64_stats_update_end(&stats->syncp); + } else { + dev->stats.tx_errors++; + dev->stats.tx_aborted_errors++; + } + return NETDEV_TX_OK; + } goto drop; + } /* Need space for new headers (invalidates iph ptr) */ if (skb_cow_head(skb, VXLAN_HEADROOM)) @@ -1019,6 +1197,10 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_AGEING] = { .type = NLA_U32 }, [IFLA_VXLAN_LIMIT] = { .type = NLA_U32 }, [IFLA_VXLAN_PORT_RANGE] = { .len = sizeof(struct ifla_vxlan_port_range) }, + [IFLA_VXLAN_PROXY] = { .type = NLA_U8 }, + [IFLA_VXLAN_RSC] = { .type = NLA_U8 }, + [IFLA_VXLAN_L2MISS] = { .type = NLA_U8 }, + [IFLA_VXLAN_L3MISS] = { .type = NLA_U8 }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -1114,13 +1296,25 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, vxlan->ttl = nla_get_u8(data[IFLA_VXLAN_TTL]); if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING])) - vxlan->learn = true; + vxlan->flags |= VXLAN_F_LEARN; if (data[IFLA_VXLAN_AGEING]) vxlan->age_interval = nla_get_u32(data[IFLA_VXLAN_AGEING]); else vxlan->age_interval = FDB_AGE_DEFAULT; + if (data[IFLA_VXLAN_PROXY] && nla_get_u8(data[IFLA_VXLAN_PROXY])) + vxlan->flags |= VXLAN_F_PROXY; + + if (data[IFLA_VXLAN_RSC] && nla_get_u8(data[IFLA_VXLAN_RSC])) + vxlan->flags |= VXLAN_F_RSC; + + if (data[IFLA_VXLAN_L2MISS] && nla_get_u8(data[IFLA_VXLAN_L2MISS])) + vxlan->flags |= VXLAN_F_L2MISS; + + if (data[IFLA_VXLAN_L3MISS] && nla_get_u8(data[IFLA_VXLAN_L3MISS])) + vxlan->flags |= VXLAN_F_L3MISS; + if (data[IFLA_VXLAN_LIMIT]) vxlan->addrmax = nla_get_u32(data[IFLA_VXLAN_LIMIT]); @@ -1157,6 +1351,10 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L2MISS */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_L3MISS */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_AGEING */ nla_total_size(sizeof(__u32)) + /* IFLA_VXLAN_LIMIT */ nla_total_size(sizeof(struct ifla_vxlan_port_range)) + @@ -1185,7 +1383,15 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->ttl) || nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->tos) || - nla_put_u8(skb, IFLA_VXLAN_LEARNING, vxlan->learn) || + nla_put_u8(skb, IFLA_VXLAN_LEARNING, + !!(vxlan->flags & VXLAN_F_LEARN)) || + nla_put_u8(skb, IFLA_VXLAN_PROXY, + !!(vxlan->flags & VXLAN_F_PROXY)) || + nla_put_u8(skb, IFLA_VXLAN_RSC, !!(vxlan->flags & VXLAN_F_RSC)) || + nla_put_u8(skb, IFLA_VXLAN_L2MISS, + !!(vxlan->flags & VXLAN_F_L2MISS)) || + nla_put_u8(skb, IFLA_VXLAN_L3MISS, + !!(vxlan->flags & VXLAN_F_L3MISS)) || nla_put_u32(skb, IFLA_VXLAN_AGEING, vxlan->age_interval) || nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->addrmax)) goto nla_put_failure; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 7aae0179ae44..bb58aeb7f34d 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -302,6 +302,10 @@ enum { IFLA_VXLAN_AGEING, IFLA_VXLAN_LIMIT, IFLA_VXLAN_PORT_RANGE, + IFLA_VXLAN_PROXY, + IFLA_VXLAN_RSC, + IFLA_VXLAN_L2MISS, + IFLA_VXLAN_L3MISS, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3 From e2f1f072db8db81e6b5bcbfcf409bb5c91dc9329 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 19 Nov 2012 22:41:45 +0000 Subject: sit: allow to configure 6rd tunnels via netlink This patch add the support of 6RD tunnels management via netlink. Note that netdev_state_change() is now called when 6RD parameters are updated. 6RD parameters are updated only if there is at least one 6RD attribute. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/if_tunnel.h | 4 ++ net/ipv6/sit.c | 149 ++++++++++++++++++++++++++++++++++------- 2 files changed, 128 insertions(+), 25 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 5ab0c8ddc2bc..aee73d0611fb 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -49,6 +49,10 @@ enum { IFLA_IPTUN_FLAGS, IFLA_IPTUN_PROTO, IFLA_IPTUN_PMTUDISC, + IFLA_IPTUN_6RD_PREFIX, + IFLA_IPTUN_6RD_RELAY_PREFIX, + IFLA_IPTUN_6RD_PREFIXLEN, + IFLA_IPTUN_6RD_RELAY_PREFIXLEN, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index fee21c6c3ebf..80cb3829831c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -936,6 +936,38 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) netdev_state_change(t->dev); } +#ifdef CONFIG_IPV6_SIT_6RD +static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, + struct ip_tunnel_6rd *ip6rd) +{ + struct in6_addr prefix; + __be32 relay_prefix; + + if (ip6rd->relay_prefixlen > 32 || + ip6rd->prefixlen + (32 - ip6rd->relay_prefixlen) > 64) + return -EINVAL; + + ipv6_addr_prefix(&prefix, &ip6rd->prefix, ip6rd->prefixlen); + if (!ipv6_addr_equal(&prefix, &ip6rd->prefix)) + return -EINVAL; + if (ip6rd->relay_prefixlen) + relay_prefix = ip6rd->relay_prefix & + htonl(0xffffffffUL << + (32 - ip6rd->relay_prefixlen)); + else + relay_prefix = 0; + if (relay_prefix != ip6rd->relay_prefix) + return -EINVAL; + + t->ip6rd.prefix = prefix; + t->ip6rd.relay_prefix = relay_prefix; + t->ip6rd.prefixlen = ip6rd->prefixlen; + t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; + netdev_state_change(t->dev); + return 0; +} +#endif + static int ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) { @@ -1105,31 +1137,9 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) t = netdev_priv(dev); if (cmd != SIOCDEL6RD) { - struct in6_addr prefix; - __be32 relay_prefix; - - err = -EINVAL; - if (ip6rd.relay_prefixlen > 32 || - ip6rd.prefixlen + (32 - ip6rd.relay_prefixlen) > 64) - goto done; - - ipv6_addr_prefix(&prefix, &ip6rd.prefix, - ip6rd.prefixlen); - if (!ipv6_addr_equal(&prefix, &ip6rd.prefix)) + err = ipip6_tunnel_update_6rd(t, &ip6rd); + if (err < 0) goto done; - if (ip6rd.relay_prefixlen) - relay_prefix = ip6rd.relay_prefix & - htonl(0xffffffffUL << - (32 - ip6rd.relay_prefixlen)); - else - relay_prefix = 0; - if (relay_prefix != ip6rd.relay_prefix) - goto done; - - t->ip6rd.prefix = prefix; - t->ip6rd.relay_prefix = relay_prefix; - t->ip6rd.prefixlen = ip6rd.prefixlen; - t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen; } else ipip6_tunnel_clone_6rd(dev, sitn); @@ -1261,11 +1271,53 @@ static void ipip6_netlink_parms(struct nlattr *data[], parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); } +#ifdef CONFIG_IPV6_SIT_6RD +/* This function returns true when 6RD attributes are present in the nl msg */ +static bool ipip6_netlink_6rd_parms(struct nlattr *data[], + struct ip_tunnel_6rd *ip6rd) +{ + bool ret = false; + memset(ip6rd, 0, sizeof(*ip6rd)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_6RD_PREFIX]) { + ret = true; + nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX], + sizeof(struct in6_addr)); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { + ret = true; + ip6rd->relay_prefix = + nla_get_be32(data[IFLA_IPTUN_6RD_RELAY_PREFIX]); + } + + if (data[IFLA_IPTUN_6RD_PREFIXLEN]) { + ret = true; + ip6rd->prefixlen = nla_get_u16(data[IFLA_IPTUN_6RD_PREFIXLEN]); + } + + if (data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]) { + ret = true; + ip6rd->relay_prefixlen = + nla_get_u16(data[IFLA_IPTUN_6RD_RELAY_PREFIXLEN]); + } + + return ret; +} +#endif + static int ipip6_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); struct ip_tunnel *nt; +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif + int err; nt = netdev_priv(dev); ipip6_netlink_parms(data, &nt->parms); @@ -1273,7 +1325,16 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, if (ipip6_tunnel_locate(net, &nt->parms, 0)) return -EEXIST; - return ipip6_tunnel_create(dev); + err = ipip6_tunnel_create(dev); + if (err < 0) + return err; + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + err = ipip6_tunnel_update_6rd(nt, &ip6rd); +#endif + + return err; } static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], @@ -1283,6 +1344,9 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm p; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); +#ifdef CONFIG_IPV6_SIT_6RD + struct ip_tunnel_6rd ip6rd; +#endif if (dev == sitn->fb_tunnel_dev) return -EINVAL; @@ -1302,6 +1366,12 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], t = netdev_priv(dev); ipip6_tunnel_update(t, &p); + +#ifdef CONFIG_IPV6_SIT_6RD + if (ipip6_netlink_6rd_parms(data, &ip6rd)) + return ipip6_tunnel_update_6rd(t, &ip6rd); +#endif + return 0; } @@ -1322,6 +1392,16 @@ static size_t ipip6_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_IPTUN_FLAGS */ nla_total_size(2) + +#ifdef CONFIG_IPV6_SIT_6RD + /* IFLA_IPTUN_6RD_PREFIX */ + nla_total_size(sizeof(struct in6_addr)) + + /* IFLA_IPTUN_6RD_RELAY_PREFIX */ + nla_total_size(4) + + /* IFLA_IPTUN_6RD_PREFIXLEN */ + nla_total_size(2) + + /* IFLA_IPTUN_6RD_RELAY_PREFIXLEN */ + nla_total_size(2) + +#endif 0; } @@ -1339,6 +1419,19 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) !!(parm->iph.frag_off & htons(IP_DF))) || nla_put_be16(skb, IFLA_IPTUN_FLAGS, parm->i_flags)) goto nla_put_failure; + +#ifdef CONFIG_IPV6_SIT_6RD + if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), + &tunnel->ip6rd.prefix) || + nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || + nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, + tunnel->ip6rd.prefixlen) || + nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, + tunnel->ip6rd.relay_prefixlen)) + goto nla_put_failure; +#endif + return 0; nla_put_failure: @@ -1353,6 +1446,12 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_TOS] = { .type = NLA_U8 }, [IFLA_IPTUN_PMTUDISC] = { .type = NLA_U8 }, [IFLA_IPTUN_FLAGS] = { .type = NLA_U16 }, +#ifdef CONFIG_IPV6_SIT_6RD + [IFLA_IPTUN_6RD_PREFIX] = { .len = sizeof(struct in6_addr) }, + [IFLA_IPTUN_6RD_RELAY_PREFIX] = { .type = NLA_U32 }, + [IFLA_IPTUN_6RD_PREFIXLEN] = { .type = NLA_U16 }, + [IFLA_IPTUN_6RD_RELAY_PREFIXLEN] = { .type = NLA_U16 }, +#endif }; static struct rtnl_link_ops sit_link_ops __read_mostly = { -- cgit v1.2.3 From dc96efb72054985c0912f831da009a2da4e9f6dd Mon Sep 17 00:00:00 2001 From: Matt Schulte Date: Mon, 19 Nov 2012 09:12:04 -0600 Subject: Serial: Add support for new devices: Exar's XR17V35x family of multi-port PCIe UARTs Add support for new devices: Exar's XR17V35x family of multi-port PCIe UARTs. Signed-off-by: Matt Schulte Acked-by: Alan Cox Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250.c | 71 ++++++++++++++++++++++++++++ drivers/tty/serial/8250/8250_pci.c | 96 ++++++++++++++++++++++++++++++++++++++ include/linux/pci_ids.h | 3 ++ include/uapi/linux/serial_core.h | 3 +- include/uapi/linux/serial_reg.h | 6 +++ 5 files changed, 178 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index 2af83a246499..3624df674a31 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -282,6 +282,15 @@ static const struct serial8250_config uart_config[] = { .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_10, .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR, }, + [PORT_XR17V35X] = { + .name = "XR17V35X", + .fifo_size = 256, + .tx_loadsz = 256, + .fcr = UART_FCR_ENABLE_FIFO | UART_FCR_R_TRIG_11 | + UART_FCR_T_TRIG_11, + .flags = UART_CAP_FIFO | UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP, + }, [PORT_LPC3220] = { .name = "LPC3220", .fifo_size = 64, @@ -455,6 +464,7 @@ static void io_serial_out(struct uart_port *p, int offset, int value) } static int serial8250_default_handle_irq(struct uart_port *port); +static int exar_handle_irq(struct uart_port *port); static void set_io_from_upio(struct uart_port *p) { @@ -574,6 +584,18 @@ EXPORT_SYMBOL_GPL(serial8250_clear_and_reinit_fifos); */ static void serial8250_set_sleep(struct uart_8250_port *p, int sleep) { + /* + * Exar UARTs have a SLEEP register that enables or disables + * each UART to enter sleep mode separately. On the XR17V35x the + * register is accessible to each UART at the UART_EXAR_SLEEP + * offset but the UART channel may only write to the corresponding + * bit. + */ + if (p->port.type == PORT_XR17V35X) { + serial_out(p, UART_EXAR_SLEEP, 0xff); + return; + } + if (p->capabilities & UART_CAP_SLEEP) { if (p->capabilities & UART_CAP_EFR) { serial_out(p, UART_LCR, UART_LCR_CONF_MODE_B); @@ -881,6 +903,27 @@ static void autoconfig_16550a(struct uart_8250_port *up) up->port.type = PORT_16550A; up->capabilities |= UART_CAP_FIFO; + /* + * XR17V35x UARTs have an extra divisor register, DLD + * that gets enabled with when DLAB is set which will + * cause the device to incorrectly match and assign + * port type to PORT_16650. The EFR for this UART is + * found at offset 0x09. Instead check the Deice ID (DVID) + * register for a 2, 4 or 8 port UART. + */ + status1 = serial_in(up, UART_EXAR_DVID); + if (status1 == 0x82 || status1 == 0x84 || status1 == 0x88) { + if (up->port.flags & UPF_EXAR_EFR) { + DEBUG_AUTOCONF("Exar XR17V35x "); + up->port.type = PORT_XR17V35X; + up->capabilities |= UART_CAP_AFE | UART_CAP_EFR | + UART_CAP_SLEEP; + + return; + } + + } + /* * Check for presence of the EFR when DLAB is set. * Only ST16C650V1 UARTs pass this test. @@ -1515,6 +1558,30 @@ static int serial8250_default_handle_irq(struct uart_port *port) return serial8250_handle_irq(port, iir); } +/* + * These Exar UARTs have an extra interrupt indicator that could + * fire for a few unimplemented interrupts. One of which is a + * wakeup event when coming out of sleep. Put this here just + * to be on the safe side that these interrupts don't go unhandled. + */ +static int exar_handle_irq(struct uart_port *port) +{ + unsigned char int0, int1, int2, int3; + unsigned int iir = serial_port_in(port, UART_IIR); + int ret; + + ret = serial8250_handle_irq(port, iir); + + if (port->type == PORT_XR17V35X) { + int0 = serial_port_in(port, 0x80); + int1 = serial_port_in(port, 0x81); + int2 = serial_port_in(port, 0x82); + int3 = serial_port_in(port, 0x83); + } + + return ret; +} + /* * This is the serial driver's interrupt routine. * @@ -2614,6 +2681,10 @@ static void serial8250_config_port(struct uart_port *port, int flags) serial8250_release_rsa_resource(up); if (port->type == PORT_UNKNOWN) serial8250_release_std_resource(up); + + /* Fixme: probably not the best place for this */ + if (port->type == PORT_XR17V35X) + port->handle_irq = exar_handle_irq; } static int diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c index 97058c1d7d45..2285d3283b3b 100644 --- a/drivers/tty/serial/8250/8250_pci.c +++ b/drivers/tty/serial/8250/8250_pci.c @@ -1164,6 +1164,39 @@ pci_xr17c154_setup(struct serial_private *priv, return pci_default_setup(priv, board, port, idx); } +static int +pci_xr17v35x_setup(struct serial_private *priv, + const struct pciserial_board *board, + struct uart_8250_port *port, int idx) +{ + u8 __iomem *p; + + p = pci_ioremap_bar(priv->dev, 0); + + port->port.flags |= UPF_EXAR_EFR; + + /* + * Setup Multipurpose Input/Output pins. + */ + if (idx == 0) { + writeb(0x00, p + 0x8f); /*MPIOINT[7:0]*/ + writeb(0x00, p + 0x90); /*MPIOLVL[7:0]*/ + writeb(0x00, p + 0x91); /*MPIO3T[7:0]*/ + writeb(0x00, p + 0x92); /*MPIOINV[7:0]*/ + writeb(0x00, p + 0x93); /*MPIOSEL[7:0]*/ + writeb(0x00, p + 0x94); /*MPIOOD[7:0]*/ + writeb(0x00, p + 0x95); /*MPIOINT[15:8]*/ + writeb(0x00, p + 0x96); /*MPIOLVL[15:8]*/ + writeb(0x00, p + 0x97); /*MPIO3T[15:8]*/ + writeb(0x00, p + 0x98); /*MPIOINV[15:8]*/ + writeb(0x00, p + 0x99); /*MPIOSEL[15:8]*/ + writeb(0x00, p + 0x9a); /*MPIOOD[15:8]*/ + } + iounmap(p); + + return pci_default_setup(priv, board, port, idx); +} + static int pci_wch_ch353_setup(struct serial_private *priv, const struct pciserial_board *board, @@ -1622,6 +1655,27 @@ static struct pci_serial_quirk pci_serial_quirks[] __refdata = { .subdevice = PCI_ANY_ID, .setup = pci_xr17c154_setup, }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V352, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V354, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, + { + .vendor = PCI_VENDOR_ID_EXAR, + .device = PCI_DEVICE_ID_EXAR_XR17V358, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + .setup = pci_xr17v35x_setup, + }, /* * Xircom cards */ @@ -1962,6 +2016,9 @@ enum pci_board_num_t { pbn_exar_XR17C152, pbn_exar_XR17C154, pbn_exar_XR17C158, + pbn_exar_XR17V352, + pbn_exar_XR17V354, + pbn_exar_XR17V358, pbn_exar_ibm_saturn, pbn_pasemi_1682M, pbn_ni8430_2, @@ -2580,6 +2637,30 @@ static struct pciserial_board pci_boards[] = { .base_baud = 921600, .uart_offset = 0x200, }, + [pbn_exar_XR17V352] = { + .flags = FL_BASE0, + .num_ports = 2, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, + [pbn_exar_XR17V354] = { + .flags = FL_BASE0, + .num_ports = 4, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, + [pbn_exar_XR17V358] = { + .flags = FL_BASE0, + .num_ports = 8, + .base_baud = 7812500, + .uart_offset = 0x400, + .reg_shift = 0, + .first_offset = 0, + }, [pbn_exar_ibm_saturn] = { .flags = FL_BASE0, .num_ports = 1, @@ -3826,6 +3907,21 @@ static struct pci_device_id serial_pci_tbl[] = { PCI_ANY_ID, PCI_ANY_ID, 0, 0, pbn_exar_XR17C158 }, + /* + * Exar Corp. XR17V35[248] Dual/Quad/Octal PCIe UARTs + */ + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V352, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V352 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V354, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V354 }, + { PCI_VENDOR_ID_EXAR, PCI_DEVICE_ID_EXAR_XR17V358, + PCI_ANY_ID, PCI_ANY_ID, + 0, + 0, pbn_exar_XR17V358 }, /* * Topic TP560 Data/Fax/Voice 56k modem (reported by Evan Clarke) diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 9d36b829533a..0199a7a76fcb 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1985,6 +1985,9 @@ #define PCI_DEVICE_ID_EXAR_XR17C152 0x0152 #define PCI_DEVICE_ID_EXAR_XR17C154 0x0154 #define PCI_DEVICE_ID_EXAR_XR17C158 0x0158 +#define PCI_DEVICE_ID_EXAR_XR17V352 0x0352 +#define PCI_DEVICE_ID_EXAR_XR17V354 0x0354 +#define PCI_DEVICE_ID_EXAR_XR17V358 0x0358 #define PCI_VENDOR_ID_MICROGATE 0x13c0 #define PCI_DEVICE_ID_MICROGATE_USC 0x0010 diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h index ebcc73f0418a..78f99d97475b 100644 --- a/include/uapi/linux/serial_core.h +++ b/include/uapi/linux/serial_core.h @@ -49,7 +49,8 @@ #define PORT_XR17D15X 21 /* Exar XR17D15x UART */ #define PORT_LPC3220 22 /* NXP LPC32xx SoC "Standard" UART */ #define PORT_8250_CIR 23 /* CIR infrared port, has its own driver */ -#define PORT_MAX_8250 23 /* max port ID */ +#define PORT_XR17V35X 24 /* Exar XR17V35x UARTs */ +#define PORT_MAX_8250 24 /* max port ID */ /* * ARM specific type numbers. These are not currently guaranteed diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index 5ed325e88a81..d0b47607b90b 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -367,5 +367,11 @@ #define UART_OMAP_MDR1_CIR_MODE 0x06 /* CIR mode */ #define UART_OMAP_MDR1_DISABLE 0x07 /* Disable (default state) */ +/* + * These are definitions for the XR17V35X and XR17D15X + */ +#define UART_EXAR_SLEEP 0x8b /* Sleep mode */ +#define UART_EXAR_DVID 0x8d /* Device identification */ + #endif /* _LINUX_SERIAL_REG_H */ -- cgit v1.2.3 From d02f81555362e0032080af62154dca00d5ec99e0 Mon Sep 17 00:00:00 2001 From: Matt Schulte Date: Tue, 20 Nov 2012 11:21:17 -0600 Subject: Add register definitions used in several Exar PCI/PCIe UARTs Add register definitions used in several Exar PCI/PCIe UARTs Signed-off-by: Matt Schulte Signed-off-by: Greg Kroah-Hartman --- include/uapi/linux/serial_reg.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h index d0b47607b90b..e6322605b138 100644 --- a/include/uapi/linux/serial_reg.h +++ b/include/uapi/linux/serial_reg.h @@ -368,10 +368,22 @@ #define UART_OMAP_MDR1_DISABLE 0x07 /* Disable (default state) */ /* - * These are definitions for the XR17V35X and XR17D15X + * These are definitions for the Exar XR17V35X and XR17(C|D)15X */ +#define UART_EXAR_8XMODE 0x88 /* 8X sampling rate select */ #define UART_EXAR_SLEEP 0x8b /* Sleep mode */ #define UART_EXAR_DVID 0x8d /* Device identification */ +#define UART_EXAR_FCTR 0x08 /* Feature Control Register */ +#define UART_FCTR_EXAR_IRDA 0x08 /* IrDa data encode select */ +#define UART_FCTR_EXAR_485 0x10 /* Auto 485 half duplex dir ctl */ +#define UART_FCTR_EXAR_TRGA 0x00 /* FIFO trigger table A */ +#define UART_FCTR_EXAR_TRGB 0x60 /* FIFO trigger table B */ +#define UART_FCTR_EXAR_TRGC 0x80 /* FIFO trigger table C */ +#define UART_FCTR_EXAR_TRGD 0xc0 /* FIFO trigger table D programmable */ + +#define UART_EXAR_TXTRG 0x0a /* Tx FIFO trigger level write-only */ +#define UART_EXAR_RXTRG 0x0b /* Rx FIFO trigger level write-only */ + #endif /* _LINUX_SERIAL_REG_H */ -- cgit v1.2.3 From 051c7788bcb92f2e98ef86e86651e0420765b121 Mon Sep 17 00:00:00 2001 From: Sumit Semwal Date: Thu, 14 Jun 2012 10:37:35 -0300 Subject: [media] v4l: Add DMABUF as a memory type Adds DMABUF memory type to v4l framework. Also adds the related file descriptor in v4l2_plane and v4l2_buffer. [original work in the PoC for buffer sharing] Signed-off-by: Tomasz Stanislawski Signed-off-by: Sumit Semwal Signed-off-by: Sumit Semwal Acked-by: Laurent Pinchart Acked-by: Hans Verkuil Tested-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 18 ++++++++++++++++++ drivers/media/v4l2-core/v4l2-ioctl.c | 1 + include/uapi/linux/videodev2.h | 7 +++++++ 3 files changed, 26 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index 83ffb6436baf..cc5998b31463 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -297,6 +297,7 @@ struct v4l2_plane32 { union { __u32 mem_offset; compat_long_t userptr; + __s32 fd; } m; __u32 data_offset; __u32 reserved[11]; @@ -318,6 +319,7 @@ struct v4l2_buffer32 { __u32 offset; compat_long_t userptr; compat_caddr_t planes; + __s32 fd; } m; __u32 length; __u32 reserved2; @@ -341,6 +343,9 @@ static int get_v4l2_plane32(struct v4l2_plane *up, struct v4l2_plane32 *up32, up_pln = compat_ptr(p); if (put_user((unsigned long)up_pln, &up->m.userptr)) return -EFAULT; + } else if (memory == V4L2_MEMORY_DMABUF) { + if (copy_in_user(&up->m.fd, &up32->m.fd, sizeof(int))) + return -EFAULT; } else { if (copy_in_user(&up->m.mem_offset, &up32->m.mem_offset, sizeof(__u32))) @@ -364,6 +369,11 @@ static int put_v4l2_plane32(struct v4l2_plane *up, struct v4l2_plane32 *up32, if (copy_in_user(&up32->m.mem_offset, &up->m.mem_offset, sizeof(__u32))) return -EFAULT; + /* For DMABUF, driver might've set up the fd, so copy it back. */ + if (memory == V4L2_MEMORY_DMABUF) + if (copy_in_user(&up32->m.fd, &up->m.fd, + sizeof(int))) + return -EFAULT; return 0; } @@ -446,6 +456,10 @@ static int get_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (get_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; + case V4L2_MEMORY_DMABUF: + if (get_user(kp->m.fd, &up->m.fd)) + return -EFAULT; + break; } } @@ -510,6 +524,10 @@ static int put_v4l2_buffer32(struct v4l2_buffer *kp, struct v4l2_buffer32 __user if (put_user(kp->m.offset, &up->m.offset)) return -EFAULT; break; + case V4L2_MEMORY_DMABUF: + if (put_user(kp->m.fd, &up->m.fd)) + return -EFAULT; + break; } } diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 8f388ff31ebb..530a67e3fe0e 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -155,6 +155,7 @@ static const char *v4l2_memory_names[] = { [V4L2_MEMORY_MMAP] = "mmap", [V4L2_MEMORY_USERPTR] = "userptr", [V4L2_MEMORY_OVERLAY] = "overlay", + [V4L2_MEMORY_DMABUF] = "dmabuf", }; #define prt_names(a, arr) (((unsigned)(a)) < ARRAY_SIZE(arr) ? arr[a] : "unknown") diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 2fff7ff3e05b..91ac83b21c20 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -186,6 +186,7 @@ enum v4l2_memory { V4L2_MEMORY_MMAP = 1, V4L2_MEMORY_USERPTR = 2, V4L2_MEMORY_OVERLAY = 3, + V4L2_MEMORY_DMABUF = 4, }; /* see also http://vektor.theorem.ca/graphics/ycbcr/ */ @@ -602,6 +603,8 @@ struct v4l2_requestbuffers { * should be passed to mmap() called on the video node) * @userptr: when memory is V4L2_MEMORY_USERPTR, a userspace pointer * pointing to this plane + * @fd: when memory is V4L2_MEMORY_DMABUF, a userspace file + * descriptor associated with this plane * @data_offset: offset in the plane to the start of data; usually 0, * unless there is a header in front of the data * @@ -616,6 +619,7 @@ struct v4l2_plane { union { __u32 mem_offset; unsigned long userptr; + __s32 fd; } m; __u32 data_offset; __u32 reserved[11]; @@ -640,6 +644,8 @@ struct v4l2_plane { * (or a "cookie" that should be passed to mmap() as offset) * @userptr: for non-multiplanar buffers with memory == V4L2_MEMORY_USERPTR; * a userspace pointer pointing to this buffer + * @fd: for non-multiplanar buffers with memory == V4L2_MEMORY_DMABUF; + * a userspace file descriptor associated with this buffer * @planes: for multiplanar buffers; userspace pointer to the array of plane * info structs for this buffer * @length: size in bytes of the buffer (NOT its payload) for single-plane @@ -666,6 +672,7 @@ struct v4l2_buffer { __u32 offset; unsigned long userptr; struct v4l2_plane *planes; + __s32 fd; } m; __u32 length; __u32 reserved2; -- cgit v1.2.3 From b799d09a157da71566e8013a62073435550cab6d Mon Sep 17 00:00:00 2001 From: Tomasz Stanislawski Date: Thu, 14 Jun 2012 11:32:23 -0300 Subject: [media] v4l: add buffer exporting via dmabuf This patch adds extension to V4L2 api. A new ioctl VIDIOC_EXPBUF is added. The ioctl is used to export an mmap buffer as a DMABUF file descriptor. Signed-off-by: Tomasz Stanislawski Signed-off-by: Kyungmin Park Acked-by: Hans Verkuil Tested-by: Mauro Carvalho Chehab Signed-off-by: Mauro Carvalho Chehab --- drivers/media/v4l2-core/v4l2-compat-ioctl32.c | 1 + drivers/media/v4l2-core/v4l2-dev.c | 1 + drivers/media/v4l2-core/v4l2-ioctl.c | 10 ++++++++++ include/media/v4l2-ioctl.h | 2 ++ include/uapi/linux/videodev2.h | 28 +++++++++++++++++++++++++++ 5 files changed, 42 insertions(+) (limited to 'include/uapi/linux') diff --git a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c index cc5998b31463..7157af301b14 100644 --- a/drivers/media/v4l2-core/v4l2-compat-ioctl32.c +++ b/drivers/media/v4l2-core/v4l2-compat-ioctl32.c @@ -1018,6 +1018,7 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg) case VIDIOC_S_FBUF32: case VIDIOC_OVERLAY32: case VIDIOC_QBUF32: + case VIDIOC_EXPBUF: case VIDIOC_DQBUF32: case VIDIOC_STREAMON32: case VIDIOC_STREAMOFF32: diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c index a2df842e5100..98dcad9c8a3b 100644 --- a/drivers/media/v4l2-core/v4l2-dev.c +++ b/drivers/media/v4l2-core/v4l2-dev.c @@ -571,6 +571,7 @@ static void determine_valid_ioctls(struct video_device *vdev) SET_VALID_IOCTL(ops, VIDIOC_REQBUFS, vidioc_reqbufs); SET_VALID_IOCTL(ops, VIDIOC_QUERYBUF, vidioc_querybuf); SET_VALID_IOCTL(ops, VIDIOC_QBUF, vidioc_qbuf); + SET_VALID_IOCTL(ops, VIDIOC_EXPBUF, vidioc_expbuf); SET_VALID_IOCTL(ops, VIDIOC_DQBUF, vidioc_dqbuf); SET_VALID_IOCTL(ops, VIDIOC_STREAMON, vidioc_streamon); SET_VALID_IOCTL(ops, VIDIOC_STREAMOFF, vidioc_streamoff); diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c index 530a67e3fe0e..aa6e7c788db2 100644 --- a/drivers/media/v4l2-core/v4l2-ioctl.c +++ b/drivers/media/v4l2-core/v4l2-ioctl.c @@ -454,6 +454,15 @@ static void v4l_print_buffer(const void *arg, bool write_only) tc->type, tc->flags, tc->frames, *(__u32 *)tc->userbits); } +static void v4l_print_exportbuffer(const void *arg, bool write_only) +{ + const struct v4l2_exportbuffer *p = arg; + + pr_cont("fd=%d, type=%s, index=%u, plane=%u, flags=0x%08x\n", + p->fd, prt_names(p->type, v4l2_type_names), + p->index, p->plane, p->flags); +} + static void v4l_print_create_buffers(const void *arg, bool write_only) { const struct v4l2_create_buffers *p = arg; @@ -1961,6 +1970,7 @@ static struct v4l2_ioctl_info v4l2_ioctls[] = { IOCTL_INFO_STD(VIDIOC_S_FBUF, vidioc_s_fbuf, v4l_print_framebuffer, INFO_FL_PRIO), IOCTL_INFO_FNC(VIDIOC_OVERLAY, v4l_overlay, v4l_print_u32, INFO_FL_PRIO), IOCTL_INFO_FNC(VIDIOC_QBUF, v4l_qbuf, v4l_print_buffer, INFO_FL_QUEUE), + IOCTL_INFO_STD(VIDIOC_EXPBUF, vidioc_expbuf, v4l_print_exportbuffer, INFO_FL_QUEUE | INFO_FL_CLEAR(v4l2_exportbuffer, flags)), IOCTL_INFO_FNC(VIDIOC_DQBUF, v4l_dqbuf, v4l_print_buffer, INFO_FL_QUEUE), IOCTL_INFO_FNC(VIDIOC_STREAMON, v4l_streamon, v4l_print_buftype, INFO_FL_PRIO | INFO_FL_QUEUE), IOCTL_INFO_FNC(VIDIOC_STREAMOFF, v4l_streamoff, v4l_print_buftype, INFO_FL_PRIO | INFO_FL_QUEUE), diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index e48b571ca37d..4118ad1324c9 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -111,6 +111,8 @@ struct v4l2_ioctl_ops { int (*vidioc_reqbufs) (struct file *file, void *fh, struct v4l2_requestbuffers *b); int (*vidioc_querybuf)(struct file *file, void *fh, struct v4l2_buffer *b); int (*vidioc_qbuf) (struct file *file, void *fh, struct v4l2_buffer *b); + int (*vidioc_expbuf) (struct file *file, void *fh, + struct v4l2_exportbuffer *e); int (*vidioc_dqbuf) (struct file *file, void *fh, struct v4l2_buffer *b); int (*vidioc_create_bufs)(struct file *file, void *fh, struct v4l2_create_buffers *b); diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h index 91ac83b21c20..3cf3e946e331 100644 --- a/include/uapi/linux/videodev2.h +++ b/include/uapi/linux/videodev2.h @@ -694,6 +694,33 @@ struct v4l2_buffer { #define V4L2_BUF_FLAG_NO_CACHE_INVALIDATE 0x0800 #define V4L2_BUF_FLAG_NO_CACHE_CLEAN 0x1000 +/** + * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor + * + * @index: id number of the buffer + * @type: enum v4l2_buf_type; buffer type (type == *_MPLANE for + * multiplanar buffers); + * @plane: index of the plane to be exported, 0 for single plane queues + * @flags: flags for newly created file, currently only O_CLOEXEC is + * supported, refer to manual of open syscall for more details + * @fd: file descriptor associated with DMABUF (set by driver) + * + * Contains data used for exporting a video buffer as DMABUF file descriptor. + * The buffer is identified by a 'cookie' returned by VIDIOC_QUERYBUF + * (identical to the cookie used to mmap() the buffer to userspace). All + * reserved fields must be set to zero. The field reserved0 is expected to + * become a structure 'type' allowing an alternative layout of the structure + * content. Therefore this field should not be used for any other extensions. + */ +struct v4l2_exportbuffer { + __u32 type; /* enum v4l2_buf_type */ + __u32 index; + __u32 plane; + __u32 flags; + __s32 fd; + __u32 reserved[11]; +}; + /* * O V E R L A Y P R E V I E W */ @@ -1895,6 +1922,7 @@ struct v4l2_create_buffers { #define VIDIOC_S_FBUF _IOW('V', 11, struct v4l2_framebuffer) #define VIDIOC_OVERLAY _IOW('V', 14, int) #define VIDIOC_QBUF _IOWR('V', 15, struct v4l2_buffer) +#define VIDIOC_EXPBUF _IOWR('V', 16, struct v4l2_exportbuffer) #define VIDIOC_DQBUF _IOWR('V', 17, struct v4l2_buffer) #define VIDIOC_STREAMON _IOW('V', 18, int) #define VIDIOC_STREAMOFF _IOW('V', 19, int) -- cgit v1.2.3 From 42d97a599eb6b2aab3a401b3e5799a399d6c7652 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2012 18:31:02 +0100 Subject: cfg80211: remove remain-on-channel channel type As mwifiex (and mac80211 in the software case) are the only drivers actually implementing remain-on-channel with channel type, userspace can't be relying on it. This is the case, as it's used only for P2P operations right now. Rather than adding a flag to tell userspace whether or not it can actually rely on it, simplify all the code by removing the ability to use different channel types. Leave only the validation of the attribute, so that if we extend it again later (with the needed capability flag), it can't break userspace sending invalid data. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 7 ++-- drivers/net/wireless/ath/ath6kl/wmi.c | 5 ++- drivers/net/wireless/iwlwifi/dvm/dev.h | 1 - drivers/net/wireless/iwlwifi/dvm/mac80211.c | 2 -- drivers/net/wireless/mac80211_hwsim.c | 1 - drivers/net/wireless/mwifiex/cfg80211.c | 16 +++------ drivers/net/wireless/mwifiex/main.h | 2 -- drivers/net/wireless/mwifiex/sta_event.c | 1 - drivers/net/wireless/mwifiex/sta_ioctl.c | 3 +- include/net/cfg80211.h | 11 ++---- include/net/mac80211.h | 1 - include/uapi/linux/nl80211.h | 14 ++++---- net/mac80211/cfg.c | 27 +++++---------- net/mac80211/driver-ops.h | 5 ++- net/mac80211/ieee80211_i.h | 2 -- net/mac80211/main.c | 2 +- net/mac80211/offchannel.c | 8 ++--- net/mac80211/trace.h | 6 ++-- net/wireless/core.h | 6 ++-- net/wireless/mlme.c | 21 ++++-------- net/wireless/nl80211.c | 36 ++++++++------------ net/wireless/nl80211.h | 4 +-- net/wireless/rdev-ops.h | 20 +++++------ net/wireless/trace.h | 52 ++++++++++------------------- 24 files changed, 81 insertions(+), 172 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index d615f9f7506a..74091d33ed6c 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -2976,7 +2976,6 @@ static int ath6kl_change_station(struct wiphy *wiphy, struct net_device *dev, static int ath6kl_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { @@ -3135,10 +3134,8 @@ static bool ath6kl_is_p2p_go_ssid(const u8 *buf, size_t len) static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev); struct ath6kl *ar = ath6kl_priv(vif->ndev); diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index c30ab4b11d61..0e05c41cdcfc 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -474,7 +474,7 @@ static int ath6kl_wmi_remain_on_chnl_event_rx(struct wmi *wmi, u8 *datap, return -EINVAL; } id = vif->last_roc_id; - cfg80211_ready_on_channel(&vif->wdev, id, chan, NL80211_CHAN_NO_HT, + cfg80211_ready_on_channel(&vif->wdev, id, chan, dur, GFP_ATOMIC); return 0; @@ -513,8 +513,7 @@ static int ath6kl_wmi_cancel_remain_on_chnl_event_rx(struct wmi *wmi, else id = vif->last_roc_id; /* timeout on uncanceled r-o-c */ vif->last_cancel_roc_id = 0; - cfg80211_remain_on_channel_expired(&vif->wdev, id, chan, - NL80211_CHAN_NO_HT, GFP_ATOMIC); + cfg80211_remain_on_channel_expired(&vif->wdev, id, chan, GFP_ATOMIC); return 0; } diff --git a/drivers/net/wireless/iwlwifi/dvm/dev.h b/drivers/net/wireless/iwlwifi/dvm/dev.h index 8141f91c3725..29c571a56251 100644 --- a/drivers/net/wireless/iwlwifi/dvm/dev.h +++ b/drivers/net/wireless/iwlwifi/dvm/dev.h @@ -789,7 +789,6 @@ struct iwl_priv { /* remain-on-channel offload support */ struct ieee80211_channel *hw_roc_channel; struct delayed_work hw_roc_disable_work; - enum nl80211_channel_type hw_roc_chantype; int hw_roc_duration; bool hw_roc_setup, hw_roc_start_notified; diff --git a/drivers/net/wireless/iwlwifi/dvm/mac80211.c b/drivers/net/wireless/iwlwifi/dvm/mac80211.c index e75d80341f28..852edb02e5f6 100644 --- a/drivers/net/wireless/iwlwifi/dvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/dvm/mac80211.c @@ -1034,7 +1034,6 @@ done: static int iwlagn_mac_remain_on_channel(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *channel, - enum nl80211_channel_type channel_type, int duration) { struct iwl_priv *priv = IWL_MAC80211_GET_DVM(hw); @@ -1066,7 +1065,6 @@ static int iwlagn_mac_remain_on_channel(struct ieee80211_hw *hw, } priv->hw_roc_channel = channel; - priv->hw_roc_chantype = channel_type; /* convert from ms to TU */ priv->hw_roc_duration = DIV_ROUND_UP(1000 * duration, 1024); priv->hw_roc_start_notified = false; diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 3baa51f1bb83..b0338543547b 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -1455,7 +1455,6 @@ static void hw_roc_done(struct work_struct *work) static int mac80211_hwsim_roc(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, int duration) { struct mac80211_hwsim_data *hwsim = hw->priv; diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 8e829b251d83..f69190b492aa 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -180,10 +180,8 @@ mwifiex_form_mgmt_frame(struct sk_buff *skb, const u8 *buf, size_t len) static int mwifiex_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { struct sk_buff *skb; u16 pkt_len; @@ -253,7 +251,6 @@ static int mwifiex_cfg80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); @@ -271,15 +268,14 @@ mwifiex_cfg80211_remain_on_channel(struct wiphy *wiphy, } ret = mwifiex_remain_on_chan_cfg(priv, HostCmd_ACT_GEN_SET, chan, - &channel_type, duration); + duration); if (!ret) { *cookie = random32() | 1; priv->roc_cfg.cookie = *cookie; priv->roc_cfg.chan = *chan; - priv->roc_cfg.chan_type = channel_type; - cfg80211_ready_on_channel(wdev, *cookie, chan, channel_type, + cfg80211_ready_on_channel(wdev, *cookie, chan, duration, GFP_ATOMIC); wiphy_dbg(wiphy, "info: ROC, cookie = 0x%llx\n", *cookie); @@ -302,13 +298,11 @@ mwifiex_cfg80211_cancel_remain_on_channel(struct wiphy *wiphy, return -ENOENT; ret = mwifiex_remain_on_chan_cfg(priv, HostCmd_ACT_GEN_REMOVE, - &priv->roc_cfg.chan, - &priv->roc_cfg.chan_type, 0); + &priv->roc_cfg.chan, 0); if (!ret) { cfg80211_remain_on_channel_expired(wdev, cookie, &priv->roc_cfg.chan, - priv->roc_cfg.chan_type, GFP_ATOMIC); memset(&priv->roc_cfg, 0, sizeof(struct mwifiex_roc_cfg)); diff --git a/drivers/net/wireless/mwifiex/main.h b/drivers/net/wireless/mwifiex/main.h index 81f8772dcb07..771717df1c59 100644 --- a/drivers/net/wireless/mwifiex/main.h +++ b/drivers/net/wireless/mwifiex/main.h @@ -371,7 +371,6 @@ struct wps { struct mwifiex_roc_cfg { u64 cookie; struct ieee80211_channel chan; - enum nl80211_channel_type chan_type; }; struct mwifiex_adapter; @@ -1016,7 +1015,6 @@ int mwifiex_get_ver_ext(struct mwifiex_private *priv); int mwifiex_remain_on_chan_cfg(struct mwifiex_private *priv, u16 action, struct ieee80211_channel *chan, - enum nl80211_channel_type *channel_type, unsigned int duration); int mwifiex_set_bss_role(struct mwifiex_private *priv, u8 bss_role); diff --git a/drivers/net/wireless/mwifiex/sta_event.c b/drivers/net/wireless/mwifiex/sta_event.c index 8132119e1a21..78dfa31c908c 100644 --- a/drivers/net/wireless/mwifiex/sta_event.c +++ b/drivers/net/wireless/mwifiex/sta_event.c @@ -424,7 +424,6 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) cfg80211_remain_on_channel_expired(priv->wdev, priv->roc_cfg.cookie, &priv->roc_cfg.chan, - priv->roc_cfg.chan_type, GFP_ATOMIC); memset(&priv->roc_cfg, 0x00, sizeof(struct mwifiex_roc_cfg)); diff --git a/drivers/net/wireless/mwifiex/sta_ioctl.c b/drivers/net/wireless/mwifiex/sta_ioctl.c index 552d72ed055a..24af6ba7d8a1 100644 --- a/drivers/net/wireless/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/mwifiex/sta_ioctl.c @@ -1046,7 +1046,6 @@ mwifiex_get_ver_ext(struct mwifiex_private *priv) int mwifiex_remain_on_chan_cfg(struct mwifiex_private *priv, u16 action, struct ieee80211_channel *chan, - enum nl80211_channel_type *ct, unsigned int duration) { struct host_cmd_ds_remain_on_chan roc_cfg; @@ -1056,7 +1055,7 @@ mwifiex_remain_on_chan_cfg(struct mwifiex_private *priv, u16 action, roc_cfg.action = cpu_to_le16(action); if (action == HostCmd_ACT_GEN_SET) { roc_cfg.band_cfg = chan->band; - sc = mwifiex_chan_type_to_sec_chan_offset(*ct); + sc = mwifiex_chan_type_to_sec_chan_offset(NL80211_CHAN_NO_HT); roc_cfg.band_cfg |= (sc << 2); roc_cfg.channel = diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c2c185febb87..1effe0682d28 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1791,7 +1791,6 @@ struct cfg80211_ops { int (*remain_on_channel)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie); int (*cancel_remain_on_channel)(struct wiphy *wiphy, @@ -1800,10 +1799,8 @@ struct cfg80211_ops { int (*mgmt_tx)(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie); + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie); int (*mgmt_tx_cancel_wait)(struct wiphy *wiphy, struct wireless_dev *wdev, u64 cookie); @@ -3350,14 +3347,12 @@ void cfg80211_disconnected(struct net_device *dev, u16 reason, * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) - * @channel_type: Channel type * @duration: Duration in milliseconds that the driver intents to remain on the * channel * @gfp: allocation flags */ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); /** @@ -3365,12 +3360,10 @@ void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, * @wdev: wireless device * @cookie: the request cookie * @chan: The current channel (from remain_on_channel request) - * @channel_type: Channel type * @gfp: allocation flags */ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e1293c7e4d2c..12093778b057 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2550,7 +2550,6 @@ struct ieee80211_ops { int (*remain_on_channel)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, int duration); int (*cancel_remain_on_channel)(struct ieee80211_hw *hw); int (*set_ringparam)(struct ieee80211_hw *hw, u32 tx, u32 rx); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1a9a819cfab0..43cd6fa084c5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -401,8 +401,7 @@ * a response while being associated to an AP on another channel. * %NL80211_ATTR_IFINDEX is used to specify which interface (and thus * radio) is used. %NL80211_ATTR_WIPHY_FREQ is used to specify the - * frequency for the operation and %NL80211_ATTR_WIPHY_CHANNEL_TYPE may be - * optionally used to specify additional channel parameters. + * frequency for the operation. * %NL80211_ATTR_DURATION is used to specify the duration in milliseconds * to remain on the channel. This command is also used as an event to * notify when the requested duration starts (it may take a while for the @@ -440,12 +439,11 @@ * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the - * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and - * optionally %NL80211_ATTR_WIPHY_CHANNEL_TYPE) is used to indicate on - * which channel the frame is to be transmitted or was received. If this - * channel is not the current channel (remain-on-channel or the - * operational channel) the device will switch to the given channel and - * transmit the frame, optionally waiting for a response for the time + * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ is used + * to indicate on which channel the frame is to be transmitted or was + * received. If this channel is not the current channel (remain-on-channel + * or the operational channel) the device will switch to the given channel + * and transmit the frame, optionally waiting for a response for the time * specified using %NL80211_ATTR_DURATION. When called, this operation * returns a cookie (%NL80211_ATTR_COOKIE) that will be included with the * TX status event pertaining to the TX request. diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 18926aea480c..ac0241e3539b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2236,7 +2236,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, static int ieee80211_start_roc_work(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie, struct sk_buff *txskb) { @@ -2254,7 +2253,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, return -ENOMEM; roc->chan = channel; - roc->chan_type = channel_type; roc->duration = duration; roc->req_duration = duration; roc->frame = txskb; @@ -2287,8 +2285,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, if (!duration) duration = 10; - ret = drv_remain_on_channel(local, sdata, channel, channel_type, - duration); + ret = drv_remain_on_channel(local, sdata, channel, duration); if (ret) { kfree(roc); return ret; @@ -2299,8 +2296,7 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, out_check_combine: list_for_each_entry(tmp, &local->roc_list, list) { - if (tmp->chan != channel || tmp->chan_type != channel_type || - tmp->sdata != sdata) + if (tmp->chan != channel || tmp->sdata != sdata) continue; /* @@ -2417,7 +2413,6 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local, static int ieee80211_remain_on_channel(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { @@ -2426,7 +2421,7 @@ static int ieee80211_remain_on_channel(struct wiphy *wiphy, int ret; mutex_lock(&local->mtx); - ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + ret = ieee80211_start_roc_work(local, sdata, chan, duration, cookie, NULL); mutex_unlock(&local->mtx); @@ -2519,10 +2514,8 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; @@ -2591,14 +2584,10 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); - if (chanctx_conf) { + if (chanctx_conf) need_offchan = chan != chanctx_conf->channel; - if (channel_type_valid && - channel_type != chanctx_conf->channel_type) - need_offchan = true; - } else { + else need_offchan = true; - } rcu_read_unlock(); } @@ -2633,7 +2622,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, local->hw.offchannel_tx_hw_queue; /* This will handle all kinds of coalescing and immediate TX */ - ret = ieee80211_start_roc_work(local, sdata, chan, channel_type, + ret = ieee80211_start_roc_work(local, sdata, chan, wait, cookie, skb); if (ret) kfree_skb(skb); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 68c27aaf5c93..c6560cc7a9d6 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -738,16 +738,15 @@ static inline int drv_get_antenna(struct ieee80211_local *local, static inline int drv_remain_on_channel(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - enum nl80211_channel_type chantype, unsigned int duration) { int ret; might_sleep(); - trace_drv_remain_on_channel(local, sdata, chan, chantype, duration); + trace_drv_remain_on_channel(local, sdata, chan, duration); ret = local->ops->remain_on_channel(&local->hw, &sdata->vif, - chan, chantype, duration); + chan, duration); trace_drv_return_int(local, ret); return ret; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index d5da0fe14318..fba4b1f425c1 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -348,7 +348,6 @@ struct ieee80211_roc_work { struct ieee80211_sub_if_data *sdata; struct ieee80211_channel *chan; - enum nl80211_channel_type chan_type; bool started, abort, hw_begun, notified; @@ -1048,7 +1047,6 @@ struct ieee80211_local { /* Temporary remain-on-channel for off-channel operations */ struct ieee80211_channel *tmp_channel; - enum nl80211_channel_type tmp_channel_type; /* channel contexts */ struct list_head chanctx_list; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 70e87600cacc..b229cded4567 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -115,7 +115,7 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local) channel_type = NL80211_CHAN_NO_HT; } else if (local->tmp_channel) { chan = local->tmp_channel; - channel_type = local->tmp_channel_type; + channel_type = NL80211_CHAN_NO_HT; } else { chan = local->_oper_channel; channel_type = local->_oper_channel_type; diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 7f8a36510813..5abddfe3e101 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -205,8 +205,8 @@ void ieee80211_handle_roc_started(struct ieee80211_roc_work *roc) } } else { cfg80211_ready_on_channel(&roc->sdata->wdev, roc->cookie, - roc->chan, roc->chan_type, - roc->req_duration, GFP_KERNEL); + roc->chan, roc->req_duration, + GFP_KERNEL); } roc->notified = true; @@ -284,7 +284,6 @@ void ieee80211_start_next_roc(struct ieee80211_local *local) duration = 10; ret = drv_remain_on_channel(local, roc->sdata, roc->chan, - roc->chan_type, duration); roc->started = true; @@ -321,7 +320,7 @@ void ieee80211_roc_notify_destroy(struct ieee80211_roc_work *roc) if (!roc->mgmt_tx_cookie) cfg80211_remain_on_channel_expired(&roc->sdata->wdev, roc->cookie, roc->chan, - roc->chan_type, GFP_KERNEL); + GFP_KERNEL); list_for_each_entry_safe(dep, tmp, &roc->dependents, list) ieee80211_roc_notify_destroy(dep); @@ -359,7 +358,6 @@ void ieee80211_sw_roc_work(struct work_struct *work) ieee80211_recalc_idle(local); local->tmp_channel = roc->chan; - local->tmp_channel_type = roc->chan_type; ieee80211_hw_config(local, 0); /* tell userspace or send frame */ diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index e9579b7a2cd0..bc28346ba207 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1022,15 +1022,14 @@ TRACE_EVENT(drv_remain_on_channel, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *chan, - enum nl80211_channel_type chantype, unsigned int duration), + unsigned int duration), - TP_ARGS(local, sdata, chan, chantype, duration), + TP_ARGS(local, sdata, chan, duration), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(int, center_freq) - __field(int, channel_type) __field(unsigned int, duration) ), @@ -1038,7 +1037,6 @@ TRACE_EVENT(drv_remain_on_channel, LOCAL_ASSIGN; VIF_ASSIGN; __entry->center_freq = chan->center_freq; - __entry->channel_type = chantype; __entry->duration = duration; ), diff --git a/net/wireless/core.h b/net/wireless/core.h index e53831c876bb..b0a09cf56e06 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -378,10 +378,8 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie); + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie); void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa, const struct ieee80211_ht_cap *ht_capa_mask); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 4bfd14f7c592..a9646b53a095 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -579,31 +579,25 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev, void cfg80211_ready_on_channel(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_ready_on_channel(wdev, cookie, chan, channel_type, - duration); - nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, channel_type, - duration, gfp); + trace_cfg80211_ready_on_channel(wdev, cookie, chan, duration); + nl80211_send_remain_on_channel(rdev, wdev, cookie, chan, duration, gfp); } EXPORT_SYMBOL(cfg80211_ready_on_channel); void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan, - channel_type); - nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, - channel_type, gfp); + trace_cfg80211_ready_on_channel_expired(wdev, cookie, chan); + nl80211_send_remain_on_channel_cancel(rdev, wdev, cookie, chan, gfp); } EXPORT_SYMBOL(cfg80211_remain_on_channel_expired); @@ -758,10 +752,8 @@ void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { const struct ieee80211_mgmt *mgmt; u16 stype; @@ -855,7 +847,6 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, /* Transmit the Action frame as requested by user space */ return rdev_mgmt_tx(rdev, wdev, chan, offchan, - channel_type, channel_type_valid, wait, buf, len, no_cck, dont_wait_for_ack, cookie); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4c427fa5c450..e880f4494950 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5952,7 +5952,6 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, struct sk_buff *msg; void *hdr; u64 cookie; - enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; u32 freq, duration; int err; @@ -5975,11 +5974,11 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, return -EINVAL; if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && - !nl80211_valid_channel_type(info, &channel_type)) + !nl80211_valid_channel_type(info, NULL)) return -EINVAL; freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); - chan = rdev_freq_to_chan(rdev, freq, channel_type); + chan = rdev_freq_to_chan(rdev, freq, NL80211_CHAN_NO_HT); if (chan == NULL) return -EINVAL; @@ -5995,8 +5994,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, goto free_msg; } - err = rdev_remain_on_channel(rdev, wdev, chan, channel_type, duration, - &cookie); + err = rdev_remain_on_channel(rdev, wdev, chan, duration, &cookie); if (err) goto free_msg; @@ -6216,8 +6214,6 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; struct ieee80211_channel *chan; - enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT; - bool channel_type_valid = false; u32 freq; int err; void *hdr = NULL; @@ -6264,11 +6260,9 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } - if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { - if (!nl80211_valid_channel_type(info, &channel_type)) - return -EINVAL; - channel_type_valid = true; - } + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && + !nl80211_valid_channel_type(info, NULL)) + return -EINVAL; offchan = info->attrs[NL80211_ATTR_OFFCHANNEL_TX_OK]; @@ -6278,7 +6272,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); - chan = rdev_freq_to_chan(rdev, freq, channel_type); + chan = rdev_freq_to_chan(rdev, freq, NL80211_CHAN_NO_HT); if (chan == NULL) return -EINVAL; @@ -6296,8 +6290,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) } } - err = cfg80211_mlme_mgmt_tx(rdev, wdev, chan, offchan, channel_type, - channel_type_valid, wait, + err = cfg80211_mlme_mgmt_tx(rdev, wdev, chan, offchan, wait, nla_data(info->attrs[NL80211_ATTR_FRAME]), nla_len(info->attrs[NL80211_ATTR_FRAME]), no_cck, dont_wait_for_ack, &cookie); @@ -8395,7 +8388,6 @@ static void nl80211_send_remain_on_chan_event( int cmd, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { struct sk_buff *msg; @@ -8416,7 +8408,8 @@ static void nl80211_send_remain_on_chan_event( wdev->netdev->ifindex)) || nla_put_u64(msg, NL80211_ATTR_WDEV, wdev_id(wdev)) || nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chan->center_freq) || - nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, channel_type) || + nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + NL80211_CHAN_NO_HT) || nla_put_u64(msg, NL80211_ATTR_COOKIE, cookie)) goto nla_put_failure; @@ -8438,23 +8431,20 @@ static void nl80211_send_remain_on_chan_event( void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp) { nl80211_send_remain_on_chan_event(NL80211_CMD_REMAIN_ON_CHANNEL, rdev, wdev, cookie, chan, - channel_type, duration, gfp); + duration, gfp); } void nl80211_send_remain_on_channel_cancel( struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp) + u64 cookie, struct ieee80211_channel *chan, gfp_t gfp) { nl80211_send_remain_on_chan_event(NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, - rdev, wdev, cookie, chan, - channel_type, 0, gfp); + rdev, wdev, cookie, chan, 0, gfp); } void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index f6153516068c..7adbd767dbfd 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -76,13 +76,11 @@ void nl80211_send_ibss_bssid(struct cfg80211_registered_device *rdev, void nl80211_send_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, gfp_t gfp); void nl80211_send_remain_on_channel_cancel( struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, gfp_t gfp); + u64 cookie, struct ieee80211_channel *chan, gfp_t gfp); void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 6e5fa659068d..ee54a5aa4381 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -600,14 +600,12 @@ static inline int rdev_remain_on_channel(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration, u64 *cookie) { int ret; - trace_rdev_remain_on_channel(&rdev->wiphy, wdev, chan, channel_type, - duration); + trace_rdev_remain_on_channel(&rdev->wiphy, wdev, chan, duration); ret = rdev->ops->remain_on_channel(&rdev->wiphy, wdev, chan, - channel_type, duration, cookie); + duration, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } @@ -626,17 +624,15 @@ rdev_cancel_remain_on_channel(struct cfg80211_registered_device *rdev, static inline int rdev_mgmt_tx(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, - const u8 *buf, size_t len, bool no_cck, - bool dont_wait_for_ack, u64 *cookie) + unsigned int wait, const u8 *buf, size_t len, + bool no_cck, bool dont_wait_for_ack, u64 *cookie) { int ret; - trace_rdev_mgmt_tx(&rdev->wiphy, wdev, chan, offchan, channel_type, - channel_type_valid, wait, no_cck, dont_wait_for_ack); + trace_rdev_mgmt_tx(&rdev->wiphy, wdev, chan, offchan, + wait, no_cck, dont_wait_for_ack); ret = rdev->ops->mgmt_tx(&rdev->wiphy, wdev, chan, offchan, - channel_type, channel_type_valid, wait, buf, - len, no_cck, dont_wait_for_ack, cookie); + wait, buf, len, no_cck, + dont_wait_for_ack, cookie); trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie); return ret; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index f264c20a7090..ed10833f9a3a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1573,25 +1573,22 @@ DEFINE_EVENT(rdev_pmksa, rdev_del_pmksa, TRACE_EVENT(rdev_remain_on_channel, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration), - TP_ARGS(wiphy, wdev, chan, channel_type, duration), + unsigned int duration), + TP_ARGS(wiphy, wdev, chan, duration), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY - __field(enum nl80211_channel_type, channel_type) __field(unsigned int, duration) ), TP_fast_assign( WIPHY_ASSIGN; WDEV_ASSIGN; CHAN_ASSIGN(chan); - __entry->channel_type = channel_type; __entry->duration = duration; ), - TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", channel type: %d, duration: %u", - WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, __entry->channel_type, - __entry->duration) + TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", duration: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, __entry->duration) ); TRACE_EVENT(rdev_return_int_cookie, @@ -1631,18 +1628,13 @@ TRACE_EVENT(rdev_cancel_remain_on_channel, TRACE_EVENT(rdev_mgmt_tx, TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct ieee80211_channel *chan, bool offchan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, unsigned int wait, bool no_cck, - bool dont_wait_for_ack), - TP_ARGS(wiphy, wdev, chan, offchan, channel_type, channel_type_valid, - wait, no_cck, dont_wait_for_ack), + unsigned int wait, bool no_cck, bool dont_wait_for_ack), + TP_ARGS(wiphy, wdev, chan, offchan, wait, no_cck, dont_wait_for_ack), TP_STRUCT__entry( WIPHY_ENTRY WDEV_ENTRY CHAN_ENTRY __field(bool, offchan) - __field(enum nl80211_channel_type, channel_type) - __field(bool, channel_type_valid) __field(unsigned int, wait) __field(bool, no_cck) __field(bool, dont_wait_for_ack) @@ -1652,18 +1644,14 @@ TRACE_EVENT(rdev_mgmt_tx, WDEV_ASSIGN; CHAN_ASSIGN(chan); __entry->offchan = offchan; - __entry->channel_type = channel_type; - __entry->channel_type_valid = channel_type_valid; __entry->wait = wait; __entry->no_cck = no_cck; __entry->dont_wait_for_ack = dont_wait_for_ack; ), - TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", offchan: %s, " - "channel type: %d, channel type valid: %s, wait: %u, " - "no cck: %s, dont wait for ack: %s", + TP_printk(WIPHY_PR_FMT WDEV_PR_FMT CHAN_PR_FMT ", offchan: %s," + " wait: %u, no cck: %s, dont wait for ack: %s", WIPHY_PR_ARG, WDEV_PR_ARG, CHAN_PR_ARG, - BOOL_TO_STR(__entry->offchan), __entry->channel_type, - BOOL_TO_STR(__entry->channel_type_valid), __entry->wait, + BOOL_TO_STR(__entry->offchan), __entry->wait, BOOL_TO_STR(__entry->no_cck), BOOL_TO_STR(__entry->dont_wait_for_ack)) ); @@ -1894,47 +1882,41 @@ TRACE_EVENT(cfg80211_michael_mic_failure, TRACE_EVENT(cfg80211_ready_on_channel, TP_PROTO(struct wireless_dev *wdev, u64 cookie, struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, unsigned int duration), - TP_ARGS(wdev, cookie, chan, channel_type, duration), + unsigned int duration), + TP_ARGS(wdev, cookie, chan, duration), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) CHAN_ENTRY - __field(enum nl80211_channel_type, channel_type) __field(unsigned int, duration) ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; CHAN_ASSIGN(chan); - __entry->channel_type = channel_type; __entry->duration = duration; ), - TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", channel type: %d, duration: %u", + TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", duration: %u", WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG, - __entry->channel_type, __entry->duration) + __entry->duration) ); TRACE_EVENT(cfg80211_ready_on_channel_expired, TP_PROTO(struct wireless_dev *wdev, u64 cookie, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type), - TP_ARGS(wdev, cookie, chan, channel_type), + struct ieee80211_channel *chan), + TP_ARGS(wdev, cookie, chan), TP_STRUCT__entry( WDEV_ENTRY __field(u64, cookie) CHAN_ENTRY - __field(enum nl80211_channel_type, channel_type) ), TP_fast_assign( WDEV_ASSIGN; __entry->cookie = cookie; CHAN_ASSIGN(chan); - __entry->channel_type = channel_type; ), - TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT ", channel type: %d", - WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG, - __entry->channel_type) + TP_printk(WDEV_PR_FMT ", cookie: %llu, " CHAN_PR_FMT, + WDEV_PR_ARG, __entry->cookie, CHAN_PR_ARG) ); TRACE_EVENT(cfg80211_new_sta, -- cgit v1.2.3 From fe4b31810c06cc6518fb193efb9b3c3289b55832 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2012 19:20:56 +0100 Subject: nl80211: add documentation for channel type Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 43cd6fa084c5..82b5ad38435b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2438,6 +2438,15 @@ enum nl80211_ac { #define NL80211_TXQ_Q_BE NL80211_AC_BE #define NL80211_TXQ_Q_BK NL80211_AC_BK +/** + * enum nl80211_channel_type - channel type + * @NL80211_CHAN_NO_HT: 20 MHz, non-HT channel + * @NL80211_CHAN_HT20: 20 MHz HT channel + * @NL80211_CHAN_HT40MINUS: HT40 channel, secondary channel + * below the control channel + * @NL80211_CHAN_HT40PLUS: HT40 channel, secondary channel + * above the control channel + */ enum nl80211_channel_type { NL80211_CHAN_NO_HT, NL80211_CHAN_HT20, -- cgit v1.2.3 From 3d9d1d6656a73ea8407734cfb00b81d14ef62d4b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Nov 2012 23:14:50 +0100 Subject: nl80211/cfg80211: support VHT channel configuration Change nl80211 to support specifying a VHT (or HT) using the control channel frequency (as before) and new attributes for the channel width and first and second center frequency. The old channel type is of course still supported for HT. Also change the cfg80211 channel definition struct to support these by adding the relevant fields to it (and removing the _type field.) This also adds new helper functions: - cfg80211_chandef_create to create a channel def struct given the control channel and channel type, - cfg80211_chandef_identical to check if two channel definitions are identical - cfg80211_chandef_compatible to check if the given channel definitions are compatible, and return the wider of the two This isn't entirely complete, but that doesn't matter until we have a driver using it. In particular, it's missing - regulatory checks on the usable bandwidth (if that even makes sense) - regulatory TX power (database can't deal with it) - a proper channel compatibility calculation for the new channel types Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 10 +- include/net/cfg80211.h | 73 ++++++++- include/uapi/linux/nl80211.h | 61 +++++-- net/mac80211/cfg.c | 5 +- net/mac80211/ibss.c | 13 +- net/wireless/chan.c | 249 ++++++++++++++++++++++++++--- net/wireless/core.h | 6 + net/wireless/ibss.c | 4 +- net/wireless/mesh.c | 4 +- net/wireless/nl80211.c | 160 ++++++++++++------ net/wireless/trace.h | 28 ++-- net/wireless/wext-compat.c | 4 +- net/wireless/wext-sme.c | 3 +- 13 files changed, 507 insertions(+), 113 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index c0cc2e59fe6c..51bbe85c574c 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1099,12 +1099,10 @@ void ath6kl_cfg80211_ch_switch_notify(struct ath6kl_vif *vif, int freq, "channel switch notify nw_type %d freq %d mode %d\n", vif->nw_type, freq, mode); - chandef.chan = ieee80211_get_channel(vif->ar->wiphy, freq); - if (WARN_ON(!chandef.chan)) - return; - - chandef._type = (mode == WMI_11G_HT20) ? - NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT; + cfg80211_chandef_create(&chandef, + ieee80211_get_channel(vif->ar->wiphy, freq), + (mode == WMI_11G_HT20) ? + NL80211_CHAN_HT20 : NL80211_CHAN_NO_HT); cfg80211_ch_switch_notify(vif->ndev, &chandef); } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 86f777af79e8..977da58fb7ea 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -308,20 +308,85 @@ struct key_params { /** * struct cfg80211_chan_def - channel definition * @chan: the (control) channel - * @_type: the channel type, don't use this field, - * use cfg80211_get_chandef_type() if needed. + * @width: channel width + * @center_freq1: center frequency of first segment + * @center_freq2: center frequency of second segment + * (only with 80+80 MHz) */ struct cfg80211_chan_def { struct ieee80211_channel *chan; - enum nl80211_channel_type _type; + enum nl80211_chan_width width; + u32 center_freq1; + u32 center_freq2; }; +/** + * cfg80211_get_chandef_type - return old channel type from chandef + * @chandef: the channel definition + * + * Returns the old channel type (NOHT, HT20, HT40+/-) from a given + * chandef, which must have a bandwidth allowing this conversion. + */ static inline enum nl80211_channel_type cfg80211_get_chandef_type(const struct cfg80211_chan_def *chandef) { - return chandef->_type; + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + return NL80211_CHAN_NO_HT; + case NL80211_CHAN_WIDTH_20: + return NL80211_CHAN_HT20; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 > chandef->chan->center_freq) + return NL80211_CHAN_HT40PLUS; + return NL80211_CHAN_HT40MINUS; + default: + WARN_ON(1); + return NL80211_CHAN_NO_HT; + } +} + +/** + * cfg80211_chandef_create - create channel definition using channel type + * @chandef: the channel definition struct to fill + * @channel: the control channel + * @chantype: the channel type + * + * Given a channel type, create a channel definition. + */ +void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *channel, + enum nl80211_channel_type chantype); + +/** + * cfg80211_chandef_identical - check if two channel definitions are identical + * @chandef1: first channel definition + * @chandef2: second channel definition + * + * Returns %true if the channels defined by the channel definitions are + * identical, %false otherwise. + */ +static inline bool +cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1, + const struct cfg80211_chan_def *chandef2) +{ + return (chandef1->chan == chandef2->chan && + chandef1->width == chandef2->width && + chandef1->center_freq1 == chandef2->center_freq1 && + chandef1->center_freq2 == chandef2->center_freq2); } +/** + * cfg80211_chandef_compatible - check if two channel definitions are compatible + * @chandef1: first channel definition + * @chandef2: second channel definition + * + * Returns %NULL if the given channel definitions are incompatible, + * chandef1 or chandef2 otherwise. + */ +const struct cfg80211_chan_def * +cfg80211_chandef_compatible(const struct cfg80211_chan_def *chandef1, + const struct cfg80211_chan_def *chandef2); + /** * enum survey_info_flags - survey information flags * diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 82b5ad38435b..84f9c7d84c69 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -118,8 +118,9 @@ * to get a list of all present wiphys. * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or * %NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME, - * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ, - * %NL80211_ATTR_WIPHY_CHANNEL_TYPE, %NL80211_ATTR_WIPHY_RETRY_SHORT, + * %NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ (and the + * attributes determining the channel width; this is used for setting + * monitor mode channel), %NL80211_ATTR_WIPHY_RETRY_SHORT, * %NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD, * and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD. * However, for setting the channel, see %NL80211_CMD_SET_CHANNEL @@ -171,7 +172,7 @@ * %NL80211_ATTR_AKM_SUITES, %NL80211_ATTR_PRIVACY, * %NL80211_ATTR_AUTH_TYPE and %NL80211_ATTR_INACTIVITY_TIMEOUT. * The channel to use can be set on the interface or be given using the - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_WIPHY_CHANNEL_TYPE attrs. + * %NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width. * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP @@ -471,8 +472,8 @@ * command is used as an event to indicate the that a trigger level was * reached. * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ - * and %NL80211_ATTR_WIPHY_CHANNEL_TYPE) the given interface (identifed - * by %NL80211_ATTR_IFINDEX) shall operate on. + * and the attributes determining channel width) the given interface + * (identifed by %NL80211_ATTR_IFINDEX) shall operate on. * In case multiple channels are supported by the device, the mechanism * with which it switches channels is implementation-defined. * When a monitor interface is given, it can only switch channel while @@ -566,8 +567,8 @@ * * @NL80211_CMD_CH_SWITCH_NOTIFY: An AP or GO may decide to switch channels * independently of the userspace SME, send this event indicating - * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ with - * %NL80211_ATTR_WIPHY_CHANNEL_TYPE. + * %NL80211_ATTR_IFINDEX is now on %NL80211_ATTR_WIPHY_FREQ and the + * attributes determining channel width. * * @NL80211_CMD_START_P2P_DEVICE: Start the given P2P Device, identified by * its %NL80211_ATTR_WDEV identifier. It must have been created with @@ -771,14 +772,26 @@ enum nl80211_commands { * /sys/class/ieee80211//index * @NL80211_ATTR_WIPHY_NAME: wiphy name (used for renaming) * @NL80211_ATTR_WIPHY_TXQ_PARAMS: a nested array of TX queue parameters - * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz + * @NL80211_ATTR_WIPHY_FREQ: frequency of the selected channel in MHz, + * defines the channel together with the (deprecated) + * %NL80211_ATTR_WIPHY_CHANNEL_TYPE attribute or the attributes + * %NL80211_ATTR_CHANNEL_WIDTH and if needed %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 + * @NL80211_ATTR_CHANNEL_WIDTH: u32 attribute containing one of the values + * of &enum nl80211_chan_width, describing the channel width. See the + * documentation of the enum for more information. + * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the + * channel, used for anything but 20 MHz bandwidth + * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the + * channel, used only for 80+80 MHz bandwidth * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ - * if HT20 or HT40 are allowed (i.e., 802.11n disabled if not included): + * if HT20 or HT40 are to be used (i.e., HT disabled if not included): * NL80211_CHAN_NO_HT = HT not allowed (i.e., same as not including * this attribute) * NL80211_CHAN_HT20 = HT20 only * NL80211_CHAN_HT40MINUS = secondary channel is below the primary channel * NL80211_CHAN_HT40PLUS = secondary channel is above the primary channel + * This attribute is now deprecated. * @NL80211_ATTR_WIPHY_RETRY_SHORT: TX retry limit for frames whose length is * less than or equal to the RTS threshold; allowed range: 1..255; * dot11ShortRetryLimit; u8 @@ -1553,6 +1566,10 @@ enum nl80211_attrs { NL80211_ATTR_SCAN_FLAGS, + NL80211_ATTR_CHANNEL_WIDTH, + NL80211_ATTR_CENTER_FREQ1, + NL80211_ATTR_CENTER_FREQ2, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2454,6 +2471,32 @@ enum nl80211_channel_type { NL80211_CHAN_HT40PLUS }; +/** + * enum nl80211_chan_width - channel width definitions + * + * These values are used with the %NL80211_ATTR_CHANNEL_WIDTH + * attribute. + * + * @NL80211_CHAN_WIDTH_20_NOHT: 20 MHz, non-HT channel + * @NL80211_CHAN_WIDTH_20: 20 MHz HT channel + * @NL80211_CHAN_WIDTH_40: 40 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80: 80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + * @NL80211_CHAN_WIDTH_80P80: 80+80 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * and %NL80211_ATTR_CENTER_FREQ2 attributes must be provided as well + * @NL80211_CHAN_WIDTH_160: 160 MHz channel, the %NL80211_ATTR_CENTER_FREQ1 + * attribute must be provided as well + */ +enum nl80211_chan_width { + NL80211_CHAN_WIDTH_20_NOHT, + NL80211_CHAN_WIDTH_20, + NL80211_CHAN_WIDTH_40, + NL80211_CHAN_WIDTH_80, + NL80211_CHAN_WIDTH_80P80, + NL80211_CHAN_WIDTH_160, +}; + /** * enum nl80211_bss - netlink attributes for a BSS * diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fbb2d072cb9e..7136b945798e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3125,8 +3125,9 @@ static int ieee80211_cfg_get_channel(struct wiphy *wiphy, rcu_read_lock(); chanctx_conf = rcu_dereference(sdata->vif.chanctx_conf); if (chanctx_conf) { - chandef->chan = chanctx_conf->channel; - chandef->_type = chanctx_conf->channel_type; + cfg80211_chandef_create(chandef, + chanctx_conf->channel, + chanctx_conf->channel_type); ret = 0; } rcu_read_unlock(); diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index bed616fd97e9..5648bbed240b 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -52,6 +52,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, u32 bss_change; u8 supp_rates[IEEE80211_MAX_SUPP_RATES]; struct cfg80211_chan_def chandef; + enum nl80211_channel_type chan_type; lockdep_assert_held(&ifibss->mtx); @@ -79,13 +80,13 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, sdata->drop_unencrypted = capability & WLAN_CAPABILITY_PRIVACY ? 1 : 0; - chandef.chan = chan; - chandef._type = ifibss->channel_type; + chan_type = ifibss->channel_type; + cfg80211_chandef_create(&chandef, chan, chan_type); if (!cfg80211_reg_can_beacon(local->hw.wiphy, &chandef)) - chandef._type = NL80211_CHAN_HT20; + chan_type = NL80211_CHAN_HT20; ieee80211_vif_release_channel(sdata); - if (ieee80211_vif_use_channel(sdata, chan, chandef._type, + if (ieee80211_vif_use_channel(sdata, chan, chan_type, ifibss->fixed_channel ? IEEE80211_CHANCTX_SHARED : IEEE80211_CHANCTX_EXCLUSIVE)) { @@ -159,7 +160,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, ifibss->ie, ifibss->ie_len); /* add HT capability and information IEs */ - if (chandef._type != NL80211_CHAN_NO_HT && + if (chan_type != NL80211_CHAN_NO_HT && sband->ht_cap.ht_supported) { pos = skb_put(skb, 4 + sizeof(struct ieee80211_ht_cap) + @@ -172,7 +173,7 @@ static void __ieee80211_sta_join_ibss(struct ieee80211_sub_if_data *sdata, * keep them at 0 */ pos = ieee80211_ie_build_ht_oper(pos, &sband->ht_cap, - chan, chandef._type, 0); + chan, chan_type, 0); } if (local->hw.queues >= IEEE80211_NUM_ACS) { diff --git a/net/wireless/chan.c b/net/wireless/chan.c index e834422de40a..bf2dfd54ff3b 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -11,43 +11,252 @@ #include "core.h" #include "rdev-ops.h" -bool cfg80211_reg_can_beacon(struct wiphy *wiphy, - struct cfg80211_chan_def *chandef) +void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan, + enum nl80211_channel_type chan_type) { - struct ieee80211_channel *sec_chan; - int diff; + if (WARN_ON(!chan)) + return; - trace_cfg80211_reg_can_beacon(wiphy, chandef); + chandef->chan = chan; + chandef->center_freq2 = 0; - switch (chandef->_type) { + switch (chan_type) { + case NL80211_CHAN_NO_HT: + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = chan->center_freq; + break; + case NL80211_CHAN_HT20: + chandef->width = NL80211_CHAN_WIDTH_20; + chandef->center_freq1 = chan->center_freq; + break; case NL80211_CHAN_HT40PLUS: - diff = 20; + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 = chan->center_freq + 10; break; case NL80211_CHAN_HT40MINUS: - diff = -20; + chandef->width = NL80211_CHAN_WIDTH_40; + chandef->center_freq1 = chan->center_freq - 10; + break; + default: + WARN_ON(1); + } +} +EXPORT_SYMBOL(cfg80211_chandef_create); + +bool cfg80211_chan_def_valid(const struct cfg80211_chan_def *chandef) +{ + u32 control_freq; + + if (!chandef->chan) + return false; + + control_freq = chandef->chan->center_freq; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + if (chandef->center_freq1 != control_freq) + return false; + if (chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10) + return false; + if (chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_80P80: + if (chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30) + return false; + if (!chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_80: + if (chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30) + return false; + if (chandef->center_freq2) + return false; + break; + case NL80211_CHAN_WIDTH_160: + if (chandef->center_freq1 != control_freq + 70 && + chandef->center_freq1 != control_freq + 50 && + chandef->center_freq1 != control_freq + 30 && + chandef->center_freq1 != control_freq + 10 && + chandef->center_freq1 != control_freq - 10 && + chandef->center_freq1 != control_freq - 30 && + chandef->center_freq1 != control_freq - 50 && + chandef->center_freq1 != control_freq - 70) + return false; + if (chandef->center_freq2) + return false; + break; + default: + return false; + } + + return true; +} + +static void chandef_primary_freqs(const struct cfg80211_chan_def *c, + int *pri40, int *pri80) +{ + int tmp; + + switch (c->width) { + case NL80211_CHAN_WIDTH_40: + *pri40 = c->center_freq1; + *pri80 = 0; + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + *pri80 = c->center_freq1; + /* n_P20 */ + tmp = (30 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + *pri40 = c->center_freq1 - 20 + 40 * tmp; + break; + case NL80211_CHAN_WIDTH_160: + /* n_P20 */ + tmp = (70 + c->chan->center_freq - c->center_freq1)/20; + /* n_P40 */ + tmp /= 2; + /* freq_P40 */ + *pri40 = c->center_freq1 - 60 + 40 * tmp; + /* n_P80 */ + tmp /= 2; + *pri80 = c->center_freq1 - 40 + 80 * tmp; break; default: - trace_cfg80211_return_bool(true); - return true; + WARN_ON_ONCE(1); } +} + +const struct cfg80211_chan_def * +cfg80211_chandef_compatible(const struct cfg80211_chan_def *c1, + const struct cfg80211_chan_def *c2) +{ + u32 c1_pri40, c1_pri80, c2_pri40, c2_pri80; - sec_chan = ieee80211_get_channel(wiphy, - chandef->chan->center_freq + diff); - if (!sec_chan) { + /* If they are identical, return */ + if (cfg80211_chandef_identical(c1, c2)) + return c1; + + /* otherwise, must have same control channel */ + if (c1->chan != c2->chan) + return NULL; + + /* + * If they have the same width, but aren't identical, + * then they can't be compatible. + */ + if (c1->width == c2->width) + return NULL; + + if (c1->width == NL80211_CHAN_WIDTH_20_NOHT || + c1->width == NL80211_CHAN_WIDTH_20) + return c2; + + if (c2->width == NL80211_CHAN_WIDTH_20_NOHT || + c2->width == NL80211_CHAN_WIDTH_20) + return c1; + + chandef_primary_freqs(c1, &c1_pri40, &c1_pri80); + chandef_primary_freqs(c2, &c2_pri40, &c2_pri80); + + if (c1_pri40 != c2_pri40) + return NULL; + + WARN_ON(!c1_pri80 && !c2_pri80); + if (c1_pri80 && c2_pri80 && c1_pri80 != c2_pri80) + return NULL; + + if (c1->width > c2->width) + return c1; + return c2; +} +EXPORT_SYMBOL(cfg80211_chandef_compatible); + +bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, + u32 center_freq, u32 bandwidth, + u32 prohibited_flags) +{ + struct ieee80211_channel *c; + u32 freq; + + for (freq = center_freq - bandwidth/2 + 10; + freq <= center_freq + bandwidth/2 - 10; + freq += 20) { + c = ieee80211_get_channel(wiphy, freq); + if (!c || c->flags & prohibited_flags) + return false; + } + + return true; +} + +static bool cfg80211_check_beacon_chans(struct wiphy *wiphy, + u32 center_freq, u32 bw) +{ + return cfg80211_secondary_chans_ok(wiphy, center_freq, bw, + IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_PASSIVE_SCAN | + IEEE80211_CHAN_NO_IBSS | + IEEE80211_CHAN_RADAR); +} + +bool cfg80211_reg_can_beacon(struct wiphy *wiphy, + struct cfg80211_chan_def *chandef) +{ + u32 width; + bool res; + + trace_cfg80211_reg_can_beacon(wiphy, chandef); + + if (WARN_ON(!cfg80211_chan_def_valid(chandef))) { trace_cfg80211_return_bool(false); return false; } - /* we'll need a DFS capability later */ - if (sec_chan->flags & (IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_PASSIVE_SCAN | - IEEE80211_CHAN_NO_IBSS | - IEEE80211_CHAN_RADAR)) { + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + width = 20; + break; + case NL80211_CHAN_WIDTH_40: + width = 40; + break; + case NL80211_CHAN_WIDTH_80: + case NL80211_CHAN_WIDTH_80P80: + width = 80; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + break; + default: + WARN_ON_ONCE(1); trace_cfg80211_return_bool(false); return false; } - trace_cfg80211_return_bool(true); - return true; + + res = cfg80211_check_beacon_chans(wiphy, chandef->center_freq1, width); + + if (res && chandef->center_freq2) + res = cfg80211_check_beacon_chans(wiphy, chandef->center_freq2, + width); + + trace_cfg80211_return_bool(res); + return res; } EXPORT_SYMBOL(cfg80211_reg_can_beacon); diff --git a/net/wireless/core.h b/net/wireless/core.h index 6183a0d25b8b..a0c8decf6a47 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -483,6 +483,12 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +bool cfg80211_chan_def_valid(const struct cfg80211_chan_def *chandef); + +bool cfg80211_secondary_chans_ok(struct wiphy *wiphy, + u32 center_freq, u32 bandwidth, + u32 prohibited_flags); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index ccc8865dfadb..9b9551e4a6f9 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -252,7 +252,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, /* try to find an IBSS channel if none requested ... */ if (!wdev->wext.ibss.chandef.chan) { - wdev->wext.ibss.chandef._type = NL80211_CHAN_NO_HT; + wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; for (band = 0; band < IEEE80211_NUM_BANDS; band++) { struct ieee80211_supported_band *sband; @@ -352,7 +352,7 @@ int cfg80211_ibss_wext_siwfreq(struct net_device *dev, if (chan) { wdev->wext.ibss.chandef.chan = chan; - wdev->wext.ibss.chandef._type = NL80211_CHAN_NO_HT; + wdev->wext.ibss.chandef.width = NL80211_CHAN_WIDTH_20_NOHT; wdev->wext.ibss.channel_fixed = true; } else { /* cfg80211_ibss_wext_join will pick one if needed */ diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 12b5a570a306..3ee5a7282283 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -146,7 +146,7 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, if (!setup->chandef.chan) return -EINVAL; - setup->chandef._type = NL80211_CHAN_NO_HT; + setup->chandef.width = NL80211_CHAN_WIDTH_20_NOHT;; } if (!cfg80211_reg_can_beacon(&rdev->wiphy, &setup->chandef)) @@ -198,7 +198,7 @@ int cfg80211_set_mesh_channel(struct cfg80211_registered_device *rdev, * compatible with 802.11 mesh. */ if (rdev->ops->libertas_set_mesh_channel) { - if (chandef->_type != NL80211_CHAN_NO_HT) + if (chandef->width != NL80211_CHAN_WIDTH_20_NOHT) return -EINVAL; if (!netif_running(wdev->netdev)) diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 999108cd947c..15158a3d64a3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -223,8 +223,13 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, [NL80211_ATTR_WIPHY_TXQ_PARAMS] = { .type = NLA_NESTED }, + [NL80211_ATTR_WIPHY_FREQ] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_CHANNEL_TYPE] = { .type = NLA_U32 }, + [NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 }, + [NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 }, + [NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_RETRY_SHORT] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, @@ -1360,35 +1365,13 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) wdev->iftype == NL80211_IFTYPE_P2P_GO; } -static bool nl80211_valid_channel_type(struct genl_info *info, - enum nl80211_channel_type *channel_type) -{ - enum nl80211_channel_type tmp; - - if (!info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) - return false; - - tmp = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); - if (tmp != NL80211_CHAN_NO_HT && - tmp != NL80211_CHAN_HT20 && - tmp != NL80211_CHAN_HT40PLUS && - tmp != NL80211_CHAN_HT40MINUS) - return false; - - if (channel_type) - *channel_type = tmp; - - return true; -} - static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, struct genl_info *info, struct cfg80211_chan_def *chandef) { struct ieee80211_sta_ht_cap *ht_cap; - struct ieee80211_channel *sc; - u32 control_freq; - int offs; + struct ieee80211_sta_vht_cap *vht_cap; + u32 control_freq, width; if (!info->attrs[NL80211_ATTR_WIPHY_FREQ]) return -EINVAL; @@ -1396,47 +1379,105 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, control_freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]); chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq); - chandef->_type = NL80211_CHAN_NO_HT; - - if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE] && - !nl80211_valid_channel_type(info, &chandef->_type)) - return -EINVAL; + chandef->width = NL80211_CHAN_WIDTH_20_NOHT; + chandef->center_freq1 = control_freq; + chandef->center_freq2 = 0; /* Primary channel not allowed */ if (!chandef->chan || chandef->chan->flags & IEEE80211_CHAN_DISABLED) return -EINVAL; + if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) { + enum nl80211_channel_type chantype; + + chantype = nla_get_u32( + info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]); + + switch (chantype) { + case NL80211_CHAN_NO_HT: + case NL80211_CHAN_HT20: + case NL80211_CHAN_HT40PLUS: + case NL80211_CHAN_HT40MINUS: + cfg80211_chandef_create(chandef, chandef->chan, + chantype); + break; + default: + return -EINVAL; + } + } else if (info->attrs[NL80211_ATTR_CHANNEL_WIDTH]) { + chandef->width = + nla_get_u32(info->attrs[NL80211_ATTR_CHANNEL_WIDTH]); + if (info->attrs[NL80211_ATTR_CENTER_FREQ1]) + chandef->center_freq1 = + nla_get_u32( + info->attrs[NL80211_ATTR_CENTER_FREQ1]); + if (info->attrs[NL80211_ATTR_CENTER_FREQ2]) + chandef->center_freq2 = + nla_get_u32( + info->attrs[NL80211_ATTR_CENTER_FREQ2]); + } + ht_cap = &rdev->wiphy.bands[chandef->chan->band]->ht_cap; + vht_cap = &rdev->wiphy.bands[chandef->chan->band]->vht_cap; - switch (chandef->_type) { - case NL80211_CHAN_NO_HT: + if (!cfg80211_chan_def_valid(chandef)) + return -EINVAL; + + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20: + if (!ht_cap->ht_supported) + return -EINVAL; + case NL80211_CHAN_WIDTH_20_NOHT: + width = 20; break; - case NL80211_CHAN_HT40MINUS: - if (chandef->chan->flags & IEEE80211_CHAN_NO_HT40MINUS) + case NL80211_CHAN_WIDTH_40: + width = 40; + /* quick early regulatory check */ + if (chandef->center_freq1 < control_freq && + chandef->chan->flags & IEEE80211_CHAN_NO_HT40MINUS) + return -EINVAL; + if (chandef->center_freq1 > control_freq && + chandef->chan->flags & IEEE80211_CHAN_NO_HT40PLUS) + return -EINVAL; + if (!ht_cap->ht_supported) return -EINVAL; - offs = -20; - /* fall through */ - case NL80211_CHAN_HT40PLUS: - if (chandef->_type == NL80211_CHAN_HT40PLUS) { - if (chandef->chan->flags & IEEE80211_CHAN_NO_HT40PLUS) - return -EINVAL; - offs = 20; - } if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) || ht_cap->cap & IEEE80211_HT_CAP_40MHZ_INTOLERANT) return -EINVAL; - - sc = ieee80211_get_channel(&rdev->wiphy, - chandef->chan->center_freq + offs); - if (!sc || sc->flags & IEEE80211_CHAN_DISABLED) + break; + case NL80211_CHAN_WIDTH_80: + width = 80; + if (!vht_cap->vht_supported) return -EINVAL; - /* fall through */ - case NL80211_CHAN_HT20: - if (!ht_cap->ht_supported) + break; + case NL80211_CHAN_WIDTH_80P80: + width = 80; + if (!vht_cap->vht_supported) + return -EINVAL; + if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)) + return -EINVAL; + break; + case NL80211_CHAN_WIDTH_160: + width = 160; + if (!vht_cap->vht_supported) + return -EINVAL; + if (!(vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ)) return -EINVAL; break; + default: + return -EINVAL; } + if (!cfg80211_secondary_chans_ok(&rdev->wiphy, chandef->center_freq1, + width, IEEE80211_CHAN_DISABLED)) + return -EINVAL; + if (chandef->center_freq2 && + !cfg80211_secondary_chans_ok(&rdev->wiphy, chandef->center_freq2, + width, IEEE80211_CHAN_DISABLED)) + return -EINVAL; + + /* TODO: missing regulatory check on bandwidth */ + return 0; } @@ -1800,10 +1841,28 @@ static inline u64 wdev_id(struct wireless_dev *wdev) static int nl80211_send_chandef(struct sk_buff *msg, struct cfg80211_chan_def *chandef) { + WARN_ON(!cfg80211_chan_def_valid(chandef)); + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, chandef->chan->center_freq)) return -ENOBUFS; - if (nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, chandef->_type)) + switch (chandef->width) { + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_40: + if (nla_put_u32(msg, NL80211_ATTR_WIPHY_CHANNEL_TYPE, + cfg80211_get_chandef_type(chandef))) + return -ENOBUFS; + break; + default: + break; + } + if (nla_put_u32(msg, NL80211_ATTR_CHANNEL_WIDTH, chandef->width)) + return -ENOBUFS; + if (nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ1, chandef->center_freq1)) + return -ENOBUFS; + if (chandef->center_freq2 && + nla_put_u32(msg, NL80211_ATTR_CENTER_FREQ2, chandef->center_freq2)) return -ENOBUFS; return 0; } @@ -5447,7 +5506,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(connkeys)) return PTR_ERR(connkeys); - if ((ibss.chandef._type != NL80211_CHAN_NO_HT) && no_ht) { + if ((ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT) && + no_ht) { kfree(connkeys); return -EINVAL; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 1370d52b1393..3c7aa1221563 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -126,25 +126,33 @@ #define CHAN_PR_FMT ", band: %d, freq: %u" #define CHAN_PR_ARG __entry->band, __entry->center_freq -#define CHAN_DEF_ENTRY __field(enum ieee80211_band, band) \ - __field(u16, center_freq) \ - __field(u32, channel_type) +#define CHAN_DEF_ENTRY __field(enum ieee80211_band, band) \ + __field(u32, control_freq) \ + __field(u32, width) \ + __field(u32, center_freq1) \ + __field(u32, center_freq2) #define CHAN_DEF_ASSIGN(chandef) \ do { \ if ((chandef) && (chandef)->chan) { \ __entry->band = (chandef)->chan->band; \ - __entry->center_freq = \ + __entry->control_freq = \ (chandef)->chan->center_freq; \ - __entry->channel_type = (chandef)->_type; \ + __entry->width = (chandef)->width; \ + __entry->center_freq1 = (chandef)->center_freq1;\ + __entry->center_freq2 = (chandef)->center_freq2;\ } else { \ __entry->band = 0; \ - __entry->center_freq = 0; \ - __entry->channel_type = 0; \ + __entry->control_freq = 0; \ + __entry->width = 0; \ + __entry->center_freq1 = 0; \ + __entry->center_freq2 = 0; \ } \ } while (0) -#define CHAN_DEF_PR_FMT ", band: %d, freq: %u, chantype: %d" -#define CHAN_DEF_PR_ARG __entry->band, __entry->center_freq, \ - __entry->channel_type +#define CHAN_DEF_PR_FMT \ + ", band: %d, control freq: %u, width: %d, cf1: %u, cf2: %u" +#define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq, \ + __entry->width, __entry->center_freq1, \ + __entry->center_freq2 #define SINFO_ENTRY __field(int, generation) \ __field(u32, connected_time) \ diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index da3307f32362..f9680c9cf9b3 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -785,7 +785,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_chan_def chandef = { - ._type = NL80211_CHAN_NO_HT, + .width = NL80211_CHAN_WIDTH_20_NOHT, }; int freq, err; @@ -800,6 +800,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, return freq; if (freq == 0) return -EINVAL; + chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; @@ -813,6 +814,7 @@ static int cfg80211_wext_siwfreq(struct net_device *dev, return freq; if (freq == 0) return -EINVAL; + chandef.center_freq1 = freq; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); if (!chandef.chan) return -EINVAL; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index e6e5dbf2f616..873af63187c0 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -120,7 +120,8 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev, */ if (chan && !wdev->wext.connect.ssid_len) { struct cfg80211_chan_def chandef = { - ._type = NL80211_CHAN_NO_HT, + .width = NL80211_CHAN_WIDTH_20_NOHT, + .center_freq1 = freq, }; chandef.chan = ieee80211_get_channel(&rdev->wiphy, freq); -- cgit v1.2.3 From db9c64cf8d9d3fcbc34b09d037f266d1fc9f928c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 9 Nov 2012 14:56:41 +0100 Subject: nl80211/cfg80211: add VHT MCS support Add support for reporting and calculating VHT MCSes. Note that I'm not completely sure that the bitrate calculations are correct, nor that they can't be simplified. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 24 +++++++++----- include/uapi/linux/nl80211.h | 12 ++++++- net/wireless/nl80211.c | 58 +++++++++++++++++++++++++--------- net/wireless/util.c | 74 +++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 144 insertions(+), 24 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 977da58fb7ea..e78db2cf3d1b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -662,16 +662,24 @@ enum station_info_flags { * Used by the driver to indicate the specific rate transmission * type for 802.11n transmissions. * - * @RATE_INFO_FLAGS_MCS: @tx_bitrate_mcs filled - * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 Mhz width transmission + * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS + * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS + * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission + * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission + * @RATE_INFO_FLAGS_80P80_MHZ_WIDTH: 80+80 MHz width transmission + * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval - * @RATE_INFO_FLAGS_60G: 60gHz MCS + * @RATE_INFO_FLAGS_60G: 60GHz MCS */ enum rate_info_flags { - RATE_INFO_FLAGS_MCS = 1<<0, - RATE_INFO_FLAGS_40_MHZ_WIDTH = 1<<1, - RATE_INFO_FLAGS_SHORT_GI = 1<<2, - RATE_INFO_FLAGS_60G = 1<<3, + RATE_INFO_FLAGS_MCS = BIT(0), + RATE_INFO_FLAGS_VHT_MCS = BIT(1), + RATE_INFO_FLAGS_40_MHZ_WIDTH = BIT(2), + RATE_INFO_FLAGS_80_MHZ_WIDTH = BIT(3), + RATE_INFO_FLAGS_80P80_MHZ_WIDTH = BIT(4), + RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(5), + RATE_INFO_FLAGS_SHORT_GI = BIT(6), + RATE_INFO_FLAGS_60G = BIT(7), }; /** @@ -682,11 +690,13 @@ enum rate_info_flags { * @flags: bitflag of flags from &enum rate_info_flags * @mcs: mcs index if struct describes a 802.11n bitrate * @legacy: bitrate in 100kbit/s for 802.11abg + * @nss: number of streams (VHT only) */ struct rate_info { u8 flags; u8 mcs; u16 legacy; + u8 nss; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 84f9c7d84c69..33a417481ad8 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1734,10 +1734,15 @@ struct nl80211_sta_flag_update { * @__NL80211_RATE_INFO_INVALID: attribute number 0 is reserved * @NL80211_RATE_INFO_BITRATE: total bitrate (u16, 100kbit/s) * @NL80211_RATE_INFO_MCS: mcs index for 802.11n (u8) - * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 Mhz dualchannel bitrate + * @NL80211_RATE_INFO_40_MHZ_WIDTH: 40 MHz dualchannel bitrate * @NL80211_RATE_INFO_SHORT_GI: 400ns guard interval * @NL80211_RATE_INFO_BITRATE32: total bitrate (u32, 100kbit/s) * @NL80211_RATE_INFO_MAX: highest rate_info number currently defined + * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8) + * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8) + * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate + * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: 80+80 MHz VHT rate + * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -1747,6 +1752,11 @@ enum nl80211_rate_info { NL80211_RATE_INFO_40_MHZ_WIDTH, NL80211_RATE_INFO_SHORT_GI, NL80211_RATE_INFO_BITRATE32, + NL80211_RATE_INFO_VHT_MCS, + NL80211_RATE_INFO_VHT_NSS, + NL80211_RATE_INFO_80_MHZ_WIDTH, + NL80211_RATE_INFO_80P80_MHZ_WIDTH, + NL80211_RATE_INFO_160_MHZ_WIDTH, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 15158a3d64a3..d038fa45ecd1 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2890,29 +2890,52 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, rate = nla_nest_start(msg, attr); if (!rate) - goto nla_put_failure; + return false; /* cfg80211_calculate_bitrate will return 0 for mcs >= 32 */ bitrate = cfg80211_calculate_bitrate(info); /* report 16-bit bitrate only if we can */ bitrate_compat = bitrate < (1UL << 16) ? bitrate : 0; - if ((bitrate > 0 && - nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate)) || - (bitrate_compat > 0 && - nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) || - ((info->flags & RATE_INFO_FLAGS_MCS) && - nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) || - ((info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) || - ((info->flags & RATE_INFO_FLAGS_SHORT_GI) && - nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI))) - goto nla_put_failure; + if (bitrate > 0 && + nla_put_u32(msg, NL80211_RATE_INFO_BITRATE32, bitrate)) + return false; + if (bitrate_compat > 0 && + nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) + return false; + + if (info->flags & RATE_INFO_FLAGS_MCS) { + if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) + return false; + if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_SHORT_GI && + nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) + return false; + } else if (info->flags & RATE_INFO_FLAGS_VHT_MCS) { + if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_MCS, info->mcs)) + return false; + if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss)) + return false; + if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_80P80_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_80P80_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && + nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) + return false; + if (info->flags & RATE_INFO_FLAGS_SHORT_GI && + nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) + return false; + } nla_nest_end(msg, rate); return true; - -nla_put_failure: - return false; } static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, @@ -5475,6 +5498,11 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) if (!cfg80211_reg_can_beacon(&rdev->wiphy, &ibss.chandef)) return -EINVAL; + if (ibss.chandef.width > NL80211_CHAN_WIDTH_40) + return -EINVAL; + if (ibss.chandef.width != NL80211_CHAN_WIDTH_20_NOHT && + !(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)) + ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED]; ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY]; diff --git a/net/wireless/util.c b/net/wireless/util.c index db61fe8a6b6d..3cce6e486219 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -944,14 +944,86 @@ static u32 cfg80211_calculate_bitrate_60g(struct rate_info *rate) return __mcs2bitrate[rate->mcs]; } +static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) +{ + static const u32 base[4][10] = { + { 6500000, + 13000000, + 19500000, + 26000000, + 39000000, + 52000000, + 58500000, + 65000000, + 78000000, + 0, + }, + { 13500000, + 27000000, + 40500000, + 54000000, + 81000000, + 108000000, + 121500000, + 135000000, + 162000000, + 180000000, + }, + { 29300000, + 58500000, + 87800000, + 117000000, + 175500000, + 234000000, + 263300000, + 292500000, + 351000000, + 390000000, + }, + { 58500000, + 117000000, + 175500000, + 234000000, + 351000000, + 468000000, + 526500000, + 585000000, + 702000000, + 780000000, + }, + }; + u32 bitrate; + int idx; + + if (WARN_ON_ONCE(rate->mcs > 9)) + return 0; + + idx = rate->flags & (RATE_INFO_FLAGS_160_MHZ_WIDTH | + RATE_INFO_FLAGS_80P80_MHZ_WIDTH) ? 3 : + rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : + rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0; + + bitrate = base[idx][rate->mcs]; + bitrate *= rate->nss; + + if (rate->flags & RATE_INFO_FLAGS_SHORT_GI) + bitrate = (bitrate / 9) * 10; + + /* do NOT round down here */ + return (bitrate + 50000) / 100000; +} + u32 cfg80211_calculate_bitrate(struct rate_info *rate) { int modulation, streams, bitrate; - if (!(rate->flags & RATE_INFO_FLAGS_MCS)) + if (!(rate->flags & RATE_INFO_FLAGS_MCS) && + !(rate->flags & RATE_INFO_FLAGS_VHT_MCS)) return rate->legacy; if (rate->flags & RATE_INFO_FLAGS_60G) return cfg80211_calculate_bitrate_60g(rate); + if (rate->flags & RATE_INFO_FLAGS_VHT_MCS) + return cfg80211_calculate_bitrate_vht(rate); /* the formula below does only work for MCS values smaller than 32 */ if (WARN_ON_ONCE(rate->mcs >= 32)) -- cgit v1.2.3 From 53cabad70ecf0c245b41285de64a74a6c3ee9933 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 14 Nov 2012 15:17:28 +0100 Subject: nl80211: support P2P GO powersave configuration If a driver supports P2P GO powersave, allow it to set the new feature flags for it and allow userspace to configure the parameters for it. This can be done at GO startup and later changed with SET_BSS. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 7 ++++++ include/uapi/linux/nl80211.h | 16 +++++++++++++ net/wireless/nl80211.c | 56 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a238f41e55c2..731b48fa238b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -538,6 +538,8 @@ struct cfg80211_beacon_data { * @privacy: the BSS uses privacy * @auth_type: Authentication type (algorithm) * @inactivity_timeout: time in seconds to determine station's inactivity. + * @p2p_ctwindow: P2P CT Window + * @p2p_opp_ps: P2P opportunistic PS */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -552,6 +554,8 @@ struct cfg80211_ap_settings { bool privacy; enum nl80211_auth_type auth_type; int inactivity_timeout; + u8 p2p_ctwindow; + bool p2p_opp_ps; }; /** @@ -913,6 +917,8 @@ struct mpath_info { * @ap_isolate: do not forward packets between connected stations * @ht_opmode: HT Operation mode * (u16 = opmode, -1 = do not change) + * @p2p_ctwindow: P2P CT Window (-1 = no change) + * @p2p_opp_ps: P2P opportunistic PS (-1 = no change) */ struct bss_parameters { int use_cts_prot; @@ -922,6 +928,7 @@ struct bss_parameters { u8 basic_rates_len; int ap_isolate; int ht_opmode; + s8 p2p_ctwindow, p2p_opp_ps; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 33a417481ad8..e3e19f8b16f2 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1303,6 +1303,13 @@ enum nl80211_commands { * * @NL80211_ATTR_SCAN_FLAGS: scan request control flags (u32) * + * @NL80211_ATTR_P2P_CTWINDOW: P2P GO Client Traffic Window (u8), used with + * the START_AP and SET_BSS commands + * @NL80211_ATTR_P2P_OPPPS: P2P GO opportunistic PS (u8), used with the + * START_AP and SET_BSS commands. This can have the values 0 or 1; + * if not given in START_AP 0 is assumed, if not given in SET_BSS + * no change is made. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1570,6 +1577,9 @@ enum nl80211_attrs { NL80211_ATTR_CENTER_FREQ1, NL80211_ATTR_CENTER_FREQ2, + NL80211_ATTR_P2P_CTWINDOW, + NL80211_ATTR_P2P_OPPPS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3126,6 +3136,10 @@ enum nl80211_ap_sme_features { * @NL80211_FEATURE_NEED_OBSS_SCAN: The driver expects userspace to perform * OBSS scans and generate 20/40 BSS coex reports. This flag is used only * for drivers implementing the CONNECT API, for AUTH/ASSOC it is implied. + * @NL80211_FEATURE_P2P_GO_CTWIN: P2P GO implementation supports CT Window + * setting + * @NL80211_FEATURE_P2P_GO_OPPPS: P2P GO implementation supports opportunistic + * powersave */ enum nl80211_feature_flags { NL80211_FEATURE_SK_TX_STATUS = 1 << 0, @@ -3139,6 +3153,8 @@ enum nl80211_feature_flags { NL80211_FEATURE_AP_SCAN = 1 << 8, NL80211_FEATURE_VIF_TXPOWER = 1 << 9, NL80211_FEATURE_NEED_OBSS_SCAN = 1 << 10, + NL80211_FEATURE_P2P_GO_CTWIN = 1 << 11, + NL80211_FEATURE_P2P_GO_OPPPS = 1 << 12, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index eb0aa71a02b3..7f53aafd47f7 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -363,6 +363,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_SAE_DATA] = { .type = NLA_BINARY, }, [NL80211_ATTR_VHT_CAPABILITY] = { .len = NL80211_VHT_CAPABILITY_LEN }, [NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 }, + [NL80211_ATTR_P2P_CTWINDOW] = { .type = NLA_U8 }, + [NL80211_ATTR_P2P_OPPPS] = { .type = NLA_U8 }, }; /* policy for the key attributes */ @@ -2702,6 +2704,32 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) info->attrs[NL80211_ATTR_INACTIVITY_TIMEOUT]); } + if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + params.p2p_ctwindow = + nla_get_u8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); + if (params.p2p_ctwindow > 127) + return -EINVAL; + if (params.p2p_ctwindow != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { + u8 tmp; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); + if (tmp > 1) + return -EINVAL; + params.p2p_opp_ps = tmp; + if (params.p2p_opp_ps != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) + return -EINVAL; + } + if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { err = nl80211_parse_chandef(rdev, info, ¶ms.chandef); if (err) @@ -3668,6 +3696,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.use_short_slot_time = -1; params.ap_isolate = -1; params.ht_opmode = -1; + params.p2p_ctwindow = -1; + params.p2p_opp_ps = -1; if (info->attrs[NL80211_ATTR_BSS_CTS_PROT]) params.use_cts_prot = @@ -3690,6 +3720,32 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) params.ht_opmode = nla_get_u16(info->attrs[NL80211_ATTR_BSS_HT_OPMODE]); + if (info->attrs[NL80211_ATTR_P2P_CTWINDOW]) { + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + params.p2p_ctwindow = + nla_get_s8(info->attrs[NL80211_ATTR_P2P_CTWINDOW]); + if (params.p2p_ctwindow < 0) + return -EINVAL; + if (params.p2p_ctwindow != 0 && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_CTWIN)) + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_P2P_OPPPS]) { + u8 tmp; + + if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO) + return -EINVAL; + tmp = nla_get_u8(info->attrs[NL80211_ATTR_P2P_OPPPS]); + if (tmp > 1) + return -EINVAL; + params.p2p_opp_ps = tmp; + if (params.p2p_opp_ps && + !(rdev->wiphy.features & NL80211_FEATURE_P2P_GO_OPPPS)) + return -EINVAL; + } + if (!rdev->ops->change_bss) return -EOPNOTSUPP; -- cgit v1.2.3 From d871befe357ccc262edbb0a4f9aeea650012edf5 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 27 Nov 2012 14:49:42 +0100 Subject: netfilter: ctnetlink: dump entries from the dying and unconfirmed lists This patch adds a new operation to dump the content of the dying and unconfirmed lists. Under some situations, the global conntrack counter can be inconsistent with the number of entries that we can dump from the conntrack table. The way to resolve this is to allow dumping the content of the unconfirmed and dying lists, so far it was not possible to look at its content. This provides some extra instrumentation to resolve problematic situations in which anyone suspects memory leaks. Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nfnetlink_conntrack.h | 2 + net/netfilter/nf_conntrack_netlink.c | 108 +++++++++++++++++++++ 2 files changed, 110 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 43bfe3e1685b..86e930cf3dfb 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -9,6 +9,8 @@ enum cntl_msg_types { IPCTNL_MSG_CT_GET_CTRZERO, IPCTNL_MSG_CT_GET_STATS_CPU, IPCTNL_MSG_CT_GET_STATS, + IPCTNL_MSG_CT_GET_DYING, + IPCTNL_MSG_CT_GET_UNCONFIRMED, IPCTNL_MSG_MAX }; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 34370a928360..c24a00a73c7b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1089,6 +1089,112 @@ out: return err == -EAGAIN ? -ENOBUFS : err; } +static int ctnetlink_done_list(struct netlink_callback *cb) +{ + if (cb->args[1]) + nf_ct_put((struct nf_conn *)cb->args[1]); + return 0; +} + +static int +ctnetlink_dump_list(struct sk_buff *skb, struct netlink_callback *cb, + struct hlist_nulls_head *list) +{ + struct nf_conn *ct, *last; + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_node *n; + struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; + int res; + + if (cb->args[2]) + return 0; + + spin_lock_bh(&nf_conntrack_lock); + last = (struct nf_conn *)cb->args[1]; +restart: + hlist_nulls_for_each_entry(h, n, list, hnnode) { + ct = nf_ct_tuplehash_to_ctrack(h); + if (l3proto && nf_ct_l3num(ct) != l3proto) + continue; + if (cb->args[1]) { + if (ct != last) + continue; + cb->args[1] = 0; + } + rcu_read_lock(); + res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + ct); + rcu_read_unlock(); + if (res < 0) { + nf_conntrack_get(&ct->ct_general); + cb->args[1] = (unsigned long)ct; + goto out; + } + } + if (cb->args[1]) { + cb->args[1] = 0; + goto restart; + } else + cb->args[2] = 1; +out: + spin_unlock_bh(&nf_conntrack_lock); + if (last) + nf_ct_put(last); + + return skb->len; +} + +static int +ctnetlink_dump_dying(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + + return ctnetlink_dump_list(skb, cb, &net->ct.dying); +} + +static int +ctnetlink_get_ct_dying(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_dying, + .done = ctnetlink_done_list, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return -EOPNOTSUPP; +} + +static int +ctnetlink_dump_unconfirmed(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net *net = sock_net(skb->sk); + + return ctnetlink_dump_list(skb, cb, &net->ct.unconfirmed); +} + +static int +ctnetlink_get_ct_unconfirmed(struct sock *ctnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, + const struct nlattr * const cda[]) +{ + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_unconfirmed, + .done = ctnetlink_done_list, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } + + return -EOPNOTSUPP; +} + #ifdef CONFIG_NF_NAT_NEEDED static int ctnetlink_parse_nat_setup(struct nf_conn *ct, @@ -2712,6 +2818,8 @@ static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { .policy = ct_nla_policy }, [IPCTNL_MSG_CT_GET_STATS_CPU] = { .call = ctnetlink_stat_ct_cpu }, [IPCTNL_MSG_CT_GET_STATS] = { .call = ctnetlink_stat_ct }, + [IPCTNL_MSG_CT_GET_DYING] = { .call = ctnetlink_get_ct_dying }, + [IPCTNL_MSG_CT_GET_UNCONFIRMED] = { .call = ctnetlink_get_ct_unconfirmed }, }; static const struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { -- cgit v1.2.3 From 5d097109257c03a71845729f8db6b5770c4bbedc Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 3 Dec 2012 10:07:14 +0000 Subject: tun: only queue packets on device Historically tun supported two modes of operation: - in default mode, a small number of packets would get queued at the device, the rest would be queued in qdisc - in one queue mode, all packets would get queued at the device This might have made sense up to a point where we made the queue depth for both modes the same and set it to a huge value (500) so unless the consumer is stuck the chance of losing packets is small. Thus in practice both modes behave the same, but the default mode has some problems: - if packets are never consumed, fragments are never orphaned which cases a DOS for sender using zero copy transmit - overrun errors are hard to diagnose: fifo error is incremented only once so you can not distinguish between userspace that is stuck and a transient failure, tcpdump on the device does not show any traffic Userspace solves this simply by enabling IFF_ONE_QUEUE but there seems to be little point in not doing the right thing for everyone, by default. Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- drivers/net/tun.c | 24 ++++++++---------------- include/uapi/linux/if_tun.h | 2 ++ 2 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 71f6874d8048..a1b2389e6d7f 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -690,21 +690,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) * number of queues. */ if (skb_queue_len(&tfile->socket.sk->sk_receive_queue) - >= dev->tx_queue_len / tun->numqueues){ - if (!(tun->flags & TUN_ONE_QUEUE)) { - /* Normal queueing mode. */ - /* Packet scheduler handles dropping of further packets. */ - netif_stop_subqueue(dev, txq); - - /* We won't see all dropped packets individually, so overrun - * error is more appropriate. */ - dev->stats.tx_fifo_errors++; - } else { - /* Single queue mode. - * Driver handles dropping of all packets itself. */ - goto drop; - } - } + >= dev->tx_queue_len / tun->numqueues) + goto drop; /* Orphan the skb - required as we might hang on to it * for indefinite time. */ @@ -1319,7 +1306,6 @@ static ssize_t tun_do_read(struct tun_struct *tun, struct tun_file *tfile, schedule(); continue; } - netif_wake_subqueue(tun->dev, tfile->queue_index); ret = tun_put_user(tun, tfile, skb, iv, len); kfree_skb(skb); @@ -1482,6 +1468,9 @@ static int tun_flags(struct tun_struct *tun) if (tun->flags & TUN_NO_PI) flags |= IFF_NO_PI; + /* This flag has no real effect. We track the value for backwards + * compatibility. + */ if (tun->flags & TUN_ONE_QUEUE) flags |= IFF_ONE_QUEUE; @@ -1632,6 +1621,9 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) else tun->flags &= ~TUN_NO_PI; + /* This flag has no real effect. We track the value for backwards + * compatibility. + */ if (ifr->ifr_flags & IFF_ONE_QUEUE) tun->flags |= TUN_ONE_QUEUE; else diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h index 958497ad5bb5..2835b85fd46d 100644 --- a/include/uapi/linux/if_tun.h +++ b/include/uapi/linux/if_tun.h @@ -31,6 +31,7 @@ #define TUN_FASYNC 0x0010 #define TUN_NOCHECKSUM 0x0020 #define TUN_NO_PI 0x0040 +/* This flag has no real effect */ #define TUN_ONE_QUEUE 0x0080 #define TUN_PERSIST 0x0100 #define TUN_VNET_HDR 0x0200 @@ -60,6 +61,7 @@ #define IFF_TUN 0x0001 #define IFF_TAP 0x0002 #define IFF_NO_PI 0x1000 +/* This flag has no real effect */ #define IFF_ONE_QUEUE 0x2000 #define IFF_VNET_HDR 0x4000 #define IFF_TUN_EXCL 0x8000 -- cgit v1.2.3 From d67b8c616b48df30e2836d797795f2420d109bc9 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:35 +0000 Subject: netconf: advertise mc_forwarding status This patch advertise the MC_FORWARDING status for IPv4 and IPv6. This field is readonly, only multicast engine in the kernel updates it. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 3 +++ include/net/addrconf.h | 3 +++ include/uapi/linux/netconf.h | 1 + net/ipv4/devinet.c | 10 ++++++++-- net/ipv4/ipmr.c | 12 ++++++++++++ net/ipv6/addrconf.c | 10 ++++++++-- net/ipv6/ip6mr.c | 20 ++++++++++++++++++-- 7 files changed, 53 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index d032780d0ce5..a9d828976a77 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -171,6 +171,9 @@ struct in_ifaddr { extern int register_inetaddr_notifier(struct notifier_block *nb); extern int unregister_inetaddr_notifier(struct notifier_block *nb); +extern void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf); + extern struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) { diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9e63e76b20e7..df4ef9453384 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -172,6 +172,9 @@ extern bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); +extern void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf); + /** * __in6_dev_get - get inet6_dev pointer from netdevice * @dev: network device diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h index 75dcbc587fb5..64804a798b0c 100644 --- a/include/uapi/linux/netconf.h +++ b/include/uapi/linux/netconf.h @@ -13,6 +13,7 @@ enum { NETCONFA_IFINDEX, NETCONFA_FORWARDING, NETCONFA_RP_FILTER, + NETCONFA_MC_FORWARDING, __NETCONFA_MAX }; #define NETCONFA_MAX (__NETCONFA_MAX - 1) diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index e13183abd7f6..cc06a47f1216 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1453,6 +1453,8 @@ static int inet_netconf_msgsize_devconf(int type) size += nla_total_size(4); if (type == -1 || type == NETCONFA_RP_FILTER) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_MC_FORWARDING) + size += nla_total_size(4); return size; } @@ -1485,6 +1487,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_RP_FILTER, IPV4_DEVCONF(*devconf, RP_FILTER)) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_MC_FORWARDING) && + nla_put_s32(skb, NETCONFA_MC_FORWARDING, + IPV4_DEVCONF(*devconf, MC_FORWARDING)) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -1493,8 +1499,8 @@ nla_put_failure: return -EMSGSIZE; } -static void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf) +void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv4_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 58e4160fdcee..0c452e3fdc1b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -65,6 +65,7 @@ #include #include #include +#include #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2) #define CONFIG_IP_PIMSM 1 @@ -582,6 +583,9 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, in_dev = __in_dev_get_rtnl(dev); if (in_dev) { IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--; + inet_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in_dev->cnf); ip_rt_multicast_event(in_dev); } @@ -772,6 +776,8 @@ static int vif_add(struct net *net, struct mr_table *mrt, return -EADDRNOTAVAIL; } IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++; + inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, dev->ifindex, + &in_dev->cnf); ip_rt_multicast_event(in_dev); /* Fill in the VIF structures */ @@ -1185,6 +1191,9 @@ static void mrtsock_destruct(struct sock *sk) ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; + inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); mroute_clean_tables(mrt); } @@ -1236,6 +1245,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi if (ret == 0) { rcu_assign_pointer(mrt->mroute_sk, sk); IPV4_DEVCONF_ALL(net, MC_FORWARDING)++; + inet_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv4.devconf_all); } rtnl_unlock(); return ret; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 22ae75d54017..28e0e627229c 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -469,6 +469,8 @@ static int inet6_netconf_msgsize_devconf(int type) /* type -1 is used for ALL */ if (type == -1 || type == NETCONFA_FORWARDING) size += nla_total_size(4); + if (type == -1 || type == NETCONFA_MC_FORWARDING) + size += nla_total_size(4); return size; } @@ -496,6 +498,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, if ((type == -1 || type == NETCONFA_FORWARDING) && nla_put_s32(skb, NETCONFA_FORWARDING, devconf->forwarding) < 0) goto nla_put_failure; + if ((type == -1 || type == NETCONFA_MC_FORWARDING) && + nla_put_s32(skb, NETCONFA_MC_FORWARDING, + devconf->mc_forwarding) < 0) + goto nla_put_failure; return nlmsg_end(skb, nlh); @@ -504,8 +510,8 @@ nla_put_failure: return -EMSGSIZE; } -static void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv6_devconf *devconf) +void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, + struct ipv6_devconf *devconf) { struct sk_buff *skb; int err = -ENOBUFS; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 926ea544f499..1c05fe604d37 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -52,6 +52,7 @@ #include #include #include +#include struct mr6_table { struct list_head list; @@ -805,8 +806,12 @@ static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head) dev_set_allmulti(dev, -1); in6_dev = __in6_dev_get(dev); - if (in6_dev) + if (in6_dev) { in6_dev->cnf.mc_forwarding--; + inet6_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in6_dev->cnf); + } if (v->flags & MIFF_REGISTER) unregister_netdevice_queue(dev, head); @@ -958,8 +963,12 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, } in6_dev = __in6_dev_get(dev); - if (in6_dev) + if (in6_dev) { in6_dev->cnf.mc_forwarding++; + inet6_netconf_notify_devconf(dev_net(dev), + NETCONFA_MC_FORWARDING, + dev->ifindex, &in6_dev->cnf); + } /* * Fill in the VIF structures @@ -1513,6 +1522,9 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk) if (likely(mrt->mroute6_sk == NULL)) { mrt->mroute6_sk = sk; net->ipv6.devconf_all->mc_forwarding++; + inet6_netconf_notify_devconf(net, NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); } else err = -EADDRINUSE; @@ -1535,6 +1547,10 @@ int ip6mr_sk_done(struct sock *sk) write_lock_bh(&mrt_lock); mrt->mroute6_sk = NULL; net->ipv6.devconf_all->mc_forwarding--; + inet6_netconf_notify_devconf(net, + NETCONFA_MC_FORWARDING, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); write_unlock_bh(&mrt_lock); mroute_clean_tables(mrt); -- cgit v1.2.3 From adfa85e45dac616ff4f8bfceff1621ccafc0b1ff Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:37 +0000 Subject: ipmr/ip6mr: advertise mfc stats via rtnetlink These statistics can be checked only via /proc/net/ip_mr_cache or SIOCGETSGCNT[_IN6] and thus only for the table RT_TABLE_DEFAULT. Advertising them via rtnetlink allows to get statistics for all cache entries, whatever the table is. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 7 +++++++ net/ipv4/ipmr.c | 7 +++++++ net/ipv6/ip6mr.c | 7 +++++++ 3 files changed, 21 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 3dee071770d5..80abe27dc2a7 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -288,6 +288,7 @@ enum rtattr_type_t { RTA_MP_ALGO, /* no longer used */ RTA_TABLE, RTA_MARK, + RTA_MFC_STATS, __RTA_MAX }; @@ -408,6 +409,12 @@ struct rta_session { } u; }; +struct rta_mfc_stats { + __u64 mfcs_packets; + __u64 mfcs_bytes; + __u64 mfcs_wrong_if; +}; + /**** * General form of address family dependent message. ****/ diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 0c452e3fdc1b..c5617d646b93 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2046,6 +2046,7 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, int ct; struct rtnexthop *nhp; struct nlattr *mp_attr; + struct rta_mfc_stats mfcs; /* If cache is unresolved, don't try to parse IIF and OIF */ if (c->mfc_parent >= MAXVIFS) @@ -2074,6 +2075,12 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + mfcs.mfcs_packets = c->mfc_un.res.pkt; + mfcs.mfcs_bytes = c->mfc_un.res.bytes; + mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; + if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + return -EMSGSIZE; + rtm->rtm_type = RTN_MULTICAST; return 1; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 23f364a9efb5..4220a7b93386 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2120,6 +2120,7 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, int ct; struct rtnexthop *nhp; struct nlattr *mp_attr; + struct rta_mfc_stats mfcs; /* If cache is unresolved, don't try to parse IIF and OIF */ if (c->mf6c_parent >= MAXMIFS) @@ -2149,6 +2150,12 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_nest_end(skb, mp_attr); + mfcs.mfcs_packets = c->mfc_un.res.pkt; + mfcs.mfcs_bytes = c->mfc_un.res.bytes; + mfcs.mfcs_wrong_if = c->mfc_un.res.wrong_if; + if (nla_put(skb, RTA_MFC_STATS, sizeof(mfcs), &mfcs) < 0) + return -EMSGSIZE; + rtm->rtm_type = RTN_MULTICAST; return 1; } -- cgit v1.2.3 From 9a68ac72a44ecb6d4dc4a7cadf45e1a2cd183885 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 4 Dec 2012 01:13:38 +0000 Subject: ipmr/ip6mr: report origin of mfc entry into rtnl msg A mfc entry can be static or not (added via the mroute_sk socket). The patch reports MFC_STATIC flag into rtm_protocol by setting rtm_protocol to RTPROT_STATIC or RTPROT_MROUTED. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + net/ipv4/ipmr.c | 5 ++++- net/ipv6/ip6mr.c | 5 ++++- 3 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 80abe27dc2a7..33d29cea37ea 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -227,6 +227,7 @@ enum { #define RTPROT_XORP 14 /* XORP */ #define RTPROT_NTK 15 /* Netsukuku */ #define RTPROT_DHCP 16 /* DHCP client */ +#define RTPROT_MROUTED 17 /* Multicast daemon */ /* rtm_scope diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c5617d646b93..91782a7634c2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2169,7 +2169,10 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, goto nla_put_failure; rtm->rtm_type = RTN_MULTICAST; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = RTPROT_UNSPEC; + if (c->mfc_flags & MFC_STATIC) + rtm->rtm_protocol = RTPROT_STATIC; + else + rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put_be32(skb, RTA_SRC, c->mfc_origin) || diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4220a7b93386..d51b91122866 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2249,7 +2249,10 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (nla_put_u32(skb, RTA_TABLE, mrt->id)) goto nla_put_failure; rtm->rtm_scope = RT_SCOPE_UNIVERSE; - rtm->rtm_protocol = RTPROT_UNSPEC; + if (c->mfc_flags & MFC_STATIC) + rtm->rtm_protocol = RTPROT_STATIC; + else + rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || -- cgit v1.2.3 From 7793eeabc89fd342b96fdadce5a50c46ab77f3f9 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Dec 2012 13:51:17 -0700 Subject: PCI: Add and use standard PCI-X Capability register names Add and use #defines for PCI-X Capability registers and fields. Note that the PCI-X Capability has a different layout for type 0 (endpoint) and type 1 (bridge) devices. Signed-off-by: Bjorn Helgaas --- drivers/pci/probe.c | 15 +++++++++------ include/uapi/linux/pci_regs.h | 15 ++++++++++++++- 2 files changed, 23 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index ec909afa90b6..81d06676ce34 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -579,14 +579,16 @@ static void pci_set_bus_speed(struct pci_bus *bus) if (pos) { u16 status; enum pci_bus_speed max; - pci_read_config_word(bridge, pos + 2, &status); - if (status & 0x8000) { + pci_read_config_word(bridge, pos + PCI_X_BRIDGE_SSTATUS, + &status); + + if (status & PCI_X_SSTATUS_533MHZ) { max = PCI_SPEED_133MHz_PCIX_533; - } else if (status & 0x4000) { + } else if (status & PCI_X_SSTATUS_266MHZ) { max = PCI_SPEED_133MHz_PCIX_266; - } else if (status & 0x0002) { - if (((status >> 12) & 0x3) == 2) { + } else if (status & PCI_X_SSTATUS_133MHZ) { + if ((status & PCI_X_SSTATUS_VERS) == PCI_X_SSTATUS_V2) { max = PCI_SPEED_133MHz_PCIX_ECC; } else { max = PCI_SPEED_133MHz_PCIX; @@ -596,7 +598,8 @@ static void pci_set_bus_speed(struct pci_bus *bus) } bus->max_bus_speed = max; - bus->cur_bus_speed = pcix_bus_speed[(status >> 6) & 0xf]; + bus->cur_bus_speed = pcix_bus_speed[ + (status & PCI_X_SSTATUS_FREQ) >> 6]; return; } diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 20ae747ddf34..4cca834f9abd 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -349,7 +349,7 @@ #define PCI_AF_STATUS_TP 0x01 #define PCI_CAP_AF_SIZEOF 6 /* size of AF registers */ -/* PCI-X registers */ +/* PCI-X registers (Type 0 (non-bridge) devices) */ #define PCI_X_CMD 2 /* Modes & Features */ #define PCI_X_CMD_DPERR_E 0x0001 /* Data Parity Error Recovery Enable */ @@ -389,6 +389,19 @@ #define PCI_CAP_PCIX_SIZEOF_V1 24 /* size for Version 1 */ #define PCI_CAP_PCIX_SIZEOF_V2 PCI_CAP_PCIX_SIZEOF_V1 /* Same for v2 */ +/* PCI-X registers (Type 1 (bridge) devices) */ + +#define PCI_X_BRIDGE_SSTATUS 2 /* Secondary Status */ +#define PCI_X_SSTATUS_64BIT 0x0001 /* Secondary AD interface is 64 bits */ +#define PCI_X_SSTATUS_133MHZ 0x0002 /* 133 MHz capable */ +#define PCI_X_SSTATUS_FREQ 0x03c0 /* Secondary Bus Mode and Frequency */ +#define PCI_X_SSTATUS_VERS 0x3000 /* PCI-X Capability Version */ +#define PCI_X_SSTATUS_V1 0x1000 /* Mode 2, not Mode 1 */ +#define PCI_X_SSTATUS_V2 0x2000 /* Mode 1 or Modes 1 and 2 */ +#define PCI_X_SSTATUS_266MHZ 0x4000 /* 266 MHz capable */ +#define PCI_X_SSTATUS_533MHZ 0x8000 /* 533 MHz capable */ +#define PCI_X_BRIDGE_STATUS 4 /* Bridge Status */ + /* PCI Bridge Subsystem ID registers */ #define PCI_SSVID_VENDOR_ID 4 /* PCI-Bridge subsystem vendor id register */ -- cgit v1.2.3 From c2d3babfafbb9f6629cfb47139758e59a5eb0d80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Dec 2012 16:24:45 -0500 Subject: bridge: implement multicast fast leave V3: make it a flag V2: make the toggle per-port Fast leave allows bridge to immediately stops the multicast traffic on the port receives IGMP Leave when IGMP snooping is enabled, no timeouts are observed. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang --- include/uapi/linux/if_link.h | 1 + net/bridge/br_multicast.c | 2 +- net/bridge/br_netlink.c | 4 +++- net/bridge/br_private.h | 2 +- net/bridge/br_sysfs_if.c | 19 +------------------ 5 files changed, 7 insertions(+), 21 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index bb58aeb7f34d..60f3b6b90602 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -218,6 +218,7 @@ enum { IFLA_BRPORT_MODE, /* mode (hairpin) */ IFLA_BRPORT_GUARD, /* bpdu guard */ IFLA_BRPORT_PROTECT, /* root port protection */ + IFLA_BRPORT_FAST_LEAVE, /* multicast fast leave */ __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2391bae4f733..a2a7a1a79081 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1225,7 +1225,7 @@ static void br_multicast_leave_group(struct net_bridge *br, if (!mp) goto out; - if (port && port->multicast_fast_leave) { + if (port && (port->flags & BR_MULTICAST_FAST_LEAVE)) { struct net_bridge_port_group __rcu **pp; for (pp = &mp->ports; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 65429b99a2a3..850b7d1f3a41 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -53,7 +53,8 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || - nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK))) + nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE))) return -EMSGSIZE; return 0; @@ -210,6 +211,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_MODE, BR_HAIRPIN_MODE); br_set_port_flag(p, tb, IFLA_BRPORT_GUARD, BR_BPDU_GUARD); + br_set_port_flag(p, tb, IFLA_BRPORT_FAST_LEAVE, BR_MULTICAST_FAST_LEAVE); if (tb[IFLA_BRPORT_COST]) { err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST])); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cdbf9047a659..cd86222cf5e3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -137,11 +137,11 @@ struct net_bridge_port #define BR_HAIRPIN_MODE 0x00000001 #define BR_BPDU_GUARD 0x00000002 #define BR_ROOT_BLOCK 0x00000004 +#define BR_MULTICAST_FAST_LEAVE 0x00000008 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING u32 multicast_startup_queries_sent; unsigned char multicast_router; - unsigned char multicast_fast_leave; struct timer_list multicast_router_timer; struct timer_list multicast_query_timer; struct hlist_head mglist; diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index dc484ace0be3..a1ef1b6e14dc 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -173,24 +173,7 @@ static int store_multicast_router(struct net_bridge_port *p, static BRPORT_ATTR(multicast_router, S_IRUGO | S_IWUSR, show_multicast_router, store_multicast_router); -static ssize_t show_multicast_fast_leave(struct net_bridge_port *p, - char *buf) -{ - return sprintf(buf, "%d\n", p->multicast_fast_leave); -} - -static int store_multicast_fast_leave(struct net_bridge_port *p, - unsigned long v) -{ - if (p->br->multicast_disabled) - return -EINVAL; - - p->multicast_fast_leave = !!v; - return 0; -} - -static BRPORT_ATTR(multicast_fast_leave, S_IRUGO | S_IWUSR, - show_multicast_fast_leave, store_multicast_fast_leave); +BRPORT_ATTR_FLAG(multicast_fast_leave, BR_MULTICAST_FAST_LEAVE); #endif static const struct brport_attribute *brport_attrs[] = { -- cgit v1.2.3 From a2932923ccf63c419c77aaa18ac09be98f2c94d8 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 19 Nov 2012 22:57:20 +0000 Subject: KVM: PPC: Book3S HV: Provide a method for userspace to read and write the HPT A new ioctl, KVM_PPC_GET_HTAB_FD, returns a file descriptor. Reads on this fd return the contents of the HPT (hashed page table), writes create and/or remove entries in the HPT. There is a new capability, KVM_CAP_PPC_HTAB_FD, to indicate the presence of the ioctl. The ioctl takes an argument structure with the index of the first HPT entry to read out and a set of flags. The flags indicate whether the user is intending to read or write the HPT, and whether to return all entries or only the "bolted" entries (those with the bolted bit, 0x10, set in the first doubleword). This is intended for use in implementing qemu's savevm/loadvm and for live migration. Therefore, on reads, the first pass returns information about all HPTEs (or all bolted HPTEs). When the first pass reaches the end of the HPT, it returns from the read. Subsequent reads only return information about HPTEs that have changed since they were last read. A read that finds no changed HPTEs in the HPT following where the last read finished will return 0 bytes. The format of the data provides a simple run-length compression of the invalid entries. Each block of data starts with a header that indicates the index (position in the HPT, which is just an array), the number of valid entries starting at that index (may be zero), and the number of invalid entries following those valid entries. The valid entries, 16 bytes each, follow the header. The invalid entries are not explicitly represented. Signed-off-by: Paul Mackerras [agraf: fix documentation] Signed-off-by: Alexander Graf --- Documentation/virtual/kvm/api.txt | 54 +++++ arch/powerpc/include/asm/kvm_book3s_64.h | 22 ++ arch/powerpc/include/asm/kvm_ppc.h | 2 + arch/powerpc/include/uapi/asm/kvm.h | 25 +++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 344 +++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv.c | 12 -- arch/powerpc/kvm/powerpc.c | 17 ++ include/uapi/linux/kvm.h | 3 + 8 files changed, 467 insertions(+), 12 deletions(-) (limited to 'include/uapi/linux') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6671fdc0afb1..a5607c571cb3 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2071,6 +2071,60 @@ KVM_S390_INT_EXTERNAL_CALL (vcpu) - sigp external call; source cpu in parm Note that the vcpu ioctl is asynchronous to vcpu execution. +4.78 KVM_PPC_GET_HTAB_FD + +Capability: KVM_CAP_PPC_HTAB_FD +Architectures: powerpc +Type: vm ioctl +Parameters: Pointer to struct kvm_get_htab_fd (in) +Returns: file descriptor number (>= 0) on success, -1 on error + +This returns a file descriptor that can be used either to read out the +entries in the guest's hashed page table (HPT), or to write entries to +initialize the HPT. The returned fd can only be written to if the +KVM_GET_HTAB_WRITE bit is set in the flags field of the argument, and +can only be read if that bit is clear. The argument struct looks like +this: + +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +The `start_index' field gives the index in the HPT of the entry at +which to start reading. It is ignored when writing. + +Reads on the fd will initially supply information about all +"interesting" HPT entries. Interesting entries are those with the +bolted bit set, if the KVM_GET_HTAB_BOLTED_ONLY bit is set, otherwise +all entries. When the end of the HPT is reached, the read() will +return. If read() is called again on the fd, it will start again from +the beginning of the HPT, but will only return HPT entries that have +changed since they were last read. + +Data read or written is structured as a header (8 bytes) followed by a +series of valid HPT entries (16 bytes) each. The header indicates how +many valid HPT entries there are and how many invalid entries follow +the valid entries. The invalid entries are not represented explicitly +in the stream. The header format is: + +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + +Writes to the fd create HPT entries starting at the index given in the +header; first `n_valid' valid entries with contents from the data +written, then `n_invalid' invalid entries, invalidating any previously +valid entries found. + 5. The kvm_run structure ------------------------ diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index b322e5bd6964..38bec1dc9928 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -246,4 +246,26 @@ static inline bool slot_is_aligned(struct kvm_memory_slot *memslot, return !(memslot->base_gfn & mask) && !(memslot->npages & mask); } +/* + * This works for 4k, 64k and 16M pages on POWER7, + * and 4k and 16M pages on PPC970. + */ +static inline unsigned long slb_pgsize_encoding(unsigned long psize) +{ + unsigned long senc = 0; + + if (psize > 0x1000) { + senc = SLB_VSID_L; + if (psize == 0x10000) + senc |= SLB_VSID_LP_01; + } + return senc; +} + +static inline int is_vrma_hpte(unsigned long hpte_v) +{ + return (hpte_v & ~0xffffffUL) == + (HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16))); +} + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 609cca3e9426..1ca31e92ee75 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -164,6 +164,8 @@ extern void kvmppc_bookehv_exit(void); extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); +extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index b89ae4db45ce..514883dd311e 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -331,6 +331,31 @@ struct kvm_book3e_206_tlb_params { __u32 reserved[8]; }; +/* For KVM_PPC_GET_HTAB_FD */ +struct kvm_get_htab_fd { + __u64 flags; + __u64 start_index; + __u64 reserved[2]; +}; + +/* Values for kvm_get_htab_fd.flags */ +#define KVM_GET_HTAB_BOLTED_ONLY ((__u64)0x1) +#define KVM_GET_HTAB_WRITE ((__u64)0x2) + +/* + * Data read on the file descriptor is formatted as a series of + * records, each consisting of a header followed by a series of + * `n_valid' HPTEs (16 bytes each), which are all valid. Following + * those valid HPTEs there are `n_invalid' invalid HPTEs, which + * are not represented explicitly in the stream. The same format + * is used for writing. + */ +struct kvm_get_htab_header { + __u32 index; + __u16 n_valid; + __u16 n_invalid; +}; + #define KVM_REG_PPC_HIOR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x1) #define KVM_REG_PPC_IAC1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x2) #define KVM_REG_PPC_IAC2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x3) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 6ee6516a0bee..0aa40734c8f6 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include @@ -1145,6 +1147,348 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va) put_page(page); } +/* + * Functions for reading and writing the hash table via reads and + * writes on a file descriptor. + * + * Reads return the guest view of the hash table, which has to be + * pieced together from the real hash table and the guest_rpte + * values in the revmap array. + * + * On writes, each HPTE written is considered in turn, and if it + * is valid, it is written to the HPT as if an H_ENTER with the + * exact flag set was done. When the invalid count is non-zero + * in the header written to the stream, the kernel will make + * sure that that many HPTEs are invalid, and invalidate them + * if not. + */ + +struct kvm_htab_ctx { + unsigned long index; + unsigned long flags; + struct kvm *kvm; + int first_pass; +}; + +#define HPTE_SIZE (2 * sizeof(unsigned long)) + +static long record_hpte(unsigned long flags, unsigned long *hptp, + unsigned long *hpte, struct revmap_entry *revp, + int want_valid, int first_pass) +{ + unsigned long v, r; + int ok = 1; + int valid, dirty; + + /* Unmodified entries are uninteresting except on the first pass */ + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + if (!first_pass && !dirty) + return 0; + + valid = 0; + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) { + valid = 1; + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && + !(hptp[0] & HPTE_V_BOLTED)) + valid = 0; + } + if (valid != want_valid) + return 0; + + v = r = 0; + if (valid || dirty) { + /* lock the HPTE so it's stable and read it */ + preempt_disable(); + while (!try_lock_hpte(hptp, HPTE_V_HVLOCK)) + cpu_relax(); + v = hptp[0]; + if (v & HPTE_V_ABSENT) { + v &= ~HPTE_V_ABSENT; + v |= HPTE_V_VALID; + } + /* re-evaluate valid and dirty from synchronized HPTE value */ + valid = !!(v & HPTE_V_VALID); + if ((flags & KVM_GET_HTAB_BOLTED_ONLY) && !(v & HPTE_V_BOLTED)) + valid = 0; + r = revp->guest_rpte | (hptp[1] & (HPTE_R_R | HPTE_R_C)); + dirty = !!(revp->guest_rpte & HPTE_GR_MODIFIED); + /* only clear modified if this is the right sort of entry */ + if (valid == want_valid && dirty) { + r &= ~HPTE_GR_MODIFIED; + revp->guest_rpte = r; + } + asm volatile(PPC_RELEASE_BARRIER "" : : : "memory"); + hptp[0] &= ~HPTE_V_HVLOCK; + preempt_enable(); + if (!(valid == want_valid && (first_pass || dirty))) + ok = 0; + } + hpte[0] = v; + hpte[1] = r; + return ok; +} + +static ssize_t kvm_htab_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) +{ + struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long *hptp; + struct revmap_entry *revp; + unsigned long i, nb, nw; + unsigned long __user *lbuf; + struct kvm_get_htab_header __user *hptr; + unsigned long flags; + int first_pass; + unsigned long hpte[2]; + + if (!access_ok(VERIFY_WRITE, buf, count)) + return -EFAULT; + + first_pass = ctx->first_pass; + flags = ctx->flags; + + i = ctx->index; + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + revp = kvm->arch.revmap + i; + lbuf = (unsigned long __user *)buf; + + nb = 0; + while (nb + sizeof(hdr) + HPTE_SIZE < count) { + /* Initialize header */ + hptr = (struct kvm_get_htab_header __user *)buf; + hdr.index = i; + hdr.n_valid = 0; + hdr.n_invalid = 0; + nw = nb; + nb += sizeof(hdr); + lbuf = (unsigned long __user *)(buf + sizeof(hdr)); + + /* Skip uninteresting entries, i.e. clean on not-first pass */ + if (!first_pass) { + while (i < kvm->arch.hpt_npte && + !(revp->guest_rpte & HPTE_GR_MODIFIED)) { + ++i; + hptp += 2; + ++revp; + } + } + + /* Grab a series of valid entries */ + while (i < kvm->arch.hpt_npte && + hdr.n_valid < 0xffff && + nb + HPTE_SIZE < count && + record_hpte(flags, hptp, hpte, revp, 1, first_pass)) { + /* valid entry, write it out */ + ++hdr.n_valid; + if (__put_user(hpte[0], lbuf) || + __put_user(hpte[1], lbuf + 1)) + return -EFAULT; + nb += HPTE_SIZE; + lbuf += 2; + ++i; + hptp += 2; + ++revp; + } + /* Now skip invalid entries while we can */ + while (i < kvm->arch.hpt_npte && + hdr.n_invalid < 0xffff && + record_hpte(flags, hptp, hpte, revp, 0, first_pass)) { + /* found an invalid entry */ + ++hdr.n_invalid; + ++i; + hptp += 2; + ++revp; + } + + if (hdr.n_valid || hdr.n_invalid) { + /* write back the header */ + if (__copy_to_user(hptr, &hdr, sizeof(hdr))) + return -EFAULT; + nw = nb; + buf = (char __user *)lbuf; + } else { + nb = nw; + } + + /* Check if we've wrapped around the hash table */ + if (i >= kvm->arch.hpt_npte) { + i = 0; + ctx->first_pass = 0; + break; + } + } + + ctx->index = i; + + return nb; +} + +static ssize_t kvm_htab_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct kvm_htab_ctx *ctx = file->private_data; + struct kvm *kvm = ctx->kvm; + struct kvm_get_htab_header hdr; + unsigned long i, j; + unsigned long v, r; + unsigned long __user *lbuf; + unsigned long *hptp; + unsigned long tmp[2]; + ssize_t nb; + long int err, ret; + int rma_setup; + + if (!access_ok(VERIFY_READ, buf, count)) + return -EFAULT; + + /* lock out vcpus from running while we're doing this */ + mutex_lock(&kvm->lock); + rma_setup = kvm->arch.rma_setup_done; + if (rma_setup) { + kvm->arch.rma_setup_done = 0; /* temporarily */ + /* order rma_setup_done vs. vcpus_running */ + smp_mb(); + if (atomic_read(&kvm->arch.vcpus_running)) { + kvm->arch.rma_setup_done = 1; + mutex_unlock(&kvm->lock); + return -EBUSY; + } + } + + err = 0; + for (nb = 0; nb + sizeof(hdr) <= count; ) { + err = -EFAULT; + if (__copy_from_user(&hdr, buf, sizeof(hdr))) + break; + + err = 0; + if (nb + hdr.n_valid * HPTE_SIZE > count) + break; + + nb += sizeof(hdr); + buf += sizeof(hdr); + + err = -EINVAL; + i = hdr.index; + if (i >= kvm->arch.hpt_npte || + i + hdr.n_valid + hdr.n_invalid > kvm->arch.hpt_npte) + break; + + hptp = (unsigned long *)(kvm->arch.hpt_virt + (i * HPTE_SIZE)); + lbuf = (unsigned long __user *)buf; + for (j = 0; j < hdr.n_valid; ++j) { + err = -EFAULT; + if (__get_user(v, lbuf) || __get_user(r, lbuf + 1)) + goto out; + err = -EINVAL; + if (!(v & HPTE_V_VALID)) + goto out; + lbuf += 2; + nb += HPTE_SIZE; + + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + err = -EIO; + ret = kvmppc_virtmode_do_h_enter(kvm, H_EXACT, i, v, r, + tmp); + if (ret != H_SUCCESS) { + pr_err("kvm_htab_write ret %ld i=%ld v=%lx " + "r=%lx\n", ret, i, v, r); + goto out; + } + if (!rma_setup && is_vrma_hpte(v)) { + unsigned long psize = hpte_page_size(v, r); + unsigned long senc = slb_pgsize_encoding(psize); + unsigned long lpcr; + + kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T | + (VRMA_VSID << SLB_VSID_SHIFT_1T); + lpcr = kvm->arch.lpcr & ~LPCR_VRMASD; + lpcr |= senc << (LPCR_VRMASD_SH - 4); + kvm->arch.lpcr = lpcr; + rma_setup = 1; + } + ++i; + hptp += 2; + } + + for (j = 0; j < hdr.n_invalid; ++j) { + if (hptp[0] & (HPTE_V_VALID | HPTE_V_ABSENT)) + kvmppc_do_h_remove(kvm, 0, i, 0, tmp); + ++i; + hptp += 2; + } + err = 0; + } + + out: + /* Order HPTE updates vs. rma_setup_done */ + smp_wmb(); + kvm->arch.rma_setup_done = rma_setup; + mutex_unlock(&kvm->lock); + + if (err) + return err; + return nb; +} + +static int kvm_htab_release(struct inode *inode, struct file *filp) +{ + struct kvm_htab_ctx *ctx = filp->private_data; + + filp->private_data = NULL; + if (!(ctx->flags & KVM_GET_HTAB_WRITE)) + atomic_dec(&ctx->kvm->arch.hpte_mod_interest); + kvm_put_kvm(ctx->kvm); + kfree(ctx); + return 0; +} + +static struct file_operations kvm_htab_fops = { + .read = kvm_htab_read, + .write = kvm_htab_write, + .llseek = default_llseek, + .release = kvm_htab_release, +}; + +int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *ghf) +{ + int ret; + struct kvm_htab_ctx *ctx; + int rwflag; + + /* reject flags we don't recognize */ + if (ghf->flags & ~(KVM_GET_HTAB_BOLTED_ONLY | KVM_GET_HTAB_WRITE)) + return -EINVAL; + ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + kvm_get_kvm(kvm); + ctx->kvm = kvm; + ctx->index = ghf->start_index; + ctx->flags = ghf->flags; + ctx->first_pass = 1; + + rwflag = (ghf->flags & KVM_GET_HTAB_WRITE) ? O_WRONLY : O_RDONLY; + ret = anon_inode_getfd("kvm-htab", &kvm_htab_fops, ctx, rwflag); + if (ret < 0) { + kvm_put_kvm(kvm); + return ret; + } + + if (rwflag == O_RDONLY) { + mutex_lock(&kvm->slots_lock); + atomic_inc(&kvm->arch.hpte_mod_interest); + /* make sure kvmppc_do_h_enter etc. see the increment */ + synchronize_srcu_expedited(&kvm->srcu); + mutex_unlock(&kvm->slots_lock); + } + + return ret; +} + void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu) { struct kvmppc_mmu *mmu = &vcpu->arch.mmu; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 843eb754a1d5..a4f59dbcd800 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1563,18 +1563,6 @@ out: return r; } -static unsigned long slb_pgsize_encoding(unsigned long psize) -{ - unsigned long senc = 0; - - if (psize > 0x1000) { - senc = SLB_VSID_L; - if (psize == 0x10000) - senc |= SLB_VSID_LP_01; - } - return senc; -} - static void unpin_slot(struct kvm_memory_slot *memslot) { unsigned long *physp; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index d583ea15e151..70739a089560 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -354,6 +354,12 @@ int kvm_dev_ioctl_check_extension(long ext) r = 1; #else r = 0; + break; +#endif +#ifdef CONFIG_KVM_BOOK3S_64_HV + case KVM_CAP_PPC_HTAB_FD: + r = 1; + break; #endif break; case KVM_CAP_NR_VCPUS: @@ -954,6 +960,17 @@ long kvm_arch_vm_ioctl(struct file *filp, r = 0; break; } + + case KVM_PPC_GET_HTAB_FD: { + struct kvm *kvm = filp->private_data; + struct kvm_get_htab_fd ghf; + + r = -EFAULT; + if (copy_from_user(&ghf, argp, sizeof(ghf))) + break; + r = kvm_vm_ioctl_get_htab_fd(kvm, &ghf); + break; + } #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 494a84c37c3e..e6e5d4b13708 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -634,6 +634,7 @@ struct kvm_ppc_smmu_info { #endif #define KVM_CAP_IRQFD_RESAMPLE 82 #define KVM_CAP_PPC_BOOKE_WATCHDOG 83 +#define KVM_CAP_PPC_HTAB_FD 84 #ifdef KVM_CAP_IRQ_ROUTING @@ -859,6 +860,8 @@ struct kvm_s390_ucas_mapping { #define KVM_CREATE_SPAPR_TCE _IOW(KVMIO, 0xa8, struct kvm_create_spapr_tce) /* Available with KVM_CAP_RMA */ #define KVM_ALLOCATE_RMA _IOR(KVMIO, 0xa9, struct kvm_allocate_rma) +/* Available with KVM_CAP_PPC_HTAB_FD */ +#define KVM_PPC_GET_HTAB_FD _IOW(KVMIO, 0xaa, struct kvm_get_htab_fd) /* * ioctls for vcpu fds -- cgit v1.2.3 From cf66bb93e0f75e0a4ba1ec070692618fa028e994 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 3 Dec 2012 16:25:40 +0000 Subject: byteorder: allow arch to opt to use GCC intrinsics for byteswapping Since GCC 4.4, there have been __builtin_bswap32() and __builtin_bswap16() intrinsics. A __builtin_bswap16() came a little later (4.6 for PowerPC, 48 for other platforms). By using these instead of the inline assembler that most architectures have in their __arch_swabXX() macros, we let the compiler see what's actually happening. The resulting code should be at least as good, and much *better* in the cases where it can be combined with a nearby load or store, using a load-and-byteswap or store-and-byteswap instruction (e.g. lwbrx/stwbrx on PowerPC, movbe on Atom). When GCC is sufficiently recent *and* the architecture opts in to using the intrinsics by setting CONFIG_ARCH_USE_BUILTIN_BSWAP, they will be used in preference to the __arch_swabXX() macros. An architecture which does not set ARCH_USE_BUILTIN_BSWAP will continue to use its own hand-crafted macros. Signed-off-by: David Woodhouse Acked-by: H. Peter Anvin --- arch/Kconfig | 19 +++++++++++++++++++ include/linux/compiler-gcc4.h | 10 ++++++++++ include/linux/compiler-intel.h | 7 +++++++ include/uapi/linux/swab.h | 12 +++++++++--- 4 files changed, 45 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/Kconfig b/arch/Kconfig index 366ec06a5185..c31416b10586 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -112,6 +112,25 @@ config HAVE_EFFICIENT_UNALIGNED_ACCESS See Documentation/unaligned-memory-access.txt for more information on the topic of unaligned memory accesses. +config ARCH_USE_BUILTIN_BSWAP + bool + help + Modern versions of GCC (since 4.4) have builtin functions + for handling byte-swapping. Using these, instead of the old + inline assembler that the architecture code provides in the + __arch_bswapXX() macros, allows the compiler to see what's + happening and offers more opportunity for optimisation. In + particular, the compiler will be able to combine the byteswap + with a nearby load or store and use load-and-swap or + store-and-swap instructions if the architecture has them. It + should almost *never* result in code which is worse than the + hand-coded assembler in . But just in case it + does, the use of the builtins is optional. + + Any architecture with load-and-swap or store-and-swap + instructions should set this. And it shouldn't hurt to set it + on architectures that don't have such instructions. + config HAVE_SYSCALL_WRAPPERS bool diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index 412bc6c2b023..dc16a858e77c 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -63,3 +63,13 @@ #define __compiletime_warning(message) __attribute__((warning(message))) #define __compiletime_error(message) __attribute__((error(message))) #endif + +#ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP +#if __GNUC_MINOR__ >= 4 +#define __HAVE_BUILTIN_BSWAP32__ +#define __HAVE_BUILTIN_BSWAP64__ +#endif +#if __GNUC_MINOR__ >= 8 || (defined(__powerpc__) && __GNUC_MINOR__ >= 6) +#define __HAVE_BUILTIN_BSWAP16__ +#endif +#endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index d8e636e5607d..973ce10c40b6 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -29,3 +29,10 @@ #endif #define uninitialized_var(x) x + +#ifndef __HAVE_BUILTIN_BSWAP16__ +/* icc has this, but it's called _bswap16 */ +#define __HAVE_BUILTIN_BSWAP16__ +#define __builtin_bswap16 _bswap16 +#endif + diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index e811474724c2..0e011eb91b5d 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -45,7 +45,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) { -#ifdef __arch_swab16 +#ifdef __HAVE_BUILTIN_BSWAP16__ + return __builtin_bswap16(val); +#elif defined (__arch_swab16) return __arch_swab16(val); #else return ___constant_swab16(val); @@ -54,7 +56,9 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) static inline __attribute_const__ __u32 __fswab32(__u32 val) { -#ifdef __arch_swab32 +#ifdef __HAVE_BUILTIN_BSWAP32__ + return __builtin_bswap32(val); +#elif defined(__arch_swab32) return __arch_swab32(val); #else return ___constant_swab32(val); @@ -63,7 +67,9 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val) static inline __attribute_const__ __u64 __fswab64(__u64 val) { -#ifdef __arch_swab64 +#ifdef __HAVE_BUILTIN_BSWAP64__ + return __builtin_bswap64(val); +#elif defined (__arch_swab64) return __arch_swab64(val); #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; -- cgit v1.2.3 From 7508320678b7819ac6aeb89580b8622a424ce586 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 5 Dec 2012 13:51:19 -0700 Subject: PCI: Add standard PCIe Capability Link ASPM field names Add standard #defines for ASPM fields in PCI Express Link Capability and Link Control registers. Previously we used PCIE_LINK_STATE_L0S and PCIE_LINK_STATE_L1 directly, but these are defined for the Linux ASPM interfaces, e.g., pci_disable_link_state(), and only coincidentally match the actual register bits. PCIE_LINK_STATE_CLKPM, also part of that interface, does not match the register bit. Signed-off-by: Bjorn Helgaas Reviewed-by: Kenji Kaneshige Acked-by: Kenji Kaneshige --- drivers/pci/pcie/aspm.c | 11 ++++++----- include/uapi/linux/pci_regs.h | 2 ++ 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index 213753b283a6..c2faf9d0ffde 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -427,7 +427,8 @@ static void pcie_aspm_cap_init(struct pcie_link_state *link, int blacklist) static void pcie_config_aspm_dev(struct pci_dev *pdev, u32 val) { - pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, 0x3, val); + pcie_capability_clear_and_set_word(pdev, PCI_EXP_LNKCTL, + PCI_EXP_LNKCTL_ASPMC, val); } static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) @@ -442,12 +443,12 @@ static void pcie_config_aspm_link(struct pcie_link_state *link, u32 state) return; /* Convert ASPM state to upstream/downstream ASPM register state */ if (state & ASPM_STATE_L0S_UP) - dwstream |= PCIE_LINK_STATE_L0S; + dwstream |= PCI_EXP_LNKCTL_ASPM_L0S; if (state & ASPM_STATE_L0S_DW) - upstream |= PCIE_LINK_STATE_L0S; + upstream |= PCI_EXP_LNKCTL_ASPM_L0S; if (state & ASPM_STATE_L1) { - upstream |= PCIE_LINK_STATE_L1; - dwstream |= PCIE_LINK_STATE_L1; + upstream |= PCI_EXP_LNKCTL_ASPM_L1; + dwstream |= PCI_EXP_LNKCTL_ASPM_L1; } /* * Spec 2.0 suggests all functions should be configured the diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 4cca834f9abd..0b6dbe49dc1e 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -469,6 +469,8 @@ #define PCI_EXP_LNKCAP_PN 0xff000000 /* Port Number */ #define PCI_EXP_LNKCTL 16 /* Link Control */ #define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#define PCI_EXP_LNKCTL_ASPM_L0S 0x01 /* L0s Enable */ +#define PCI_EXP_LNKCTL_ASPM_L1 0x02 /* L1 Enable */ #define PCI_EXP_LNKCTL_RCB 0x0008 /* Read Completion Boundary */ #define PCI_EXP_LNKCTL_LD 0x0010 /* Link Disable */ #define PCI_EXP_LNKCTL_RL 0x0020 /* Retrain Link */ -- cgit v1.2.3 From ee07c6e7a6f8a25c18f0a6b18152fbd7499245f6 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Fri, 7 Dec 2012 00:04:48 +0000 Subject: bridge: export multicast database via netlink V5: fix two bugs pointed out by Thomas remove seq check for now, mark it as TODO V4: remove some useless #include some coding style fix V3: drop debugging printk's update selinux perm table as well V2: drop patch 1/2, export ifindex directly Redesign netlink attributes Improve netlink seq check Handle IPv6 addr as well This patch exports bridge multicast database via netlink message type RTM_GETMDB. Similar to fdb, but currently bridge-specific. We may need to support modify multicast database too (RTM_{ADD,DEL}MDB). (Thanks to Thomas for patient reviews) Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Cc: Jesper Dangaard Brouer Signed-off-by: Cong Wang Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 55 ++++++++++++++ include/uapi/linux/rtnetlink.h | 3 + net/bridge/Makefile | 2 +- net/bridge/br_mdb.c | 163 +++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 1 + net/bridge/br_private.h | 1 + security/selinux/nlmsgtab.c | 1 + 7 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 net/bridge/br_mdb.c (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index b3885791e11e..9a0f6ff0d7e7 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -116,4 +116,59 @@ enum { __IFLA_BRIDGE_MAX, }; #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1) + +/* Bridge multicast database attributes + * [MDBA_MDB] = { + * [MDBA_MDB_ENTRY] = { + * [MDBA_MDB_ENTRY_INFO] + * } + * } + * [MDBA_ROUTER] = { + * [MDBA_ROUTER_PORT] + * } + */ +enum { + MDBA_UNSPEC, + MDBA_MDB, + MDBA_ROUTER, + __MDBA_MAX, +}; +#define MDBA_MAX (__MDBA_MAX - 1) + +enum { + MDBA_MDB_UNSPEC, + MDBA_MDB_ENTRY, + __MDBA_MDB_MAX, +}; +#define MDBA_MDB_MAX (__MDBA_MDB_MAX - 1) + +enum { + MDBA_MDB_ENTRY_UNSPEC, + MDBA_MDB_ENTRY_INFO, + __MDBA_MDB_ENTRY_MAX, +}; +#define MDBA_MDB_ENTRY_MAX (__MDBA_MDB_ENTRY_MAX - 1) + +enum { + MDBA_ROUTER_UNSPEC, + MDBA_ROUTER_PORT, + __MDBA_ROUTER_MAX, +}; +#define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) + +struct br_port_msg { + __u32 ifindex; +}; + +struct br_mdb_entry { + __u32 ifindex; + struct { + union { + __be32 ip4; + struct in6_addr ip6; + } u; + __be16 proto; + } addr; +}; + #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 33d29cea37ea..354a1e7d32a3 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -125,6 +125,9 @@ enum { RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF + RTM_GETMDB = 86, +#define RTM_GETMDB RTM_GETMDB + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/net/bridge/Makefile b/net/bridge/Makefile index d0359ea8ee79..e859098f5ee9 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -12,6 +12,6 @@ bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o bridge-$(CONFIG_BRIDGE_NETFILTER) += br_netfilter.o -bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o +bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o obj-$(CONFIG_BRIDGE_NF_EBTABLES) += netfilter/ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c new file mode 100644 index 000000000000..edc0d731f6b2 --- /dev/null +++ b/net/bridge/br_mdb.c @@ -0,0 +1,163 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#if IS_ENABLED(CONFIG_IPV6) +#include +#endif + +#include "br_private.h" + +static int br_rports_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p; + struct hlist_node *n; + struct nlattr *nest; + + if (!br->multicast_router || hlist_empty(&br->router_list)) + return 0; + + nest = nla_nest_start(skb, MDBA_ROUTER); + if (nest == NULL) + return -EMSGSIZE; + + hlist_for_each_entry_rcu(p, n, &br->router_list, rlist) { + if (p && nla_put_u32(skb, MDBA_ROUTER_PORT, p->dev->ifindex)) + goto fail; + } + + nla_nest_end(skb, nest); + return 0; +fail: + nla_nest_cancel(skb, nest); + return -EMSGSIZE; +} + +static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + struct net_bridge_mdb_htable *mdb; + struct nlattr *nest, *nest2; + int i, err = 0; + int idx = 0, s_idx = cb->args[1]; + + if (br->multicast_disabled) + return 0; + + mdb = rcu_dereference(br->mdb); + if (!mdb) + return 0; + + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + return -EMSGSIZE; + + for (i = 0; i < mdb->max; i++) { + struct hlist_node *h; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p, **pp; + struct net_bridge_port *port; + + hlist_for_each_entry_rcu(mp, h, &mdb->mhash[i], hlist[mdb->ver]) { + if (idx < s_idx) + goto skip; + + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) { + err = -EMSGSIZE; + goto out; + } + + for (pp = &mp->ports; + (p = rcu_dereference(*pp)) != NULL; + pp = &p->next) { + port = p->port; + if (port) { + struct br_mdb_entry e; + e.ifindex = port->dev->ifindex; + e.addr.u.ip4 = p->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + e.addr.u.ip6 = p->addr.u.ip6; +#endif + e.addr.proto = p->addr.proto; + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(e), &e)) { + nla_nest_cancel(skb, nest2); + err = -EMSGSIZE; + goto out; + } + } + } + nla_nest_end(skb, nest2); + skip: + idx++; + } + } + +out: + cb->args[1] = idx; + nla_nest_end(skb, nest); + return err; +} + +static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct net_device *dev; + struct net *net = sock_net(skb->sk); + struct nlmsghdr *nlh = NULL; + int idx = 0, s_idx; + + s_idx = cb->args[0]; + + rcu_read_lock(); + + /* TODO: in case of rehashing, we need to check + * consistency for dumping. + */ + cb->seq = net->dev_base_seq; + + for_each_netdev_rcu(net, dev) { + if (dev->priv_flags & IFF_EBRIDGE) { + struct br_port_msg *bpm; + + if (idx < s_idx) + goto skip; + + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_GETMDB, + sizeof(*bpm), NLM_F_MULTI); + if (nlh == NULL) + break; + + bpm = nlmsg_data(nlh); + bpm->ifindex = dev->ifindex; + if (br_mdb_fill_info(skb, cb, dev) < 0) + goto out; + if (br_rports_fill_info(skb, cb, dev) < 0) + goto out; + + cb->args[1] = 0; + nlmsg_end(skb, nlh); + skip: + idx++; + } + } + +out: + if (nlh) + nlmsg_end(skb, nlh); + rcu_read_unlock(); + cb->args[0] = idx; + return skb->len; +} + +void br_mdb_init(void) +{ + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); +} diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index a2a7a1a79081..68e375ac93bd 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -1605,6 +1605,7 @@ void br_multicast_init(struct net_bridge *br) br_multicast_querier_expired, (unsigned long)br); setup_timer(&br->multicast_query_timer, br_multicast_query_expired, (unsigned long)br); + br_mdb_init(); } void br_multicast_open(struct net_bridge *br) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index cd86222cf5e3..ae0a6ec0a702 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -433,6 +433,7 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p, extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +extern void br_mdb_init(void); static inline bool br_multicast_is_router(struct net_bridge *br) { diff --git a/security/selinux/nlmsgtab.c b/security/selinux/nlmsgtab.c index d309e7f472d8..163aaa77d5aa 100644 --- a/security/selinux/nlmsgtab.c +++ b/security/selinux/nlmsgtab.c @@ -67,6 +67,7 @@ static struct nlmsg_perm nlmsg_route_perms[] = { RTM_GETADDRLABEL, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_GETDCB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, { RTM_SETDCB, NETLINK_ROUTE_SOCKET__NLMSG_WRITE }, + { RTM_GETMDB, NETLINK_ROUTE_SOCKET__NLMSG_READ }, }; static struct nlmsg_perm nlmsg_tcpdiag_perms[] = -- cgit v1.2.3 From 986a4f4d452dec004697f667439d27c3fda9c928 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 7 Dec 2012 07:04:56 +0000 Subject: virtio_net: multiqueue support This patch adds the multiqueue (VIRTIO_NET_F_MQ) support to virtio_net driver. VIRTIO_NET_F_MQ capable device could allow the driver to do packet transmission and reception through multiple queue pairs and does the packet steering to get better performance. By default, one one queue pair is used, user could change the number of queue pairs by ethtool in the next patch. When multiple queue pairs is used and the number of queue pairs is equal to the number of vcpus. Driver does the following optimizations to implement per-cpu virt queue pairs: - select the txq based on the smp processor id. - smp affinity hint to the cpu that owns the queue pairs. This could be used with the flow steering support of the device to guarantee the packets of a single flow is handled by the same cpu. Signed-off-by: Krishna Kumar Signed-off-by: Jason Wang Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 473 +++++++++++++++++++++++++++++++--------- include/uapi/linux/virtio_net.h | 27 +++ 2 files changed, 402 insertions(+), 98 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 02a71021565e..c0830488a390 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -58,6 +58,9 @@ struct send_queue { /* TX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; + + /* Name of the send queue: output.$index */ + char name[40]; }; /* Internal representation of a receive virtqueue */ @@ -75,22 +78,34 @@ struct receive_queue { /* RX: fragments + linear part + virtio header */ struct scatterlist sg[MAX_SKB_FRAGS + 2]; + + /* Name of this receive queue: input.$index */ + char name[40]; }; struct virtnet_info { struct virtio_device *vdev; struct virtqueue *cvq; struct net_device *dev; - struct send_queue sq; - struct receive_queue rq; + struct send_queue *sq; + struct receive_queue *rq; unsigned int status; + /* Max # of queue pairs supported by the device */ + u16 max_queue_pairs; + + /* # of queue pairs currently used by the driver */ + u16 curr_queue_pairs; + /* I like... big packets and I cannot lie! */ bool big_packets; /* Host will merge rx buffers for big packets (shake it! shake it!) */ bool mergeable_rx_bufs; + /* Has control virtqueue */ + bool has_cvq; + /* enable config space updates */ bool config_enable; @@ -105,6 +120,9 @@ struct virtnet_info { /* Lock for config space updates */ struct mutex config_lock; + + /* Does the affinity hint is set for virtqueues? */ + bool affinity_hint_set; }; struct skb_vnet_hdr { @@ -125,6 +143,29 @@ struct padded_vnet_hdr { char padding[6]; }; +/* Converting between virtqueue no. and kernel tx/rx queue no. + * 0:rx0 1:tx0 2:rx1 3:tx1 ... 2N:rxN 2N+1:txN 2N+2:cvq + */ +static int vq2txq(struct virtqueue *vq) +{ + return (virtqueue_get_queue_index(vq) - 1) / 2; +} + +static int txq2vq(int txq) +{ + return txq * 2 + 1; +} + +static int vq2rxq(struct virtqueue *vq) +{ + return virtqueue_get_queue_index(vq) / 2; +} + +static int rxq2vq(int rxq) +{ + return rxq * 2; +} + static inline struct skb_vnet_hdr *skb_vnet_hdr(struct sk_buff *skb) { return (struct skb_vnet_hdr *)skb->cb; @@ -165,7 +206,7 @@ static void skb_xmit_done(struct virtqueue *vq) virtqueue_disable_cb(vq); /* We were probably waiting for more output buffers. */ - netif_wake_queue(vi->dev); + netif_wake_subqueue(vi->dev, vq2txq(vq)); } static void set_skb_frag(struct sk_buff *skb, struct page *page, @@ -502,7 +543,7 @@ static bool try_fill_recv(struct receive_queue *rq, gfp_t gfp) static void skb_recv_done(struct virtqueue *rvq) { struct virtnet_info *vi = rvq->vdev->priv; - struct receive_queue *rq = &vi->rq; + struct receive_queue *rq = &vi->rq[vq2rxq(rvq)]; /* Schedule NAPI, Suppress further interrupts if successful. */ if (napi_schedule_prep(&rq->napi)) { @@ -532,15 +573,21 @@ static void refill_work(struct work_struct *work) struct virtnet_info *vi = container_of(work, struct virtnet_info, refill.work); bool still_empty; + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct receive_queue *rq = &vi->rq[i]; - napi_disable(&vi->rq.napi); - still_empty = !try_fill_recv(&vi->rq, GFP_KERNEL); - virtnet_napi_enable(&vi->rq); + napi_disable(&rq->napi); + still_empty = !try_fill_recv(rq, GFP_KERNEL); + virtnet_napi_enable(rq); - /* In theory, this can happen: if we don't get any buffers in - * we will *never* try to fill again. */ - if (still_empty) - schedule_delayed_work(&vi->refill, HZ/2); + /* In theory, this can happen: if we don't get any buffers in + * we will *never* try to fill again. + */ + if (still_empty) + schedule_delayed_work(&vi->refill, HZ/2); + } } static int virtnet_poll(struct napi_struct *napi, int budget) @@ -578,6 +625,21 @@ again: return received; } +static int virtnet_open(struct net_device *dev) +{ + struct virtnet_info *vi = netdev_priv(dev); + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + /* Make sure we have some buffers: if oom use wq. */ + if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); + virtnet_napi_enable(&vi->rq[i]); + } + + return 0; +} + static unsigned int free_old_xmit_skbs(struct send_queue *sq) { struct sk_buff *skb; @@ -650,7 +712,8 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb) static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); - struct send_queue *sq = &vi->sq; + int qnum = skb_get_queue_mapping(skb); + struct send_queue *sq = &vi->sq[qnum]; int capacity; /* Free up any pending old buffers before queueing new ones. */ @@ -664,13 +727,14 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) if (likely(capacity == -ENOMEM)) { if (net_ratelimit()) dev_warn(&dev->dev, - "TX queue failure: out of memory\n"); + "TXQ (%d) failure: out of memory\n", + qnum); } else { dev->stats.tx_fifo_errors++; if (net_ratelimit()) dev_warn(&dev->dev, - "Unexpected TX queue failure: %d\n", - capacity); + "Unexpected TXQ (%d) failure: %d\n", + qnum, capacity); } dev->stats.tx_dropped++; kfree_skb(skb); @@ -685,12 +749,12 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) /* Apparently nice girls don't return TX_BUSY; stop the queue * before it gets out of hand. Naturally, this wastes entries. */ if (capacity < 2+MAX_SKB_FRAGS) { - netif_stop_queue(dev); + netif_stop_subqueue(dev, qnum); if (unlikely(!virtqueue_enable_cb_delayed(sq->vq))) { /* More just got used, free them then recheck. */ capacity += free_old_xmit_skbs(sq); if (capacity >= 2+MAX_SKB_FRAGS) { - netif_start_queue(dev); + netif_start_subqueue(dev, qnum); virtqueue_disable_cb(sq->vq); } } @@ -758,23 +822,13 @@ static struct rtnl_link_stats64 *virtnet_stats(struct net_device *dev, static void virtnet_netpoll(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + int i; - napi_schedule(&vi->rq.napi); + for (i = 0; i < vi->curr_queue_pairs; i++) + napi_schedule(&vi->rq[i].napi); } #endif -static int virtnet_open(struct net_device *dev) -{ - struct virtnet_info *vi = netdev_priv(dev); - - /* Make sure we have some buffers: if oom use wq. */ - if (!try_fill_recv(&vi->rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); - - virtnet_napi_enable(&vi->rq); - return 0; -} - /* * Send command via the control virtqueue and check status. Commands * supported by the hypervisor, as indicated by feature bits, should @@ -830,13 +884,39 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi) rtnl_unlock(); } +static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) +{ + struct scatterlist sg; + struct virtio_net_ctrl_mq s; + struct net_device *dev = vi->dev; + + if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) + return 0; + + s.virtqueue_pairs = queue_pairs; + sg_init_one(&sg, &s, sizeof(s)); + + if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)){ + dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n", + queue_pairs); + return -EINVAL; + } else + vi->curr_queue_pairs = queue_pairs; + + return 0; +} + static int virtnet_close(struct net_device *dev) { struct virtnet_info *vi = netdev_priv(dev); + int i; /* Make sure refill_work doesn't re-enable napi! */ cancel_delayed_work_sync(&vi->refill); - napi_disable(&vi->rq.napi); + + for (i = 0; i < vi->max_queue_pairs; i++) + napi_disable(&vi->rq[i].napi); return 0; } @@ -943,13 +1023,41 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid) return 0; } +static void virtnet_set_affinity(struct virtnet_info *vi, bool set) +{ + int i; + + /* In multiqueue mode, when the number of cpu is equal to the number of + * queue pairs, we let the queue pairs to be private to one cpu by + * setting the affinity hint to eliminate the contention. + */ + if ((vi->curr_queue_pairs == 1 || + vi->max_queue_pairs != num_online_cpus()) && set) { + if (vi->affinity_hint_set) + set = false; + else + return; + } + + for (i = 0; i < vi->max_queue_pairs; i++) { + int cpu = set ? i : -1; + virtqueue_set_affinity(vi->rq[i].vq, cpu); + virtqueue_set_affinity(vi->sq[i].vq, cpu); + } + + if (set) + vi->affinity_hint_set = true; + else + vi->affinity_hint_set = false; +} + static void virtnet_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) { struct virtnet_info *vi = netdev_priv(dev); - ring->rx_max_pending = virtqueue_get_vring_size(vi->rq.vq); - ring->tx_max_pending = virtqueue_get_vring_size(vi->sq.vq); + ring->rx_max_pending = virtqueue_get_vring_size(vi->rq[0].vq); + ring->tx_max_pending = virtqueue_get_vring_size(vi->sq[0].vq); ring->rx_pending = ring->rx_max_pending; ring->tx_pending = ring->tx_max_pending; } @@ -984,6 +1092,21 @@ static int virtnet_change_mtu(struct net_device *dev, int new_mtu) return 0; } +/* To avoid contending a lock hold by a vcpu who would exit to host, select the + * txq based on the processor id. + * TODO: handle cpu hotplug. + */ +static u16 virtnet_select_queue(struct net_device *dev, struct sk_buff *skb) +{ + int txq = skb_rx_queue_recorded(skb) ? skb_get_rx_queue(skb) : + smp_processor_id(); + + while (unlikely(txq >= dev->real_num_tx_queues)) + txq -= dev->real_num_tx_queues; + + return txq; +} + static const struct net_device_ops virtnet_netdev = { .ndo_open = virtnet_open, .ndo_stop = virtnet_close, @@ -995,6 +1118,7 @@ static const struct net_device_ops virtnet_netdev = { .ndo_get_stats64 = virtnet_stats, .ndo_vlan_rx_add_vid = virtnet_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = virtnet_vlan_rx_kill_vid, + .ndo_select_queue = virtnet_select_queue, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = virtnet_netpoll, #endif @@ -1030,10 +1154,10 @@ static void virtnet_config_changed_work(struct work_struct *work) if (vi->status & VIRTIO_NET_S_LINK_UP) { netif_carrier_on(vi->dev); - netif_wake_queue(vi->dev); + netif_tx_wake_all_queues(vi->dev); } else { netif_carrier_off(vi->dev); - netif_stop_queue(vi->dev); + netif_tx_stop_all_queues(vi->dev); } done: mutex_unlock(&vi->config_lock); @@ -1046,48 +1170,203 @@ static void virtnet_config_changed(struct virtio_device *vdev) schedule_work(&vi->config_work); } +static void virtnet_free_queues(struct virtnet_info *vi) +{ + kfree(vi->rq); + kfree(vi->sq); +} + +static void free_receive_bufs(struct virtnet_info *vi) +{ + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + while (vi->rq[i].pages) + __free_pages(get_a_page(&vi->rq[i], GFP_KERNEL), 0); + } +} + +static void free_unused_bufs(struct virtnet_info *vi) +{ + void *buf; + int i; + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct virtqueue *vq = vi->sq[i].vq; + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) + dev_kfree_skb(buf); + } + + for (i = 0; i < vi->max_queue_pairs; i++) { + struct virtqueue *vq = vi->rq[i].vq; + + while ((buf = virtqueue_detach_unused_buf(vq)) != NULL) { + if (vi->mergeable_rx_bufs || vi->big_packets) + give_pages(&vi->rq[i], buf); + else + dev_kfree_skb(buf); + --vi->rq[i].num; + } + BUG_ON(vi->rq[i].num != 0); + } +} + static void virtnet_del_vqs(struct virtnet_info *vi) { struct virtio_device *vdev = vi->vdev; + virtnet_set_affinity(vi, false); + vdev->config->del_vqs(vdev); + + virtnet_free_queues(vi); } -static int init_vqs(struct virtnet_info *vi) +static int virtnet_find_vqs(struct virtnet_info *vi) { - struct virtqueue *vqs[3]; - vq_callback_t *callbacks[] = { skb_recv_done, skb_xmit_done, NULL}; - const char *names[] = { "input", "output", "control" }; - int nvqs, err; - - /* We expect two virtqueues, receive then send, - * and optionally control. */ - nvqs = virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2; - - err = vi->vdev->config->find_vqs(vi->vdev, nvqs, vqs, callbacks, names); - if (err) - return err; + vq_callback_t **callbacks; + struct virtqueue **vqs; + int ret = -ENOMEM; + int i, total_vqs; + const char **names; + + /* We expect 1 RX virtqueue followed by 1 TX virtqueue, followed by + * possible N-1 RX/TX queue pairs used in multiqueue mode, followed by + * possible control vq. + */ + total_vqs = vi->max_queue_pairs * 2 + + virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ); + + /* Allocate space for find_vqs parameters */ + vqs = kzalloc(total_vqs * sizeof(*vqs), GFP_KERNEL); + if (!vqs) + goto err_vq; + callbacks = kmalloc(total_vqs * sizeof(*callbacks), GFP_KERNEL); + if (!callbacks) + goto err_callback; + names = kmalloc(total_vqs * sizeof(*names), GFP_KERNEL); + if (!names) + goto err_names; + + /* Parameters for control virtqueue, if any */ + if (vi->has_cvq) { + callbacks[total_vqs - 1] = NULL; + names[total_vqs - 1] = "control"; + } - vi->rq.vq = vqs[0]; - vi->sq.vq = vqs[1]; + /* Allocate/initialize parameters for send/receive virtqueues */ + for (i = 0; i < vi->max_queue_pairs; i++) { + callbacks[rxq2vq(i)] = skb_recv_done; + callbacks[txq2vq(i)] = skb_xmit_done; + sprintf(vi->rq[i].name, "input.%d", i); + sprintf(vi->sq[i].name, "output.%d", i); + names[rxq2vq(i)] = vi->rq[i].name; + names[txq2vq(i)] = vi->sq[i].name; + } - if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ)) { - vi->cvq = vqs[2]; + ret = vi->vdev->config->find_vqs(vi->vdev, total_vqs, vqs, callbacks, + names); + if (ret) + goto err_find; + if (vi->has_cvq) { + vi->cvq = vqs[total_vqs - 1]; if (virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VLAN)) vi->dev->features |= NETIF_F_HW_VLAN_FILTER; } + + for (i = 0; i < vi->max_queue_pairs; i++) { + vi->rq[i].vq = vqs[rxq2vq(i)]; + vi->sq[i].vq = vqs[txq2vq(i)]; + } + + kfree(names); + kfree(callbacks); + kfree(vqs); + return 0; + +err_find: + kfree(names); +err_names: + kfree(callbacks); +err_callback: + kfree(vqs); +err_vq: + return ret; +} + +static int virtnet_alloc_queues(struct virtnet_info *vi) +{ + int i; + + vi->sq = kzalloc(sizeof(*vi->sq) * vi->max_queue_pairs, GFP_KERNEL); + if (!vi->sq) + goto err_sq; + vi->rq = kzalloc(sizeof(*vi->rq) * vi->max_queue_pairs, GFP_KERNEL); + if (!vi->sq) + goto err_rq; + + INIT_DELAYED_WORK(&vi->refill, refill_work); + for (i = 0; i < vi->max_queue_pairs; i++) { + vi->rq[i].pages = NULL; + netif_napi_add(vi->dev, &vi->rq[i].napi, virtnet_poll, + napi_weight); + + sg_init_table(vi->rq[i].sg, ARRAY_SIZE(vi->rq[i].sg)); + sg_init_table(vi->sq[i].sg, ARRAY_SIZE(vi->sq[i].sg)); + } + + return 0; + +err_rq: + kfree(vi->sq); +err_sq: + return -ENOMEM; +} + +static int init_vqs(struct virtnet_info *vi) +{ + int ret; + + /* Allocate send & receive queues */ + ret = virtnet_alloc_queues(vi); + if (ret) + goto err; + + ret = virtnet_find_vqs(vi); + if (ret) + goto err_free; + + virtnet_set_affinity(vi, true); + return 0; + +err_free: + virtnet_free_queues(vi); +err: + return ret; } static int virtnet_probe(struct virtio_device *vdev) { - int err; + int i, err; struct net_device *dev; struct virtnet_info *vi; + u16 max_queue_pairs; + + /* Find if host supports multiqueue virtio_net device */ + err = virtio_config_val(vdev, VIRTIO_NET_F_MQ, + offsetof(struct virtio_net_config, + max_virtqueue_pairs), &max_queue_pairs); + + /* We need at least 2 queue's */ + if (err || max_queue_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + max_queue_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX || + !virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) + max_queue_pairs = 1; /* Allocate ourselves a network device with room for our info */ - dev = alloc_etherdev(sizeof(struct virtnet_info)); + dev = alloc_etherdev_mq(sizeof(struct virtnet_info), max_queue_pairs); if (!dev) return -ENOMEM; @@ -1133,22 +1412,17 @@ static int virtnet_probe(struct virtio_device *vdev) /* Set up our device-specific information */ vi = netdev_priv(dev); - netif_napi_add(dev, &vi->rq.napi, virtnet_poll, napi_weight); vi->dev = dev; vi->vdev = vdev; vdev->priv = vi; - vi->rq.pages = NULL; vi->stats = alloc_percpu(struct virtnet_stats); err = -ENOMEM; if (vi->stats == NULL) goto free; - INIT_DELAYED_WORK(&vi->refill, refill_work); mutex_init(&vi->config_lock); vi->config_enable = true; INIT_WORK(&vi->config_work, virtnet_config_changed_work); - sg_init_table(vi->rq.sg, ARRAY_SIZE(vi->rq.sg)); - sg_init_table(vi->sq.sg, ARRAY_SIZE(vi->sq.sg)); /* If we can receive ANY GSO packets, we must allocate large ones. */ if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || @@ -1159,10 +1433,21 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_MRG_RXBUF)) vi->mergeable_rx_bufs = true; + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_VQ)) + vi->has_cvq = true; + + /* Use single tx/rx queue pair as default */ + vi->curr_queue_pairs = 1; + vi->max_queue_pairs = max_queue_pairs; + + /* Allocate/initialize the rx/tx queues, and invoke find_vqs */ err = init_vqs(vi); if (err) goto free_stats; + netif_set_real_num_tx_queues(dev, 1); + netif_set_real_num_rx_queues(dev, 1); + err = register_netdev(dev); if (err) { pr_debug("virtio_net: registering device failed\n"); @@ -1170,12 +1455,15 @@ static int virtnet_probe(struct virtio_device *vdev) } /* Last of all, set up some receive buffers. */ - try_fill_recv(&vi->rq, GFP_KERNEL); - - /* If we didn't even get one input buffer, we're useless. */ - if (vi->rq.num == 0) { - err = -ENOMEM; - goto unregister; + for (i = 0; i < vi->max_queue_pairs; i++) { + try_fill_recv(&vi->rq[i], GFP_KERNEL); + + /* If we didn't even get one input buffer, we're useless. */ + if (vi->rq[i].num == 0) { + free_unused_bufs(vi); + err = -ENOMEM; + goto free_recv_bufs; + } } /* Assume link up if device can't report link status, @@ -1188,12 +1476,16 @@ static int virtnet_probe(struct virtio_device *vdev) netif_carrier_on(dev); } - pr_debug("virtnet: registered device %s\n", dev->name); + pr_debug("virtnet: registered device %s with %d RX and TX vq's\n", + dev->name, max_queue_pairs); + return 0; -unregister: +free_recv_bufs: + free_receive_bufs(vi); unregister_netdev(dev); free_vqs: + cancel_delayed_work_sync(&vi->refill); virtnet_del_vqs(vi); free_stats: free_percpu(vi->stats); @@ -1202,28 +1494,6 @@ free: return err; } -static void free_unused_bufs(struct virtnet_info *vi) -{ - void *buf; - while (1) { - buf = virtqueue_detach_unused_buf(vi->sq.vq); - if (!buf) - break; - dev_kfree_skb(buf); - } - while (1) { - buf = virtqueue_detach_unused_buf(vi->rq.vq); - if (!buf) - break; - if (vi->mergeable_rx_bufs || vi->big_packets) - give_pages(&vi->rq, buf); - else - dev_kfree_skb(buf); - --vi->rq.num; - } - BUG_ON(vi->rq.num != 0); -} - static void remove_vq_common(struct virtnet_info *vi) { vi->vdev->config->reset(vi->vdev); @@ -1231,10 +1501,9 @@ static void remove_vq_common(struct virtnet_info *vi) /* Free unused buffers in both send and recv, if any. */ free_unused_bufs(vi); - virtnet_del_vqs(vi); + free_receive_bufs(vi); - while (vi->rq.pages) - __free_pages(get_a_page(&vi->rq, GFP_KERNEL), 0); + virtnet_del_vqs(vi); } static void virtnet_remove(struct virtio_device *vdev) @@ -1260,6 +1529,7 @@ static void virtnet_remove(struct virtio_device *vdev) static int virtnet_freeze(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; + int i; /* Prevent config work handler from accessing the device */ mutex_lock(&vi->config_lock); @@ -1270,7 +1540,10 @@ static int virtnet_freeze(struct virtio_device *vdev) cancel_delayed_work_sync(&vi->refill); if (netif_running(vi->dev)) - napi_disable(&vi->rq.napi); + for (i = 0; i < vi->max_queue_pairs; i++) { + napi_disable(&vi->rq[i].napi); + netif_napi_del(&vi->rq[i].napi); + } remove_vq_common(vi); @@ -1282,24 +1555,28 @@ static int virtnet_freeze(struct virtio_device *vdev) static int virtnet_restore(struct virtio_device *vdev) { struct virtnet_info *vi = vdev->priv; - int err; + int err, i; err = init_vqs(vi); if (err) return err; if (netif_running(vi->dev)) - virtnet_napi_enable(&vi->rq); + for (i = 0; i < vi->max_queue_pairs; i++) + virtnet_napi_enable(&vi->rq[i]); netif_device_attach(vi->dev); - if (!try_fill_recv(&vi->rq, GFP_KERNEL)) - schedule_delayed_work(&vi->refill, 0); + for (i = 0; i < vi->max_queue_pairs; i++) + if (!try_fill_recv(&vi->rq[i], GFP_KERNEL)) + schedule_delayed_work(&vi->refill, 0); mutex_lock(&vi->config_lock); vi->config_enable = true; mutex_unlock(&vi->config_lock); + virtnet_set_queues(vi, vi->curr_queue_pairs); + return 0; } #endif @@ -1317,7 +1594,7 @@ static unsigned int features[] = { VIRTIO_NET_F_GUEST_ECN, VIRTIO_NET_F_GUEST_UFO, VIRTIO_NET_F_MRG_RXBUF, VIRTIO_NET_F_STATUS, VIRTIO_NET_F_CTRL_VQ, VIRTIO_NET_F_CTRL_RX, VIRTIO_NET_F_CTRL_VLAN, - VIRTIO_NET_F_GUEST_ANNOUNCE, + VIRTIO_NET_F_GUEST_ANNOUNCE, VIRTIO_NET_F_MQ, }; static struct virtio_driver virtio_net_driver = { diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index 2470f541af50..848e3584d7c8 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -51,6 +51,8 @@ #define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the * network */ +#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ #define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ @@ -60,6 +62,11 @@ struct virtio_net_config { __u8 mac[6]; /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ __u16 status; + /* Maximum number of each of transmit and receive queues; + * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ. + * Legal values are between 1 and 0x8000 + */ + __u16 max_virtqueue_pairs; } __attribute__((packed)); /* This is the first element of the scatter-gather list. If you don't @@ -166,4 +173,24 @@ struct virtio_net_ctrl_mac { #define VIRTIO_NET_CTRL_ANNOUNCE 3 #define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0 +/* + * Control Receive Flow Steering + * + * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET + * enables Receive Flow Steering, specifying the number of the transmit and + * receive queues that will be used. After the command is consumed and acked by + * the device, the device will not steer new packets on receive virtqueues + * other than specified nor read from transmit virtqueues other than specified. + * Accordingly, driver should not transmit new packets on virtqueues other than + * specified. + */ +struct virtio_net_ctrl_mq { + u16 virtqueue_pairs; +}; + +#define VIRTIO_NET_CTRL_MQ 4 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 + #define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + #endif /* _LINUX_VIRTIO_NET_H */ -- cgit v1.2.3 From 39a53e0ce0df01b3cf4bb898c7ae2fd2189647d5 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 28 Nov 2012 13:37:31 +0900 Subject: f2fs: add superblock and major in-memory structure This adds the following major in-memory structures in f2fs. - f2fs_sb_info: contains f2fs-specific information, two special inode pointers for node and meta address spaces, and orphan inode management. - f2fs_inode_info: contains vfs_inode and other fs-specific information. - f2fs_nm_info: contains node manager information such as NAT entry cache, free nid list, and NAT page management. - f2fs_node_info: represents a node as node id, inode number, block address, and its version. - f2fs_sm_info: contains segment manager information such as SIT entry cache, free segment map, current active logs, dirty segment management, and segment utilization. The specific structures are sit_info, free_segmap_info, dirty_seglist_info, curseg_info. In addition, add F2FS_SUPER_MAGIC in magic.h. Signed-off-by: Chul Lee Signed-off-by: Jaegeuk Kim --- fs/f2fs/f2fs.h | 1062 ++++++++++++++++++++++++++++++++++++++++++++ fs/f2fs/node.h | 353 +++++++++++++++ fs/f2fs/segment.h | 615 +++++++++++++++++++++++++ include/uapi/linux/magic.h | 1 + 4 files changed, 2031 insertions(+) create mode 100644 fs/f2fs/f2fs.h create mode 100644 fs/f2fs/node.h create mode 100644 fs/f2fs/segment.h (limited to 'include/uapi/linux') diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h new file mode 100644 index 000000000000..7aa70b54172d --- /dev/null +++ b/fs/f2fs/f2fs.h @@ -0,0 +1,1062 @@ +/** + * fs/f2fs/f2fs.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef _LINUX_F2FS_H +#define _LINUX_F2FS_H + +#include +#include +#include +#include +#include +#include +#include + +/* + * For mount options + */ +#define F2FS_MOUNT_BG_GC 0x00000001 +#define F2FS_MOUNT_DISABLE_ROLL_FORWARD 0x00000002 +#define F2FS_MOUNT_DISCARD 0x00000004 +#define F2FS_MOUNT_NOHEAP 0x00000008 +#define F2FS_MOUNT_XATTR_USER 0x00000010 +#define F2FS_MOUNT_POSIX_ACL 0x00000020 +#define F2FS_MOUNT_DISABLE_EXT_IDENTIFY 0x00000040 + +#define clear_opt(sbi, option) (sbi->mount_opt.opt &= ~F2FS_MOUNT_##option) +#define set_opt(sbi, option) (sbi->mount_opt.opt |= F2FS_MOUNT_##option) +#define test_opt(sbi, option) (sbi->mount_opt.opt & F2FS_MOUNT_##option) + +#define ver_after(a, b) (typecheck(unsigned long long, a) && \ + typecheck(unsigned long long, b) && \ + ((long long)((a) - (b)) > 0)) + +typedef u64 block_t; +typedef u32 nid_t; + +struct f2fs_mount_info { + unsigned int opt; +}; + +static inline __u32 f2fs_crc32(void *buff, size_t len) +{ + return crc32_le(F2FS_SUPER_MAGIC, buff, len); +} + +static inline bool f2fs_crc_valid(__u32 blk_crc, void *buff, size_t buff_size) +{ + return f2fs_crc32(buff, buff_size) == blk_crc; +} + +/* + * For checkpoint manager + */ +enum { + NAT_BITMAP, + SIT_BITMAP +}; + +/* for the list of orphan inodes */ +struct orphan_inode_entry { + struct list_head list; /* list head */ + nid_t ino; /* inode number */ +}; + +/* for the list of directory inodes */ +struct dir_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ +}; + +/* for the list of fsync inodes, used only during recovery */ +struct fsync_inode_entry { + struct list_head list; /* list head */ + struct inode *inode; /* vfs inode pointer */ + block_t blkaddr; /* block address locating the last inode */ +}; + +#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats)) +#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits)) + +#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne) +#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid) +#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se) +#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno) + +static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = nats_in_cursum(rs); + rs->n_nats = cpu_to_le16(before + i); + return before; +} + +static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i) +{ + int before = sits_in_cursum(rs); + rs->n_sits = cpu_to_le16(before + i); + return before; +} + +/* + * For INODE and NODE manager + */ +#define XATTR_NODE_OFFSET (-1) /* + * store xattrs to one node block per + * file keeping -1 as its node offset to + * distinguish from index node blocks. + */ +#define RDONLY_NODE 1 /* + * specify a read-only mode when getting + * a node block. 0 is read-write mode. + * used by get_dnode_of_data(). + */ +#define F2FS_LINK_MAX 32000 /* maximum link count per file */ + +/* for in-memory extent cache entry */ +struct extent_info { + rwlock_t ext_lock; /* rwlock for consistency */ + unsigned int fofs; /* start offset in a file */ + u32 blk_addr; /* start block address of the extent */ + unsigned int len; /* lenth of the extent */ +}; + +/* + * i_advise uses FADVISE_XXX_BIT. We can add additional hints later. + */ +#define FADVISE_COLD_BIT 0x01 + +struct f2fs_inode_info { + struct inode vfs_inode; /* serve a vfs inode */ + unsigned long i_flags; /* keep an inode flags for ioctl */ + unsigned char i_advise; /* use to give file attribute hints */ + unsigned int i_current_depth; /* use only in directory structure */ + umode_t i_acl_mode; /* keep file acl mode temporarily */ + + /* Use below internally in f2fs*/ + unsigned long flags; /* use to pass per-file flags */ + unsigned long long data_version;/* lastes version of data for fsync */ + atomic_t dirty_dents; /* # of dirty dentry pages */ + f2fs_hash_t chash; /* hash value of given file name */ + unsigned int clevel; /* maximum level of given file name */ + nid_t i_xattr_nid; /* node id that contains xattrs */ + struct extent_info ext; /* in-memory extent cache entry */ +}; + +static inline void get_extent_info(struct extent_info *ext, + struct f2fs_extent i_ext) +{ + write_lock(&ext->ext_lock); + ext->fofs = le32_to_cpu(i_ext.fofs); + ext->blk_addr = le32_to_cpu(i_ext.blk_addr); + ext->len = le32_to_cpu(i_ext.len); + write_unlock(&ext->ext_lock); +} + +static inline void set_raw_extent(struct extent_info *ext, + struct f2fs_extent *i_ext) +{ + read_lock(&ext->ext_lock); + i_ext->fofs = cpu_to_le32(ext->fofs); + i_ext->blk_addr = cpu_to_le32(ext->blk_addr); + i_ext->len = cpu_to_le32(ext->len); + read_unlock(&ext->ext_lock); +} + +struct f2fs_nm_info { + block_t nat_blkaddr; /* base disk address of NAT */ + nid_t max_nid; /* maximum possible node ids */ + nid_t init_scan_nid; /* the first nid to be scanned */ + nid_t next_scan_nid; /* the next nid to be scanned */ + + /* NAT cache management */ + struct radix_tree_root nat_root;/* root of the nat entry cache */ + rwlock_t nat_tree_lock; /* protect nat_tree_lock */ + unsigned int nat_cnt; /* the # of cached nat entries */ + struct list_head nat_entries; /* cached nat entry list (clean) */ + struct list_head dirty_nat_entries; /* cached nat entry list (dirty) */ + + /* free node ids management */ + struct list_head free_nid_list; /* a list for free nids */ + spinlock_t free_nid_list_lock; /* protect free nid list */ + unsigned int fcnt; /* the number of free node id */ + struct mutex build_lock; /* lock for build free nids */ + + /* for checkpoint */ + char *nat_bitmap; /* NAT bitmap pointer */ + int bitmap_size; /* bitmap size */ +}; + +/* + * this structure is used as one of function parameters. + * all the information are dedicated to a given direct node block determined + * by the data offset in a file. + */ +struct dnode_of_data { + struct inode *inode; /* vfs inode pointer */ + struct page *inode_page; /* its inode page, NULL is possible */ + struct page *node_page; /* cached direct node page */ + nid_t nid; /* node id of the direct node block */ + unsigned int ofs_in_node; /* data offset in the node page */ + bool inode_page_locked; /* inode page is locked or not */ + block_t data_blkaddr; /* block address of the node block */ +}; + +static inline void set_new_dnode(struct dnode_of_data *dn, struct inode *inode, + struct page *ipage, struct page *npage, nid_t nid) +{ + dn->inode = inode; + dn->inode_page = ipage; + dn->node_page = npage; + dn->nid = nid; + dn->inode_page_locked = 0; +} + +/* + * For SIT manager + * + * By default, there are 6 active log areas across the whole main area. + * When considering hot and cold data separation to reduce cleaning overhead, + * we split 3 for data logs and 3 for node logs as hot, warm, and cold types, + * respectively. + * In the current design, you should not change the numbers intentionally. + * Instead, as a mount option such as active_logs=x, you can use 2, 4, and 6 + * logs individually according to the underlying devices. (default: 6) + * Just in case, on-disk layout covers maximum 16 logs that consist of 8 for + * data and 8 for node logs. + */ +#define NR_CURSEG_DATA_TYPE (3) +#define NR_CURSEG_NODE_TYPE (3) +#define NR_CURSEG_TYPE (NR_CURSEG_DATA_TYPE + NR_CURSEG_NODE_TYPE) + +enum { + CURSEG_HOT_DATA = 0, /* directory entry blocks */ + CURSEG_WARM_DATA, /* data blocks */ + CURSEG_COLD_DATA, /* multimedia or GCed data blocks */ + CURSEG_HOT_NODE, /* direct node blocks of directory files */ + CURSEG_WARM_NODE, /* direct node blocks of normal files */ + CURSEG_COLD_NODE, /* indirect node blocks */ + NO_CHECK_TYPE +}; + +struct f2fs_sm_info { + struct sit_info *sit_info; /* whole segment information */ + struct free_segmap_info *free_info; /* free segment information */ + struct dirty_seglist_info *dirty_info; /* dirty segment information */ + struct curseg_info *curseg_array; /* active segment information */ + + struct list_head wblist_head; /* list of under-writeback pages */ + spinlock_t wblist_lock; /* lock for checkpoint */ + + block_t seg0_blkaddr; /* block address of 0'th segment */ + block_t main_blkaddr; /* start block address of main area */ + block_t ssa_blkaddr; /* start block address of SSA area */ + + unsigned int segment_count; /* total # of segments */ + unsigned int main_segments; /* # of segments in main area */ + unsigned int reserved_segments; /* # of reserved segments */ + unsigned int ovp_segments; /* # of overprovision segments */ +}; + +/* + * For directory operation + */ +#define NODE_DIR1_BLOCK (ADDRS_PER_INODE + 1) +#define NODE_DIR2_BLOCK (ADDRS_PER_INODE + 2) +#define NODE_IND1_BLOCK (ADDRS_PER_INODE + 3) +#define NODE_IND2_BLOCK (ADDRS_PER_INODE + 4) +#define NODE_DIND_BLOCK (ADDRS_PER_INODE + 5) + +/* + * For superblock + */ +/* + * COUNT_TYPE for monitoring + * + * f2fs monitors the number of several block types such as on-writeback, + * dirty dentry blocks, dirty node blocks, and dirty meta blocks. + */ +enum count_type { + F2FS_WRITEBACK, + F2FS_DIRTY_DENTS, + F2FS_DIRTY_NODES, + F2FS_DIRTY_META, + NR_COUNT_TYPE, +}; + +/* + * FS_LOCK nesting subclasses for the lock validator: + * + * The locking order between these classes is + * RENAME -> DENTRY_OPS -> DATA_WRITE -> DATA_NEW + * -> DATA_TRUNC -> NODE_WRITE -> NODE_NEW -> NODE_TRUNC + */ +enum lock_type { + RENAME, /* for renaming operations */ + DENTRY_OPS, /* for directory operations */ + DATA_WRITE, /* for data write */ + DATA_NEW, /* for data allocation */ + DATA_TRUNC, /* for data truncate */ + NODE_NEW, /* for node allocation */ + NODE_TRUNC, /* for node truncate */ + NODE_WRITE, /* for node write */ + NR_LOCK_TYPE, +}; + +/* + * The below are the page types of bios used in submti_bio(). + * The available types are: + * DATA User data pages. It operates as async mode. + * NODE Node pages. It operates as async mode. + * META FS metadata pages such as SIT, NAT, CP. + * NR_PAGE_TYPE The number of page types. + * META_FLUSH Make sure the previous pages are written + * with waiting the bio's completion + * ... Only can be used with META. + */ +enum page_type { + DATA, + NODE, + META, + NR_PAGE_TYPE, + META_FLUSH, +}; + +struct f2fs_sb_info { + struct super_block *sb; /* pointer to VFS super block */ + struct buffer_head *raw_super_buf; /* buffer head of raw sb */ + struct f2fs_super_block *raw_super; /* raw super block pointer */ + int s_dirty; /* dirty flag for checkpoint */ + + /* for node-related operations */ + struct f2fs_nm_info *nm_info; /* node manager */ + struct inode *node_inode; /* cache node blocks */ + + /* for segment-related operations */ + struct f2fs_sm_info *sm_info; /* segment manager */ + struct bio *bio[NR_PAGE_TYPE]; /* bios to merge */ + sector_t last_block_in_bio[NR_PAGE_TYPE]; /* last block number */ + struct rw_semaphore bio_sem; /* IO semaphore */ + + /* for checkpoint */ + struct f2fs_checkpoint *ckpt; /* raw checkpoint pointer */ + struct inode *meta_inode; /* cache meta blocks */ + struct mutex cp_mutex; /* for checkpoint procedure */ + struct mutex fs_lock[NR_LOCK_TYPE]; /* for blocking FS operations */ + struct mutex write_inode; /* mutex for write inode */ + struct mutex writepages; /* mutex for writepages() */ + int por_doing; /* recovery is doing or not */ + + /* for orphan inode management */ + struct list_head orphan_inode_list; /* orphan inode list */ + struct mutex orphan_inode_mutex; /* for orphan inode list */ + unsigned int n_orphans; /* # of orphan inodes */ + + /* for directory inode management */ + struct list_head dir_inode_list; /* dir inode list */ + spinlock_t dir_inode_lock; /* for dir inode list lock */ + unsigned int n_dirty_dirs; /* # of dir inodes */ + + /* basic file system units */ + unsigned int log_sectors_per_block; /* log2 sectors per block */ + unsigned int log_blocksize; /* log2 block size */ + unsigned int blocksize; /* block size */ + unsigned int root_ino_num; /* root inode number*/ + unsigned int node_ino_num; /* node inode number*/ + unsigned int meta_ino_num; /* meta inode number*/ + unsigned int log_blocks_per_seg; /* log2 blocks per segment */ + unsigned int blocks_per_seg; /* blocks per segment */ + unsigned int segs_per_sec; /* segments per section */ + unsigned int secs_per_zone; /* sections per zone */ + unsigned int total_sections; /* total section count */ + unsigned int total_node_count; /* total node block count */ + unsigned int total_valid_node_count; /* valid node block count */ + unsigned int total_valid_inode_count; /* valid inode count */ + int active_logs; /* # of active logs */ + + block_t user_block_count; /* # of user blocks */ + block_t total_valid_block_count; /* # of valid blocks */ + block_t alloc_valid_block_count; /* # of allocated blocks */ + block_t last_valid_block_count; /* for recovery */ + u32 s_next_generation; /* for NFS support */ + atomic_t nr_pages[NR_COUNT_TYPE]; /* # of pages, see count_type */ + + struct f2fs_mount_info mount_opt; /* mount options */ + + /* for cleaning operations */ + struct mutex gc_mutex; /* mutex for GC */ + struct f2fs_gc_kthread *gc_thread; /* GC thread */ + + /* + * for stat information. + * one is for the LFS mode, and the other is for the SSR mode. + */ + struct f2fs_stat_info *stat_info; /* FS status information */ + unsigned int segment_count[2]; /* # of allocated segments */ + unsigned int block_count[2]; /* # of allocated blocks */ + unsigned int last_victim[2]; /* last victim segment # */ + int total_hit_ext, read_hit_ext; /* extent cache hit ratio */ + int bg_gc; /* background gc calls */ + spinlock_t stat_lock; /* lock for stat operations */ +}; + +/* + * Inline functions + */ +static inline struct f2fs_inode_info *F2FS_I(struct inode *inode) +{ + return container_of(inode, struct f2fs_inode_info, vfs_inode); +} + +static inline struct f2fs_sb_info *F2FS_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} + +static inline struct f2fs_super_block *F2FS_RAW_SUPER(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_super_block *)(sbi->raw_super); +} + +static inline struct f2fs_checkpoint *F2FS_CKPT(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_checkpoint *)(sbi->ckpt); +} + +static inline struct f2fs_nm_info *NM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_nm_info *)(sbi->nm_info); +} + +static inline struct f2fs_sm_info *SM_I(struct f2fs_sb_info *sbi) +{ + return (struct f2fs_sm_info *)(sbi->sm_info); +} + +static inline struct sit_info *SIT_I(struct f2fs_sb_info *sbi) +{ + return (struct sit_info *)(SM_I(sbi)->sit_info); +} + +static inline struct free_segmap_info *FREE_I(struct f2fs_sb_info *sbi) +{ + return (struct free_segmap_info *)(SM_I(sbi)->free_info); +} + +static inline struct dirty_seglist_info *DIRTY_I(struct f2fs_sb_info *sbi) +{ + return (struct dirty_seglist_info *)(SM_I(sbi)->dirty_info); +} + +static inline void F2FS_SET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 1; +} + +static inline void F2FS_RESET_SB_DIRT(struct f2fs_sb_info *sbi) +{ + sbi->s_dirty = 0; +} + +static inline void mutex_lock_op(struct f2fs_sb_info *sbi, enum lock_type t) +{ + mutex_lock_nested(&sbi->fs_lock[t], t); +} + +static inline void mutex_unlock_op(struct f2fs_sb_info *sbi, enum lock_type t) +{ + mutex_unlock(&sbi->fs_lock[t]); +} + +/* + * Check whether the given nid is within node id range. + */ +static inline void check_nid_range(struct f2fs_sb_info *sbi, nid_t nid) +{ + BUG_ON((nid >= NM_I(sbi)->max_nid)); +} + +#define F2FS_DEFAULT_ALLOCATED_BLOCKS 1 + +/* + * Check whether the inode has blocks or not + */ +static inline int F2FS_HAS_BLOCKS(struct inode *inode) +{ + if (F2FS_I(inode)->i_xattr_nid) + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS + 1); + else + return (inode->i_blocks > F2FS_DEFAULT_ALLOCATED_BLOCKS); +} + +static inline bool inc_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, blkcnt_t count) +{ + block_t valid_block_count; + + spin_lock(&sbi->stat_lock); + valid_block_count = + sbi->total_valid_block_count + (block_t)count; + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + inode->i_blocks += count; + sbi->total_valid_block_count = valid_block_count; + sbi->alloc_valid_block_count += (block_t)count; + spin_unlock(&sbi->stat_lock); + return true; +} + +static inline int dec_valid_block_count(struct f2fs_sb_info *sbi, + struct inode *inode, + blkcnt_t count) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_block_count < (block_t) count); + BUG_ON(inode->i_blocks < count); + inode->i_blocks -= count; + sbi->total_valid_block_count -= (block_t)count; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline void inc_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_inc(&sbi->nr_pages[count_type]); + F2FS_SET_SB_DIRT(sbi); +} + +static inline void inode_inc_dirty_dents(struct inode *inode) +{ + atomic_inc(&F2FS_I(inode)->dirty_dents); +} + +static inline void dec_page_count(struct f2fs_sb_info *sbi, int count_type) +{ + atomic_dec(&sbi->nr_pages[count_type]); +} + +static inline void inode_dec_dirty_dents(struct inode *inode) +{ + atomic_dec(&F2FS_I(inode)->dirty_dents); +} + +static inline int get_pages(struct f2fs_sb_info *sbi, int count_type) +{ + return atomic_read(&sbi->nr_pages[count_type]); +} + +static inline block_t valid_user_blocks(struct f2fs_sb_info *sbi) +{ + block_t ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_block_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline unsigned long __bitmap_size(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + + /* return NAT or SIT bitmap */ + if (flag == NAT_BITMAP) + return le32_to_cpu(ckpt->nat_ver_bitmap_bytesize); + else if (flag == SIT_BITMAP) + return le32_to_cpu(ckpt->sit_ver_bitmap_bytesize); + + return 0; +} + +static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag) +{ + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + int offset = (flag == NAT_BITMAP) ? ckpt->sit_ver_bitmap_bytesize : 0; + return &ckpt->sit_nat_version_bitmap + offset; +} + +static inline block_t __start_cp_addr(struct f2fs_sb_info *sbi) +{ + block_t start_addr; + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + unsigned long long ckpt_version = le64_to_cpu(ckpt->checkpoint_ver); + + start_addr = le64_to_cpu(F2FS_RAW_SUPER(sbi)->cp_blkaddr); + + /* + * odd numbered checkpoint should at cp segment 0 + * and even segent must be at cp segment 1 + */ + if (!(ckpt_version & 1)) + start_addr += sbi->blocks_per_seg; + + return start_addr; +} + +static inline block_t __start_sum_addr(struct f2fs_sb_info *sbi) +{ + return le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline bool inc_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + block_t valid_block_count; + unsigned int valid_node_count; + + spin_lock(&sbi->stat_lock); + + valid_block_count = sbi->total_valid_block_count + (block_t)count; + sbi->alloc_valid_block_count += (block_t)count; + valid_node_count = sbi->total_valid_node_count + count; + + if (valid_block_count > sbi->user_block_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (valid_node_count > sbi->total_node_count) { + spin_unlock(&sbi->stat_lock); + return false; + } + + if (inode) + inode->i_blocks += count; + sbi->total_valid_node_count = valid_node_count; + sbi->total_valid_block_count = valid_block_count; + spin_unlock(&sbi->stat_lock); + + return true; +} + +static inline void dec_valid_node_count(struct f2fs_sb_info *sbi, + struct inode *inode, + unsigned int count) +{ + spin_lock(&sbi->stat_lock); + + BUG_ON(sbi->total_valid_block_count < count); + BUG_ON(sbi->total_valid_node_count < count); + BUG_ON(inode->i_blocks < count); + + inode->i_blocks -= count; + sbi->total_valid_node_count -= count; + sbi->total_valid_block_count -= (block_t)count; + + spin_unlock(&sbi->stat_lock); +} + +static inline unsigned int valid_node_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_node_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void inc_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(sbi->total_valid_inode_count == sbi->total_node_count); + sbi->total_valid_inode_count++; + spin_unlock(&sbi->stat_lock); +} + +static inline int dec_valid_inode_count(struct f2fs_sb_info *sbi) +{ + spin_lock(&sbi->stat_lock); + BUG_ON(!sbi->total_valid_inode_count); + sbi->total_valid_inode_count--; + spin_unlock(&sbi->stat_lock); + return 0; +} + +static inline unsigned int valid_inode_count(struct f2fs_sb_info *sbi) +{ + unsigned int ret; + spin_lock(&sbi->stat_lock); + ret = sbi->total_valid_inode_count; + spin_unlock(&sbi->stat_lock); + return ret; +} + +static inline void f2fs_put_page(struct page *page, int unlock) +{ + if (!page || IS_ERR(page)) + return; + + if (unlock) { + BUG_ON(!PageLocked(page)); + unlock_page(page); + } + page_cache_release(page); +} + +static inline void f2fs_put_dnode(struct dnode_of_data *dn) +{ + if (dn->node_page) + f2fs_put_page(dn->node_page, 1); + if (dn->inode_page && dn->node_page != dn->inode_page) + f2fs_put_page(dn->inode_page, 0); + dn->node_page = NULL; + dn->inode_page = NULL; +} + +static inline struct kmem_cache *f2fs_kmem_cache_create(const char *name, + size_t size, void (*ctor)(void *)) +{ + return kmem_cache_create(name, size, 0, SLAB_RECLAIM_ACCOUNT, ctor); +} + +#define RAW_IS_INODE(p) ((p)->footer.nid == (p)->footer.ino) + +static inline bool IS_INODE(struct page *page) +{ + struct f2fs_node *p = (struct f2fs_node *)page_address(page); + return RAW_IS_INODE(p); +} + +static inline __le32 *blkaddr_in_node(struct f2fs_node *node) +{ + return RAW_IS_INODE(node) ? node->i.i_addr : node->dn.addr; +} + +static inline block_t datablock_addr(struct page *node_page, + unsigned int offset) +{ + struct f2fs_node *raw_node; + __le32 *addr_array; + raw_node = (struct f2fs_node *)page_address(node_page); + addr_array = blkaddr_in_node(raw_node); + return le32_to_cpu(addr_array[offset]); +} + +static inline int f2fs_test_bit(unsigned int nr, char *addr) +{ + int mask; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + return mask & *addr; +} + +static inline int f2fs_set_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr |= mask; + return ret; +} + +static inline int f2fs_clear_bit(unsigned int nr, char *addr) +{ + int mask; + int ret; + + addr += (nr >> 3); + mask = 1 << (7 - (nr & 0x07)); + ret = mask & *addr; + *addr &= ~mask; + return ret; +} + +/* used for f2fs_inode_info->flags */ +enum { + FI_NEW_INODE, /* indicate newly allocated inode */ + FI_NEED_CP, /* need to do checkpoint during fsync */ + FI_INC_LINK, /* need to increment i_nlink */ + FI_ACL_MODE, /* indicate acl mode */ + FI_NO_ALLOC, /* should not allocate any blocks */ +}; + +static inline void set_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + set_bit(flag, &fi->flags); +} + +static inline int is_inode_flag_set(struct f2fs_inode_info *fi, int flag) +{ + return test_bit(flag, &fi->flags); +} + +static inline void clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + clear_bit(flag, &fi->flags); +} + +static inline void set_acl_inode(struct f2fs_inode_info *fi, umode_t mode) +{ + fi->i_acl_mode = mode; + set_inode_flag(fi, FI_ACL_MODE); +} + +static inline int cond_clear_inode_flag(struct f2fs_inode_info *fi, int flag) +{ + if (is_inode_flag_set(fi, FI_ACL_MODE)) { + clear_inode_flag(fi, FI_ACL_MODE); + return 1; + } + return 0; +} + +/* + * file.c + */ +int f2fs_sync_file(struct file *, loff_t, loff_t, int); +void truncate_data_blocks(struct dnode_of_data *); +void f2fs_truncate(struct inode *); +int f2fs_setattr(struct dentry *, struct iattr *); +int truncate_hole(struct inode *, pgoff_t, pgoff_t); +long f2fs_ioctl(struct file *, unsigned int, unsigned long); + +/* + * inode.c + */ +void f2fs_set_inode_flags(struct inode *); +struct inode *f2fs_iget_nowait(struct super_block *, unsigned long); +struct inode *f2fs_iget(struct super_block *, unsigned long); +void update_inode(struct inode *, struct page *); +int f2fs_write_inode(struct inode *, struct writeback_control *); +void f2fs_evict_inode(struct inode *); + +/* + * namei.c + */ +struct dentry *f2fs_get_parent(struct dentry *child); + +/* + * dir.c + */ +struct f2fs_dir_entry *f2fs_find_entry(struct inode *, struct qstr *, + struct page **); +struct f2fs_dir_entry *f2fs_parent_dir(struct inode *, struct page **); +ino_t f2fs_inode_by_name(struct inode *, struct qstr *); +void f2fs_set_link(struct inode *, struct f2fs_dir_entry *, + struct page *, struct inode *); +void init_dent_inode(struct dentry *, struct page *); +int f2fs_add_link(struct dentry *, struct inode *); +void f2fs_delete_entry(struct f2fs_dir_entry *, struct page *, struct inode *); +int f2fs_make_empty(struct inode *, struct inode *); +bool f2fs_empty_dir(struct inode *); + +/* + * super.c + */ +int f2fs_sync_fs(struct super_block *, int); + +/* + * hash.c + */ +f2fs_hash_t f2fs_dentry_hash(const char *, int); + +/* + * node.c + */ +struct dnode_of_data; +struct node_info; + +int is_checkpointed_node(struct f2fs_sb_info *, nid_t); +void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *); +int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int); +int truncate_inode_blocks(struct inode *, pgoff_t); +int remove_inode_page(struct inode *); +int new_inode_page(struct inode *, struct dentry *); +struct page *new_node_page(struct dnode_of_data *, unsigned int); +void ra_node_page(struct f2fs_sb_info *, nid_t); +struct page *get_node_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_node_page_ra(struct page *, int); +void sync_inode_page(struct dnode_of_data *); +int sync_node_pages(struct f2fs_sb_info *, nid_t, struct writeback_control *); +bool alloc_nid(struct f2fs_sb_info *, nid_t *); +void alloc_nid_done(struct f2fs_sb_info *, nid_t); +void alloc_nid_failed(struct f2fs_sb_info *, nid_t); +void recover_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, struct node_info *, block_t); +int recover_inode_page(struct f2fs_sb_info *, struct page *); +int restore_node_summary(struct f2fs_sb_info *, unsigned int, + struct f2fs_summary_block *); +void flush_nat_entries(struct f2fs_sb_info *); +int build_node_manager(struct f2fs_sb_info *); +void destroy_node_manager(struct f2fs_sb_info *); +int create_node_manager_caches(void); +void destroy_node_manager_caches(void); + +/* + * segment.c + */ +void f2fs_balance_fs(struct f2fs_sb_info *); +void invalidate_blocks(struct f2fs_sb_info *, block_t); +void locate_dirty_segment(struct f2fs_sb_info *, unsigned int); +void clear_prefree_segments(struct f2fs_sb_info *); +int npages_for_summary_flush(struct f2fs_sb_info *); +void allocate_new_segments(struct f2fs_sb_info *); +struct page *get_sum_page(struct f2fs_sb_info *, unsigned int); +struct bio *f2fs_bio_alloc(struct block_device *, sector_t, int, gfp_t); +void f2fs_submit_bio(struct f2fs_sb_info *, enum page_type, bool sync); +int write_meta_page(struct f2fs_sb_info *, struct page *, + struct writeback_control *); +void write_node_page(struct f2fs_sb_info *, struct page *, unsigned int, + block_t, block_t *); +void write_data_page(struct inode *, struct page *, struct dnode_of_data*, + block_t, block_t *); +void rewrite_data_page(struct f2fs_sb_info *, struct page *, block_t); +void recover_data_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void rewrite_node_page(struct f2fs_sb_info *, struct page *, + struct f2fs_summary *, block_t, block_t); +void write_data_summaries(struct f2fs_sb_info *, block_t); +void write_node_summaries(struct f2fs_sb_info *, block_t); +int lookup_journal_in_cursum(struct f2fs_summary_block *, + int, unsigned int, int); +void flush_sit_entries(struct f2fs_sb_info *); +int build_segment_manager(struct f2fs_sb_info *); +void reset_victim_segmap(struct f2fs_sb_info *); +void destroy_segment_manager(struct f2fs_sb_info *); + +/* + * checkpoint.c + */ +struct page *grab_meta_page(struct f2fs_sb_info *, pgoff_t); +struct page *get_meta_page(struct f2fs_sb_info *, pgoff_t); +long sync_meta_pages(struct f2fs_sb_info *, enum page_type, long); +int check_orphan_space(struct f2fs_sb_info *); +void add_orphan_inode(struct f2fs_sb_info *, nid_t); +void remove_orphan_inode(struct f2fs_sb_info *, nid_t); +int recover_orphan_inodes(struct f2fs_sb_info *); +int get_valid_checkpoint(struct f2fs_sb_info *); +void set_dirty_dir_page(struct inode *, struct page *); +void remove_dirty_dir_inode(struct inode *); +void sync_dirty_dir_inodes(struct f2fs_sb_info *); +void block_operations(struct f2fs_sb_info *); +void write_checkpoint(struct f2fs_sb_info *, bool, bool); +void init_orphan_info(struct f2fs_sb_info *); +int create_checkpoint_caches(void); +void destroy_checkpoint_caches(void); + +/* + * data.c + */ +int reserve_new_block(struct dnode_of_data *); +void update_extent_cache(block_t, struct dnode_of_data *); +struct page *find_data_page(struct inode *, pgoff_t); +struct page *get_lock_data_page(struct inode *, pgoff_t); +struct page *get_new_data_page(struct inode *, pgoff_t, bool); +int f2fs_readpage(struct f2fs_sb_info *, struct page *, block_t, int); +int do_write_data_page(struct page *); + +/* + * gc.c + */ +int start_gc_thread(struct f2fs_sb_info *); +void stop_gc_thread(struct f2fs_sb_info *); +block_t start_bidx_of_node(unsigned int); +int f2fs_gc(struct f2fs_sb_info *, int); +void build_gc_manager(struct f2fs_sb_info *); +int create_gc_caches(void); +void destroy_gc_caches(void); + +/* + * recovery.c + */ +void recover_fsync_data(struct f2fs_sb_info *); +bool space_for_roll_forward(struct f2fs_sb_info *); + +/* + * debug.c + */ +#ifdef CONFIG_F2FS_STAT_FS +struct f2fs_stat_info { + struct list_head stat_list; + struct f2fs_sb_info *sbi; + struct mutex stat_lock; + int all_area_segs, sit_area_segs, nat_area_segs, ssa_area_segs; + int main_area_segs, main_area_sections, main_area_zones; + int hit_ext, total_ext; + int ndirty_node, ndirty_dent, ndirty_dirs, ndirty_meta; + int nats, sits, fnids; + int total_count, utilization; + int bg_gc; + unsigned int valid_count, valid_node_count, valid_inode_count; + unsigned int bimodal, avg_vblocks; + int util_free, util_valid, util_invalid; + int rsvd_segs, overp_segs; + int dirty_count, node_pages, meta_pages; + int prefree_count, call_count; + int tot_segs, node_segs, data_segs, free_segs, free_secs; + int tot_blks, data_blks, node_blks; + int curseg[NR_CURSEG_TYPE]; + int cursec[NR_CURSEG_TYPE]; + int curzone[NR_CURSEG_TYPE]; + + unsigned int segment_count[2]; + unsigned int block_count[2]; + unsigned base_mem, cache_mem; +}; + +#define stat_inc_call_count(si) ((si)->call_count++) + +#define stat_inc_seg_count(sbi, type) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + (si)->tot_segs++; \ + if (type == SUM_TYPE_DATA) \ + si->data_segs++; \ + else \ + si->node_segs++; \ + } while (0) + +#define stat_inc_tot_blk_count(si, blks) \ + (si->tot_blks += (blks)) + +#define stat_inc_data_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + stat_inc_tot_blk_count(si, blks); \ + si->data_blks += (blks); \ + } while (0) + +#define stat_inc_node_blk_count(sbi, blks) \ + do { \ + struct f2fs_stat_info *si = sbi->stat_info; \ + stat_inc_tot_blk_count(si, blks); \ + si->node_blks += (blks); \ + } while (0) + +int f2fs_build_stats(struct f2fs_sb_info *); +void f2fs_destroy_stats(struct f2fs_sb_info *); +void destroy_root_stats(void); +#else +#define stat_inc_call_count(si) +#define stat_inc_seg_count(si, type) +#define stat_inc_tot_blk_count(si, blks) +#define stat_inc_data_blk_count(si, blks) +#define stat_inc_node_blk_count(sbi, blks) + +static inline int f2fs_build_stats(struct f2fs_sb_info *sbi) { return 0; } +static inline void f2fs_destroy_stats(struct f2fs_sb_info *sbi) { } +static inline void destroy_root_stats(void) { } +#endif + +extern const struct file_operations f2fs_dir_operations; +extern const struct file_operations f2fs_file_operations; +extern const struct inode_operations f2fs_file_inode_operations; +extern const struct address_space_operations f2fs_dblock_aops; +extern const struct address_space_operations f2fs_node_aops; +extern const struct address_space_operations f2fs_meta_aops; +extern const struct inode_operations f2fs_dir_inode_operations; +extern const struct inode_operations f2fs_symlink_inode_operations; +extern const struct inode_operations f2fs_special_inode_operations; +#endif diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h new file mode 100644 index 000000000000..5d525ed312ba --- /dev/null +++ b/fs/f2fs/node.h @@ -0,0 +1,353 @@ +/** + * fs/f2fs/node.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* start node id of a node block dedicated to the given node id */ +#define START_NID(nid) ((nid / NAT_ENTRY_PER_BLOCK) * NAT_ENTRY_PER_BLOCK) + +/* node block offset on the NAT area dedicated to the given start node id */ +#define NAT_BLOCK_OFFSET(start_nid) (start_nid / NAT_ENTRY_PER_BLOCK) + +/* # of pages to perform readahead before building free nids */ +#define FREE_NID_PAGES 4 + +/* maximum # of free node ids to produce during build_free_nids */ +#define MAX_FREE_NIDS (NAT_ENTRY_PER_BLOCK * FREE_NID_PAGES) + +/* maximum readahead size for node during getting data blocks */ +#define MAX_RA_NODE 128 + +/* maximum cached nat entries to manage memory footprint */ +#define NM_WOUT_THRESHOLD (64 * NAT_ENTRY_PER_BLOCK) + +/* vector size for gang look-up from nat cache that consists of radix tree */ +#define NATVEC_SIZE 64 + +/* + * For node information + */ +struct node_info { + nid_t nid; /* node id */ + nid_t ino; /* inode number of the node's owner */ + block_t blk_addr; /* block address of the node */ + unsigned char version; /* version of the node */ +}; + +struct nat_entry { + struct list_head list; /* for clean or dirty nat list */ + bool checkpointed; /* whether it is checkpointed or not */ + struct node_info ni; /* in-memory node information */ +}; + +#define nat_get_nid(nat) (nat->ni.nid) +#define nat_set_nid(nat, n) (nat->ni.nid = n) +#define nat_get_blkaddr(nat) (nat->ni.blk_addr) +#define nat_set_blkaddr(nat, b) (nat->ni.blk_addr = b) +#define nat_get_ino(nat) (nat->ni.ino) +#define nat_set_ino(nat, i) (nat->ni.ino = i) +#define nat_get_version(nat) (nat->ni.version) +#define nat_set_version(nat, v) (nat->ni.version = v) + +#define __set_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->dirty_nat_entries); +#define __clear_nat_cache_dirty(nm_i, ne) \ + list_move_tail(&ne->list, &nm_i->nat_entries); +#define inc_node_version(version) (++version) + +static inline void node_info_from_raw_nat(struct node_info *ni, + struct f2fs_nat_entry *raw_ne) +{ + ni->ino = le32_to_cpu(raw_ne->ino); + ni->blk_addr = le32_to_cpu(raw_ne->block_addr); + ni->version = raw_ne->version; +} + +/* + * For free nid mangement + */ +enum nid_state { + NID_NEW, /* newly added to free nid list */ + NID_ALLOC /* it is allocated */ +}; + +struct free_nid { + struct list_head list; /* for free node id list */ + nid_t nid; /* node id */ + int state; /* in use or not: NID_NEW or NID_ALLOC */ +}; + +static inline int next_free_nid(struct f2fs_sb_info *sbi, nid_t *nid) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *fnid; + + if (nm_i->fcnt <= 0) + return -1; + spin_lock(&nm_i->free_nid_list_lock); + fnid = list_entry(nm_i->free_nid_list.next, struct free_nid, list); + *nid = fnid->nid; + spin_unlock(&nm_i->free_nid_list_lock); + return 0; +} + +/* + * inline functions + */ +static inline void get_nat_bitmap(struct f2fs_sb_info *sbi, void *addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + memcpy(addr, nm_i->nat_bitmap, nm_i->bitmap_size); +} + +static inline pgoff_t current_nat_addr(struct f2fs_sb_info *sbi, nid_t start) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + pgoff_t block_off; + pgoff_t block_addr; + int seg_off; + + block_off = NAT_BLOCK_OFFSET(start); + seg_off = block_off >> sbi->log_blocks_per_seg; + + block_addr = (pgoff_t)(nm_i->nat_blkaddr + + (seg_off << sbi->log_blocks_per_seg << 1) + + (block_off & ((1 << sbi->log_blocks_per_seg) - 1))); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + block_addr += sbi->blocks_per_seg; + + return block_addr; +} + +static inline pgoff_t next_nat_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct f2fs_nm_info *nm_i = NM_I(sbi); + + block_addr -= nm_i->nat_blkaddr; + if ((block_addr >> sbi->log_blocks_per_seg) % 2) + block_addr -= sbi->blocks_per_seg; + else + block_addr += sbi->blocks_per_seg; + + return block_addr + nm_i->nat_blkaddr; +} + +static inline void set_to_next_nat(struct f2fs_nm_info *nm_i, nid_t start_nid) +{ + unsigned int block_off = NAT_BLOCK_OFFSET(start_nid); + + if (f2fs_test_bit(block_off, nm_i->nat_bitmap)) + f2fs_clear_bit(block_off, nm_i->nat_bitmap); + else + f2fs_set_bit(block_off, nm_i->nat_bitmap); +} + +static inline void fill_node_footer(struct page *page, nid_t nid, + nid_t ino, unsigned int ofs, bool reset) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + if (reset) + memset(rn, 0, sizeof(*rn)); + rn->footer.nid = cpu_to_le32(nid); + rn->footer.ino = cpu_to_le32(ino); + rn->footer.flag = cpu_to_le32(ofs << OFFSET_BIT_SHIFT); +} + +static inline void copy_node_footer(struct page *dst, struct page *src) +{ + void *src_addr = page_address(src); + void *dst_addr = page_address(dst); + struct f2fs_node *src_rn = (struct f2fs_node *)src_addr; + struct f2fs_node *dst_rn = (struct f2fs_node *)dst_addr; + memcpy(&dst_rn->footer, &src_rn->footer, sizeof(struct node_footer)); +} + +static inline void fill_node_footer_blkaddr(struct page *page, block_t blkaddr) +{ + struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); + struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi); + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + rn->footer.cp_ver = ckpt->checkpoint_ver; + rn->footer.next_blkaddr = blkaddr; +} + +static inline nid_t ino_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.ino); +} + +static inline nid_t nid_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.nid); +} + +static inline unsigned int ofs_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned flag = le32_to_cpu(rn->footer.flag); + return flag >> OFFSET_BIT_SHIFT; +} + +static inline unsigned long long cpver_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le64_to_cpu(rn->footer.cp_ver); +} + +static inline block_t next_blkaddr_of_node(struct page *node_page) +{ + void *kaddr = page_address(node_page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + return le32_to_cpu(rn->footer.next_blkaddr); +} + +/* + * f2fs assigns the following node offsets described as (num). + * N = NIDS_PER_BLOCK + * + * Inode block (0) + * |- direct node (1) + * |- direct node (2) + * |- indirect node (3) + * | `- direct node (4 => 4 + N - 1) + * |- indirect node (4 + N) + * | `- direct node (5 + N => 5 + 2N - 1) + * `- double indirect node (5 + 2N) + * `- indirect node (6 + 2N) + * `- direct node (x(N + 1)) + */ +static inline bool IS_DNODE(struct page *node_page) +{ + unsigned int ofs = ofs_of_node(node_page); + if (ofs == 3 || ofs == 4 + NIDS_PER_BLOCK || + ofs == 5 + 2 * NIDS_PER_BLOCK) + return false; + if (ofs >= 6 + 2 * NIDS_PER_BLOCK) { + ofs -= 6 + 2 * NIDS_PER_BLOCK; + if ((long int)ofs % (NIDS_PER_BLOCK + 1)) + return false; + } + return true; +} + +static inline void set_nid(struct page *p, int off, nid_t nid, bool i) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + + wait_on_page_writeback(p); + + if (i) + rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid); + else + rn->in.nid[off] = cpu_to_le32(nid); + set_page_dirty(p); +} + +static inline nid_t get_nid(struct page *p, int off, bool i) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(p); + if (i) + return le32_to_cpu(rn->i.i_nid[off - NODE_DIR1_BLOCK]); + return le32_to_cpu(rn->in.nid[off]); +} + +/* + * Coldness identification: + * - Mark cold files in f2fs_inode_info + * - Mark cold node blocks in their node footer + * - Mark cold data pages in page cache + */ +static inline int is_cold_file(struct inode *inode) +{ + return F2FS_I(inode)->i_advise & FADVISE_COLD_BIT; +} + +static inline int is_cold_data(struct page *page) +{ + return PageChecked(page); +} + +static inline void set_cold_data(struct page *page) +{ + SetPageChecked(page); +} + +static inline void clear_cold_data(struct page *page) +{ + ClearPageChecked(page); +} + +static inline int is_cold_node(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << COLD_BIT_SHIFT); +} + +static inline unsigned char is_fsync_dnode(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << FSYNC_BIT_SHIFT); +} + +static inline unsigned char is_dent_dnode(struct page *page) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + return flag & (0x1 << DENT_BIT_SHIFT); +} + +static inline void set_cold_node(struct inode *inode, struct page *page) +{ + struct f2fs_node *rn = (struct f2fs_node *)page_address(page); + unsigned int flag = le32_to_cpu(rn->footer.flag); + + if (S_ISDIR(inode->i_mode)) + flag &= ~(0x1 << COLD_BIT_SHIFT); + else + flag |= (0x1 << COLD_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_fsync_mark(struct page *page, int mark) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << FSYNC_BIT_SHIFT); + else + flag &= ~(0x1 << FSYNC_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} + +static inline void set_dentry_mark(struct page *page, int mark) +{ + void *kaddr = page_address(page); + struct f2fs_node *rn = (struct f2fs_node *)kaddr; + unsigned int flag = le32_to_cpu(rn->footer.flag); + if (mark) + flag |= (0x1 << DENT_BIT_SHIFT); + else + flag &= ~(0x1 << DENT_BIT_SHIFT); + rn->footer.flag = cpu_to_le32(flag); +} diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h new file mode 100644 index 000000000000..e380a8ef13f5 --- /dev/null +++ b/fs/f2fs/segment.h @@ -0,0 +1,615 @@ +/** + * fs/f2fs/segment.h + * + * Copyright (c) 2012 Samsung Electronics Co., Ltd. + * http://www.samsung.com/ + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +/* constant macro */ +#define NULL_SEGNO ((unsigned int)(~0)) + +/* V: Logical segment # in volume, R: Relative segment # in main area */ +#define GET_L2R_SEGNO(free_i, segno) (segno - free_i->start_segno) +#define GET_R2L_SEGNO(free_i, segno) (segno + free_i->start_segno) + +#define IS_DATASEG(t) \ + ((t == CURSEG_HOT_DATA) || (t == CURSEG_COLD_DATA) || \ + (t == CURSEG_WARM_DATA)) + +#define IS_NODESEG(t) \ + ((t == CURSEG_HOT_NODE) || (t == CURSEG_COLD_NODE) || \ + (t == CURSEG_WARM_NODE)) + +#define IS_CURSEG(sbi, segno) \ + ((segno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno) || \ + (segno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno)) + +#define IS_CURSEC(sbi, secno) \ + ((secno == CURSEG_I(sbi, CURSEG_HOT_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_DATA)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_HOT_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_WARM_NODE)->segno / \ + sbi->segs_per_sec) || \ + (secno == CURSEG_I(sbi, CURSEG_COLD_NODE)->segno / \ + sbi->segs_per_sec)) \ + +#define START_BLOCK(sbi, segno) \ + (SM_I(sbi)->seg0_blkaddr + \ + (GET_R2L_SEGNO(FREE_I(sbi), segno) << sbi->log_blocks_per_seg)) +#define NEXT_FREE_BLKADDR(sbi, curseg) \ + (START_BLOCK(sbi, curseg->segno) + curseg->next_blkoff) + +#define MAIN_BASE_BLOCK(sbi) (SM_I(sbi)->main_blkaddr) + +#define GET_SEGOFF_FROM_SEG0(sbi, blk_addr) \ + ((blk_addr) - SM_I(sbi)->seg0_blkaddr) +#define GET_SEGNO_FROM_SEG0(sbi, blk_addr) \ + (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) >> sbi->log_blocks_per_seg) +#define GET_SEGNO(sbi, blk_addr) \ + (((blk_addr == NULL_ADDR) || (blk_addr == NEW_ADDR)) ? \ + NULL_SEGNO : GET_L2R_SEGNO(FREE_I(sbi), \ + GET_SEGNO_FROM_SEG0(sbi, blk_addr))) +#define GET_SECNO(sbi, segno) \ + ((segno) / sbi->segs_per_sec) +#define GET_ZONENO_FROM_SEGNO(sbi, segno) \ + ((segno / sbi->segs_per_sec) / sbi->secs_per_zone) + +#define GET_SUM_BLOCK(sbi, segno) \ + ((sbi->sm_info->ssa_blkaddr) + segno) + +#define GET_SUM_TYPE(footer) ((footer)->entry_type) +#define SET_SUM_TYPE(footer, type) ((footer)->entry_type = type) + +#define SIT_ENTRY_OFFSET(sit_i, segno) \ + (segno % sit_i->sents_per_block) +#define SIT_BLOCK_OFFSET(sit_i, segno) \ + (segno / SIT_ENTRY_PER_BLOCK) +#define START_SEGNO(sit_i, segno) \ + (SIT_BLOCK_OFFSET(sit_i, segno) * SIT_ENTRY_PER_BLOCK) +#define f2fs_bitmap_size(nr) \ + (BITS_TO_LONGS(nr) * sizeof(unsigned long)) +#define TOTAL_SEGS(sbi) (SM_I(sbi)->main_segments) + +/* during checkpoint, bio_private is used to synchronize the last bio */ +struct bio_private { + struct f2fs_sb_info *sbi; + bool is_sync; + void *wait; +}; + +/* + * indicate a block allocation direction: RIGHT and LEFT. + * RIGHT means allocating new sections towards the end of volume. + * LEFT means the opposite direction. + */ +enum { + ALLOC_RIGHT = 0, + ALLOC_LEFT +}; + +/* + * In the victim_sel_policy->alloc_mode, there are two block allocation modes. + * LFS writes data sequentially with cleaning operations. + * SSR (Slack Space Recycle) reuses obsolete space without cleaning operations. + */ +enum { + LFS = 0, + SSR +}; + +/* + * In the victim_sel_policy->gc_mode, there are two gc, aka cleaning, modes. + * GC_CB is based on cost-benefit algorithm. + * GC_GREEDY is based on greedy algorithm. + */ +enum { + GC_CB = 0, + GC_GREEDY +}; + +/* + * BG_GC means the background cleaning job. + * FG_GC means the on-demand cleaning job. + */ +enum { + BG_GC = 0, + FG_GC +}; + +/* for a function parameter to select a victim segment */ +struct victim_sel_policy { + int alloc_mode; /* LFS or SSR */ + int gc_mode; /* GC_CB or GC_GREEDY */ + unsigned long *dirty_segmap; /* dirty segment bitmap */ + unsigned int offset; /* last scanned bitmap offset */ + unsigned int ofs_unit; /* bitmap search unit */ + unsigned int min_cost; /* minimum cost */ + unsigned int min_segno; /* segment # having min. cost */ +}; + +struct seg_entry { + unsigned short valid_blocks; /* # of valid blocks */ + unsigned char *cur_valid_map; /* validity bitmap of blocks */ + /* + * # of valid blocks and the validity bitmap stored in the the last + * checkpoint pack. This information is used by the SSR mode. + */ + unsigned short ckpt_valid_blocks; + unsigned char *ckpt_valid_map; + unsigned char type; /* segment type like CURSEG_XXX_TYPE */ + unsigned long long mtime; /* modification time of the segment */ +}; + +struct sec_entry { + unsigned int valid_blocks; /* # of valid blocks in a section */ +}; + +struct segment_allocation { + void (*allocate_segment)(struct f2fs_sb_info *, int, bool); +}; + +struct sit_info { + const struct segment_allocation *s_ops; + + block_t sit_base_addr; /* start block address of SIT area */ + block_t sit_blocks; /* # of blocks used by SIT area */ + block_t written_valid_blocks; /* # of valid blocks in main area */ + char *sit_bitmap; /* SIT bitmap pointer */ + unsigned int bitmap_size; /* SIT bitmap size */ + + unsigned long *dirty_sentries_bitmap; /* bitmap for dirty sentries */ + unsigned int dirty_sentries; /* # of dirty sentries */ + unsigned int sents_per_block; /* # of SIT entries per block */ + struct mutex sentry_lock; /* to protect SIT cache */ + struct seg_entry *sentries; /* SIT segment-level cache */ + struct sec_entry *sec_entries; /* SIT section-level cache */ + + /* for cost-benefit algorithm in cleaning procedure */ + unsigned long long elapsed_time; /* elapsed time after mount */ + unsigned long long mounted_time; /* mount time */ + unsigned long long min_mtime; /* min. modification time */ + unsigned long long max_mtime; /* max. modification time */ +}; + +struct free_segmap_info { + unsigned int start_segno; /* start segment number logically */ + unsigned int free_segments; /* # of free segments */ + unsigned int free_sections; /* # of free sections */ + rwlock_t segmap_lock; /* free segmap lock */ + unsigned long *free_segmap; /* free segment bitmap */ + unsigned long *free_secmap; /* free section bitmap */ +}; + +/* Notice: The order of dirty type is same with CURSEG_XXX in f2fs.h */ +enum dirty_type { + DIRTY_HOT_DATA, /* dirty segments assigned as hot data logs */ + DIRTY_WARM_DATA, /* dirty segments assigned as warm data logs */ + DIRTY_COLD_DATA, /* dirty segments assigned as cold data logs */ + DIRTY_HOT_NODE, /* dirty segments assigned as hot node logs */ + DIRTY_WARM_NODE, /* dirty segments assigned as warm node logs */ + DIRTY_COLD_NODE, /* dirty segments assigned as cold node logs */ + DIRTY, /* to count # of dirty segments */ + PRE, /* to count # of entirely obsolete segments */ + NR_DIRTY_TYPE +}; + +struct dirty_seglist_info { + const struct victim_selection *v_ops; /* victim selction operation */ + unsigned long *dirty_segmap[NR_DIRTY_TYPE]; + struct mutex seglist_lock; /* lock for segment bitmaps */ + int nr_dirty[NR_DIRTY_TYPE]; /* # of dirty segments */ + unsigned long *victim_segmap[2]; /* BG_GC, FG_GC */ +}; + +/* victim selection function for cleaning and SSR */ +struct victim_selection { + int (*get_victim)(struct f2fs_sb_info *, unsigned int *, + int, int, char); +}; + +/* for active log information */ +struct curseg_info { + struct mutex curseg_mutex; /* lock for consistency */ + struct f2fs_summary_block *sum_blk; /* cached summary block */ + unsigned char alloc_type; /* current allocation type */ + unsigned int segno; /* current segment number */ + unsigned short next_blkoff; /* next block offset to write */ + unsigned int zone; /* current zone number */ + unsigned int next_segno; /* preallocated segment */ +}; + +/* + * inline functions + */ +static inline struct curseg_info *CURSEG_I(struct f2fs_sb_info *sbi, int type) +{ + return (struct curseg_info *)(SM_I(sbi)->curseg_array + type); +} + +static inline struct seg_entry *get_seg_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sentries[segno]; +} + +static inline struct sec_entry *get_sec_entry(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct sit_info *sit_i = SIT_I(sbi); + return &sit_i->sec_entries[GET_SECNO(sbi, segno)]; +} + +static inline unsigned int get_valid_blocks(struct f2fs_sb_info *sbi, + unsigned int segno, int section) +{ + /* + * In order to get # of valid blocks in a section instantly from many + * segments, f2fs manages two counting structures separately. + */ + if (section > 1) + return get_sec_entry(sbi, segno)->valid_blocks; + else + return get_seg_entry(sbi, segno)->valid_blocks; +} + +static inline void seg_info_from_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + se->valid_blocks = GET_SIT_VBLOCKS(rs); + se->ckpt_valid_blocks = GET_SIT_VBLOCKS(rs); + memcpy(se->cur_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->type = GET_SIT_TYPE(rs); + se->mtime = le64_to_cpu(rs->mtime); +} + +static inline void seg_info_to_raw_sit(struct seg_entry *se, + struct f2fs_sit_entry *rs) +{ + unsigned short raw_vblocks = (se->type << SIT_VBLOCKS_SHIFT) | + se->valid_blocks; + rs->vblocks = cpu_to_le16(raw_vblocks); + memcpy(rs->valid_map, se->cur_valid_map, SIT_VBLOCK_MAP_SIZE); + memcpy(se->ckpt_valid_map, rs->valid_map, SIT_VBLOCK_MAP_SIZE); + se->ckpt_valid_blocks = se->valid_blocks; + rs->mtime = cpu_to_le64(se->mtime); +} + +static inline unsigned int find_next_inuse(struct free_segmap_info *free_i, + unsigned int max, unsigned int segno) +{ + unsigned int ret; + read_lock(&free_i->segmap_lock); + ret = find_next_bit(free_i->free_segmap, max, segno); + read_unlock(&free_i->segmap_lock); + return ret; +} + +static inline void __set_free(struct f2fs_sb_info *sbi, unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + clear_bit(segno, free_i->free_segmap); + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + clear_bit(secno, free_i->free_secmap); + free_i->free_sections++; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + set_bit(segno, free_i->free_segmap); + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; +} + +static inline void __set_test_and_free(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + unsigned int start_segno = secno * sbi->segs_per_sec; + unsigned int next; + + write_lock(&free_i->segmap_lock); + if (test_and_clear_bit(segno, free_i->free_segmap)) { + free_i->free_segments++; + + next = find_next_bit(free_i->free_segmap, TOTAL_SEGS(sbi), + start_segno); + if (next >= start_segno + sbi->segs_per_sec) { + if (test_and_clear_bit(secno, free_i->free_secmap)) + free_i->free_sections++; + } + } + write_unlock(&free_i->segmap_lock); +} + +static inline void __set_test_and_inuse(struct f2fs_sb_info *sbi, + unsigned int segno) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int secno = segno / sbi->segs_per_sec; + write_lock(&free_i->segmap_lock); + if (!test_and_set_bit(segno, free_i->free_segmap)) { + free_i->free_segments--; + if (!test_and_set_bit(secno, free_i->free_secmap)) + free_i->free_sections--; + } + write_unlock(&free_i->segmap_lock); +} + +static inline void get_sit_bitmap(struct f2fs_sb_info *sbi, + void *dst_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + memcpy(dst_addr, sit_i->sit_bitmap, sit_i->bitmap_size); +} + +static inline block_t written_block_count(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_t vblocks; + + mutex_lock(&sit_i->sentry_lock); + vblocks = sit_i->written_valid_blocks; + mutex_unlock(&sit_i->sentry_lock); + + return vblocks; +} + +static inline unsigned int free_segments(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_segs; + + read_lock(&free_i->segmap_lock); + free_segs = free_i->free_segments; + read_unlock(&free_i->segmap_lock); + + return free_segs; +} + +static inline int reserved_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->reserved_segments; +} + +static inline unsigned int free_sections(struct f2fs_sb_info *sbi) +{ + struct free_segmap_info *free_i = FREE_I(sbi); + unsigned int free_secs; + + read_lock(&free_i->segmap_lock); + free_secs = free_i->free_sections; + read_unlock(&free_i->segmap_lock); + + return free_secs; +} + +static inline unsigned int prefree_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[PRE]; +} + +static inline unsigned int dirty_segments(struct f2fs_sb_info *sbi) +{ + return DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_DATA] + + DIRTY_I(sbi)->nr_dirty[DIRTY_HOT_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_WARM_NODE] + + DIRTY_I(sbi)->nr_dirty[DIRTY_COLD_NODE]; +} + +static inline int overprovision_segments(struct f2fs_sb_info *sbi) +{ + return SM_I(sbi)->ovp_segments; +} + +static inline int overprovision_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) overprovision_segments(sbi)) / sbi->segs_per_sec; +} + +static inline int reserved_sections(struct f2fs_sb_info *sbi) +{ + return ((unsigned int) reserved_segments(sbi)) / sbi->segs_per_sec; +} + +static inline bool need_SSR(struct f2fs_sb_info *sbi) +{ + return (free_sections(sbi) < overprovision_sections(sbi)); +} + +static inline int get_ssr_segment(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return DIRTY_I(sbi)->v_ops->get_victim(sbi, + &(curseg)->next_segno, BG_GC, type, SSR); +} + +static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi) +{ + return free_sections(sbi) <= reserved_sections(sbi); +} + +static inline int utilization(struct f2fs_sb_info *sbi) +{ + return (long int)valid_user_blocks(sbi) * 100 / + (long int)sbi->user_block_count; +} + +/* + * Sometimes f2fs may be better to drop out-of-place update policy. + * So, if fs utilization is over MIN_IPU_UTIL, then f2fs tries to write + * data in the original place likewise other traditional file systems. + * But, currently set 100 in percentage, which means it is disabled. + * See below need_inplace_update(). + */ +#define MIN_IPU_UTIL 100 +static inline bool need_inplace_update(struct inode *inode) +{ + struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + if (S_ISDIR(inode->i_mode)) + return false; + if (need_SSR(sbi) && utilization(sbi) > MIN_IPU_UTIL) + return true; + return false; +} + +static inline unsigned int curseg_segno(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->segno; +} + +static inline unsigned char curseg_alloc_type(struct f2fs_sb_info *sbi, + int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->alloc_type; +} + +static inline unsigned short curseg_blkoff(struct f2fs_sb_info *sbi, int type) +{ + struct curseg_info *curseg = CURSEG_I(sbi, type); + return curseg->next_blkoff; +} + +static inline void check_seg_range(struct f2fs_sb_info *sbi, unsigned int segno) +{ + unsigned int end_segno = SM_I(sbi)->segment_count - 1; + BUG_ON(segno > end_segno); +} + +/* + * This function is used for only debugging. + * NOTE: In future, we have to remove this function. + */ +static inline void verify_block_addr(struct f2fs_sb_info *sbi, block_t blk_addr) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + block_t total_blks = sm_info->segment_count << sbi->log_blocks_per_seg; + block_t start_addr = sm_info->seg0_blkaddr; + block_t end_addr = start_addr + total_blks - 1; + BUG_ON(blk_addr < start_addr); + BUG_ON(blk_addr > end_addr); +} + +/* + * Summary block is always treated as invalid block + */ +static inline void check_block_count(struct f2fs_sb_info *sbi, + int segno, struct f2fs_sit_entry *raw_sit) +{ + struct f2fs_sm_info *sm_info = SM_I(sbi); + unsigned int end_segno = sm_info->segment_count - 1; + int valid_blocks = 0; + int i; + + /* check segment usage */ + BUG_ON(GET_SIT_VBLOCKS(raw_sit) > sbi->blocks_per_seg); + + /* check boundary of a given segment number */ + BUG_ON(segno > end_segno); + + /* check bitmap with valid block count */ + for (i = 0; i < sbi->blocks_per_seg; i++) + if (f2fs_test_bit(i, raw_sit->valid_map)) + valid_blocks++; + BUG_ON(GET_SIT_VBLOCKS(raw_sit) != valid_blocks); +} + +static inline pgoff_t current_sit_addr(struct f2fs_sb_info *sbi, + unsigned int start) +{ + struct sit_info *sit_i = SIT_I(sbi); + unsigned int offset = SIT_BLOCK_OFFSET(sit_i, start); + block_t blk_addr = sit_i->sit_base_addr + offset; + + check_seg_range(sbi, start); + + /* calculate sit block address */ + if (f2fs_test_bit(offset, sit_i->sit_bitmap)) + blk_addr += sit_i->sit_blocks; + + return blk_addr; +} + +static inline pgoff_t next_sit_addr(struct f2fs_sb_info *sbi, + pgoff_t block_addr) +{ + struct sit_info *sit_i = SIT_I(sbi); + block_addr -= sit_i->sit_base_addr; + if (block_addr < sit_i->sit_blocks) + block_addr += sit_i->sit_blocks; + else + block_addr -= sit_i->sit_blocks; + + return block_addr + sit_i->sit_base_addr; +} + +static inline void set_to_next_sit(struct sit_info *sit_i, unsigned int start) +{ + unsigned int block_off = SIT_BLOCK_OFFSET(sit_i, start); + + if (f2fs_test_bit(block_off, sit_i->sit_bitmap)) + f2fs_clear_bit(block_off, sit_i->sit_bitmap); + else + f2fs_set_bit(block_off, sit_i->sit_bitmap); +} + +static inline unsigned long long get_mtime(struct f2fs_sb_info *sbi) +{ + struct sit_info *sit_i = SIT_I(sbi); + return sit_i->elapsed_time + CURRENT_TIME_SEC.tv_sec - + sit_i->mounted_time; +} + +static inline void set_summary(struct f2fs_summary *sum, nid_t nid, + unsigned int ofs_in_node, unsigned char version) +{ + sum->nid = cpu_to_le32(nid); + sum->ofs_in_node = cpu_to_le16(ofs_in_node); + sum->version = version; +} + +static inline block_t start_sum_block(struct f2fs_sb_info *sbi) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_start_sum); +} + +static inline block_t sum_blk_addr(struct f2fs_sb_info *sbi, int base, int type) +{ + return __start_cp_addr(sbi) + + le32_to_cpu(F2FS_CKPT(sbi)->cp_pack_total_block_count) + - (base + 1) + type; +} diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h index e15192cb9cf4..66353ffd06a7 100644 --- a/include/uapi/linux/magic.h +++ b/include/uapi/linux/magic.h @@ -23,6 +23,7 @@ #define EXT4_SUPER_MAGIC 0xEF53 #define BTRFS_SUPER_MAGIC 0x9123683E #define NILFS_SUPER_MAGIC 0x3434 +#define F2FS_SUPER_MAGIC 0xF2F52010 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -- cgit v1.2.3 From 479e2802d09f1e18a97262c4c6f8f17ae5884bd8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Oct 2012 14:16:28 +0200 Subject: mm: mempolicy: Make MPOL_LOCAL a real policy Make MPOL_LOCAL a real and exposed policy such that applications that relied on the previous default behaviour can explicitly request it. Requested-by: Christoph Lameter Reviewed-by: Rik van Riel Cc: Lee Schermerhorn Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 23e62e0537e2..3e835c9d847b 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -20,6 +20,7 @@ enum { MPOL_PREFERRED, MPOL_BIND, MPOL_INTERLEAVE, + MPOL_LOCAL, MPOL_MAX, /* always last member of enum */ }; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 66e90ecc2350..54bd3e5ed776 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -269,6 +269,10 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, (flags & MPOL_F_RELATIVE_NODES))) return ERR_PTR(-EINVAL); } + } else if (mode == MPOL_LOCAL) { + if (!nodes_empty(*nodes)) + return ERR_PTR(-EINVAL); + mode = MPOL_PREFERRED; } else if (nodes_empty(*nodes)) return ERR_PTR(-EINVAL); policy = kmem_cache_alloc(policy_cache, GFP_KERNEL); @@ -2399,7 +2403,6 @@ void numa_default_policy(void) * "local" is pseudo-policy: MPOL_PREFERRED with MPOL_F_LOCAL flag * Used only for mpol_parse_str() and mpol_to_str() */ -#define MPOL_LOCAL MPOL_MAX static const char * const policy_modes[] = { [MPOL_DEFAULT] = "default", @@ -2452,12 +2455,12 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) if (flags) *flags++ = '\0'; /* terminate mode string */ - for (mode = 0; mode <= MPOL_LOCAL; mode++) { + for (mode = 0; mode < MPOL_MAX; mode++) { if (!strcmp(str, policy_modes[mode])) { break; } } - if (mode > MPOL_LOCAL) + if (mode >= MPOL_MAX) goto out; switch (mode) { -- cgit v1.2.3 From d3a710337b0590f43fd236d5e6518439afc7410a Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:29 +0200 Subject: mm: mempolicy: Add MPOL_NOOP This patch augments the MPOL_MF_LAZY feature by adding a "NOOP" policy to mbind(). When the NOOP policy is used with the 'MOVE and 'LAZY flags, mbind() will map the pages PROT_NONE so that they will be migrated on the next touch. This allows an application to prepare for a new phase of operation where different regions of shared storage will be assigned to worker threads, w/o changing policy. Note that we could just use "default" policy in this case. However, this also allows an application to request that pages be migrated, only if necessary, to follow any arbitrary policy that might currently apply to a range of pages, without knowing the policy, or without specifying multiple mbind()s for ranges with different policies. [ Bug in early version of mpol_parse_str() reported by Fengguang Wu. ] Bug-Reported-by: Reported-by: Fengguang Wu Signed-off-by: Lee Schermerhorn Reviewed-by: Rik van Riel Cc: Andrew Morton Cc: Linus Torvalds Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 11 ++++++----- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 3e835c9d847b..d23dca8367cc 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -21,6 +21,7 @@ enum { MPOL_BIND, MPOL_INTERLEAVE, MPOL_LOCAL, + MPOL_NOOP, /* retain existing policy for range */ MPOL_MAX, /* always last member of enum */ }; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 54bd3e5ed776..c21e91477c4f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -251,10 +251,10 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, pr_debug("setting mode %d flags %d nodes[0] %lx\n", mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); - if (mode == MPOL_DEFAULT) { + if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); - return NULL; /* simply delete any existing policy */ + return NULL; } VM_BUG_ON(!nodes); @@ -1147,7 +1147,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (start & ~PAGE_MASK) return -EINVAL; - if (mode == MPOL_DEFAULT) + if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) flags &= ~MPOL_MF_STRICT; len = (len + PAGE_SIZE - 1) & PAGE_MASK; @@ -2409,7 +2409,8 @@ static const char * const policy_modes[] = [MPOL_PREFERRED] = "prefer", [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", - [MPOL_LOCAL] = "local" + [MPOL_LOCAL] = "local", + [MPOL_NOOP] = "noop", /* should not actually be used */ }; @@ -2460,7 +2461,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) break; } } - if (mode >= MPOL_MAX) + if (mode >= MPOL_MAX || mode == MPOL_NOOP) goto out; switch (mode) { -- cgit v1.2.3 From 771fb4d806a92bf6c988fcfbd286ae40a9374332 Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:30 +0200 Subject: mm: mempolicy: Check for misplaced page This patch provides a new function to test whether a page resides on a node that is appropriate for the mempolicy for the vma and address where the page is supposed to be mapped. This involves looking up the node where the page belongs. So, the function returns that node so that it may be used to allocated the page without consulting the policy again. A subsequent patch will call this function from the fault path. Because of this, I don't want to go ahead and allocate the page, e.g., via alloc_page_vma() only to have to free it if it has the correct policy. So, I just mimic the alloc_page_vma() node computation logic--sort of. Note: we could use this function to implement a MPOL_MF_STRICT behavior when migrating pages to match mbind() mempolicy--e.g., to ensure that pages in an interleaved range are reinterleaved rather than left where they are when they reside on any page in the interleave nodemask. Signed-off-by: Lee Schermerhorn Reviewed-by: Rik van Riel Cc: Andrew Morton Cc: Linus Torvalds [ Added MPOL_F_LAZY to trigger migrate-on-fault; simplified code now that we don't have to bother with special crap for interleaved ] Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar Signed-off-by: Mel Gorman --- include/linux/mempolicy.h | 8 +++++ include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 76 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 85 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index e5ccb9ddd90e..c511e2523560 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -198,6 +198,8 @@ static inline int vma_migratable(struct vm_area_struct *vma) return 1; } +extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); + #else struct mempolicy {}; @@ -323,5 +325,11 @@ static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol, return 0; } +static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, + unsigned long address) +{ + return -1; /* no node preference */ +} + #endif /* CONFIG_NUMA */ #endif diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index d23dca8367cc..472de8a5d37e 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -61,6 +61,7 @@ enum mpol_rebind_step { #define MPOL_F_SHARED (1 << 0) /* identify shared policies */ #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ +#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index c21e91477c4f..df1466d3d2d8 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -2181,6 +2181,82 @@ static void sp_free(struct sp_node *n) kmem_cache_free(sn_cache, n); } +/** + * mpol_misplaced - check whether current page node is valid in policy + * + * @page - page to be checked + * @vma - vm area where page mapped + * @addr - virtual address where page mapped + * + * Lookup current policy node id for vma,addr and "compare to" page's + * node id. + * + * Returns: + * -1 - not misplaced, page is in the right node + * node - node id where the page should be + * + * Policy determination "mimics" alloc_page_vma(). + * Called from fault path where we know the vma and faulting address. + */ +int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long addr) +{ + struct mempolicy *pol; + struct zone *zone; + int curnid = page_to_nid(page); + unsigned long pgoff; + int polnid = -1; + int ret = -1; + + BUG_ON(!vma); + + pol = get_vma_policy(current, vma, addr); + if (!(pol->flags & MPOL_F_MOF)) + goto out; + + switch (pol->mode) { + case MPOL_INTERLEAVE: + BUG_ON(addr >= vma->vm_end); + BUG_ON(addr < vma->vm_start); + + pgoff = vma->vm_pgoff; + pgoff += (addr - vma->vm_start) >> PAGE_SHIFT; + polnid = offset_il_node(pol, vma, pgoff); + break; + + case MPOL_PREFERRED: + if (pol->flags & MPOL_F_LOCAL) + polnid = numa_node_id(); + else + polnid = pol->v.preferred_node; + break; + + case MPOL_BIND: + /* + * allows binding to multiple nodes. + * use current page if in policy nodemask, + * else select nearest allowed node, if any. + * If no allowed nodes, use current [!misplaced]. + */ + if (node_isset(curnid, pol->v.nodes)) + goto out; + (void)first_zones_zonelist( + node_zonelist(numa_node_id(), GFP_HIGHUSER), + gfp_zone(GFP_HIGHUSER), + &pol->v.nodes, &zone); + polnid = zone->node; + break; + + default: + BUG(); + } + if (curnid != polnid) + ret = polnid; +out: + mpol_cond_put(pol); + + return ret; +} + static void sp_delete(struct shared_policy *sp, struct sp_node *n) { pr_debug("deleting %lx-l%lx\n", n->start, n->end); -- cgit v1.2.3 From b24f53a0bea38b266d219ee651b22dba727c44ae Mon Sep 17 00:00:00 2001 From: Lee Schermerhorn Date: Thu, 25 Oct 2012 14:16:32 +0200 Subject: mm: mempolicy: Add MPOL_MF_LAZY NOTE: Once again there is a lot of patch stealing and the end result is sufficiently different that I had to drop the signed-offs. Will re-add if the original authors are ok with that. This patch adds another mbind() flag to request "lazy migration". The flag, MPOL_MF_LAZY, modifies MPOL_MF_MOVE* such that the selected pages are marked PROT_NONE. The pages will be migrated in the fault path on "first touch", if the policy dictates at that time. "Lazy Migration" will allow testing of migrate-on-fault via mbind(). Also allows applications to specify that only subsequently touched pages be migrated to obey new policy, instead of all pages in range. This can be useful for multi-threaded applications working on a large shared data area that is initialized by an initial thread resulting in all pages on one [or a few, if overflowed] nodes. After PROT_NONE, the pages in regions assigned to the worker threads will be automatically migrated local to the threads on 1st touch. Signed-off-by: Mel Gorman Reviewed-by: Rik van Riel --- include/linux/mm.h | 5 ++ include/uapi/linux/mempolicy.h | 13 ++- mm/mempolicy.c | 185 +++++++++++++++++++++++++++++++++++++---- 3 files changed, 185 insertions(+), 18 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa1615211159..471185e29bab 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1551,6 +1551,11 @@ static inline pgprot_t vm_get_page_prot(unsigned long vm_flags) } #endif +#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +void change_prot_numa(struct vm_area_struct *vma, + unsigned long start, unsigned long end); +#endif + struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr); int remap_pfn_range(struct vm_area_struct *, unsigned long addr, unsigned long pfn, unsigned long size, pgprot_t); diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 472de8a5d37e..6a1baae3775d 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -49,9 +49,16 @@ enum mpol_rebind_step { /* Flags for mbind */ #define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */ -#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */ -#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */ -#define MPOL_MF_INTERNAL (1<<3) /* Internal flags start here */ +#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform + to policy */ +#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to policy */ +#define MPOL_MF_LAZY (1<<3) /* Modifies '_MOVE: lazy migrate on fault */ +#define MPOL_MF_INTERNAL (1<<4) /* Internal flags start here */ + +#define MPOL_MF_VALID (MPOL_MF_STRICT | \ + MPOL_MF_MOVE | \ + MPOL_MF_MOVE_ALL | \ + MPOL_MF_LAZY) /* * Internal flags that share the struct mempolicy flags word with diff --git a/mm/mempolicy.c b/mm/mempolicy.c index df1466d3d2d8..51d3ebd8561e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include @@ -565,6 +566,145 @@ static inline int check_pgd_range(struct vm_area_struct *vma, return 0; } +#ifdef CONFIG_ARCH_USES_NUMA_PROT_NONE +/* + * Here we search for not shared page mappings (mapcount == 1) and we + * set up the pmd/pte_numa on those mappings so the very next access + * will fire a NUMA hinting page fault. + */ +static int +change_prot_numa_range(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long address) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *pte, *_pte; + struct page *page; + unsigned long _address, end; + spinlock_t *ptl; + int ret = 0; + + VM_BUG_ON(address & ~PAGE_MASK); + + pgd = pgd_offset(mm, address); + if (!pgd_present(*pgd)) + goto out; + + pud = pud_offset(pgd, address); + if (!pud_present(*pud)) + goto out; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd)) + goto out; + + if (pmd_trans_huge_lock(pmd, vma) == 1) { + int page_nid; + ret = HPAGE_PMD_NR; + + VM_BUG_ON(address & ~HPAGE_PMD_MASK); + + if (pmd_numa(*pmd)) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + page = pmd_page(*pmd); + + /* only check non-shared pages */ + if (page_mapcount(page) != 1) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + page_nid = page_to_nid(page); + + if (pmd_numa(*pmd)) { + spin_unlock(&mm->page_table_lock); + goto out; + } + + set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); + ret += HPAGE_PMD_NR; + /* defer TLB flush to lower the overhead */ + spin_unlock(&mm->page_table_lock); + goto out; + } + + if (pmd_trans_unstable(pmd)) + goto out; + VM_BUG_ON(!pmd_present(*pmd)); + + end = min(vma->vm_end, (address + PMD_SIZE) & PMD_MASK); + pte = pte_offset_map_lock(mm, pmd, address, &ptl); + for (_address = address, _pte = pte; _address < end; + _pte++, _address += PAGE_SIZE) { + pte_t pteval = *_pte; + if (!pte_present(pteval)) + continue; + if (pte_numa(pteval)) + continue; + page = vm_normal_page(vma, _address, pteval); + if (unlikely(!page)) + continue; + /* only check non-shared pages */ + if (page_mapcount(page) != 1) + continue; + + set_pte_at(mm, _address, _pte, pte_mknuma(pteval)); + + /* defer TLB flush to lower the overhead */ + ret++; + } + pte_unmap_unlock(pte, ptl); + + if (ret && !pmd_numa(*pmd)) { + spin_lock(&mm->page_table_lock); + set_pmd_at(mm, address, pmd, pmd_mknuma(*pmd)); + spin_unlock(&mm->page_table_lock); + /* defer TLB flush to lower the overhead */ + } + +out: + return ret; +} + +/* Assumes mmap_sem is held */ +void +change_prot_numa(struct vm_area_struct *vma, + unsigned long address, unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + int progress = 0; + + while (address < end) { + VM_BUG_ON(address < vma->vm_start || + address + PAGE_SIZE > vma->vm_end); + + progress += change_prot_numa_range(mm, vma, address); + address = (address + PMD_SIZE) & PMD_MASK; + } + + /* + * Flush the TLB for the mm to start the NUMA hinting + * page faults after we finish scanning this vma part + * if there were any PTE updates + */ + if (progress) { + mmu_notifier_invalidate_range_start(vma->vm_mm, address, end); + flush_tlb_range(vma, address, end); + mmu_notifier_invalidate_range_end(vma->vm_mm, address, end); + } +} +#else +static unsigned long change_prot_numa(struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + return 0; +} +#endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */ + /* * Check if all pages in a range are on a set of nodes. * If pagelist != NULL then isolate pages from the LRU and @@ -583,22 +723,32 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, return ERR_PTR(-EFAULT); prev = NULL; for (vma = first; vma && vma->vm_start < end; vma = vma->vm_next) { + unsigned long endvma = vma->vm_end; + + if (endvma > end) + endvma = end; + if (vma->vm_start > start) + start = vma->vm_start; + if (!(flags & MPOL_MF_DISCONTIG_OK)) { if (!vma->vm_next && vma->vm_end < end) return ERR_PTR(-EFAULT); if (prev && prev->vm_end < vma->vm_start) return ERR_PTR(-EFAULT); } - if (!is_vm_hugetlb_page(vma) && - ((flags & MPOL_MF_STRICT) || + + if (is_vm_hugetlb_page(vma)) + goto next; + + if (flags & MPOL_MF_LAZY) { + change_prot_numa(vma, start, endvma); + goto next; + } + + if ((flags & MPOL_MF_STRICT) || ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) && - vma_migratable(vma)))) { - unsigned long endvma = vma->vm_end; + vma_migratable(vma))) { - if (endvma > end) - endvma = end; - if (vma->vm_start > start) - start = vma->vm_start; err = check_pgd_range(vma, start, endvma, nodes, flags, private); if (err) { @@ -606,6 +756,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end, break; } } +next: prev = vma; } return first; @@ -1138,8 +1289,7 @@ static long do_mbind(unsigned long start, unsigned long len, int err; LIST_HEAD(pagelist); - if (flags & ~(unsigned long)(MPOL_MF_STRICT | - MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) + if (flags & ~(unsigned long)MPOL_MF_VALID) return -EINVAL; if ((flags & MPOL_MF_MOVE_ALL) && !capable(CAP_SYS_NICE)) return -EPERM; @@ -1162,6 +1312,9 @@ static long do_mbind(unsigned long start, unsigned long len, if (IS_ERR(new)) return PTR_ERR(new); + if (flags & MPOL_MF_LAZY) + new->flags |= MPOL_F_MOF; + /* * If we are using the default policy then operation * on discontinuous address spaces is okay after all @@ -1198,13 +1351,15 @@ static long do_mbind(unsigned long start, unsigned long len, vma = check_range(mm, start, end, nmask, flags | MPOL_MF_INVERT, &pagelist); - err = PTR_ERR(vma); - if (!IS_ERR(vma)) { - int nr_failed = 0; - + err = PTR_ERR(vma); /* maybe ... */ + if (!IS_ERR(vma) && mode != MPOL_NOOP) err = mbind_range(mm, start, end, new); + if (!err) { + int nr_failed = 0; + if (!list_empty(&pagelist)) { + WARN_ON_ONCE(flags & MPOL_MF_LAZY); nr_failed = migrate_pages(&pagelist, new_vma_page, (unsigned long)vma, false, MIGRATE_SYNC, @@ -1213,7 +1368,7 @@ static long do_mbind(unsigned long start, unsigned long len, putback_lru_pages(&pagelist); } - if (!err && nr_failed && (flags & MPOL_MF_STRICT)) + if (nr_failed && (flags & MPOL_MF_STRICT)) err = -EIO; } else putback_lru_pages(&pagelist); -- cgit v1.2.3 From a720094ded8cbb303111035be91858011d2eac71 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 16 Nov 2012 09:37:58 +0000 Subject: mm: mempolicy: Hide MPOL_NOOP and MPOL_MF_LAZY from userspace for now The use of MPOL_NOOP and MPOL_MF_LAZY to allow an application to explicitly request lazy migration is a good idea but the actual API has not been well reviewed and once released we have to support it. For now this patch prevents an application using the services. This will need to be revisited. Signed-off-by: Mel Gorman --- include/uapi/linux/mempolicy.h | 4 +--- mm/mempolicy.c | 9 ++++----- 2 files changed, 5 insertions(+), 8 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 6a1baae3775d..16fb4e6efbc4 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -21,7 +21,6 @@ enum { MPOL_BIND, MPOL_INTERLEAVE, MPOL_LOCAL, - MPOL_NOOP, /* retain existing policy for range */ MPOL_MAX, /* always last member of enum */ }; @@ -57,8 +56,7 @@ enum mpol_rebind_step { #define MPOL_MF_VALID (MPOL_MF_STRICT | \ MPOL_MF_MOVE | \ - MPOL_MF_MOVE_ALL | \ - MPOL_MF_LAZY) + MPOL_MF_MOVE_ALL) /* * Internal flags that share the struct mempolicy flags word with diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 75d4600a5e92..a7a62fe7c280 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -252,7 +252,7 @@ static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, pr_debug("setting mode %d flags %d nodes[0] %lx\n", mode, flags, nodes ? nodes_addr(*nodes)[0] : -1); - if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) { + if (mode == MPOL_DEFAULT) { if (nodes && !nodes_empty(*nodes)) return ERR_PTR(-EINVAL); return NULL; @@ -1186,7 +1186,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (start & ~PAGE_MASK) return -EINVAL; - if (mode == MPOL_DEFAULT || mode == MPOL_NOOP) + if (mode == MPOL_DEFAULT) flags &= ~MPOL_MF_STRICT; len = (len + PAGE_SIZE - 1) & PAGE_MASK; @@ -1241,7 +1241,7 @@ static long do_mbind(unsigned long start, unsigned long len, flags | MPOL_MF_INVERT, &pagelist); err = PTR_ERR(vma); /* maybe ... */ - if (!IS_ERR(vma) && mode != MPOL_NOOP) + if (!IS_ERR(vma)) err = mbind_range(mm, start, end, new); if (!err) { @@ -2530,7 +2530,6 @@ static const char * const policy_modes[] = [MPOL_BIND] = "bind", [MPOL_INTERLEAVE] = "interleave", [MPOL_LOCAL] = "local", - [MPOL_NOOP] = "noop", /* should not actually be used */ }; @@ -2581,7 +2580,7 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context) break; } } - if (mode >= MPOL_MAX || mode == MPOL_NOOP) + if (mode >= MPOL_MAX) goto out; switch (mode) { -- cgit v1.2.3 From 5606e3877ad8baea42f3a71ebde0a03622bbb551 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 2 Nov 2012 18:19:13 +0000 Subject: mm: numa: Migrate on reference policy This is the simplest possible policy that still does something of note. When a pte_numa is faulted, it is moved immediately. Any replacement policy must at least do better than this and in all likelihood this policy regresses normal workloads. Signed-off-by: Mel Gorman Acked-by: Rik van Riel --- include/uapi/linux/mempolicy.h | 1 + mm/mempolicy.c | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 16fb4e6efbc4..0d11c3dcd3a1 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -67,6 +67,7 @@ enum mpol_rebind_step { #define MPOL_F_LOCAL (1 << 1) /* preferred local allocation */ #define MPOL_F_REBINDING (1 << 2) /* identify policies in rebinding */ #define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */ +#define MPOL_F_MORON (1 << 4) /* Migrate On pte_numa Reference On Node */ #endif /* _UAPI_LINUX_MEMPOLICY_H */ diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 516491fbfaa8..4c1c8d83ac6a 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -118,6 +118,26 @@ static struct mempolicy default_policy = { .flags = MPOL_F_LOCAL, }; +static struct mempolicy preferred_node_policy[MAX_NUMNODES]; + +static struct mempolicy *get_task_policy(struct task_struct *p) +{ + struct mempolicy *pol = p->mempolicy; + int node; + + if (!pol) { + node = numa_node_id(); + if (node != -1) + pol = &preferred_node_policy[node]; + + /* preferred_node_policy is not initialised early in boot */ + if (!pol->mode) + pol = NULL; + } + + return pol; +} + static const struct mempolicy_operations { int (*create)(struct mempolicy *pol, const nodemask_t *nodes); /* @@ -1598,7 +1618,7 @@ asmlinkage long compat_sys_mbind(compat_ulong_t start, compat_ulong_t len, struct mempolicy *get_vma_policy(struct task_struct *task, struct vm_area_struct *vma, unsigned long addr) { - struct mempolicy *pol = task->mempolicy; + struct mempolicy *pol = get_task_policy(task); if (vma) { if (vma->vm_ops && vma->vm_ops->get_policy) { @@ -2021,7 +2041,7 @@ retry_cpuset: */ struct page *alloc_pages_current(gfp_t gfp, unsigned order) { - struct mempolicy *pol = current->mempolicy; + struct mempolicy *pol = get_task_policy(current); struct page *page; unsigned int cpuset_mems_cookie; @@ -2295,6 +2315,11 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long default: BUG(); } + + /* Migrate the page towards the node whose CPU is referencing it */ + if (pol->flags & MPOL_F_MORON) + polnid = numa_node_id(); + if (curnid != polnid) ret = polnid; out: @@ -2483,6 +2508,15 @@ void __init numa_policy_init(void) sizeof(struct sp_node), 0, SLAB_PANIC, NULL); + for_each_node(nid) { + preferred_node_policy[nid] = (struct mempolicy) { + .refcnt = ATOMIC_INIT(1), + .mode = MPOL_PREFERRED, + .flags = MPOL_F_MOF | MPOL_F_MORON, + .v = { .preferred_node = nid, }, + }; + } + /* * Set interleaving policy for system init. Interleaving is only * enabled across suitably sized nodes (default is >= 16MB), or -- cgit v1.2.3 From 895464fa4b52f0e5a2ceffc173bad012be02b465 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 12 Dec 2012 06:58:52 +0000 Subject: uapi: add missing netconf.h to export list Add netconf.h for use by iproute2. Signed-off-by: Stephen Hemminger Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index e194387ef784..2564a968ca48 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -258,6 +258,7 @@ header-y += neighbour.h header-y += net.h header-y += net_dropmon.h header-y += net_tstamp.h +header-y += netconf.h header-y += netdevice.h header-y += netfilter.h header-y += netfilter_arp.h -- cgit v1.2.3 From 37a393bc4932d7bac360f40064aaafc01ab44901 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Dec 2012 22:23:07 +0000 Subject: bridge: notify mdb changes via netlink As Stephen mentioned, we need to monitor the mdb changes in user-space, so add notifications via netlink too. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 6 ++++ net/bridge/br_mdb.c | 80 ++++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 2 ++ net/bridge/br_private.h | 2 ++ 4 files changed, 90 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 354a1e7d32a3..7a5eb196ade9 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -125,6 +125,10 @@ enum { RTM_GETNETCONF = 82, #define RTM_GETNETCONF RTM_GETNETCONF + RTM_NEWMDB = 84, +#define RTM_NEWMDB RTM_NEWMDB + RTM_DELMDB = 85, +#define RTM_DELMDB RTM_DELMDB RTM_GETMDB = 86, #define RTM_GETMDB RTM_GETMDB @@ -607,6 +611,8 @@ enum rtnetlink_groups { #define RTNLGRP_IPV4_NETCONF RTNLGRP_IPV4_NETCONF RTNLGRP_IPV6_NETCONF, #define RTNLGRP_IPV6_NETCONF RTNLGRP_IPV6_NETCONF + RTNLGRP_MDB, +#define RTNLGRP_MDB RTNLGRP_MDB __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index ccc43a9bff80..a8cfbf5f3c68 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -155,6 +155,86 @@ out: return skb->len; } +static int nlmsg_populate_mdb_fill(struct sk_buff *skb, + struct net_device *dev, + struct br_mdb_entry *entry, u32 pid, + u32 seq, int type, unsigned int flags) +{ + struct nlmsghdr *nlh; + struct br_port_msg *bpm; + struct nlattr *nest, *nest2; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*bpm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + bpm = nlmsg_data(nlh); + bpm->family = AF_BRIDGE; + bpm->ifindex = dev->ifindex; + nest = nla_nest_start(skb, MDBA_MDB); + if (nest == NULL) + goto cancel; + nest2 = nla_nest_start(skb, MDBA_MDB_ENTRY); + if (nest2 == NULL) + goto end; + + if (nla_put(skb, MDBA_MDB_ENTRY_INFO, sizeof(*entry), entry)) + goto end; + + nla_nest_end(skb, nest2); + nla_nest_end(skb, nest); + return nlmsg_end(skb, nlh); + +end: + nla_nest_end(skb, nest); +cancel: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static inline size_t rtnl_mdb_nlmsg_size(void) +{ + return NLMSG_ALIGN(sizeof(struct br_port_msg)) + + nla_total_size(sizeof(struct br_mdb_entry)); +} + +static void __br_mdb_notify(struct net_device *dev, struct br_mdb_entry *entry, + int type) +{ + struct net *net = dev_net(dev); + struct sk_buff *skb; + int err = -ENOBUFS; + + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); + if (!skb) + goto errout; + + err = nlmsg_populate_mdb_fill(skb, dev, entry, 0, 0, type, NTF_SELF); + if (err < 0) { + kfree_skb(skb); + goto errout; + } + + rtnl_notify(skb, net, 0, RTNLGRP_MDB, NULL, GFP_ATOMIC); + return; +errout: + rtnl_set_sk_err(net, RTNLGRP_MDB, err); +} + +void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type) +{ + struct br_mdb_entry entry; + + entry.ifindex = port->dev->ifindex; + entry.addr.proto = group->proto; + entry.addr.u.ip4 = group->u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + entry.addr.u.ip6 = group->u.ip6; +#endif + __br_mdb_notify(dev, &entry, type); +} + void br_mdb_init(void) { rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 847b98a1d5e0..d929586ce39e 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -681,6 +681,7 @@ static int br_multicast_add_group(struct net_bridge *br, (unsigned long)p); rcu_assign_pointer(*pp, p); + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); found: mod_timer(&p->timer, now + br->multicast_membership_interval); @@ -1240,6 +1241,7 @@ static void br_multicast_leave_group(struct net_bridge *br, hlist_del_init(&p->mglist); del_timer(&p->timer); call_rcu_bh(&p->rcu, br_multicast_free_pg); + br_mdb_notify(br->dev, port, group, RTM_DELMDB); if (!mp->ports && !mp->mglist && netif_running(br->dev)) diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f95b766c7a98..2807c7680c38 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -435,6 +435,8 @@ extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); extern void br_mdb_init(void); +extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, + struct br_ip *group, int type); static inline bool br_multicast_is_router(struct net_bridge *br) { -- cgit v1.2.3 From cfd567543590f71ca0af397437e2554f9756d750 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Tue, 11 Dec 2012 22:23:08 +0000 Subject: bridge: add support of adding and deleting mdb entries This patch implents adding/deleting mdb entries via netlink. Currently all entries are temp, we probably need a flag to distinguish permanent entries too. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Cc: Thomas Graf Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 8 ++ net/bridge/br_mdb.c | 240 +++++++++++++++++++++++++++++++++++++++++ net/bridge/br_multicast.c | 55 +++++----- net/bridge/br_private.h | 23 ++++ 4 files changed, 297 insertions(+), 29 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index 9a0f6ff0d7e7..afbb18a0227c 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -157,6 +157,7 @@ enum { #define MDBA_ROUTER_MAX (__MDBA_ROUTER_MAX - 1) struct br_port_msg { + __u8 family; __u32 ifindex; }; @@ -171,4 +172,11 @@ struct br_mdb_entry { } addr; }; +enum { + MDBA_SET_ENTRY_UNSPEC, + MDBA_SET_ENTRY, + __MDBA_SET_ENTRY_MAX, +}; +#define MDBA_SET_ENTRY_MAX (__MDBA_SET_ENTRY_MAX - 1) + #endif /* _UAPI_LINUX_IF_BRIDGE_H */ diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a8cfbf5f3c68..6f0a2eebcb27 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #if IS_ENABLED(CONFIG_IPV6) @@ -235,7 +236,246 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, __br_mdb_notify(dev, &entry, type); } +static bool is_valid_mdb_entry(struct br_mdb_entry *entry) +{ + if (entry->ifindex == 0) + return false; + + if (entry->addr.proto == htons(ETH_P_IP)) { + if (!ipv4_is_multicast(entry->addr.u.ip4)) + return false; + if (ipv4_is_local_multicast(entry->addr.u.ip4)) + return false; +#if IS_ENABLED(CONFIG_IPV6) + } else if (entry->addr.proto == htons(ETH_P_IPV6)) { + if (!ipv6_is_transient_multicast(&entry->addr.u.ip6)) + return false; +#endif + } else + return false; + + return true; +} + +static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, + struct net_device **pdev, struct br_mdb_entry **pentry) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct br_port_msg *bpm; + struct nlattr *tb[MDBA_SET_ENTRY_MAX+1]; + struct net_device *dev; + int err; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + err = nlmsg_parse(nlh, sizeof(*bpm), tb, MDBA_SET_ENTRY, NULL); + if (err < 0) + return err; + + bpm = nlmsg_data(nlh); + if (bpm->ifindex == 0) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid ifindex\n"); + return -EINVAL; + } + + dev = __dev_get_by_index(net, bpm->ifindex); + if (dev == NULL) { + pr_info("PF_BRIDGE: br_mdb_parse() with unknown ifindex\n"); + return -ENODEV; + } + + if (!(dev->priv_flags & IFF_EBRIDGE)) { + pr_info("PF_BRIDGE: br_mdb_parse() with non-bridge\n"); + return -EOPNOTSUPP; + } + + *pdev = dev; + + if (!tb[MDBA_SET_ENTRY] || + nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid attr\n"); + return -EINVAL; + } + + entry = nla_data(tb[MDBA_SET_ENTRY]); + if (!is_valid_mdb_entry(entry)) { + pr_info("PF_BRIDGE: br_mdb_parse() with invalid entry\n"); + return -EINVAL; + } + + *pentry = entry; + return 0; +} + +static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, + struct br_ip *group) +{ + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct net_bridge_mdb_htable *mdb; + int err; + + mdb = mlock_dereference(br->mdb, br); + mp = br_mdb_ip_get(mdb, group); + if (!mp) { + mp = br_multicast_new_group(br, port, group); + err = PTR_ERR(mp); + if (IS_ERR(mp)) + return err; + } + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (p->port == port) + return -EEXIST; + if ((unsigned long)p->port < (unsigned long)port) + break; + } + + p = br_multicast_new_port_group(port, group, *pp); + if (unlikely(!p)) + return -ENOMEM; + rcu_assign_pointer(*pp, p); + + br_mdb_notify(br->dev, port, group, RTM_NEWMDB); + return 0; +} + +static int __br_mdb_add(struct net *net, struct net_bridge *br, + struct br_mdb_entry *entry) +{ + struct br_ip ip; + struct net_device *dev; + struct net_bridge_port *p; + int ret; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + dev = __dev_get_by_index(net, entry->ifindex); + if (!dev) + return -ENODEV; + + p = br_port_get_rtnl(dev); + if (!p || p->br != br || p->state == BR_STATE_DISABLED) + return -EINVAL; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip.u.ip6 = entry->addr.u.ip6; +#endif + + spin_lock_bh(&br->multicast_lock); + ret = br_mdb_add_group(br, p, &ip); + spin_unlock_bh(&br->multicast_lock); + return ret; +} + +static int br_mdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net *net = sock_net(skb->sk); + struct br_mdb_entry *entry; + struct net_device *dev; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_add(net, br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_NEWMDB); + return err; +} + +static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) +{ + struct net_bridge_mdb_htable *mdb; + struct net_bridge_mdb_entry *mp; + struct net_bridge_port_group *p; + struct net_bridge_port_group __rcu **pp; + struct br_ip ip; + int err = -EINVAL; + + if (!netif_running(br->dev) || br->multicast_disabled) + return -EINVAL; + + if (timer_pending(&br->multicast_querier_timer)) + return -EBUSY; + + ip.proto = entry->addr.proto; + if (ip.proto == htons(ETH_P_IP)) + ip.u.ip4 = entry->addr.u.ip4; +#if IS_ENABLED(CONFIG_IPV6) + else + ip.u.ip6 = entry->addr.u.ip6; +#endif + + spin_lock_bh(&br->multicast_lock); + mdb = mlock_dereference(br->mdb, br); + + mp = br_mdb_ip_get(mdb, &ip); + if (!mp) + goto unlock; + + for (pp = &mp->ports; + (p = mlock_dereference(*pp, br)) != NULL; + pp = &p->next) { + if (!p->port || p->port->dev->ifindex != entry->ifindex) + continue; + + if (p->port->state == BR_STATE_DISABLED) + goto unlock; + + rcu_assign_pointer(*pp, p->next); + hlist_del_init(&p->mglist); + del_timer(&p->timer); + call_rcu_bh(&p->rcu, br_multicast_free_pg); + err = 0; + + if (!mp->ports && !mp->mglist && + netif_running(br->dev)) + mod_timer(&mp->timer, jiffies); + break; + } + +unlock: + spin_unlock_bh(&br->multicast_lock); + return err; +} + +static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +{ + struct net_device *dev; + struct br_mdb_entry *entry; + struct net_bridge *br; + int err; + + err = br_mdb_parse(skb, nlh, &dev, &entry); + if (err < 0) + return err; + + br = netdev_priv(dev); + + err = __br_mdb_del(br, entry); + if (!err) + __br_mdb_notify(dev, entry, RTM_DELMDB); + return err; +} + void br_mdb_init(void) { rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index d929586ce39e..977c3ee02e65 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -27,27 +27,14 @@ #if IS_ENABLED(CONFIG_IPV6) #include #include -#include #include #endif #include "br_private.h" -#define mlock_dereference(X, br) \ - rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) - static void br_multicast_start_querier(struct net_bridge *br); unsigned int br_mdb_rehash_seq; -#if IS_ENABLED(CONFIG_IPV6) -static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) -{ - if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) - return 1; - return 0; -} -#endif - static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b) { if (a->proto != b->proto) @@ -104,8 +91,8 @@ static struct net_bridge_mdb_entry *__br_mdb_ip_get( return NULL; } -static struct net_bridge_mdb_entry *br_mdb_ip_get( - struct net_bridge_mdb_htable *mdb, struct br_ip *dst) +struct net_bridge_mdb_entry *br_mdb_ip_get(struct net_bridge_mdb_htable *mdb, + struct br_ip *dst) { if (!mdb) return NULL; @@ -208,7 +195,7 @@ static int br_mdb_copy(struct net_bridge_mdb_htable *new, return maxlen > elasticity ? -EINVAL : 0; } -static void br_multicast_free_pg(struct rcu_head *head) +void br_multicast_free_pg(struct rcu_head *head) { struct net_bridge_port_group *p = container_of(head, struct net_bridge_port_group, rcu); @@ -584,9 +571,8 @@ err: return mp; } -static struct net_bridge_mdb_entry *br_multicast_new_group( - struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) +struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *group) { struct net_bridge_mdb_htable *mdb; struct net_bridge_mdb_entry *mp; @@ -633,6 +619,26 @@ out: return mp; } +struct net_bridge_port_group *br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group *next) +{ + struct net_bridge_port_group *p; + + p = kzalloc(sizeof(*p), GFP_ATOMIC); + if (unlikely(!p)) + return NULL; + + p->addr = *group; + p->port = port; + p->next = next; + hlist_add_head(&p->mglist, &port->mglist); + setup_timer(&p->timer, br_multicast_port_group_expired, + (unsigned long)p); + return p; +} + static int br_multicast_add_group(struct net_bridge *br, struct net_bridge_port *port, struct br_ip *group) @@ -668,18 +674,9 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = kzalloc(sizeof(*p), GFP_ATOMIC); - err = -ENOMEM; + p = br_multicast_new_port_group(port, group, *pp); if (unlikely(!p)) goto err; - - p->addr = *group; - p->port = port; - p->next = *pp; - hlist_add_head(&p->mglist, &port->mglist); - setup_timer(&p->timer, br_multicast_port_group_expired, - (unsigned long)p); - rcu_assign_pointer(*pp, p); br_mdb_notify(br->dev, port, group, RTM_NEWMDB); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 2807c7680c38..f21a739a6186 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -434,10 +434,33 @@ extern int br_multicast_set_port_router(struct net_bridge_port *p, extern int br_multicast_toggle(struct net_bridge *br, unsigned long val); extern int br_multicast_set_querier(struct net_bridge *br, unsigned long val); extern int br_multicast_set_hash_max(struct net_bridge *br, unsigned long val); +extern struct net_bridge_mdb_entry *br_mdb_ip_get( + struct net_bridge_mdb_htable *mdb, + struct br_ip *dst); +extern struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br, + struct net_bridge_port *port, struct br_ip *group); +extern void br_multicast_free_pg(struct rcu_head *head); +extern struct net_bridge_port_group *br_multicast_new_port_group( + struct net_bridge_port *port, + struct br_ip *group, + struct net_bridge_port_group *next); extern void br_mdb_init(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); +#define mlock_dereference(X, br) \ + rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock)) + +#if IS_ENABLED(CONFIG_IPV6) +#include +static inline int ipv6_is_transient_multicast(const struct in6_addr *addr) +{ + if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr)) + return 1; + return 0; +} +#endif + static inline bool br_multicast_is_router(struct net_bridge *br) { return br->multicast_router == 2 || -- cgit v1.2.3 From d4676eac0de2e6d88eb3e2c02b4e9813d7d7f205 Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Wed, 12 Dec 2012 02:13:17 +0000 Subject: net: ethtool: Add destination MAC address to flow steering API Add ability to specify destination MAC address for L3/L4 flow spec in order to be able to specify action for different VM's under vSwitch configuration. This change is transparent to older userspace. Signed-off-by: Yan Burman Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index d3eaaaf1009e..be8c41e2dc15 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -500,13 +500,15 @@ union ethtool_flow_union { struct ethtool_ah_espip4_spec esp_ip4_spec; struct ethtool_usrip4_spec usr_ip4_spec; struct ethhdr ether_spec; - __u8 hdata[60]; + __u8 hdata[52]; }; struct ethtool_flow_ext { - __be16 vlan_etype; - __be16 vlan_tci; - __be32 data[2]; + __u8 padding[2]; + unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; }; /** @@ -1027,6 +1029,7 @@ enum ethtool_sfeatures_retval_bits { #define ETHER_FLOW 0x12 /* spec only (ether_spec) */ /* Flag to enable additional fields in struct ethtool_rx_flow_spec */ #define FLOW_EXT 0x80000000 +#define FLOW_MAC_EXT 0x40000000 /* L3-L4 network traffic flow hash options */ #define RXH_L2DA (1 << 1) -- cgit v1.2.3 From dc2e57340deb8be1133b1eae2c7d4303133c133c Mon Sep 17 00:00:00 2001 From: Yan Burman Date: Thu, 13 Dec 2012 05:20:59 +0000 Subject: net: ethool: Document struct ethtool_flow_ext Add documentation for struct ethtool_flow_ext especially in regard to what flags are needed for which fields. Signed-off-by: Yan Burman Reviewed-by: Ben Hutchings Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index be8c41e2dc15..0c9b44871df0 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -503,9 +503,20 @@ union ethtool_flow_union { __u8 hdata[52]; }; +/** + * struct ethtool_flow_ext - additional RX flow fields + * @h_dest: destination MAC address + * @vlan_etype: VLAN EtherType + * @vlan_tci: VLAN tag control information + * @data: user defined data + * + * Note, @vlan_etype, @vlan_tci, and @data are only valid if %FLOW_EXT + * is set in &struct ethtool_rx_flow_spec @flow_type. + * @h_dest is valid if %FLOW_MAC_EXT is set. + */ struct ethtool_flow_ext { __u8 padding[2]; - unsigned char h_dest[ETH_ALEN]; /* destination eth addr */ + unsigned char h_dest[ETH_ALEN]; __be16 vlan_etype; __be16 vlan_tci; __be32 data[2]; @@ -519,7 +530,8 @@ struct ethtool_flow_ext { * @m_u: Masks for flow field bits to be matched * @m_ext: Masks for additional field bits to be matched * Note, all additional fields must be ignored unless @flow_type - * includes the %FLOW_EXT flag. + * includes the %FLOW_EXT or %FLOW_MAC_EXT flag + * (see &struct ethtool_flow_ext description). * @ring_cookie: RX ring/queue index to deliver to, or %RX_CLS_FLOW_DISC * if packets should be discarded * @location: Location of rule in the table. Locations must be -- cgit v1.2.3 From 2f3238aebedb243804f58d62d57244edec4149b2 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 22 Oct 2012 18:09:41 +1030 Subject: module: add flags arg to sys_finit_module() Thanks to Michael Kerrisk for keeping us honest. These flags are actually useful for eliminating the only case where kmod has to mangle a module's internals: for overriding module versioning. Signed-off-by: Rusty Russell Acked-by: Lucas De Marchi Acked-by: Kees Cook --- include/linux/syscalls.h | 2 +- include/uapi/linux/module.h | 8 ++++++++ kernel/module.c | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 15 deletions(-) create mode 100644 include/uapi/linux/module.h (limited to 'include/uapi/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 32bc035bcd68..8cf7b508cb50 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -868,5 +868,5 @@ asmlinkage long sys_process_vm_writev(pid_t pid, asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type, unsigned long idx1, unsigned long idx2); -asmlinkage long sys_finit_module(int fd, const char __user *uargs); +asmlinkage long sys_finit_module(int fd, const char __user *uargs, int flags); #endif diff --git a/include/uapi/linux/module.h b/include/uapi/linux/module.h new file mode 100644 index 000000000000..38da4258b12f --- /dev/null +++ b/include/uapi/linux/module.h @@ -0,0 +1,8 @@ +#ifndef _UAPI_LINUX_MODULE_H +#define _UAPI_LINUX_MODULE_H + +/* Flags for sys_finit_module: */ +#define MODULE_INIT_IGNORE_MODVERSIONS 1 +#define MODULE_INIT_IGNORE_VERMAGIC 2 + +#endif /* _UAPI_LINUX_MODULE_H */ diff --git a/kernel/module.c b/kernel/module.c index 6d2c4e4ca1f5..1395ca382fb5 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -60,6 +60,7 @@ #include #include #include +#include #include "module-internal.h" #define CREATE_TRACE_POINTS @@ -2553,7 +2554,7 @@ static void free_copy(struct load_info *info) vfree(info->hdr); } -static int rewrite_section_headers(struct load_info *info) +static int rewrite_section_headers(struct load_info *info, int flags) { unsigned int i; @@ -2581,7 +2582,10 @@ static int rewrite_section_headers(struct load_info *info) } /* Track but don't keep modinfo and version sections. */ - info->index.vers = find_sec(info, "__versions"); + if (flags & MODULE_INIT_IGNORE_MODVERSIONS) + info->index.vers = 0; /* Pretend no __versions section! */ + else + info->index.vers = find_sec(info, "__versions"); info->index.info = find_sec(info, ".modinfo"); info->sechdrs[info->index.info].sh_flags &= ~(unsigned long)SHF_ALLOC; info->sechdrs[info->index.vers].sh_flags &= ~(unsigned long)SHF_ALLOC; @@ -2596,7 +2600,7 @@ static int rewrite_section_headers(struct load_info *info) * Return the temporary module pointer (we'll replace it with the final * one when we move the module sections around). */ -static struct module *setup_load_info(struct load_info *info) +static struct module *setup_load_info(struct load_info *info, int flags) { unsigned int i; int err; @@ -2607,7 +2611,7 @@ static struct module *setup_load_info(struct load_info *info) info->secstrings = (void *)info->hdr + info->sechdrs[info->hdr->e_shstrndx].sh_offset; - err = rewrite_section_headers(info); + err = rewrite_section_headers(info, flags); if (err) return ERR_PTR(err); @@ -2645,11 +2649,14 @@ static struct module *setup_load_info(struct load_info *info) return mod; } -static int check_modinfo(struct module *mod, struct load_info *info) +static int check_modinfo(struct module *mod, struct load_info *info, int flags) { const char *modmagic = get_modinfo(info, "vermagic"); int err; + if (flags & MODULE_INIT_IGNORE_VERMAGIC) + modmagic = NULL; + /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { err = try_to_force_load(mod, "bad vermagic"); @@ -2885,18 +2892,18 @@ int __weak module_frob_arch_sections(Elf_Ehdr *hdr, return 0; } -static struct module *layout_and_allocate(struct load_info *info) +static struct module *layout_and_allocate(struct load_info *info, int flags) { /* Module within temporary copy. */ struct module *mod; Elf_Shdr *pcpusec; int err; - mod = setup_load_info(info); + mod = setup_load_info(info, flags); if (IS_ERR(mod)) return mod; - err = check_modinfo(mod, info); + err = check_modinfo(mod, info, flags); if (err) return ERR_PTR(err); @@ -3078,7 +3085,8 @@ static int may_init_module(void) /* Allocate and load the module: note that size of section 0 is always zero, and we rely on this for optional sections. */ -static int load_module(struct load_info *info, const char __user *uargs) +static int load_module(struct load_info *info, const char __user *uargs, + int flags) { struct module *mod, *old; long err; @@ -3092,7 +3100,7 @@ static int load_module(struct load_info *info, const char __user *uargs) goto free_copy; /* Figure out module layout, and allocate all the memory. */ - mod = layout_and_allocate(info); + mod = layout_and_allocate(info, flags); if (IS_ERR(mod)) { err = PTR_ERR(mod); goto free_copy; @@ -3241,10 +3249,10 @@ SYSCALL_DEFINE3(init_module, void __user *, umod, if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, 0); } -SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) +SYSCALL_DEFINE3(finit_module, int, fd, const char __user *, uargs, int, flags) { int err; struct load_info info = { }; @@ -3253,13 +3261,17 @@ SYSCALL_DEFINE2(finit_module, int, fd, const char __user *, uargs) if (err) return err; - pr_debug("finit_module: fd=%d, uargs=%p\n", fd, uargs); + pr_debug("finit_module: fd=%d, uargs=%p, flags=%i\n", fd, uargs, flags); + + if (flags & ~(MODULE_INIT_IGNORE_MODVERSIONS + |MODULE_INIT_IGNORE_VERMAGIC)) + return -EINVAL; err = copy_module_from_fd(fd, &info); if (err) return err; - return load_module(&info, uargs); + return load_module(&info, uargs, flags); } static inline int within(unsigned long addr, void *start, unsigned long size) -- cgit v1.2.3 From ccb1c31a7a8744cd153a7d92b726a56b56ad61d3 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Fri, 14 Dec 2012 22:09:51 +0000 Subject: bridge: add flags to distinguish permanent mdb entires This patch adds a flag to each mdb entry, so that we can distinguish permanent entries with temporary entries. Cc: Herbert Xu Cc: Stephen Hemminger Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/if_bridge.h | 3 +++ net/bridge/br_mdb.c | 9 ++++++--- net/bridge/br_multicast.c | 8 +++++--- net/bridge/br_private.h | 4 +++- 4 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index afbb18a0227c..5db297514aec 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -163,6 +163,9 @@ struct br_port_msg { struct br_mdb_entry { __u32 ifindex; +#define MDB_TEMPORARY 0 +#define MDB_PERMANENT 1 + __u8 state; struct { union { __be32 ip4; diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index 6f0a2eebcb27..9cf5d2b28c76 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -83,6 +83,7 @@ static int br_mdb_fill_info(struct sk_buff *skb, struct netlink_callback *cb, if (port) { struct br_mdb_entry e; e.ifindex = port->dev->ifindex; + e.state = p->state; e.addr.u.ip4 = p->addr.u.ip4; #if IS_ENABLED(CONFIG_IPV6) e.addr.u.ip6 = p->addr.u.ip6; @@ -253,6 +254,8 @@ static bool is_valid_mdb_entry(struct br_mdb_entry *entry) #endif } else return false; + if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) + return false; return true; } @@ -310,7 +313,7 @@ static int br_mdb_parse(struct sk_buff *skb, struct nlmsghdr *nlh, } static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, - struct br_ip *group) + struct br_ip *group, unsigned char state) { struct net_bridge_mdb_entry *mp; struct net_bridge_port_group *p; @@ -336,7 +339,7 @@ static int br_mdb_add_group(struct net_bridge *br, struct net_bridge_port *port, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, state); if (unlikely(!p)) return -ENOMEM; rcu_assign_pointer(*pp, p); @@ -373,7 +376,7 @@ static int __br_mdb_add(struct net *net, struct net_bridge *br, #endif spin_lock_bh(&br->multicast_lock); - ret = br_mdb_add_group(br, p, &ip); + ret = br_mdb_add_group(br, p, &ip, entry->state); spin_unlock_bh(&br->multicast_lock); return ret; } diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 2561af9d18a2..dce9defae3c6 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -279,7 +279,7 @@ static void br_multicast_port_group_expired(unsigned long data) spin_lock(&br->multicast_lock); if (!netif_running(br->dev) || timer_pending(&pg->timer) || - hlist_unhashed(&pg->mglist)) + hlist_unhashed(&pg->mglist) || pg->state & MDB_PERMANENT) goto out; br_multicast_del_pg(br, pg); @@ -622,7 +622,8 @@ out: struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group __rcu *next) + struct net_bridge_port_group __rcu *next, + unsigned char state) { struct net_bridge_port_group *p; @@ -632,6 +633,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->addr = *group; p->port = port; + p->state = state; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); setup_timer(&p->timer, br_multicast_port_group_expired, @@ -674,7 +676,7 @@ static int br_multicast_add_group(struct net_bridge *br, break; } - p = br_multicast_new_port_group(port, group, *pp); + p = br_multicast_new_port_group(port, group, *pp, MDB_TEMPORARY); if (unlikely(!p)) goto err; rcu_assign_pointer(*pp, p); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index f21a739a6186..49b85af44016 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -83,6 +83,7 @@ struct net_bridge_port_group { struct rcu_head rcu; struct timer_list timer; struct br_ip addr; + unsigned char state; }; struct net_bridge_mdb_entry @@ -443,7 +444,8 @@ extern void br_multicast_free_pg(struct rcu_head *head); extern struct net_bridge_port_group *br_multicast_new_port_group( struct net_bridge_port *port, struct br_ip *group, - struct net_bridge_port_group *next); + struct net_bridge_port_group *next, + unsigned char state); extern void br_mdb_init(void); extern void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_ip *group, int type); -- cgit v1.2.3 From 992fb6e170639b0849bace8e49bf31bd37c4123c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:03:07 -0800 Subject: ptrace: introduce PTRACE_O_EXITKILL Ptrace jailers want to be sure that the tracee can never escape from the control. However if the tracer dies unexpectedly the tracee continues to run in potentially unsafe mode. Add the new ptrace option PTRACE_O_EXITKILL. If the tracer exits it sends SIGKILL to every tracee which has this bit set. Note that the new option is not equal to the last-option << 1. Because currently all options have an event, and the new one starts the eventless group. It uses the random 20 bit, so we have the room for 12 more events, but we can also add the new eventless options below this one. Suggested by Amnon Shiloh. Signed-off-by: Oleg Nesterov Tested-by: Amnon Shiloh Cc: Denys Vlasenko Cc: Michael Kerrisk Cc: Serge Hallyn Cc: Chris Evans Cc: David Howells Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ptrace.h | 2 ++ include/uapi/linux/ptrace.h | 5 ++++- kernel/ptrace.c | 3 +++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index a89ff04bddd9..addfbe7c180e 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -32,6 +32,8 @@ #define PT_TRACE_EXIT PT_EVENT_FLAG(PTRACE_EVENT_EXIT) #define PT_TRACE_SECCOMP PT_EVENT_FLAG(PTRACE_EVENT_SECCOMP) +#define PT_EXITKILL (PTRACE_O_EXITKILL << PT_OPT_FLAG_SHIFT) + /* single stepping state bits (used on ARM and PA-RISC) */ #define PT_SINGLESTEP_BIT 31 #define PT_SINGLESTEP (1< diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1f5e55dda955..ec8118ab2a47 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -457,6 +457,9 @@ void exit_ptrace(struct task_struct *tracer) return; list_for_each_entry_safe(p, n, &tracer->ptraced, ptrace_entry) { + if (unlikely(p->ptrace & PT_EXITKILL)) + send_sig_info(SIGKILL, SEND_SIG_FORCED, p); + if (__ptrace_detach(tracer, p)) list_add(&p->ptrace_entry, &ptrace_dead); } -- cgit v1.2.3 From 1b6370463e88b0c1c317de16d7b962acc1dab4f2 Mon Sep 17 00:00:00 2001 From: Sjur Brændeland Date: Fri, 14 Dec 2012 14:40:51 +1030 Subject: virtio_console: Add support for remoteproc serial MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a simple serial connection driver called VIRTIO_ID_RPROC_SERIAL (11) for communicating with a remote processor in an asymmetric multi-processing configuration. This implementation reuses the existing virtio_console implementation, and adds support for DMA allocation of data buffers and disables use of tty console and the virtio control queue. Signed-off-by: Sjur Brændeland Acked-by: Amit Shah Signed-off-by: Rusty Russell --- drivers/char/virtio_console.c | 192 +++++++++++++++++++++++++++++++++++----- include/uapi/linux/virtio_ids.h | 1 + 2 files changed, 170 insertions(+), 23 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 548224686963..55a89a4ae42f 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -37,8 +37,12 @@ #include #include #include +#include +#include #include "../tty/hvc/hvc_console.h" +#define is_rproc_enabled IS_ENABLED(CONFIG_REMOTEPROC) + /* * This is a global struct for storing common data for all the devices * this driver handles. @@ -112,6 +116,15 @@ struct port_buffer { /* offset in the buf from which to consume data */ size_t offset; + /* DMA address of buffer */ + dma_addr_t dma; + + /* Device we got DMA memory from */ + struct device *dev; + + /* List of pending dma buffers to free */ + struct list_head list; + /* If sgpages == 0 then buf is used */ unsigned int sgpages; @@ -331,6 +344,11 @@ static bool is_console_port(struct port *port) return false; } +static bool is_rproc_serial(const struct virtio_device *vdev) +{ + return is_rproc_enabled && vdev->id.device == VIRTIO_ID_RPROC_SERIAL; +} + static inline bool use_multiport(struct ports_device *portdev) { /* @@ -342,11 +360,13 @@ static inline bool use_multiport(struct ports_device *portdev) return portdev->vdev->features[0] & (1 << VIRTIO_CONSOLE_F_MULTIPORT); } -static void free_buf(struct port_buffer *buf) +static DEFINE_SPINLOCK(dma_bufs_lock); +static LIST_HEAD(pending_free_dma_bufs); + +static void free_buf(struct port_buffer *buf, bool can_sleep) { unsigned int i; - kfree(buf->buf); for (i = 0; i < buf->sgpages; i++) { struct page *page = sg_page(&buf->sg[i]); if (!page) @@ -354,14 +374,57 @@ static void free_buf(struct port_buffer *buf) put_page(page); } + if (!buf->dev) { + kfree(buf->buf); + } else if (is_rproc_enabled) { + unsigned long flags; + + /* dma_free_coherent requires interrupts to be enabled. */ + if (!can_sleep) { + /* queue up dma-buffers to be freed later */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_add_tail(&buf->list, &pending_free_dma_bufs); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + return; + } + dma_free_coherent(buf->dev, buf->size, buf->buf, buf->dma); + + /* Release device refcnt and allow it to be freed */ + put_device(buf->dev); + } + kfree(buf); } +static void reclaim_dma_bufs(void) +{ + unsigned long flags; + struct port_buffer *buf, *tmp; + LIST_HEAD(tmp_list); + + if (list_empty(&pending_free_dma_bufs)) + return; + + /* Create a copy of the pending_free_dma_bufs while holding the lock */ + spin_lock_irqsave(&dma_bufs_lock, flags); + list_cut_position(&tmp_list, &pending_free_dma_bufs, + pending_free_dma_bufs.prev); + spin_unlock_irqrestore(&dma_bufs_lock, flags); + + /* Release the dma buffers, without irqs enabled */ + list_for_each_entry_safe(buf, tmp, &tmp_list, list) { + list_del(&buf->list); + free_buf(buf, true); + } +} + static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, int pages) { struct port_buffer *buf; + reclaim_dma_bufs(); + /* * Allocate buffer and the sg list. The sg list array is allocated * directly after the port_buffer struct. @@ -373,11 +436,34 @@ static struct port_buffer *alloc_buf(struct virtqueue *vq, size_t buf_size, buf->sgpages = pages; if (pages > 0) { + buf->dev = NULL; buf->buf = NULL; return buf; } - buf->buf = kmalloc(buf_size, GFP_KERNEL); + if (is_rproc_serial(vq->vdev)) { + /* + * Allocate DMA memory from ancestor. When a virtio + * device is created by remoteproc, the DMA memory is + * associated with the grandparent device: + * vdev => rproc => platform-dev. + * The code here would have been less quirky if + * DMA_MEMORY_INCLUDES_CHILDREN had been supported + * in dma-coherent.c + */ + if (!vq->vdev->dev.parent || !vq->vdev->dev.parent->parent) + goto free_buf; + buf->dev = vq->vdev->dev.parent->parent; + + /* Increase device refcnt to avoid freeing it */ + get_device(buf->dev); + buf->buf = dma_alloc_coherent(buf->dev, buf_size, &buf->dma, + GFP_KERNEL); + } else { + buf->dev = NULL; + buf->buf = kmalloc(buf_size, GFP_KERNEL); + } + if (!buf->buf) goto free_buf; buf->len = 0; @@ -446,7 +532,7 @@ static void discard_port_data(struct port *port) port->stats.bytes_discarded += buf->len - buf->offset; if (add_inbuf(port->in_vq, buf) < 0) { err++; - free_buf(buf); + free_buf(buf, false); } port->inbuf = NULL; buf = get_inbuf(port); @@ -518,7 +604,7 @@ static void reclaim_consumed_buffers(struct port *port) return; } while ((buf = virtqueue_get_buf(port->out_vq, &len))) { - free_buf(buf); + free_buf(buf, false); port->outvq_full = false; } } @@ -765,7 +851,7 @@ static ssize_t port_fops_write(struct file *filp, const char __user *ubuf, goto out; free_buf: - free_buf(buf); + free_buf(buf, true); out: return ret; } @@ -839,6 +925,15 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, .u.data = &sgl, }; + /* + * Rproc_serial does not yet support splice. To support splice + * pipe_to_sg() must allocate dma-buffers and copy content from + * regular pages to dma pages. And alloc_buf and free_buf must + * support allocating and freeing such a list of dma-buffers. + */ + if (is_rproc_serial(port->out_vq->vdev)) + return -EINVAL; + ret = wait_port_writable(port, filp->f_flags & O_NONBLOCK); if (ret < 0) return ret; @@ -857,7 +952,7 @@ static ssize_t port_fops_splice_write(struct pipe_inode_info *pipe, ret = __send_to_port(port, buf->sg, sgl.n, sgl.len, buf, true); if (unlikely(ret <= 0)) - kfree(sgl.sg); + free_buf(buf, true); return ret; } @@ -906,6 +1001,7 @@ static int port_fops_release(struct inode *inode, struct file *filp) reclaim_consumed_buffers(port); spin_unlock_irq(&port->outvq_lock); + reclaim_dma_bufs(); /* * Locks aren't necessary here as a port can't be opened after * unplug, and if a port isn't unplugged, a kref would already @@ -1057,7 +1153,10 @@ static void resize_console(struct port *port) return; vdev = port->portdev->vdev; - if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) + + /* Don't test F_SIZE at all if we're rproc: not a valid feature! */ + if (!is_rproc_serial(vdev) && + virtio_has_feature(vdev, VIRTIO_CONSOLE_F_SIZE)) hvc_resize(port->cons.hvc, port->cons.ws); } @@ -1249,7 +1348,7 @@ static unsigned int fill_queue(struct virtqueue *vq, spinlock_t *lock) ret = add_inbuf(vq, buf); if (ret < 0) { spin_unlock_irq(lock); - free_buf(buf); + free_buf(buf, true); break; } nr_added_bufs++; @@ -1337,10 +1436,18 @@ static int add_port(struct ports_device *portdev, u32 id) goto free_device; } - /* - * If we're not using multiport support, this has to be a console port - */ - if (!use_multiport(port->portdev)) { + if (is_rproc_serial(port->portdev->vdev)) + /* + * For rproc_serial assume remote processor is connected. + * rproc_serial does not want the console port, only + * the generic port implementation. + */ + port->host_connected = true; + else if (!use_multiport(port->portdev)) { + /* + * If we're not using multiport support, + * this has to be a console port. + */ err = init_port_console(port); if (err) goto free_inbufs; @@ -1373,7 +1480,7 @@ static int add_port(struct ports_device *portdev, u32 id) free_inbufs: while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); free_device: device_destroy(pdrvdata.class, port->dev->devt); free_cdev: @@ -1415,11 +1522,11 @@ static void remove_port_data(struct port *port) /* Remove buffers we queued up for the Host to send us data in. */ while ((buf = virtqueue_detach_unused_buf(port->in_vq))) - free_buf(buf); + free_buf(buf, true); /* Free pending buffers from the out-queue. */ while ((buf = virtqueue_detach_unused_buf(port->out_vq))) - free_buf(buf); + free_buf(buf, true); } /* @@ -1621,7 +1728,7 @@ static void control_work_handler(struct work_struct *work) if (add_inbuf(portdev->c_ivq, buf) < 0) { dev_warn(&portdev->vdev->dev, "Error adding buffer to queue\n"); - free_buf(buf); + free_buf(buf, false); } } spin_unlock(&portdev->cvq_lock); @@ -1817,10 +1924,10 @@ static void remove_controlq_data(struct ports_device *portdev) return; while ((buf = virtqueue_get_buf(portdev->c_ivq, &len))) - free_buf(buf); + free_buf(buf, true); while ((buf = virtqueue_detach_unused_buf(portdev->c_ivq))) - free_buf(buf); + free_buf(buf, true); } /* @@ -1867,11 +1974,15 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) multiport = false; portdev->config.max_nr_ports = 1; - if (virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, - offsetof(struct virtio_console_config, - max_nr_ports), - &portdev->config.max_nr_ports) == 0) + + /* Don't test MULTIPORT at all if we're rproc: not a valid feature! */ + if (!is_rproc_serial(vdev) && + virtio_config_val(vdev, VIRTIO_CONSOLE_F_MULTIPORT, + offsetof(struct virtio_console_config, + max_nr_ports), + &portdev->config.max_nr_ports) == 0) { multiport = true; + } err = init_vqs(portdev); if (err < 0) { @@ -1981,6 +2092,16 @@ static unsigned int features[] = { VIRTIO_CONSOLE_F_MULTIPORT, }; +static struct virtio_device_id rproc_serial_id_table[] = { +#if IS_ENABLED(CONFIG_REMOTEPROC) + { VIRTIO_ID_RPROC_SERIAL, VIRTIO_DEV_ANY_ID }, +#endif + { 0 }, +}; + +static unsigned int rproc_serial_features[] = { +}; + #ifdef CONFIG_PM static int virtcons_freeze(struct virtio_device *vdev) { @@ -2065,6 +2186,20 @@ static struct virtio_driver virtio_console = { #endif }; +/* + * virtio_rproc_serial refers to __devinit function which causes + * section mismatch warnings. So use __refdata to silence warnings. + */ +static struct virtio_driver __refdata virtio_rproc_serial = { + .feature_table = rproc_serial_features, + .feature_table_size = ARRAY_SIZE(rproc_serial_features), + .driver.name = "virtio_rproc_serial", + .driver.owner = THIS_MODULE, + .id_table = rproc_serial_id_table, + .probe = virtcons_probe, + .remove = virtcons_remove, +}; + static int __init init(void) { int err; @@ -2089,7 +2224,15 @@ static int __init init(void) pr_err("Error %d registering virtio driver\n", err); goto free; } + err = register_virtio_driver(&virtio_rproc_serial); + if (err < 0) { + pr_err("Error %d registering virtio rproc serial driver\n", + err); + goto unregister; + } return 0; +unregister: + unregister_virtio_driver(&virtio_console); free: if (pdrvdata.debugfs_dir) debugfs_remove_recursive(pdrvdata.debugfs_dir); @@ -2099,7 +2242,10 @@ free: static void __exit fini(void) { + reclaim_dma_bufs(); + unregister_virtio_driver(&virtio_console); + unregister_virtio_driver(&virtio_rproc_serial); class_destroy(pdrvdata.class); if (pdrvdata.debugfs_dir) diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h index 270fb22c5811..a7630d04029f 100644 --- a/include/uapi/linux/virtio_ids.h +++ b/include/uapi/linux/virtio_ids.h @@ -37,5 +37,6 @@ #define VIRTIO_ID_RPMSG 7 /* virtio remote processor messaging */ #define VIRTIO_ID_SCSI 8 /* virtio scsi */ #define VIRTIO_ID_9P 9 /* 9p virtio console */ +#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */ #endif /* _LINUX_VIRTIO_IDS_H */ -- cgit v1.2.3 From 031b6566983ad9c0247087f039af22b3f87596a3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 18 Nov 2012 15:13:17 -0500 Subject: unify SS_ONSTACK/SS_DISABLE definitions Signed-off-by: Al Viro --- arch/alpha/include/asm/signal.h | 6 ------ arch/arm/include/uapi/asm/signal.h | 7 ------- arch/avr32/include/uapi/asm/signal.h | 6 ------ arch/cris/include/asm/signal.h | 6 ------ arch/h8300/include/asm/signal.h | 6 ------ arch/ia64/include/uapi/asm/signal.h | 6 ------ arch/m32r/include/asm/signal.h | 6 ------ arch/m68k/include/uapi/asm/signal.h | 6 ------ arch/mips/include/uapi/asm/signal.h | 6 ------ arch/mn10300/include/uapi/asm/signal.h | 6 ------ arch/parisc/include/uapi/asm/signal.h | 6 ------ arch/powerpc/include/uapi/asm/signal.h | 6 ------ arch/s390/include/uapi/asm/signal.h | 6 ------ arch/sparc/include/uapi/asm/signal.h | 6 ------ arch/x86/include/asm/signal.h | 6 ------ arch/xtensa/include/uapi/asm/signal.h | 6 ------ include/uapi/asm-generic/signal.h | 6 ------ include/uapi/linux/signal.h | 2 ++ 18 files changed, 2 insertions(+), 103 deletions(-) (limited to 'include/uapi/linux') diff --git a/arch/alpha/include/asm/signal.h b/arch/alpha/include/asm/signal.h index 45552862cc10..a9aa0133f656 100644 --- a/arch/alpha/include/asm/signal.h +++ b/arch/alpha/include/asm/signal.h @@ -98,12 +98,6 @@ typedef unsigned long sigset_t; #define SA_ONESHOT SA_RESETHAND #define SA_NOMASK SA_NODEFER -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 4096 #define SIGSTKSZ 16384 diff --git a/arch/arm/include/uapi/asm/signal.h b/arch/arm/include/uapi/asm/signal.h index 921c57fdc52e..33073bdcf091 100644 --- a/arch/arm/include/uapi/asm/signal.h +++ b/arch/arm/include/uapi/asm/signal.h @@ -87,13 +87,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND - -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/avr32/include/uapi/asm/signal.h b/arch/avr32/include/uapi/asm/signal.h index eb46f61adb7d..1b77a93eff50 100644 --- a/arch/avr32/include/uapi/asm/signal.h +++ b/arch/avr32/include/uapi/asm/signal.h @@ -89,12 +89,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/cris/include/asm/signal.h b/arch/cris/include/asm/signal.h index 72dbbf59dfae..fdbffb773d81 100644 --- a/arch/cris/include/asm/signal.h +++ b/arch/cris/include/asm/signal.h @@ -97,12 +97,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/h8300/include/asm/signal.h b/arch/h8300/include/asm/signal.h index c43c0a7d2c2e..575c79a008f4 100644 --- a/arch/h8300/include/asm/signal.h +++ b/arch/h8300/include/asm/signal.h @@ -96,12 +96,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h index e531c424434c..c0ea2855e96b 100644 --- a/arch/ia64/include/uapi/asm/signal.h +++ b/arch/ia64/include/uapi/asm/signal.h @@ -78,12 +78,6 @@ #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - /* * The minimum stack size needs to be fairly large because we want to * be sure that an app compiled for today's CPUs will continue to run diff --git a/arch/m32r/include/asm/signal.h b/arch/m32r/include/asm/signal.h index e4d2e2ad5f1e..ba714c45259d 100644 --- a/arch/m32r/include/asm/signal.h +++ b/arch/m32r/include/asm/signal.h @@ -98,12 +98,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/m68k/include/uapi/asm/signal.h b/arch/m68k/include/uapi/asm/signal.h index 2b450f311bd9..cba6f858bb46 100644 --- a/arch/m68k/include/uapi/asm/signal.h +++ b/arch/m68k/include/uapi/asm/signal.h @@ -80,12 +80,6 @@ typedef unsigned long sigset_t; #define SA_NOMASK SA_NODEFER #define SA_ONESHOT SA_RESETHAND -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/mips/include/uapi/asm/signal.h b/arch/mips/include/uapi/asm/signal.h index 3f1237c6c80e..770732cb8d03 100644 --- a/arch/mips/include/uapi/asm/signal.h +++ b/arch/mips/include/uapi/asm/signal.h @@ -86,12 +86,6 @@ typedef unsigned long old_sigset_t; /* at least 32 bits */ #define SA_RESTORER 0x04000000 /* Only for o32 */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/mn10300/include/uapi/asm/signal.h b/arch/mn10300/include/uapi/asm/signal.h index 08dcd6a85618..f423a08d7eeb 100644 --- a/arch/mn10300/include/uapi/asm/signal.h +++ b/arch/mn10300/include/uapi/asm/signal.h @@ -92,12 +92,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/parisc/include/uapi/asm/signal.h b/arch/parisc/include/uapi/asm/signal.h index b1ddaa243376..a2fa297196bc 100644 --- a/arch/parisc/include/uapi/asm/signal.h +++ b/arch/parisc/include/uapi/asm/signal.h @@ -71,12 +71,6 @@ #define SA_RESTORER 0x04000000 /* obsolete -- ignored */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/powerpc/include/uapi/asm/signal.h b/arch/powerpc/include/uapi/asm/signal.h index 48fa8d3f2f9a..e079fb39d5bc 100644 --- a/arch/powerpc/include/uapi/asm/signal.h +++ b/arch/powerpc/include/uapi/asm/signal.h @@ -85,12 +85,6 @@ typedef struct { #define SA_RESTORER 0x04000000U -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/s390/include/uapi/asm/signal.h b/arch/s390/include/uapi/asm/signal.h index 8c6a49e392ee..2f43cfbf5f1a 100644 --- a/arch/s390/include/uapi/asm/signal.h +++ b/arch/s390/include/uapi/asm/signal.h @@ -90,12 +90,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/sparc/include/uapi/asm/signal.h b/arch/sparc/include/uapi/asm/signal.h index 1a041892538f..c4ffd6c97106 100644 --- a/arch/sparc/include/uapi/asm/signal.h +++ b/arch/sparc/include/uapi/asm/signal.h @@ -147,12 +147,6 @@ struct sigstack { #define SIG_UNBLOCK 0x02 /* for unblocking signals */ #define SIG_SETMASK 0x04 /* for setting the signal mask */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 4096 #define SIGSTKSZ 16384 diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 0dba8b7a6ac7..7bb5490b3aa2 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -112,12 +112,6 @@ typedef unsigned long sigset_t; #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/arch/xtensa/include/uapi/asm/signal.h b/arch/xtensa/include/uapi/asm/signal.h index b88ce96f2af9..dacf716dd3e0 100644 --- a/arch/xtensa/include/uapi/asm/signal.h +++ b/arch/xtensa/include/uapi/asm/signal.h @@ -97,12 +97,6 @@ typedef struct { #define SA_RESTORER 0x04000000 -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/include/uapi/asm-generic/signal.h b/include/uapi/asm-generic/signal.h index 0a78028984de..6fae30fd16ab 100644 --- a/include/uapi/asm-generic/signal.h +++ b/include/uapi/asm-generic/signal.h @@ -80,12 +80,6 @@ * SA_RESTORER 0x04000000 */ -/* - * sigaltstack controls - */ -#define SS_ONSTACK 1 -#define SS_DISABLE 2 - #define MINSIGSTKSZ 2048 #define SIGSTKSZ 8192 diff --git a/include/uapi/linux/signal.h b/include/uapi/linux/signal.h index dff452ed6d00..e1bd50c29ded 100644 --- a/include/uapi/linux/signal.h +++ b/include/uapi/linux/signal.h @@ -4,5 +4,7 @@ #include #include +#define SS_ONSTACK 1 +#define SS_DISABLE 2 #endif /* _UAPI_LINUX_SIGNAL_H */ -- cgit v1.2.3 From 5023e5cf58e1dae904e2e8b5b9779c33512b75a1 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 21 Dec 2012 20:23:36 +0000 Subject: dm ioctl: remove PF_MEMALLOC When allocating memory for the userspace ioctl data, set some appropriate GPF flags directly instead of using PF_MEMALLOC. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 13 ++++--------- include/uapi/linux/dm-ioctl.h | 4 ++-- 2 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include/uapi/linux') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index a651d528f80d..a37aeba7dc1b 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1556,7 +1556,10 @@ static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param) secure_data = tmp.flags & DM_SECURE_DATA_FLAG; - dmi = vmalloc(tmp.data_size); + /* + * Try to avoid low memory issues when a device is suspended. + */ + dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); if (!dmi) { if (secure_data && clear_user(user, tmp.data_size)) return -EFAULT; @@ -1656,19 +1659,11 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) return -ENOTTY; } - /* - * Trying to avoid low memory issues when a device is - * suspended. - */ - current->flags |= PF_MEMALLOC; - /* * Copy the parameters into kernel space. */ r = copy_params(user, ¶m); - current->flags &= ~PF_MEMALLOC; - if (r) return r; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 91e3a360f611..539b179b349c 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -268,8 +268,8 @@ enum { #define DM_VERSION_MAJOR 4 #define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 0 -#define DM_VERSION_EXTRA "-ioctl (2012-07-25)" +#define DM_VERSION_PATCHLEVEL 1 +#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3 From 130f1b8f35f14d27c43da755f3c9226318c17f57 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 26 Dec 2012 10:39:23 -0700 Subject: PCI: Add PCIe Link Capability link speed and width names Add standard #defines for the Supported Link Speeds field in the PCIe Link Capabilities register. Note that prior to PCIe spec r3.0, these encodings were defined: 0001b 2.5GT/s Link speed supported 0010b 5.0GT/s and 2.5GT/s Link speed supported Starting with spec r3.0, these encodings refer to bits 0 and 1 in the Supported Link Speeds Vector in the Link Capabilities 2 register, and bits 0 and 1 there mean 2.5 GT/s and 5.0 GT/s, respectively. Therefore, code that followed r2.0 and interpreted 0x1 as 2.5GT/s and 0x2 as 5.0GT/s will continue to work, and we can identify a device using the new encodings because it will have a non-zero Link Capabilities 2 register. Signed-off-by: Bjorn Helgaas --- include/uapi/linux/pci_regs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 6b7b6f1e2fd6..ebfadc56d1b4 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -458,6 +458,8 @@ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCAP_SLS 0x0000000f /* Supported Link Speeds */ +#define PCI_EXP_LNKCAP_SLS_2_5GB 0x1 /* LNKCAP2 SLS Vector bit 0 (2.5GT/s) */ +#define PCI_EXP_LNKCAP_SLS_5_0GB 0x2 /* LNKCAP2 SLS Vector bit 1 (5.0GT/s) */ #define PCI_EXP_LNKCAP_MLW 0x000003f0 /* Maximum Link Width */ #define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ #define PCI_EXP_LNKCAP_L0SEL 0x00007000 /* L0s Exit Latency */ -- cgit v1.2.3 From 4a674f34ba04a002244edaf891b5da7fc1473ae8 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Fri, 4 Jan 2013 15:34:55 -0800 Subject: ipc: introduce message queue copy feature This patch is required for checkpoint/restore in userspace. c/r requires some way to get all pending IPC messages without deleting them from the queue (checkpoint can fail and in this case tasks will be resumed, so queue have to be valid). To achive this, new operation flag MSG_COPY for sys_msgrcv() system call was introduced. If this flag was specified, then mtype is interpreted as number of the message to copy. If MSG_COPY is set, then kernel will allocate dummy message with passed size, and then use new copy_msg() helper function to copy desired message (instead of unlinking it from the queue). Notes: 1) Return -ENOSYS if MSG_COPY is specified, but CONFIG_CHECKPOINT_RESTORE is not set. Signed-off-by: Stanislav Kinsbursky Cc: Serge Hallyn Cc: "Eric W. Biederman" Cc: Pavel Emelyanov Cc: Al Viro Cc: KOSAKI Motohiro Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/msg.h | 1 + ipc/msg.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++-- ipc/msgutil.c | 38 ++++++++++++++++++++++++++++ ipc/util.h | 1 + 4 files changed, 102 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/msg.h b/include/uapi/linux/msg.h index 78dbd2f996a3..22d95c6854e0 100644 --- a/include/uapi/linux/msg.h +++ b/include/uapi/linux/msg.h @@ -10,6 +10,7 @@ /* msgrcv options */ #define MSG_NOERROR 010000 /* no error if message is too big */ #define MSG_EXCEPT 020000 /* recv any msg except of specified type.*/ +#define MSG_COPY 040000 /* copy (not remove) all queue messages */ /* Obsolete, used only for backwards compatibility and libc5 compiles */ struct msqid_ds { diff --git a/ipc/msg.c b/ipc/msg.c index cefc24f46e3e..d20ffc7d3f24 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -769,6 +769,45 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz) return msgsz; } +#ifdef CONFIG_CHECKPOINT_RESTORE +static inline struct msg_msg *fill_copy(unsigned long copy_nr, + unsigned long msg_nr, + struct msg_msg *msg, + struct msg_msg *copy) +{ + if (copy_nr == msg_nr) + return copy_msg(msg, copy); + return NULL; +} + +static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz, + int msgflg, long *msgtyp, + unsigned long *copy_number) +{ + struct msg_msg *copy; + + *copy_number = *msgtyp; + *msgtyp = 0; + /* + * Create dummy message to copy real message to. + */ + copy = load_msg(buf, bufsz); + if (!IS_ERR(copy)) + copy->m_ts = bufsz; + return copy; +} + +static inline void free_copy(int msgflg, struct msg_msg *copy) +{ + if (msgflg & MSG_COPY) + free_msg(copy); +} +#else +#define free_copy(msgflg, copy) do {} while (0) +#define prepare_copy(buf, sz, msgflg, msgtyp, copy_nr) ERR_PTR(-ENOSYS) +#define fill_copy(copy_nr, msg_nr, msg, copy) NULL +#endif + long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, int msgflg, long (*msg_handler)(void __user *, struct msg_msg *, size_t)) @@ -777,19 +816,29 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, struct msg_msg *msg; int mode; struct ipc_namespace *ns; + struct msg_msg *copy; + unsigned long __maybe_unused copy_number; if (msqid < 0 || (long) bufsz < 0) return -EINVAL; + if (msgflg & MSG_COPY) { + copy = prepare_copy(buf, bufsz, msgflg, &msgtyp, ©_number); + if (IS_ERR(copy)) + return PTR_ERR(copy); + } mode = convert_mode(&msgtyp, msgflg); ns = current->nsproxy->ipc_ns; msq = msg_lock_check(ns, msqid); - if (IS_ERR(msq)) + if (IS_ERR(msq)) { + free_copy(msgflg, copy); return PTR_ERR(msq); + } for (;;) { struct msg_receiver msr_d; struct list_head *tmp; + long msg_counter = 0; msg = ERR_PTR(-EACCES); if (ipcperms(ns, &msq->q_perm, S_IRUGO)) @@ -809,8 +858,15 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, if (mode == SEARCH_LESSEQUAL && walk_msg->m_type != 1) { msgtyp = walk_msg->m_type - 1; + } else if (msgflg & MSG_COPY) { + msg = fill_copy(copy_number, + msg_counter, + walk_msg, copy); + if (msg) + break; } else break; + msg_counter++; } tmp = tmp->next; } @@ -823,6 +879,8 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp, msg = ERR_PTR(-E2BIG); goto out_unlock; } + if (msgflg & MSG_COPY) + goto out_unlock; list_del(&msg->m_list); msq->q_qnum--; msq->q_rtime = get_seconds(); @@ -906,8 +964,10 @@ out_unlock: break; } } - if (IS_ERR(msg)) + if (IS_ERR(msg)) { + free_copy(msgflg, copy); return PTR_ERR(msg); + } bufsz = msg_handler(buf, msg, bufsz); free_msg(msg); diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 6471f1bdae96..7eecdad40efc 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -102,7 +102,45 @@ out_err: free_msg(msg); return ERR_PTR(err); } +#ifdef CONFIG_CHECKPOINT_RESTORE +struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst) +{ + struct msg_msgseg *dst_pseg, *src_pseg; + int len = src->m_ts; + int alen; + + BUG_ON(dst == NULL); + if (src->m_ts > dst->m_ts) + return ERR_PTR(-EINVAL); + + alen = len; + if (alen > DATALEN_MSG) + alen = DATALEN_MSG; + + dst->next = NULL; + dst->security = NULL; + memcpy(dst + 1, src + 1, alen); + + len -= alen; + dst_pseg = dst->next; + src_pseg = src->next; + while (len > 0) { + alen = len; + if (alen > DATALEN_SEG) + alen = DATALEN_SEG; + memcpy(dst_pseg + 1, src_pseg + 1, alen); + dst_pseg = dst_pseg->next; + len -= alen; + src_pseg = src_pseg->next; + } + + dst->m_type = src->m_type; + dst->m_ts = src->m_ts; + + return dst; +} +#endif int store_msg(void __user *dest, struct msg_msg *msg, int len) { int alen; diff --git a/ipc/util.h b/ipc/util.h index a61e0ca2bffd..eeb79a1fbd83 100644 --- a/ipc/util.h +++ b/ipc/util.h @@ -140,6 +140,7 @@ int ipc_parse_version (int *cmd); extern void free_msg(struct msg_msg *msg); extern struct msg_msg *load_msg(const void __user *src, int len); +extern struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst); extern int store_msg(void __user *dest, struct msg_msg *msg, int len); extern void recompute_msgmni(struct ipc_namespace *); -- cgit v1.2.3 From 7b9205bd775afc4439ed86d617f9042ee9e76a71 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 11 Jan 2013 14:32:05 -0800 Subject: audit: create explicit AUDIT_SECCOMP event type The seccomp path was using AUDIT_ANOM_ABEND from when seccomp mode 1 could only kill a process. While we still want to make sure an audit record is forced on a kill, this should use a separate record type since seccomp mode 2 introduces other behaviors. In the case of "handled" behaviors (process wasn't killed), only emit a record if the process is under inspection. This change also fixes userspace examination of seccomp audit events, since it was considered malformed due to missing fields of the AUDIT_ANOM_ABEND event type. Signed-off-by: Kees Cook Cc: Al Viro Cc: Eric Paris Cc: Jeff Layton Cc: "Eric W. Biederman" Cc: Julien Tinnes Acked-by: Will Drewry Acked-by: Steve Grubb Cc: Andrea Arcangeli Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 3 ++- include/uapi/linux/audit.h | 1 + kernel/auditsc.c | 14 +++++++++++--- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index bce729afbcf9..9d5104d7aba9 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -157,7 +157,8 @@ void audit_core_dumps(long signr); static inline void audit_seccomp(unsigned long syscall, long signr, int code) { - if (unlikely(!audit_dummy_context())) + /* Force a record to be reported if a signal was delivered. */ + if (signr || unlikely(!audit_dummy_context())) __audit_seccomp(syscall, signr, code); } diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 76352ac45f24..09a2d94ab113 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -106,6 +106,7 @@ #define AUDIT_MMAP 1323 /* Record showing descriptor and flags in mmap */ #define AUDIT_NETFILTER_PKT 1324 /* Packets traversing netfilter chains */ #define AUDIT_NETFILTER_CFG 1325 /* Netfilter chain modifications */ +#define AUDIT_SECCOMP 1326 /* Secure Computing event */ #define AUDIT_AVC 1400 /* SE Linux avc denial or grant */ #define AUDIT_SELINUX_ERR 1401 /* Internal SE Linux Errors */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index e37e6a12c5e3..3e46d1dec613 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2675,7 +2675,7 @@ void __audit_mmap_fd(int fd, int flags) context->type = AUDIT_MMAP; } -static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +static void audit_log_task(struct audit_buffer *ab) { kuid_t auid, uid; kgid_t gid; @@ -2693,6 +2693,11 @@ static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) audit_log_task_context(ab); audit_log_format(ab, " pid=%d comm=", current->pid); audit_log_untrustedstring(ab, current->comm); +} + +static void audit_log_abend(struct audit_buffer *ab, char *reason, long signr) +{ + audit_log_task(ab); audit_log_format(ab, " reason="); audit_log_string(ab, reason); audit_log_format(ab, " sig=%ld", signr); @@ -2723,8 +2728,11 @@ void __audit_seccomp(unsigned long syscall, long signr, int code) { struct audit_buffer *ab; - ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); - audit_log_abend(ab, "seccomp", signr); + ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_SECCOMP); + if (unlikely(!ab)) + return; + audit_log_task(ab); + audit_log_format(ab, " sig=%ld", signr); audit_log_format(ab, " syscall=%ld", syscall); audit_log_format(ab, " compat=%d", is_compat_task()); audit_log_format(ab, " ip=0x%lx", KSTK_EIP(current)); -- cgit v1.2.3 From c0a3a20b6c4b5229ef5d26fd9b1c4b1957632aa7 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Fri, 11 Jan 2013 14:32:13 -0800 Subject: linux/audit.h: move ptrace.h include to kernel header While the kernel internals want pt_regs (and so it includes linux/ptrace.h), the user version of audit.h does not need it. So move the include out of the uapi version. This avoids issues where people want the audit defines and userland ptrace api. Including both the kernel ptrace and the userland ptrace headers can easily lead to failure. Signed-off-by: Mike Frysinger Cc: Eric Paris Cc: Al Viro Reviewed-by: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/audit.h | 1 + include/uapi/linux/audit.h | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 9d5104d7aba9..5a6d718adf34 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -24,6 +24,7 @@ #define _LINUX_AUDIT_H_ #include +#include #include struct audit_sig_info { diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index 09a2d94ab113..9f096f1c0907 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -26,7 +26,6 @@ #include #include -#include /* The netlink messages for the audit system is divided into blocks: * 1000 - 1099 are for commanding the audit system -- cgit v1.2.3