From a390e85cfe91c346ff4745bcd45ad0a7e7101aa2 Mon Sep 17 00:00:00 2001
From: Felipe Balbi <balbi@ti.com>
Date: Thu, 6 Oct 2011 10:07:44 +0300
Subject: wl12xx: move common init code from bus modules to main

Move all common parts from sdio.c and spi.c to main.c, since they now
can be handled as part of the platform driver.

Signed-off-by: Felipe Balbi <balbi@ti.com>
[forward-ported, cleaned-up and rephrased commit message]
[added a bunch of fixes and a new pdata element]
[moved some new code into main.c as well]
Signed-off-by: Luciano Coelho <coelho@ti.com>
---
 include/linux/wl12xx.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wl12xx.h b/include/linux/wl12xx.h
index 4b697395326e..0d6373195d32 100644
--- a/include/linux/wl12xx.h
+++ b/include/linux/wl12xx.h
@@ -54,6 +54,9 @@ struct wl12xx_platform_data {
 	int board_ref_clock;
 	int board_tcxo_clock;
 	unsigned long platform_quirks;
+	bool pwr_in_suspend;
+
+	struct wl1271_if_operations *ops;
 };
 
 /* Platform does not support level trigger interrupts */
@@ -73,6 +76,6 @@ int wl12xx_set_platform_data(const struct wl12xx_platform_data *data)
 
 #endif
 
-const struct wl12xx_platform_data *wl12xx_get_platform_data(void);
+struct wl12xx_platform_data *wl12xx_get_platform_data(void);
 
 #endif
-- 
cgit v1.2.3


From c74d084f914e16e42730bcf625ab3f37a4cae8d4 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@googlemail.com>
Date: Sat, 15 Oct 2011 00:14:49 +0200
Subject: mac80211: handle HT PHY BSS membership selector value correctly

802.11n-2009 extends the supported rates element with a
magic value which can be used to prevent legacy stations
from joining the BSS.

However, this magic value is not a rate like the others
and the magic can simply be ignored/skipped at this late
stage.

Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>---
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h |  3 ++
 net/mac80211/mlme.c       | 90 ++++++++++++++++++++++++++---------------------
 2 files changed, 52 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 48363c3c40f8..9789aedb2453 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -770,6 +770,9 @@ struct ieee80211_mgmt {
 	} u;
 } __attribute__ ((packed));
 
+/* Supported Rates value encodings in 802.11n-2009 7.3.2.2 */
+#define BSS_MEMBERSHIP_SELECTOR_HT_PHY	127
+
 /* mgmt header + 1 byte category code */
 #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d3b408cda08d..b25567a32f92 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1466,6 +1466,47 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata,
 	return RX_MGMT_CFG80211_DISASSOC;
 }
 
+static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
+				u8 *supp_rates, unsigned int supp_rates_len,
+				u32 *rates, u32 *basic_rates,
+				bool *have_higher_than_11mbit,
+				int *min_rate, int *min_rate_index)
+{
+	int i, j;
+
+	for (i = 0; i < supp_rates_len; i++) {
+		int rate = (supp_rates[i] & 0x7f) * 5;
+		bool is_basic = !!(supp_rates[i] & 0x80);
+
+		if (rate > 110)
+			*have_higher_than_11mbit = true;
+
+		/*
+		 * BSS_MEMBERSHIP_SELECTOR_HT_PHY is defined in 802.11n-2009
+		 * 7.3.2.2 as a magic value instead of a rate. Hence, skip it.
+		 *
+		 * Note: Even through the membership selector and the basic
+		 *	 rate flag share the same bit, they are not exactly
+		 *	 the same.
+		 */
+		if (!!(supp_rates[i] & 0x80) &&
+		    (supp_rates[i] & 0x7f) == BSS_MEMBERSHIP_SELECTOR_HT_PHY)
+			continue;
+
+		for (j = 0; j < sband->n_bitrates; j++) {
+			if (sband->bitrates[j].bitrate == rate) {
+				*rates |= BIT(j);
+				if (is_basic)
+					*basic_rates |= BIT(j);
+				if (rate < *min_rate) {
+					*min_rate = rate;
+					*min_rate_index = j;
+				}
+				break;
+			}
+		}
+	}
+}
 
 static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 				    struct ieee80211_mgmt *mgmt, size_t len)
@@ -1482,7 +1523,7 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 	struct ieee802_11_elems elems;
 	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 	u32 changed = 0;
-	int i, j, err;
+	int err;
 	bool have_higher_than_11mbit = false;
 	u16 ap_ht_cap_flags;
 	int min_rate = INT_MAX, min_rate_index = -1;
@@ -1540,47 +1581,14 @@ static bool ieee80211_assoc_success(struct ieee80211_work *wk,
 	basic_rates = 0;
 	sband = local->hw.wiphy->bands[wk->chan->band];
 
-	for (i = 0; i < elems.supp_rates_len; i++) {
-		int rate = (elems.supp_rates[i] & 0x7f) * 5;
-		bool is_basic = !!(elems.supp_rates[i] & 0x80);
-
-		if (rate > 110)
-			have_higher_than_11mbit = true;
+	ieee80211_get_rates(sband, elems.supp_rates, elems.supp_rates_len,
+			    &rates, &basic_rates, &have_higher_than_11mbit,
+			    &min_rate, &min_rate_index);
 
-		for (j = 0; j < sband->n_bitrates; j++) {
-			if (sband->bitrates[j].bitrate == rate) {
-				rates |= BIT(j);
-				if (is_basic)
-					basic_rates |= BIT(j);
-				if (rate < min_rate) {
-					min_rate = rate;
-					min_rate_index = j;
-				}
-				break;
-			}
-		}
-	}
-
-	for (i = 0; i < elems.ext_supp_rates_len; i++) {
-		int rate = (elems.ext_supp_rates[i] & 0x7f) * 5;
-		bool is_basic = !!(elems.ext_supp_rates[i] & 0x80);
-
-		if (rate > 110)
-			have_higher_than_11mbit = true;
-
-		for (j = 0; j < sband->n_bitrates; j++) {
-			if (sband->bitrates[j].bitrate == rate) {
-				rates |= BIT(j);
-				if (is_basic)
-					basic_rates |= BIT(j);
-				if (rate < min_rate) {
-					min_rate = rate;
-					min_rate_index = j;
-				}
-				break;
-			}
-		}
-	}
+	ieee80211_get_rates(sband, elems.ext_supp_rates,
+			    elems.ext_supp_rates_len, &rates, &basic_rates,
+			    &have_higher_than_11mbit,
+			    &min_rate, &min_rate_index);
 
 	/*
 	 * some buggy APs don't advertise basic_rates. use the lowest
-- 
cgit v1.2.3


From c2e889a7f7947bc346e0a341e793fd5cb471d884 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@qca.qualcomm.com>
Date: Wed, 2 Nov 2011 23:34:56 +0200
Subject: ieee80211: Define cipher suite selector for WPI-SMS4

This value is used for WPI-SMS4 in ISO/IEC JTC 1 N 9880.

Signed-off-by: Jouni Malinen <jouni@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9789aedb2453..ffc073ab3ff8 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1555,6 +1555,8 @@ enum ieee80211_sa_query_action {
 #define WLAN_CIPHER_SUITE_WEP104	0x000FAC05
 #define WLAN_CIPHER_SUITE_AES_CMAC	0x000FAC06
 
+#define WLAN_CIPHER_SUITE_SMS4		0x00147201
+
 /* AKM suite selectors */
 #define WLAN_AKM_SUITE_8021X		0x000FAC01
 #define WLAN_AKM_SUITE_PSK		0x000FAC02
-- 
cgit v1.2.3


From 6e3e939f3b1bf8534b32ad09ff199d88800835a0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 9 Nov 2011 10:15:42 +0100
Subject: net: add wireless TX status socket option

The 802.1X EAPOL handshake hostapd does requires
knowing whether the frame was ack'ed by the peer.
Currently, we fudge this pretty badly by not even
transmitting the frame as a normal data frame but
injecting it with radiotap and getting the status
out of radiotap monitor as well. This is rather
complex, confuses users (mon.wlan0 presence) and
doesn't work with all hardware.

To get rid of that hack, introduce a real wifi TX
status option for data frame transmissions.

This works similar to the existing TX timestamping
in that it reflects the SKB back to the socket's
error queue with a SCM_WIFI_STATUS cmsg that has
an int indicating ACK status (0/1).

Since it is possible that at some point we will
want to have TX timestamping and wifi status in a
single errqueue SKB (there's little point in not
doing that), redefine SO_EE_ORIGIN_TIMESTAMPING
to SO_EE_ORIGIN_TXSTATUS which can collect more
than just the timestamp; keep the old constant
as an alias of course. Currently the internal APIs
don't make that possible, but it wouldn't be hard
to split them up in a way that makes it possible.

Thanks to Neil Horman for helping me figure out
the functions that add the control messages.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 arch/alpha/include/asm/socket.h   |  3 +++
 arch/arm/include/asm/socket.h     |  3 +++
 arch/avr32/include/asm/socket.h   |  3 +++
 arch/cris/include/asm/socket.h    |  3 +++
 arch/frv/include/asm/socket.h     |  3 +++
 arch/h8300/include/asm/socket.h   |  3 +++
 arch/ia64/include/asm/socket.h    |  3 +++
 arch/m32r/include/asm/socket.h    |  3 +++
 arch/m68k/include/asm/socket.h    |  3 +++
 arch/mips/include/asm/socket.h    |  3 +++
 arch/mn10300/include/asm/socket.h |  3 +++
 arch/parisc/include/asm/socket.h  |  3 +++
 arch/powerpc/include/asm/socket.h |  3 +++
 arch/s390/include/asm/socket.h    |  3 +++
 arch/sparc/include/asm/socket.h   |  3 +++
 arch/xtensa/include/asm/socket.h  |  3 +++
 include/asm-generic/socket.h      |  3 +++
 include/linux/errqueue.h          |  3 ++-
 include/linux/skbuff.h            | 19 +++++++++++++++++--
 include/net/sock.h                |  6 ++++++
 net/core/skbuff.c                 | 20 ++++++++++++++++++++
 net/core/sock.c                   |  9 +++++++++
 net/socket.c                      | 18 ++++++++++++++++++
 23 files changed, 123 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/socket.h b/arch/alpha/include/asm/socket.h
index 06edfefc3373..082355f159e6 100644
--- a/arch/alpha/include/asm/socket.h
+++ b/arch/alpha/include/asm/socket.h
@@ -69,6 +69,9 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/arm/include/asm/socket.h b/arch/arm/include/asm/socket.h
index 90ffd04b8e74..dec6f9afb3cf 100644
--- a/arch/arm/include/asm/socket.h
+++ b/arch/arm/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/avr32/include/asm/socket.h b/arch/avr32/include/asm/socket.h
index c8d1fae49476..247b88c760be 100644
--- a/arch/avr32/include/asm/socket.h
+++ b/arch/avr32/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* __ASM_AVR32_SOCKET_H */
diff --git a/arch/cris/include/asm/socket.h b/arch/cris/include/asm/socket.h
index 1a4a61909ca8..e269264df7c4 100644
--- a/arch/cris/include/asm/socket.h
+++ b/arch/cris/include/asm/socket.h
@@ -64,6 +64,9 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
 
 
diff --git a/arch/frv/include/asm/socket.h b/arch/frv/include/asm/socket.h
index a6b26880c1ec..ce80fdadcce5 100644
--- a/arch/frv/include/asm/socket.h
+++ b/arch/frv/include/asm/socket.h
@@ -62,5 +62,8 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
 
diff --git a/arch/h8300/include/asm/socket.h b/arch/h8300/include/asm/socket.h
index 04c0f4596eb5..cf1daab6f27e 100644
--- a/arch/h8300/include/asm/socket.h
+++ b/arch/h8300/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/ia64/include/asm/socket.h b/arch/ia64/include/asm/socket.h
index 51427eaa51ba..4b03664e3fb5 100644
--- a/arch/ia64/include/asm/socket.h
+++ b/arch/ia64/include/asm/socket.h
@@ -71,4 +71,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/m32r/include/asm/socket.h b/arch/m32r/include/asm/socket.h
index 469787c30098..e8b8c5bb053c 100644
--- a/arch/m32r/include/asm/socket.h
+++ b/arch/m32r/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_M32R_SOCKET_H */
diff --git a/arch/m68k/include/asm/socket.h b/arch/m68k/include/asm/socket.h
index 9bf49c87d954..d4708ce466e0 100644
--- a/arch/m68k/include/asm/socket.h
+++ b/arch/m68k/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/mips/include/asm/socket.h b/arch/mips/include/asm/socket.h
index 9de5190f2487..ad5c0a7a02a7 100644
--- a/arch/mips/include/asm/socket.h
+++ b/arch/mips/include/asm/socket.h
@@ -82,6 +82,9 @@ To add: #define SO_REUSEPORT 0x0200	/* Allow local address and port reuse.  */
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #ifdef __KERNEL__
 
 /** sock_type - Socket types
diff --git a/arch/mn10300/include/asm/socket.h b/arch/mn10300/include/asm/socket.h
index 4e60c4281288..876356d78522 100644
--- a/arch/mn10300/include/asm/socket.h
+++ b/arch/mn10300/include/asm/socket.h
@@ -62,4 +62,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/parisc/include/asm/socket.h b/arch/parisc/include/asm/socket.h
index 225b7d6a1a0a..d28c51b61067 100644
--- a/arch/parisc/include/asm/socket.h
+++ b/arch/parisc/include/asm/socket.h
@@ -61,6 +61,9 @@
 
 #define SO_RXQ_OVFL             0x4021
 
+#define SO_WIFI_STATUS		0x4022
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 /* O_NONBLOCK clashes with the bits used for socket types.  Therefore we
  * have to define SOCK_NONBLOCK to a different value here.
  */
diff --git a/arch/powerpc/include/asm/socket.h b/arch/powerpc/include/asm/socket.h
index 866f7606da68..2fc2af8fbf59 100644
--- a/arch/powerpc/include/asm/socket.h
+++ b/arch/powerpc/include/asm/socket.h
@@ -69,4 +69,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif	/* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/s390/include/asm/socket.h b/arch/s390/include/asm/socket.h
index fdff1e995c73..67b5c1b14b51 100644
--- a/arch/s390/include/asm/socket.h
+++ b/arch/s390/include/asm/socket.h
@@ -70,4 +70,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/asm/socket.h b/arch/sparc/include/asm/socket.h
index 9d3fefcff2f5..8af1b64168b3 100644
--- a/arch/sparc/include/asm/socket.h
+++ b/arch/sparc/include/asm/socket.h
@@ -58,6 +58,9 @@
 
 #define SO_RXQ_OVFL             0x0024
 
+#define SO_WIFI_STATUS		0x0025
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 /* Security levels - as per NRL IPv6 - don't actually do anything */
 #define SO_SECURITY_AUTHENTICATION		0x5001
 #define SO_SECURITY_ENCRYPTION_TRANSPORT	0x5002
diff --git a/arch/xtensa/include/asm/socket.h b/arch/xtensa/include/asm/socket.h
index cbdf2ffaacff..bb06968be227 100644
--- a/arch/xtensa/include/asm/socket.h
+++ b/arch/xtensa/include/asm/socket.h
@@ -73,4 +73,7 @@
 
 #define SO_RXQ_OVFL             40
 
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS		SO_WIFI_STATUS
+
 #endif	/* _XTENSA_SOCKET_H */
diff --git a/include/asm-generic/socket.h b/include/asm-generic/socket.h
index 9a6115e7cf63..49c1704173e7 100644
--- a/include/asm-generic/socket.h
+++ b/include/asm-generic/socket.h
@@ -64,4 +64,7 @@
 #define SO_DOMAIN		39
 
 #define SO_RXQ_OVFL             40
+
+#define SO_WIFI_STATUS		41
+#define SCM_WIFI_STATUS	SO_WIFI_STATUS
 #endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index 034072cea853..c9f522bd17e4 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -17,7 +17,8 @@ struct sock_extended_err {
 #define SO_EE_ORIGIN_LOCAL	1
 #define SO_EE_ORIGIN_ICMP	2
 #define SO_EE_ORIGIN_ICMP6	3
-#define SO_EE_ORIGIN_TIMESTAMPING 4
+#define SO_EE_ORIGIN_TXSTATUS	4
+#define SO_EE_ORIGIN_TIMESTAMPING SO_EE_ORIGIN_TXSTATUS
 
 #define SO_EE_OFFENDER(ee)	((struct sockaddr*)((ee)+1))
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 6a6b352326d7..ff7e1306a2d2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -218,6 +218,9 @@ enum {
 
 	/* device driver supports TX zero-copy buffers */
 	SKBTX_DEV_ZEROCOPY = 1 << 4,
+
+	/* generate wifi status information (where possible) */
+	SKBTX_WIFI_STATUS = 1 << 5,
 };
 
 /*
@@ -352,6 +355,8 @@ typedef unsigned char *sk_buff_data_t;
  *	@ooo_okay: allow the mapping of a socket to a queue to be changed
  *	@l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
  *		ports.
+ *	@wifi_acked_valid: wifi_acked was set
+ *	@wifi_acked: whether frame was acked on wifi or not
  *	@dma_cookie: a cookie to one of several possible DMA operations
  *		done by skb DMA functions
  *	@secmark: security marking
@@ -445,10 +450,11 @@ struct sk_buff {
 #endif
 	__u8			ooo_okay:1;
 	__u8			l4_rxhash:1;
+	__u8			wifi_acked_valid:1;
+	__u8			wifi_acked:1;
+	/* 10/12 bit hole (depending on ndisc_nodetype presence) */
 	kmemcheck_bitfield_end(flags2);
 
-	/* 0/13 bit hole */
-
 #ifdef CONFIG_NET_DMA
 	dma_cookie_t		dma_cookie;
 #endif
@@ -2263,6 +2269,15 @@ static inline void skb_tx_timestamp(struct sk_buff *skb)
 	sw_tx_timestamp(skb);
 }
 
+/**
+ * skb_complete_wifi_ack - deliver skb with wifi status
+ *
+ * @skb: the original outgoing packet
+ * @acked: ack status
+ *
+ */
+void skb_complete_wifi_ack(struct sk_buff *skb, bool acked);
+
 extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
diff --git a/include/net/sock.h b/include/net/sock.h
index 5ac682f73d63..fa6f5381c5d6 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -564,6 +564,7 @@ enum sock_flags {
 	SOCK_FASYNC, /* fasync() active */
 	SOCK_RXQ_OVFL,
 	SOCK_ZEROCOPY, /* buffers from userspace */
+	SOCK_WIFI_STATUS, /* push wifi status to userspace */
 };
 
 static inline void sock_copy_flags(struct sock *nsk, struct sock *osk)
@@ -1714,6 +1715,8 @@ static inline int sock_intr_errno(long timeo)
 
 extern void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	struct sk_buff *skb);
+extern void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb);
 
 static __inline__ void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
@@ -1741,6 +1744,9 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 		__sock_recv_timestamp(msg, sk, skb);
 	else
 		sk->sk_stamp = kt;
+
+	if (sock_flag(sk, SOCK_WIFI_STATUS) && skb->wifi_acked_valid)
+		__sock_recv_wifi_status(msg, sk, skb);
 }
 
 extern void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ca4db40e75b8..2f6babd5a570 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3168,6 +3168,26 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 }
 EXPORT_SYMBOL_GPL(skb_tstamp_tx);
 
+void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
+{
+	struct sock *sk = skb->sk;
+	struct sock_exterr_skb *serr;
+	int err;
+
+	skb->wifi_acked_valid = 1;
+	skb->wifi_acked = acked;
+
+	serr = SKB_EXT_ERR(skb);
+	memset(serr, 0, sizeof(*serr));
+	serr->ee.ee_errno = ENOMSG;
+	serr->ee.ee_origin = SO_EE_ORIGIN_TXSTATUS;
+
+	err = sock_queue_err_skb(sk, skb);
+	if (err)
+		kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
+
 
 /**
  * skb_partial_csum_set - set up and verify partial csum values for packet
diff --git a/net/core/sock.c b/net/core/sock.c
index 4ed7b1d12f5e..cbdf51c0d5ac 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -740,6 +740,11 @@ set_rcvbuf:
 	case SO_RXQ_OVFL:
 		sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
 		break;
+
+	case SO_WIFI_STATUS:
+		sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
+		break;
+
 	default:
 		ret = -ENOPROTOOPT;
 		break;
@@ -961,6 +966,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 		v.val = !!sock_flag(sk, SOCK_RXQ_OVFL);
 		break;
 
+	case SO_WIFI_STATUS:
+		v.val = !!sock_flag(sk, SOCK_WIFI_STATUS);
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/socket.c b/net/socket.c
index 2877647f347b..425ef4270460 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -538,6 +538,8 @@ int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags)
 		*tx_flags |= SKBTX_HW_TSTAMP;
 	if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
 		*tx_flags |= SKBTX_SW_TSTAMP;
+	if (sock_flag(sk, SOCK_WIFI_STATUS))
+		*tx_flags |= SKBTX_WIFI_STATUS;
 	return 0;
 }
 EXPORT_SYMBOL(sock_tx_timestamp);
@@ -674,6 +676,22 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
 
+void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk,
+	struct sk_buff *skb)
+{
+	int ack;
+
+	if (!sock_flag(sk, SOCK_WIFI_STATUS))
+		return;
+	if (!skb->wifi_acked_valid)
+		return;
+
+	ack = skb->wifi_acked;
+
+	put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack);
+}
+EXPORT_SYMBOL_GPL(__sock_recv_wifi_status);
+
 static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
 				   struct sk_buff *skb)
 {
-- 
cgit v1.2.3


From 6cc00d545a21ed26696f3bda865ebf11eccbf2b5 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@cozybit.com>
Date: Thu, 3 Nov 2011 21:11:11 -0700
Subject: mac80211: QoS multicast frames have No Ack policy

Previously QoS multicast frames had the Normal Acknowledgment QoS
control bits set. This would cause broadcast frames to be discarded by
peers with which we have a BA session, since their sequence number would
fall outside the allowed range. Set No Ack QoS control bits on multicast
QoS frames and filter these in de-aggregation code.

Signed-off-by: Thomas Pedersen <thomas@cozybit.com>

v2: Use proper QoS Ack Policy ctl field mask (Christian)

v3: Clean up conditional (Johannes)
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 1 +
 net/mac80211/rx.c         | 9 ++++++++-
 net/mac80211/wme.c        | 3 ++-
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index ffc073ab3ff8..66cedf6eb5c2 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -128,6 +128,7 @@
 #define IEEE80211_QOS_CTL_ACK_POLICY_NOACK	0x0020
 #define IEEE80211_QOS_CTL_ACK_POLICY_NO_EXPL	0x0040
 #define IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK	0x0060
+#define IEEE80211_QOS_CTL_ACK_POLICY_MASK	0x0060
 /* A-MSDU 802.11n */
 #define IEEE80211_QOS_CTL_A_MSDU_PRESENT	0x0080
 /* Mesh Control 802.11s */
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 3173dcfc2136..72c1eb4eb451 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -747,7 +747,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx)
 	struct sta_info *sta = rx->sta;
 	struct tid_ampdu_rx *tid_agg_rx;
 	u16 sc;
-	int tid;
+	u8 tid, ack_policy;
 
 	if (!ieee80211_is_data_qos(hdr->frame_control))
 		goto dont_reorder;
@@ -760,6 +760,8 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx)
 	if (!sta)
 		goto dont_reorder;
 
+	ack_policy = *ieee80211_get_qos_ctl(hdr) &
+		     IEEE80211_QOS_CTL_ACK_POLICY_MASK;
 	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
 
 	tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
@@ -770,6 +772,11 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx)
 	if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC)))
 		goto dont_reorder;
 
+	/* not part of a BA session */
+	if (ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK &&
+	    ack_policy != IEEE80211_QOS_CTL_ACK_POLICY_NORMAL)
+		goto dont_reorder;
+
 	/* new, potentially un-ordered, ampdu frame - process it */
 
 	/* reset session timer */
diff --git a/net/mac80211/wme.c b/net/mac80211/wme.c
index d0240bba45f3..d4f789a4e4f1 100644
--- a/net/mac80211/wme.c
+++ b/net/mac80211/wme.c
@@ -150,7 +150,8 @@ void ieee80211_set_qos_hdr(struct ieee80211_sub_if_data *sdata,
 		/* preserve EOSP bit */
 		ack_policy = *p & IEEE80211_QOS_CTL_EOSP;
 
-		if (unlikely(sdata->local->wifi_wme_noack_test))
+		if (unlikely(sdata->local->wifi_wme_noack_test) ||
+		    is_multicast_ether_addr(hdr->addr1))
 			ack_policy |= IEEE80211_QOS_CTL_ACK_POLICY_NOACK;
 		/* qos header is 2 bytes */
 		*p++ = ack_policy | tid;
-- 
cgit v1.2.3


From 28946da763e8b8d8ffd01ab861b684a4afb4bc3b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:12 +0100
Subject: nl80211: allow subscribing to unexpected class3 frames

To implement AP mode without monitor interfaces we
need to be able to send a deauth to stations that
send frames without being associated. Enable this
by adding a new nl80211 event for such frames that
an application can subscribe to.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 12 +++++++++
 include/net/cfg80211.h  | 17 +++++++++++++
 net/wireless/mlme.c     | 16 ++++++++++++
 net/wireless/nl80211.c  | 66 +++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |  3 +++
 5 files changed, 114 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 8049bf77d799..9107adc73e0b 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -509,6 +509,16 @@
  * @NL80211_CMD_TDLS_OPER: Perform a high-level TDLS command (e.g. link setup).
  * @NL80211_CMD_TDLS_MGMT: Send a TDLS management frame.
  *
+ * @NL80211_CMD_UNEXPECTED_FRAME: Used by an application controlling an AP
+ *	(or GO) interface (i.e. hostapd) to ask for unexpected frames to
+ *	implement sending deauth to stations that send unexpected class 3
+ *	frames. Also used as the event sent by the kernel when such a frame
+ *	is received.
+ *	For the event, the %NL80211_ATTR_MAC attribute carries the TA and
+ *	other attributes like the interface index are present.
+ *	If used as the command it must have an interface index and you can
+ *	only unsubscribe from the event by closing the socket.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -638,6 +648,8 @@ enum nl80211_commands {
 	NL80211_CMD_TDLS_OPER,
 	NL80211_CMD_TDLS_MGMT,
 
+	NL80211_CMD_UNEXPECTED_FRAME,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 0c71d4a30cd6..ef118e452589 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2183,6 +2183,8 @@ struct wireless_dev {
 
 	int beacon_interval;
 
+	u32 ap_unexpected_nlpid;
+
 #ifdef CONFIG_CFG80211_WEXT
 	/* wext data */
 	struct {
@@ -3193,6 +3195,21 @@ void cfg80211_gtk_rekey_notify(struct net_device *dev, const u8 *bssid,
 void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 				     const u8 *bssid, bool preauth, gfp_t gfp);
 
+/**
+ * cfg80211_rx_spurious_frame - inform userspace about a spurious frame
+ * @dev: The device the frame matched to
+ * @addr: the transmitter address
+ * @gfp: context flags
+ *
+ * This function is used in AP mode (only!) to inform userspace that
+ * a spurious class 3 frame was received, to be able to deauth the
+ * sender.
+ * Returns %true if the frame was passed to userspace (or this failed
+ * for a reason other than not having a subscription.)
+ */
+bool cfg80211_rx_spurious_frame(struct net_device *dev,
+				const u8 *addr, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 21fc9702f81c..f4d868b1e11c 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -879,6 +879,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid)
 	}
 
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
+
+	if (nlpid == wdev->ap_unexpected_nlpid)
+		wdev->ap_unexpected_nlpid = 0;
 }
 
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
@@ -1107,3 +1110,16 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 	nl80211_pmksa_candidate_notify(rdev, dev, index, bssid, preauth, gfp);
 }
 EXPORT_SYMBOL(cfg80211_pmksa_candidate_notify);
+
+bool cfg80211_rx_spurious_frame(struct net_device *dev,
+				const u8 *addr, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_GO))
+		return false;
+
+	return nl80211_unexpected_frame(dev, addr, gfp);
+}
+EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2bcaa579cebf..9910c3cb9a85 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5832,6 +5832,23 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 	return err;
 }
 
+static int nl80211_register_unexpected_frame(struct sk_buff *skb,
+					     struct genl_info *info)
+{
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (wdev->iftype != NL80211_IFTYPE_AP &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EINVAL;
+
+	if (wdev->ap_unexpected_nlpid)
+		return -EBUSY;
+
+	wdev->ap_unexpected_nlpid = info->snd_pid;
+	return 0;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -6387,6 +6404,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV_UP |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_UNEXPECTED_FRAME,
+		.doit = nl80211_register_unexpected_frame,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -7171,6 +7196,47 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+	u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid);
+
+	if (!nlpid)
+		return false;
+
+	msg = nlmsg_new(100, gfp);
+	if (!msg)
+		return true;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UNEXPECTED_FRAME);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return true;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr);
+
+	err = genlmsg_end(msg, hdr);
+	if (err < 0) {
+		nlmsg_free(msg);
+		return true;
+	}
+
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	return true;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+	return true;
+}
+
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct net_device *netdev, u32 nlpid,
 		      int freq, const u8 *buf, size_t len, gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index f24a1fbeaf19..d94456e54f4e 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -117,4 +117,7 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
 				    struct net_device *netdev, int index,
 				    const u8 *bssid, bool preauth, gfp_t gfp);
 
+bool nl80211_unexpected_frame(struct net_device *dev,
+			      const u8 *addr, gfp_t gfp);
+
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From 562a74803f4881772ba2375ec4e5aa0ad90f4caa Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 7 Nov 2011 12:39:33 +0100
Subject: nl80211: advertise device AP SME

Add the ability to advertise that the device
contains the AP SME and what features it can
support. There are currently no features in
the bitmap -- probe response offload will be
advertised by a few patches Arik is working
on now (who took over from Guy Eilam) and a
device with AP SME will typically implement
and require response offload.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath6kl/init.c |  3 ++-
 include/linux/nl80211.h                | 15 +++++++++++++++
 include/net/cfg80211.h                 |  6 ++++++
 net/wireless/core.c                    |  4 ++++
 net/wireless/nl80211.c                 |  4 ++++
 5 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/init.c b/drivers/net/wireless/ath/ath6kl/init.c
index c1d2366704b5..81e0031012ca 100644
--- a/drivers/net/wireless/ath/ath6kl/init.c
+++ b/drivers/net/wireless/ath/ath6kl/init.c
@@ -1548,7 +1548,8 @@ static int ath6kl_init(struct net_device *dev)
 	ar->conf_flags = ATH6KL_CONF_IGNORE_ERP_BARKER |
 			 ATH6KL_CONF_ENABLE_11N | ATH6KL_CONF_ENABLE_TX_BURST;
 
-	ar->wdev->wiphy->flags |= WIPHY_FLAG_SUPPORTS_FW_ROAM;
+	ar->wdev->wiphy->flags |= WIPHY_FLAG_SUPPORTS_FW_ROAM |
+				  WIPHY_FLAG_HAVE_AP_SME;
 
 	status = ath6kl_target_config_wlan_params(ar);
 	if (!status)
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 9107adc73e0b..ff39e4b234d4 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1121,6 +1121,11 @@ enum nl80211_commands {
  *	%NL80211_CMD_TDLS_MGMT. Otherwise %NL80211_CMD_TDLS_OPER should be
  *	used for asking the driver to perform a TDLS operation.
  *
+ * @NL80211_ATTR_DEVICE_AP_SME: This u32 attribute may be listed for devices
+ *	that have AP support to indicate that they have the AP SME integrated
+ *	with support for the features listed in this attribute, see
+ *	&enum nl80211_ap_sme_features.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1349,6 +1354,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_TDLS_SUPPORT,
 	NL80211_ATTR_TDLS_EXTERNAL_SETUP,
 
+	NL80211_ATTR_DEVICE_AP_SME,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2662,4 +2669,12 @@ enum nl80211_tdls_operation {
 	NL80211_TDLS_DISABLE_LINK,
 };
 
+/*
+ * enum nl80211_ap_sme_features - device-integrated AP features
+ * Reserved for future use, no bits are defined in
+ * NL80211_ATTR_DEVICE_AP_SME yet.
+enum nl80211_ap_sme_features {
+};
+ */
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index ef118e452589..86d207da6cce 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1679,6 +1679,7 @@ struct cfg80211_ops {
  *	teardown packets should be sent through the @NL80211_CMD_TDLS_MGMT
  *	command. When this flag is not set, @NL80211_CMD_TDLS_OPER should be
  *	used for asking the driver/firmware to perform a TDLS operation.
+ * @WIPHY_FLAG_HAVE_AP_SME: device integrates AP SME
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1697,6 +1698,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_AP_UAPSD			= BIT(14),
 	WIPHY_FLAG_SUPPORTS_TDLS		= BIT(15),
 	WIPHY_FLAG_TDLS_EXTERNAL_SETUP		= BIT(16),
+	WIPHY_FLAG_HAVE_AP_SME			= BIT(17),
 };
 
 /**
@@ -1907,6 +1909,8 @@ struct wiphy_wowlan_support {
  *	may request, if implemented.
  *
  * @wowlan: WoWLAN support information
+ *
+ * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -1930,6 +1934,8 @@ struct wiphy {
 
 	u32 flags;
 
+	u32 ap_sme_capa;
+
 	enum cfg80211_signal_type signal_type;
 
 	int bss_priv_size;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 220f3bd176f8..ccdfed897651 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -492,6 +492,10 @@ int wiphy_register(struct wiphy *wiphy)
 		    !(wiphy->wowlan.flags & WIPHY_WOWLAN_SUPPORTS_GTK_REKEY)))
 		return -EINVAL;
 
+	if (WARN_ON(wiphy->ap_sme_capa &&
+		    !(wiphy->flags & WIPHY_FLAG_HAVE_AP_SME)))
+		return -EINVAL;
+
 	if (WARN_ON(wiphy->addresses && !wiphy->n_addresses))
 		return -EINVAL;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9910c3cb9a85..2094c8468d78 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1007,6 +1007,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (nl80211_put_iface_combinations(&dev->wiphy, msg))
 		goto nla_put_failure;
 
+	if (dev->wiphy.flags & WIPHY_FLAG_HAVE_AP_SME)
+		NLA_PUT_U32(msg, NL80211_ATTR_DEVICE_AP_SME,
+			    dev->wiphy.ap_sme_capa);
+
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-- 
cgit v1.2.3


From 7f6cf311a594c1e7ca8120367dd1d4c685aabff1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:15 +0100
Subject: nl80211: add API to probe a client

When the AP SME in hostapd is used it wants to
probe the clients when they have been idle for
some time. Add explicit API to support this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  10 +++++
 include/net/cfg80211.h  |  17 ++++++++
 net/wireless/nl80211.c  | 104 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 131 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index ff39e4b234d4..901a70d327d1 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -519,6 +519,14 @@
  *	If used as the command it must have an interface index and you can
  *	only unsubscribe from the event by closing the socket.
  *
+ * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface
+ *	by sending a null data frame to it and reporting when the frame is
+ *	acknowleged. This is used to allow timing out inactive clients. Uses
+ *	%NL80211_ATTR_IFINDEX and %NL80211_ATTR_MAC. The command returns a
+ *	direct reply with an %NL80211_ATTR_COOKIE that is later used to match
+ *	up the event with the request. The event includes the same data and
+ *	has %NL80211_ATTR_ACK set if the frame was ACKed.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -650,6 +658,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_UNEXPECTED_FRAME,
 
+	NL80211_CMD_PROBE_CLIENT,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 86d207da6cce..389e85e8c03d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1432,6 +1432,9 @@ struct cfg80211_gtk_rekey_data {
  *
  * @tdls_mgmt: Transmit a TDLS management frame.
  * @tdls_oper: Perform a high-level TDLS operation (e.g. TDLS link setup).
+ *
+ * @probe_client: probe an associated client, must return a cookie that it
+ *	later passes to cfg80211_probe_status().
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -1621,6 +1624,9 @@ struct cfg80211_ops {
 			     u16 status_code, const u8 *buf, size_t len);
 	int	(*tdls_oper)(struct wiphy *wiphy, struct net_device *dev,
 			     u8 *peer, enum nl80211_tdls_operation oper);
+
+	int	(*probe_client)(struct wiphy *wiphy, struct net_device *dev,
+				const u8 *peer, u64 *cookie);
 };
 
 /*
@@ -3216,6 +3222,17 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 bool cfg80211_rx_spurious_frame(struct net_device *dev,
 				const u8 *addr, gfp_t gfp);
 
+/**
+ * cfg80211_probe_status - notify userspace about probe status
+ * @dev: the device the probe was sent on
+ * @addr: the address of the peer
+ * @cookie: the cookie filled in @probe_client previously
+ * @acked: indicates whether probe was acked or not
+ * @gfp: allocation flags
+ */
+void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
+			   u64 cookie, bool acked, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2094c8468d78..a8eda12b46a8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -890,6 +890,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	}
 	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
 		CMD(sched_scan_start, START_SCHED_SCAN);
+	CMD(probe_client, PROBE_CLIENT);
 
 #undef CMD
 
@@ -5853,6 +5854,59 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb,
 	return 0;
 }
 
+static int nl80211_probe_client(struct sk_buff *skb,
+				struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct sk_buff *msg;
+	void *hdr;
+	const u8 *addr;
+	u64 cookie;
+	int err;
+
+	if (wdev->iftype != NL80211_IFTYPE_AP &&
+	    wdev->iftype != NL80211_IFTYPE_P2P_GO)
+		return -EOPNOTSUPP;
+
+	if (!info->attrs[NL80211_ATTR_MAC])
+		return -EINVAL;
+
+	if (!rdev->ops->probe_client)
+		return -EOPNOTSUPP;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+			     NL80211_CMD_PROBE_CLIENT);
+
+	if (IS_ERR(hdr)) {
+		err = PTR_ERR(hdr);
+		goto free_msg;
+	}
+
+	addr = nla_data(info->attrs[NL80211_ATTR_MAC]);
+
+	err = rdev->ops->probe_client(&rdev->wiphy, dev, addr, &cookie);
+	if (err)
+		goto free_msg;
+
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_reply(msg, info);
+
+ nla_put_failure:
+	err = -ENOBUFS;
+ free_msg:
+	nlmsg_free(msg);
+	return err;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -6416,6 +6470,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_PROBE_CLIENT,
+		.doit = nl80211_probe_client,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -7478,6 +7540,48 @@ nl80211_send_cqm_pktloss_notify(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
+			   u64 cookie, bool acked, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PROBE_CLIENT);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, dev->ifindex);
+	NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, addr);
+	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+	if (acked)
+		NLA_PUT_FLAG(msg, NL80211_ATTR_ACK);
+
+	err = genlmsg_end(msg, hdr);
+	if (err < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_probe_status);
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
-- 
cgit v1.2.3


From 5e760230e42cf759bd923457ca2753aacf2e656e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:17 +0100
Subject: cfg80211: allow registering to beacons

Add the ability to register to received beacon frames
to allow implementing OLBC logic in userspace. The
registration is per wiphy since there's no point in
receiving the same frame multiple times.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  7 +++++
 include/net/cfg80211.h  | 20 ++++++++++++++
 net/wireless/core.h     |  2 ++
 net/wireless/nl80211.c  | 70 ++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 98 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 901a70d327d1..c29a284c27e6 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -527,6 +527,11 @@
  *	up the event with the request. The event includes the same data and
  *	has %NL80211_ATTR_ACK set if the frame was ACKed.
  *
+ * @NL80211_CMD_REGISTER_BEACONS: Register this socket to receive beacons from
+ *	other BSSes when any interfaces are in AP mode. This helps implement
+ *	OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME
+ *	messages. Note that per PHY only one application may register.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -660,6 +665,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_PROBE_CLIENT,
 
+	NL80211_CMD_REGISTER_BEACONS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 389e85e8c03d..d01307f54faa 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1686,6 +1686,9 @@ struct cfg80211_ops {
  *	command. When this flag is not set, @NL80211_CMD_TDLS_OPER should be
  *	used for asking the driver/firmware to perform a TDLS operation.
  * @WIPHY_FLAG_HAVE_AP_SME: device integrates AP SME
+ * @WIPHY_FLAG_REPORTS_OBSS: the device will report beacons from other BSSes
+ *	when there are virtual interfaces in AP mode by calling
+ *	cfg80211_report_obss_beacon().
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1705,6 +1708,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_SUPPORTS_TDLS		= BIT(15),
 	WIPHY_FLAG_TDLS_EXTERNAL_SETUP		= BIT(16),
 	WIPHY_FLAG_HAVE_AP_SME			= BIT(17),
+	WIPHY_FLAG_REPORTS_OBSS			= BIT(18),
 };
 
 /**
@@ -3233,6 +3237,22 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev,
 void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 			   u64 cookie, bool acked, gfp_t gfp);
 
+/**
+ * cfg80211_report_obss_beacon - report beacon from other APs
+ * @wiphy: The wiphy that received the beacon
+ * @frame: the frame
+ * @len: length of the frame
+ * @freq: frequency the frame was received on
+ * @gfp: allocation flags
+ *
+ * Use this function to report to userspace when a beacon was
+ * received. It is not useful to call this when there is no
+ * netdev that is in AP/GO mode.
+ */
+void cfg80211_report_obss_beacon(struct wiphy *wiphy,
+				 const u8 *frame, size_t len,
+				 int freq, gfp_t gfp);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b9ec3061ed72..4c6ff4024356 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -54,6 +54,8 @@ struct cfg80211_registered_device {
 	int opencount; /* also protected by devlist_mtx */
 	wait_queue_head_t dev_wait;
 
+	u32 ap_beacons_nlpid;
+
 	/* BSSes/scanning */
 	spinlock_t bss_lock;
 	struct list_head bss_list;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a8eda12b46a8..68b6708b996f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -891,6 +891,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
 		CMD(sched_scan_start, START_SCHED_SCAN);
 	CMD(probe_client, PROBE_CLIENT);
+	if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
+		i++;
+		NLA_PUT_U32(msg, i, NL80211_CMD_REGISTER_BEACONS);
+	}
 
 #undef CMD
 
@@ -5907,6 +5911,21 @@ static int nl80211_probe_client(struct sk_buff *skb,
 	return err;
 }
 
+static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+
+	if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS))
+		return -EOPNOTSUPP;
+
+	if (rdev->ap_beacons_nlpid)
+		return -EBUSY;
+
+	rdev->ap_beacons_nlpid = info->snd_pid;
+
+	return 0;
+}
+
 #define NL80211_FLAG_NEED_WIPHY		0x01
 #define NL80211_FLAG_NEED_NETDEV	0x02
 #define NL80211_FLAG_NEED_RTNL		0x04
@@ -6478,6 +6497,14 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_NETDEV |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_REGISTER_BEACONS,
+		.doit = nl80211_register_beacons,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_WIPHY |
+				  NL80211_FLAG_NEED_RTNL,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -7582,6 +7609,44 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 }
 EXPORT_SYMBOL(cfg80211_probe_status);
 
+void cfg80211_report_obss_beacon(struct wiphy *wiphy,
+				 const u8 *frame, size_t len,
+				 int freq, gfp_t gfp)
+{
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+	struct sk_buff *msg;
+	void *hdr;
+	u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid);
+
+	if (!nlpid)
+		return;
+
+	msg = nlmsg_new(len + 100, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	if (freq)
+		NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_FREQ, freq);
+	NLA_PUT(msg, NL80211_ATTR_FRAME, len, frame);
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+EXPORT_SYMBOL(cfg80211_report_obss_beacon);
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
@@ -7595,9 +7660,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	rcu_read_lock();
 
-	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list)
+	list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) {
 		list_for_each_entry_rcu(wdev, &rdev->netdev_list, list)
 			cfg80211_mlme_unregister_socket(wdev, notify->pid);
+		if (rdev->ap_beacons_nlpid == notify->pid)
+			rdev->ap_beacons_nlpid = 0;
+	}
 
 	rcu_read_unlock();
 
-- 
cgit v1.2.3


From b92ab5d86dafc2b3733c5fdd5def40c8fe7ea7c9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:19 +0100
Subject: cfg80211: add event for unexpected 4addr frames

The frames are used by AP/STA WDS mode, and hostapd
needs to know when such a frame was received to set
up the VLAN appropriately to allow using it.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 10 +++++++++-
 include/net/cfg80211.h  | 16 ++++++++++++++++
 net/wireless/mlme.c     | 14 ++++++++++++++
 net/wireless/nl80211.c  | 19 +++++++++++++++++--
 net/wireless/nl80211.h  |  2 ++
 5 files changed, 58 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index c29a284c27e6..09474ab7de8c 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -517,7 +517,13 @@
  *	For the event, the %NL80211_ATTR_MAC attribute carries the TA and
  *	other attributes like the interface index are present.
  *	If used as the command it must have an interface index and you can
- *	only unsubscribe from the event by closing the socket.
+ *	only unsubscribe from the event by closing the socket. Subscription
+ *	is also for %NL80211_CMD_UNEXPECTED_4ADDR_FRAME events.
+ *
+ * @NL80211_CMD_UNEXPECTED_4ADDR_FRAME: Sent as an event indicating that the
+ *	associated station identified by %NL80211_ATTR_MAC sent a 4addr frame
+ *	and wasn't already in a 4-addr VLAN. The event will be sent similarly
+ *	to the %NL80211_CMD_UNEXPECTED_FRAME event, to the same listener.
  *
  * @NL80211_CMD_PROBE_CLIENT: Probe an associated station on an AP interface
  *	by sending a null data frame to it and reporting when the frame is
@@ -667,6 +673,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_REGISTER_BEACONS,
 
+	NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d01307f54faa..be3535f0895e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3226,6 +3226,22 @@ void cfg80211_pmksa_candidate_notify(struct net_device *dev, int index,
 bool cfg80211_rx_spurious_frame(struct net_device *dev,
 				const u8 *addr, gfp_t gfp);
 
+/**
+ * cfg80211_rx_unexpected_4addr_frame - inform about unexpected WDS frame
+ * @dev: The device the frame matched to
+ * @addr: the transmitter address
+ * @gfp: context flags
+ *
+ * This function is used in AP mode (only!) to inform userspace that
+ * an associated station sent a 4addr frame but that wasn't expected.
+ * It is allowed and desirable to send this event only once for each
+ * station to avoid event flooding.
+ * Returns %true if the frame was passed to userspace (or this failed
+ * for a reason other than not having a subscription.)
+ */
+bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
+					const u8 *addr, gfp_t gfp);
+
 /**
  * cfg80211_probe_status - notify userspace about probe status
  * @dev: the device the probe was sent on
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index f4d868b1e11c..34891e08c54a 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -1123,3 +1123,17 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev,
 	return nl80211_unexpected_frame(dev, addr, gfp);
 }
 EXPORT_SYMBOL(cfg80211_rx_spurious_frame);
+
+bool cfg80211_rx_unexpected_4addr_frame(struct net_device *dev,
+					const u8 *addr, gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+
+	if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP &&
+		    wdev->iftype != NL80211_IFTYPE_P2P_GO &&
+		    wdev->iftype != NL80211_IFTYPE_AP_VLAN))
+		return false;
+
+	return nl80211_unexpected_4addr_frame(dev, addr, gfp);
+}
+EXPORT_SYMBOL(cfg80211_rx_unexpected_4addr_frame);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 68b6708b996f..5b659068b020 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7289,7 +7289,8 @@ void nl80211_send_sta_del_event(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
-bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
+static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd,
+				       const u8 *addr, gfp_t gfp)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
@@ -7305,7 +7306,7 @@ bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
 	if (!msg)
 		return true;
 
-	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_UNEXPECTED_FRAME);
+	hdr = nl80211hdr_put(msg, 0, 0, 0, cmd);
 	if (!hdr) {
 		nlmsg_free(msg);
 		return true;
@@ -7330,6 +7331,20 @@ bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
 	return true;
 }
 
+bool nl80211_unexpected_frame(struct net_device *dev, const u8 *addr, gfp_t gfp)
+{
+	return __nl80211_unexpected_frame(dev, NL80211_CMD_UNEXPECTED_FRAME,
+					  addr, gfp);
+}
+
+bool nl80211_unexpected_4addr_frame(struct net_device *dev,
+				    const u8 *addr, gfp_t gfp)
+{
+	return __nl80211_unexpected_frame(dev,
+					  NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
+					  addr, gfp);
+}
+
 int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 		      struct net_device *netdev, u32 nlpid,
 		      int freq, const u8 *buf, size_t len, gfp_t gfp)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index d94456e54f4e..12bf4d185abe 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -119,5 +119,7 @@ void nl80211_pmksa_candidate_notify(struct cfg80211_registered_device *rdev,
 
 bool nl80211_unexpected_frame(struct net_device *dev,
 			      const u8 *addr, gfp_t gfp);
+bool nl80211_unexpected_4addr_frame(struct net_device *dev,
+				    const u8 *addr, gfp_t gfp);
 
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From e247bd9068e3e86c3571147c128883596ace9d05 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 4 Nov 2011 11:18:21 +0100
Subject: cfg80211/mac80211: allow management TX to not wait for ACK

For probe responses it can be useful to not wait for ACK to
avoid retransmissions if the station that sent the probe is
already on the next channel, so allow userspace to request
not caring about the ACK with a new nl80211 flag.

Since mac80211 needs to be updated for the new function
prototype anyway implement it right away -- it's just a
few lines of code.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c |  3 ++-
 include/linux/nl80211.h                    |  7 ++++++
 include/net/cfg80211.h                     |  2 +-
 net/mac80211/cfg.c                         | 11 ++++++---
 net/wireless/core.h                        |  2 +-
 net/wireless/mlme.c                        |  5 ++--
 net/wireless/nl80211.c                     | 39 ++++++++++++++++++------------
 7 files changed, 46 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 3aff36bad5d3..daf444bf8d48 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -1732,7 +1732,8 @@ static int ath6kl_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 			  struct ieee80211_channel *chan, bool offchan,
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
-			  const u8 *buf, size_t len, bool no_cck, u64 *cookie)
+			  const u8 *buf, size_t len, bool no_cck,
+			  bool dont_wait_for_ack, u64 *cookie)
 {
 	struct ath6kl *ar = ath6kl_priv(dev);
 	u32 id;
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 09474ab7de8c..165e16fc7af1 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1151,6 +1151,11 @@ enum nl80211_commands {
  *	with support for the features listed in this attribute, see
  *	&enum nl80211_ap_sme_features.
  *
+ * @NL80211_ATTR_DONT_WAIT_FOR_ACK: Used with %NL80211_CMD_FRAME, this tells
+ *	the driver to not wait for an acknowledgement. Note that due to this,
+ *	it will also not give a status callback nor return a cookie. This is
+ *	mostly useful for probe responses to save airtime.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1381,6 +1386,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_DEVICE_AP_SME,
 
+	NL80211_ATTR_DONT_WAIT_FOR_ACK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index be3535f0895e..00287bdef919 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1588,7 +1588,7 @@ struct cfg80211_ops {
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
-			  u64 *cookie);
+			  bool dont_wait_for_ack, u64 *cookie);
 	int	(*mgmt_tx_cancel_wait)(struct wiphy *wiphy,
 				       struct net_device *dev,
 				       u64 cookie);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e072fea69a30..ab3258ac0b2c 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1936,7 +1936,7 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 			     enum nl80211_channel_type channel_type,
 			     bool channel_type_valid, unsigned int wait,
 			     const u8 *buf, size_t len, bool no_cck,
-			     u64 *cookie)
+			     bool dont_wait_for_ack, u64 *cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -1944,10 +1944,15 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev,
 	struct sta_info *sta;
 	struct ieee80211_work *wk;
 	const struct ieee80211_mgmt *mgmt = (void *)buf;
-	u32 flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
-		    IEEE80211_TX_CTL_REQ_TX_STATUS;
+	u32 flags;
 	bool is_offchan = false;
 
+	if (dont_wait_for_ack)
+		flags = IEEE80211_TX_CTL_NO_ACK;
+	else
+		flags = IEEE80211_TX_INTFL_NL80211_FRAME_TX |
+			IEEE80211_TX_CTL_REQ_TX_STATUS;
+
 	/* Check that we are on the requested channel for transmission */
 	if (chan != local->tmp_channel &&
 	    chan != local->oper_channel)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 4c6ff4024356..1c7d4df5418c 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -378,7 +378,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
-			  u64 *cookie);
+			  bool dont_wait_for_ack, u64 *cookie);
 
 /* SME */
 int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 34891e08c54a..6c1bafd508c8 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -904,7 +904,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  enum nl80211_channel_type channel_type,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
-			  u64 *cookie)
+			  bool dont_wait_for_ack, u64 *cookie)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	const struct ieee80211_mgmt *mgmt;
@@ -995,7 +995,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 	/* Transmit the Action frame as requested by user space */
 	return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, offchan,
 				  channel_type, channel_type_valid,
-				  wait, buf, len, no_cck, cookie);
+				  wait, buf, len, no_cck, dont_wait_for_ack,
+				  cookie);
 }
 
 bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5b659068b020..0ef09415c89a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -196,6 +196,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_TDLS_OPERATION] = { .type = NLA_U8 },
 	[NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG },
+	[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -5282,10 +5283,11 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	int err;
 	void *hdr;
 	u64 cookie;
-	struct sk_buff *msg;
+	struct sk_buff *msg = NULL;
 	unsigned int wait = 0;
-	bool offchan;
-	bool no_cck;
+	bool offchan, no_cck, dont_wait_for_ack;
+
+	dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK];
 
 	if (!info->attrs[NL80211_ATTR_FRAME] ||
 	    !info->attrs[NL80211_ATTR_WIPHY_FREQ])
@@ -5329,29 +5331,36 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
 	if (chan == NULL)
 		return -EINVAL;
 
-	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (!msg)
-		return -ENOMEM;
+	if (!dont_wait_for_ack) {
+		msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (!msg)
+			return -ENOMEM;
 
-	hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
-			     NL80211_CMD_FRAME);
+		hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0,
+				     NL80211_CMD_FRAME);
 
-	if (IS_ERR(hdr)) {
-		err = PTR_ERR(hdr);
-		goto free_msg;
+		if (IS_ERR(hdr)) {
+			err = PTR_ERR(hdr);
+			goto free_msg;
+		}
 	}
+
 	err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, offchan, channel_type,
 				    channel_type_valid, wait,
 				    nla_data(info->attrs[NL80211_ATTR_FRAME]),
 				    nla_len(info->attrs[NL80211_ATTR_FRAME]),
-				    no_cck, &cookie);
+				    no_cck, dont_wait_for_ack, &cookie);
 	if (err)
 		goto free_msg;
 
-	NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
+	if (msg) {
+		NLA_PUT_U64(msg, NL80211_ATTR_COOKIE, cookie);
 
-	genlmsg_end(msg, hdr);
-	return genlmsg_reply(msg, info);
+		genlmsg_end(msg, hdr);
+		return genlmsg_reply(msg, info);
+	}
+
+	return 0;
 
  nla_put_failure:
 	err = -ENOBUFS;
-- 
cgit v1.2.3


From 1f074bd8eb7a4a210a5119cd7220f89da6c7a2c3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 6 Nov 2011 14:13:33 +0100
Subject: nl80211: advertise socket TX status capability

The new wifi socket TX capability should be
supported by wifi drivers, let them advertise
whether they do or not.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 18 ++++++++++++++++++
 include/net/cfg80211.h  |  3 ++-
 net/wireless/nl80211.c  |  2 ++
 3 files changed, 22 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 165e16fc7af1..3152ddfb4294 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -695,6 +695,8 @@ enum nl80211_commands {
 #define NL80211_CMD_DISASSOCIATE NL80211_CMD_DISASSOCIATE
 #define NL80211_CMD_REG_BEACON_HINT NL80211_CMD_REG_BEACON_HINT
 
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
+
 /* source-level API compatibility */
 #define NL80211_CMD_GET_MESH_PARAMS NL80211_CMD_GET_MESH_CONFIG
 #define NL80211_CMD_SET_MESH_PARAMS NL80211_CMD_SET_MESH_CONFIG
@@ -1156,6 +1158,9 @@ enum nl80211_commands {
  *	it will also not give a status callback nor return a cookie. This is
  *	mostly useful for probe responses to save airtime.
  *
+ * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from
+ *	&enum nl80211_feature_flags and is advertised in wiphy information.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1388,6 +1393,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_DONT_WAIT_FOR_ACK,
 
+	NL80211_ATTR_FEATURE_FLAGS,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1422,6 +1429,7 @@ enum nl80211_attrs {
 #define NL80211_ATTR_AKM_SUITES NL80211_ATTR_AKM_SUITES
 #define NL80211_ATTR_KEY NL80211_ATTR_KEY
 #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS
+#define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS
 
 #define NL80211_MAX_SUPP_RATES			32
 #define NL80211_MAX_SUPP_REG_RULES		32
@@ -2709,4 +2717,14 @@ enum nl80211_ap_sme_features {
 };
  */
 
+/**
+ * enum nl80211_feature_flags - device/driver features
+ * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back
+ *	TX status to the socket error queue when requested with the
+ *	socket option.
+ */
+enum nl80211_feature_flags {
+	NL80211_FEATURE_SK_TX_STATUS	= 1 << 0,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 00287bdef919..e1ee1416631d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1881,6 +1881,7 @@ struct wiphy_wowlan_support {
  * @software_iftypes: bitmask of software interface types, these are not
  *	subject to any restrictions since they are purely managed in SW.
  * @flags: wiphy flags, see &enum wiphy_flags
+ * @features: features advertised to nl80211, see &enum nl80211_feature_flags.
  * @bss_priv_size: each BSS struct has private data allocated with it,
  *	this variable determines its size
  * @max_scan_ssids: maximum number of SSIDs the device can scan for in
@@ -1942,7 +1943,7 @@ struct wiphy {
 	/* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */
 	u16 interface_modes;
 
-	u32 flags;
+	u32 flags, features;
 
 	u32 ap_sme_capa;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 0ef09415c89a..864fcb6f217e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1017,6 +1017,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		NLA_PUT_U32(msg, NL80211_ATTR_DEVICE_AP_SME,
 			    dev->wiphy.ap_sme_capa);
 
+	NLA_PUT_U32(msg, NL80211_ATTR_FEATURE_FLAGS, dev->wiphy.features);
+
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-- 
cgit v1.2.3


From 87bbbe22f84b91d0bcd3a7fc638e4f5e8224cc4e Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Thu, 10 Nov 2011 11:28:55 +0200
Subject: nl80211: Add probe response offload attribute

Notify user-space about probe-response offloading support in the driver.

A wiphy flag is used to indicate support and a bitmap of protocols
determines which protocols are supported.

Signed-off-by: Guy Eilam <guy@wizery.com>
Signed-off-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 28 ++++++++++++++++++++++++++++
 include/net/cfg80211.h  | 10 ++++++++++
 net/wireless/nl80211.c  |  4 ++++
 3 files changed, 42 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 3152ddfb4294..be92333cf8fe 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1160,6 +1160,11 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_FEATURE_FLAGS: This u32 attribute contains flags from
  *	&enum nl80211_feature_flags and is advertised in wiphy information.
+ * @NL80211_ATTR_PROBE_RESP_OFFLOAD: Indicates that the HW responds to probe
+ *
+ *	requests while operating in AP-mode.
+ *	This attribute holds a bitmap of the supported protocols for
+ *	offloading (see &enum nl80211_probe_resp_offload_support_attr).
  *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -1395,6 +1400,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_FEATURE_FLAGS,
 
+	NL80211_ATTR_PROBE_RESP_OFFLOAD,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -2727,4 +2734,25 @@ enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS	= 1 << 0,
 };
 
+/**
+ * enum nl80211_probe_resp_offload_support_attr - optional supported
+ *	protocols for probe-response offloading by the driver/FW.
+ *	To be used with the %NL80211_ATTR_PROBE_RESP_OFFLOAD attribute.
+ *	Each enum value represents a bit in the bitmap of supported
+ *	protocols. Typically a subset of probe-requests belonging to a
+ *	supported protocol will be excluded from offload and uploaded
+ *	to the host.
+ *
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS: Support for WPS ver. 1
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2: Support for WPS ver. 2
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P: Support for P2P
+ * @NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U: Support for 802.11u
+ */
+enum nl80211_probe_resp_offload_support_attr {
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS =	1<<0,
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_WPS2 =	1<<1,
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_P2P =	1<<2,
+	NL80211_PROBE_RESP_OFFLOAD_SUPPORT_80211U =	1<<3,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 50e3608f5656..093f538f65d6 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1694,6 +1694,8 @@ struct cfg80211_ops {
  * @WIPHY_FLAG_REPORTS_OBSS: the device will report beacons from other BSSes
  *	when there are virtual interfaces in AP mode by calling
  *	cfg80211_report_obss_beacon().
+ * @WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD: When operating as an AP, the device
+ *	responds to probe-requests in hardware.
  */
 enum wiphy_flags {
 	WIPHY_FLAG_CUSTOM_REGULATORY		= BIT(0),
@@ -1714,6 +1716,7 @@ enum wiphy_flags {
 	WIPHY_FLAG_TDLS_EXTERNAL_SETUP		= BIT(16),
 	WIPHY_FLAG_HAVE_AP_SME			= BIT(17),
 	WIPHY_FLAG_REPORTS_OBSS			= BIT(18),
+	WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD	= BIT(19),
 };
 
 /**
@@ -1982,6 +1985,13 @@ struct wiphy {
 	u32 available_antennas_tx;
 	u32 available_antennas_rx;
 
+	/*
+	 * Bitmap of supported protocols for probe response offloading
+	 * see &enum nl80211_probe_resp_offload_support_attr. Only valid
+	 * when the wiphy flag @WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD is set.
+	 */
+	u32 probe_resp_offload;
+
 	/* If multiple wiphys are registered and you're handed e.g.
 	 * a regular netdev with assigned ieee80211_ptr, you won't
 	 * know whether it points to a wiphy your driver has registered
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 258fb881c8e3..f395a06c114a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -759,6 +759,10 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_ANTENNA_AVAIL_RX,
 		    dev->wiphy.available_antennas_rx);
 
+	if (dev->wiphy.flags & WIPHY_FLAG_AP_PROBE_RESP_OFFLOAD)
+		NLA_PUT_U32(msg, NL80211_ATTR_PROBE_RESP_OFFLOAD,
+			    dev->wiphy.probe_resp_offload);
+
 	if ((dev->wiphy.available_antennas_tx ||
 	     dev->wiphy.available_antennas_rx) && dev->ops->get_antenna) {
 		u32 tx_ant = 0, rx_ant = 0;
-- 
cgit v1.2.3


From 00f740e1a3b7abb51980371ee8fa113df22ae0b8 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Thu, 10 Nov 2011 11:28:56 +0200
Subject: nl80211: Pass probe response data to drivers

Pass probe-response data from usermode via beacon parameters.

Signed-off-by: Guy Eilam <guy@wizery.com>
Signed-off-by: Arik Nemtsov <arik@wizery.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 6 ++++++
 include/net/cfg80211.h  | 4 ++++
 net/wireless/nl80211.c  | 9 +++++++++
 3 files changed, 19 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index be92333cf8fe..f9261c253735 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1166,6 +1166,10 @@ enum nl80211_commands {
  *	This attribute holds a bitmap of the supported protocols for
  *	offloading (see &enum nl80211_probe_resp_offload_support_attr).
  *
+ * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire
+ *	probe-response frame. The DA field in the 802.11 header is zero-ed out,
+ *	to be filled by the FW.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1402,6 +1406,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PROBE_RESP_OFFLOAD,
 
+	NL80211_ATTR_PROBE_RESP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 093f538f65d6..8d7ba0961d3e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -391,6 +391,8 @@ struct cfg80211_crypto_settings {
  * @assocresp_ies: extra information element(s) to add into (Re)Association
  *	Response frames or %NULL
  * @assocresp_ies_len: length of assocresp_ies in octets
+ * @probe_resp_len: length of probe response template (@probe_resp)
+ * @probe_resp: probe response template (AP mode only)
  */
 struct beacon_parameters {
 	u8 *head, *tail;
@@ -408,6 +410,8 @@ struct beacon_parameters {
 	size_t proberesp_ies_len;
 	const u8 *assocresp_ies;
 	size_t assocresp_ies_len;
+	int probe_resp_len;
+	u8 *probe_resp;
 };
 
 /**
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f395a06c114a..6bc7c4b32fa5 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -197,6 +197,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_TDLS_SUPPORT] = { .type = NLA_FLAG },
 	[NL80211_ATTR_TDLS_EXTERNAL_SETUP] = { .type = NLA_FLAG },
 	[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
+	[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
+				      .len = IEEE80211_MAX_DATA_LEN },
 };
 
 /* policy for the key attributes */
@@ -2171,6 +2173,13 @@ static int nl80211_addset_beacon(struct sk_buff *skb, struct genl_info *info)
 			nla_len(info->attrs[NL80211_ATTR_IE_ASSOC_RESP]);
 	}
 
+	if (info->attrs[NL80211_ATTR_PROBE_RESP]) {
+		params.probe_resp =
+			nla_data(info->attrs[NL80211_ATTR_PROBE_RESP]);
+		params.probe_resp_len =
+			nla_len(info->attrs[NL80211_ATTR_PROBE_RESP]);
+	}
+
 	err = call(&rdev->wiphy, dev, &params);
 	if (!err && params.interval)
 		wdev->beacon_interval = params.interval;
-- 
cgit v1.2.3


From 3d249d4ca7d0ed6629a135ea1ea21c72286c0d80 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 11 Nov 2011 22:16:48 +0000
Subject: net: introduce ethernet teaming device

This patch introduces new network device called team. It supposes to be
very fast, simple, userspace-driven alternative to existing bonding
driver.

Userspace library called libteam with couple of demo apps is available
here:
https://github.com/jpirko/libteam
Note it's still in its dipers atm.

team<->libteam use generic netlink for communication. That and rtnl
suppose to be the only way to configure team device, no sysfs etc.

Python binding of libteam was recently introduced.
Daemon providing arpmon/miimon active-backup functionality will be
introduced shortly. All what's necessary is already implemented in
kernel team driver.

v7->v8:
	- check ndo_ndo_vlan_rx_[add/kill]_vid functions before calling
	  them.
	- use dev_kfree_skb_any() instead of dev_kfree_skb()

v6->v7:
	- transmit and receive functions are not checked in hot paths.
	  That also resolves memory leak on transmit when no port is
	  present

v5->v6:
	- changed couple of _rcu calls to non _rcu ones in non-readers

v4->v5:
	- team_change_mtu() uses team->lock while travesing though port
	  list
	- mac address changes are moved completely to jurisdiction of
	  userspace daemon. This way the daemon can do FOM1, FOM2 and
	  possibly other weird things with mac addresses.
	  Only round-robin mode sets up all ports to bond's address then
	  enslaved.
	- Extended Kconfig text

v3->v4:
	- remove redundant synchronize_rcu from __team_change_mode()
	- revert "set and clear of mode_ops happens per pointer, not per
	  byte"
	- extend comment of function __team_change_mode()

v2->v3:
	- team_change_mtu() uses rcu version of list traversal to unwind
	- set and clear of mode_ops happens per pointer, not per byte
	- port hashlist changed to be embedded into team structure
	- error branch in team_port_enter() does cleanup now
	- fixed rtln->rtnl

v1->v2:
	- modes are made as modules. Makes team more modular and
	  extendable.
	- several commenters' nitpicks found on v1 were fixed
	- several other bugs were fixed.
	- note I ignored Eric's comment about roundrobin port selector
	  as Eric's way may be easily implemented as another mode (mode
	  "random") in future.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/team.txt         |    2 +
 MAINTAINERS                               |    7 +
 drivers/net/Kconfig                       |    2 +
 drivers/net/Makefile                      |    1 +
 drivers/net/team/Kconfig                  |   43 +
 drivers/net/team/Makefile                 |    7 +
 drivers/net/team/team.c                   | 1583 +++++++++++++++++++++++++++++
 drivers/net/team/team_mode_activebackup.c |  137 +++
 drivers/net/team/team_mode_roundrobin.c   |  107 ++
 include/linux/Kbuild                      |    1 +
 include/linux/if.h                        |    1 +
 include/linux/if_team.h                   |  242 +++++
 12 files changed, 2133 insertions(+)
 create mode 100644 Documentation/networking/team.txt
 create mode 100644 drivers/net/team/Kconfig
 create mode 100644 drivers/net/team/Makefile
 create mode 100644 drivers/net/team/team.c
 create mode 100644 drivers/net/team/team_mode_activebackup.c
 create mode 100644 drivers/net/team/team_mode_roundrobin.c
 create mode 100644 include/linux/if_team.h

(limited to 'include/linux')

diff --git a/Documentation/networking/team.txt b/Documentation/networking/team.txt
new file mode 100644
index 000000000000..5a013686b9ea
--- /dev/null
+++ b/Documentation/networking/team.txt
@@ -0,0 +1,2 @@
+Team devices are driven from userspace via libteam library which is here:
+	https://github.com/jpirko/libteam
diff --git a/MAINTAINERS b/MAINTAINERS
index 4808256446f2..8d941692c394 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6484,6 +6484,13 @@ W:	http://tcp-lp-mod.sourceforge.net/
 S:	Maintained
 F:	net/ipv4/tcp_lp.c
 
+TEAM DRIVER
+M:	Jiri Pirko <jpirko@redhat.com>
+L:	netdev@vger.kernel.org
+S:	Supported
+F:	drivers/net/team/
+F:	include/linux/if_team.h
+
 TEGRA SUPPORT
 M:	Colin Cross <ccross@android.com>
 M:	Olof Johansson <olof@lixom.net>
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 583f66cd5bbd..b3020bea39e4 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -125,6 +125,8 @@ config IFB
 	  'ifb1' etc.
 	  Look at the iproute2 documentation directory for usage etc
 
+source "drivers/net/team/Kconfig"
+
 config MACVLAN
 	tristate "MAC-VLAN support (EXPERIMENTAL)"
 	depends on EXPERIMENTAL
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index fa877cd2b139..4e4ebfe1aa53 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_NET) += Space.o loopback.o
 obj-$(CONFIG_NETCONSOLE) += netconsole.o
 obj-$(CONFIG_PHYLIB) += phy/
 obj-$(CONFIG_RIONET) += rionet.o
+obj-$(CONFIG_NET_TEAM) += team/
 obj-$(CONFIG_TUN) += tun.o
 obj-$(CONFIG_VETH) += veth.o
 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
diff --git a/drivers/net/team/Kconfig b/drivers/net/team/Kconfig
new file mode 100644
index 000000000000..248a144033ca
--- /dev/null
+++ b/drivers/net/team/Kconfig
@@ -0,0 +1,43 @@
+menuconfig NET_TEAM
+	tristate "Ethernet team driver support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	---help---
+	  This allows one to create virtual interfaces that teams together
+	  multiple ethernet devices.
+
+	  Team devices can be added using the "ip" command from the
+	  iproute2 package:
+
+	  "ip link add link [ address MAC ] [ NAME ] type team"
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called team.
+
+if NET_TEAM
+
+config NET_TEAM_MODE_ROUNDROBIN
+	tristate "Round-robin mode support"
+	depends on NET_TEAM
+	---help---
+	  Basic mode where port used for transmitting packets is selected in
+	  round-robin fashion using packet counter.
+
+	  All added ports are setup to have bond's mac address.
+
+	  To compile this team mode as a module, choose M here: the module
+	  will be called team_mode_roundrobin.
+
+config NET_TEAM_MODE_ACTIVEBACKUP
+	tristate "Active-backup mode support"
+	depends on NET_TEAM
+	---help---
+	  Only one port is active at a time and the rest of ports are used
+	  for backup.
+
+	  Mac addresses of ports are not modified. Userspace is responsible
+	  to do so.
+
+	  To compile this team mode as a module, choose M here: the module
+	  will be called team_mode_activebackup.
+
+endif # NET_TEAM
diff --git a/drivers/net/team/Makefile b/drivers/net/team/Makefile
new file mode 100644
index 000000000000..85f2028a87af
--- /dev/null
+++ b/drivers/net/team/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the network team driver
+#
+
+obj-$(CONFIG_NET_TEAM) += team.o
+obj-$(CONFIG_NET_TEAM_MODE_ROUNDROBIN) += team_mode_roundrobin.o
+obj-$(CONFIG_NET_TEAM_MODE_ACTIVEBACKUP) += team_mode_activebackup.o
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
new file mode 100644
index 000000000000..60672bb09960
--- /dev/null
+++ b/drivers/net/team/team.c
@@ -0,0 +1,1583 @@
+/*
+ * net/drivers/team/team.c - Network team device driver
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
+#include <linux/errno.h>
+#include <linux/ctype.h>
+#include <linux/notifier.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/socket.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <net/rtnetlink.h>
+#include <net/genetlink.h>
+#include <net/netlink.h>
+#include <linux/if_team.h>
+
+#define DRV_NAME "team"
+
+
+/**********
+ * Helpers
+ **********/
+
+#define team_port_exists(dev) (dev->priv_flags & IFF_TEAM_PORT)
+
+static struct team_port *team_port_get_rcu(const struct net_device *dev)
+{
+	struct team_port *port = rcu_dereference(dev->rx_handler_data);
+
+	return team_port_exists(dev) ? port : NULL;
+}
+
+static struct team_port *team_port_get_rtnl(const struct net_device *dev)
+{
+	struct team_port *port = rtnl_dereference(dev->rx_handler_data);
+
+	return team_port_exists(dev) ? port : NULL;
+}
+
+/*
+ * Since the ability to change mac address for open port device is tested in
+ * team_port_add, this function can be called without control of return value
+ */
+static int __set_port_mac(struct net_device *port_dev,
+			  const unsigned char *dev_addr)
+{
+	struct sockaddr addr;
+
+	memcpy(addr.sa_data, dev_addr, ETH_ALEN);
+	addr.sa_family = ARPHRD_ETHER;
+	return dev_set_mac_address(port_dev, &addr);
+}
+
+int team_port_set_orig_mac(struct team_port *port)
+{
+	return __set_port_mac(port->dev, port->orig.dev_addr);
+}
+
+int team_port_set_team_mac(struct team_port *port)
+{
+	return __set_port_mac(port->dev, port->team->dev->dev_addr);
+}
+EXPORT_SYMBOL(team_port_set_team_mac);
+
+
+/*******************
+ * Options handling
+ *******************/
+
+void team_options_register(struct team *team, struct team_option *option,
+			   size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++)
+		list_add_tail(&option->list, &team->option_list);
+}
+EXPORT_SYMBOL(team_options_register);
+
+static void __team_options_change_check(struct team *team,
+					struct team_option *changed_option);
+
+static void __team_options_unregister(struct team *team,
+				      struct team_option *option,
+				      size_t option_count)
+{
+	int i;
+
+	for (i = 0; i < option_count; i++, option++)
+		list_del(&option->list);
+}
+
+void team_options_unregister(struct team *team, struct team_option *option,
+			     size_t option_count)
+{
+	__team_options_unregister(team, option, option_count);
+	__team_options_change_check(team, NULL);
+}
+EXPORT_SYMBOL(team_options_unregister);
+
+static int team_option_get(struct team *team, struct team_option *option,
+			   void *arg)
+{
+	return option->getter(team, arg);
+}
+
+static int team_option_set(struct team *team, struct team_option *option,
+			   void *arg)
+{
+	int err;
+
+	err = option->setter(team, arg);
+	if (err)
+		return err;
+
+	__team_options_change_check(team, option);
+	return err;
+}
+
+/****************
+ * Mode handling
+ ****************/
+
+static LIST_HEAD(mode_list);
+static DEFINE_SPINLOCK(mode_list_lock);
+
+static struct team_mode *__find_mode(const char *kind)
+{
+	struct team_mode *mode;
+
+	list_for_each_entry(mode, &mode_list, list) {
+		if (strcmp(mode->kind, kind) == 0)
+			return mode;
+	}
+	return NULL;
+}
+
+static bool is_good_mode_name(const char *name)
+{
+	while (*name != '\0') {
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+			return false;
+		name++;
+	}
+	return true;
+}
+
+int team_mode_register(struct team_mode *mode)
+{
+	int err = 0;
+
+	if (!is_good_mode_name(mode->kind) ||
+	    mode->priv_size > TEAM_MODE_PRIV_SIZE)
+		return -EINVAL;
+	spin_lock(&mode_list_lock);
+	if (__find_mode(mode->kind)) {
+		err = -EEXIST;
+		goto unlock;
+	}
+	list_add_tail(&mode->list, &mode_list);
+unlock:
+	spin_unlock(&mode_list_lock);
+	return err;
+}
+EXPORT_SYMBOL(team_mode_register);
+
+int team_mode_unregister(struct team_mode *mode)
+{
+	spin_lock(&mode_list_lock);
+	list_del_init(&mode->list);
+	spin_unlock(&mode_list_lock);
+	return 0;
+}
+EXPORT_SYMBOL(team_mode_unregister);
+
+static struct team_mode *team_mode_get(const char *kind)
+{
+	struct team_mode *mode;
+
+	spin_lock(&mode_list_lock);
+	mode = __find_mode(kind);
+	if (!mode) {
+		spin_unlock(&mode_list_lock);
+		request_module("team-mode-%s", kind);
+		spin_lock(&mode_list_lock);
+		mode = __find_mode(kind);
+	}
+	if (mode)
+		if (!try_module_get(mode->owner))
+			mode = NULL;
+
+	spin_unlock(&mode_list_lock);
+	return mode;
+}
+
+static void team_mode_put(const struct team_mode *mode)
+{
+	module_put(mode->owner);
+}
+
+static bool team_dummy_transmit(struct team *team, struct sk_buff *skb)
+{
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+rx_handler_result_t team_dummy_receive(struct team *team,
+				       struct team_port *port,
+				       struct sk_buff *skb)
+{
+	return RX_HANDLER_ANOTHER;
+}
+
+static void team_adjust_ops(struct team *team)
+{
+	/*
+	 * To avoid checks in rx/tx skb paths, ensure here that non-null and
+	 * correct ops are always set.
+	 */
+
+	if (list_empty(&team->port_list) ||
+	    !team->mode || !team->mode->ops->transmit)
+		team->ops.transmit = team_dummy_transmit;
+	else
+		team->ops.transmit = team->mode->ops->transmit;
+
+	if (list_empty(&team->port_list) ||
+	    !team->mode || !team->mode->ops->receive)
+		team->ops.receive = team_dummy_receive;
+	else
+		team->ops.receive = team->mode->ops->receive;
+}
+
+/*
+ * We can benefit from the fact that it's ensured no port is present
+ * at the time of mode change. Therefore no packets are in fly so there's no
+ * need to set mode operations in any special way.
+ */
+static int __team_change_mode(struct team *team,
+			      const struct team_mode *new_mode)
+{
+	/* Check if mode was previously set and do cleanup if so */
+	if (team->mode) {
+		void (*exit_op)(struct team *team) = team->ops.exit;
+
+		/* Clear ops area so no callback is called any longer */
+		memset(&team->ops, 0, sizeof(struct team_mode_ops));
+		team_adjust_ops(team);
+
+		if (exit_op)
+			exit_op(team);
+		team_mode_put(team->mode);
+		team->mode = NULL;
+		/* zero private data area */
+		memset(&team->mode_priv, 0,
+		       sizeof(struct team) - offsetof(struct team, mode_priv));
+	}
+
+	if (!new_mode)
+		return 0;
+
+	if (new_mode->ops->init) {
+		int err;
+
+		err = new_mode->ops->init(team);
+		if (err)
+			return err;
+	}
+
+	team->mode = new_mode;
+	memcpy(&team->ops, new_mode->ops, sizeof(struct team_mode_ops));
+	team_adjust_ops(team);
+
+	return 0;
+}
+
+static int team_change_mode(struct team *team, const char *kind)
+{
+	struct team_mode *new_mode;
+	struct net_device *dev = team->dev;
+	int err;
+
+	if (!list_empty(&team->port_list)) {
+		netdev_err(dev, "No ports can be present during mode change\n");
+		return -EBUSY;
+	}
+
+	if (team->mode && strcmp(team->mode->kind, kind) == 0) {
+		netdev_err(dev, "Unable to change to the same mode the team is in\n");
+		return -EINVAL;
+	}
+
+	new_mode = team_mode_get(kind);
+	if (!new_mode) {
+		netdev_err(dev, "Mode \"%s\" not found\n", kind);
+		return -EINVAL;
+	}
+
+	err = __team_change_mode(team, new_mode);
+	if (err) {
+		netdev_err(dev, "Failed to change to mode \"%s\"\n", kind);
+		team_mode_put(new_mode);
+		return err;
+	}
+
+	netdev_info(dev, "Mode changed to \"%s\"\n", kind);
+	return 0;
+}
+
+
+/************************
+ * Rx path frame handler
+ ************************/
+
+/* note: already called with rcu_read_lock */
+static rx_handler_result_t team_handle_frame(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct team_port *port;
+	struct team *team;
+	rx_handler_result_t res;
+
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (!skb)
+		return RX_HANDLER_CONSUMED;
+
+	*pskb = skb;
+
+	port = team_port_get_rcu(skb->dev);
+	team = port->team;
+
+	res = team->ops.receive(team, port, skb);
+	if (res == RX_HANDLER_ANOTHER) {
+		struct team_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(team->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->rx_packets++;
+		pcpu_stats->rx_bytes += skb->len;
+		if (skb->pkt_type == PACKET_MULTICAST)
+			pcpu_stats->rx_multicast++;
+		u64_stats_update_end(&pcpu_stats->syncp);
+
+		skb->dev = team->dev;
+	} else {
+		this_cpu_inc(team->pcpu_stats->rx_dropped);
+	}
+
+	return res;
+}
+
+
+/****************
+ * Port handling
+ ****************/
+
+static bool team_port_find(const struct team *team,
+			   const struct team_port *port)
+{
+	struct team_port *cur;
+
+	list_for_each_entry(cur, &team->port_list, list)
+		if (cur == port)
+			return true;
+	return false;
+}
+
+/*
+ * Add/delete port to the team port list. Write guarded by rtnl_lock.
+ * Takes care of correct port->index setup (might be racy).
+ */
+static void team_port_list_add_port(struct team *team,
+				    struct team_port *port)
+{
+	port->index = team->port_count++;
+	hlist_add_head_rcu(&port->hlist,
+			   team_port_index_hash(team, port->index));
+	list_add_tail_rcu(&port->list, &team->port_list);
+}
+
+static void __reconstruct_port_hlist(struct team *team, int rm_index)
+{
+	int i;
+	struct team_port *port;
+
+	for (i = rm_index + 1; i < team->port_count; i++) {
+		port = team_get_port_by_index(team, i);
+		hlist_del_rcu(&port->hlist);
+		port->index--;
+		hlist_add_head_rcu(&port->hlist,
+				   team_port_index_hash(team, port->index));
+	}
+}
+
+static void team_port_list_del_port(struct team *team,
+				   struct team_port *port)
+{
+	int rm_index = port->index;
+
+	hlist_del_rcu(&port->hlist);
+	list_del_rcu(&port->list);
+	__reconstruct_port_hlist(team, rm_index);
+	team->port_count--;
+}
+
+#define TEAM_VLAN_FEATURES (NETIF_F_ALL_CSUM | NETIF_F_SG | \
+			    NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
+			    NETIF_F_HIGHDMA | NETIF_F_LRO)
+
+static void __team_compute_features(struct team *team)
+{
+	struct team_port *port;
+	u32 vlan_features = TEAM_VLAN_FEATURES;
+	unsigned short max_hard_header_len = ETH_HLEN;
+
+	list_for_each_entry(port, &team->port_list, list) {
+		vlan_features = netdev_increment_features(vlan_features,
+					port->dev->vlan_features,
+					TEAM_VLAN_FEATURES);
+
+		if (port->dev->hard_header_len > max_hard_header_len)
+			max_hard_header_len = port->dev->hard_header_len;
+	}
+
+	team->dev->vlan_features = vlan_features;
+	team->dev->hard_header_len = max_hard_header_len;
+
+	netdev_change_features(team->dev);
+}
+
+static void team_compute_features(struct team *team)
+{
+	spin_lock(&team->lock);
+	__team_compute_features(team);
+	spin_unlock(&team->lock);
+}
+
+static int team_port_enter(struct team *team, struct team_port *port)
+{
+	int err = 0;
+
+	dev_hold(team->dev);
+	port->dev->priv_flags |= IFF_TEAM_PORT;
+	if (team->ops.port_enter) {
+		err = team->ops.port_enter(team, port);
+		if (err) {
+			netdev_err(team->dev, "Device %s failed to enter team mode\n",
+				   port->dev->name);
+			goto err_port_enter;
+		}
+	}
+
+	return 0;
+
+err_port_enter:
+	port->dev->priv_flags &= ~IFF_TEAM_PORT;
+	dev_put(team->dev);
+
+	return err;
+}
+
+static void team_port_leave(struct team *team, struct team_port *port)
+{
+	if (team->ops.port_leave)
+		team->ops.port_leave(team, port);
+	port->dev->priv_flags &= ~IFF_TEAM_PORT;
+	dev_put(team->dev);
+}
+
+static void __team_port_change_check(struct team_port *port, bool linkup);
+
+static int team_port_add(struct team *team, struct net_device *port_dev)
+{
+	struct net_device *dev = team->dev;
+	struct team_port *port;
+	char *portname = port_dev->name;
+	int err;
+
+	if (port_dev->flags & IFF_LOOPBACK ||
+	    port_dev->type != ARPHRD_ETHER) {
+		netdev_err(dev, "Device %s is of an unsupported type\n",
+			   portname);
+		return -EINVAL;
+	}
+
+	if (team_port_exists(port_dev)) {
+		netdev_err(dev, "Device %s is already a port "
+				"of a team device\n", portname);
+		return -EBUSY;
+	}
+
+	if (port_dev->flags & IFF_UP) {
+		netdev_err(dev, "Device %s is up. Set it down before adding it as a team port\n",
+			   portname);
+		return -EBUSY;
+	}
+
+	port = kzalloc(sizeof(struct team_port), GFP_KERNEL);
+	if (!port)
+		return -ENOMEM;
+
+	port->dev = port_dev;
+	port->team = team;
+
+	port->orig.mtu = port_dev->mtu;
+	err = dev_set_mtu(port_dev, dev->mtu);
+	if (err) {
+		netdev_dbg(dev, "Error %d calling dev_set_mtu\n", err);
+		goto err_set_mtu;
+	}
+
+	memcpy(port->orig.dev_addr, port_dev->dev_addr, ETH_ALEN);
+
+	err = team_port_enter(team, port);
+	if (err) {
+		netdev_err(dev, "Device %s failed to enter team mode\n",
+			   portname);
+		goto err_port_enter;
+	}
+
+	err = dev_open(port_dev);
+	if (err) {
+		netdev_dbg(dev, "Device %s opening failed\n",
+			   portname);
+		goto err_dev_open;
+	}
+
+	err = netdev_set_master(port_dev, dev);
+	if (err) {
+		netdev_err(dev, "Device %s failed to set master\n", portname);
+		goto err_set_master;
+	}
+
+	err = netdev_rx_handler_register(port_dev, team_handle_frame,
+					 port);
+	if (err) {
+		netdev_err(dev, "Device %s failed to register rx_handler\n",
+			   portname);
+		goto err_handler_register;
+	}
+
+	team_port_list_add_port(team, port);
+	team_adjust_ops(team);
+	__team_compute_features(team);
+	__team_port_change_check(port, !!netif_carrier_ok(port_dev));
+
+	netdev_info(dev, "Port device %s added\n", portname);
+
+	return 0;
+
+err_handler_register:
+	netdev_set_master(port_dev, NULL);
+
+err_set_master:
+	dev_close(port_dev);
+
+err_dev_open:
+	team_port_leave(team, port);
+	team_port_set_orig_mac(port);
+
+err_port_enter:
+	dev_set_mtu(port_dev, port->orig.mtu);
+
+err_set_mtu:
+	kfree(port);
+
+	return err;
+}
+
+static int team_port_del(struct team *team, struct net_device *port_dev)
+{
+	struct net_device *dev = team->dev;
+	struct team_port *port;
+	char *portname = port_dev->name;
+
+	port = team_port_get_rtnl(port_dev);
+	if (!port || !team_port_find(team, port)) {
+		netdev_err(dev, "Device %s does not act as a port of this team\n",
+			   portname);
+		return -ENOENT;
+	}
+
+	__team_port_change_check(port, false);
+	team_port_list_del_port(team, port);
+	team_adjust_ops(team);
+	netdev_rx_handler_unregister(port_dev);
+	netdev_set_master(port_dev, NULL);
+	dev_close(port_dev);
+	team_port_leave(team, port);
+	team_port_set_orig_mac(port);
+	dev_set_mtu(port_dev, port->orig.mtu);
+	synchronize_rcu();
+	kfree(port);
+	netdev_info(dev, "Port device %s removed\n", portname);
+	__team_compute_features(team);
+
+	return 0;
+}
+
+
+/*****************
+ * Net device ops
+ *****************/
+
+static const char team_no_mode_kind[] = "*NOMODE*";
+
+static int team_mode_option_get(struct team *team, void *arg)
+{
+	const char **str = arg;
+
+	*str = team->mode ? team->mode->kind : team_no_mode_kind;
+	return 0;
+}
+
+static int team_mode_option_set(struct team *team, void *arg)
+{
+	const char **str = arg;
+
+	return team_change_mode(team, *str);
+}
+
+static struct team_option team_options[] = {
+	{
+		.name = "mode",
+		.type = TEAM_OPTION_TYPE_STRING,
+		.getter = team_mode_option_get,
+		.setter = team_mode_option_set,
+	},
+};
+
+static int team_init(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	int i;
+
+	team->dev = dev;
+	spin_lock_init(&team->lock);
+
+	team->pcpu_stats = alloc_percpu(struct team_pcpu_stats);
+	if (!team->pcpu_stats)
+		return -ENOMEM;
+
+	for (i = 0; i < TEAM_PORT_HASHENTRIES; i++)
+		INIT_HLIST_HEAD(&team->port_hlist[i]);
+	INIT_LIST_HEAD(&team->port_list);
+
+	team_adjust_ops(team);
+
+	INIT_LIST_HEAD(&team->option_list);
+	team_options_register(team, team_options, ARRAY_SIZE(team_options));
+	netif_carrier_off(dev);
+
+	return 0;
+}
+
+static void team_uninit(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	struct team_port *tmp;
+
+	spin_lock(&team->lock);
+	list_for_each_entry_safe(port, tmp, &team->port_list, list)
+		team_port_del(team, port->dev);
+
+	__team_change_mode(team, NULL); /* cleanup */
+	__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
+	spin_unlock(&team->lock);
+}
+
+static void team_destructor(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+
+	free_percpu(team->pcpu_stats);
+	free_netdev(dev);
+}
+
+static int team_open(struct net_device *dev)
+{
+	netif_carrier_on(dev);
+	return 0;
+}
+
+static int team_close(struct net_device *dev)
+{
+	netif_carrier_off(dev);
+	return 0;
+}
+
+/*
+ * note: already called with rcu_read_lock
+ */
+static netdev_tx_t team_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	bool tx_success = false;
+	unsigned int len = skb->len;
+
+	tx_success = team->ops.transmit(team, skb);
+	if (tx_success) {
+		struct team_pcpu_stats *pcpu_stats;
+
+		pcpu_stats = this_cpu_ptr(team->pcpu_stats);
+		u64_stats_update_begin(&pcpu_stats->syncp);
+		pcpu_stats->tx_packets++;
+		pcpu_stats->tx_bytes += len;
+		u64_stats_update_end(&pcpu_stats->syncp);
+	} else {
+		this_cpu_inc(team->pcpu_stats->tx_dropped);
+	}
+
+	return NETDEV_TX_OK;
+}
+
+static void team_change_rx_flags(struct net_device *dev, int change)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	int inc;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		if (change & IFF_PROMISC) {
+			inc = dev->flags & IFF_PROMISC ? 1 : -1;
+			dev_set_promiscuity(port->dev, inc);
+		}
+		if (change & IFF_ALLMULTI) {
+			inc = dev->flags & IFF_ALLMULTI ? 1 : -1;
+			dev_set_allmulti(port->dev, inc);
+		}
+	}
+	rcu_read_unlock();
+}
+
+static void team_set_rx_mode(struct net_device *dev)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		dev_uc_sync(port->dev, dev);
+		dev_mc_sync(port->dev, dev);
+	}
+	rcu_read_unlock();
+}
+
+static int team_set_mac_address(struct net_device *dev, void *p)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	struct sockaddr *addr = p;
+
+	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list)
+		if (team->ops.port_change_mac)
+			team->ops.port_change_mac(team, port);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int team_change_mtu(struct net_device *dev, int new_mtu)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+	int err;
+
+	/*
+	 * Alhough this is reader, it's guarded by team lock. It's not possible
+	 * to traverse list in reverse under rcu_read_lock
+	 */
+	spin_lock(&team->lock);
+	list_for_each_entry(port, &team->port_list, list) {
+		err = dev_set_mtu(port->dev, new_mtu);
+		if (err) {
+			netdev_err(dev, "Device %s failed to change mtu",
+				   port->dev->name);
+			goto unwind;
+		}
+	}
+	spin_unlock(&team->lock);
+
+	dev->mtu = new_mtu;
+
+	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(port, &team->port_list, list)
+		dev_set_mtu(port->dev, dev->mtu);
+	spin_unlock(&team->lock);
+
+	return err;
+}
+
+static struct rtnl_link_stats64 *
+team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_pcpu_stats *p;
+	u64 rx_packets, rx_bytes, rx_multicast, tx_packets, tx_bytes;
+	u32 rx_dropped = 0, tx_dropped = 0;
+	unsigned int start;
+	int i;
+
+	for_each_possible_cpu(i) {
+		p = per_cpu_ptr(team->pcpu_stats, i);
+		do {
+			start = u64_stats_fetch_begin_bh(&p->syncp);
+			rx_packets	= p->rx_packets;
+			rx_bytes	= p->rx_bytes;
+			rx_multicast	= p->rx_multicast;
+			tx_packets	= p->tx_packets;
+			tx_bytes	= p->tx_bytes;
+		} while (u64_stats_fetch_retry_bh(&p->syncp, start));
+
+		stats->rx_packets	+= rx_packets;
+		stats->rx_bytes		+= rx_bytes;
+		stats->multicast	+= rx_multicast;
+		stats->tx_packets	+= tx_packets;
+		stats->tx_bytes		+= tx_bytes;
+		/*
+		 * rx_dropped & tx_dropped are u32, updated
+		 * without syncp protection.
+		 */
+		rx_dropped	+= p->rx_dropped;
+		tx_dropped	+= p->tx_dropped;
+	}
+	stats->rx_dropped	= rx_dropped;
+	stats->tx_dropped	= tx_dropped;
+	return stats;
+}
+
+static void team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		const struct net_device_ops *ops = port->dev->netdev_ops;
+
+		if (ops->ndo_vlan_rx_add_vid)
+			ops->ndo_vlan_rx_add_vid(port->dev, vid);
+	}
+	rcu_read_unlock();
+}
+
+static void team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
+{
+	struct team *team = netdev_priv(dev);
+	struct team_port *port;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		const struct net_device_ops *ops = port->dev->netdev_ops;
+
+		if (ops->ndo_vlan_rx_kill_vid)
+			ops->ndo_vlan_rx_kill_vid(port->dev, vid);
+	}
+	rcu_read_unlock();
+}
+
+static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
+{
+	struct team *team = netdev_priv(dev);
+	int err;
+
+	spin_lock(&team->lock);
+	err = team_port_add(team, port_dev);
+	spin_unlock(&team->lock);
+	return err;
+}
+
+static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
+{
+	struct team *team = netdev_priv(dev);
+	int err;
+
+	spin_lock(&team->lock);
+	err = team_port_del(team, port_dev);
+	spin_unlock(&team->lock);
+	return err;
+}
+
+static const struct net_device_ops team_netdev_ops = {
+	.ndo_init		= team_init,
+	.ndo_uninit		= team_uninit,
+	.ndo_open		= team_open,
+	.ndo_stop		= team_close,
+	.ndo_start_xmit		= team_xmit,
+	.ndo_change_rx_flags	= team_change_rx_flags,
+	.ndo_set_rx_mode	= team_set_rx_mode,
+	.ndo_set_mac_address	= team_set_mac_address,
+	.ndo_change_mtu		= team_change_mtu,
+	.ndo_get_stats64	= team_get_stats64,
+	.ndo_vlan_rx_add_vid	= team_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid	= team_vlan_rx_kill_vid,
+	.ndo_add_slave		= team_add_slave,
+	.ndo_del_slave		= team_del_slave,
+};
+
+
+/***********************
+ * rt netlink interface
+ ***********************/
+
+static void team_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->netdev_ops = &team_netdev_ops;
+	dev->destructor	= team_destructor;
+	dev->tx_queue_len = 0;
+	dev->flags |= IFF_MULTICAST;
+	dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING);
+
+	/*
+	 * Indicate we support unicast address filtering. That way core won't
+	 * bring us to promisc mode in case a unicast addr is added.
+	 * Let this up to underlay drivers.
+	 */
+	dev->priv_flags |= IFF_UNICAST_FLT;
+
+	dev->features |= NETIF_F_LLTX;
+	dev->features |= NETIF_F_GRO;
+	dev->hw_features = NETIF_F_HW_VLAN_TX |
+			   NETIF_F_HW_VLAN_RX |
+			   NETIF_F_HW_VLAN_FILTER;
+
+	dev->features |= dev->hw_features;
+}
+
+static int team_newlink(struct net *src_net, struct net_device *dev,
+			struct nlattr *tb[], struct nlattr *data[])
+{
+	int err;
+
+	if (tb[IFLA_ADDRESS] == NULL)
+		random_ether_addr(dev->dev_addr);
+
+	err = register_netdevice(dev);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+static int team_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+	return 0;
+}
+
+static struct rtnl_link_ops team_link_ops __read_mostly = {
+	.kind		= DRV_NAME,
+	.priv_size	= sizeof(struct team),
+	.setup		= team_setup,
+	.newlink	= team_newlink,
+	.validate	= team_validate,
+};
+
+
+/***********************************
+ * Generic netlink custom interface
+ ***********************************/
+
+static struct genl_family team_nl_family = {
+	.id		= GENL_ID_GENERATE,
+	.name		= TEAM_GENL_NAME,
+	.version	= TEAM_GENL_VERSION,
+	.maxattr	= TEAM_ATTR_MAX,
+	.netnsok	= true,
+};
+
+static const struct nla_policy team_nl_policy[TEAM_ATTR_MAX + 1] = {
+	[TEAM_ATTR_UNSPEC]			= { .type = NLA_UNSPEC, },
+	[TEAM_ATTR_TEAM_IFINDEX]		= { .type = NLA_U32 },
+	[TEAM_ATTR_LIST_OPTION]			= { .type = NLA_NESTED },
+	[TEAM_ATTR_LIST_PORT]			= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+team_nl_option_policy[TEAM_ATTR_OPTION_MAX + 1] = {
+	[TEAM_ATTR_OPTION_UNSPEC]		= { .type = NLA_UNSPEC, },
+	[TEAM_ATTR_OPTION_NAME] = {
+		.type = NLA_STRING,
+		.len = TEAM_STRING_MAX_LEN,
+	},
+	[TEAM_ATTR_OPTION_CHANGED]		= { .type = NLA_FLAG },
+	[TEAM_ATTR_OPTION_TYPE]			= { .type = NLA_U8 },
+	[TEAM_ATTR_OPTION_DATA] = {
+		.type = NLA_BINARY,
+		.len = TEAM_STRING_MAX_LEN,
+	},
+};
+
+static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int err;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+			  &team_nl_family, 0, TEAM_CMD_NOOP);
+	if (IS_ERR(hdr)) {
+		err = PTR_ERR(hdr);
+		goto err_msg_put;
+	}
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);
+
+err_msg_put:
+	nlmsg_free(msg);
+
+	return err;
+}
+
+/*
+ * Netlink cmd functions should be locked by following two functions.
+ * To ensure team_uninit would not be called in between, hold rcu_read_lock
+ * all the time.
+ */
+static struct team *team_nl_team_get(struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	int ifindex;
+	struct net_device *dev;
+	struct team *team;
+
+	if (!info->attrs[TEAM_ATTR_TEAM_IFINDEX])
+		return NULL;
+
+	ifindex = nla_get_u32(info->attrs[TEAM_ATTR_TEAM_IFINDEX]);
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (!dev || dev->netdev_ops != &team_netdev_ops) {
+		rcu_read_unlock();
+		return NULL;
+	}
+
+	team = netdev_priv(dev);
+	spin_lock(&team->lock);
+	return team;
+}
+
+static void team_nl_team_put(struct team *team)
+{
+	spin_unlock(&team->lock);
+	rcu_read_unlock();
+}
+
+static int team_nl_send_generic(struct genl_info *info, struct team *team,
+				int (*fill_func)(struct sk_buff *skb,
+						 struct genl_info *info,
+						 int flags, struct team *team))
+{
+	struct sk_buff *skb;
+	int err;
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = fill_func(skb, info, NLM_F_ACK, team);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_unicast(genl_info_net(info), skb, info->snd_pid);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+static int team_nl_fill_options_get_changed(struct sk_buff *skb,
+					    u32 pid, u32 seq, int flags,
+					    struct team *team,
+					    struct team_option *changed_option)
+{
+	struct nlattr *option_list;
+	void *hdr;
+	struct team_option *option;
+
+	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
+			  TEAM_CMD_OPTIONS_GET);
+	if (IS_ERR(hdr))
+		return PTR_ERR(hdr);
+
+	NLA_PUT_U32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex);
+	option_list = nla_nest_start(skb, TEAM_ATTR_LIST_OPTION);
+	if (!option_list)
+		return -EMSGSIZE;
+
+	list_for_each_entry(option, &team->option_list, list) {
+		struct nlattr *option_item;
+		long arg;
+
+		option_item = nla_nest_start(skb, TEAM_ATTR_ITEM_OPTION);
+		if (!option_item)
+			goto nla_put_failure;
+		NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_NAME, option->name);
+		if (option == changed_option)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_OPTION_CHANGED);
+		switch (option->type) {
+		case TEAM_OPTION_TYPE_U32:
+			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_U32);
+			team_option_get(team, option, &arg);
+			NLA_PUT_U32(skb, TEAM_ATTR_OPTION_DATA, arg);
+			break;
+		case TEAM_OPTION_TYPE_STRING:
+			NLA_PUT_U8(skb, TEAM_ATTR_OPTION_TYPE, NLA_STRING);
+			team_option_get(team, option, &arg);
+			NLA_PUT_STRING(skb, TEAM_ATTR_OPTION_DATA,
+				       (char *) arg);
+			break;
+		default:
+			BUG();
+		}
+		nla_nest_end(skb, option_item);
+	}
+
+	nla_nest_end(skb, option_list);
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int team_nl_fill_options_get(struct sk_buff *skb,
+				    struct genl_info *info, int flags,
+				    struct team *team)
+{
+	return team_nl_fill_options_get_changed(skb, info->snd_pid,
+						info->snd_seq, NLM_F_ACK,
+						team, NULL);
+}
+
+static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct team *team;
+	int err;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = team_nl_send_generic(info, team, team_nl_fill_options_get);
+
+	team_nl_team_put(team);
+
+	return err;
+}
+
+static int team_nl_cmd_options_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct team *team;
+	int err = 0;
+	int i;
+	struct nlattr *nl_option;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = -EINVAL;
+	if (!info->attrs[TEAM_ATTR_LIST_OPTION]) {
+		err = -EINVAL;
+		goto team_put;
+	}
+
+	nla_for_each_nested(nl_option, info->attrs[TEAM_ATTR_LIST_OPTION], i) {
+		struct nlattr *mode_attrs[TEAM_ATTR_OPTION_MAX + 1];
+		enum team_option_type opt_type;
+		struct team_option *option;
+		char *opt_name;
+		bool opt_found = false;
+
+		if (nla_type(nl_option) != TEAM_ATTR_ITEM_OPTION) {
+			err = -EINVAL;
+			goto team_put;
+		}
+		err = nla_parse_nested(mode_attrs, TEAM_ATTR_OPTION_MAX,
+				       nl_option, team_nl_option_policy);
+		if (err)
+			goto team_put;
+		if (!mode_attrs[TEAM_ATTR_OPTION_NAME] ||
+		    !mode_attrs[TEAM_ATTR_OPTION_TYPE] ||
+		    !mode_attrs[TEAM_ATTR_OPTION_DATA]) {
+			err = -EINVAL;
+			goto team_put;
+		}
+		switch (nla_get_u8(mode_attrs[TEAM_ATTR_OPTION_TYPE])) {
+		case NLA_U32:
+			opt_type = TEAM_OPTION_TYPE_U32;
+			break;
+		case NLA_STRING:
+			opt_type = TEAM_OPTION_TYPE_STRING;
+			break;
+		default:
+			goto team_put;
+		}
+
+		opt_name = nla_data(mode_attrs[TEAM_ATTR_OPTION_NAME]);
+		list_for_each_entry(option, &team->option_list, list) {
+			long arg;
+			struct nlattr *opt_data_attr;
+
+			if (option->type != opt_type ||
+			    strcmp(option->name, opt_name))
+				continue;
+			opt_found = true;
+			opt_data_attr = mode_attrs[TEAM_ATTR_OPTION_DATA];
+			switch (opt_type) {
+			case TEAM_OPTION_TYPE_U32:
+				arg = nla_get_u32(opt_data_attr);
+				break;
+			case TEAM_OPTION_TYPE_STRING:
+				arg = (long) nla_data(opt_data_attr);
+				break;
+			default:
+				BUG();
+			}
+			err = team_option_set(team, option, &arg);
+			if (err)
+				goto team_put;
+		}
+		if (!opt_found) {
+			err = -ENOENT;
+			goto team_put;
+		}
+	}
+
+team_put:
+	team_nl_team_put(team);
+
+	return err;
+}
+
+static int team_nl_fill_port_list_get_changed(struct sk_buff *skb,
+					      u32 pid, u32 seq, int flags,
+					      struct team *team,
+					      struct team_port *changed_port)
+{
+	struct nlattr *port_list;
+	void *hdr;
+	struct team_port *port;
+
+	hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags,
+			  TEAM_CMD_PORT_LIST_GET);
+	if (IS_ERR(hdr))
+		return PTR_ERR(hdr);
+
+	NLA_PUT_U32(skb, TEAM_ATTR_TEAM_IFINDEX, team->dev->ifindex);
+	port_list = nla_nest_start(skb, TEAM_ATTR_LIST_PORT);
+	if (!port_list)
+		return -EMSGSIZE;
+
+	list_for_each_entry(port, &team->port_list, list) {
+		struct nlattr *port_item;
+
+		port_item = nla_nest_start(skb, TEAM_ATTR_ITEM_PORT);
+		if (!port_item)
+			goto nla_put_failure;
+		NLA_PUT_U32(skb, TEAM_ATTR_PORT_IFINDEX, port->dev->ifindex);
+		if (port == changed_port)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_CHANGED);
+		if (port->linkup)
+			NLA_PUT_FLAG(skb, TEAM_ATTR_PORT_LINKUP);
+		NLA_PUT_U32(skb, TEAM_ATTR_PORT_SPEED, port->speed);
+		NLA_PUT_U8(skb, TEAM_ATTR_PORT_DUPLEX, port->duplex);
+		nla_nest_end(skb, port_item);
+	}
+
+	nla_nest_end(skb, port_list);
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int team_nl_fill_port_list_get(struct sk_buff *skb,
+				      struct genl_info *info, int flags,
+				      struct team *team)
+{
+	return team_nl_fill_port_list_get_changed(skb, info->snd_pid,
+						  info->snd_seq, NLM_F_ACK,
+						  team, NULL);
+}
+
+static int team_nl_cmd_port_list_get(struct sk_buff *skb,
+				     struct genl_info *info)
+{
+	struct team *team;
+	int err;
+
+	team = team_nl_team_get(info);
+	if (!team)
+		return -EINVAL;
+
+	err = team_nl_send_generic(info, team, team_nl_fill_port_list_get);
+
+	team_nl_team_put(team);
+
+	return err;
+}
+
+static struct genl_ops team_nl_ops[] = {
+	{
+		.cmd = TEAM_CMD_NOOP,
+		.doit = team_nl_cmd_noop,
+		.policy = team_nl_policy,
+	},
+	{
+		.cmd = TEAM_CMD_OPTIONS_SET,
+		.doit = team_nl_cmd_options_set,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TEAM_CMD_OPTIONS_GET,
+		.doit = team_nl_cmd_options_get,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = TEAM_CMD_PORT_LIST_GET,
+		.doit = team_nl_cmd_port_list_get,
+		.policy = team_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+static struct genl_multicast_group team_change_event_mcgrp = {
+	.name = TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME,
+};
+
+static int team_nl_send_event_options_get(struct team *team,
+					  struct team_option *changed_option)
+{
+	struct sk_buff *skb;
+	int err;
+	struct net *net = dev_net(team->dev);
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = team_nl_fill_options_get_changed(skb, 0, 0, 0, team,
+					       changed_option);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_multicast_netns(net, skb, 0, team_change_event_mcgrp.id,
+				      GFP_KERNEL);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+static int team_nl_send_event_port_list_get(struct team_port *port)
+{
+	struct sk_buff *skb;
+	int err;
+	struct net *net = dev_net(port->team->dev);
+
+	skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		return -ENOMEM;
+
+	err = team_nl_fill_port_list_get_changed(skb, 0, 0, 0,
+						 port->team, port);
+	if (err < 0)
+		goto err_fill;
+
+	err = genlmsg_multicast_netns(net, skb, 0, team_change_event_mcgrp.id,
+				      GFP_KERNEL);
+	return err;
+
+err_fill:
+	nlmsg_free(skb);
+	return err;
+}
+
+static int team_nl_init(void)
+{
+	int err;
+
+	err = genl_register_family_with_ops(&team_nl_family, team_nl_ops,
+					    ARRAY_SIZE(team_nl_ops));
+	if (err)
+		return err;
+
+	err = genl_register_mc_group(&team_nl_family, &team_change_event_mcgrp);
+	if (err)
+		goto err_change_event_grp_reg;
+
+	return 0;
+
+err_change_event_grp_reg:
+	genl_unregister_family(&team_nl_family);
+
+	return err;
+}
+
+static void team_nl_fini(void)
+{
+	genl_unregister_family(&team_nl_family);
+}
+
+
+/******************
+ * Change checkers
+ ******************/
+
+static void __team_options_change_check(struct team *team,
+					struct team_option *changed_option)
+{
+	int err;
+
+	err = team_nl_send_event_options_get(team, changed_option);
+	if (err)
+		netdev_warn(team->dev, "Failed to send options change via netlink\n");
+}
+
+/* rtnl lock is held */
+static void __team_port_change_check(struct team_port *port, bool linkup)
+{
+	int err;
+
+	if (port->linkup == linkup)
+		return;
+
+	port->linkup = linkup;
+	if (linkup) {
+		struct ethtool_cmd ecmd;
+
+		err = __ethtool_get_settings(port->dev, &ecmd);
+		if (!err) {
+			port->speed = ethtool_cmd_speed(&ecmd);
+			port->duplex = ecmd.duplex;
+			goto send_event;
+		}
+	}
+	port->speed = 0;
+	port->duplex = 0;
+
+send_event:
+	err = team_nl_send_event_port_list_get(port);
+	if (err)
+		netdev_warn(port->team->dev, "Failed to send port change of device %s via netlink\n",
+			    port->dev->name);
+
+}
+
+static void team_port_change_check(struct team_port *port, bool linkup)
+{
+	struct team *team = port->team;
+
+	spin_lock(&team->lock);
+	__team_port_change_check(port, linkup);
+	spin_unlock(&team->lock);
+}
+
+/************************************
+ * Net device notifier event handler
+ ************************************/
+
+static int team_device_event(struct notifier_block *unused,
+			     unsigned long event, void *ptr)
+{
+	struct net_device *dev = (struct net_device *) ptr;
+	struct team_port *port;
+
+	port = team_port_get_rtnl(dev);
+	if (!port)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UP:
+		if (netif_carrier_ok(dev))
+			team_port_change_check(port, true);
+	case NETDEV_DOWN:
+		team_port_change_check(port, false);
+	case NETDEV_CHANGE:
+		if (netif_running(port->dev))
+			team_port_change_check(port,
+					       !!netif_carrier_ok(port->dev));
+		break;
+	case NETDEV_UNREGISTER:
+		team_del_slave(port->team->dev, dev);
+		break;
+	case NETDEV_FEAT_CHANGE:
+		team_compute_features(port->team);
+		break;
+	case NETDEV_CHANGEMTU:
+		/* Forbid to change mtu of underlaying device */
+		return NOTIFY_BAD;
+	case NETDEV_PRE_TYPE_CHANGE:
+		/* Forbid to change type of underlaying device */
+		return NOTIFY_BAD;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block team_notifier_block __read_mostly = {
+	.notifier_call = team_device_event,
+};
+
+
+/***********************
+ * Module init and exit
+ ***********************/
+
+static int __init team_module_init(void)
+{
+	int err;
+
+	register_netdevice_notifier(&team_notifier_block);
+
+	err = rtnl_link_register(&team_link_ops);
+	if (err)
+		goto err_rtnl_reg;
+
+	err = team_nl_init();
+	if (err)
+		goto err_nl_init;
+
+	return 0;
+
+err_nl_init:
+	rtnl_link_unregister(&team_link_ops);
+
+err_rtnl_reg:
+	unregister_netdevice_notifier(&team_notifier_block);
+
+	return err;
+}
+
+static void __exit team_module_exit(void)
+{
+	team_nl_fini();
+	rtnl_link_unregister(&team_link_ops);
+	unregister_netdevice_notifier(&team_notifier_block);
+}
+
+module_init(team_module_init);
+module_exit(team_module_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Ethernet team device driver");
+MODULE_ALIAS_RTNL_LINK(DRV_NAME);
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
new file mode 100644
index 000000000000..6fe920c440b3
--- /dev/null
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -0,0 +1,137 @@
+/*
+ * net/drivers/team/team_mode_activebackup.c - Active-backup mode for team
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/rtnetlink.h>
+#include <linux/if_team.h>
+
+struct ab_priv {
+	struct team_port __rcu *active_port;
+};
+
+static struct ab_priv *ab_priv(struct team *team)
+{
+	return (struct ab_priv *) &team->mode_priv;
+}
+
+static rx_handler_result_t ab_receive(struct team *team, struct team_port *port,
+				      struct sk_buff *skb) {
+	struct team_port *active_port;
+
+	active_port = rcu_dereference(ab_priv(team)->active_port);
+	if (active_port != port)
+		return RX_HANDLER_EXACT;
+	return RX_HANDLER_ANOTHER;
+}
+
+static bool ab_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct team_port *active_port;
+
+	active_port = rcu_dereference(ab_priv(team)->active_port);
+	if (unlikely(!active_port))
+		goto drop;
+	skb->dev = active_port->dev;
+	if (dev_queue_xmit(skb))
+		return false;
+	return true;
+
+drop:
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+static void ab_port_leave(struct team *team, struct team_port *port)
+{
+	if (ab_priv(team)->active_port == port)
+		rcu_assign_pointer(ab_priv(team)->active_port, NULL);
+}
+
+static int ab_active_port_get(struct team *team, void *arg)
+{
+	u32 *ifindex = arg;
+
+	*ifindex = 0;
+	if (ab_priv(team)->active_port)
+		*ifindex = ab_priv(team)->active_port->dev->ifindex;
+	return 0;
+}
+
+static int ab_active_port_set(struct team *team, void *arg)
+{
+	u32 *ifindex = arg;
+	struct team_port *port;
+
+	list_for_each_entry_rcu(port, &team->port_list, list) {
+		if (port->dev->ifindex == *ifindex) {
+			rcu_assign_pointer(ab_priv(team)->active_port, port);
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static struct team_option ab_options[] = {
+	{
+		.name = "activeport",
+		.type = TEAM_OPTION_TYPE_U32,
+		.getter = ab_active_port_get,
+		.setter = ab_active_port_set,
+	},
+};
+
+int ab_init(struct team *team)
+{
+	team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
+	return 0;
+}
+
+void ab_exit(struct team *team)
+{
+	team_options_unregister(team, ab_options, ARRAY_SIZE(ab_options));
+}
+
+static const struct team_mode_ops ab_mode_ops = {
+	.init			= ab_init,
+	.exit			= ab_exit,
+	.receive		= ab_receive,
+	.transmit		= ab_transmit,
+	.port_leave		= ab_port_leave,
+};
+
+static struct team_mode ab_mode = {
+	.kind		= "activebackup",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct ab_priv),
+	.ops		= &ab_mode_ops,
+};
+
+static int __init ab_init_module(void)
+{
+	return team_mode_register(&ab_mode);
+}
+
+static void __exit ab_cleanup_module(void)
+{
+	team_mode_unregister(&ab_mode);
+}
+
+module_init(ab_init_module);
+module_exit(ab_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Active-backup mode for team");
+MODULE_ALIAS("team-mode-activebackup");
diff --git a/drivers/net/team/team_mode_roundrobin.c b/drivers/net/team/team_mode_roundrobin.c
new file mode 100644
index 000000000000..a0e8f806331a
--- /dev/null
+++ b/drivers/net/team/team_mode_roundrobin.c
@@ -0,0 +1,107 @@
+/*
+ * net/drivers/team/team_mode_roundrobin.c - Round-robin mode for team
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/if_team.h>
+
+struct rr_priv {
+	unsigned int sent_packets;
+};
+
+static struct rr_priv *rr_priv(struct team *team)
+{
+	return (struct rr_priv *) &team->mode_priv;
+}
+
+static struct team_port *__get_first_port_up(struct team *team,
+					     struct team_port *port)
+{
+	struct team_port *cur;
+
+	if (port->linkup)
+		return port;
+	cur = port;
+	list_for_each_entry_continue_rcu(cur, &team->port_list, list)
+		if (cur->linkup)
+			return cur;
+	list_for_each_entry_rcu(cur, &team->port_list, list) {
+		if (cur == port)
+			break;
+		if (cur->linkup)
+			return cur;
+	}
+	return NULL;
+}
+
+static bool rr_transmit(struct team *team, struct sk_buff *skb)
+{
+	struct team_port *port;
+	int port_index;
+
+	port_index = rr_priv(team)->sent_packets++ % team->port_count;
+	port = team_get_port_by_index_rcu(team, port_index);
+	port = __get_first_port_up(team, port);
+	if (unlikely(!port))
+		goto drop;
+	skb->dev = port->dev;
+	if (dev_queue_xmit(skb))
+		return false;
+	return true;
+
+drop:
+	dev_kfree_skb_any(skb);
+	return false;
+}
+
+static int rr_port_enter(struct team *team, struct team_port *port)
+{
+	return team_port_set_team_mac(port);
+}
+
+static void rr_port_change_mac(struct team *team, struct team_port *port)
+{
+	team_port_set_team_mac(port);
+}
+
+static const struct team_mode_ops rr_mode_ops = {
+	.transmit		= rr_transmit,
+	.port_enter		= rr_port_enter,
+	.port_change_mac	= rr_port_change_mac,
+};
+
+static struct team_mode rr_mode = {
+	.kind		= "roundrobin",
+	.owner		= THIS_MODULE,
+	.priv_size	= sizeof(struct rr_priv),
+	.ops		= &rr_mode_ops,
+};
+
+static int __init rr_init_module(void)
+{
+	return team_mode_register(&rr_mode);
+}
+
+static void __exit rr_cleanup_module(void)
+{
+	team_mode_unregister(&rr_mode);
+}
+
+module_init(rr_init_module);
+module_exit(rr_cleanup_module);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jiri Pirko <jpirko@redhat.com>");
+MODULE_DESCRIPTION("Round-robin mode for team");
+MODULE_ALIAS("team-mode-roundrobin");
diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 619b5657af77..0b091b32267d 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -185,6 +185,7 @@ header-y += if_pppol2tp.h
 header-y += if_pppox.h
 header-y += if_slip.h
 header-y += if_strip.h
+header-y += if_team.h
 header-y += if_tr.h
 header-y += if_tun.h
 header-y += if_tunnel.h
diff --git a/include/linux/if.h b/include/linux/if.h
index db20bd4fd16b..06b6ef60c821 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -79,6 +79,7 @@
 #define IFF_TX_SKB_SHARING	0x10000	/* The interface supports sharing
 					 * skbs on transmit */
 #define IFF_UNICAST_FLT	0x20000		/* Supports unicast filtering	*/
+#define IFF_TEAM_PORT	0x40000		/* device used as team port */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
new file mode 100644
index 000000000000..14f6388f5460
--- /dev/null
+++ b/include/linux/if_team.h
@@ -0,0 +1,242 @@
+/*
+ * include/linux/if_team.h - Network team device driver header
+ * Copyright (c) 2011 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _LINUX_IF_TEAM_H_
+#define _LINUX_IF_TEAM_H_
+
+#ifdef __KERNEL__
+
+struct team_pcpu_stats {
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_multicast;
+	u64			tx_packets;
+	u64			tx_bytes;
+	struct u64_stats_sync	syncp;
+	u32			rx_dropped;
+	u32			tx_dropped;
+};
+
+struct team;
+
+struct team_port {
+	struct net_device *dev;
+	struct hlist_node hlist; /* node in hash list */
+	struct list_head list; /* node in ordinary list */
+	struct team *team;
+	int index;
+
+	/*
+	 * A place for storing original values of the device before it
+	 * become a port.
+	 */
+	struct {
+		unsigned char dev_addr[MAX_ADDR_LEN];
+		unsigned int mtu;
+	} orig;
+
+	bool linkup;
+	u32 speed;
+	u8 duplex;
+
+	struct rcu_head rcu;
+};
+
+struct team_mode_ops {
+	int (*init)(struct team *team);
+	void (*exit)(struct team *team);
+	rx_handler_result_t (*receive)(struct team *team,
+				       struct team_port *port,
+				       struct sk_buff *skb);
+	bool (*transmit)(struct team *team, struct sk_buff *skb);
+	int (*port_enter)(struct team *team, struct team_port *port);
+	void (*port_leave)(struct team *team, struct team_port *port);
+	void (*port_change_mac)(struct team *team, struct team_port *port);
+};
+
+enum team_option_type {
+	TEAM_OPTION_TYPE_U32,
+	TEAM_OPTION_TYPE_STRING,
+};
+
+struct team_option {
+	struct list_head list;
+	const char *name;
+	enum team_option_type type;
+	int (*getter)(struct team *team, void *arg);
+	int (*setter)(struct team *team, void *arg);
+};
+
+struct team_mode {
+	struct list_head list;
+	const char *kind;
+	struct module *owner;
+	size_t priv_size;
+	const struct team_mode_ops *ops;
+};
+
+#define TEAM_PORT_HASHBITS 4
+#define TEAM_PORT_HASHENTRIES (1 << TEAM_PORT_HASHBITS)
+
+#define TEAM_MODE_PRIV_LONGS 4
+#define TEAM_MODE_PRIV_SIZE (sizeof(long) * TEAM_MODE_PRIV_LONGS)
+
+struct team {
+	struct net_device *dev; /* associated netdevice */
+	struct team_pcpu_stats __percpu *pcpu_stats;
+
+	spinlock_t lock; /* used for overall locking, e.g. port lists write */
+
+	/*
+	 * port lists with port count
+	 */
+	int port_count;
+	struct hlist_head port_hlist[TEAM_PORT_HASHENTRIES];
+	struct list_head port_list;
+
+	struct list_head option_list;
+
+	const struct team_mode *mode;
+	struct team_mode_ops ops;
+	long mode_priv[TEAM_MODE_PRIV_LONGS];
+};
+
+static inline struct hlist_head *team_port_index_hash(struct team *team,
+						      int port_index)
+{
+	return &team->port_hlist[port_index & (TEAM_PORT_HASHENTRIES - 1)];
+}
+
+static inline struct team_port *team_get_port_by_index(struct team *team,
+						       int port_index)
+{
+	struct hlist_node *p;
+	struct team_port *port;
+	struct hlist_head *head = team_port_index_hash(team, port_index);
+
+	hlist_for_each_entry(port, p, head, hlist)
+		if (port->index == port_index)
+			return port;
+	return NULL;
+}
+static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
+							   int port_index)
+{
+	struct hlist_node *p;
+	struct team_port *port;
+	struct hlist_head *head = team_port_index_hash(team, port_index);
+
+	hlist_for_each_entry_rcu(port, p, head, hlist)
+		if (port->index == port_index)
+			return port;
+	return NULL;
+}
+
+extern int team_port_set_team_mac(struct team_port *port);
+extern void team_options_register(struct team *team,
+				  struct team_option *option,
+				  size_t option_count);
+extern void team_options_unregister(struct team *team,
+				    struct team_option *option,
+				    size_t option_count);
+extern int team_mode_register(struct team_mode *mode);
+extern int team_mode_unregister(struct team_mode *mode);
+
+#endif /* __KERNEL__ */
+
+#define TEAM_STRING_MAX_LEN 32
+
+/**********************************
+ * NETLINK_GENERIC netlink family.
+ **********************************/
+
+enum {
+	TEAM_CMD_NOOP,
+	TEAM_CMD_OPTIONS_SET,
+	TEAM_CMD_OPTIONS_GET,
+	TEAM_CMD_PORT_LIST_GET,
+
+	__TEAM_CMD_MAX,
+	TEAM_CMD_MAX = (__TEAM_CMD_MAX - 1),
+};
+
+enum {
+	TEAM_ATTR_UNSPEC,
+	TEAM_ATTR_TEAM_IFINDEX,		/* u32 */
+	TEAM_ATTR_LIST_OPTION,		/* nest */
+	TEAM_ATTR_LIST_PORT,		/* nest */
+
+	__TEAM_ATTR_MAX,
+	TEAM_ATTR_MAX = __TEAM_ATTR_MAX - 1,
+};
+
+/* Nested layout of get/set msg:
+ *
+ *	[TEAM_ATTR_LIST_OPTION]
+ *		[TEAM_ATTR_ITEM_OPTION]
+ *			[TEAM_ATTR_OPTION_*], ...
+ *		[TEAM_ATTR_ITEM_OPTION]
+ *			[TEAM_ATTR_OPTION_*], ...
+ *		...
+ *	[TEAM_ATTR_LIST_PORT]
+ *		[TEAM_ATTR_ITEM_PORT]
+ *			[TEAM_ATTR_PORT_*], ...
+ *		[TEAM_ATTR_ITEM_PORT]
+ *			[TEAM_ATTR_PORT_*], ...
+ *		...
+ */
+
+enum {
+	TEAM_ATTR_ITEM_OPTION_UNSPEC,
+	TEAM_ATTR_ITEM_OPTION,		/* nest */
+
+	__TEAM_ATTR_ITEM_OPTION_MAX,
+	TEAM_ATTR_ITEM_OPTION_MAX = __TEAM_ATTR_ITEM_OPTION_MAX - 1,
+};
+
+enum {
+	TEAM_ATTR_OPTION_UNSPEC,
+	TEAM_ATTR_OPTION_NAME,		/* string */
+	TEAM_ATTR_OPTION_CHANGED,	/* flag */
+	TEAM_ATTR_OPTION_TYPE,		/* u8 */
+	TEAM_ATTR_OPTION_DATA,		/* dynamic */
+
+	__TEAM_ATTR_OPTION_MAX,
+	TEAM_ATTR_OPTION_MAX = __TEAM_ATTR_OPTION_MAX - 1,
+};
+
+enum {
+	TEAM_ATTR_ITEM_PORT_UNSPEC,
+	TEAM_ATTR_ITEM_PORT,		/* nest */
+
+	__TEAM_ATTR_ITEM_PORT_MAX,
+	TEAM_ATTR_ITEM_PORT_MAX = __TEAM_ATTR_ITEM_PORT_MAX - 1,
+};
+
+enum {
+	TEAM_ATTR_PORT_UNSPEC,
+	TEAM_ATTR_PORT_IFINDEX,		/* u32 */
+	TEAM_ATTR_PORT_CHANGED,		/* flag */
+	TEAM_ATTR_PORT_LINKUP,		/* flag */
+	TEAM_ATTR_PORT_SPEED,		/* u32 */
+	TEAM_ATTR_PORT_DUPLEX,		/* u8 */
+
+	__TEAM_ATTR_PORT_MAX,
+	TEAM_ATTR_PORT_MAX = __TEAM_ATTR_PORT_MAX - 1,
+};
+
+/*
+ * NETLINK_GENERIC related info
+ */
+#define TEAM_GENL_NAME "team"
+#define TEAM_GENL_VERSION 0x1
+#define TEAM_GENL_CHANGE_EVENT_MC_GRP_NAME "change_event"
+
+#endif /* _LINUX_IF_TEAM_H_ */
-- 
cgit v1.2.3


From 8b5c171bb3dc0686b2647a84e990199c5faa9ef8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 9 Nov 2011 12:07:14 +0000
Subject: neigh: new unresolved queue limits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Le mercredi 09 novembre 2011 à 16:21 -0500, David Miller a écrit :
> From: David Miller <davem@davemloft.net>
> Date: Wed, 09 Nov 2011 16:16:44 -0500 (EST)
>
> > From: Eric Dumazet <eric.dumazet@gmail.com>
> > Date: Wed, 09 Nov 2011 12:14:09 +0100
> >
> >> unres_qlen is the number of frames we are able to queue per unresolved
> >> neighbour. Its default value (3) was never changed and is responsible
> >> for strange drops, especially if IP fragments are used, or multiple
> >> sessions start in parallel. Even a single tcp flow can hit this limit.
> >  ...
> >
> > Ok, I've applied this, let's see what happens :-)
>
> Early answer, build fails.
>
> Please test build this patch with DECNET enabled and resubmit.  The
> decnet neigh layer still refers to the removed ->queue_len member.
>
> Thanks.

Ouch, this was fixed on one machine yesterday, but not the other one I
used this morning, sorry.

[PATCH V5 net-next] neigh: new unresolved queue limits

unres_qlen is the number of frames we are able to queue per unresolved
neighbour. Its default value (3) was never changed and is responsible
for strange drops, especially if IP fragments are used, or multiple
sessions start in parallel. Even a single tcp flow can hit this limit.

$ arp -d 192.168.20.108 ; ping -c 2 -s 8000 192.168.20.108
PING 192.168.20.108 (192.168.20.108) 8000(8028) bytes of data.
8008 bytes from 192.168.20.108: icmp_seq=2 ttl=64 time=0.322 ms

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  10 ++
 include/linux/neighbour.h              |   1 +
 include/net/neighbour.h                |   3 +-
 net/atm/clip.c                         |   2 +-
 net/core/neighbour.c                   | 162 ++++++++++++++++++++++-----------
 net/decnet/dn_neigh.c                  |   2 +-
 net/ipv4/arp.c                         |   2 +-
 net/ipv6/ndisc.c                       |   2 +-
 8 files changed, 128 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index f049a1ca186f..b8867061fce4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -31,6 +31,16 @@ neigh/default/gc_thresh3 - INTEGER
 	when using large numbers of interfaces and when communicating
 	with large numbers of directly-connected peers.
 
+neigh/default/unres_qlen_bytes - INTEGER
+	The maximum number of bytes which may be used by packets
+	queued for each	unresolved address by other network layers.
+	(added in linux 3.3)
+
+neigh/default/unres_qlen - INTEGER
+	The maximum number of packets which may be queued for each
+	unresolved address by other network layers.
+	(deprecated in linux 3.3) : use unres_qlen_bytes instead.
+
 mtu_expires - INTEGER
 	Time, in seconds, that cached PMTU information is kept.
 
diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h
index a7003b7a695d..b188f68a08c9 100644
--- a/include/linux/neighbour.h
+++ b/include/linux/neighbour.h
@@ -116,6 +116,7 @@ enum {
 	NDTPA_PROXY_DELAY,		/* u64, msecs */
 	NDTPA_PROXY_QLEN,		/* u32 */
 	NDTPA_LOCKTIME,			/* u64, msecs */
+	NDTPA_QUEUE_LENBYTES,		/* u32 */
 	__NDTPA_MAX
 };
 #define NDTPA_MAX (__NDTPA_MAX - 1)
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 2720884287c3..7ae5acff96e9 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -59,7 +59,7 @@ struct neigh_parms {
 	int	reachable_time;
 	int	delay_probe_time;
 
-	int	queue_len;
+	int	queue_len_bytes;
 	int	ucast_probes;
 	int	app_probes;
 	int	mcast_probes;
@@ -99,6 +99,7 @@ struct neighbour {
 	rwlock_t		lock;
 	atomic_t		refcnt;
 	struct sk_buff_head	arp_queue;
+	unsigned int		arp_queue_len_bytes;
 	struct timer_list	timer;
 	unsigned long		used;
 	atomic_t		probes;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 852394072fa1..32c41b8a803e 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -329,7 +329,7 @@ static struct neigh_table clip_tbl = {
 		.gc_staletime 		= 60 * HZ,
 		.reachable_time 	= 30 * HZ,
 		.delay_probe_time 	= 5 * HZ,
-		.queue_len 		= 3,
+		.queue_len_bytes 	= 64 * 1024,
 		.ucast_probes 		= 3,
 		.mcast_probes 		= 3,
 		.anycast_delay 		= 1 * HZ,
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 039d51e6c284..2684794458ca 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -238,6 +238,7 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
 				   it to safe state.
 				 */
 				skb_queue_purge(&n->arp_queue);
+				n->arp_queue_len_bytes = 0;
 				n->output = neigh_blackhole;
 				if (n->nud_state & NUD_VALID)
 					n->nud_state = NUD_NOARP;
@@ -702,6 +703,7 @@ void neigh_destroy(struct neighbour *neigh)
 		printk(KERN_WARNING "Impossible event.\n");
 
 	skb_queue_purge(&neigh->arp_queue);
+	neigh->arp_queue_len_bytes = 0;
 
 	dev_put(neigh->dev);
 	neigh_parms_put(neigh->parms);
@@ -842,6 +844,7 @@ static void neigh_invalidate(struct neighbour *neigh)
 		write_lock(&neigh->lock);
 	}
 	skb_queue_purge(&neigh->arp_queue);
+	neigh->arp_queue_len_bytes = 0;
 }
 
 static void neigh_probe(struct neighbour *neigh)
@@ -980,15 +983,20 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 
 	if (neigh->nud_state == NUD_INCOMPLETE) {
 		if (skb) {
-			if (skb_queue_len(&neigh->arp_queue) >=
-			    neigh->parms->queue_len) {
+			while (neigh->arp_queue_len_bytes + skb->truesize >
+			       neigh->parms->queue_len_bytes) {
 				struct sk_buff *buff;
+
 				buff = __skb_dequeue(&neigh->arp_queue);
+				if (!buff)
+					break;
+				neigh->arp_queue_len_bytes -= buff->truesize;
 				kfree_skb(buff);
 				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
 			}
 			skb_dst_force(skb);
 			__skb_queue_tail(&neigh->arp_queue, skb);
+			neigh->arp_queue_len_bytes += skb->truesize;
 		}
 		rc = 1;
 	}
@@ -1175,6 +1183,7 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
 			write_lock_bh(&neigh->lock);
 		}
 		skb_queue_purge(&neigh->arp_queue);
+		neigh->arp_queue_len_bytes = 0;
 	}
 out:
 	if (update_isrouter) {
@@ -1747,7 +1756,11 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 		NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex);
 
 	NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt));
-	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len);
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LENBYTES, parms->queue_len_bytes);
+	/* approximative value for deprecated QUEUE_LEN (in packets) */
+	NLA_PUT_U32(skb, NDTPA_QUEUE_LEN,
+		    DIV_ROUND_UP(parms->queue_len_bytes,
+				 SKB_TRUESIZE(ETH_FRAME_LEN)));
 	NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen);
 	NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes);
 	NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes);
@@ -1974,7 +1987,11 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
 
 			switch (i) {
 			case NDTPA_QUEUE_LEN:
-				p->queue_len = nla_get_u32(tbp[i]);
+				p->queue_len_bytes = nla_get_u32(tbp[i]) *
+						     SKB_TRUESIZE(ETH_FRAME_LEN);
+				break;
+			case NDTPA_QUEUE_LENBYTES:
+				p->queue_len_bytes = nla_get_u32(tbp[i]);
 				break;
 			case NDTPA_PROXY_QLEN:
 				p->proxy_qlen = nla_get_u32(tbp[i]);
@@ -2635,117 +2652,158 @@ EXPORT_SYMBOL(neigh_app_ns);
 
 #ifdef CONFIG_SYSCTL
 
-#define NEIGH_VARS_MAX 19
+static int proc_unres_qlen(ctl_table *ctl, int write, void __user *buffer,
+			   size_t *lenp, loff_t *ppos)
+{
+	int size, ret;
+	ctl_table tmp = *ctl;
+
+	tmp.data = &size;
+	size = DIV_ROUND_UP(*(int *)ctl->data, SKB_TRUESIZE(ETH_FRAME_LEN));
+	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret)
+		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
+	return ret;
+}
+
+enum {
+	NEIGH_VAR_MCAST_PROBE,
+	NEIGH_VAR_UCAST_PROBE,
+	NEIGH_VAR_APP_PROBE,
+	NEIGH_VAR_RETRANS_TIME,
+	NEIGH_VAR_BASE_REACHABLE_TIME,
+	NEIGH_VAR_DELAY_PROBE_TIME,
+	NEIGH_VAR_GC_STALETIME,
+	NEIGH_VAR_QUEUE_LEN,
+	NEIGH_VAR_QUEUE_LEN_BYTES,
+	NEIGH_VAR_PROXY_QLEN,
+	NEIGH_VAR_ANYCAST_DELAY,
+	NEIGH_VAR_PROXY_DELAY,
+	NEIGH_VAR_LOCKTIME,
+	NEIGH_VAR_RETRANS_TIME_MS,
+	NEIGH_VAR_BASE_REACHABLE_TIME_MS,
+	NEIGH_VAR_GC_INTERVAL,
+	NEIGH_VAR_GC_THRESH1,
+	NEIGH_VAR_GC_THRESH2,
+	NEIGH_VAR_GC_THRESH3,
+	NEIGH_VAR_MAX
+};
 
 static struct neigh_sysctl_table {
 	struct ctl_table_header *sysctl_header;
-	struct ctl_table neigh_vars[NEIGH_VARS_MAX];
+	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
 	char *dev_name;
 } neigh_sysctl_template __read_mostly = {
 	.neigh_vars = {
-		{
+		[NEIGH_VAR_MCAST_PROBE] = {
 			.procname	= "mcast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_UCAST_PROBE] = {
 			.procname	= "ucast_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_APP_PROBE] = {
 			.procname	= "app_solicit",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_RETRANS_TIME] = {
 			.procname	= "retrans_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_BASE_REACHABLE_TIME] = {
 			.procname	= "base_reachable_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_DELAY_PROBE_TIME] = {
 			.procname	= "delay_first_probe_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_GC_STALETIME] = {
 			.procname	= "gc_stale_time",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_QUEUE_LEN] = {
 			.procname	= "unres_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
+			.proc_handler	= proc_unres_qlen,
+		},
+		[NEIGH_VAR_QUEUE_LEN_BYTES] = {
+			.procname	= "unres_qlen_bytes",
+			.maxlen		= sizeof(int),
+			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_PROXY_QLEN] = {
 			.procname	= "proxy_qlen",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_ANYCAST_DELAY] = {
 			.procname	= "anycast_delay",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_PROXY_DELAY] = {
 			.procname	= "proxy_delay",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_LOCKTIME] = {
 			.procname	= "locktime",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_userhz_jiffies,
 		},
-		{
+		[NEIGH_VAR_RETRANS_TIME_MS] = {
 			.procname	= "retrans_time_ms",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
-		{
+		[NEIGH_VAR_BASE_REACHABLE_TIME_MS] = {
 			.procname	= "base_reachable_time_ms",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_ms_jiffies,
 		},
-		{
+		[NEIGH_VAR_GC_INTERVAL] = {
 			.procname	= "gc_interval",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec_jiffies,
 		},
-		{
+		[NEIGH_VAR_GC_THRESH1] = {
 			.procname	= "gc_thresh1",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_GC_THRESH2] = {
 			.procname	= "gc_thresh2",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
-		{
+		[NEIGH_VAR_GC_THRESH3] = {
 			.procname	= "gc_thresh3",
 			.maxlen		= sizeof(int),
 			.mode		= 0644,
@@ -2778,47 +2836,49 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 	if (!t)
 		goto err;
 
-	t->neigh_vars[0].data  = &p->mcast_probes;
-	t->neigh_vars[1].data  = &p->ucast_probes;
-	t->neigh_vars[2].data  = &p->app_probes;
-	t->neigh_vars[3].data  = &p->retrans_time;
-	t->neigh_vars[4].data  = &p->base_reachable_time;
-	t->neigh_vars[5].data  = &p->delay_probe_time;
-	t->neigh_vars[6].data  = &p->gc_staletime;
-	t->neigh_vars[7].data  = &p->queue_len;
-	t->neigh_vars[8].data  = &p->proxy_qlen;
-	t->neigh_vars[9].data  = &p->anycast_delay;
-	t->neigh_vars[10].data = &p->proxy_delay;
-	t->neigh_vars[11].data = &p->locktime;
-	t->neigh_vars[12].data  = &p->retrans_time;
-	t->neigh_vars[13].data  = &p->base_reachable_time;
+	t->neigh_vars[NEIGH_VAR_MCAST_PROBE].data  = &p->mcast_probes;
+	t->neigh_vars[NEIGH_VAR_UCAST_PROBE].data  = &p->ucast_probes;
+	t->neigh_vars[NEIGH_VAR_APP_PROBE].data  = &p->app_probes;
+	t->neigh_vars[NEIGH_VAR_RETRANS_TIME].data  = &p->retrans_time;
+	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].data  = &p->base_reachable_time;
+	t->neigh_vars[NEIGH_VAR_DELAY_PROBE_TIME].data  = &p->delay_probe_time;
+	t->neigh_vars[NEIGH_VAR_GC_STALETIME].data  = &p->gc_staletime;
+	t->neigh_vars[NEIGH_VAR_QUEUE_LEN].data  = &p->queue_len_bytes;
+	t->neigh_vars[NEIGH_VAR_QUEUE_LEN_BYTES].data  = &p->queue_len_bytes;
+	t->neigh_vars[NEIGH_VAR_PROXY_QLEN].data  = &p->proxy_qlen;
+	t->neigh_vars[NEIGH_VAR_ANYCAST_DELAY].data  = &p->anycast_delay;
+	t->neigh_vars[NEIGH_VAR_PROXY_DELAY].data = &p->proxy_delay;
+	t->neigh_vars[NEIGH_VAR_LOCKTIME].data = &p->locktime;
+	t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].data  = &p->retrans_time;
+	t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].data  = &p->base_reachable_time;
 
 	if (dev) {
 		dev_name_source = dev->name;
 		/* Terminate the table early */
-		memset(&t->neigh_vars[14], 0, sizeof(t->neigh_vars[14]));
+		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
+		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
 	} else {
 		dev_name_source = neigh_path[NEIGH_CTL_PATH_DEV].procname;
-		t->neigh_vars[14].data = (int *)(p + 1);
-		t->neigh_vars[15].data = (int *)(p + 1) + 1;
-		t->neigh_vars[16].data = (int *)(p + 1) + 2;
-		t->neigh_vars[17].data = (int *)(p + 1) + 3;
+		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
+		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
+		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
+		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
 	}
 
 
 	if (handler) {
 		/* RetransTime */
-		t->neigh_vars[3].proc_handler = handler;
-		t->neigh_vars[3].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].extra1 = dev;
 		/* ReachableTime */
-		t->neigh_vars[4].proc_handler = handler;
-		t->neigh_vars[4].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].extra1 = dev;
 		/* RetransTime (in milliseconds)*/
-		t->neigh_vars[12].proc_handler = handler;
-		t->neigh_vars[12].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].extra1 = dev;
 		/* ReachableTime (in milliseconds) */
-		t->neigh_vars[13].proc_handler = handler;
-		t->neigh_vars[13].extra1 = dev;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].extra1 = dev;
 	}
 
 	t->dev_name = kstrdup(dev_name_source, GFP_KERNEL);
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 7f0eb087dc11..3532ac64c82d 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -107,7 +107,7 @@ struct neigh_table dn_neigh_table = {
 		.gc_staletime =	60 * HZ,
 		.reachable_time =		30 * HZ,
 		.delay_probe_time =	5 * HZ,
-		.queue_len =		3,
+		.queue_len_bytes =	64*1024,
 		.ucast_probes =	0,
 		.app_probes =		0,
 		.mcast_probes =	0,
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 96a164aa1367..d732827b32b9 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -177,7 +177,7 @@ struct neigh_table arp_tbl = {
 		.gc_staletime		= 60 * HZ,
 		.reachable_time		= 30 * HZ,
 		.delay_probe_time	= 5 * HZ,
-		.queue_len		= 3,
+		.queue_len_bytes	= 64*1024,
 		.ucast_probes		= 3,
 		.mcast_probes		= 3,
 		.anycast_delay		= 1 * HZ,
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 44e5b7f2a6c1..4a2098222625 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -141,7 +141,7 @@ struct neigh_table nd_tbl = {
 		.gc_staletime		= 60 * HZ,
 		.reachable_time		= ND_REACHABLE_TIME,
 		.delay_probe_time	= 5 * HZ,
-		.queue_len		= 3,
+		.queue_len_bytes	= 64*1024,
 		.ucast_probes		= 3,
 		.mcast_probes		= 3,
 		.anycast_delay		= 1 * HZ,
-- 
cgit v1.2.3


From b2b5ce9d1ccf1c45f8ac68e5d901112ab76ba199 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 14 Nov 2011 06:03:34 +0000
Subject: net: introduce build_skb()

One of the thing we discussed during netdev 2011 conference was the idea
to change some network drivers to allocate/populate their skb at RX
completion time, right before feeding the skb to network stack.

In old days, we allocated skbs when populating the RX ring.

This means bringing into cpu cache sk_buff and skb_shared_info cache
lines (since we clear/initialize them), then 'queue' skb->data to NIC.

By the time NIC fills a frame in skb->data buffer and host can process
it, cpu probably threw away the cache lines from its caches, because lot
of things happened between the allocation and final use.

So the deal would be to allocate only the data buffer for the NIC to
populate its RX ring buffer. And use build_skb() at RX completion to
attach a data buffer (now filled with an ethernet frame) to a new skb,
initialize the skb_shared_info portion, and give the hot skb to network
stack.

build_skb() is the function to allocate an skb, caller providing the
data buffer that should be attached to it. Drivers are expected to call
skb_reserve() right after build_skb() to adjust skb->data to the
Ethernet frame (usually skipping NET_SKB_PAD and NET_IP_ALIGN, but some
drivers might add a hardware provided alignment)

Data provided to build_skb() MUST have been allocated by a prior
kmalloc() call, with enough room to add SKB_DATA_ALIGN(sizeof(struct
skb_shared_info)) bytes at the end of the data without corrupting
incoming frame.

data = kmalloc(NET_SKB_PAD + NET_IP_ALIGN + 1536 +
               SKB_DATA_ALIGN(sizeof(struct skb_shared_info)),
	       GFP_ATOMIC);
...
skb = build_skb(data);
if (!skb) {
	recycle_data(data);
} else {
	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
	...
}

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Eilon Greenstein <eilong@broadcom.com>
CC: Ben Hutchings <bhutchings@solarflare.com>
CC: Tom Herbert <therbert@google.com>
CC: Jamal Hadi Salim <hadi@mojatatu.com>
CC: Stephen Hemminger <shemminger@vyatta.com>
CC: Thomas Graf <tgraf@infradead.org>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 +
 net/core/skbuff.c      | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index fe864885c1ed..abad8a0941e8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -540,6 +540,7 @@ extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
 extern struct sk_buff *__alloc_skb(unsigned int size,
 				   gfp_t priority, int fclone, int node);
+extern struct sk_buff *build_skb(void *data);
 static inline struct sk_buff *alloc_skb(unsigned int size,
 					gfp_t priority)
 {
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 18a3cebb753d..8d2c5b32f172 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -244,6 +244,55 @@ nodata:
 }
 EXPORT_SYMBOL(__alloc_skb);
 
+/**
+ * build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ *
+ * Allocate a new &sk_buff. Caller provides space holding head and
+ * skb_shared_info. @data must have been allocated by kmalloc()
+ * The return is the new skb buffer.
+ * On a failure the return is %NULL, and @data is not freed.
+ * Notes :
+ *  Before IO, driver allocates only data buffer where NIC put incoming frame
+ *  Driver should add room at head (NET_SKB_PAD) and
+ *  MUST add room at tail (SKB_DATA_ALIGN(skb_shared_info))
+ *  After IO, driver calls build_skb(), to allocate sk_buff and populate it
+ *  before giving packet to stack.
+ *  RX rings only contains data buffers, not full skbs.
+ */
+struct sk_buff *build_skb(void *data)
+{
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+	unsigned int size;
+
+	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	size = ksize(data) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	skb->truesize = SKB_TRUESIZE(size);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->mac_header = ~0U;
+#endif
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
+
+	return skb;
+}
+EXPORT_SYMBOL(build_skb);
+
 /**
  *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
-- 
cgit v1.2.3


From 64882709ef07f3eae29c7afc5aa8b84d12733a72 Mon Sep 17 00:00:00 2001
From: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Date: Tue, 15 Nov 2011 11:54:15 +0000
Subject: mdio-gpio: Add reset functionality to mdio-gpio driver(v2).

This patch adds phy reset functionality to mdio-gpio driver. Now
mdio_gpio_platform_data has new member as function pointer which can be
filled at the bsp level for a callback from phy infrastructure. Also the
mdio-bitbang driver fills-in the reset function of mii_bus structure.

Without this patch the bsp level code has to takecare of the reseting
PHY's on the bus, which become bit hacky for every bsp and
phy-infrastructure is ignored aswell.

Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-bitbang.c | 9 +++++++++
 drivers/net/phy/mdio-gpio.c    | 1 +
 include/linux/mdio-bitbang.h   | 2 ++
 include/linux/mdio-gpio.h      | 2 ++
 4 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c
index 65391891d8c4..daec9b05d168 100644
--- a/drivers/net/phy/mdio-bitbang.c
+++ b/drivers/net/phy/mdio-bitbang.c
@@ -202,6 +202,14 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
 	return 0;
 }
 
+static int mdiobb_reset(struct mii_bus *bus)
+{
+	struct mdiobb_ctrl *ctrl = bus->priv;
+	if (ctrl->reset)
+		ctrl->reset(bus);
+	return 0;
+}
+
 struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 {
 	struct mii_bus *bus;
@@ -214,6 +222,7 @@ struct mii_bus *alloc_mdio_bitbang(struct mdiobb_ctrl *ctrl)
 
 	bus->read = mdiobb_read;
 	bus->write = mdiobb_write;
+	bus->reset = mdiobb_reset;
 	bus->priv = ctrl;
 
 	return bus;
diff --git a/drivers/net/phy/mdio-gpio.c b/drivers/net/phy/mdio-gpio.c
index 2843c90f712f..89c5a3eccc12 100644
--- a/drivers/net/phy/mdio-gpio.c
+++ b/drivers/net/phy/mdio-gpio.c
@@ -95,6 +95,7 @@ static struct mii_bus * __devinit mdio_gpio_bus_init(struct device *dev,
 		goto out;
 
 	bitbang->ctrl.ops = &mdio_gpio_ops;
+	bitbang->ctrl.reset = pdata->reset;
 	bitbang->mdc = pdata->mdc;
 	bitbang->mdio = pdata->mdio;
 
diff --git a/include/linux/mdio-bitbang.h b/include/linux/mdio-bitbang.h
index 0fe00cd4c93c..76f52bbbb2f4 100644
--- a/include/linux/mdio-bitbang.h
+++ b/include/linux/mdio-bitbang.h
@@ -32,6 +32,8 @@ struct mdiobb_ops {
 
 struct mdiobb_ctrl {
 	const struct mdiobb_ops *ops;
+	/* reset callback */
+	int (*reset)(struct mii_bus *bus);
 };
 
 /* The returned bus is not yet registered with the phy layer. */
diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h
index e9d3fdfe41d7..7c9fe3c2be73 100644
--- a/include/linux/mdio-gpio.h
+++ b/include/linux/mdio-gpio.h
@@ -20,6 +20,8 @@ struct mdio_gpio_platform_data {
 
 	unsigned int phy_mask;
 	int irqs[PHY_MAX_ADDR];
+	/* reset callback */
+	int (*reset)(struct mii_bus *bus);
 };
 
 #endif /* __LINUX_MDIO_GPIO_H */
-- 
cgit v1.2.3


From 66846048f55c6c05a4c46c2daabb773173f8f28d Mon Sep 17 00:00:00 2001
From: Rick Jones <rick.jones2@hp.com>
Date: Mon, 14 Nov 2011 14:17:08 +0000
Subject: enable virtio_net to return bus_info in ethtool -i consistent with
 emulated NICs

Add a new .bus_name to virtio_config_ops then modify virtio_net to
call through to it in an ethtool .get_drvinfo routine to report
bus_info in ethtool -i output which is consistent with other
emulated NICs and the output of lspci.

Signed-off-by: Rick Jones <rick.jones2@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/lguest/lguest_device.c |  6 ++++++
 drivers/net/virtio_net.c       | 15 +++++++++++++++
 drivers/s390/kvm/kvm_virtio.c  |  6 ++++++
 drivers/virtio/virtio_mmio.c   |  6 ++++++
 drivers/virtio/virtio_pci.c    |  8 ++++++++
 include/linux/virtio_config.h  | 14 ++++++++++++++
 6 files changed, 55 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/lguest/lguest_device.c b/drivers/lguest/lguest_device.c
index 0dc30ffde5ad..595d73197016 100644
--- a/drivers/lguest/lguest_device.c
+++ b/drivers/lguest/lguest_device.c
@@ -381,6 +381,11 @@ error:
 	return PTR_ERR(vqs[i]);
 }
 
+static const char *lg_bus_name(struct virtio_device *vdev)
+{
+	return "";
+}
+
 /* The ops structure which hooks everything together. */
 static struct virtio_config_ops lguest_config_ops = {
 	.get_features = lg_get_features,
@@ -392,6 +397,7 @@ static struct virtio_config_ops lguest_config_ops = {
 	.reset = lg_reset,
 	.find_vqs = lg_find_vqs,
 	.del_vqs = lg_del_vqs,
+	.bus_name = lg_bus_name,
 };
 
 /*
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 6ee8410443c4..4dc9d842a7a3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -39,6 +39,7 @@ module_param(gso, bool, 0444);
 #define GOOD_COPY_LEN	128
 
 #define VIRTNET_SEND_COMMAND_SG_MAX    2
+#define VIRTNET_DRIVER_VERSION "1.0.0"
 
 struct virtnet_stats {
 	struct u64_stats_sync syncp;
@@ -889,7 +890,21 @@ static void virtnet_get_ringparam(struct net_device *dev,
 
 }
 
+
+static void virtnet_get_drvinfo(struct net_device *dev,
+				struct ethtool_drvinfo *info)
+{
+	struct virtnet_info *vi = netdev_priv(dev);
+	struct virtio_device *vdev = vi->vdev;
+
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strlcpy(info->version, VIRTNET_DRIVER_VERSION, sizeof(info->version));
+	strlcpy(info->bus_info, virtio_bus_name(vdev), sizeof(info->bus_info));
+
+}
+
 static const struct ethtool_ops virtnet_ethtool_ops = {
+	.get_drvinfo = virtnet_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = virtnet_get_ringparam,
 };
diff --git a/drivers/s390/kvm/kvm_virtio.c b/drivers/s390/kvm/kvm_virtio.c
index 94f49ffa70ba..8af868bab20b 100644
--- a/drivers/s390/kvm/kvm_virtio.c
+++ b/drivers/s390/kvm/kvm_virtio.c
@@ -263,6 +263,11 @@ error:
 	return PTR_ERR(vqs[i]);
 }
 
+static const char *kvm_bus_name(struct virtio_device *vdev)
+{
+	return "";
+}
+
 /*
  * The config ops structure as defined by virtio config
  */
@@ -276,6 +281,7 @@ static struct virtio_config_ops kvm_vq_configspace_ops = {
 	.reset = kvm_reset,
 	.find_vqs = kvm_find_vqs,
 	.del_vqs = kvm_del_vqs,
+	.bus_name = kvm_bus_name,
 };
 
 /*
diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index acc5e43c373e..2f57380d7ed4 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -361,7 +361,12 @@ static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 	return 0;
 }
 
+static const char *vm_bus_name(struct virtio_device *vdev)
+{
+	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
 
+	return vm_dev->pdev->name;
+}
 
 static struct virtio_config_ops virtio_mmio_config_ops = {
 	.get		= vm_get,
@@ -373,6 +378,7 @@ static struct virtio_config_ops virtio_mmio_config_ops = {
 	.del_vqs	= vm_del_vqs,
 	.get_features	= vm_get_features,
 	.finalize_features = vm_finalize_features,
+	.bus_name	= vm_bus_name,
 };
 
 
diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c
index 79a31e5b4b68..764ec05ea3e8 100644
--- a/drivers/virtio/virtio_pci.c
+++ b/drivers/virtio/virtio_pci.c
@@ -580,6 +580,13 @@ static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
 				  false, false);
 }
 
+static const char *vp_bus_name(struct virtio_device *vdev)
+{
+	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
+	return pci_name(vp_dev->pci_dev);
+}
+
 static struct virtio_config_ops virtio_pci_config_ops = {
 	.get		= vp_get,
 	.set		= vp_set,
@@ -590,6 +597,7 @@ static struct virtio_config_ops virtio_pci_config_ops = {
 	.del_vqs	= vp_del_vqs,
 	.get_features	= vp_get_features,
 	.finalize_features = vp_finalize_features,
+	.bus_name	= vp_bus_name,
 };
 
 static void virtio_pci_release_dev(struct device *_d)
diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h
index add4790b21fe..63f98d0a8efa 100644
--- a/include/linux/virtio_config.h
+++ b/include/linux/virtio_config.h
@@ -100,6 +100,10 @@
  *	vdev: the virtio_device
  *	This gives the final feature bits for the device: it can change
  *	the dev->feature bits if it wants.
+ * @bus_name: return the bus name associated with the device
+ *	vdev: the virtio_device
+ *      This returns a pointer to the bus name a la pci_name from which
+ *      the caller can then copy.
  */
 typedef void vq_callback_t(struct virtqueue *);
 struct virtio_config_ops {
@@ -117,6 +121,7 @@ struct virtio_config_ops {
 	void (*del_vqs)(struct virtio_device *);
 	u32 (*get_features)(struct virtio_device *vdev);
 	void (*finalize_features)(struct virtio_device *vdev);
+	const char *(*bus_name)(struct virtio_device *vdev);
 };
 
 /* If driver didn't advertise the feature, it will never appear. */
@@ -182,5 +187,14 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev,
 		return ERR_PTR(err);
 	return vq;
 }
+
+static inline
+const char *virtio_bus_name(struct virtio_device *vdev)
+{
+	if (!vdev->config->bus_name)
+		return "virtio";
+	return vdev->config->bus_name(vdev);
+}
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_VIRTIO_CONFIG_H */
-- 
cgit v1.2.3


From bc5787c6125cc2c868eaace46c46ce6e83dcfcb6 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: remove legacy ethtool ops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As all drivers are converted, we may now remove discrete offload setting
callback handling.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h   |  53 ------
 include/linux/netdevice.h |  16 --
 net/8021q/vlan_dev.c      |   6 +-
 net/core/dev.c            |  12 +-
 net/core/ethtool.c        | 418 +++-------------------------------------------
 5 files changed, 26 insertions(+), 479 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index de33de1e2052..20db5b275c3f 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -724,9 +724,6 @@ enum ethtool_sfeatures_retval_bits {
 
 #include <linux/rculist.h>
 
-/* needed by dev_disable_lro() */
-extern int __ethtool_set_flags(struct net_device *dev, u32 flags);
-
 extern int __ethtool_get_settings(struct net_device *dev,
 				  struct ethtool_cmd *cmd);
 
@@ -750,19 +747,6 @@ struct net_device;
 
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
-u32 ethtool_op_get_tx_csum(struct net_device *dev);
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data);
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data);
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data);
-u32 ethtool_op_get_sg(struct net_device *dev);
-int ethtool_op_set_sg(struct net_device *dev, u32 data);
-u32 ethtool_op_get_tso(struct net_device *dev);
-int ethtool_op_set_tso(struct net_device *dev, u32 data);
-u32 ethtool_op_get_ufo(struct net_device *dev);
-int ethtool_op_set_ufo(struct net_device *dev, u32 data);
-u32 ethtool_op_get_flags(struct net_device *dev);
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported);
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
 
 /**
  * struct ethtool_ops - optional netdev operations
@@ -807,22 +791,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
  * @get_pauseparam: Report pause parameters
  * @set_pauseparam: Set pause parameters.  Returns a negative error code
  *	or zero.
- * @get_rx_csum: Deprecated in favour of the netdev feature %NETIF_F_RXCSUM.
- *	Report whether receive checksums are turned on or off.
- * @set_rx_csum: Deprecated in favour of generic netdev features.  Turn
- *	receive checksum on or off.  Returns a negative error code or zero.
- * @get_tx_csum: Deprecated as redundant. Report whether transmit checksums
- *	are turned on or off.
- * @set_tx_csum: Deprecated in favour of generic netdev features.  Turn
- *	transmit checksums on or off.  Returns a negative error code or zero.
- * @get_sg: Deprecated as redundant.  Report whether scatter-gather is
- *	enabled.  
- * @set_sg: Deprecated in favour of generic netdev features.  Turn
- *	scatter-gather on or off. Returns a negative error code or zero.
- * @get_tso: Deprecated as redundant.  Report whether TCP segmentation
- *	offload is enabled.
- * @set_tso: Deprecated in favour of generic netdev features.  Turn TCP
- *	segmentation offload on or off.  Returns a negative error code or zero.
  * @self_test: Run specified self-tests
  * @get_strings: Return a set of strings that describe the requested objects
  * @set_phys_id: Identify the physical devices, e.g. by flashing an LED
@@ -844,15 +812,6 @@ bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported);
  *	negative error code or zero.
  * @complete: Function to be called after any other operation except
  *	@begin.  Will be called even if the other operation failed.
- * @get_ufo: Deprecated as redundant.  Report whether UDP fragmentation
- *	offload is enabled.
- * @set_ufo: Deprecated in favour of generic netdev features.  Turn UDP
- *	fragmentation offload on or off.  Returns a negative error code or zero.
- * @get_flags: Deprecated as redundant.  Report features included in
- *	&enum ethtool_flags that are enabled.  
- * @set_flags: Deprecated in favour of generic netdev features.  Turn
- *	features included in &enum ethtool_flags on or off.  Returns a
- *	negative error code or zero.
  * @get_priv_flags: Report driver-specific feature flags.
  * @set_priv_flags: Set driver-specific feature flags.  Returns a negative
  *	error code or zero.
@@ -917,14 +876,6 @@ struct ethtool_ops {
 				  struct ethtool_pauseparam*);
 	int	(*set_pauseparam)(struct net_device *,
 				  struct ethtool_pauseparam*);
-	u32	(*get_rx_csum)(struct net_device *);
-	int	(*set_rx_csum)(struct net_device *, u32);
-	u32	(*get_tx_csum)(struct net_device *);
-	int	(*set_tx_csum)(struct net_device *, u32);
-	u32	(*get_sg)(struct net_device *);
-	int	(*set_sg)(struct net_device *, u32);
-	u32	(*get_tso)(struct net_device *);
-	int	(*set_tso)(struct net_device *, u32);
 	void	(*self_test)(struct net_device *, struct ethtool_test *, u64 *);
 	void	(*get_strings)(struct net_device *, u32 stringset, u8 *);
 	int	(*set_phys_id)(struct net_device *, enum ethtool_phys_id_state);
@@ -932,10 +883,6 @@ struct ethtool_ops {
 				     struct ethtool_stats *, u64 *);
 	int	(*begin)(struct net_device *);
 	void	(*complete)(struct net_device *);
-	u32	(*get_ufo)(struct net_device *);
-	int	(*set_ufo)(struct net_device *, u32);
-	u32	(*get_flags)(struct net_device *);
-	int	(*set_flags)(struct net_device *, u32);
 	u32	(*get_priv_flags)(struct net_device *);
 	int	(*set_priv_flags)(struct net_device *, u32);
 	int	(*get_sset_count)(struct net_device *, int);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cbeb5867cff7..e34717a792b4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2592,22 +2592,6 @@ static inline int netif_is_bond_slave(struct net_device *dev)
 
 extern struct pernet_operations __net_initdata loopback_net_ops;
 
-static inline u32 dev_ethtool_get_rx_csum(struct net_device *dev)
-{
-	if (dev->features & NETIF_F_RXCSUM)
-		return 1;
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_rx_csum)
-		return 0;
-	return dev->ethtool_ops->get_rx_csum(dev);
-}
-
-static inline u32 dev_ethtool_get_flags(struct net_device *dev)
-{
-	if (!dev->ethtool_ops || !dev->ethtool_ops->get_flags)
-		return 0;
-	return dev->ethtool_ops->get_flags(dev);
-}
-
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* netdev_printk helpers, similar to dev_printk */
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index bc2528624583..6a4e0cb897b7 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -596,13 +596,11 @@ static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	u32 old_features = features;
 
-	features &= real_dev->features;
 	features &= real_dev->vlan_features;
+	features |= NETIF_F_RXCSUM;
+	features &= real_dev->features;
 
 	features |= old_features & NETIF_F_SOFT_FEATURES;
-
-	if (dev_ethtool_get_rx_csum(real_dev))
-		features |= NETIF_F_RXCSUM;
 	features |= NETIF_F_LLTX;
 
 	return features;
diff --git a/net/core/dev.c b/net/core/dev.c
index 51f89cd0a3f4..185e246d61fd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1321,8 +1321,6 @@ EXPORT_SYMBOL(dev_close);
  */
 void dev_disable_lro(struct net_device *dev)
 {
-	u32 flags;
-
 	/*
 	 * If we're trying to disable lro on a vlan device
 	 * use the underlying physical device instead
@@ -1330,15 +1328,9 @@ void dev_disable_lro(struct net_device *dev)
 	if (is_vlan_dev(dev))
 		dev = vlan_dev_real_dev(dev);
 
-	if (dev->ethtool_ops && dev->ethtool_ops->get_flags)
-		flags = dev->ethtool_ops->get_flags(dev);
-	else
-		flags = ethtool_op_get_flags(dev);
-
-	if (!(flags & ETH_FLAG_LRO))
-		return;
+	dev->wanted_features &= ~NETIF_F_LRO;
+	netdev_update_features(dev);
 
-	__ethtool_set_flags(dev, flags & ~ETH_FLAG_LRO);
 	if (unlikely(dev->features & NETIF_F_LRO))
 		netdev_WARN(dev, "failed to disable LRO!\n");
 }
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f44481707124..db8a77bb557b 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -36,236 +36,10 @@ u32 ethtool_op_get_link(struct net_device *dev)
 }
 EXPORT_SYMBOL(ethtool_op_get_link);
 
-u32 ethtool_op_get_tx_csum(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_ALL_CSUM) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tx_csum);
-
-int ethtool_op_set_tx_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_IP_CSUM;
-	else
-		dev->features &= ~NETIF_F_IP_CSUM;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-
-int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_HW_CSUM;
-	else
-		dev->features &= ~NETIF_F_HW_CSUM;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-
-int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
-	else
-		dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-
-u32 ethtool_op_get_sg(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_SG) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_sg);
-
-int ethtool_op_set_sg(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_SG;
-	else
-		dev->features &= ~NETIF_F_SG;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_sg);
-
-u32 ethtool_op_get_tso(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_TSO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_tso);
-
-int ethtool_op_set_tso(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_TSO;
-	else
-		dev->features &= ~NETIF_F_TSO;
-
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_tso);
-
-u32 ethtool_op_get_ufo(struct net_device *dev)
-{
-	return (dev->features & NETIF_F_UFO) != 0;
-}
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-
-int ethtool_op_set_ufo(struct net_device *dev, u32 data)
-{
-	if (data)
-		dev->features |= NETIF_F_UFO;
-	else
-		dev->features &= ~NETIF_F_UFO;
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-
-/* the following list of flags are the same as their associated
- * NETIF_F_xxx values in include/linux/netdevice.h
- */
-static const u32 flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
-	 ETH_FLAG_RXHASH);
-
-u32 ethtool_op_get_flags(struct net_device *dev)
-{
-	/* in the future, this function will probably contain additional
-	 * handling for flags which are not so easily handled
-	 * by a simple masking operation
-	 */
-
-	return dev->features & flags_dup_features;
-}
-EXPORT_SYMBOL(ethtool_op_get_flags);
-
-/* Check if device can enable (or disable) particular feature coded in "data"
- * argument. Flags "supported" describe features that can be toggled by device.
- * If feature can not be toggled, it state (enabled or disabled) must match
- * hardcoded device features state, otherwise flags are marked as invalid.
- */
-bool ethtool_invalid_flags(struct net_device *dev, u32 data, u32 supported)
-{
-	u32 features = dev->features & flags_dup_features;
-	/* "data" can contain only flags_dup_features bits,
-	 * see __ethtool_set_flags */
-
-	return (features & ~supported) != (data & ~supported);
-}
-EXPORT_SYMBOL(ethtool_invalid_flags);
-
-int ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported)
-{
-	if (ethtool_invalid_flags(dev, data, supported))
-		return -EINVAL;
-
-	dev->features = ((dev->features & ~flags_dup_features) |
-			 (data & flags_dup_features));
-	return 0;
-}
-EXPORT_SYMBOL(ethtool_op_set_flags);
-
 /* Handlers for each ethtool command */
 
 #define ETHTOOL_DEV_FEATURE_WORDS	1
 
-static void ethtool_get_features_compat(struct net_device *dev,
-	struct ethtool_get_features_block *features)
-{
-	if (!dev->ethtool_ops)
-		return;
-
-	/* getting RX checksum */
-	if (dev->ethtool_ops->get_rx_csum)
-		if (dev->ethtool_ops->get_rx_csum(dev))
-			features[0].active |= NETIF_F_RXCSUM;
-
-	/* mark legacy-changeable features */
-	if (dev->ethtool_ops->set_sg)
-		features[0].available |= NETIF_F_SG;
-	if (dev->ethtool_ops->set_tx_csum)
-		features[0].available |= NETIF_F_ALL_CSUM;
-	if (dev->ethtool_ops->set_tso)
-		features[0].available |= NETIF_F_ALL_TSO;
-	if (dev->ethtool_ops->set_rx_csum)
-		features[0].available |= NETIF_F_RXCSUM;
-	if (dev->ethtool_ops->set_flags)
-		features[0].available |= flags_dup_features;
-}
-
-static int ethtool_set_feature_compat(struct net_device *dev,
-	int (*legacy_set)(struct net_device *, u32),
-	struct ethtool_set_features_block *features, u32 mask)
-{
-	u32 do_set;
-
-	if (!legacy_set)
-		return 0;
-
-	if (!(features[0].valid & mask))
-		return 0;
-
-	features[0].valid &= ~mask;
-
-	do_set = !!(features[0].requested & mask);
-
-	if (legacy_set(dev, do_set) < 0)
-		netdev_info(dev,
-			"Legacy feature change (%s) failed for 0x%08x\n",
-			do_set ? "set" : "clear", mask);
-
-	return 1;
-}
-
-static int ethtool_set_flags_compat(struct net_device *dev,
-	int (*legacy_set)(struct net_device *, u32),
-	struct ethtool_set_features_block *features, u32 mask)
-{
-	u32 value;
-
-	if (!legacy_set)
-		return 0;
-
-	if (!(features[0].valid & mask))
-		return 0;
-
-	value = dev->features & ~features[0].valid;
-	value |= features[0].requested;
-
-	features[0].valid &= ~mask;
-
-	if (legacy_set(dev, value & mask) < 0)
-		netdev_info(dev, "Legacy flags change failed\n");
-
-	return 1;
-}
-
-static int ethtool_set_features_compat(struct net_device *dev,
-	struct ethtool_set_features_block *features)
-{
-	int compat;
-
-	if (!dev->ethtool_ops)
-		return 0;
-
-	compat  = ethtool_set_feature_compat(dev, dev->ethtool_ops->set_sg,
-		features, NETIF_F_SG);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tx_csum,
-		features, NETIF_F_ALL_CSUM);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_tso,
-		features, NETIF_F_ALL_TSO);
-	compat |= ethtool_set_feature_compat(dev, dev->ethtool_ops->set_rx_csum,
-		features, NETIF_F_RXCSUM);
-	compat |= ethtool_set_flags_compat(dev, dev->ethtool_ops->set_flags,
-		features, flags_dup_features);
-
-	return compat;
-}
-
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_gfeatures cmd = {
@@ -283,8 +57,6 @@ static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 	u32 __user *sizeaddr;
 	u32 copy_size;
 
-	ethtool_get_features_compat(dev, features);
-
 	sizeaddr = useraddr + offsetof(struct ethtool_gfeatures, size);
 	if (get_user(copy_size, sizeaddr))
 		return -EFAULT;
@@ -320,9 +92,6 @@ static int ethtool_set_features(struct net_device *dev, void __user *useraddr)
 	if (features[0].valid & ~NETIF_F_ETHTOOL_BITS)
 		return -EINVAL;
 
-	if (ethtool_set_features_compat(dev, features))
-		ret |= ETHTOOL_F_COMPAT;
-
 	if (features[0].valid & ~dev->hw_features) {
 		features[0].valid &= dev->hw_features;
 		ret |= ETHTOOL_F_UNSUPPORTED;
@@ -433,34 +202,6 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
 	}
 }
 
-static void *__ethtool_get_one_feature_actor(struct net_device *dev, u32 ethcmd)
-{
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-
-	if (!ops)
-		return NULL;
-
-	switch (ethcmd) {
-	case ETHTOOL_GTXCSUM:
-		return ops->get_tx_csum;
-	case ETHTOOL_GRXCSUM:
-		return ops->get_rx_csum;
-	case ETHTOOL_SSG:
-		return ops->get_sg;
-	case ETHTOOL_STSO:
-		return ops->get_tso;
-	case ETHTOOL_SUFO:
-		return ops->get_ufo;
-	default:
-		return NULL;
-	}
-}
-
-static u32 __ethtool_get_rx_csum_oldbug(struct net_device *dev)
-{
-	return !!(dev->features & NETIF_F_ALL_CSUM);
-}
-
 static int ethtool_get_one_feature(struct net_device *dev,
 	char __user *useraddr, u32 ethcmd)
 {
@@ -470,31 +211,11 @@ static int ethtool_get_one_feature(struct net_device *dev,
 		.data = !!(dev->features & mask),
 	};
 
-	/* compatibility with discrete get_ ops */
-	if (!(dev->hw_features & mask)) {
-		u32 (*actor)(struct net_device *);
-
-		actor = __ethtool_get_one_feature_actor(dev, ethcmd);
-
-		/* bug compatibility with old get_rx_csum */
-		if (ethcmd == ETHTOOL_GRXCSUM && !actor)
-			actor = __ethtool_get_rx_csum_oldbug;
-
-		if (actor)
-			edata.data = actor(dev);
-	}
-
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
 		return -EFAULT;
 	return 0;
 }
 
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data);
-static int __ethtool_set_sg(struct net_device *dev, u32 data);
-static int __ethtool_set_tso(struct net_device *dev, u32 data);
-static int __ethtool_set_ufo(struct net_device *dev, u32 data);
-
 static int ethtool_set_one_feature(struct net_device *dev,
 	void __user *useraddr, u32 ethcmd)
 {
@@ -506,56 +227,38 @@ static int ethtool_set_one_feature(struct net_device *dev,
 
 	mask = ethtool_get_feature_mask(ethcmd);
 	mask &= dev->hw_features;
-	if (mask) {
-		if (edata.data)
-			dev->wanted_features |= mask;
-		else
-			dev->wanted_features &= ~mask;
+	if (!mask)
+		return -EOPNOTSUPP;
 
-		__netdev_update_features(dev);
-		return 0;
-	}
+	if (edata.data)
+		dev->wanted_features |= mask;
+	else
+		dev->wanted_features &= ~mask;
 
-	/* Driver is not converted to ndo_fix_features or does not
-	 * support changing this offload. In the latter case it won't
-	 * have corresponding ethtool_ops field set.
-	 *
-	 * Following part is to be removed after all drivers advertise
-	 * their changeable features in netdev->hw_features and stop
-	 * using discrete offload setting ops.
-	 */
+	__netdev_update_features(dev);
 
-	switch (ethcmd) {
-	case ETHTOOL_STXCSUM:
-		return __ethtool_set_tx_csum(dev, edata.data);
-	case ETHTOOL_SRXCSUM:
-		return __ethtool_set_rx_csum(dev, edata.data);
-	case ETHTOOL_SSG:
-		return __ethtool_set_sg(dev, edata.data);
-	case ETHTOOL_STSO:
-		return __ethtool_set_tso(dev, edata.data);
-	case ETHTOOL_SUFO:
-		return __ethtool_set_ufo(dev, edata.data);
-	default:
-		return -EOPNOTSUPP;
-	}
+	return 0;
+}
+
+/* the following list of flags are the same as their associated
+ * NETIF_F_xxx values in include/linux/netdevice.h
+ */
+static const u32 flags_dup_features =
+	(ETH_FLAG_LRO | ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | ETH_FLAG_NTUPLE |
+	 ETH_FLAG_RXHASH);
+
+static u32 __ethtool_get_flags(struct net_device *dev)
+{
+	return dev->features & flags_dup_features;
 }
 
-int __ethtool_set_flags(struct net_device *dev, u32 data)
+static int __ethtool_set_flags(struct net_device *dev, u32 data)
 {
 	u32 changed;
 
 	if (data & ~flags_dup_features)
 		return -EINVAL;
 
-	/* legacy set_flags() op */
-	if (dev->ethtool_ops->set_flags) {
-		if (unlikely(dev->hw_features & flags_dup_features))
-			netdev_warn(dev,
-				"driver BUG: mixed hw_features and set_flags()\n");
-		return dev->ethtool_ops->set_flags(dev, data);
-	}
-
 	/* allow changing only bits set in hw_features */
 	changed = (data ^ dev->features) & flags_dup_features;
 	if (changed & ~dev->hw_features)
@@ -1231,81 +934,6 @@ static int ethtool_set_pauseparam(struct net_device *dev, void __user *useraddr)
 	return dev->ethtool_ops->set_pauseparam(dev, &pauseparam);
 }
 
-static int __ethtool_set_sg(struct net_device *dev, u32 data)
-{
-	int err;
-
-	if (!dev->ethtool_ops->set_sg)
-		return -EOPNOTSUPP;
-
-	if (data && !(dev->features & NETIF_F_ALL_CSUM))
-		return -EINVAL;
-
-	if (!data && dev->ethtool_ops->set_tso) {
-		err = dev->ethtool_ops->set_tso(dev, 0);
-		if (err)
-			return err;
-	}
-
-	if (!data && dev->ethtool_ops->set_ufo) {
-		err = dev->ethtool_ops->set_ufo(dev, 0);
-		if (err)
-			return err;
-	}
-	return dev->ethtool_ops->set_sg(dev, data);
-}
-
-static int __ethtool_set_tx_csum(struct net_device *dev, u32 data)
-{
-	int err;
-
-	if (!dev->ethtool_ops->set_tx_csum)
-		return -EOPNOTSUPP;
-
-	if (!data && dev->ethtool_ops->set_sg) {
-		err = __ethtool_set_sg(dev, 0);
-		if (err)
-			return err;
-	}
-
-	return dev->ethtool_ops->set_tx_csum(dev, data);
-}
-
-static int __ethtool_set_rx_csum(struct net_device *dev, u32 data)
-{
-	if (!dev->ethtool_ops->set_rx_csum)
-		return -EOPNOTSUPP;
-
-	if (!data)
-		dev->features &= ~NETIF_F_GRO;
-
-	return dev->ethtool_ops->set_rx_csum(dev, data);
-}
-
-static int __ethtool_set_tso(struct net_device *dev, u32 data)
-{
-	if (!dev->ethtool_ops->set_tso)
-		return -EOPNOTSUPP;
-
-	if (data && !(dev->features & NETIF_F_SG))
-		return -EINVAL;
-
-	return dev->ethtool_ops->set_tso(dev, data);
-}
-
-static int __ethtool_set_ufo(struct net_device *dev, u32 data)
-{
-	if (!dev->ethtool_ops->set_ufo)
-		return -EOPNOTSUPP;
-	if (data && !(dev->features & NETIF_F_SG))
-		return -EINVAL;
-	if (data && !((dev->features & NETIF_F_GEN_CSUM) ||
-		(dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
-			== (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM)))
-		return -EINVAL;
-	return dev->ethtool_ops->set_ufo(dev, data);
-}
-
 static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_test test;
@@ -1771,9 +1399,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 		break;
 	case ETHTOOL_GFLAGS:
 		rc = ethtool_get_value(dev, useraddr, ethcmd,
-				       (dev->ethtool_ops->get_flags ?
-					dev->ethtool_ops->get_flags :
-					ethtool_op_get_flags));
+					__ethtool_get_flags);
 		break;
 	case ETHTOOL_SFLAGS:
 		rc = ethtool_set_value(dev, useraddr, __ethtool_set_flags);
-- 
cgit v1.2.3


From a59e2ecb859f2ab03bb2e230709f8039472ad2c3 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: split netdev features to separate header
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move features definitions to separate header so that linux/skbuff.h won't
need to include linux/netdevice.h after netdev_features_t is introduced.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Acked-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 90 +++++++++++++++++++++++++++++++++++++++++
 include/linux/netdevice.h       | 80 +-----------------------------------
 2 files changed, 92 insertions(+), 78 deletions(-)
 create mode 100644 include/linux/netdev_features.h

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
new file mode 100644
index 000000000000..32640edf4d78
--- /dev/null
+++ b/include/linux/netdev_features.h
@@ -0,0 +1,90 @@
+/*
+ * Network device features.
+ *
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _LINUX_NETDEV_FEATURES_H
+#define _LINUX_NETDEV_FEATURES_H
+
+/* Net device feature bits; if you change something,
+ * also update netdev_features_strings[] in ethtool.c */
+
+#define NETIF_F_SG		1	/* Scatter/gather IO. */
+#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
+#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
+#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
+#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
+#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
+#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
+#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
+#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
+#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
+#define NETIF_F_GSO		2048	/* Enable software GSO. */
+#define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
+					/* do not use LLTX in new drivers */
+#define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
+#define NETIF_F_GRO		16384	/* Generic receive offload */
+#define NETIF_F_LRO		32768	/* large receive offload */
+
+/* the GSO_MASK reserves bits 16 through 23 */
+#define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
+#define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
+#define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
+#define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
+#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
+#define NETIF_F_RXCSUM		(1 << 29) /* Receive checksumming offload */
+#define NETIF_F_NOCACHE_COPY	(1 << 30) /* Use no-cache copyfromuser */
+#define NETIF_F_LOOPBACK	(1 << 31) /* Enable loopback */
+
+/* Segmentation offload features */
+#define NETIF_F_GSO_SHIFT	16
+#define NETIF_F_GSO_MASK	0x00ff0000
+#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
+#define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
+#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
+#define NETIF_F_TSO_ECN		(SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
+#define NETIF_F_TSO6		(SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
+#define NETIF_F_FSO		(SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
+
+/* Features valid for ethtool to change */
+/* = all defined minus driver/device-class-related */
+#define NETIF_F_NEVER_CHANGE	(NETIF_F_VLAN_CHALLENGED | \
+				 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
+#define NETIF_F_ETHTOOL_BITS	(0xff3fffff & ~NETIF_F_NEVER_CHANGE)
+
+/* List of features with software fallbacks. */
+#define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
+				 NETIF_F_TSO6 | NETIF_F_UFO)
+
+#define NETIF_F_GEN_CSUM	(NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)
+#define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
+#define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
+#define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
+
+#define NETIF_F_ALL_TSO 	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
+
+#define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
+				 NETIF_F_FSO)
+
+/*
+ * If one device supports one of these features, then enable them
+ * for all in netdev_increment_features.
+ */
+#define NETIF_F_ONE_FOR_ALL	(NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
+				 NETIF_F_SG | NETIF_F_HIGHDMA |		\
+				 NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED)
+/*
+ * If one device doesn't support one of these features, then disable it
+ * for all in netdev_increment_features.
+ */
+#define NETIF_F_ALL_FOR_ALL	(NETIF_F_NOCACHE_COPY | NETIF_F_FSO)
+
+/* changeable features with no special hardware requirements */
+#define NETIF_F_SOFT_FEATURES	(NETIF_F_GSO | NETIF_F_GRO)
+
+#endif	/* _LINUX_NETDEV_FEATURES_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e34717a792b4..9cf6e90b171d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -51,6 +51,8 @@
 #include <net/dcbnl.h>
 #endif
 
+#include <linux/netdev_features.h>
+
 struct vlan_group;
 struct netpoll_info;
 struct phy_device;
@@ -1005,84 +1007,6 @@ struct net_device {
 	/* mask of features inheritable by VLAN devices */
 	u32			vlan_features;
 
-	/* Net device feature bits; if you change something,
-	 * also update netdev_features_strings[] in ethtool.c */
-
-#define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
-#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
-#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
-#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
-#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
-#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
-#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
-#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
-#define NETIF_F_GSO		2048	/* Enable software GSO. */
-#define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
-					/* do not use LLTX in new drivers */
-#define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
-#define NETIF_F_GRO		16384	/* Generic receive offload */
-#define NETIF_F_LRO		32768	/* large receive offload */
-
-/* the GSO_MASK reserves bits 16 through 23 */
-#define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
-#define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
-#define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
-#define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
-#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
-#define NETIF_F_RXCSUM		(1 << 29) /* Receive checksumming offload */
-#define NETIF_F_NOCACHE_COPY	(1 << 30) /* Use no-cache copyfromuser */
-#define NETIF_F_LOOPBACK	(1 << 31) /* Enable loopback */
-
-	/* Segmentation offload features */
-#define NETIF_F_GSO_SHIFT	16
-#define NETIF_F_GSO_MASK	0x00ff0000
-#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
-#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO_ECN		(SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO6		(SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_FSO		(SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
-
-	/* Features valid for ethtool to change */
-	/* = all defined minus driver/device-class-related */
-#define NETIF_F_NEVER_CHANGE	(NETIF_F_VLAN_CHALLENGED | \
-				  NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
-#define NETIF_F_ETHTOOL_BITS	(0xff3fffff & ~NETIF_F_NEVER_CHANGE)
-
-	/* List of features with software fallbacks. */
-#define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
-				 NETIF_F_TSO6 | NETIF_F_UFO)
-
-
-#define NETIF_F_GEN_CSUM	(NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-#define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
-#define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
-#define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
-
-#define NETIF_F_ALL_TSO 	(NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_TSO_ECN)
-
-#define NETIF_F_ALL_FCOE	(NETIF_F_FCOE_CRC | NETIF_F_FCOE_MTU | \
-				 NETIF_F_FSO)
-
-	/*
-	 * If one device supports one of these features, then enable them
-	 * for all in netdev_increment_features.
-	 */
-#define NETIF_F_ONE_FOR_ALL	(NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \
-				 NETIF_F_SG | NETIF_F_HIGHDMA |		\
-				 NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED)
-	/*
-	 * If one device doesn't support one of these features, then disable it
-	 * for all in netdev_increment_features.
-	 */
-#define NETIF_F_ALL_FOR_ALL	(NETIF_F_NOCACHE_COPY | NETIF_F_FSO)
-
-	/* changeable features with no special hardware requirements */
-#define NETIF_F_SOFT_FEATURES	(NETIF_F_GSO | NETIF_F_GRO)
-
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
 	int			iflink;
-- 
cgit v1.2.3


From c8f44affb7244f2ac3e703cab13d55ede27621bb Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: introduce and use netdev_features_t for device features sets
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

v2:	add couple missing conversions in drivers
	split unexporting netdev_fix_features()
	implemented %pNF
	convert sock::sk_route_(no?)caps

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                    |  9 ++---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c    | 13 ++++---
 drivers/net/ethernet/atheros/atl1e/atl1e_main.c    | 13 ++++---
 drivers/net/ethernet/atheros/atlx/atl2.c           | 13 ++++---
 drivers/net/ethernet/atheros/atlx/atlx.c           | 13 ++++---
 drivers/net/ethernet/broadcom/bnx2.c               |  6 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c    |  5 +--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h    |  5 +--
 drivers/net/ethernet/broadcom/tg3.c                | 11 +++---
 drivers/net/ethernet/chelsio/cxgb/cxgb2.c          |  7 ++--
 drivers/net/ethernet/chelsio/cxgb/sge.c            |  2 +-
 drivers/net/ethernet/chelsio/cxgb/sge.h            |  2 +-
 drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c    |  9 ++---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 12 ++++---
 .../net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c    |  8 +++--
 drivers/net/ethernet/davicom/dm9000.c              |  5 +--
 drivers/net/ethernet/freescale/gianfar.c           |  2 +-
 drivers/net/ethernet/freescale/gianfar.h           |  4 +--
 drivers/net/ethernet/freescale/gianfar_ethtool.c   |  4 +--
 drivers/net/ethernet/ibm/ibmveth.c                 |  6 ++--
 drivers/net/ethernet/intel/e1000/e1000_main.c      | 14 +++++---
 drivers/net/ethernet/intel/e1000e/netdev.c         |  5 +--
 drivers/net/ethernet/intel/igb/igb_main.c          | 12 ++++---
 drivers/net/ethernet/intel/igbvf/netdev.c          |  3 +-
 drivers/net/ethernet/intel/ixgb/ixgb_main.c        |  8 ++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  6 ++--
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c  |  3 +-
 drivers/net/ethernet/jme.c                         |  8 ++---
 drivers/net/ethernet/marvell/mv643xx_eth.c         |  4 +--
 drivers/net/ethernet/marvell/sky2.c                | 13 +++----
 drivers/net/ethernet/micrel/ksz884x.c              |  3 +-
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c   |  5 +--
 drivers/net/ethernet/neterion/s2io.c               |  4 +--
 drivers/net/ethernet/neterion/vxge/vxge-main.c     |  9 ++---
 drivers/net/ethernet/nvidia/forcedeth.c            | 11 +++---
 .../net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c   |  5 +--
 .../net/ethernet/qlogic/netxen/netxen_nic_main.c   |  6 ++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h        |  5 +--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c     |  9 ++---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c   |  2 +-
 drivers/net/ethernet/qlogic/qlge/qlge_main.c       | 10 +++---
 drivers/net/ethernet/realtek/8139cp.c              |  2 +-
 drivers/net/ethernet/realtek/r8169.c               |  6 ++--
 drivers/net/ethernet/sfc/efx.c                     |  2 +-
 drivers/net/ethernet/sfc/net_driver.h              |  2 +-
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c  |  3 +-
 drivers/net/tun.c                                  |  7 ++--
 drivers/net/usb/smsc75xx.c                         |  3 +-
 drivers/net/usb/smsc95xx.c                         |  3 +-
 drivers/net/vmxnet3/vmxnet3_ethtool.c              |  4 +--
 drivers/net/vmxnet3/vmxnet3_int.h                  |  2 +-
 drivers/net/xen-netback/interface.c                |  3 +-
 drivers/net/xen-netfront.c                         |  8 +++--
 drivers/s390/net/qeth_l3_main.c                    |  6 ++--
 include/linux/netdev_features.h                    |  4 +++
 include/linux/netdevice.h                          | 41 ++++++++++++----------
 include/linux/skbuff.h                             |  4 ++-
 include/net/protocol.h                             |  4 +--
 include/net/sock.h                                 |  6 ++--
 include/net/tcp.h                                  |  3 +-
 include/net/udp.h                                  |  3 +-
 lib/vsprintf.c                                     | 19 ++++++++++
 net/8021q/vlan_dev.c                               |  3 +-
 net/bridge/br_device.c                             |  3 +-
 net/bridge/br_if.c                                 |  5 +--
 net/bridge/br_private.h                            |  3 +-
 net/core/dev.c                                     | 38 +++++++++++---------
 net/core/ethtool.c                                 |  9 +++--
 net/core/skbuff.c                                  |  2 +-
 net/ipv4/af_inet.c                                 |  3 +-
 net/ipv4/tcp.c                                     |  3 +-
 net/ipv4/udp.c                                     |  3 +-
 net/ipv6/af_inet6.c                                |  3 +-
 net/ipv6/udp.c                                     |  3 +-
 74 files changed, 305 insertions(+), 202 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index b0c577256487..ac5337a04639 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1325,11 +1325,12 @@ static int bond_sethwaddr(struct net_device *bond_dev,
 	return 0;
 }
 
-static u32 bond_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t bond_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct slave *slave;
 	struct bonding *bond = netdev_priv(dev);
-	u32 mask;
+	netdev_features_t mask;
 	int i;
 
 	read_lock(&bond->lock);
@@ -1363,7 +1364,7 @@ static void bond_compute_features(struct bonding *bond)
 {
 	struct slave *slave;
 	struct net_device *bond_dev = bond->dev;
-	u32 vlan_features = BOND_VLAN_FEATURES;
+	netdev_features_t vlan_features = BOND_VLAN_FEATURES;
 	unsigned short max_hard_header_len = ETH_HLEN;
 	int i;
 
@@ -1897,7 +1898,7 @@ int bond_release(struct net_device *bond_dev, struct net_device *slave_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave, *oldcurrent;
 	struct sockaddr addr;
-	u32 old_features = bond_dev->features;
+	netdev_features_t old_features = bond_dev->features;
 
 	/* slave is not a slave or master is not master of this slave */
 	if (!(slave_dev->flags & IFF_SLAVE) ||
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 02c7ed8d9eca..b8591246eb4c 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -411,7 +411,7 @@ static void atl1c_set_multi(struct net_device *netdev)
 	}
 }
 
-static void __atl1c_vlan_mode(u32 features, u32 *mac_ctrl_data)
+static void __atl1c_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -422,7 +422,8 @@ static void __atl1c_vlan_mode(u32 features, u32 *mac_ctrl_data)
 	}
 }
 
-static void atl1c_vlan_mode(struct net_device *netdev, u32 features)
+static void atl1c_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct pci_dev *pdev = adapter->pdev;
@@ -482,7 +483,8 @@ static void atl1c_set_rxbufsize(struct atl1c_adapter *adapter,
 		roundup(mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN, 8) : AT_RX_BUF_SIZE;
 }
 
-static u32 atl1c_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atl1c_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -499,9 +501,10 @@ static u32 atl1c_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atl1c_set_features(struct net_device *netdev, u32 features)
+static int atl1c_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atl1c_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 95483bcac1d0..c915c0873810 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -313,7 +313,7 @@ static void atl1e_set_multi(struct net_device *netdev)
 	}
 }
 
-static void __atl1e_vlan_mode(u32 features, u32 *mac_ctrl_data)
+static void __atl1e_vlan_mode(netdev_features_t features, u32 *mac_ctrl_data)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -324,7 +324,8 @@ static void __atl1e_vlan_mode(u32 features, u32 *mac_ctrl_data)
 	}
 }
 
-static void atl1e_vlan_mode(struct net_device *netdev, u32 features)
+static void atl1e_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
 	u32 mac_ctrl_data = 0;
@@ -370,7 +371,8 @@ static int atl1e_set_mac_addr(struct net_device *netdev, void *p)
 	return 0;
 }
 
-static u32 atl1e_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atl1e_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -384,9 +386,10 @@ static u32 atl1e_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atl1e_set_features(struct net_device *netdev, u32 features)
+static int atl1e_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atl1e_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index db3f43046d32..071f4c858969 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -361,7 +361,7 @@ static inline void atl2_irq_disable(struct atl2_adapter *adapter)
     synchronize_irq(adapter->pdev->irq);
 }
 
-static void __atl2_vlan_mode(u32 features, u32 *ctrl)
+static void __atl2_vlan_mode(netdev_features_t features, u32 *ctrl)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -372,7 +372,8 @@ static void __atl2_vlan_mode(u32 features, u32 *ctrl)
 	}
 }
 
-static void atl2_vlan_mode(struct net_device *netdev, u32 features)
+static void atl2_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atl2_adapter *adapter = netdev_priv(netdev);
 	u32 ctrl;
@@ -391,7 +392,8 @@ static void atl2_restore_vlan(struct atl2_adapter *adapter)
 	atl2_vlan_mode(adapter->netdev, adapter->netdev->features);
 }
 
-static u32 atl2_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atl2_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -405,9 +407,10 @@ static u32 atl2_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atl2_set_features(struct net_device *netdev, u32 features)
+static int atl2_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atl2_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/atheros/atlx/atlx.c b/drivers/net/ethernet/atheros/atlx/atlx.c
index aabcf4b5745a..8ff7411094d5 100644
--- a/drivers/net/ethernet/atheros/atlx/atlx.c
+++ b/drivers/net/ethernet/atheros/atlx/atlx.c
@@ -211,7 +211,7 @@ static void atlx_link_chg_task(struct work_struct *work)
 	spin_unlock_irqrestore(&adapter->lock, flags);
 }
 
-static void __atlx_vlan_mode(u32 features, u32 *ctrl)
+static void __atlx_vlan_mode(netdev_features_t features, u32 *ctrl)
 {
 	if (features & NETIF_F_HW_VLAN_RX) {
 		/* enable VLAN tag insert/strip */
@@ -222,7 +222,8 @@ static void __atlx_vlan_mode(u32 features, u32 *ctrl)
 	}
 }
 
-static void atlx_vlan_mode(struct net_device *netdev, u32 features)
+static void atlx_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct atlx_adapter *adapter = netdev_priv(netdev);
 	unsigned long flags;
@@ -242,7 +243,8 @@ static void atlx_restore_vlan(struct atlx_adapter *adapter)
 	atlx_vlan_mode(adapter->netdev, adapter->netdev->features);
 }
 
-static u32 atlx_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t atlx_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -256,9 +258,10 @@ static u32 atlx_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int atlx_set_features(struct net_device *netdev, u32 features)
+static int atlx_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		atlx_vlan_mode(netdev, features);
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 32d1f92a2479..7203f37d2ef3 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -7571,8 +7571,8 @@ bnx2_set_phys_id(struct net_device *dev, enum ethtool_phys_id_state state)
 	return 0;
 }
 
-static u32
-bnx2_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t
+bnx2_fix_features(struct net_device *dev, netdev_features_t features)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 
@@ -7583,7 +7583,7 @@ bnx2_fix_features(struct net_device *dev, u32 features)
 }
 
 static int
-bnx2_set_features(struct net_device *dev, u32 features)
+bnx2_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 0d60b9e633ad..8336c784db49 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -3398,7 +3398,8 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
 	return bnx2x_reload_if_running(dev);
 }
 
-u32 bnx2x_fix_features(struct net_device *dev, u32 features)
+netdev_features_t bnx2x_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 
@@ -3409,7 +3410,7 @@ u32 bnx2x_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-int bnx2x_set_features(struct net_device *dev, u32 features)
+int bnx2x_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	u32 flags = bp->flags;
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index 41eb17e7720f..80c5ed08e419 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -533,8 +533,9 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu);
  */
 int bnx2x_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type);
 #endif
-u32 bnx2x_fix_features(struct net_device *dev, u32 features);
-int bnx2x_set_features(struct net_device *dev, u32 features);
+netdev_features_t bnx2x_fix_features(struct net_device *dev,
+	netdev_features_t features);
+int bnx2x_set_features(struct net_device *dev, netdev_features_t features);
 
 /**
  * bnx2x_tx_timeout - tx timeout netdev callback
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index cd3623416a4e..365cd47e2298 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -6968,7 +6968,7 @@ static int tg3_phy_lpbk_set(struct tg3 *tp, u32 speed, bool extlpbk)
 	return 0;
 }
 
-static void tg3_set_loopback(struct net_device *dev, u32 features)
+static void tg3_set_loopback(struct net_device *dev, netdev_features_t features)
 {
 	struct tg3 *tp = netdev_priv(dev);
 
@@ -6994,7 +6994,8 @@ static void tg3_set_loopback(struct net_device *dev, u32 features)
 	}
 }
 
-static u32 tg3_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t tg3_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct tg3 *tp = netdev_priv(dev);
 
@@ -7004,9 +7005,9 @@ static u32 tg3_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int tg3_set_features(struct net_device *dev, u32 features)
+static int tg3_set_features(struct net_device *dev, netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if ((changed & NETIF_F_LOOPBACK) && netif_running(dev))
 		tg3_set_loopback(dev, features);
@@ -15313,7 +15314,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
 	u32 sndmbx, rcvmbx, intmbx;
 	char str[40];
 	u64 dma_mask, persist_dma_mask;
-	u32 features = 0;
+	netdev_features_t features = 0;
 
 	printk_once(KERN_INFO "%s\n", version);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index 26d0fd2d9c9d..a971796b2262 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -850,7 +850,8 @@ static int t1_set_mac_addr(struct net_device *dev, void *p)
 	return 0;
 }
 
-static u32 t1_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t t1_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -864,9 +865,9 @@ static u32 t1_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int t1_set_features(struct net_device *dev, u32 features)
+static int t1_set_features(struct net_device *dev, netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	struct adapter *adapter = dev->ml_priv;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index f9b602300040..47a84359d4e4 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -742,7 +742,7 @@ static inline void setup_ring_params(struct adapter *adapter, u64 addr,
 /*
  * Enable/disable VLAN acceleration.
  */
-void t1_vlan_mode(struct adapter *adapter, u32 features)
+void t1_vlan_mode(struct adapter *adapter, netdev_features_t features)
 {
 	struct sge *sge = adapter->sge;
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.h b/drivers/net/ethernet/chelsio/cxgb/sge.h
index e03980bcdd65..b9bf16b385f7 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.h
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.h
@@ -79,7 +79,7 @@ irqreturn_t t1_interrupt(int irq, void *cookie);
 int t1_poll(struct napi_struct *, int);
 
 netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev);
-void t1_vlan_mode(struct adapter *adapter, u32 features);
+void t1_vlan_mode(struct adapter *adapter, netdev_features_t features);
 void t1_sge_start(struct sge *);
 void t1_sge_stop(struct sge *);
 int t1_sge_intr_error_handler(struct sge *);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 053560da6347..63ffaa7e255f 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -2532,7 +2532,7 @@ static void t3_synchronize_rx(struct adapter *adap, const struct port_info *p)
 	}
 }
 
-static void cxgb_vlan_mode(struct net_device *dev, u32 features)
+static void cxgb_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct port_info *pi = netdev_priv(dev);
 	struct adapter *adapter = pi->adapter;
@@ -2553,7 +2553,8 @@ static void cxgb_vlan_mode(struct net_device *dev, u32 features)
 	t3_synchronize_rx(adapter, pi);
 }
 
-static u32 cxgb_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t cxgb_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -2567,9 +2568,9 @@ static u32 cxgb_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int cxgb_set_features(struct net_device *dev, u32 features)
+static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		cxgb_vlan_mode(dev, features);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 48ffe11d9aa9..fd6d460ea475 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1856,10 +1856,10 @@ static int set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	return err;
 }
 
-static int cxgb_set_features(struct net_device *dev, u32 features)
+static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
 {
 	const struct port_info *pi = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	int err;
 
 	if (!(changed & NETIF_F_HW_VLAN_RX))
@@ -3538,7 +3538,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 {
 	int func, i, err;
 	struct port_info *pi;
-	unsigned int highdma = 0;
+	bool highdma = false;
 	struct adapter *adapter = NULL;
 
 	printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION);
@@ -3564,7 +3564,7 @@ static int __devinit init_one(struct pci_dev *pdev,
 	}
 
 	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64))) {
-		highdma = NETIF_F_HIGHDMA;
+		highdma = true;
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
 		if (err) {
 			dev_err(&pdev->dev, "unable to obtain 64-bit DMA for "
@@ -3638,7 +3638,9 @@ static int __devinit init_one(struct pci_dev *pdev,
 			NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			NETIF_F_RXCSUM | NETIF_F_RXHASH |
 			NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
-		netdev->features |= netdev->hw_features | highdma;
+		if (highdma)
+			netdev->hw_features |= NETIF_F_HIGHDMA;
+		netdev->features |= netdev->hw_features;
 		netdev->vlan_features = netdev->features & VLAN_FEAT;
 
 		netdev->priv_flags |= IFF_UNICAST_FLT;
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index ee81d8e798ea..8155cfecae19 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -1092,7 +1092,8 @@ static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
 	return ret;
 }
 
-static u32 cxgb4vf_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t cxgb4vf_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -1106,10 +1107,11 @@ static u32 cxgb4vf_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int cxgb4vf_set_features(struct net_device *dev, u32 features)
+static int cxgb4vf_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct port_info *pi = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		t4vf_set_rxmode(pi->adapter, pi->viid, -1, -1, -1, -1,
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index 438f4580bf66..26be1dfc1577 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -474,10 +474,11 @@ static int dm9000_nway_reset(struct net_device *dev)
 	return mii_nway_restart(&dm->mii);
 }
 
-static int dm9000_set_features(struct net_device *dev, u32 features)
+static int dm9000_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	board_info_t *dm = to_dm9000_board(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	unsigned long flags;
 
 	if (!(changed & NETIF_F_RXCSUM))
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 83199fd0d62b..ff3e8b0f0da3 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -2306,7 +2306,7 @@ void gfar_check_rx_parser_mode(struct gfar_private *priv)
 }
 
 /* Enables and disables VLAN insertion/extraction */
-void gfar_vlan_mode(struct net_device *dev, u32 features)
+void gfar_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar __iomem *regs = NULL;
diff --git a/drivers/net/ethernet/freescale/gianfar.h b/drivers/net/ethernet/freescale/gianfar.h
index 9aa43773e8e3..cda6cb2eb1d2 100644
--- a/drivers/net/ethernet/freescale/gianfar.h
+++ b/drivers/net/ethernet/freescale/gianfar.h
@@ -1179,9 +1179,9 @@ extern void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev,
 extern void gfar_configure_coalescing(struct gfar_private *priv,
 		unsigned long tx_mask, unsigned long rx_mask);
 void gfar_init_sysfs(struct net_device *dev);
-int gfar_set_features(struct net_device *dev, u32 features);
+int gfar_set_features(struct net_device *dev, netdev_features_t features);
 extern void gfar_check_rx_parser_mode(struct gfar_private *priv);
-extern void gfar_vlan_mode(struct net_device *dev, u32 features);
+extern void gfar_vlan_mode(struct net_device *dev, netdev_features_t features);
 
 extern const struct ethtool_ops gfar_ethtool_ops;
 
diff --git a/drivers/net/ethernet/freescale/gianfar_ethtool.c b/drivers/net/ethernet/freescale/gianfar_ethtool.c
index 212736bab6bb..1ea0eb9ee643 100644
--- a/drivers/net/ethernet/freescale/gianfar_ethtool.c
+++ b/drivers/net/ethernet/freescale/gianfar_ethtool.c
@@ -519,12 +519,12 @@ static int gfar_sringparam(struct net_device *dev, struct ethtool_ringparam *rva
 	return err;
 }
 
-int gfar_set_features(struct net_device *dev, u32 features)
+int gfar_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct gfar_private *priv = netdev_priv(dev);
 	unsigned long flags;
 	int err = 0, i = 0;
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX))
 		gfar_vlan_mode(dev, features);
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index b1cd41b9c61c..e877371680a9 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -735,7 +735,8 @@ static void netdev_get_drvinfo(struct net_device *dev,
 		sizeof(info->version) - 1);
 }
 
-static u32 ibmveth_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t ibmveth_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/*
 	 * Since the ibmveth firmware interface does not have the
@@ -838,7 +839,8 @@ static int ibmveth_set_csum_offload(struct net_device *dev, u32 data)
 	return rc1 ? rc1 : rc2;
 }
 
-static int ibmveth_set_features(struct net_device *dev, u32 features)
+static int ibmveth_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct ibmveth_adapter *adapter = netdev_priv(dev);
 	int rx_csum = !!(features & NETIF_F_RXCSUM);
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index cf480b554622..82f4ef142259 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -167,7 +167,8 @@ static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
                                        struct sk_buff *skb);
 
 static bool e1000_vlan_used(struct e1000_adapter *adapter);
-static void e1000_vlan_mode(struct net_device *netdev, u32 features);
+static void e1000_vlan_mode(struct net_device *netdev,
+			    netdev_features_t features);
 static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
 static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 static void e1000_restore_vlan(struct e1000_adapter *adapter);
@@ -806,7 +807,8 @@ static int e1000_is_need_ioport(struct pci_dev *pdev)
 	}
 }
 
-static u32 e1000_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t e1000_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -820,10 +822,11 @@ static u32 e1000_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int e1000_set_features(struct net_device *netdev, u32 features)
+static int e1000_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		e1000_vlan_mode(netdev, features);
@@ -4577,7 +4580,8 @@ static void e1000_vlan_filter_on_off(struct e1000_adapter *adapter,
 		e1000_irq_enable(adapter);
 }
 
-static void e1000_vlan_mode(struct net_device *netdev, u32 features)
+static void e1000_vlan_mode(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index a855db1ad249..d85fac626a80 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5859,10 +5859,11 @@ static void e1000_eeprom_checks(struct e1000_adapter *adapter)
 	}
 }
 
-static int e1000_set_features(struct net_device *netdev, u32 features)
+static int e1000_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (changed & (NETIF_F_TSO | NETIF_F_TSO6))
 		adapter->flags |= FLAG_TSO_FORCE;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index ced544499f1b..1fcba22c6403 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -145,7 +145,7 @@ static bool igb_clean_rx_irq(struct igb_q_vector *, int);
 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
-static void igb_vlan_mode(struct net_device *netdev, u32 features);
+static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
 static void igb_vlan_rx_add_vid(struct net_device *, u16);
 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
 static void igb_restore_vlan(struct igb_adapter *);
@@ -1742,7 +1742,8 @@ void igb_reset(struct igb_adapter *adapter)
 	igb_get_phy_info(hw);
 }
 
-static u32 igb_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t igb_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -1756,9 +1757,10 @@ static u32 igb_fix_features(struct net_device *netdev, u32 features)
 	return features;
 }
 
-static int igb_set_features(struct net_device *netdev, u32 features)
+static int igb_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		igb_vlan_mode(netdev, features);
@@ -6467,7 +6469,7 @@ s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
 	return 0;
 }
 
-static void igb_vlan_mode(struct net_device *netdev, u32 features)
+static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index cca78124be31..2a05658938bd 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -2532,7 +2532,8 @@ static void igbvf_print_device_info(struct igbvf_adapter *adapter)
 	dev_info(&pdev->dev, "Address: %pM\n", netdev->dev_addr);
 }
 
-static int igbvf_set_features(struct net_device *netdev, u32 features)
+static int igbvf_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index e21148f8b160..247cf9219e03 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -325,8 +325,8 @@ ixgb_reset(struct ixgb_adapter *adapter)
 	}
 }
 
-static u32
-ixgb_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t
+ixgb_fix_features(struct net_device *netdev, netdev_features_t features)
 {
 	/*
 	 * Tx VLAN insertion does not work per HW design when Rx stripping is
@@ -339,10 +339,10 @@ ixgb_fix_features(struct net_device *netdev, u32 features)
 }
 
 static int
-ixgb_set_features(struct net_device *netdev, u32 features)
+ixgb_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (!(changed & (NETIF_F_RXCSUM|NETIF_F_HW_VLAN_RX)))
 		return 0;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 8ef92d1a6aa1..820fc040c241 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -7174,7 +7174,8 @@ void ixgbe_do_reset(struct net_device *netdev)
 		ixgbe_reset(adapter);
 }
 
-static u32 ixgbe_fix_features(struct net_device *netdev, u32 data)
+static netdev_features_t ixgbe_fix_features(struct net_device *netdev,
+	netdev_features_t data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 
@@ -7204,7 +7205,8 @@ static u32 ixgbe_fix_features(struct net_device *netdev, u32 data)
 	return data;
 }
 
-static int ixgbe_set_features(struct net_device *netdev, u32 data)
+static int ixgbe_set_features(struct net_device *netdev,
+	netdev_features_t data)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	bool need_reset = false;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 4c8e19951d57..3e6ec088c50d 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -3249,7 +3249,8 @@ static struct rtnl_link_stats64 *ixgbevf_get_stats(struct net_device *netdev,
 	return stats;
 }
 
-static int ixgbevf_set_features(struct net_device *netdev, u32 features)
+static int ixgbevf_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 7d88c7c28a7c..df3ab831b1ad 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -1917,7 +1917,7 @@ jme_map_tx_skb(struct jme_adapter *jme, struct sk_buff *skb, int idx)
 	struct jme_ring *txring = &(jme->txring[0]);
 	struct txdesc *txdesc = txring->desc, *ctxdesc;
 	struct jme_buffer_info *txbi = txring->bufinf, *ctxbi;
-	u8 hidma = jme->dev->features & NETIF_F_HIGHDMA;
+	u8 hidma = !!(jme->dev->features & NETIF_F_HIGHDMA);
 	int i, nr_frags = skb_shinfo(skb)->nr_frags;
 	int mask = jme->tx_ring_mask;
 	const struct skb_frag_struct *frag;
@@ -2620,8 +2620,8 @@ jme_set_msglevel(struct net_device *netdev, u32 value)
 	jme->msg_enable = value;
 }
 
-static u32
-jme_fix_features(struct net_device *netdev, u32 features)
+static netdev_features_t
+jme_fix_features(struct net_device *netdev, netdev_features_t features)
 {
 	if (netdev->mtu > 1900)
 		features &= ~(NETIF_F_ALL_TSO | NETIF_F_ALL_CSUM);
@@ -2629,7 +2629,7 @@ jme_fix_features(struct net_device *netdev, u32 features)
 }
 
 static int
-jme_set_features(struct net_device *netdev, u32 features)
+jme_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct jme_adapter *jme = netdev_priv(netdev);
 
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index f6b4304ca459..157c5c17fdcc 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -1579,10 +1579,10 @@ mv643xx_eth_set_ringparam(struct net_device *dev, struct ethtool_ringparam *er)
 
 
 static int
-mv643xx_eth_set_features(struct net_device *dev, u32 features)
+mv643xx_eth_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
-	u32 rx_csum = features & NETIF_F_RXCSUM;
+	int rx_csum = !!(features & NETIF_F_RXCSUM);
 
 	wrlp(mp, PORT_CONFIG, rx_csum ? 0x02000000 : 0x00000000);
 
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 553d1a315b3a..c79dc5447658 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -1275,7 +1275,7 @@ static void rx_set_checksum(struct sky2_port *sky2)
 }
 
 /* Enable/disable receive hash calculation (RSS) */
-static void rx_set_rss(struct net_device *dev, u32 features)
+static void rx_set_rss(struct net_device *dev, netdev_features_t features)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
 	struct sky2_hw *hw = sky2->hw;
@@ -1396,7 +1396,7 @@ static int sky2_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 #define SKY2_VLAN_OFFLOADS (NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO)
 
-static void sky2_vlan_mode(struct net_device *dev, u32 features)
+static void sky2_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
 	struct sky2_hw *hw = sky2->hw;
@@ -4282,7 +4282,8 @@ static int sky2_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom
 	return sky2_vpd_write(sky2->hw, cap, data, eeprom->offset, eeprom->len);
 }
 
-static u32 sky2_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t sky2_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	const struct sky2_port *sky2 = netdev_priv(dev);
 	const struct sky2_hw *hw = sky2->hw;
@@ -4306,13 +4307,13 @@ static u32 sky2_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int sky2_set_features(struct net_device *dev, u32 features)
+static int sky2_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct sky2_port *sky2 = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (changed & NETIF_F_RXCSUM) {
-		u32 on = features & NETIF_F_RXCSUM;
+		int on = !!(features & NETIF_F_RXCSUM);
 		sky2_write32(sky2->hw, Q_ADDR(rxqaddr[sky2->port], Q_CSR),
 			     on ? BMU_ENA_RX_CHKSUM : BMU_DIS_RX_CHKSUM);
 	}
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 3b67fe65404a..8d846bd09711 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -6588,7 +6588,8 @@ static void netdev_get_ethtool_stats(struct net_device *dev,
  *
  * Return 0 if successful; otherwise an error code.
  */
-static int netdev_set_features(struct net_device *dev, u32 features)
+static int netdev_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct dev_priv *priv = netdev_priv(dev);
 	struct dev_info *hw_priv = priv->adapter;
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 0778edcf7b9a..20b72ecb020a 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -1491,7 +1491,7 @@ myri10ge_clean_rx_done(struct myri10ge_slice_state *ss, int budget)
 	 * access to avoid theoretical race condition with functions that
 	 * change NETIF_F_LRO flag at runtime.
 	 */
-	bool lro_enabled = ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO;
+	bool lro_enabled = !!(ACCESS_ONCE(mgp->dev->features) & NETIF_F_LRO);
 
 	while (rx_done->entry[idx].length != 0 && work_done < budget) {
 		length = ntohs(rx_done->entry[idx].length);
@@ -3149,7 +3149,8 @@ static int myri10ge_set_mac_address(struct net_device *dev, void *addr)
 	return 0;
 }
 
-static u32 myri10ge_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t myri10ge_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	if (!(features & NETIF_F_RXCSUM))
 		features &= ~NETIF_F_LRO;
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index e6c90a5ac5d4..76ae47627200 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -6616,10 +6616,10 @@ static void s2io_ethtool_get_strings(struct net_device *dev,
 	}
 }
 
-static int s2io_set_features(struct net_device *dev, u32 features)
+static int s2io_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct s2io_nic *sp = netdev_priv(dev);
-	u32 changed = (features ^ dev->features) & NETIF_F_LRO;
+	netdev_features_t changed = (features ^ dev->features) & NETIF_F_LRO;
 
 	if (changed && netif_running(dev)) {
 		int rc;
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index a83197d757c1..16d4d8e913c3 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -2662,9 +2662,10 @@ static void vxge_poll_vp_lockup(unsigned long data)
 	mod_timer(&vdev->vp_lockup_timer, jiffies + HZ / 1000);
 }
 
-static u32 vxge_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t vxge_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	/* Enabling RTH requires some of the logic in vxge_device_register and a
 	 * vpath reset.  Due to these restrictions, only allow modification
@@ -2676,10 +2677,10 @@ static u32 vxge_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int vxge_set_features(struct net_device *dev, u32 features)
+static int vxge_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct vxgedev *vdev = netdev_priv(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 
 	if (!(changed & NETIF_F_RXHASH))
 		return 0;
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index e8a5ae356407..01bb7bfe14e6 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -4536,7 +4536,7 @@ static int nv_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam*
 	return 0;
 }
 
-static int nv_set_loopback(struct net_device *dev, u32 features)
+static int nv_set_loopback(struct net_device *dev, netdev_features_t features)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	unsigned long flags;
@@ -4591,7 +4591,8 @@ static int nv_set_loopback(struct net_device *dev, u32 features)
 	return retval;
 }
 
-static u32 nv_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t nv_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/* vlan is dependent on rx checksum offload */
 	if (features & (NETIF_F_HW_VLAN_TX|NETIF_F_HW_VLAN_RX))
@@ -4600,7 +4601,7 @@ static u32 nv_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static void nv_vlan_mode(struct net_device *dev, u32 features)
+static void nv_vlan_mode(struct net_device *dev, netdev_features_t features)
 {
 	struct fe_priv *np = get_nvpriv(dev);
 
@@ -4621,11 +4622,11 @@ static void nv_vlan_mode(struct net_device *dev, u32 features)
 	spin_unlock_irq(&np->lock);
 }
 
-static int nv_set_features(struct net_device *dev, u32 features)
+static int nv_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct fe_priv *np = netdev_priv(dev);
 	u8 __iomem *base = get_hwbase(dev);
-	u32 changed = dev->features ^ features;
+	netdev_features_t changed = dev->features ^ features;
 	int retval;
 
 	if ((changed & NETIF_F_LOOPBACK) && netif_running(dev)) {
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 48406ca382f1..964e9c0948bc 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -2109,10 +2109,11 @@ static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu)
  * Returns
  *	0:		HW state updated successfully
  */
-static int pch_gbe_set_features(struct net_device *netdev, u32 features)
+static int pch_gbe_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct pch_gbe_adapter *adapter = netdev_priv(netdev);
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (!(changed & NETIF_F_RXCSUM))
 		return 0;
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 8cf3173ba488..7dd9a4b107e6 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -544,7 +544,8 @@ static void netxen_set_multicast_list(struct net_device *dev)
 	adapter->set_multi(dev);
 }
 
-static u32 netxen_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netxen_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	if (!(features & NETIF_F_RXCSUM)) {
 		netdev_info(dev, "disabling LRO as RXCSUM is off\n");
@@ -555,7 +556,8 @@ static u32 netxen_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int netxen_set_features(struct net_device *dev, u32 features)
+static int netxen_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct netxen_adapter *adapter = netdev_priv(dev);
 	int hw_lro;
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 7ed53dbb8646..60976fc4ccc6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1466,8 +1466,9 @@ void qlcnic_advert_link_change(struct qlcnic_adapter *adapter, int linkup);
 
 int qlcnic_fw_cmd_set_mtu(struct qlcnic_adapter *adapter, int mtu);
 int qlcnic_change_mtu(struct net_device *netdev, int new_mtu);
-u32 qlcnic_fix_features(struct net_device *netdev, u32 features);
-int qlcnic_set_features(struct net_device *netdev, u32 features);
+netdev_features_t qlcnic_fix_features(struct net_device *netdev,
+	netdev_features_t features);
+int qlcnic_set_features(struct net_device *netdev, netdev_features_t features);
 int qlcnic_config_hw_lro(struct qlcnic_adapter *adapter, int enable);
 int qlcnic_config_bridged_mode(struct qlcnic_adapter *adapter, u32 enable);
 int qlcnic_send_lro_cleanup(struct qlcnic_adapter *adapter);
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index bcb81e47543a..b528e52a8ee1 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -817,12 +817,13 @@ int qlcnic_change_mtu(struct net_device *netdev, int mtu)
 }
 
 
-u32 qlcnic_fix_features(struct net_device *netdev, u32 features)
+netdev_features_t qlcnic_fix_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
 	if ((adapter->flags & QLCNIC_ESWITCH_ENABLED)) {
-		u32 changed = features ^ netdev->features;
+		netdev_features_t changed = features ^ netdev->features;
 		features ^= changed & (NETIF_F_ALL_CSUM | NETIF_F_RXCSUM);
 	}
 
@@ -833,10 +834,10 @@ u32 qlcnic_fix_features(struct net_device *netdev, u32 features)
 }
 
 
-int qlcnic_set_features(struct net_device *netdev, u32 features)
+int qlcnic_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
-	u32 changed = netdev->features ^ features;
+	netdev_features_t changed = netdev->features ^ features;
 	int hw_lro = (features & NETIF_F_LRO) ? QLCNIC_LRO_ENABLED : 0;
 
 	if (!(changed & NETIF_F_LRO))
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 0bd163828e33..823f845ddc04 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -792,7 +792,7 @@ qlcnic_set_netdev_features(struct qlcnic_adapter *adapter,
 		struct qlcnic_esw_func_cfg *esw_cfg)
 {
 	struct net_device *netdev = adapter->netdev;
-	unsigned long features, vlan_features;
+	netdev_features_t features, vlan_features;
 
 	features = (NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			NETIF_F_IPV6_CSUM | NETIF_F_GRO);
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index c92afcd912e2..1ce4e08037b8 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2307,7 +2307,7 @@ static int ql_napi_poll_msix(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-static void qlge_vlan_mode(struct net_device *ndev, u32 features)
+static void qlge_vlan_mode(struct net_device *ndev, netdev_features_t features)
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
 
@@ -2323,7 +2323,8 @@ static void qlge_vlan_mode(struct net_device *ndev, u32 features)
 	}
 }
 
-static u32 qlge_fix_features(struct net_device *ndev, u32 features)
+static netdev_features_t qlge_fix_features(struct net_device *ndev,
+	netdev_features_t features)
 {
 	/*
 	 * Since there is no support for separate rx/tx vlan accel
@@ -2337,9 +2338,10 @@ static u32 qlge_fix_features(struct net_device *ndev, u32 features)
 	return features;
 }
 
-static int qlge_set_features(struct net_device *ndev, u32 features)
+static int qlge_set_features(struct net_device *ndev,
+	netdev_features_t features)
 {
-	u32 changed = ndev->features ^ features;
+	netdev_features_t changed = ndev->features ^ features;
 
 	if (changed & NETIF_F_HW_VLAN_RX)
 		qlge_vlan_mode(ndev, features);
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 6cfc5dc0f76e..87cff10f7be7 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -1392,7 +1392,7 @@ static void cp_set_msglevel(struct net_device *dev, u32 value)
 	cp->msg_enable = value;
 }
 
-static int cp_set_features(struct net_device *dev, u32 features)
+static int cp_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct cp_private *cp = netdev_priv(dev);
 	unsigned long flags;
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index cdf66d68d849..2dfb0c0ea01b 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -1553,7 +1553,8 @@ static int rtl8169_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	return ret;
 }
 
-static u32 rtl8169_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t rtl8169_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
@@ -1567,7 +1568,8 @@ static u32 rtl8169_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int rtl8169_set_features(struct net_device *dev, u32 features)
+static int rtl8169_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index d5731f1fe6d6..14e134d3b4d7 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1900,7 +1900,7 @@ static void efx_set_multicast_list(struct net_device *net_dev)
 	/* Otherwise efx_start_port() will do this */
 }
 
-static int efx_set_features(struct net_device *net_dev, u32 data)
+static int efx_set_features(struct net_device *net_dev, netdev_features_t data)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index b8e251a1ee48..c49502bab6a3 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -908,7 +908,7 @@ struct efx_nic_type {
 	unsigned int phys_addr_channels;
 	unsigned int tx_dc_base;
 	unsigned int rx_dc_base;
-	u32 offload_features;
+	netdev_features_t offload_features;
 };
 
 /**************************************************************************
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 20546bbbb8db..643ca97a2d9a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -1419,7 +1419,8 @@ static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static u32 stmmac_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t stmmac_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 8592523b0bb5..3dd13d606d00 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -123,7 +123,7 @@ struct tun_struct {
 	gid_t			group;
 
 	struct net_device	*dev;
-	u32			set_features;
+	netdev_features_t	set_features;
 #define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
 			  NETIF_F_TSO6|NETIF_F_UFO)
 	struct fasync_struct	*fasync;
@@ -454,7 +454,8 @@ tun_net_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static u32 tun_net_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t tun_net_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 
@@ -1196,7 +1197,7 @@ static int tun_get_iff(struct net *net, struct tun_struct *tun,
  * privs required. */
 static int set_offload(struct tun_struct *tun, unsigned long arg)
 {
-	u32 features = 0;
+	netdev_features_t features = 0;
 
 	if (arg & TUN_F_CSUM) {
 		features |= NETIF_F_HW_CSUM;
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index a5b9b12ef268..7d62c39f65cf 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -728,7 +728,8 @@ static int smsc75xx_change_mtu(struct net_device *netdev, int new_mtu)
 }
 
 /* Enable or disable Rx checksum offload engine */
-static int smsc75xx_set_features(struct net_device *netdev, u32 features)
+static int smsc75xx_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	struct smsc75xx_priv *pdata = (struct smsc75xx_priv *)(dev->data[0]);
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index eff67678c5a6..56f3894d701a 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -516,7 +516,8 @@ static void smsc95xx_status(struct usbnet *dev, struct urb *urb)
 }
 
 /* Enable or disable Tx & Rx checksum offload engines */
-static int smsc95xx_set_features(struct net_device *netdev, u32 features)
+static int smsc95xx_set_features(struct net_device *netdev,
+	netdev_features_t features)
 {
 	struct usbnet *dev = netdev_priv(netdev);
 	u32 read_buf;
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index e662cbc8bfbd..77f723415c9c 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -262,11 +262,11 @@ vmxnet3_get_strings(struct net_device *netdev, u32 stringset, u8 *buf)
 	}
 }
 
-int vmxnet3_set_features(struct net_device *netdev, u32 features)
+int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	unsigned long flags;
-	u32 changed = features ^ netdev->features;
+	netdev_features_t changed = features ^ netdev->features;
 
 	if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_HW_VLAN_RX)) {
 		if (features & NETIF_F_RXCSUM)
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index b18eac1dccaa..ed54797db191 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -401,7 +401,7 @@ void
 vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter);
 
 int
-vmxnet3_set_features(struct net_device *netdev, u32 features);
+vmxnet3_set_features(struct net_device *netdev, netdev_features_t features);
 
 int
 vmxnet3_create_queues(struct vmxnet3_adapter *adapter,
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 182562952c79..0b5c18feb303 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -165,7 +165,8 @@ static int xenvif_change_mtu(struct net_device *dev, int mtu)
 	return 0;
 }
 
-static u32 xenvif_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t xenvif_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct xenvif *vif = netdev_priv(dev);
 
diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
index 226faab23603..a6e379fbf377 100644
--- a/drivers/net/xen-netfront.c
+++ b/drivers/net/xen-netfront.c
@@ -203,7 +203,7 @@ static void xennet_sysfs_delif(struct net_device *netdev);
 
 static int xennet_can_sg(struct net_device *dev)
 {
-	return dev->features & NETIF_F_SG;
+	return !!(dev->features & NETIF_F_SG);
 }
 
 
@@ -1190,7 +1190,8 @@ static void xennet_uninit(struct net_device *dev)
 	gnttab_free_grant_references(np->gref_rx_head);
 }
 
-static u32 xennet_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t xennet_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct netfront_info *np = netdev_priv(dev);
 	int val;
@@ -1216,7 +1217,8 @@ static u32 xennet_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int xennet_set_features(struct net_device *dev, u32 features)
+static int xennet_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	if (!(features & NETIF_F_SG) && dev->mtu > ETH_DATA_LEN) {
 		netdev_info(dev, "Reducing MTU because no SG offload");
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index e4c1176ee25b..a64f9e789b0a 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -3202,7 +3202,8 @@ static int qeth_l3_stop(struct net_device *dev)
 	return 0;
 }
 
-static u32 qeth_l3_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t qeth_l3_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct qeth_card *card = dev->ml_priv;
 
@@ -3216,7 +3217,8 @@ static u32 qeth_l3_fix_features(struct net_device *dev, u32 features)
 	return features;
 }
 
-static int qeth_l3_set_features(struct net_device *dev, u32 features)
+static int qeth_l3_set_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct qeth_card *card = dev->ml_priv;
 	u32 changed = dev->features ^ features;
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 32640edf4d78..af5238121826 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -10,6 +10,10 @@
 #ifndef _LINUX_NETDEV_FEATURES_H
 #define _LINUX_NETDEV_FEATURES_H
 
+#include <linux/types.h>
+
+typedef u32 netdev_features_t;
+
 /* Net device feature bits; if you change something,
  * also update netdev_features_strings[] in ethtool.c */
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9cf6e90b171d..b35ffd735ecc 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -847,12 +847,13 @@ struct netdev_tc_txq {
  *	Called to release previously enslaved netdev.
  *
  *      Feature/offload setting functions.
- * u32 (*ndo_fix_features)(struct net_device *dev, u32 features);
+ * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
+ *		netdev_features_t features);
  *	Adjusts the requested feature flags according to device-specific
  *	constraints, and returns the resulting flags. Must not modify
  *	the device state.
  *
- * int (*ndo_set_features)(struct net_device *dev, u32 features);
+ * int (*ndo_set_features)(struct net_device *dev, netdev_features_t features);
  *	Called to update device configuration to new features. Passed
  *	feature set might be less than what was returned by ndo_fix_features()).
  *	Must return >0 or -errno if it changed dev->features itself.
@@ -946,10 +947,10 @@ struct net_device_ops {
 						 struct net_device *slave_dev);
 	int			(*ndo_del_slave)(struct net_device *dev,
 						 struct net_device *slave_dev);
-	u32			(*ndo_fix_features)(struct net_device *dev,
-						    u32 features);
+	netdev_features_t	(*ndo_fix_features)(struct net_device *dev,
+						    netdev_features_t features);
 	int			(*ndo_set_features)(struct net_device *dev,
-						    u32 features);
+						    netdev_features_t features);
 };
 
 /*
@@ -999,13 +1000,13 @@ struct net_device {
 	struct list_head	unreg_list;
 
 	/* currently active device features */
-	u32			features;
+	netdev_features_t	features;
 	/* user-changeable features */
-	u32			hw_features;
+	netdev_features_t	hw_features;
 	/* user-requested features */
-	u32			wanted_features;
+	netdev_features_t	wanted_features;
 	/* mask of features inheritable by VLAN devices */
-	u32			vlan_features;
+	netdev_features_t	vlan_features;
 
 	/* Interface index. Unique device identifier	*/
 	int			ifindex;
@@ -1439,7 +1440,7 @@ struct packet_type {
 					 struct packet_type *,
 					 struct net_device *);
 	struct sk_buff		*(*gso_segment)(struct sk_buff *skb,
-						u32 features);
+						netdev_features_t features);
 	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff		**(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
@@ -2444,7 +2445,8 @@ extern int		netdev_set_master(struct net_device *dev, struct net_device *master)
 extern int netdev_set_bond_master(struct net_device *dev,
 				  struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb);
-extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features);
 #ifdef CONFIG_BUG
 extern void netdev_rx_csum_fault(struct net_device *dev);
 #else
@@ -2471,11 +2473,13 @@ extern const char *netdev_drivername(const struct net_device *dev);
 
 extern void linkwatch_run_queue(void);
 
-static inline u32 netdev_get_wanted_features(struct net_device *dev)
+static inline netdev_features_t netdev_get_wanted_features(
+	struct net_device *dev)
 {
 	return (dev->features & ~dev->hw_features) | dev->wanted_features;
 }
-u32 netdev_increment_features(u32 all, u32 one, u32 mask);
+netdev_features_t netdev_increment_features(netdev_features_t all,
+	netdev_features_t one, netdev_features_t mask);
 int __netdev_update_features(struct net_device *dev);
 void netdev_update_features(struct net_device *dev);
 void netdev_change_features(struct net_device *dev);
@@ -2483,21 +2487,22 @@ void netdev_change_features(struct net_device *dev);
 void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 					struct net_device *dev);
 
-u32 netif_skb_features(struct sk_buff *skb);
+netdev_features_t netif_skb_features(struct sk_buff *skb);
 
-static inline int net_gso_ok(u32 features, int gso_type)
+static inline int net_gso_ok(netdev_features_t features, int gso_type)
 {
-	int feature = gso_type << NETIF_F_GSO_SHIFT;
+	netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT;
 	return (features & feature) == feature;
 }
 
-static inline int skb_gso_ok(struct sk_buff *skb, u32 features)
+static inline int skb_gso_ok(struct sk_buff *skb, netdev_features_t features)
 {
 	return net_gso_ok(features, skb_shinfo(skb)->gso_type) &&
 	       (!skb_has_frag_list(skb) || (features & NETIF_F_FRAGLIST));
 }
 
-static inline int netif_needs_gso(struct sk_buff *skb, int features)
+static inline int netif_needs_gso(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	return skb_is_gso(skb) && (!skb_gso_ok(skb, features) ||
 		unlikely(skb->ip_summed != CHECKSUM_PARTIAL));
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index abad8a0941e8..a10e487c0864 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -30,6 +30,7 @@
 #include <linux/dmaengine.h>
 #include <linux/hrtimer.h>
 #include <linux/dma-mapping.h>
+#include <linux/netdev_features.h>
 
 /* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
@@ -2106,7 +2107,8 @@ extern void	       skb_split(struct sk_buff *skb,
 extern int	       skb_shift(struct sk_buff *tgt, struct sk_buff *skb,
 				 int shiftlen);
 
-extern struct sk_buff *skb_segment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *skb_segment(struct sk_buff *skb,
+				   netdev_features_t features);
 
 static inline void *skb_header_pointer(const struct sk_buff *skb, int offset,
 				       int len, void *buffer)
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 6f7eb800974a..e182e13d6391 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -38,7 +38,7 @@ struct net_protocol {
 	void			(*err_handler)(struct sk_buff *skb, u32 info);
 	int			(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff	       *(*gso_segment)(struct sk_buff *skb,
-					       u32 features);
+					       netdev_features_t features);
 	struct sk_buff	      **(*gro_receive)(struct sk_buff **head,
 					       struct sk_buff *skb);
 	int			(*gro_complete)(struct sk_buff *skb);
@@ -57,7 +57,7 @@ struct inet6_protocol {
 
 	int	(*gso_send_check)(struct sk_buff *skb);
 	struct sk_buff *(*gso_segment)(struct sk_buff *skb,
-				       u32 features);
+				       netdev_features_t features);
 	struct sk_buff **(*gro_receive)(struct sk_buff **head,
 					struct sk_buff *skb);
 	int	(*gro_complete)(struct sk_buff *skb);
diff --git a/include/net/sock.h b/include/net/sock.h
index 67cd4581b6da..1331008ad885 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -306,8 +306,8 @@ struct sock {
 	kmemcheck_bitfield_end(flags);
 	int			sk_wmem_queued;
 	gfp_t			sk_allocation;
-	int			sk_route_caps;
-	int			sk_route_nocaps;
+	netdev_features_t	sk_route_caps;
+	netdev_features_t	sk_route_nocaps;
 	int			sk_gso_type;
 	unsigned int		sk_gso_max_size;
 	int			sk_rcvlowat;
@@ -1393,7 +1393,7 @@ static inline int sk_can_gso(const struct sock *sk)
 
 extern void sk_setup_caps(struct sock *sk, struct dst_entry *dst);
 
-static inline void sk_nocaps_add(struct sock *sk, int flags)
+static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags)
 {
 	sk->sk_route_nocaps |= flags;
 	sk->sk_route_caps &= ~flags;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index bb18c4d69aba..113160b84588 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1430,7 +1430,8 @@ extern struct request_sock_ops tcp6_request_sock_ops;
 extern void tcp_v4_destroy_sock(struct sock *sk);
 
 extern int tcp_v4_gso_send_check(struct sk_buff *skb);
-extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+				       netdev_features_t features);
 extern struct sk_buff **tcp_gro_receive(struct sk_buff **head,
 					struct sk_buff *skb);
 extern struct sk_buff **tcp4_gro_receive(struct sk_buff **head,
diff --git a/include/net/udp.h b/include/net/udp.h
index 3b285f402f48..f54a5156b248 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -258,5 +258,6 @@ extern void udp4_proc_exit(void);
 extern void udp_init(void);
 
 extern int udp4_ufo_send_check(struct sk_buff *skb);
-extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features);
+extern struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+	netdev_features_t features);
 #endif	/* _UDP_H */
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 993599e66e5a..8e75003d62f6 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -777,6 +777,18 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
 	return string(buf, end, uuid, spec);
 }
 
+static
+char *netdev_feature_string(char *buf, char *end, const u8 *addr,
+		      struct printf_spec spec)
+{
+	spec.flags |= SPECIAL | SMALL | ZEROPAD;
+	if (spec.field_width == -1)
+		spec.field_width = 2 + 2 * sizeof(netdev_features_t);
+	spec.base = 16;
+
+	return number(buf, end, *(const netdev_features_t *)addr, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -824,6 +836,7 @@ int kptr_restrict __read_mostly;
  *       Do not use this feature without some mechanism to verify the
  *       correctness of the format string and va_list arguments.
  * - 'K' For a kernel pointer that should be hidden from unprivileged users
+ * - 'NF' For a netdev_features_t
  *
  * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
  * function pointers are really function descriptors, which contain a
@@ -896,6 +909,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		       has_capability_noaudit(current, CAP_SYSLOG))))
 			ptr = NULL;
 		break;
+	case 'N':
+		switch (fmt[1]) {
+		case 'F':
+			return netdev_feature_string(buf, end, ptr, spec);
+		}
+		break;
 	}
 	spec.flags |= SMALL;
 	if (spec.field_width == -1) {
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 6a4e0cb897b7..2b5fcde1f629 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -591,7 +591,8 @@ static void vlan_dev_uninit(struct net_device *dev)
 	}
 }
 
-static u32 vlan_dev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
 	u32 old_features = features;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index feb77ea7b58e..772bad34794c 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -186,7 +186,8 @@ static void br_getinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 	strcpy(info->bus_info, "N/A");
 }
 
-static u32 br_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t br_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	struct net_bridge *br = netdev_priv(dev);
 
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index f603e5b0b930..0a942fbccc9a 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -296,10 +296,11 @@ int br_min_mtu(const struct net_bridge *br)
 /*
  * Recomputes features using slave's features
  */
-u32 br_features_recompute(struct net_bridge *br, u32 features)
+netdev_features_t br_features_recompute(struct net_bridge *br,
+	netdev_features_t features)
 {
 	struct net_bridge_port *p;
-	u32 mask;
+	netdev_features_t mask;
 
 	if (list_empty(&br->port_list))
 		return features;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index d7d6fb05411f..4027029aa5e4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -387,7 +387,8 @@ extern int br_add_if(struct net_bridge *br,
 extern int br_del_if(struct net_bridge *br,
 	      struct net_device *dev);
 extern int br_min_mtu(const struct net_bridge *br);
-extern u32 br_features_recompute(struct net_bridge *br, u32 features);
+extern netdev_features_t br_features_recompute(struct net_bridge *br,
+	netdev_features_t features);
 
 /* br_input.c */
 extern int br_handle_frame_finish(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 185e246d61fd..f1cca59c4638 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1914,7 +1914,8 @@ EXPORT_SYMBOL(skb_checksum_help);
  *	It may return NULL if the skb requires no segmentation.  This is
  *	only possible when GSO is used for verifying header integrity.
  */
-struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
 	struct packet_type *ptype;
@@ -1944,9 +1945,9 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, u32 features)
 		if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
 			dev->ethtool_ops->get_drvinfo(dev, &info);
 
-		WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
-		     info.driver, dev ? dev->features : 0L,
-		     skb->sk ? skb->sk->sk_route_caps : 0L,
+		WARN(1, "%s: caps=(%pNF, %pNF) len=%d data_len=%d ip_summed=%d\n",
+		     info.driver, dev ? &dev->features : NULL,
+		     skb->sk ? &skb->sk->sk_route_caps : NULL,
 		     skb->len, skb->data_len, skb->ip_summed);
 
 		if (skb_header_cloned(skb) &&
@@ -2055,7 +2056,7 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
  *	This function segments the given skb and stores the list of segments
  *	in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb, int features)
+static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
 {
 	struct sk_buff *segs;
 
@@ -2094,7 +2095,7 @@ static inline void skb_orphan_try(struct sk_buff *skb)
 	}
 }
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
 {
 	return ((features & NETIF_F_GEN_CSUM) ||
 		((features & NETIF_F_V4_CSUM) &&
@@ -2105,7 +2106,8 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
 		 protocol == htons(ETH_P_FCOE)));
 }
 
-static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features)
+static netdev_features_t harmonize_features(struct sk_buff *skb,
+	__be16 protocol, netdev_features_t features)
 {
 	if (!can_checksum_protocol(features, protocol)) {
 		features &= ~NETIF_F_ALL_CSUM;
@@ -2117,10 +2119,10 @@ static u32 harmonize_features(struct sk_buff *skb, __be16 protocol, u32 features
 	return features;
 }
 
-u32 netif_skb_features(struct sk_buff *skb)
+netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
 	__be16 protocol = skb->protocol;
-	u32 features = skb->dev->features;
+	netdev_features_t features = skb->dev->features;
 
 	if (protocol == htons(ETH_P_8021Q)) {
 		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
@@ -2166,7 +2168,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 	unsigned int skb_len;
 
 	if (likely(!skb->next)) {
-		u32 features;
+		netdev_features_t features;
 
 		/*
 		 * If device doesn't need skb->dst, release it right now while
@@ -5350,7 +5352,8 @@ static void rollback_registered(struct net_device *dev)
 	list_del(&single);
 }
 
-static u32 netdev_fix_features(struct net_device *dev, u32 features)
+static netdev_features_t netdev_fix_features(struct net_device *dev,
+	netdev_features_t features)
 {
 	/* Fix illegal checksum combinations */
 	if ((features & NETIF_F_HW_CSUM) &&
@@ -5412,7 +5415,7 @@ static u32 netdev_fix_features(struct net_device *dev, u32 features)
 
 int __netdev_update_features(struct net_device *dev)
 {
-	u32 features;
+	netdev_features_t features;
 	int err = 0;
 
 	ASSERT_RTNL();
@@ -5428,16 +5431,16 @@ int __netdev_update_features(struct net_device *dev)
 	if (dev->features == features)
 		return 0;
 
-	netdev_dbg(dev, "Features changed: 0x%08x -> 0x%08x\n",
-		dev->features, features);
+	netdev_dbg(dev, "Features changed: %pNF -> %pNF\n",
+		&dev->features, &features);
 
 	if (dev->netdev_ops->ndo_set_features)
 		err = dev->netdev_ops->ndo_set_features(dev, features);
 
 	if (unlikely(err < 0)) {
 		netdev_err(dev,
-			"set_features() failed (%d); wanted 0x%08x, left 0x%08x\n",
-			err, features, dev->features);
+			"set_features() failed (%d); wanted %pNF, left %pNF\n",
+			err, &features, &dev->features);
 		return -1;
 	}
 
@@ -6361,7 +6364,8 @@ static int dev_cpu_callback(struct notifier_block *nfb,
  *	@one to the master device with current feature set @all.  Will not
  *	enable anything that is off in @mask. Returns the new feature set.
  */
-u32 netdev_increment_features(u32 all, u32 one, u32 mask)
+netdev_features_t netdev_increment_features(netdev_features_t all,
+	netdev_features_t one, netdev_features_t mask)
 {
 	if (mask & NETIF_F_GEN_CSUM)
 		mask |= NETIF_F_ALL_CSUM;
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index a354919a32ac..f135f1c92c9d 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -171,7 +171,7 @@ static void __ethtool_get_strings(struct net_device *dev,
 		ops->get_strings(dev, stringset, data);
 }
 
-static u32 ethtool_get_feature_mask(u32 eth_cmd)
+static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd)
 {
 	/* feature masks of legacy discrete ethtool ops */
 
@@ -205,7 +205,7 @@ static u32 ethtool_get_feature_mask(u32 eth_cmd)
 static int ethtool_get_one_feature(struct net_device *dev,
 	char __user *useraddr, u32 ethcmd)
 {
-	u32 mask = ethtool_get_feature_mask(ethcmd);
+	netdev_features_t mask = ethtool_get_feature_mask(ethcmd);
 	struct ethtool_value edata = {
 		.cmd = ethcmd,
 		.data = !!(dev->features & mask),
@@ -220,7 +220,7 @@ static int ethtool_set_one_feature(struct net_device *dev,
 	void __user *useraddr, u32 ethcmd)
 {
 	struct ethtool_value edata;
-	u32 mask;
+	netdev_features_t mask;
 
 	if (copy_from_user(&edata, useraddr, sizeof(edata)))
 		return -EFAULT;
@@ -260,8 +260,7 @@ static u32 __ethtool_get_flags(struct net_device *dev)
 
 static int __ethtool_set_flags(struct net_device *dev, u32 data)
 {
-	u32 features = 0;
-	u32 changed;
+	netdev_features_t features = 0, changed;
 
 	if (data & ~ETH_ALL_FLAGS)
 		return -EINVAL;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8d2c5b32f172..cbc003b2914a 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2670,7 +2670,7 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
  *	a pointer to the first in a list of new skbs for the segments.
  *	In case of error it returns ERR_PTR(err).
  */
-struct sk_buff *skb_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
 {
 	struct sk_buff *segs = NULL;
 	struct sk_buff *tail = NULL;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index b2bbcd0ebd19..15dc4c4828de 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1250,7 +1250,8 @@ out:
 	return err;
 }
 
-static struct sk_buff *inet_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct iphdr *iph;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 34f5db1e1c8b..50c359645665 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2653,7 +2653,8 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
-struct sk_buff *tcp_tso_segment(struct sk_buff *skb, u32 features)
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct tcphdr *th;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 6854f581313f..b867ea23ece9 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2247,7 +2247,8 @@ int udp4_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, u32 features)
+struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 282dc7a91f32..ee3319487c4f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -769,7 +769,8 @@ out:
 	return err;
 }
 
-static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, u32 features)
+static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	struct ipv6hdr *ipv6h;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index b4a4a15fa96f..ccfb0451b1c3 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1300,7 +1300,8 @@ static int udp6_ufo_send_check(struct sk_buff *skb)
 	return 0;
 }
 
-static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, u32 features)
+static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
+	netdev_features_t features)
 {
 	struct sk_buff *segs = ERR_PTR(-EINVAL);
 	unsigned int mss;
-- 
cgit v1.2.3


From a19f2a6df28e0ccb4103b77cc17c03b62f4d573e Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: Define enum for net device features.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Define feature values by bit position instead of direct 2**i values
and force the values to be of type netdev_features_t.

Cleaned and extended from patch by Mahesh Bandewar <maheshb@google.com>:
+ added netdev_features_t casts
+ included bits under NETIF_F_GSO_MASK
+ moved feature #defines out of struct net_device definition

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 133 ++++++++++++++++++++++++++++------------
 1 file changed, 93 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index af5238121826..04ac8f8433e9 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -14,52 +14,105 @@
 
 typedef u32 netdev_features_t;
 
-/* Net device feature bits; if you change something,
- * also update netdev_features_strings[] in ethtool.c */
-
-#define NETIF_F_SG		1	/* Scatter/gather IO. */
-#define NETIF_F_IP_CSUM		2	/* Can checksum TCP/UDP over IPv4. */
-#define NETIF_F_NO_CSUM		4	/* Does not require checksum. F.e. loopack. */
-#define NETIF_F_HW_CSUM		8	/* Can checksum all the packets. */
-#define NETIF_F_IPV6_CSUM	16	/* Can checksum TCP/UDP over IPV6 */
-#define NETIF_F_HIGHDMA		32	/* Can DMA to high memory. */
-#define NETIF_F_FRAGLIST	64	/* Scatter/gather IO. */
-#define NETIF_F_HW_VLAN_TX	128	/* Transmit VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_RX	256	/* Receive VLAN hw acceleration */
-#define NETIF_F_HW_VLAN_FILTER	512	/* Receive filtering on VLAN */
-#define NETIF_F_VLAN_CHALLENGED	1024	/* Device cannot handle VLAN packets */
-#define NETIF_F_GSO		2048	/* Enable software GSO. */
-#define NETIF_F_LLTX		4096	/* LockLess TX - deprecated. Please */
+enum {
+	NETIF_F_SG_BIT,			/* Scatter/gather IO. */
+	NETIF_F_IP_CSUM_BIT,		/* Can checksum TCP/UDP over IPv4. */
+	NETIF_F_NO_CSUM_BIT,		/* Does not require checksum. F.e. loopack. */
+	NETIF_F_HW_CSUM_BIT,		/* Can checksum all the packets. */
+	NETIF_F_IPV6_CSUM_BIT,		/* Can checksum TCP/UDP over IPV6 */
+	NETIF_F_HIGHDMA_BIT,		/* Can DMA to high memory. */
+	NETIF_F_FRAGLIST_BIT,		/* Scatter/gather IO. */
+	NETIF_F_HW_VLAN_TX_BIT,		/* Transmit VLAN hw acceleration */
+	NETIF_F_HW_VLAN_RX_BIT,		/* Receive VLAN hw acceleration */
+	NETIF_F_HW_VLAN_FILTER_BIT,	/* Receive filtering on VLAN */
+	NETIF_F_VLAN_CHALLENGED_BIT,	/* Device cannot handle VLAN packets */
+	NETIF_F_GSO_BIT,		/* Enable software GSO. */
+	NETIF_F_LLTX_BIT,		/* LockLess TX - deprecated. Please */
 					/* do not use LLTX in new drivers */
-#define NETIF_F_NETNS_LOCAL	8192	/* Does not change network namespaces */
-#define NETIF_F_GRO		16384	/* Generic receive offload */
-#define NETIF_F_LRO		32768	/* large receive offload */
-
-/* the GSO_MASK reserves bits 16 through 23 */
-#define NETIF_F_FCOE_CRC	(1 << 24) /* FCoE CRC32 */
-#define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
-#define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
-#define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
-#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
-#define NETIF_F_RXCSUM		(1 << 29) /* Receive checksumming offload */
-#define NETIF_F_NOCACHE_COPY	(1 << 30) /* Use no-cache copyfromuser */
-#define NETIF_F_LOOPBACK	(1 << 31) /* Enable loopback */
-
-/* Segmentation offload features */
-#define NETIF_F_GSO_SHIFT	16
-#define NETIF_F_GSO_MASK	0x00ff0000
-#define NETIF_F_TSO		(SKB_GSO_TCPV4 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_UFO		(SKB_GSO_UDP << NETIF_F_GSO_SHIFT)
-#define NETIF_F_GSO_ROBUST	(SKB_GSO_DODGY << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO_ECN		(SKB_GSO_TCP_ECN << NETIF_F_GSO_SHIFT)
-#define NETIF_F_TSO6		(SKB_GSO_TCPV6 << NETIF_F_GSO_SHIFT)
-#define NETIF_F_FSO		(SKB_GSO_FCOE << NETIF_F_GSO_SHIFT)
+	NETIF_F_NETNS_LOCAL_BIT,	/* Does not change network namespaces */
+	NETIF_F_GRO_BIT,		/* Generic receive offload */
+	NETIF_F_LRO_BIT,		/* large receive offload */
+
+	/**/NETIF_F_GSO_SHIFT,		/* keep the order of SKB_GSO_* bits */
+	NETIF_F_TSO_BIT			/* ... TCPv4 segmentation */
+		= NETIF_F_GSO_SHIFT,
+	NETIF_F_UFO_BIT,		/* ... UDPv4 fragmentation */
+	NETIF_F_GSO_ROBUST_BIT,		/* ... ->SKB_GSO_DODGY */
+	NETIF_F_TSO_ECN_BIT,		/* ... TCP ECN support */
+	NETIF_F_TSO6_BIT,		/* ... TCPv6 segmentation */
+	NETIF_F_FSO_BIT,		/* ... FCoE segmentation */
+	NETIF_F_GSO_RESERVED1,		/* ... free (fill GSO_MASK to 8 bits) */
+	/**/NETIF_F_GSO_LAST,		/* [can't be last bit, see GSO_MASK] */
+	NETIF_F_GSO_RESERVED2		/* ... free (fill GSO_MASK to 8 bits) */
+		= NETIF_F_GSO_LAST,
+
+	NETIF_F_FCOE_CRC_BIT,		/* FCoE CRC32 */
+	NETIF_F_SCTP_CSUM_BIT,		/* SCTP checksum offload */
+	NETIF_F_FCOE_MTU_BIT,		/* Supports max FCoE MTU, 2158 bytes*/
+	NETIF_F_NTUPLE_BIT,		/* N-tuple filters supported */
+	NETIF_F_RXHASH_BIT,		/* Receive hashing offload */
+	NETIF_F_RXCSUM_BIT,		/* Receive checksumming offload */
+	NETIF_F_NOCACHE_COPY_BIT,	/* Use no-cache copyfromuser */
+	NETIF_F_LOOPBACK_BIT,		/* Enable loopback */
+
+	/*
+	 * Add your fresh new feature above and remember to update
+	 * netdev_features_strings[] in net/core/ethtool.c and maybe
+	 * some feature mask #defines below. Please also describe it
+	 * in Documentation/networking/netdev-features.txt.
+	 */
+
+	/**/NETDEV_FEATURE_COUNT
+};
+
+/* copy'n'paste compression ;) */
+#define __NETIF_F_BIT(bit)	((netdev_features_t)1 << (bit))
+#define __NETIF_F(name)		__NETIF_F_BIT(NETIF_F_##name##_BIT)
+
+#define NETIF_F_FCOE_CRC	__NETIF_F(FCOE_CRC)
+#define NETIF_F_FCOE_MTU	__NETIF_F(FCOE_MTU)
+#define NETIF_F_FRAGLIST	__NETIF_F(FRAGLIST)
+#define NETIF_F_FSO		__NETIF_F(FSO)
+#define NETIF_F_GRO		__NETIF_F(GRO)
+#define NETIF_F_GSO		__NETIF_F(GSO)
+#define NETIF_F_GSO_ROBUST	__NETIF_F(GSO_ROBUST)
+#define NETIF_F_HIGHDMA		__NETIF_F(HIGHDMA)
+#define NETIF_F_HW_CSUM		__NETIF_F(HW_CSUM)
+#define NETIF_F_HW_VLAN_FILTER	__NETIF_F(HW_VLAN_FILTER)
+#define NETIF_F_HW_VLAN_RX	__NETIF_F(HW_VLAN_RX)
+#define NETIF_F_HW_VLAN_TX	__NETIF_F(HW_VLAN_TX)
+#define NETIF_F_IP_CSUM		__NETIF_F(IP_CSUM)
+#define NETIF_F_IPV6_CSUM	__NETIF_F(IPV6_CSUM)
+#define NETIF_F_LLTX		__NETIF_F(LLTX)
+#define NETIF_F_LOOPBACK	__NETIF_F(LOOPBACK)
+#define NETIF_F_LRO		__NETIF_F(LRO)
+#define NETIF_F_NETNS_LOCAL	__NETIF_F(NETNS_LOCAL)
+#define NETIF_F_NOCACHE_COPY	__NETIF_F(NOCACHE_COPY)
+#define NETIF_F_NO_CSUM		__NETIF_F(NO_CSUM)
+#define NETIF_F_NTUPLE		__NETIF_F(NTUPLE)
+#define NETIF_F_RXCSUM		__NETIF_F(RXCSUM)
+#define NETIF_F_RXHASH		__NETIF_F(RXHASH)
+#define NETIF_F_SCTP_CSUM	__NETIF_F(SCTP_CSUM)
+#define NETIF_F_SG		__NETIF_F(SG)
+#define NETIF_F_TSO6		__NETIF_F(TSO6)
+#define NETIF_F_TSO_ECN		__NETIF_F(TSO_ECN)
+#define NETIF_F_TSO		__NETIF_F(TSO)
+#define NETIF_F_UFO		__NETIF_F(UFO)
+#define NETIF_F_VLAN_CHALLENGED	__NETIF_F(VLAN_CHALLENGED)
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
 #define NETIF_F_NEVER_CHANGE	(NETIF_F_VLAN_CHALLENGED | \
 				 NETIF_F_LLTX | NETIF_F_NETNS_LOCAL)
-#define NETIF_F_ETHTOOL_BITS	(0xff3fffff & ~NETIF_F_NEVER_CHANGE)
+
+/* remember that ((t)1 << t_BITS) is undefined in C99 */
+#define NETIF_F_ETHTOOL_BITS	((__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) | \
+		(__NETIF_F_BIT(NETDEV_FEATURE_COUNT - 1) - 1)) & \
+		~NETIF_F_NEVER_CHANGE)
+
+/* Segmentation offload feature mask */
+#define NETIF_F_GSO_MASK	(__NETIF_F_BIT(NETIF_F_GSO_LAST + 1) - \
+		__NETIF_F_BIT(NETIF_F_GSO_SHIFT))
 
 /* List of features with software fallbacks. */
 #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
-- 
cgit v1.2.3


From a861a8b233e9024303fb8e73e465e81ad7119d5a Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: extend netdev_features_t to 64 bits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 04ac8f8433e9..20e3a1f9892d 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -12,7 +12,7 @@
 
 #include <linux/types.h>
 
-typedef u32 netdev_features_t;
+typedef u64 netdev_features_t;
 
 enum {
 	NETIF_F_SG_BIT,			/* Scatter/gather IO. */
-- 
cgit v1.2.3


From 34324dc2bf27c1773045fea63cb11f7e2a6ad2b9 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Tue, 15 Nov 2011 15:29:55 +0000
Subject: net: remove NETIF_F_NO_CSUM feature bit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only distinct use is checking if NETIF_F_NOCACHE_COPY should be
enabled by default. The check heuristics is altered a bit here,
so it hits other people than before. The default shouldn't be
trusted for performance-critical cases anyway.

For all other uses NETIF_F_NO_CSUM is equivalent to NETIF_F_HW_CSUM.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ieee802154/fakehard.c   |  2 +-
 drivers/misc/sgi-xp/xpnet.c     |  2 +-
 drivers/net/bonding/bond_main.c |  2 +-
 drivers/net/can/dev.c           |  2 +-
 drivers/net/can/slcan.c         |  2 +-
 drivers/net/dummy.c             |  2 +-
 drivers/net/ifb.c               |  2 +-
 drivers/net/loopback.c          |  2 +-
 drivers/net/veth.c              |  2 +-
 include/linux/netdev_features.h |  5 ++---
 include/linux/skbuff.h          |  1 -
 net/bridge/br_device.c          |  4 ++--
 net/core/dev.c                  | 21 ++++++---------------
 net/core/ethtool.c              |  1 -
 14 files changed, 19 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ieee802154/fakehard.c b/drivers/ieee802154/fakehard.c
index eb0e2ccc79ae..73d453159408 100644
--- a/drivers/ieee802154/fakehard.c
+++ b/drivers/ieee802154/fakehard.c
@@ -343,7 +343,7 @@ static void ieee802154_fake_setup(struct net_device *dev)
 {
 	dev->addr_len		= IEEE802154_ADDR_LEN;
 	memset(dev->broadcast, 0xff, IEEE802154_ADDR_LEN);
-	dev->features		= NETIF_F_NO_CSUM;
+	dev->features		= NETIF_F_HW_CSUM;
 	dev->needed_tailroom	= 2; /* FCS */
 	dev->mtu		= 127;
 	dev->tx_queue_len	= 10;
diff --git a/drivers/misc/sgi-xp/xpnet.c b/drivers/misc/sgi-xp/xpnet.c
index 42f067347bc7..3fac67a5204c 100644
--- a/drivers/misc/sgi-xp/xpnet.c
+++ b/drivers/misc/sgi-xp/xpnet.c
@@ -576,7 +576,7 @@ xpnet_init(void)
 	 * report an error if the data is not retrievable and the
 	 * packet will be dropped.
 	 */
-	xpnet_device->features = NETIF_F_NO_CSUM;
+	xpnet_device->features = NETIF_F_HW_CSUM;
 
 	result = register_netdev(xpnet_device);
 	if (result != 0) {
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index ac5337a04639..25a44d94be17 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4361,7 +4361,7 @@ static void bond_setup(struct net_device *bond_dev)
 				NETIF_F_HW_VLAN_RX |
 				NETIF_F_HW_VLAN_FILTER;
 
-	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM);
+	bond_dev->hw_features &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_HW_CSUM);
 	bond_dev->features |= bond_dev->hw_features;
 }
 
diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c
index 25695bde0549..120f1ab5a2ce 100644
--- a/drivers/net/can/dev.c
+++ b/drivers/net/can/dev.c
@@ -454,7 +454,7 @@ static void can_setup(struct net_device *dev)
 
 	/* New-style flags. */
 	dev->flags = IFF_NOARP;
-	dev->features = NETIF_F_NO_CSUM;
+	dev->features = NETIF_F_HW_CSUM;
 }
 
 struct sk_buff *alloc_can_skb(struct net_device *dev, struct can_frame **cf)
diff --git a/drivers/net/can/slcan.c b/drivers/net/can/slcan.c
index a979b006f459..3f1ebcc2cb83 100644
--- a/drivers/net/can/slcan.c
+++ b/drivers/net/can/slcan.c
@@ -387,7 +387,7 @@ static void slc_setup(struct net_device *dev)
 
 	/* New-style flags. */
 	dev->flags		= IFF_NOARP;
-	dev->features           = NETIF_F_NO_CSUM;
+	dev->features           = NETIF_F_HW_CSUM;
 }
 
 /******************************************
diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c
index a7c5e8831e8c..087648ea1edb 100644
--- a/drivers/net/dummy.c
+++ b/drivers/net/dummy.c
@@ -134,7 +134,7 @@ static void dummy_setup(struct net_device *dev)
 	dev->flags |= IFF_NOARP;
 	dev->flags &= ~IFF_MULTICAST;
 	dev->features	|= NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_TSO;
-	dev->features	|= NETIF_F_NO_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
+	dev->features	|= NETIF_F_HW_CSUM | NETIF_F_HIGHDMA | NETIF_F_LLTX;
 	random_ether_addr(dev->dev_addr);
 }
 
diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index 46b5f5fd686b..e05b645bbc32 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -164,7 +164,7 @@ static const struct net_device_ops ifb_netdev_ops = {
 	.ndo_validate_addr = eth_validate_addr,
 };
 
-#define IFB_FEATURES (NETIF_F_NO_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST	| \
+#define IFB_FEATURES (NETIF_F_HW_CSUM | NETIF_F_SG  | NETIF_F_FRAGLIST	| \
 		      NETIF_F_TSO_ECN | NETIF_F_TSO | NETIF_F_TSO6	| \
 		      NETIF_F_HIGHDMA | NETIF_F_HW_VLAN_TX)
 
diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c
index 4ce9e5f2c069..b71998d0b5b4 100644
--- a/drivers/net/loopback.c
+++ b/drivers/net/loopback.c
@@ -169,7 +169,7 @@ static void loopback_setup(struct net_device *dev)
 	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
 		| NETIF_F_ALL_TSO
 		| NETIF_F_UFO
-		| NETIF_F_NO_CSUM
+		| NETIF_F_HW_CSUM
 		| NETIF_F_RXCSUM
 		| NETIF_F_HIGHDMA
 		| NETIF_F_LLTX
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index d32a75fb6d21..b576812bdc59 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -271,7 +271,7 @@ static void veth_setup(struct net_device *dev)
 	dev->features |= NETIF_F_LLTX;
 	dev->destructor = veth_dev_free;
 
-	dev->hw_features = NETIF_F_NO_CSUM | NETIF_F_SG | NETIF_F_RXCSUM;
+	dev->hw_features = NETIF_F_HW_CSUM | NETIF_F_SG | NETIF_F_RXCSUM;
 }
 
 /*
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 20e3a1f9892d..77f5202977ce 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -17,7 +17,7 @@ typedef u64 netdev_features_t;
 enum {
 	NETIF_F_SG_BIT,			/* Scatter/gather IO. */
 	NETIF_F_IP_CSUM_BIT,		/* Can checksum TCP/UDP over IPv4. */
-	NETIF_F_NO_CSUM_BIT,		/* Does not require checksum. F.e. loopack. */
+	__UNUSED_NETIF_F_1,
 	NETIF_F_HW_CSUM_BIT,		/* Can checksum all the packets. */
 	NETIF_F_IPV6_CSUM_BIT,		/* Can checksum TCP/UDP over IPV6 */
 	NETIF_F_HIGHDMA_BIT,		/* Can DMA to high memory. */
@@ -88,7 +88,6 @@ enum {
 #define NETIF_F_LRO		__NETIF_F(LRO)
 #define NETIF_F_NETNS_LOCAL	__NETIF_F(NETNS_LOCAL)
 #define NETIF_F_NOCACHE_COPY	__NETIF_F(NOCACHE_COPY)
-#define NETIF_F_NO_CSUM		__NETIF_F(NO_CSUM)
 #define NETIF_F_NTUPLE		__NETIF_F(NTUPLE)
 #define NETIF_F_RXCSUM		__NETIF_F(RXCSUM)
 #define NETIF_F_RXHASH		__NETIF_F(RXHASH)
@@ -118,7 +117,7 @@ enum {
 #define NETIF_F_GSO_SOFTWARE	(NETIF_F_TSO | NETIF_F_TSO_ECN | \
 				 NETIF_F_TSO6 | NETIF_F_UFO)
 
-#define NETIF_F_GEN_CSUM	(NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)
+#define NETIF_F_GEN_CSUM	NETIF_F_HW_CSUM
 #define NETIF_F_V4_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IP_CSUM)
 #define NETIF_F_V6_CSUM		(NETIF_F_GEN_CSUM | NETIF_F_IPV6_CSUM)
 #define NETIF_F_ALL_CSUM	(NETIF_F_V4_CSUM | NETIF_F_V6_CSUM)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a10e487c0864..b93117389cfe 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -88,7 +88,6 @@
  *	at device setup time.
  *	NETIF_F_HW_CSUM	- it is clever device, it is able to checksum
  *			  everything.
- *	NETIF_F_NO_CSUM - loopback or reliable single hop media.
  *	NETIF_F_IP_CSUM - device is dumb. It is able to csum only
  *			  TCP/UDP over IPv4. Sigh. Vendors like this
  *			  way by an unknown reason. Though, see comment above
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 772bad34794c..a3754ac262c3 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -342,10 +342,10 @@ void br_dev_setup(struct net_device *dev)
 	dev->priv_flags = IFF_EBRIDGE;
 
 	dev->features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-			NETIF_F_GSO_MASK | NETIF_F_NO_CSUM | NETIF_F_LLTX |
+			NETIF_F_GSO_MASK | NETIF_F_HW_CSUM | NETIF_F_LLTX |
 			NETIF_F_NETNS_LOCAL | NETIF_F_HW_VLAN_TX;
 	dev->hw_features = NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HIGHDMA |
-			   NETIF_F_GSO_MASK | NETIF_F_NO_CSUM |
+			   NETIF_F_GSO_MASK | NETIF_F_HW_CSUM |
 			   NETIF_F_HW_VLAN_TX;
 
 	br->dev = dev;
diff --git a/net/core/dev.c b/net/core/dev.c
index f1cca59c4638..26c49d55e79d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5362,12 +5362,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev,
 		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
 	}
 
-	if ((features & NETIF_F_NO_CSUM) &&
-	    (features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
-		netdev_warn(dev, "mixed no checksumming and other settings.\n");
-		features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
-	}
-
 	/* Fix illegal SG+CSUM combinations. */
 	if ((features & NETIF_F_SG) &&
 	    !(features & NETIF_F_ALL_CSUM)) {
@@ -5624,11 +5618,12 @@ int register_netdevice(struct net_device *dev)
 	dev->wanted_features = dev->features & dev->hw_features;
 
 	/* Turn on no cache copy if HW is doing checksum */
-	dev->hw_features |= NETIF_F_NOCACHE_COPY;
-	if ((dev->features & NETIF_F_ALL_CSUM) &&
-	    !(dev->features & NETIF_F_NO_CSUM)) {
-		dev->wanted_features |= NETIF_F_NOCACHE_COPY;
-		dev->features |= NETIF_F_NOCACHE_COPY;
+	if (!(dev->flags & IFF_LOOPBACK)) {
+		dev->hw_features |= NETIF_F_NOCACHE_COPY;
+		if (dev->features & NETIF_F_ALL_CSUM) {
+			dev->wanted_features |= NETIF_F_NOCACHE_COPY;
+			dev->features |= NETIF_F_NOCACHE_COPY;
+		}
 	}
 
 	/* Make NETIF_F_HIGHDMA inheritable to VLAN devices.
@@ -6374,10 +6369,6 @@ netdev_features_t netdev_increment_features(netdev_features_t all,
 	all |= one & (NETIF_F_ONE_FOR_ALL|NETIF_F_ALL_CSUM) & mask;
 	all &= one | ~NETIF_F_ALL_FOR_ALL;
 
-	/* If device needs checksumming, downgrade to it. */
-	if (all & (NETIF_F_ALL_CSUM & ~NETIF_F_NO_CSUM))
-		all &= ~NETIF_F_NO_CSUM;
-
 	/* If one device supports hw checksumming, set for all. */
 	if (all & NETIF_F_GEN_CSUM)
 		all &= ~(NETIF_F_ALL_CSUM & ~NETIF_F_GEN_CSUM);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index bbf84fe0096e..d2eff9ec88be 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -43,7 +43,6 @@ EXPORT_SYMBOL(ethtool_op_get_link);
 static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] = {
 	[NETIF_F_SG_BIT] =               "tx-scatter-gather",
 	[NETIF_F_IP_CSUM_BIT] =          "tx-checksum-ipv4",
-	[NETIF_F_NO_CSUM_BIT] =          "tx-checksum-unneeded",
 	[NETIF_F_HW_CSUM_BIT] =          "tx-checksum-ip-generic",
 	[NETIF_F_IPV6_CSUM_BIT] =        "tx-checksum-ipv6",
 	[NETIF_F_HIGHDMA_BIT] =          "highdma",
-- 
cgit v1.2.3


From 61dc3461b9549bc10a2f16d254250680cadafcce Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 16 Nov 2011 11:09:08 +0000
Subject: team: convert overall spinlock to mutex

No need to have spinlock for this purpose. So convert this to mutex and
avoid current schedule while atomic problems in netlink code.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c | 32 ++++++++++++++++----------------
 include/linux/if_team.h |  2 +-
 2 files changed, 17 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index e5390c73a75d..7db219cd3153 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -443,9 +443,9 @@ static void __team_compute_features(struct team *team)
 
 static void team_compute_features(struct team *team)
 {
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	__team_compute_features(team);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 }
 
 static int team_port_enter(struct team *team, struct team_port *port)
@@ -647,7 +647,7 @@ static int team_init(struct net_device *dev)
 	int i;
 
 	team->dev = dev;
-	spin_lock_init(&team->lock);
+	mutex_init(&team->lock);
 
 	team->pcpu_stats = alloc_percpu(struct team_pcpu_stats);
 	if (!team->pcpu_stats)
@@ -672,13 +672,13 @@ static void team_uninit(struct net_device *dev)
 	struct team_port *port;
 	struct team_port *tmp;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	list_for_each_entry_safe(port, tmp, &team->port_list, list)
 		team_port_del(team, port->dev);
 
 	__team_change_mode(team, NULL); /* cleanup */
 	__team_options_unregister(team, team_options, ARRAY_SIZE(team_options));
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 }
 
 static void team_destructor(struct net_device *dev)
@@ -784,7 +784,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 	 * Alhough this is reader, it's guarded by team lock. It's not possible
 	 * to traverse list in reverse under rcu_read_lock
 	 */
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	list_for_each_entry(port, &team->port_list, list) {
 		err = dev_set_mtu(port->dev, new_mtu);
 		if (err) {
@@ -793,7 +793,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 			goto unwind;
 		}
 	}
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 
 	dev->mtu = new_mtu;
 
@@ -802,7 +802,7 @@ static int team_change_mtu(struct net_device *dev, int new_mtu)
 unwind:
 	list_for_each_entry_continue_reverse(port, &team->port_list, list)
 		dev_set_mtu(port->dev, dev->mtu);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 
 	return err;
 }
@@ -880,9 +880,9 @@ static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	err = team_port_add(team, port_dev);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 	return err;
 }
 
@@ -891,9 +891,9 @@ static int team_del_slave(struct net_device *dev, struct net_device *port_dev)
 	struct team *team = netdev_priv(dev);
 	int err;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	err = team_port_del(team, port_dev);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 	return err;
 }
 
@@ -1064,13 +1064,13 @@ static struct team *team_nl_team_get(struct genl_info *info)
 	}
 
 	team = netdev_priv(dev);
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	return team;
 }
 
 static void team_nl_team_put(struct team *team)
 {
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 	dev_put(team->dev);
 }
 
@@ -1486,9 +1486,9 @@ static void team_port_change_check(struct team_port *port, bool linkup)
 {
 	struct team *team = port->team;
 
-	spin_lock(&team->lock);
+	mutex_lock(&team->lock);
 	__team_port_change_check(port, linkup);
-	spin_unlock(&team->lock);
+	mutex_unlock(&team->lock);
 }
 
 /************************************
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 14f6388f5460..a6eac126a99a 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -92,7 +92,7 @@ struct team {
 	struct net_device *dev; /* associated netdevice */
 	struct team_pcpu_stats __percpu *pcpu_stats;
 
-	spinlock_t lock; /* used for overall locking, e.g. port lists write */
+	struct mutex lock; /* used for overall locking, e.g. port lists write */
 
 	/*
 	 * port lists with port count
-- 
cgit v1.2.3


From 358b838291f618278080bbed435b755f9b46748e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 16 Nov 2011 11:09:09 +0000
Subject: team: replicate options on register

Since multiple team instances are putting defined options into their
option list, during register each option must be cloned before added
into list. This resolves uncool memory corruptions when using multiple
teams.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/team/team.c                   | 76 +++++++++++++++++++++++++++----
 drivers/net/team/team_mode_activebackup.c |  5 +-
 include/linux/if_team.h                   |  8 ++--
 3 files changed, 72 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 7db219cd3153..f3092749b072 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -80,30 +80,78 @@ EXPORT_SYMBOL(team_port_set_team_mac);
  * Options handling
  *******************/
 
-void team_options_register(struct team *team, struct team_option *option,
-			   size_t option_count)
+struct team_option *__team_find_option(struct team *team, const char *opt_name)
+{
+	struct team_option *option;
+
+	list_for_each_entry(option, &team->option_list, list) {
+		if (strcmp(option->name, opt_name) == 0)
+			return option;
+	}
+	return NULL;
+}
+
+int team_options_register(struct team *team,
+			  const struct team_option *option,
+			  size_t option_count)
 {
 	int i;
+	struct team_option *dst_opts[option_count];
+	int err;
+
+	memset(dst_opts, 0, sizeof(dst_opts));
+	for (i = 0; i < option_count; i++, option++) {
+		struct team_option *dst_opt;
+
+		if (__team_find_option(team, option->name)) {
+			err = -EEXIST;
+			goto rollback;
+		}
+		dst_opt = kmalloc(sizeof(*option), GFP_KERNEL);
+		if (!dst_opt) {
+			err = -ENOMEM;
+			goto rollback;
+		}
+		memcpy(dst_opt, option, sizeof(*option));
+		dst_opts[i] = dst_opt;
+	}
+
+	for (i = 0; i < option_count; i++)
+		list_add_tail(&dst_opts[i]->list, &team->option_list);
 
-	for (i = 0; i < option_count; i++, option++)
-		list_add_tail(&option->list, &team->option_list);
+	return 0;
+
+rollback:
+	for (i = 0; i < option_count; i++)
+		kfree(dst_opts[i]);
+
+	return err;
 }
+
 EXPORT_SYMBOL(team_options_register);
 
 static void __team_options_change_check(struct team *team,
 					struct team_option *changed_option);
 
 static void __team_options_unregister(struct team *team,
-				      struct team_option *option,
+				      const struct team_option *option,
 				      size_t option_count)
 {
 	int i;
 
-	for (i = 0; i < option_count; i++, option++)
-		list_del(&option->list);
+	for (i = 0; i < option_count; i++, option++) {
+		struct team_option *del_opt;
+
+		del_opt = __team_find_option(team, option->name);
+		if (del_opt) {
+			list_del(&del_opt->list);
+			kfree(del_opt);
+		}
+	}
 }
 
-void team_options_unregister(struct team *team, struct team_option *option,
+void team_options_unregister(struct team *team,
+			     const struct team_option *option,
 			     size_t option_count)
 {
 	__team_options_unregister(team, option, option_count);
@@ -632,7 +680,7 @@ static int team_mode_option_set(struct team *team, void *arg)
 	return team_change_mode(team, *str);
 }
 
-static struct team_option team_options[] = {
+static const struct team_option team_options[] = {
 	{
 		.name = "mode",
 		.type = TEAM_OPTION_TYPE_STRING,
@@ -645,6 +693,7 @@ static int team_init(struct net_device *dev)
 {
 	struct team *team = netdev_priv(dev);
 	int i;
+	int err;
 
 	team->dev = dev;
 	mutex_init(&team->lock);
@@ -660,10 +709,17 @@ static int team_init(struct net_device *dev)
 	team_adjust_ops(team);
 
 	INIT_LIST_HEAD(&team->option_list);
-	team_options_register(team, team_options, ARRAY_SIZE(team_options));
+	err = team_options_register(team, team_options, ARRAY_SIZE(team_options));
+	if (err)
+		goto err_options_register;
 	netif_carrier_off(dev);
 
 	return 0;
+
+err_options_register:
+	free_percpu(team->pcpu_stats);
+
+	return err;
 }
 
 static void team_uninit(struct net_device *dev)
diff --git a/drivers/net/team/team_mode_activebackup.c b/drivers/net/team/team_mode_activebackup.c
index 6fe920c440b3..b34427502b54 100644
--- a/drivers/net/team/team_mode_activebackup.c
+++ b/drivers/net/team/team_mode_activebackup.c
@@ -83,7 +83,7 @@ static int ab_active_port_set(struct team *team, void *arg)
 	return -ENOENT;
 }
 
-static struct team_option ab_options[] = {
+static const struct team_option ab_options[] = {
 	{
 		.name = "activeport",
 		.type = TEAM_OPTION_TYPE_U32,
@@ -94,8 +94,7 @@ static struct team_option ab_options[] = {
 
 int ab_init(struct team *team)
 {
-	team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
-	return 0;
+	return team_options_register(team, ab_options, ARRAY_SIZE(ab_options));
 }
 
 void ab_exit(struct team *team)
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index a6eac126a99a..828181fbad5d 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -140,11 +140,11 @@ static inline struct team_port *team_get_port_by_index_rcu(struct team *team,
 }
 
 extern int team_port_set_team_mac(struct team_port *port);
-extern void team_options_register(struct team *team,
-				  struct team_option *option,
-				  size_t option_count);
+extern int team_options_register(struct team *team,
+				 const struct team_option *option,
+				 size_t option_count);
 extern void team_options_unregister(struct team *team,
-				    struct team_option *option,
+				    const struct team_option *option,
 				    size_t option_count);
 extern int team_mode_register(struct team_mode *mode);
 extern int team_mode_unregister(struct team_mode *mode);
-- 
cgit v1.2.3


From 28011cf19b75df9d3f35489a7599a97ec0b3f1a0 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Wed, 16 Nov 2011 18:36:59 -0500
Subject: net: Add ethtool to mii advertisment conversion helpers

Translating between ethtool advertisement settings and MII
advertisements are common operations for ethernet drivers.  This patch
adds a set of helper functions that implements the conversion.  The
patch then modifies a couple of the drivers to use the new functions.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c |  15 +---
 drivers/net/ethernet/broadcom/tg3.c  |  53 +++--------
 drivers/net/ethernet/sun/niu.c       |  15 +---
 drivers/net/mii.c                    |  48 +++-------
 drivers/net/phy/phy_device.c         |  20 +----
 include/linux/mii.h                  | 166 +++++++++++++++++++++++++++++++++++
 6 files changed, 197 insertions(+), 120 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 7203f37d2ef3..6b7cd1e80ada 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2064,21 +2064,12 @@ __acquires(&bp->phy_lock)
 		bnx2_read_phy(bp, MII_CTRL1000, &adv1000_reg);
 		adv1000_reg &= PHY_ALL_1000_SPEED;
 
-		if (bp->advertising & ADVERTISED_10baseT_Half)
-			new_adv_reg |= ADVERTISE_10HALF;
-		if (bp->advertising & ADVERTISED_10baseT_Full)
-			new_adv_reg |= ADVERTISE_10FULL;
-		if (bp->advertising & ADVERTISED_100baseT_Half)
-			new_adv_reg |= ADVERTISE_100HALF;
-		if (bp->advertising & ADVERTISED_100baseT_Full)
-			new_adv_reg |= ADVERTISE_100FULL;
-		if (bp->advertising & ADVERTISED_1000baseT_Full)
-			new_adv1000_reg |= ADVERTISE_1000FULL;
-
+		new_adv_reg = ethtool_adv_to_mii_100bt(bp->advertising);
 		new_adv_reg |= ADVERTISE_CSMA;
-
 		new_adv_reg |= bnx2_phy_get_pause_adv(bp);
 
+		new_adv1000_reg |= ethtool_adv_to_mii_1000T(bp->advertising);
+
 		if ((adv1000_reg != new_adv1000_reg) ||
 			(adv_reg != new_adv_reg) ||
 			((bmcr & BMCR_ANENABLE) == 0)) {
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 365cd47e2298..024ca1d4d028 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -3594,15 +3594,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	u32 val, new_adv;
 
 	new_adv = ADVERTISE_CSMA;
-	if (advertise & ADVERTISED_10baseT_Half)
-		new_adv |= ADVERTISE_10HALF;
-	if (advertise & ADVERTISED_10baseT_Full)
-		new_adv |= ADVERTISE_10FULL;
-	if (advertise & ADVERTISED_100baseT_Half)
-		new_adv |= ADVERTISE_100HALF;
-	if (advertise & ADVERTISED_100baseT_Full)
-		new_adv |= ADVERTISE_100FULL;
-
+	new_adv |= ethtool_adv_to_mii_100bt(advertise);
 	new_adv |= tg3_advert_flowctrl_1000T(flowctrl);
 
 	err = tg3_writephy(tp, MII_ADVERTISE, new_adv);
@@ -3612,11 +3604,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY)
 		goto done;
 
-	new_adv = 0;
-	if (advertise & ADVERTISED_1000baseT_Half)
-		new_adv |= ADVERTISE_1000HALF;
-	if (advertise & ADVERTISED_1000baseT_Full)
-		new_adv |= ADVERTISE_1000FULL;
+	new_adv = ethtool_adv_to_mii_1000T(advertise);
 
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
 	    tp->pci_chip_rev_id == CHIPREV_ID_5701_B0)
@@ -3790,14 +3778,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 {
 	u32 adv_reg, all_mask = 0;
 
-	if (mask & ADVERTISED_10baseT_Half)
-		all_mask |= ADVERTISE_10HALF;
-	if (mask & ADVERTISED_10baseT_Full)
-		all_mask |= ADVERTISE_10FULL;
-	if (mask & ADVERTISED_100baseT_Half)
-		all_mask |= ADVERTISE_100HALF;
-	if (mask & ADVERTISED_100baseT_Full)
-		all_mask |= ADVERTISE_100FULL;
+	all_mask = ethtool_adv_to_mii_100bt(mask);
 
 	if (tg3_readphy(tp, MII_ADVERTISE, &adv_reg))
 		return 0;
@@ -3808,11 +3789,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 	if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) {
 		u32 tg3_ctrl;
 
-		all_mask = 0;
-		if (mask & ADVERTISED_1000baseT_Half)
-			all_mask |= ADVERTISE_1000HALF;
-		if (mask & ADVERTISED_1000baseT_Full)
-			all_mask |= ADVERTISE_1000FULL;
+		all_mask = ethtool_adv_to_mii_1000T(mask);
 
 		if (tg3_readphy(tp, MII_CTRL1000, &tg3_ctrl))
 			return 0;
@@ -4903,23 +4880,19 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, int force_reset)
 	    (tp->phy_flags & TG3_PHYFLG_PARALLEL_DETECT)) {
 		/* do nothing, just check for link up at the end */
 	} else if (tp->link_config.autoneg == AUTONEG_ENABLE) {
-		u32 adv, new_adv;
+		u32 adv, newadv;
 
 		err |= tg3_readphy(tp, MII_ADVERTISE, &adv);
-		new_adv = adv & ~(ADVERTISE_1000XFULL | ADVERTISE_1000XHALF |
-				  ADVERTISE_1000XPAUSE |
-				  ADVERTISE_1000XPSE_ASYM |
-				  ADVERTISE_SLCT);
-
-		new_adv |= tg3_advert_flowctrl_1000X(tp->link_config.flowctrl);
+		newadv = adv & ~(ADVERTISE_1000XFULL | ADVERTISE_1000XHALF |
+				 ADVERTISE_1000XPAUSE |
+				 ADVERTISE_1000XPSE_ASYM |
+				 ADVERTISE_SLCT);
 
-		if (tp->link_config.advertising & ADVERTISED_1000baseT_Half)
-			new_adv |= ADVERTISE_1000XHALF;
-		if (tp->link_config.advertising & ADVERTISED_1000baseT_Full)
-			new_adv |= ADVERTISE_1000XFULL;
+		newadv |= tg3_advert_flowctrl_1000X(tp->link_config.flowctrl);
+		newadv |= ethtool_adv_to_mii_1000X(tp->link_config.advertising);
 
-		if ((new_adv != adv) || !(bmcr & BMCR_ANENABLE)) {
-			tg3_writephy(tp, MII_ADVERTISE, new_adv);
+		if ((newadv != adv) || !(bmcr & BMCR_ANENABLE)) {
+			tg3_writephy(tp, MII_ADVERTISE, newadv);
 			bmcr |= BMCR_ANENABLE | BMCR_ANRESTART;
 			tg3_writephy(tp, MII_BMCR, bmcr);
 
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 3ebeb9d400fb..9997be525089 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -1151,19 +1151,8 @@ static int link_status_mii(struct niu *np, int *link_up_p)
 		supported |= SUPPORTED_1000baseT_Full;
 	lp->supported = supported;
 
-	advertising = 0;
-	if (advert & ADVERTISE_10HALF)
-		advertising |= ADVERTISED_10baseT_Half;
-	if (advert & ADVERTISE_10FULL)
-		advertising |= ADVERTISED_10baseT_Full;
-	if (advert & ADVERTISE_100HALF)
-		advertising |= ADVERTISED_100baseT_Half;
-	if (advert & ADVERTISE_100FULL)
-		advertising |= ADVERTISED_100baseT_Full;
-	if (ctrl1000 & ADVERTISE_1000HALF)
-		advertising |= ADVERTISED_1000baseT_Half;
-	if (ctrl1000 & ADVERTISE_1000FULL)
-		advertising |= ADVERTISED_1000baseT_Full;
+	advertising = mii_adv_to_ethtool_100bt(advert);
+	advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
 
 	if (bmcr & BMCR_ANENABLE) {
 		int neg, neg1000;
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index c62e7816d548..d0a296272713 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -41,20 +41,8 @@ static u32 mii_get_an(struct mii_if_info *mii, u16 addr)
 	advert = mii->mdio_read(mii->dev, mii->phy_id, addr);
 	if (advert & LPA_LPACK)
 		result |= ADVERTISED_Autoneg;
-	if (advert & ADVERTISE_10HALF)
-		result |= ADVERTISED_10baseT_Half;
-	if (advert & ADVERTISE_10FULL)
-		result |= ADVERTISED_10baseT_Full;
-	if (advert & ADVERTISE_100HALF)
-		result |= ADVERTISED_100baseT_Half;
-	if (advert & ADVERTISE_100FULL)
-		result |= ADVERTISED_100baseT_Full;
-	if (advert & ADVERTISE_PAUSE_CAP)
-		result |= ADVERTISED_Pause;
-	if (advert & ADVERTISE_PAUSE_ASYM)
-		result |= ADVERTISED_Asym_Pause;
-
-	return result;
+
+	return result | mii_adv_to_ethtool_100bt(advert);
 }
 
 /**
@@ -104,19 +92,13 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 		ecmd->autoneg = AUTONEG_ENABLE;
 
 		ecmd->advertising |= mii_get_an(mii, MII_ADVERTISE);
-		if (ctrl1000 & ADVERTISE_1000HALF)
-			ecmd->advertising |= ADVERTISED_1000baseT_Half;
-		if (ctrl1000 & ADVERTISE_1000FULL)
-			ecmd->advertising |= ADVERTISED_1000baseT_Full;
+		if (mii->supports_gmii)
+			ecmd->advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
 
 		if (bmsr & BMSR_ANEGCOMPLETE) {
 			ecmd->lp_advertising = mii_get_an(mii, MII_LPA);
-			if (stat1000 & LPA_1000HALF)
-				ecmd->lp_advertising |=
-					ADVERTISED_1000baseT_Half;
-			if (stat1000 & LPA_1000FULL)
-				ecmd->lp_advertising |=
-					ADVERTISED_1000baseT_Full;
+			ecmd->lp_advertising |=
+					     mii_lpa_to_ethtool_1000T(stat1000);
 		} else {
 			ecmd->lp_advertising = 0;
 		}
@@ -204,20 +186,10 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 			advert2 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
 			tmp2 = advert2 & ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL);
 		}
-		if (ecmd->advertising & ADVERTISED_10baseT_Half)
-			tmp |= ADVERTISE_10HALF;
-		if (ecmd->advertising & ADVERTISED_10baseT_Full)
-			tmp |= ADVERTISE_10FULL;
-		if (ecmd->advertising & ADVERTISED_100baseT_Half)
-			tmp |= ADVERTISE_100HALF;
-		if (ecmd->advertising & ADVERTISED_100baseT_Full)
-			tmp |= ADVERTISE_100FULL;
-		if (mii->supports_gmii) {
-			if (ecmd->advertising & ADVERTISED_1000baseT_Half)
-				tmp2 |= ADVERTISE_1000HALF;
-			if (ecmd->advertising & ADVERTISED_1000baseT_Full)
-				tmp2 |= ADVERTISE_1000FULL;
-		}
+		tmp |= ethtool_adv_to_mii_100bt(ecmd->advertising);
+
+		if (mii->supports_gmii)
+			tmp2 |= ethtool_adv_to_mii_1000T(ecmd->advertising);
 		if (advert != tmp) {
 			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
 			mii->advertising = tmp;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 83a5a5afec67..edb905f80115 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -563,20 +563,9 @@ static int genphy_config_advert(struct phy_device *phydev)
 	if (adv < 0)
 		return adv;
 
-	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP | 
+	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP |
 		 ADVERTISE_PAUSE_ASYM);
-	if (advertise & ADVERTISED_10baseT_Half)
-		adv |= ADVERTISE_10HALF;
-	if (advertise & ADVERTISED_10baseT_Full)
-		adv |= ADVERTISE_10FULL;
-	if (advertise & ADVERTISED_100baseT_Half)
-		adv |= ADVERTISE_100HALF;
-	if (advertise & ADVERTISED_100baseT_Full)
-		adv |= ADVERTISE_100FULL;
-	if (advertise & ADVERTISED_Pause)
-		adv |= ADVERTISE_PAUSE_CAP;
-	if (advertise & ADVERTISED_Asym_Pause)
-		adv |= ADVERTISE_PAUSE_ASYM;
+	adv |= ethtool_adv_to_mii_100bt(advertise);
 
 	if (adv != oldadv) {
 		err = phy_write(phydev, MII_ADVERTISE, adv);
@@ -595,10 +584,7 @@ static int genphy_config_advert(struct phy_device *phydev)
 			return adv;
 
 		adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
-		if (advertise & SUPPORTED_1000baseT_Half)
-			adv |= ADVERTISE_1000HALF;
-		if (advertise & SUPPORTED_1000baseT_Full)
-			adv |= ADVERTISE_1000FULL;
+		adv |= ethtool_adv_to_mii_1000T(advertise);
 
 		if (adv != oldadv) {
 			err = phy_write(phydev, MII_CTRL1000, adv);
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 27748230aa69..6697b9112014 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -9,6 +9,7 @@
 #define __LINUX_MII_H__
 
 #include <linux/types.h>
+#include <linux/ethtool.h>
 
 /* Generic MII registers. */
 #define MII_BMCR		0x00	/* Basic mode control register */
@@ -239,6 +240,171 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock,
 	return 0;
 }
 
+/**
+ * ethtool_adv_to_mii_100bt
+ * @ethadv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_ADVERTISE register.
+ */
+static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv)
+{
+	u32 result = 0;
+
+	if (ethadv & ADVERTISED_10baseT_Half)
+		result |= ADVERTISE_10HALF;
+	if (ethadv & ADVERTISED_10baseT_Full)
+		result |= ADVERTISE_10FULL;
+	if (ethadv & ADVERTISED_100baseT_Half)
+		result |= ADVERTISE_100HALF;
+	if (ethadv & ADVERTISED_100baseT_Full)
+		result |= ADVERTISE_100FULL;
+	if (ethadv & ADVERTISED_Pause)
+		result |= ADVERTISE_PAUSE_CAP;
+	if (ethadv & ADVERTISED_Asym_Pause)
+		result |= ADVERTISE_PAUSE_ASYM;
+
+	return result;
+}
+
+/**
+ * mii_adv_to_ethtool_100bt
+ * @adv: value of the MII_ADVERTISE register
+ *
+ * A small helper function that translates MII_ADVERTISE bits
+ * to ethtool advertisement settings.
+ */
+static inline u32 mii_adv_to_ethtool_100bt(u32 adv)
+{
+	u32 result = 0;
+
+	if (adv & ADVERTISE_10HALF)
+		result |= ADVERTISED_10baseT_Half;
+	if (adv & ADVERTISE_10FULL)
+		result |= ADVERTISED_10baseT_Full;
+	if (adv & ADVERTISE_100HALF)
+		result |= ADVERTISED_100baseT_Half;
+	if (adv & ADVERTISE_100FULL)
+		result |= ADVERTISED_100baseT_Full;
+	if (adv & ADVERTISE_PAUSE_CAP)
+		result |= ADVERTISED_Pause;
+	if (adv & ADVERTISE_PAUSE_ASYM)
+		result |= ADVERTISED_Asym_Pause;
+
+	return result;
+}
+
+/**
+ * ethtool_adv_to_mii_1000T
+ * @ethadv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_CTRL1000 register when in 1000T mode.
+ */
+static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv)
+{
+	u32 result = 0;
+
+	if (ethadv & ADVERTISED_1000baseT_Half)
+		result |= ADVERTISE_1000HALF;
+	if (ethadv & ADVERTISED_1000baseT_Full)
+		result |= ADVERTISE_1000FULL;
+
+	return result;
+}
+
+/**
+ * mii_adv_to_ethtool_1000T
+ * @adv: value of the MII_CTRL1000 register
+ *
+ * A small helper function that translates MII_CTRL1000
+ * bits, when in 1000Base-T mode, to ethtool
+ * advertisement settings.
+ */
+static inline u32 mii_adv_to_ethtool_1000T(u32 adv)
+{
+	u32 result = 0;
+
+	if (adv & ADVERTISE_1000HALF)
+		result |= ADVERTISED_1000baseT_Half;
+	if (adv & ADVERTISE_1000FULL)
+		result |= ADVERTISED_1000baseT_Full;
+
+	return result;
+}
+
+#define mii_lpa_to_ethtool_100bt(lpa)	mii_adv_to_ethtool_100bt(lpa)
+
+/**
+ * mii_lpa_to_ethtool_1000T
+ * @adv: value of the MII_STAT1000 register
+ *
+ * A small helper function that translates MII_STAT1000
+ * bits, when in 1000Base-T mode, to ethtool
+ * advertisement settings.
+ */
+static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa)
+{
+	u32 result = 0;
+
+	if (lpa & LPA_1000HALF)
+		result |= ADVERTISED_1000baseT_Half;
+	if (lpa & LPA_1000FULL)
+		result |= ADVERTISED_1000baseT_Full;
+
+	return result;
+}
+
+/**
+ * ethtool_adv_to_mii_1000X
+ * @ethadv: the ethtool advertisement settings
+ *
+ * A small helper function that translates ethtool advertisement
+ * settings to phy autonegotiation advertisements for the
+ * MII_CTRL1000 register when in 1000Base-X mode.
+ */
+static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv)
+{
+	u32 result = 0;
+
+	if (ethadv & ADVERTISED_1000baseT_Half)
+		result |= ADVERTISE_1000XHALF;
+	if (ethadv & ADVERTISED_1000baseT_Full)
+		result |= ADVERTISE_1000XFULL;
+	if (ethadv & ADVERTISED_Pause)
+		result |= ADVERTISE_1000XPAUSE;
+	if (ethadv & ADVERTISED_Asym_Pause)
+		result |= ADVERTISE_1000XPSE_ASYM;
+
+	return result;
+}
+
+/**
+ * mii_adv_to_ethtool_1000X
+ * @adv: value of the MII_CTRL1000 register
+ *
+ * A small helper function that translates MII_CTRL1000
+ * bits, when in 1000Base-X mode, to ethtool
+ * advertisement settings.
+ */
+static inline u32 mii_adv_to_ethtool_1000X(u32 adv)
+{
+	u32 result = 0;
+
+	if (adv & ADVERTISE_1000XHALF)
+		result |= ADVERTISED_1000baseT_Half;
+	if (adv & ADVERTISE_1000XFULL)
+		result |= ADVERTISED_1000baseT_Full;
+	if (adv & ADVERTISE_1000XPAUSE)
+		result |= ADVERTISED_Pause;
+	if (adv & ADVERTISE_1000XPSE_ASYM)
+		result |= ADVERTISED_Asym_Pause;
+
+	return result;
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From 0345e1864283207bc236120dd3e13ff2391fa85f Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Wed, 16 Nov 2011 14:05:33 +0000
Subject: net: verify GSO flag bits against netdev features
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b35ffd735ecc..31da3bbe7b1b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2492,6 +2492,15 @@ netdev_features_t netif_skb_features(struct sk_buff *skb);
 static inline int net_gso_ok(netdev_features_t features, int gso_type)
 {
 	netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT;
+
+	/* check flags correspondence */
+	BUILD_BUG_ON(SKB_GSO_TCPV4   != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_UDP     != (NETIF_F_UFO >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_DODGY   != (NETIF_F_GSO_ROBUST >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_TCP_ECN != (NETIF_F_TSO_ECN >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_TCPV6   != (NETIF_F_TSO6 >> NETIF_F_GSO_SHIFT));
+	BUILD_BUG_ON(SKB_GSO_FCOE    != (NETIF_F_FSO >> NETIF_F_GSO_SHIFT));
+
 	return (features & feature) == feature;
 }
 
-- 
cgit v1.2.3


From ccf5ff69fbbd8d877377f5786369cf5aa78a15fc Mon Sep 17 00:00:00 2001
From: david decotigny <david.decotigny@google.com>
Date: Wed, 16 Nov 2011 12:15:10 +0000
Subject: net: new counter for tx_timeout errors in sysfs

This adds the /sys/class/net/DEV/queues/Q/tx_timeout attribute
containing the total number of timeout events on the given queue. It
is always available with CONFIG_SYSFS, independently of
CONFIG_RPS/XPS.

Credits to Stephen Hemminger for a preliminary version of this patch.

Tested:
  without CONFIG_SYSFS (compilation only)
  with sysfs and without CONFIG_RPS & CONFIG_XPS
  with sysfs and without CONFIG_RPS
  with sysfs and without CONFIG_XPS
  with defaults

Signed-off-by: David Decotigny <david.decotigny@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 12 ++++++++++--
 net/core/net-sysfs.c      | 37 +++++++++++++++++++++++++++++++------
 net/sched/sch_generic.c   |  1 +
 3 files changed, 42 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 31da3bbe7b1b..4d5698aa828b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -532,7 +532,7 @@ struct netdev_queue {
 	struct Qdisc		*qdisc;
 	unsigned long		state;
 	struct Qdisc		*qdisc_sleeping;
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	struct kobject		kobj;
 #endif
 #if defined(CONFIG_XPS) && defined(CONFIG_NUMA)
@@ -547,6 +547,12 @@ struct netdev_queue {
 	 * please use this field instead of dev->trans_start
 	 */
 	unsigned long		trans_start;
+
+	/*
+	 * Number of TX timeouts for this queue
+	 * (/sys/class/net/DEV/Q/trans_timeout)
+	 */
+	unsigned long		trans_timeout;
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1109,9 +1115,11 @@ struct net_device {
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	struct kset		*queues_kset;
+#endif
 
+#ifdef CONFIG_RPS
 	struct netdev_rx_queue	*_rx;
 
 	/* Number of RX queues allocated at register_netdev() time */
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index a64382f201b8..602b1419998c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -780,7 +780,7 @@ net_rx_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 #endif
 }
 
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
 /*
  * netdev_queue sysfs structures and functions.
  */
@@ -826,6 +826,23 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = {
 	.store = netdev_queue_attr_store,
 };
 
+static ssize_t show_trans_timeout(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attribute,
+				  char *buf)
+{
+	unsigned long trans_timeout;
+
+	spin_lock_irq(&queue->_xmit_lock);
+	trans_timeout = queue->trans_timeout;
+	spin_unlock_irq(&queue->_xmit_lock);
+
+	return sprintf(buf, "%lu", trans_timeout);
+}
+
+static struct netdev_queue_attribute queue_trans_timeout =
+	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
+
+#ifdef CONFIG_XPS
 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
 	struct net_device *dev = queue->dev;
@@ -1020,12 +1037,17 @@ error:
 
 static struct netdev_queue_attribute xps_cpus_attribute =
     __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map);
+#endif /* CONFIG_XPS */
 
 static struct attribute *netdev_queue_default_attrs[] = {
+	&queue_trans_timeout.attr,
+#ifdef CONFIG_XPS
 	&xps_cpus_attribute.attr,
+#endif
 	NULL
 };
 
+#ifdef CONFIG_XPS
 static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
@@ -1076,10 +1098,13 @@ static void netdev_queue_release(struct kobject *kobj)
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
+#endif /* CONFIG_XPS */
 
 static struct kobj_type netdev_queue_ktype = {
 	.sysfs_ops = &netdev_queue_sysfs_ops,
+#ifdef CONFIG_XPS
 	.release = netdev_queue_release,
+#endif
 	.default_attrs = netdev_queue_default_attrs,
 };
 
@@ -1102,12 +1127,12 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
 
 	return error;
 }
-#endif /* CONFIG_XPS */
+#endif /* CONFIG_SYSFS */
 
 int
 netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 {
-#ifdef CONFIG_XPS
+#ifdef CONFIG_SYSFS
 	int i;
 	int error = 0;
 
@@ -1125,14 +1150,14 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 	return error;
 #else
 	return 0;
-#endif
+#endif /* CONFIG_SYSFS */
 }
 
 static int register_queue_kobjects(struct net_device *net)
 {
 	int error = 0, txq = 0, rxq = 0, real_rx = 0, real_tx = 0;
 
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	net->queues_kset = kset_create_and_add("queues",
 	    NULL, &net->dev.kobj);
 	if (!net->queues_kset)
@@ -1173,7 +1198,7 @@ static void remove_queue_kobjects(struct net_device *net)
 
 	net_rx_queue_update_kobjects(net, real_rx, 0);
 	netdev_queue_update_kobjects(net, real_tx, 0);
-#if defined(CONFIG_RPS) || defined(CONFIG_XPS)
+#ifdef CONFIG_SYSFS
 	kset_unregister(net->queues_kset);
 #endif
 }
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 69fca2798804..79ac1458c2ba 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -246,6 +246,7 @@ static void dev_watchdog(unsigned long arg)
 				    time_after(jiffies, (trans_start +
 							 dev->watchdog_timeo))) {
 					some_queue_timedout = 1;
+					txq->trans_timeout++;
 					break;
 				}
 			}
-- 
cgit v1.2.3


From adc9300e78e6091a7eaa1821213836379d4dbaa8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 17 Nov 2011 03:13:26 +0000
Subject: net: use jump_label to shortcut RPS if not setup

Most machines dont use RPS/RFS, and pay a fair amount of instructions in
netif_receive_skb() / netif_rx() / get_rps_cpu() just to discover
RPS/RFS is not setup.

Add a jump_label named rps_needed.

If no device rps_map or global rps_sock_flow_table is setup,
netif_receive_skb() / netif_rx() do a single instruction instead of many
ones, including conditional jumps.

jmp +0    (if CONFIG_JUMP_LABEL=y)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  |  5 +++++
 net/core/dev.c             | 21 +++++++++------------
 net/core/net-sysfs.c       |  7 +++++--
 net/core/sysctl_net_core.c |  9 +++++++--
 4 files changed, 26 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4d5698aa828b..0bbe030fc014 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -214,6 +214,11 @@ enum {
 #include <linux/cache.h>
 #include <linux/skbuff.h>
 
+#ifdef CONFIG_RPS
+#include <linux/jump_label.h>
+extern struct jump_label_key rps_needed;
+#endif
+
 struct neighbour;
 struct neigh_parms;
 struct sk_buff;
diff --git a/net/core/dev.c b/net/core/dev.c
index 26c49d55e79d..f78959996148 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2711,6 +2711,8 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
+struct jump_label_key rps_needed __read_mostly;
+
 static struct rps_dev_flow *
 set_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 	    struct rps_dev_flow *rflow, u16 next_cpu)
@@ -2994,7 +2996,7 @@ int netif_rx(struct sk_buff *skb)
 
 	trace_netif_rx(skb);
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed))	{
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
@@ -3009,14 +3011,13 @@ int netif_rx(struct sk_buff *skb)
 
 		rcu_read_unlock();
 		preempt_enable();
-	}
-#else
+	} else
+#endif
 	{
 		unsigned int qtail;
 		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
 		put_cpu();
 	}
-#endif
 	return ret;
 }
 EXPORT_SYMBOL(netif_rx);
@@ -3359,7 +3360,7 @@ int netif_receive_skb(struct sk_buff *skb)
 		return NET_RX_SUCCESS;
 
 #ifdef CONFIG_RPS
-	{
+	if (static_branch(&rps_needed)) {
 		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu, ret;
 
@@ -3370,16 +3371,12 @@ int netif_receive_skb(struct sk_buff *skb)
 		if (cpu >= 0) {
 			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
 			rcu_read_unlock();
-		} else {
-			rcu_read_unlock();
-			ret = __netif_receive_skb(skb);
+			return ret;
 		}
-
-		return ret;
+		rcu_read_unlock();
 	}
-#else
-	return __netif_receive_skb(skb);
 #endif
+	return __netif_receive_skb(skb);
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 602b1419998c..db6c2f83633f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -606,9 +606,12 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	rcu_assign_pointer(queue->rps_map, map);
 	spin_unlock(&rps_map_lock);
 
-	if (old_map)
+	if (map)
+		jump_label_inc(&rps_needed);
+	if (old_map) {
 		kfree_rcu(old_map, rcu);
-
+		jump_label_dec(&rps_needed);
+	}
 	free_cpumask_var(mask);
 	return len;
 }
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 77a65f031488..d05559d4d9cd 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -68,8 +68,13 @@ static int rps_sock_flow_sysctl(ctl_table *table, int write,
 
 		if (sock_table != orig_sock_table) {
 			rcu_assign_pointer(rps_sock_flow_table, sock_table);
-			synchronize_rcu();
-			vfree(orig_sock_table);
+			if (sock_table)
+				jump_label_inc(&rps_needed);
+			if (orig_sock_table) {
+				jump_label_dec(&rps_needed);
+				synchronize_rcu();
+				vfree(orig_sock_table);
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 56c978f1da1f630ef18aa668a9748c6c23ab819b Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 18 Nov 2011 02:20:05 +0000
Subject: net: Remove LL_ALLOCATED_SPACE

net: Remove LL_ALLOCATED_SPACE

The macro LL_ALLOCATED_SPACE was ill-conceived.  It applies the
alignment to the sum of needed_headroom and needed_tailroom.  As
the amount that is then reserved for head room is needed_headroom
with alignment, this means that the tail room left may be too small.

Now that all uses of LL_ALLOCATED_SPACE have been removed, this
patch finally removes the macro itself.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0bbe030fc014..3eb383a9b5ed 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -279,16 +279,11 @@ struct hh_cache {
  *
  * We could use other alignment values, but we must maintain the
  * relationship HH alignment <= LL alignment.
- *
- * LL_ALLOCATED_SPACE also takes into account the tailroom the device
- * may need.
  */
 #define LL_RESERVED_SPACE(dev) \
 	((((dev)->hard_header_len+(dev)->needed_headroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 #define LL_RESERVED_SPACE_EXTRA(dev,extra) \
 	((((dev)->hard_header_len+(dev)->needed_headroom+(extra))&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
-#define LL_ALLOCATED_SPACE(dev) \
-	((((dev)->hard_header_len+(dev)->needed_headroom+(dev)->needed_tailroom)&~(HH_DATA_MOD - 1)) + HH_DATA_MOD)
 
 struct header_ops {
 	int	(*create) (struct sk_buff *skb, struct net_device *dev,
-- 
cgit v1.2.3


From bdb6e697b2a76c541960b86ab8fda88f3de1adf2 Mon Sep 17 00:00:00 2001
From: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>
Date: Fri, 18 Nov 2011 01:22:05 +0000
Subject: Phonet: set the pipe handle using setsockopt
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This provides flexibility to set the pipe handle
using setsockopt. The pipe can be enabled (if disabled) later
using ioctl.

Signed-off-by: Hemant Ramdasi <hemant.ramdasi@stericsson.com>
Signed-off-by: Dinesh Kumar Sharma <dinesh.sharma@stericsson.com>
Acked-by: Rémi Denis-Courmont <remi.denis-courmont@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/phonet.h |   2 +
 net/phonet/pep.c       | 106 ++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 97 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/phonet.h b/include/linux/phonet.h
index f53a4167c5f4..f48bfc80cb4b 100644
--- a/include/linux/phonet.h
+++ b/include/linux/phonet.h
@@ -38,6 +38,7 @@
 #define PNPIPE_ENCAP		1
 #define PNPIPE_IFINDEX		2
 #define PNPIPE_HANDLE		3
+#define PNPIPE_INITSTATE	4
 
 #define PNADDR_ANY		0
 #define PNADDR_BROADCAST	0xFC
@@ -49,6 +50,7 @@
 
 /* ioctls */
 #define SIOCPNGETOBJECT		(SIOCPROTOPRIVATE + 0)
+#define SIOCPNENABLEPIPE	(SIOCPROTOPRIVATE + 13)
 #define SIOCPNADDRESOURCE	(SIOCPROTOPRIVATE + 14)
 #define SIOCPNDELRESOURCE	(SIOCPROTOPRIVATE + 15)
 
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 2ba6e9fb4cbc..9f60008740e3 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -534,6 +534,29 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
 	return pipe_handler_send_created_ind(sk);
 }
 
+static int pep_enableresp_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct pnpipehdr *hdr = pnp_hdr(skb);
+
+	if (hdr->error_code != PN_PIPE_NO_ERROR)
+		return -ECONNREFUSED;
+
+	return pep_indicate(sk, PNS_PIPE_ENABLED_IND, 0 /* sub-blocks */,
+		NULL, 0, GFP_ATOMIC);
+
+}
+
+static void pipe_start_flow_control(struct sock *sk)
+{
+	struct pep_sock *pn = pep_sk(sk);
+
+	if (!pn_flow_safe(pn->tx_fc)) {
+		atomic_set(&pn->tx_credits, 1);
+		sk->sk_write_space(sk);
+	}
+	pipe_grant_credits(sk, GFP_ATOMIC);
+}
+
 /* Queue an skb to an actively connected sock.
  * Socket lock must be held. */
 static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
@@ -579,13 +602,25 @@ static int pipe_handler_do_rcv(struct sock *sk, struct sk_buff *skb)
 			sk->sk_state = TCP_CLOSE_WAIT;
 			break;
 		}
+		if (pn->init_enable == PN_PIPE_DISABLE)
+			sk->sk_state = TCP_SYN_RECV;
+		else {
+			sk->sk_state = TCP_ESTABLISHED;
+			pipe_start_flow_control(sk);
+		}
+		break;
 
-		sk->sk_state = TCP_ESTABLISHED;
-		if (!pn_flow_safe(pn->tx_fc)) {
-			atomic_set(&pn->tx_credits, 1);
-			sk->sk_write_space(sk);
+	case PNS_PEP_ENABLE_RESP:
+		if (sk->sk_state != TCP_SYN_SENT)
+			break;
+
+		if (pep_enableresp_rcv(sk, skb)) {
+			sk->sk_state = TCP_CLOSE_WAIT;
+			break;
 		}
-		pipe_grant_credits(sk, GFP_ATOMIC);
+
+		sk->sk_state = TCP_ESTABLISHED;
+		pipe_start_flow_control(sk);
 		break;
 
 	case PNS_PEP_DISCONNECT_RESP:
@@ -864,14 +899,32 @@ static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len)
 	int err;
 	u8 data[4] = { 0 /* sub-blocks */, PAD, PAD, PAD };
 
-	pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+	if (pn->pipe_handle == PN_PIPE_INVALID_HANDLE)
+		pn->pipe_handle = 1; /* anything but INVALID_HANDLE */
+
 	err = pipe_handler_request(sk, PNS_PEP_CONNECT_REQ,
-					PN_PIPE_ENABLE, data, 4);
+				pn->init_enable, data, 4);
 	if (err) {
 		pn->pipe_handle = PN_PIPE_INVALID_HANDLE;
 		return err;
 	}
+
 	sk->sk_state = TCP_SYN_SENT;
+
+	return 0;
+}
+
+static int pep_sock_enable(struct sock *sk, struct sockaddr *addr, int len)
+{
+	int err;
+
+	err = pipe_handler_request(sk, PNS_PEP_ENABLE_REQ, PAD,
+				NULL, 0);
+	if (err)
+		return err;
+
+	sk->sk_state = TCP_SYN_SENT;
+
 	return 0;
 }
 
@@ -879,11 +932,14 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	int answ;
+	int ret = -ENOIOCTLCMD;
 
 	switch (cmd) {
 	case SIOCINQ:
-		if (sk->sk_state == TCP_LISTEN)
-			return -EINVAL;
+		if (sk->sk_state == TCP_LISTEN) {
+			ret = -EINVAL;
+			break;
+		}
 
 		lock_sock(sk);
 		if (sock_flag(sk, SOCK_URGINLINE) &&
@@ -894,10 +950,22 @@ static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg)
 		else
 			answ = 0;
 		release_sock(sk);
-		return put_user(answ, (int __user *)arg);
+		ret = put_user(answ, (int __user *)arg);
+		break;
+
+	case SIOCPNENABLEPIPE:
+		lock_sock(sk);
+		if (sk->sk_state == TCP_SYN_SENT)
+			ret =  -EBUSY;
+		else if (sk->sk_state == TCP_ESTABLISHED)
+			ret = -EISCONN;
+		else
+			ret = pep_sock_enable(sk, NULL, 0);
+		release_sock(sk);
+		break;
 	}
 
-	return -ENOIOCTLCMD;
+	return ret;
 }
 
 static int pep_init(struct sock *sk)
@@ -960,6 +1028,18 @@ static int pep_setsockopt(struct sock *sk, int level, int optname,
 		}
 		goto out_norel;
 
+	case PNPIPE_HANDLE:
+		if ((sk->sk_state == TCP_CLOSE) &&
+			(val >= 0) && (val < PN_PIPE_INVALID_HANDLE))
+			pn->pipe_handle = val;
+		else
+			err = -EINVAL;
+		break;
+
+	case PNPIPE_INITSTATE:
+		pn->init_enable = !!val;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -995,6 +1075,10 @@ static int pep_getsockopt(struct sock *sk, int level, int optname,
 			return -EINVAL;
 		break;
 
+	case PNPIPE_INITSTATE:
+		val = pn->init_enable;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From 37f07023d30708b5da091fe6d6be9b60783c6d82 Mon Sep 17 00:00:00 2001
From: Matt Carlson <mcarlson@broadcom.com>
Date: Thu, 17 Nov 2011 14:30:55 +0000
Subject: net: Change mii to ethtool advertisement function names

This patch implements advice by Ben Hutchings to change the mii side of
the function names to look more like the register whose values they
convert.  New LPA translation functions have been added as well.

Signed-off-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Michael Chan <mchan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2.c | 20 +++++------
 drivers/net/ethernet/broadcom/tg3.c  | 10 +++---
 drivers/net/ethernet/sun/niu.c       |  4 +--
 drivers/net/mii.c                    | 15 ++++-----
 drivers/net/phy/phy_device.c         |  4 +--
 include/linux/mii.h                  | 64 +++++++++++++++++++++++++++---------
 6 files changed, 75 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 66f6e7f654c3..83d8cefba8c0 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -2054,8 +2054,8 @@ __acquires(&bp->phy_lock)
 
 	if (bp->autoneg & AUTONEG_SPEED) {
 		u32 adv_reg, adv1000_reg;
-		u32 new_adv_reg = 0;
-		u32 new_adv1000_reg = 0;
+		u32 new_adv = 0;
+		u32 new_adv1000 = 0;
 
 		bnx2_read_phy(bp, bp->mii_adv, &adv_reg);
 		adv_reg &= (PHY_ALL_10_100_SPEED | ADVERTISE_PAUSE_CAP |
@@ -2064,18 +2064,18 @@ __acquires(&bp->phy_lock)
 		bnx2_read_phy(bp, MII_CTRL1000, &adv1000_reg);
 		adv1000_reg &= PHY_ALL_1000_SPEED;
 
-		new_adv_reg = ethtool_adv_to_mii_100bt(bp->advertising);
-		new_adv_reg |= ADVERTISE_CSMA;
-		new_adv_reg |= bnx2_phy_get_pause_adv(bp);
+		new_adv = ethtool_adv_to_mii_adv_t(bp->advertising);
+		new_adv |= ADVERTISE_CSMA;
+		new_adv |= bnx2_phy_get_pause_adv(bp);
 
-		new_adv1000_reg |= ethtool_adv_to_mii_1000T(bp->advertising);
+		new_adv1000 |= ethtool_adv_to_mii_ctrl1000_t(bp->advertising);
 
-		if ((adv1000_reg != new_adv1000_reg) ||
-			(adv_reg != new_adv_reg) ||
+		if ((adv1000_reg != new_adv1000) ||
+			(adv_reg != new_adv) ||
 			((bmcr & BMCR_ANENABLE) == 0)) {
 
-			bnx2_write_phy(bp, bp->mii_adv, new_adv_reg);
-			bnx2_write_phy(bp, MII_CTRL1000, new_adv1000_reg);
+			bnx2_write_phy(bp, bp->mii_adv, new_adv);
+			bnx2_write_phy(bp, MII_CTRL1000, new_adv1000);
 			bnx2_write_phy(bp, bp->mii_bmcr, BMCR_ANRESTART |
 				BMCR_ANENABLE);
 		}
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index 024ca1d4d028..47c0e3a1f58d 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -3594,7 +3594,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	u32 val, new_adv;
 
 	new_adv = ADVERTISE_CSMA;
-	new_adv |= ethtool_adv_to_mii_100bt(advertise);
+	new_adv |= ethtool_adv_to_mii_adv_t(advertise);
 	new_adv |= tg3_advert_flowctrl_1000T(flowctrl);
 
 	err = tg3_writephy(tp, MII_ADVERTISE, new_adv);
@@ -3604,7 +3604,7 @@ static int tg3_phy_autoneg_cfg(struct tg3 *tp, u32 advertise, u32 flowctrl)
 	if (tp->phy_flags & TG3_PHYFLG_10_100_ONLY)
 		goto done;
 
-	new_adv = ethtool_adv_to_mii_1000T(advertise);
+	new_adv = ethtool_adv_to_mii_ctrl1000_t(advertise);
 
 	if (tp->pci_chip_rev_id == CHIPREV_ID_5701_A0 ||
 	    tp->pci_chip_rev_id == CHIPREV_ID_5701_B0)
@@ -3778,7 +3778,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 {
 	u32 adv_reg, all_mask = 0;
 
-	all_mask = ethtool_adv_to_mii_100bt(mask);
+	all_mask = ethtool_adv_to_mii_adv_t(mask);
 
 	if (tg3_readphy(tp, MII_ADVERTISE, &adv_reg))
 		return 0;
@@ -3789,7 +3789,7 @@ static int tg3_copper_is_advertising_all(struct tg3 *tp, u32 mask)
 	if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) {
 		u32 tg3_ctrl;
 
-		all_mask = ethtool_adv_to_mii_1000T(mask);
+		all_mask = ethtool_adv_to_mii_ctrl1000_t(mask);
 
 		if (tg3_readphy(tp, MII_CTRL1000, &tg3_ctrl))
 			return 0;
@@ -4889,7 +4889,7 @@ static int tg3_setup_fiber_mii_phy(struct tg3 *tp, int force_reset)
 				 ADVERTISE_SLCT);
 
 		newadv |= tg3_advert_flowctrl_1000X(tp->link_config.flowctrl);
-		newadv |= ethtool_adv_to_mii_1000X(tp->link_config.advertising);
+		newadv |= ethtool_adv_to_mii_adv_x(tp->link_config.advertising);
 
 		if ((newadv != adv) || !(bmcr & BMCR_ANENABLE)) {
 			tg3_writephy(tp, MII_ADVERTISE, newadv);
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index 9997be525089..680b107fdabd 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -1151,8 +1151,8 @@ static int link_status_mii(struct niu *np, int *link_up_p)
 		supported |= SUPPORTED_1000baseT_Full;
 	lp->supported = supported;
 
-	advertising = mii_adv_to_ethtool_100bt(advert);
-	advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
+	advertising = mii_adv_to_ethtool_adv_t(advert);
+	advertising |= mii_ctrl1000_to_ethtool_adv_t(ctrl1000);
 
 	if (bmcr & BMCR_ANENABLE) {
 		int neg, neg1000;
diff --git a/drivers/net/mii.c b/drivers/net/mii.c
index d0a296272713..c70c2332d15e 100644
--- a/drivers/net/mii.c
+++ b/drivers/net/mii.c
@@ -35,14 +35,11 @@
 
 static u32 mii_get_an(struct mii_if_info *mii, u16 addr)
 {
-	u32 result = 0;
 	int advert;
 
 	advert = mii->mdio_read(mii->dev, mii->phy_id, addr);
-	if (advert & LPA_LPACK)
-		result |= ADVERTISED_Autoneg;
 
-	return result | mii_adv_to_ethtool_100bt(advert);
+	return mii_lpa_to_ethtool_lpa_t(advert);
 }
 
 /**
@@ -93,12 +90,13 @@ int mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 
 		ecmd->advertising |= mii_get_an(mii, MII_ADVERTISE);
 		if (mii->supports_gmii)
-			ecmd->advertising |= mii_adv_to_ethtool_1000T(ctrl1000);
+			ecmd->advertising |=
+					mii_ctrl1000_to_ethtool_adv_t(ctrl1000);
 
 		if (bmsr & BMSR_ANEGCOMPLETE) {
 			ecmd->lp_advertising = mii_get_an(mii, MII_LPA);
 			ecmd->lp_advertising |=
-					     mii_lpa_to_ethtool_1000T(stat1000);
+					mii_stat1000_to_ethtool_lpa_t(stat1000);
 		} else {
 			ecmd->lp_advertising = 0;
 		}
@@ -186,10 +184,11 @@ int mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd)
 			advert2 = mii->mdio_read(dev, mii->phy_id, MII_CTRL1000);
 			tmp2 = advert2 & ~(ADVERTISE_1000HALF | ADVERTISE_1000FULL);
 		}
-		tmp |= ethtool_adv_to_mii_100bt(ecmd->advertising);
+		tmp |= ethtool_adv_to_mii_adv_t(ecmd->advertising);
 
 		if (mii->supports_gmii)
-			tmp2 |= ethtool_adv_to_mii_1000T(ecmd->advertising);
+			tmp2 |=
+			      ethtool_adv_to_mii_ctrl1000_t(ecmd->advertising);
 		if (advert != tmp) {
 			mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp);
 			mii->advertising = tmp;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index edb905f80115..f320f466f03b 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -565,7 +565,7 @@ static int genphy_config_advert(struct phy_device *phydev)
 
 	adv &= ~(ADVERTISE_ALL | ADVERTISE_100BASE4 | ADVERTISE_PAUSE_CAP |
 		 ADVERTISE_PAUSE_ASYM);
-	adv |= ethtool_adv_to_mii_100bt(advertise);
+	adv |= ethtool_adv_to_mii_adv_t(advertise);
 
 	if (adv != oldadv) {
 		err = phy_write(phydev, MII_ADVERTISE, adv);
@@ -584,7 +584,7 @@ static int genphy_config_advert(struct phy_device *phydev)
 			return adv;
 
 		adv &= ~(ADVERTISE_1000FULL | ADVERTISE_1000HALF);
-		adv |= ethtool_adv_to_mii_1000T(advertise);
+		adv |= ethtool_adv_to_mii_ctrl1000_t(advertise);
 
 		if (adv != oldadv) {
 			err = phy_write(phydev, MII_CTRL1000, adv);
diff --git a/include/linux/mii.h b/include/linux/mii.h
index 6697b9112014..2783eca629a0 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -241,14 +241,14 @@ static inline unsigned int mii_duplex (unsigned int duplex_lock,
 }
 
 /**
- * ethtool_adv_to_mii_100bt
+ * ethtool_adv_to_mii_adv_t
  * @ethadv: the ethtool advertisement settings
  *
  * A small helper function that translates ethtool advertisement
  * settings to phy autonegotiation advertisements for the
  * MII_ADVERTISE register.
  */
-static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv)
+static inline u32 ethtool_adv_to_mii_adv_t(u32 ethadv)
 {
 	u32 result = 0;
 
@@ -269,13 +269,13 @@ static inline u32 ethtool_adv_to_mii_100bt(u32 ethadv)
 }
 
 /**
- * mii_adv_to_ethtool_100bt
+ * mii_adv_to_ethtool_adv_t
  * @adv: value of the MII_ADVERTISE register
  *
  * A small helper function that translates MII_ADVERTISE bits
  * to ethtool advertisement settings.
  */
-static inline u32 mii_adv_to_ethtool_100bt(u32 adv)
+static inline u32 mii_adv_to_ethtool_adv_t(u32 adv)
 {
 	u32 result = 0;
 
@@ -296,14 +296,14 @@ static inline u32 mii_adv_to_ethtool_100bt(u32 adv)
 }
 
 /**
- * ethtool_adv_to_mii_1000T
+ * ethtool_adv_to_mii_ctrl1000_t
  * @ethadv: the ethtool advertisement settings
  *
  * A small helper function that translates ethtool advertisement
  * settings to phy autonegotiation advertisements for the
  * MII_CTRL1000 register when in 1000T mode.
  */
-static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv)
+static inline u32 ethtool_adv_to_mii_ctrl1000_t(u32 ethadv)
 {
 	u32 result = 0;
 
@@ -316,14 +316,14 @@ static inline u32 ethtool_adv_to_mii_1000T(u32 ethadv)
 }
 
 /**
- * mii_adv_to_ethtool_1000T
+ * mii_ctrl1000_to_ethtool_adv_t
  * @adv: value of the MII_CTRL1000 register
  *
  * A small helper function that translates MII_CTRL1000
  * bits, when in 1000Base-T mode, to ethtool
  * advertisement settings.
  */
-static inline u32 mii_adv_to_ethtool_1000T(u32 adv)
+static inline u32 mii_ctrl1000_to_ethtool_adv_t(u32 adv)
 {
 	u32 result = 0;
 
@@ -335,17 +335,33 @@ static inline u32 mii_adv_to_ethtool_1000T(u32 adv)
 	return result;
 }
 
-#define mii_lpa_to_ethtool_100bt(lpa)	mii_adv_to_ethtool_100bt(lpa)
+/**
+ * mii_lpa_to_ethtool_lpa_t
+ * @adv: value of the MII_LPA register
+ *
+ * A small helper function that translates MII_LPA
+ * bits, when in 1000Base-T mode, to ethtool
+ * LP advertisement settings.
+ */
+static inline u32 mii_lpa_to_ethtool_lpa_t(u32 lpa)
+{
+	u32 result = 0;
+
+	if (lpa & LPA_LPACK)
+		result |= ADVERTISED_Autoneg;
+
+	return result | mii_adv_to_ethtool_adv_t(lpa);
+}
 
 /**
- * mii_lpa_to_ethtool_1000T
+ * mii_stat1000_to_ethtool_lpa_t
  * @adv: value of the MII_STAT1000 register
  *
  * A small helper function that translates MII_STAT1000
  * bits, when in 1000Base-T mode, to ethtool
  * advertisement settings.
  */
-static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa)
+static inline u32 mii_stat1000_to_ethtool_lpa_t(u32 lpa)
 {
 	u32 result = 0;
 
@@ -358,14 +374,14 @@ static inline u32 mii_lpa_to_ethtool_1000T(u32 lpa)
 }
 
 /**
- * ethtool_adv_to_mii_1000X
+ * ethtool_adv_to_mii_adv_x
  * @ethadv: the ethtool advertisement settings
  *
  * A small helper function that translates ethtool advertisement
  * settings to phy autonegotiation advertisements for the
  * MII_CTRL1000 register when in 1000Base-X mode.
  */
-static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv)
+static inline u32 ethtool_adv_to_mii_adv_x(u32 ethadv)
 {
 	u32 result = 0;
 
@@ -382,14 +398,14 @@ static inline u32 ethtool_adv_to_mii_1000X(u32 ethadv)
 }
 
 /**
- * mii_adv_to_ethtool_1000X
+ * mii_adv_to_ethtool_adv_x
  * @adv: value of the MII_CTRL1000 register
  *
  * A small helper function that translates MII_CTRL1000
  * bits, when in 1000Base-X mode, to ethtool
  * advertisement settings.
  */
-static inline u32 mii_adv_to_ethtool_1000X(u32 adv)
+static inline u32 mii_adv_to_ethtool_adv_x(u32 adv)
 {
 	u32 result = 0;
 
@@ -405,6 +421,24 @@ static inline u32 mii_adv_to_ethtool_1000X(u32 adv)
 	return result;
 }
 
+/**
+ * mii_lpa_to_ethtool_lpa_x
+ * @adv: value of the MII_LPA register
+ *
+ * A small helper function that translates MII_LPA
+ * bits, when in 1000Base-X mode, to ethtool
+ * LP advertisement settings.
+ */
+static inline u32 mii_lpa_to_ethtool_lpa_x(u32 lpa)
+{
+	u32 result = 0;
+
+	if (lpa & LPA_LPACK)
+		result |= ADVERTISED_Autoneg;
+
+	return result | mii_adv_to_ethtool_adv_x(lpa);
+}
+
 /**
  * mii_advertise_flowctrl - get flow control advertisement flags
  * @cap: Flow control capabilities (FLOW_CTRL_RX, FLOW_CTRL_TX or both)
-- 
cgit v1.2.3


From 8b60b07805d557542160d852874fa6a1b969184e Mon Sep 17 00:00:00 2001
From: "Luis R. Rodriguez" <mcgrof@qca.qualcomm.com>
Date: Tue, 11 Oct 2011 10:59:02 -0700
Subject: cfg80211: process regulatory DFS region for countries

The wireless-regdb now has support for mapping a country to
one DFS region. CRDA sends this to us now so process it
so we can provide that hint to drivers. This will later be
used by code for processing DFS in a way that meets the
criteria for the DFS region the country belongs to.

Signed-off-by: Luis R. Rodriguez <mcgrof@qca.qualcomm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h  | 21 +++++++++++++++++++++
 include/net/regulatory.h |  1 +
 net/wireless/nl80211.c   | 15 +++++++++++++++
 net/wireless/reg.c       | 37 +++++++++++++++++++++++++++++++++++++
 net/wireless/reg.h       |  1 +
 5 files changed, 75 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f9261c253735..6396819a7e41 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1170,6 +1170,10 @@ enum nl80211_commands {
  *	probe-response frame. The DA field in the 802.11 header is zero-ed out,
  *	to be filled by the FW.
  *
+ * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country
+ *    abides to when initiating radiation on DFS channels. A country maps
+ *    to one DFS region.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1408,6 +1412,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PROBE_RESP,
 
+	NL80211_ATTR_DFS_REGION,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1916,6 +1922,21 @@ enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_IBSS		= 1<<8,
 };
 
+/**
+ * enum nl80211_dfs_regions - regulatory DFS regions
+ *
+ * @NL80211_DFS_UNSET: Country has no DFS master region specified
+ * @NL80211_DFS_FCC_: Country follows DFS master rules from FCC
+ * @NL80211_DFS_FCC_: Country follows DFS master rules from ETSI
+ * @NL80211_DFS_JP_: Country follows DFS master rules from JP/MKK/Telec
+ */
+enum nl80211_dfs_regions {
+	NL80211_DFS_UNSET	= 0,
+	NL80211_DFS_FCC		= 1,
+	NL80211_DFS_ETSI	= 2,
+	NL80211_DFS_JP		= 3,
+};
+
 /**
  * enum nl80211_survey_info - survey information
  *
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index eb7d3c2d4274..7399c93cb4bc 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -93,6 +93,7 @@ struct ieee80211_reg_rule {
 struct ieee80211_regdomain {
 	u32 n_reg_rules;
 	char alpha2[2];
+	u8 dfs_region;
 	struct ieee80211_reg_rule reg_rules[];
 };
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 6bc7c4b32fa5..50482e129263 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -199,6 +199,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_DONT_WAIT_FOR_ACK] = { .type = NLA_FLAG },
 	[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
 				      .len = IEEE80211_MAX_DATA_LEN },
+	[NL80211_ATTR_DFS_REGION] = { .type = NLA_U8 },
 };
 
 /* policy for the key attributes */
@@ -3382,6 +3383,9 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info)
 
 	NLA_PUT_STRING(msg, NL80211_ATTR_REG_ALPHA2,
 		cfg80211_regdomain->alpha2);
+	if (cfg80211_regdomain->dfs_region)
+		NLA_PUT_U8(msg, NL80211_ATTR_DFS_REGION,
+			   cfg80211_regdomain->dfs_region);
 
 	nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES);
 	if (!nl_reg_rules)
@@ -3440,6 +3444,7 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 	char *alpha2 = NULL;
 	int rem_reg_rules = 0, r = 0;
 	u32 num_rules = 0, rule_idx = 0, size_of_regd;
+	u8 dfs_region = 0;
 	struct ieee80211_regdomain *rd = NULL;
 
 	if (!info->attrs[NL80211_ATTR_REG_ALPHA2])
@@ -3450,6 +3455,9 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 
 	alpha2 = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
 
+	if (info->attrs[NL80211_ATTR_DFS_REGION])
+		dfs_region = nla_get_u8(info->attrs[NL80211_ATTR_DFS_REGION]);
+
 	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
 			rem_reg_rules) {
 		num_rules++;
@@ -3477,6 +3485,13 @@ static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info)
 	rd->alpha2[0] = alpha2[0];
 	rd->alpha2[1] = alpha2[1];
 
+	/*
+	 * Disable DFS master mode if the DFS region was
+	 * not supported or known on this kernel.
+	 */
+	if (reg_supported_dfs_region(dfs_region))
+		rd->dfs_region = dfs_region;
+
 	nla_for_each_nested(nl_reg_rule, info->attrs[NL80211_ATTR_REG_RULES],
 			rem_reg_rules) {
 		nla_parse(tb, NL80211_REG_RULE_ATTR_MAX,
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 2520a1b7e7db..69141ed1f6df 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1946,6 +1946,42 @@ static void print_rd_rules(const struct ieee80211_regdomain *rd)
 	}
 }
 
+bool reg_supported_dfs_region(u8 dfs_region)
+{
+	switch (dfs_region) {
+	case NL80211_DFS_UNSET:
+	case NL80211_DFS_FCC:
+	case NL80211_DFS_ETSI:
+	case NL80211_DFS_JP:
+		return true;
+	default:
+		REG_DBG_PRINT("Ignoring uknown DFS master region: %d\n",
+			      dfs_region);
+		return false;
+	}
+}
+
+static void print_dfs_region(u8 dfs_region)
+{
+	if (!dfs_region)
+		return;
+
+	switch (dfs_region) {
+	case NL80211_DFS_FCC:
+		pr_info(" DFS Master region FCC");
+		break;
+	case NL80211_DFS_ETSI:
+		pr_info(" DFS Master region ETSI");
+		break;
+	case NL80211_DFS_JP:
+		pr_info(" DFS Master region JP");
+		break;
+	default:
+		pr_info(" DFS Master region Uknown");
+		break;
+	}
+}
+
 static void print_regdomain(const struct ieee80211_regdomain *rd)
 {
 
@@ -1973,6 +2009,7 @@ static void print_regdomain(const struct ieee80211_regdomain *rd)
 			pr_info("Regulatory domain changed to country: %c%c\n",
 				rd->alpha2[0], rd->alpha2[1]);
 	}
+	print_dfs_region(rd->dfs_region);
 	print_rd_rules(rd);
 }
 
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 4a56799d868d..786e414afd91 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -5,6 +5,7 @@ extern const struct ieee80211_regdomain *cfg80211_regdomain;
 
 bool is_world_regdom(const char *alpha2);
 bool reg_is_valid_request(const char *alpha2);
+bool reg_supported_dfs_region(u8 dfs_region);
 
 int regulatory_hint_user(const char *alpha2);
 
-- 
cgit v1.2.3


From 7e7c8926b2f4e3453b8aeb39cd814d2af3fec24f Mon Sep 17 00:00:00 2001
From: Ben Greear <greearb@candelatech.com>
Date: Fri, 18 Nov 2011 11:31:59 -0800
Subject: wireless: Support ht-capabilities over-rides.

This allows users to disable features such as HT, HT40,
and to modify the MCS, AMPDU, and AMSDU settings for
drivers that support it.

The MCS, AMPDU, and AMSDU features that may be disabled are
are reported in the phy-info netlink message as a mask.

Attemping to disable features that are not supported will
take no affect, but will not return errors.  This is to aid
backwards compatibility in user-space apps that may not be
clever enough to deal with parsing the the capabilities mask.

This patch only enables the infrastructure.  An additional
patch will enable the feature in mac80211.

Signed-off-by: Ben Greear <greearb@candelatech.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 14 ++++++++++++++
 include/net/cfg80211.h  | 27 +++++++++++++++++++++++++++
 net/wireless/core.h     | 10 ++++++++--
 net/wireless/mlme.c     | 37 ++++++++++++++++++++++++++++++++++---
 net/wireless/nl80211.c  | 44 +++++++++++++++++++++++++++++++++++++++++++-
 net/wireless/sme.c      |  7 ++++++-
 6 files changed, 132 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 6396819a7e41..97bfebfcce90 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1169,6 +1169,17 @@ enum nl80211_commands {
  * @NL80211_ATTR_PROBE_RESP: Probe Response template data. Contains the entire
  *	probe-response frame. The DA field in the 802.11 header is zero-ed out,
  *	to be filled by the FW.
+ * @NL80211_ATTR_DISABLE_HT:  Force HT capable interfaces to disable
+ *      this feature.  Currently, only supported in mac80211 drivers.
+ * @NL80211_ATTR_HT_CAPABILITY_MASK: Specify which bits of the
+ *      ATTR_HT_CAPABILITY to which attention should be paid.
+ *      Currently, only mac80211 NICs support this feature.
+ *      The values that may be configured are:
+ *       MCS rates, MAX-AMSDU, HT-20-40 and HT_CAP_SGI_40
+ *       AMPDU density and AMPDU factor.
+ *      All values are treated as suggestions and may be ignored
+ *      by the driver as required.  The actual values may be seen in
+ *      the station debugfs ht_caps file.
  *
  * @NL80211_ATTR_DFS_REGION: region for regulatory rules which this country
  *    abides to when initiating radiation on DFS channels. A country maps
@@ -1414,6 +1425,9 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_DFS_REGION,
 
+	NL80211_ATTR_DISABLE_HT,
+	NL80211_ATTR_HT_CAPABILITY_MASK,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 6a1d849c597a..d5e18913f293 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1043,6 +1043,15 @@ struct cfg80211_auth_request {
 	bool local_state_change;
 };
 
+/**
+ * enum cfg80211_assoc_req_flags - Over-ride default behaviour in association.
+ *
+ * @ASSOC_REQ_DISABLE_HT:  Disable HT (802.11n)
+ */
+enum cfg80211_assoc_req_flags {
+	ASSOC_REQ_DISABLE_HT		= BIT(0),
+};
+
 /**
  * struct cfg80211_assoc_request - (Re)Association request data
  *
@@ -1054,6 +1063,10 @@ struct cfg80211_auth_request {
  * @use_mfp: Use management frame protection (IEEE 802.11w) in this association
  * @crypto: crypto settings
  * @prev_bssid: previous BSSID, if not %NULL use reassociate frame
+ * @flags:  See &enum cfg80211_assoc_req_flags
+ * @ht_capa:  HT Capabilities over-rides.  Values set in ht_capa_mask
+ *   will be used in ht_capa.  Un-supported values will be ignored.
+ * @ht_capa_mask:  The bits of ht_capa which are to be used.
  */
 struct cfg80211_assoc_request {
 	struct cfg80211_bss *bss;
@@ -1061,6 +1074,9 @@ struct cfg80211_assoc_request {
 	size_t ie_len;
 	struct cfg80211_crypto_settings crypto;
 	bool use_mfp;
+	u32 flags;
+	struct ieee80211_ht_cap ht_capa;
+	struct ieee80211_ht_cap ht_capa_mask;
 };
 
 /**
@@ -1159,6 +1175,10 @@ struct cfg80211_ibss_params {
  * @key_len: length of WEP key for shared key authentication
  * @key_idx: index of WEP key for shared key authentication
  * @key: WEP key for shared key authentication
+ * @flags:  See &enum cfg80211_assoc_req_flags
+ * @ht_capa:  HT Capabilities over-rides.  Values set in ht_capa_mask
+ *   will be used in ht_capa.  Un-supported values will be ignored.
+ * @ht_capa_mask:  The bits of ht_capa which are to be used.
  */
 struct cfg80211_connect_params {
 	struct ieee80211_channel *channel;
@@ -1172,6 +1192,9 @@ struct cfg80211_connect_params {
 	struct cfg80211_crypto_settings crypto;
 	const u8 *key;
 	u8 key_len, key_idx;
+	u32 flags;
+	struct ieee80211_ht_cap ht_capa;
+	struct ieee80211_ht_cap ht_capa_mask;
 };
 
 /**
@@ -1938,6 +1961,8 @@ struct wiphy_wowlan_support {
  * @wowlan: WoWLAN support information
  *
  * @ap_sme_capa: AP SME capabilities, flags from &enum nl80211_ap_sme_features.
+ * @ht_capa_mod_mask:  Specify what ht_cap values can be over-ridden.
+ *	If null, then none can be over-ridden.
  */
 struct wiphy {
 	/* assign these fields before you register the wiphy */
@@ -2027,6 +2052,8 @@ struct wiphy {
 	/* dir in debugfs: ieee80211/<wiphyname> */
 	struct dentry *debugfsdir;
 
+	const struct ieee80211_ht_cap *ht_capa_mod_mask;
+
 #ifdef CONFIG_NET_NS
 	/* the network namespace this phy lives in currently */
 	struct net *_net;
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 1c7d4df5418c..fb08c28fc90a 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -341,13 +341,17 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  const u8 *bssid, const u8 *prev_bssid,
 			  const u8 *ssid, int ssid_len,
 			  const u8 *ie, int ie_len, bool use_mfp,
-			  struct cfg80211_crypto_settings *crypt);
+			  struct cfg80211_crypto_settings *crypt,
+			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			  struct ieee80211_ht_cap *ht_capa_mask);
 int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			struct net_device *dev, struct ieee80211_channel *chan,
 			const u8 *bssid, const u8 *prev_bssid,
 			const u8 *ssid, int ssid_len,
 			const u8 *ie, int ie_len, bool use_mfp,
-			struct cfg80211_crypto_settings *crypt);
+			struct cfg80211_crypto_settings *crypt,
+			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			struct ieee80211_ht_cap *ht_capa_mask);
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
 			   const u8 *ie, int ie_len, u16 reason,
@@ -379,6 +383,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 			  bool channel_type_valid, unsigned int wait,
 			  const u8 *buf, size_t len, bool no_cck,
 			  bool dont_wait_for_ack, u64 *cookie);
+void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
+			       const struct ieee80211_ht_cap *ht_capa_mask);
 
 /* SME */
 int __cfg80211_connect(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 6c1bafd508c8..438dfc105b4a 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -501,13 +501,32 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 	return err;
 }
 
+/*  Do a logical ht_capa &= ht_capa_mask.  */
+void cfg80211_oper_and_ht_capa(struct ieee80211_ht_cap *ht_capa,
+			       const struct ieee80211_ht_cap *ht_capa_mask)
+{
+	int i;
+	u8 *p1, *p2;
+	if (!ht_capa_mask) {
+		memset(ht_capa, 0, sizeof(*ht_capa));
+		return;
+	}
+
+	p1 = (u8*)(ht_capa);
+	p2 = (u8*)(ht_capa_mask);
+	for (i = 0; i<sizeof(*ht_capa); i++)
+		p1[i] &= p2[i];
+}
+
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
 			  const u8 *bssid, const u8 *prev_bssid,
 			  const u8 *ssid, int ssid_len,
 			  const u8 *ie, int ie_len, bool use_mfp,
-			  struct cfg80211_crypto_settings *crypt)
+			  struct cfg80211_crypto_settings *crypt,
+			  u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			  struct ieee80211_ht_cap *ht_capa_mask)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_assoc_request req;
@@ -537,6 +556,15 @@ int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 	memcpy(&req.crypto, crypt, sizeof(req.crypto));
 	req.use_mfp = use_mfp;
 	req.prev_bssid = prev_bssid;
+	req.flags = assoc_flags;
+	if (ht_capa)
+		memcpy(&req.ht_capa, ht_capa, sizeof(req.ht_capa));
+	if (ht_capa_mask)
+		memcpy(&req.ht_capa_mask, ht_capa_mask,
+		       sizeof(req.ht_capa_mask));
+	cfg80211_oper_and_ht_capa(&req.ht_capa_mask,
+				  rdev->wiphy.ht_capa_mod_mask);
+
 	req.bss = cfg80211_get_bss(&rdev->wiphy, chan, bssid, ssid, ssid_len,
 				   WLAN_CAPABILITY_ESS, WLAN_CAPABILITY_ESS);
 	if (!req.bss) {
@@ -574,14 +602,17 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			const u8 *bssid, const u8 *prev_bssid,
 			const u8 *ssid, int ssid_len,
 			const u8 *ie, int ie_len, bool use_mfp,
-			struct cfg80211_crypto_settings *crypt)
+			struct cfg80211_crypto_settings *crypt,
+			u32 assoc_flags, struct ieee80211_ht_cap *ht_capa,
+			struct ieee80211_ht_cap *ht_capa_mask)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
 	err = __cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
-				    ssid, ssid_len, ie, ie_len, use_mfp, crypt);
+				    ssid, ssid_len, ie, ie_len, use_mfp, crypt,
+				    assoc_flags, ht_capa, ht_capa_mask);
 	wdev_unlock(wdev);
 
 	return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 889f06483862..a1cabde7cb5f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -200,6 +200,10 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_PROBE_RESP] = { .type = NLA_BINARY,
 				      .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_DFS_REGION] = { .type = NLA_U8 },
+	[NL80211_ATTR_DISABLE_HT] = { .type = NLA_FLAG },
+	[NL80211_ATTR_HT_CAPABILITY_MASK] = {
+		.len = NL80211_HT_CAPABILITY_LEN
+	},
 };
 
 /* policy for the key attributes */
@@ -1032,6 +1036,11 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 
 	NLA_PUT_U32(msg, NL80211_ATTR_FEATURE_FLAGS, dev->wiphy.features);
 
+	if (dev->wiphy.ht_capa_mod_mask)
+		NLA_PUT(msg, NL80211_ATTR_HT_CAPABILITY_MASK,
+			sizeof(*dev->wiphy.ht_capa_mod_mask),
+			dev->wiphy.ht_capa_mod_mask);
+
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
@@ -4413,6 +4422,9 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
 	int err, ssid_len, ie_len = 0;
 	bool use_mfp = false;
+	u32 flags = 0;
+	struct ieee80211_ht_cap *ht_capa = NULL;
+	struct ieee80211_ht_cap *ht_capa_mask = NULL;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -4456,11 +4468,25 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	if (info->attrs[NL80211_ATTR_PREV_BSSID])
 		prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
+		flags |= ASSOC_REQ_DISABLE_HT;
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+		ht_capa_mask =
+			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]);
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
+		if (!ht_capa_mask)
+			return -EINVAL;
+		ht_capa = nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
+	}
+
 	err = nl80211_crypto_settings(rdev, info, &crypto, 1);
 	if (!err)
 		err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid,
 					  ssid, ssid_len, ie, ie_len, use_mfp,
-					  &crypto);
+					  &crypto, flags, ht_capa,
+					  ht_capa_mask);
 
 	return err;
 }
@@ -4950,6 +4976,22 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 			return PTR_ERR(connkeys);
 	}
 
+	if (nla_get_flag(info->attrs[NL80211_ATTR_DISABLE_HT]))
+		connect.flags |= ASSOC_REQ_DISABLE_HT;
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+		memcpy(&connect.ht_capa_mask,
+		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK]),
+		       sizeof(connect.ht_capa_mask));
+
+	if (info->attrs[NL80211_ATTR_HT_CAPABILITY]) {
+		if (!info->attrs[NL80211_ATTR_HT_CAPABILITY_MASK])
+			return -EINVAL;
+		memcpy(&connect.ht_capa,
+		       nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]),
+		       sizeof(connect.ht_capa));
+	}
+
 	err = cfg80211_connect(rdev, dev, &connect, connkeys);
 	if (err)
 		kfree(connkeys);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 6e86d5acf145..ed9d0e6f4a06 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -189,7 +189,9 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 					    prev_bssid,
 					    params->ssid, params->ssid_len,
 					    params->ie, params->ie_len,
-					    false, &params->crypto);
+					    false, &params->crypto,
+					    params->flags, &params->ht_capa,
+					    &params->ht_capa_mask);
 		if (err)
 			__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 					       NULL, 0,
@@ -773,6 +775,9 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 		wdev->connect_keys = NULL;
 	}
 
+	cfg80211_oper_and_ht_capa(&connect->ht_capa_mask,
+				  rdev->wiphy.ht_capa_mod_mask);
+
 	if (connkeys && connkeys->def >= 0) {
 		int idx;
 		u32 cipher;
-- 
cgit v1.2.3


From a2d7ec58ac09f30ab726f216827f7c7095b2a98f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 18 Nov 2011 17:32:46 +0000
Subject: netfilter: use jump_label for nf_hooks

On configs where CONFIG_JUMP_LABEL=y, we can replace in fast path a
load/compare/conditional jump by a single jump with no dcache reference.

Jump target is modified as soon as nf_hooks[pf][hook] switches from
empty state to non empty states. jump_label state is kept outside of
nf_hooks array so has no cost on cpu caches.

This patch removes the test on CONFIG_NETFILTER_DEBUG : No need to call
nf_hook_slow() at all if nf_hooks[pf][hook] is empty, this didnt give
useful information, but slowed down things a lot.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Patrick McHardy <kaber@trash.net>
CC: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter.h | 26 +++++++++++++++++++++-----
 net/netfilter/core.c      | 13 ++++++++++++-
 2 files changed, 33 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 857f5026ced6..b809265607d0 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -162,6 +162,24 @@ extern struct ctl_path nf_net_ipv4_netfilter_sysctl_path[];
 
 extern struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
 
+#if defined(CONFIG_JUMP_LABEL)
+#include <linux/jump_label.h>
+extern struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
+{
+	if (__builtin_constant_p(pf) &&
+	    __builtin_constant_p(hook))
+		return static_branch(&nf_hooks_needed[pf][hook]);
+
+	return !list_empty(&nf_hooks[pf][hook]);
+}
+#else
+static inline bool nf_hooks_active(u_int8_t pf, unsigned int hook)
+{
+	return !list_empty(&nf_hooks[pf][hook]);
+}
+#endif
+
 int nf_hook_slow(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
 		 struct net_device *indev, struct net_device *outdev,
 		 int (*okfn)(struct sk_buff *), int thresh);
@@ -179,11 +197,9 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct net_device *outdev,
 				 int (*okfn)(struct sk_buff *), int thresh)
 {
-#ifndef CONFIG_NETFILTER_DEBUG
-	if (list_empty(&nf_hooks[pf][hook]))
-		return 1;
-#endif
-	return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
+	if (nf_hooks_active(pf, hook))
+		return nf_hook_slow(pf, hook, skb, indev, outdev, okfn, thresh);
+	return 1;
 }
 
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sk_buff *skb,
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index afca6c78948c..4aa0f4b19bd8 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -54,6 +54,12 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
 
 struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;
 EXPORT_SYMBOL(nf_hooks);
+
+#if defined(CONFIG_JUMP_LABEL)
+struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
+EXPORT_SYMBOL(nf_hooks_needed);
+#endif
+
 static DEFINE_MUTEX(nf_hook_mutex);
 
 int nf_register_hook(struct nf_hook_ops *reg)
@@ -70,6 +76,9 @@ int nf_register_hook(struct nf_hook_ops *reg)
 	}
 	list_add_rcu(&reg->list, elem->list.prev);
 	mutex_unlock(&nf_hook_mutex);
+#if defined(CONFIG_JUMP_LABEL)
+	jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
+#endif
 	return 0;
 }
 EXPORT_SYMBOL(nf_register_hook);
@@ -79,7 +88,9 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
 	mutex_lock(&nf_hook_mutex);
 	list_del_rcu(&reg->list);
 	mutex_unlock(&nf_hook_mutex);
-
+#if defined(CONFIG_JUMP_LABEL)
+	jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
+#endif
 	synchronize_net();
 }
 EXPORT_SYMBOL(nf_unregister_hook);
-- 
cgit v1.2.3


From 570e57bcbcc4df5581b1e9c806ab2b16e96ea7d3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 21 Nov 2011 19:51:34 +0000
Subject: atm: use SKB_TRUESIZE() in atm_guess_pdu2truesize()

SKB_TRUESIZE() provides a better approximation of expected skb truesize.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atmdev.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 49a83ca900ba..43ea1b2de3ee 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -452,7 +452,7 @@ void atm_dev_release_vccs(struct atm_dev *dev);
 
 static inline int atm_guess_pdu2truesize(int size)
 {
-	return SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info);
+	return SKB_TRUESIZE(size);
 }
 
 
-- 
cgit v1.2.3


From 5bc1421e34ecfe0bd4b26dc3232b7d5e25179144 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 22 Nov 2011 05:10:51 +0000
Subject: net: add network priority cgroup infrastructure (v4)

This patch adds in the infrastructure code to create the network priority
cgroup.  The cgroup, in addition to the standard processes file creates two
control files:

1) prioidx - This is a read-only file that exports the index of this cgroup.
This is a value that is both arbitrary and unique to a cgroup in this subsystem,
and is used to index the per-device priority map

2) priomap - This is a writeable file.  On read it reports a table of 2-tuples
<name:priority> where name is the name of a network interface and priority is
indicates the priority assigned to frames egresessing on the named interface and
originating from a pid in this cgroup

This cgroup allows for skb priority to be set prior to a root qdisc getting
selected. This is benenficial for DCB enabled systems, in that it allows for any
application to use dcb configured priorities so without application modification

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
CC: Robert Love <robert.w.love@intel.com>
CC: "David S. Miller" <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/cgroup_subsys.h |   8 +
 include/linux/netdevice.h     |   4 +
 include/net/netprio_cgroup.h  |  65 ++++++++
 include/net/sock.h            |   3 +
 net/Kconfig                   |   7 +
 net/core/Makefile             |   1 +
 net/core/dev.c                |  14 ++
 net/core/netprio_cgroup.c     | 344 ++++++++++++++++++++++++++++++++++++++++++
 net/core/sock.c               |  22 ++-
 net/socket.c                  |   2 +
 10 files changed, 469 insertions(+), 1 deletion(-)
 create mode 100644 include/net/netprio_cgroup.h
 create mode 100644 net/core/netprio_cgroup.c

(limited to 'include/linux')

diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index ac663c18776c..0bd390ce98b2 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -59,8 +59,16 @@ SUBSYS(net_cls)
 SUBSYS(blkio)
 #endif
 
+/* */
+
 #ifdef CONFIG_CGROUP_PERF
 SUBSYS(perf)
 #endif
 
 /* */
+
+#ifdef CONFIG_NETPRIO_CGROUP
+SUBSYS(net_prio)
+#endif
+
+/* */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3eb383a9b5ed..999bb264fe27 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -50,6 +50,7 @@
 #ifdef CONFIG_DCB
 #include <net/dcbnl.h>
 #endif
+#include <net/netprio_cgroup.h>
 
 #include <linux/netdev_features.h>
 
@@ -1244,6 +1245,9 @@ struct net_device {
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
+#endif
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+	struct netprio_map __rcu *priomap;
 #endif
 	/* phy device may attach itself for hardware timestamping */
 	struct phy_device *phydev;
diff --git a/include/net/netprio_cgroup.h b/include/net/netprio_cgroup.h
new file mode 100644
index 000000000000..c432e99942af
--- /dev/null
+++ b/include/net/netprio_cgroup.h
@@ -0,0 +1,65 @@
+/*
+ * netprio_cgroup.h			Control Group Priority set
+ *
+ *
+ * Authors:	Neil Horman <nhorman@tuxdriver.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#ifndef _NETPRIO_CGROUP_H
+#define _NETPRIO_CGROUP_H
+#include <linux/module.h>
+#include <linux/cgroup.h>
+#include <linux/hardirq.h>
+#include <linux/rcupdate.h>
+
+struct cgroup_netprio_state
+{
+	struct cgroup_subsys_state css;
+	u32 prioidx;
+};
+
+struct netprio_map {
+	struct rcu_head rcu;
+	u32 priomap_len;
+	u32 priomap[];
+};
+
+#ifdef CONFIG_CGROUPS
+
+#ifndef CONFIG_NETPRIO_CGROUP
+extern int net_prio_subsys_id;
+#endif
+
+extern void sock_update_netprioidx(struct sock *sk);
+
+static inline struct cgroup_netprio_state
+		*task_netprio_state(struct task_struct *p)
+{
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+	return container_of(task_subsys_state(p, net_prio_subsys_id),
+			    struct cgroup_netprio_state, css);
+#else
+	return NULL;
+#endif
+}
+
+#else
+
+#define sock_update_netprioidx(sk)
+#define skb_update_prio(skb)
+
+static inline struct cgroup_netprio_state
+		*task_netprio_state(struct task_struct *p)
+{
+	return NULL;
+}
+
+#endif
+
+#endif  /* _NET_CLS_CGROUP_H */
diff --git a/include/net/sock.h b/include/net/sock.h
index 1c28f394d8ec..8ac338cb39ce 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -320,6 +320,9 @@ struct sock {
 	unsigned short		sk_ack_backlog;
 	unsigned short		sk_max_ack_backlog;
 	__u32			sk_priority;
+#ifdef CONFIG_CGROUPS
+	__u32			sk_cgrp_prioidx;
+#endif
 	struct pid		*sk_peer_pid;
 	const struct cred	*sk_peer_cred;
 	long			sk_rcvtimeo;
diff --git a/net/Kconfig b/net/Kconfig
index a07314844238..63d2c5dc36ff 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,13 @@ config XPS
 	depends on SMP && SYSFS && USE_GENERIC_SMP_HELPERS
 	default y
 
+config NETPRIO_CGROUP
+	tristate "Network priority cgroup"
+	depends on CGROUPS
+	---help---
+	  Cgroup subsystem for use in assigning processes to network priorities on
+	  a per-interface basis
+
 config HAVE_BPF_JIT
 	bool
 
diff --git a/net/core/Makefile b/net/core/Makefile
index 0d357b1c4e57..3606d40aae62 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -19,3 +19,4 @@ obj-$(CONFIG_FIB_RULES) += fib_rules.o
 obj-$(CONFIG_TRACEPOINTS) += net-traces.o
 obj-$(CONFIG_NET_DROP_MONITOR) += drop_monitor.o
 obj-$(CONFIG_NETWORK_PHY_TIMESTAMPING) += timestamping.o
+obj-$(CONFIG_NETPRIO_CGROUP) += netprio_cgroup.o
diff --git a/net/core/dev.c b/net/core/dev.c
index f78959996148..8afb244b205f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2449,6 +2449,18 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 	return rc;
 }
 
+#if IS_ENABLED(CONFIG_NETPRIO_CGROUP)
+static void skb_update_prio(struct sk_buff *skb)
+{
+	struct netprio_map *map = rcu_dereference(skb->dev->priomap);
+
+	if ((!skb->priority) && (skb->sk) && map)
+		skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+}
+#else
+#define skb_update_prio(skb)
+#endif
+
 static DEFINE_PER_CPU(int, xmit_recursion);
 #define RECURSION_LIMIT 10
 
@@ -2489,6 +2501,8 @@ int dev_queue_xmit(struct sk_buff *skb)
 	 */
 	rcu_read_lock_bh();
 
+	skb_update_prio(skb);
+
 	txq = dev_pick_tx(dev, skb);
 	q = rcu_dereference_bh(txq->qdisc);
 
diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c
new file mode 100644
index 000000000000..72ad0bc6841e
--- /dev/null
+++ b/net/core/netprio_cgroup.c
@@ -0,0 +1,344 @@
+/*
+ * net/core/netprio_cgroup.c	Priority Control Group
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Neil Horman <nhorman@tuxdriver.com>
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/cgroup.h>
+#include <linux/rcupdate.h>
+#include <linux/atomic.h>
+#include <net/rtnetlink.h>
+#include <net/pkt_cls.h>
+#include <net/sock.h>
+#include <net/netprio_cgroup.h>
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+					       struct cgroup *cgrp);
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp);
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp);
+
+struct cgroup_subsys net_prio_subsys = {
+	.name		= "net_prio",
+	.create		= cgrp_create,
+	.destroy	= cgrp_destroy,
+	.populate	= cgrp_populate,
+#ifdef CONFIG_NETPRIO_CGROUP
+	.subsys_id	= net_prio_subsys_id,
+#endif
+	.module		= THIS_MODULE
+};
+
+#define PRIOIDX_SZ 128
+
+static unsigned long prioidx_map[PRIOIDX_SZ];
+static DEFINE_SPINLOCK(prioidx_map_lock);
+static atomic_t max_prioidx = ATOMIC_INIT(0);
+
+static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp)
+{
+	return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id),
+			    struct cgroup_netprio_state, css);
+}
+
+static int get_prioidx(u32 *prio)
+{
+	unsigned long flags;
+	u32 prioidx;
+
+	spin_lock_irqsave(&prioidx_map_lock, flags);
+	prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ);
+	set_bit(prioidx, prioidx_map);
+	spin_unlock_irqrestore(&prioidx_map_lock, flags);
+	if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ)
+		return -ENOSPC;
+
+	atomic_set(&max_prioidx, prioidx);
+	*prio = prioidx;
+	return 0;
+}
+
+static void put_prioidx(u32 idx)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&prioidx_map_lock, flags);
+	clear_bit(idx, prioidx_map);
+	spin_unlock_irqrestore(&prioidx_map_lock, flags);
+}
+
+static void extend_netdev_table(struct net_device *dev, u32 new_len)
+{
+	size_t new_size = sizeof(struct netprio_map) +
+			   ((sizeof(u32) * new_len));
+	struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL);
+	struct netprio_map *old_priomap;
+	int i;
+
+	old_priomap  = rtnl_dereference(dev->priomap);
+
+	if (!new_priomap) {
+		printk(KERN_WARNING "Unable to alloc new priomap!\n");
+		return;
+	}
+
+	for (i = 0;
+	     old_priomap && (i < old_priomap->priomap_len);
+	     i++)
+		new_priomap->priomap[i] = old_priomap->priomap[i];
+
+	new_priomap->priomap_len = new_len;
+
+	rcu_assign_pointer(dev->priomap, new_priomap);
+	if (old_priomap)
+		kfree_rcu(old_priomap, rcu);
+}
+
+static void update_netdev_tables(void)
+{
+	struct net_device *dev;
+	u32 max_len = atomic_read(&max_prioidx);
+	struct netprio_map *map;
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		map = rtnl_dereference(dev->priomap);
+		if ((!map) ||
+		    (map->priomap_len < max_len))
+			extend_netdev_table(dev, max_len);
+	}
+	rtnl_unlock();
+}
+
+static struct cgroup_subsys_state *cgrp_create(struct cgroup_subsys *ss,
+						 struct cgroup *cgrp)
+{
+	struct cgroup_netprio_state *cs;
+	int ret;
+
+	cs = kzalloc(sizeof(*cs), GFP_KERNEL);
+	if (!cs)
+		return ERR_PTR(-ENOMEM);
+
+	if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) {
+		kfree(cs);
+		return ERR_PTR(-EINVAL);
+	}
+
+	ret = get_prioidx(&cs->prioidx);
+	if (ret != 0) {
+		printk(KERN_WARNING "No space in priority index array\n");
+		kfree(cs);
+		return ERR_PTR(ret);
+	}
+
+	return &cs->css;
+}
+
+static void cgrp_destroy(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	struct cgroup_netprio_state *cs;
+	struct net_device *dev;
+	struct netprio_map *map;
+
+	cs = cgrp_netprio_state(cgrp);
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		map = rtnl_dereference(dev->priomap);
+		if (map)
+			map->priomap[cs->prioidx] = 0;
+	}
+	rtnl_unlock();
+	put_prioidx(cs->prioidx);
+	kfree(cs);
+}
+
+static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft)
+{
+	return (u64)cgrp_netprio_state(cgrp)->prioidx;
+}
+
+static int read_priomap(struct cgroup *cont, struct cftype *cft,
+			struct cgroup_map_cb *cb)
+{
+	struct net_device *dev;
+	u32 prioidx = cgrp_netprio_state(cont)->prioidx;
+	u32 priority;
+	struct netprio_map *map;
+
+	rcu_read_lock();
+	for_each_netdev_rcu(&init_net, dev) {
+		map = rcu_dereference(dev->priomap);
+		priority = map ? map->priomap[prioidx] : 0;
+		cb->fill(cb, dev->name, priority);
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+static int write_priomap(struct cgroup *cgrp, struct cftype *cft,
+			 const char *buffer)
+{
+	char *devname = kstrdup(buffer, GFP_KERNEL);
+	int ret = -EINVAL;
+	u32 prioidx = cgrp_netprio_state(cgrp)->prioidx;
+	unsigned long priority;
+	char *priostr;
+	struct net_device *dev;
+	struct netprio_map *map;
+
+	if (!devname)
+		return -ENOMEM;
+
+	/*
+	 * Minimally sized valid priomap string
+	 */
+	if (strlen(devname) < 3)
+		goto out_free_devname;
+
+	priostr = strstr(devname, " ");
+	if (!priostr)
+		goto out_free_devname;
+
+	/*
+	 *Separate the devname from the associated priority
+	 *and advance the priostr poitner to the priority value
+	 */
+	*priostr = '\0';
+	priostr++;
+
+	/*
+	 * If the priostr points to NULL, we're at the end of the passed
+	 * in string, and its not a valid write
+	 */
+	if (*priostr == '\0')
+		goto out_free_devname;
+
+	ret = kstrtoul(priostr, 10, &priority);
+	if (ret < 0)
+		goto out_free_devname;
+
+	ret = -ENODEV;
+
+	dev = dev_get_by_name(&init_net, devname);
+	if (!dev)
+		goto out_free_devname;
+
+	update_netdev_tables();
+	ret = 0;
+	rcu_read_lock();
+	map = rcu_dereference(dev->priomap);
+	if (map)
+		map->priomap[prioidx] = priority;
+	rcu_read_unlock();
+	dev_put(dev);
+
+out_free_devname:
+	kfree(devname);
+	return ret;
+}
+
+static struct cftype ss_files[] = {
+	{
+		.name = "prioidx",
+		.read_u64 = read_prioidx,
+	},
+	{
+		.name = "ifpriomap",
+		.read_map = read_priomap,
+		.write_string = write_priomap,
+	},
+};
+
+static int cgrp_populate(struct cgroup_subsys *ss, struct cgroup *cgrp)
+{
+	return cgroup_add_files(cgrp, ss, ss_files, ARRAY_SIZE(ss_files));
+}
+
+static int netprio_device_event(struct notifier_block *unused,
+				unsigned long event, void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct netprio_map *old;
+	u32 max_len = atomic_read(&max_prioidx);
+
+	/*
+	 * Note this is called with rtnl_lock held so we have update side
+	 * protection on our rcu assignments
+	 */
+
+	switch (event) {
+
+	case NETDEV_REGISTER:
+		if (max_len)
+			extend_netdev_table(dev, max_len);
+		break;
+	case NETDEV_UNREGISTER:
+		old = rtnl_dereference(dev->priomap);
+		rcu_assign_pointer(dev->priomap, NULL);
+		if (old)
+			kfree_rcu(old, rcu);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block netprio_device_notifier = {
+	.notifier_call = netprio_device_event
+};
+
+static int __init init_cgroup_netprio(void)
+{
+	int ret;
+
+	ret = cgroup_load_subsys(&net_prio_subsys);
+	if (ret)
+		goto out;
+#ifndef CONFIG_NETPRIO_CGROUP
+	smp_wmb();
+	net_prio_subsys_id = net_prio_subsys.subsys_id;
+#endif
+
+	register_netdevice_notifier(&netprio_device_notifier);
+
+out:
+	return ret;
+}
+
+static void __exit exit_cgroup_netprio(void)
+{
+	struct netprio_map *old;
+	struct net_device *dev;
+
+	unregister_netdevice_notifier(&netprio_device_notifier);
+
+	cgroup_unload_subsys(&net_prio_subsys);
+
+#ifndef CONFIG_NETPRIO_CGROUP
+	net_prio_subsys_id = -1;
+	synchronize_rcu();
+#endif
+
+	rtnl_lock();
+	for_each_netdev(&init_net, dev) {
+		old = rtnl_dereference(dev->priomap);
+		rcu_assign_pointer(dev->priomap, NULL);
+		if (old)
+			kfree_rcu(old, rcu);
+	}
+	rtnl_unlock();
+}
+
+module_init(init_cgroup_netprio);
+module_exit(exit_cgroup_netprio);
+MODULE_LICENSE("GPL v2");
diff --git a/net/core/sock.c b/net/core/sock.c
index 9a8b3fac1401..16069139797c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -125,6 +125,7 @@
 #include <net/xfrm.h>
 #include <linux/ipsec.h>
 #include <net/cls_cgroup.h>
+#include <net/netprio_cgroup.h>
 
 #include <linux/filter.h>
 
@@ -221,10 +222,16 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
 EXPORT_SYMBOL(sysctl_optmem_max);
 
-#if defined(CONFIG_CGROUPS) && !defined(CONFIG_NET_CLS_CGROUP)
+#if defined(CONFIG_CGROUPS)
+#if !defined(CONFIG_NET_CLS_CGROUP)
 int net_cls_subsys_id = -1;
 EXPORT_SYMBOL_GPL(net_cls_subsys_id);
 #endif
+#if !defined(CONFIG_NETPRIO_CGROUP)
+int net_prio_subsys_id = -1;
+EXPORT_SYMBOL_GPL(net_prio_subsys_id);
+#endif
+#endif
 
 static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen)
 {
@@ -1120,6 +1127,18 @@ void sock_update_classid(struct sock *sk)
 		sk->sk_classid = classid;
 }
 EXPORT_SYMBOL(sock_update_classid);
+
+void sock_update_netprioidx(struct sock *sk)
+{
+	struct cgroup_netprio_state *state;
+	if (in_interrupt())
+		return;
+	rcu_read_lock();
+	state = task_netprio_state(current);
+	sk->sk_cgrp_prioidx = state ? state->prioidx : 0;
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(sock_update_netprioidx);
 #endif
 
 /**
@@ -1147,6 +1166,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		atomic_set(&sk->sk_wmem_alloc, 1);
 
 		sock_update_classid(sk);
+		sock_update_netprioidx(sk);
 	}
 
 	return sk;
diff --git a/net/socket.c b/net/socket.c
index 425ef4270460..e62b4f055071 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -551,6 +551,8 @@ static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock,
 
 	sock_update_classid(sock->sk);
 
+	sock_update_netprioidx(sock->sk);
+
 	si->sock = sock;
 	si->scm = NULL;
 	si->msg = msg;
-- 
cgit v1.2.3


From 4e3fd7a06dc20b2d8ec6892233ad2012968fe7b6 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Mon, 21 Nov 2011 03:39:03 +0000
Subject: net: remove ipv6_addr_copy()

C assignment can handle struct in6_addr copying.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/core/addr.c         |  6 ++---
 drivers/infiniband/core/cma.c          |  8 +++---
 drivers/net/bonding/bond_ipv6.c        |  8 +++---
 drivers/net/ethernet/broadcom/cnic.c   |  2 +-
 fs/dlm/lowcomms.c                      |  2 +-
 include/linux/sunrpc/clnt.h            |  2 +-
 include/net/inetpeer.h                 |  2 +-
 include/net/ip_vs.h                    |  8 +++---
 include/net/ipv6.h                     |  5 ----
 include/net/xfrm.h                     |  4 +--
 net/bridge/br_multicast.c              | 10 ++++----
 net/core/pktgen.c                      | 15 +++++------
 net/dccp/ipv6.c                        | 42 +++++++++++++++----------------
 net/dccp/minisocks.c                   |  4 +--
 net/ipv4/inet_diag.c                   | 18 +++++--------
 net/ipv4/tcp_minisocks.c               |  4 +--
 net/ipv6/addrconf.c                    |  8 +++---
 net/ipv6/af_inet6.c                    | 14 +++++------
 net/ipv6/ah6.c                         | 12 ++++-----
 net/ipv6/anycast.c                     |  4 +--
 net/ipv6/datagram.c                    | 34 ++++++++++++-------------
 net/ipv6/exthdrs.c                     | 18 ++++++-------
 net/ipv6/fib6_rules.c                  |  2 +-
 net/ipv6/icmp.c                        | 18 ++++++-------
 net/ipv6/inet6_connection_sock.c       | 12 ++++-----
 net/ipv6/ip6_flowlabel.c               |  2 +-
 net/ipv6/ip6_output.c                  | 18 ++++++-------
 net/ipv6/ip6_tunnel.c                  | 12 ++++-----
 net/ipv6/ip6mr.c                       | 12 ++++-----
 net/ipv6/ipv6_sockglue.c               |  8 +++---
 net/ipv6/mcast.c                       |  6 ++---
 net/ipv6/mip6.c                        |  4 +--
 net/ipv6/ndisc.c                       |  6 ++---
 net/ipv6/netfilter/ip6t_REJECT.c       |  8 +++---
 net/ipv6/raw.c                         | 10 ++++----
 net/ipv6/reassembly.c                  |  4 +--
 net/ipv6/route.c                       | 42 +++++++++++++++----------------
 net/ipv6/sit.c                         |  4 +--
 net/ipv6/syncookies.c                  |  8 +++---
 net/ipv6/tcp_ipv6.c                    | 46 ++++++++++++++++------------------
 net/ipv6/udp.c                         |  7 +++---
 net/ipv6/xfrm6_mode_beet.c             |  8 +++---
 net/ipv6/xfrm6_mode_tunnel.c           |  4 +--
 net/ipv6/xfrm6_output.c                |  4 +--
 net/ipv6/xfrm6_policy.c                |  4 +--
 net/ipv6/xfrm6_state.c                 |  4 +--
 net/key/af_key.c                       |  2 +-
 net/netfilter/ipset/ip_set_hash_ip.c   |  2 +-
 net/netfilter/ipset/ip_set_hash_net.c  |  2 +-
 net/netfilter/ipvs/ip_vs_core.c        |  2 +-
 net/netfilter/ipvs/ip_vs_sync.c        |  6 ++---
 net/netfilter/ipvs/ip_vs_xmit.c        | 10 ++++----
 net/netfilter/nf_conntrack_h323_main.c |  4 +--
 net/netfilter/xt_TCPMSS.c              |  2 +-
 net/netfilter/xt_addrtype.c            |  2 +-
 net/netlabel/netlabel_kapi.c           |  4 +--
 net/netlabel/netlabel_mgmt.c           |  4 +--
 net/netlabel/netlabel_unlabeled.c      |  4 +--
 net/sctp/ipv6.c                        | 40 ++++++++++++++---------------
 net/sctp/socket.c                      |  2 +-
 net/sunrpc/svcauth_unix.c              |  6 ++---
 net/sunrpc/svcsock.c                   |  4 +--
 net/xfrm/xfrm_state.c                  | 12 +++------
 security/lsm_audit.c                   |  4 +--
 security/selinux/hooks.c               |  6 ++---
 security/selinux/netnode.c             |  2 +-
 66 files changed, 288 insertions(+), 315 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c
index 691276bafd78..adf0757280ed 100644
--- a/drivers/infiniband/core/addr.c
+++ b/drivers/infiniband/core/addr.c
@@ -243,8 +243,8 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 	int ret;
 
 	memset(&fl6, 0, sizeof fl6);
-	ipv6_addr_copy(&fl6.daddr, &dst_in->sin6_addr);
-	ipv6_addr_copy(&fl6.saddr, &src_in->sin6_addr);
+	fl6.daddr = dst_in->sin6_addr;
+	fl6.saddr = src_in->sin6_addr;
 	fl6.flowi6_oif = addr->bound_dev_if;
 
 	dst = ip6_route_output(&init_net, NULL, &fl6);
@@ -258,7 +258,7 @@ static int addr6_resolve(struct sockaddr_in6 *src_in,
 			goto put;
 
 		src_in->sin6_family = AF_INET6;
-		ipv6_addr_copy(&src_in->sin6_addr, &fl6.saddr);
+		src_in->sin6_addr = fl6.saddr;
 	}
 
 	if (dst->dev->flags & IFF_LOOPBACK) {
diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c
index 75ff821c0af0..09e66cce05d3 100644
--- a/drivers/infiniband/core/cma.c
+++ b/drivers/infiniband/core/cma.c
@@ -2005,11 +2005,11 @@ static int cma_resolve_loopback(struct rdma_id_private *id_priv)
 	if (cma_zero_addr(src)) {
 		dst = (struct sockaddr *) &id_priv->id.route.addr.dst_addr;
 		if ((src->sa_family = dst->sa_family) == AF_INET) {
-			((struct sockaddr_in *) src)->sin_addr.s_addr =
-				((struct sockaddr_in *) dst)->sin_addr.s_addr;
+			((struct sockaddr_in *)src)->sin_addr =
+				((struct sockaddr_in *)dst)->sin_addr;
 		} else {
-			ipv6_addr_copy(&((struct sockaddr_in6 *) src)->sin6_addr,
-				       &((struct sockaddr_in6 *) dst)->sin6_addr);
+			((struct sockaddr_in6 *)src)->sin6_addr =
+				((struct sockaddr_in6 *)dst)->sin6_addr;
 		}
 	}
 
diff --git a/drivers/net/bonding/bond_ipv6.c b/drivers/net/bonding/bond_ipv6.c
index 027a0ee7d85b..7e6632221a75 100644
--- a/drivers/net/bonding/bond_ipv6.c
+++ b/drivers/net/bonding/bond_ipv6.c
@@ -50,7 +50,7 @@ static void bond_glean_dev_ipv6(struct net_device *dev, struct in6_addr *addr)
 		struct inet6_ifaddr *ifa
 			= list_first_entry(&idev->addr_list,
 					   struct inet6_ifaddr, if_list);
-		ipv6_addr_copy(addr, &ifa->addr);
+		*addr = ifa->addr;
 	} else
 		ipv6_addr_set(addr, 0, 0, 0, 0);
 
@@ -168,8 +168,7 @@ static int bond_inet6addr_event(struct notifier_block *this,
 			switch (event) {
 			case NETDEV_UP:
 				if (ipv6_addr_any(&bond->master_ipv6))
-					ipv6_addr_copy(&bond->master_ipv6,
-						       &ifa->addr);
+					bond->master_ipv6 = ifa->addr;
 				return NOTIFY_OK;
 			case NETDEV_DOWN:
 				if (ipv6_addr_equal(&bond->master_ipv6,
@@ -191,8 +190,7 @@ static int bond_inet6addr_event(struct notifier_block *this,
 				switch (event) {
 				case NETDEV_UP:
 					if (ipv6_addr_any(&vlan->vlan_ipv6))
-						ipv6_addr_copy(&vlan->vlan_ipv6,
-							       &ifa->addr);
+						vlan->vlan_ipv6 = ifa->addr;
 					return NOTIFY_OK;
 				case NETDEV_DOWN:
 					if (ipv6_addr_equal(&vlan->vlan_ipv6,
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 6f10c6939834..099f41d99ec0 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -3475,7 +3475,7 @@ static int cnic_get_v6_route(struct sockaddr_in6 *dst_addr,
 	struct flowi6 fl6;
 
 	memset(&fl6, 0, sizeof(fl6));
-	ipv6_addr_copy(&fl6.daddr, &dst_addr->sin6_addr);
+	fl6.daddr = dst_addr->sin6_addr;
 	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL)
 		fl6.flowi6_oif = dst_addr->sin6_scope_id;
 
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 990626e7da80..0b3109ee4257 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -281,7 +281,7 @@ static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr)
 	} else {
 		struct sockaddr_in6 *in6  = (struct sockaddr_in6 *) &addr;
 		struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr;
-		ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr);
+		ret6->sin6_addr = in6->sin6_addr;
 	}
 
 	return 0;
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 3d8f9c44e27d..f15fd985b08a 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -237,7 +237,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
 	struct sockaddr_in6 *dsin6 = (struct sockaddr_in6 *) dst;
 
 	dsin6->sin6_family = ssin6->sin6_family;
-	ipv6_addr_copy(&dsin6->sin6_addr, &ssin6->sin6_addr);
+	dsin6->sin6_addr = ssin6->sin6_addr;
 	return true;
 }
 #else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 78c83e62218f..73a5c26c01ea 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -86,7 +86,7 @@ static inline struct inet_peer *inet_getpeer_v6(const struct in6_addr *v6daddr,
 {
 	struct inetpeer_addr daddr;
 
-	ipv6_addr_copy((struct in6_addr *)daddr.addr.a6, v6daddr);
+	*(struct in6_addr *)daddr.addr.a6 = *v6daddr;
 	daddr.family = AF_INET6;
 	return inet_getpeer(&daddr, create);
 }
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 873d5be7926c..48fd12e9d3af 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -21,7 +21,7 @@
 #include <linux/netfilter.h>		/* for union nf_inet_addr */
 #include <linux/ip.h>
 #include <linux/ipv6.h>			/* for struct ipv6hdr */
-#include <net/ipv6.h>			/* for ipv6_addr_copy */
+#include <net/ipv6.h>
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 #include <net/netfilter/nf_conntrack.h>
 #endif
@@ -119,8 +119,8 @@ ip_vs_fill_iphdr(int af, const void *nh, struct ip_vs_iphdr *iphdr)
 		const struct ipv6hdr *iph = nh;
 		iphdr->len = sizeof(struct ipv6hdr);
 		iphdr->protocol = iph->nexthdr;
-		ipv6_addr_copy(&iphdr->saddr.in6, &iph->saddr);
-		ipv6_addr_copy(&iphdr->daddr.in6, &iph->daddr);
+		iphdr->saddr.in6 = iph->saddr;
+		iphdr->daddr.in6 = iph->daddr;
 	} else
 #endif
 	{
@@ -137,7 +137,7 @@ static inline void ip_vs_addr_copy(int af, union nf_inet_addr *dst,
 {
 #ifdef CONFIG_IP_VS_IPV6
 	if (af == AF_INET6)
-		ipv6_addr_copy(&dst->in6, &src->in6);
+		dst->in6 = src->in6;
 	else
 #endif
 	dst->ip = src->ip;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 3f0258d2ef01..f35188e002d9 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -309,11 +309,6 @@ ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m,
 		  ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3]));
 }
 
-static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2)
-{
-	memcpy(a1, a2, sizeof(struct in6_addr));
-}
-
 static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
 				    const struct in6_addr *addr,
 				    int plen)
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 4de7ed9016d9..89174e29dca9 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1217,8 +1217,8 @@ void xfrm_flowi_addr_get(const struct flowi *fl,
 		memcpy(&daddr->a4, &fl->u.ip4.daddr, sizeof(daddr->a4));
 		break;
 	case AF_INET6:
-		ipv6_addr_copy((struct in6_addr *)&saddr->a6, &fl->u.ip6.saddr);
-		ipv6_addr_copy((struct in6_addr *)&daddr->a6, &fl->u.ip6.daddr);
+		*(struct in6_addr *)saddr->a6 = fl->u.ip6.saddr;
+		*(struct in6_addr *)daddr->a6 = fl->u.ip6.daddr;
 		break;
 	}
 }
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index a5f4e5769809..7743e0d109ea 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -127,7 +127,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 {
 	struct br_ip br_dst;
 
-	ipv6_addr_copy(&br_dst.u.ip6, dst);
+	br_dst.u.ip6 = *dst;
 	br_dst.proto = htons(ETH_P_IPV6);
 
 	return br_mdb_ip_get(mdb, &br_dst);
@@ -154,7 +154,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 		break;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	case htons(ETH_P_IPV6):
-		ipv6_addr_copy(&ip.u.ip6, &ipv6_hdr(skb)->daddr);
+		ip.u.ip6 = ipv6_hdr(skb)->daddr;
 		break;
 #endif
 	default:
@@ -474,7 +474,7 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 	mldq->mld_cksum = 0;
 	mldq->mld_maxdelay = htons((u16)jiffies_to_msecs(interval));
 	mldq->mld_reserved = 0;
-	ipv6_addr_copy(&mldq->mld_mca, group);
+	mldq->mld_mca = *group;
 
 	/* checksum */
 	mldq->mld_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
@@ -783,7 +783,7 @@ static int br_ip6_multicast_add_group(struct net_bridge *br,
 	if (!ipv6_is_transient_multicast(group))
 		return 0;
 
-	ipv6_addr_copy(&br_group.u.ip6, group);
+	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
 
 	return br_multicast_add_group(br, port, &br_group);
@@ -1344,7 +1344,7 @@ static void br_ip6_multicast_leave_group(struct net_bridge *br,
 	if (!ipv6_is_transient_multicast(group))
 		return;
 
-	ipv6_addr_copy(&br_group.u.ip6, group);
+	br_group.u.ip6 = *group;
 	br_group.proto = htons(ETH_P_IPV6);
 
 	br_multicast_leave_group(br, port, &br_group);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 0001c243b35c..aa53a35a631b 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -1304,7 +1304,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		scan_ip6(buf, pkt_dev->in6_daddr.s6_addr);
 		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_daddr);
 
-		ipv6_addr_copy(&pkt_dev->cur_in6_daddr, &pkt_dev->in6_daddr);
+		pkt_dev->cur_in6_daddr = pkt_dev->in6_daddr;
 
 		if (debug)
 			printk(KERN_DEBUG "pktgen: dst6 set to: %s\n", buf);
@@ -1327,8 +1327,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		scan_ip6(buf, pkt_dev->min_in6_daddr.s6_addr);
 		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->min_in6_daddr);
 
-		ipv6_addr_copy(&pkt_dev->cur_in6_daddr,
-			       &pkt_dev->min_in6_daddr);
+		pkt_dev->cur_in6_daddr = pkt_dev->min_in6_daddr;
 		if (debug)
 			printk(KERN_DEBUG "pktgen: dst6_min set to: %s\n", buf);
 
@@ -1371,7 +1370,7 @@ static ssize_t pktgen_if_write(struct file *file,
 		scan_ip6(buf, pkt_dev->in6_saddr.s6_addr);
 		snprintf(buf, sizeof(buf), "%pI6c", &pkt_dev->in6_saddr);
 
-		ipv6_addr_copy(&pkt_dev->cur_in6_saddr, &pkt_dev->in6_saddr);
+		pkt_dev->cur_in6_saddr = pkt_dev->in6_saddr;
 
 		if (debug)
 			printk(KERN_DEBUG "pktgen: src6 set to: %s\n", buf);
@@ -2079,9 +2078,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev)
 				     ifp = ifp->if_next) {
 					if (ifp->scope == IFA_LINK &&
 					    !(ifp->flags & IFA_F_TENTATIVE)) {
-						ipv6_addr_copy(&pkt_dev->
-							       cur_in6_saddr,
-							       &ifp->addr);
+						pkt_dev->cur_in6_saddr = ifp->addr;
 						err = 0;
 						break;
 					}
@@ -2958,8 +2955,8 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev,
 	iph->payload_len = htons(sizeof(struct udphdr) + datalen);
 	iph->nexthdr = IPPROTO_UDP;
 
-	ipv6_addr_copy(&iph->daddr, &pkt_dev->cur_in6_daddr);
-	ipv6_addr_copy(&iph->saddr, &pkt_dev->cur_in6_saddr);
+	iph->daddr = pkt_dev->cur_in6_daddr;
+	iph->saddr = pkt_dev->cur_in6_saddr;
 
 	skb->mac_header = (skb->network_header - ETH_HLEN -
 			   pkt_dev->pkt_overhead);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 17ee85ce148d..ce903f747e64 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -150,8 +150,8 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			 */
 			memset(&fl6, 0, sizeof(fl6));
 			fl6.flowi6_proto = IPPROTO_DCCP;
-			ipv6_addr_copy(&fl6.daddr, &np->daddr);
-			ipv6_addr_copy(&fl6.saddr, &np->saddr);
+			fl6.daddr = np->daddr;
+			fl6.saddr = np->saddr;
 			fl6.flowi6_oif = sk->sk_bound_dev_if;
 			fl6.fl6_dport = inet->inet_dport;
 			fl6.fl6_sport = inet->inet_sport;
@@ -244,8 +244,8 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
-	ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+	fl6.daddr = ireq6->rmt_addr;
+	fl6.saddr = ireq6->loc_addr;
 	fl6.flowlabel = 0;
 	fl6.flowi6_oif = ireq6->iif;
 	fl6.fl6_dport = inet_rsk(req)->rmt_port;
@@ -270,7 +270,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req,
 		dh->dccph_checksum = dccp_v6_csum_finish(skb,
 							 &ireq6->loc_addr,
 							 &ireq6->rmt_addr);
-		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		fl6.daddr = ireq6->rmt_addr;
 		err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
@@ -313,8 +313,8 @@ static void dccp_v6_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 							    &rxip6h->daddr);
 
 	memset(&fl6, 0, sizeof(fl6));
-	ipv6_addr_copy(&fl6.daddr, &rxip6h->saddr);
-	ipv6_addr_copy(&fl6.saddr, &rxip6h->daddr);
+	fl6.daddr = rxip6h->saddr;
+	fl6.saddr = rxip6h->daddr;
 
 	fl6.flowi6_proto = IPPROTO_DCCP;
 	fl6.flowi6_oif = inet6_iif(rxskb);
@@ -419,8 +419,8 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		goto drop_and_free;
 
 	ireq6 = inet6_rsk(req);
-	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
@@ -491,7 +491,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+		newnp->rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &dccp_ipv6_mapped;
 		newsk->sk_backlog_rcv = dccp_v4_do_rcv;
@@ -526,9 +526,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_DCCP;
-		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		fl6.daddr = ireq6->rmt_addr;
 		final_p = fl6_update_dst(&fl6, opt, &final);
-		ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+		fl6.saddr = ireq6->loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.fl6_dport = inet_rsk(req)->rmt_port;
 		fl6.fl6_sport = inet_rsk(req)->loc_port;
@@ -559,9 +559,9 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &ireq6->rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &ireq6->loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &ireq6->loc_addr);
+	newnp->daddr = ireq6->rmt_addr;
+	newnp->saddr = ireq6->loc_addr;
+	newnp->rcv_saddr = ireq6->loc_addr;
 	newsk->sk_bound_dev_if = ireq6->iif;
 
 	/* Now IPv6 options...
@@ -877,7 +877,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			usin->sin6_addr = flowlabel->dst;
 			fl6_sock_release(flowlabel);
 		}
 	}
@@ -910,7 +910,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			return -EINVAL;
 	}
 
-	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+	np->daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -949,8 +949,8 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		saddr = &np->rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_DCCP;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr, saddr ? saddr : &np->saddr);
+	fl6.daddr = np->daddr;
+	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.fl6_dport = usin->sin6_port;
 	fl6.fl6_sport = inet->inet_sport;
@@ -966,11 +966,11 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		ipv6_addr_copy(&np->rcv_saddr, saddr);
+		np->rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
-	ipv6_addr_copy(&np->saddr, saddr);
+	np->saddr = *saddr;
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	__ip6_dst_store(sk, dst, NULL, NULL);
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index 563b7c74e49d..b50d5fd3d696 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -60,8 +60,8 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 
 			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
 			tw6 = inet6_twsk((struct sock *)tw);
-			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
-			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw6->tw_v6_daddr = np->daddr;
+			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 68e8ac514383..bbebdecd7234 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -129,10 +129,8 @@ static int inet_csk_diag_fill(struct sock *sk,
 	if (r->idiag_family == AF_INET6) {
 		const struct ipv6_pinfo *np = inet6_sk(sk);
 
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &np->rcv_saddr);
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &np->daddr);
+		*(struct in6_addr *)r->id.idiag_src = np->rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = np->daddr;
 		if (ext & (1 << (INET_DIAG_TCLASS - 1)))
 			RTA_PUT_U8(skb, INET_DIAG_TCLASS, np->tclass);
 	}
@@ -224,10 +222,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 		const struct inet6_timewait_sock *tw6 =
 						inet6_twsk((struct sock *)tw);
 
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &tw6->tw_v6_rcv_saddr);
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &tw6->tw_v6_daddr);
+		*(struct in6_addr *)r->id.idiag_src = tw6->tw_v6_rcv_saddr;
+		*(struct in6_addr *)r->id.idiag_dst = tw6->tw_v6_daddr;
 	}
 #endif
 	nlh->nlmsg_len = skb_tail_pointer(skb) - previous_tail;
@@ -603,10 +599,8 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_inode = 0;
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 	if (r->idiag_family == AF_INET6) {
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &inet6_rsk(req)->loc_addr);
-		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &inet6_rsk(req)->rmt_addr);
+		*(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
+		*(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
 	}
 #endif
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 0a7e3398c461..945efffdd929 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -343,8 +343,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 
 			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
 			tw6 = inet6_twsk((struct sock *)tw);
-			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
-			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw6->tw_v6_daddr = np->daddr;
+			tw6->tw_v6_rcv_saddr = np->rcv_saddr;
 			tw->tw_tclass = np->tclass;
 			tw->tw_ipv6only = np->ipv6only;
 		}
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index cf88df82e2c2..586051726341 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -636,7 +636,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 		goto out;
 	}
 
-	ipv6_addr_copy(&ifa->addr, addr);
+	ifa->addr = *addr;
 
 	spin_lock_init(&ifa->lock);
 	spin_lock_init(&ifa->state_lock);
@@ -1228,7 +1228,7 @@ try_nextdev:
 	if (!hiscore->ifa)
 		return -EADDRNOTAVAIL;
 
-	ipv6_addr_copy(saddr, &hiscore->ifa->addr);
+	*saddr = hiscore->ifa->addr;
 	in6_ifa_put(hiscore->ifa);
 	return 0;
 }
@@ -1249,7 +1249,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr,
 		list_for_each_entry(ifp, &idev->addr_list, if_list) {
 			if (ifp->scope == IFA_LINK &&
 			    !(ifp->flags & banned_flags)) {
-				ipv6_addr_copy(addr, &ifp->addr);
+				*addr = ifp->addr;
 				err = 0;
 				break;
 			}
@@ -1700,7 +1700,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
 		.fc_protocol = RTPROT_KERNEL,
 	};
 
-	ipv6_addr_copy(&cfg.fc_dst, pfx);
+	cfg.fc_dst = *pfx;
 
 	/* Prevent useless cloning on PtP SIT.
 	   This thing is done here expecting that the whole
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index ee3319487c4f..7694c82e629d 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -361,10 +361,10 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	inet->inet_rcv_saddr = v4addr;
 	inet->inet_saddr = v4addr;
 
-	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+	np->rcv_saddr = addr->sin6_addr;
 
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
-		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+		np->saddr = addr->sin6_addr;
 
 	/* Make sure we are allowed to bind here. */
 	if (sk->sk_prot->get_port(sk, snum)) {
@@ -458,14 +458,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		    peer == 1)
 			return -ENOTCONN;
 		sin->sin6_port = inet->inet_dport;
-		ipv6_addr_copy(&sin->sin6_addr, &np->daddr);
+		sin->sin6_addr = np->daddr;
 		if (np->sndflow)
 			sin->sin6_flowinfo = np->flow_label;
 	} else {
 		if (ipv6_addr_any(&np->rcv_saddr))
-			ipv6_addr_copy(&sin->sin6_addr, &np->saddr);
+			sin->sin6_addr = np->saddr;
 		else
-			ipv6_addr_copy(&sin->sin6_addr, &np->rcv_saddr);
+			sin->sin6_addr = np->rcv_saddr;
 
 		sin->sin6_port = inet->inet_sport;
 	}
@@ -660,8 +660,8 @@ int inet6_sk_rebuild_header(struct sock *sk)
 
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = sk->sk_protocol;
-		ipv6_addr_copy(&fl6.daddr, &np->daddr);
-		ipv6_addr_copy(&fl6.saddr, &np->saddr);
+		fl6.daddr = np->daddr;
+		fl6.saddr = np->saddr;
 		fl6.flowlabel = np->flow_label;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = sk->sk_mark;
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 4c0f894d0843..2ae79dbeec2f 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -193,9 +193,9 @@ static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *des
 						printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length);
 					goto bad;
 				}
-				ipv6_addr_copy(&final_addr, &hao->addr);
-				ipv6_addr_copy(&hao->addr, &iph->saddr);
-				ipv6_addr_copy(&iph->saddr, &final_addr);
+				final_addr = hao->addr;
+				hao->addr = iph->saddr;
+				iph->saddr = final_addr;
 			}
 			break;
 		}
@@ -241,13 +241,13 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
 	segments = rthdr->hdrlen >> 1;
 
 	addrs = ((struct rt0_hdr *)rthdr)->addr;
-	ipv6_addr_copy(&final_addr, addrs + segments - 1);
+	final_addr = addrs[segments - 1];
 
 	addrs += segments - segments_left;
 	memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
 
-	ipv6_addr_copy(addrs, &iph->daddr);
-	ipv6_addr_copy(&iph->daddr, &final_addr);
+	addrs[0] = iph->daddr;
+	iph->daddr = final_addr;
 }
 
 static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index 674255f5e6b7..fc1cdcd7041a 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -75,7 +75,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 	if (pac == NULL)
 		return -ENOMEM;
 	pac->acl_next = NULL;
-	ipv6_addr_copy(&pac->acl_addr, addr);
+	pac->acl_addr = *addr;
 
 	rcu_read_lock();
 	if (ifindex == 0) {
@@ -296,7 +296,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, const struct in6_addr *addr)
 		goto out;
 	}
 
-	ipv6_addr_copy(&aca->aca_addr, addr);
+	aca->aca_addr = *addr;
 	aca->aca_idev = idev;
 	aca->aca_rt = rt;
 	aca->aca_users = 1;
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 83037af4fa7b..ae08aee1773c 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -71,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			usin->sin6_addr = flowlabel->dst;
 		}
 	}
 
@@ -143,7 +143,7 @@ ipv4_connected:
 		}
 	}
 
-	ipv6_addr_copy(&np->daddr, daddr);
+	np->daddr = *daddr;
 	np->flow_label = fl6.flowlabel;
 
 	inet->inet_dport = usin->sin6_port;
@@ -154,8 +154,8 @@ ipv4_connected:
 	 */
 
 	fl6.flowi6_proto = sk->sk_protocol;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr, &np->saddr);
+	fl6.daddr = np->daddr;
+	fl6.saddr = np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = inet->inet_dport;
@@ -179,10 +179,10 @@ ipv4_connected:
 	/* source address lookup done in ip6_dst_lookup */
 
 	if (ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&np->saddr, &fl6.saddr);
+		np->saddr = fl6.saddr;
 
 	if (ipv6_addr_any(&np->rcv_saddr)) {
-		ipv6_addr_copy(&np->rcv_saddr, &fl6.saddr);
+		np->rcv_saddr = fl6.saddr;
 		inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 		if (sk->sk_prot->rehash)
 			sk->sk_prot->rehash(sk);
@@ -257,7 +257,7 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info)
 	skb_put(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
-	ipv6_addr_copy(&iph->daddr, &fl6->daddr);
+	iph->daddr = fl6->daddr;
 
 	serr = SKB_EXT_ERR(skb);
 	serr->ee.ee_errno = err;
@@ -294,7 +294,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	skb_put(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	iph = ipv6_hdr(skb);
-	ipv6_addr_copy(&iph->daddr, &fl6->daddr);
+	iph->daddr = fl6->daddr;
 
 	mtu_info = IP6CBMTU(skb);
 
@@ -303,7 +303,7 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
 	mtu_info->ip6m_addr.sin6_port = 0;
 	mtu_info->ip6m_addr.sin6_flowinfo = 0;
 	mtu_info->ip6m_addr.sin6_scope_id = fl6->flowi6_oif;
-	ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
+	mtu_info->ip6m_addr.sin6_addr = ipv6_hdr(skb)->daddr;
 
 	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
 	skb_reset_transport_header(skb);
@@ -354,8 +354,8 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_port = serr->port;
 		sin->sin6_scope_id = 0;
 		if (skb->protocol == htons(ETH_P_IPV6)) {
-			ipv6_addr_copy(&sin->sin6_addr,
-				  (struct in6_addr *)(nh + serr->addr_offset));
+			sin->sin6_addr =
+				*(struct in6_addr *)(nh + serr->addr_offset);
 			if (np->sndflow)
 				sin->sin6_flowinfo =
 					(*(__be32 *)(nh + serr->addr_offset - 24) &
@@ -376,7 +376,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_flowinfo = 0;
 		sin->sin6_scope_id = 0;
 		if (skb->protocol == htons(ETH_P_IPV6)) {
-			ipv6_addr_copy(&sin->sin6_addr, &ipv6_hdr(skb)->saddr);
+			sin->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (np->rxopt.all)
 				datagram_recv_ctl(sk, msg, skb);
 			if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -451,7 +451,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
 		sin->sin6_flowinfo = 0;
 		sin->sin6_port = 0;
 		sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
-		ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
+		sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr;
 	}
 
 	put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
@@ -475,7 +475,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+		src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
 		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 	}
 
@@ -550,7 +550,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 		struct in6_pktinfo src_info;
 
 		src_info.ipi6_ifindex = opt->iif;
-		ipv6_addr_copy(&src_info.ipi6_addr, &ipv6_hdr(skb)->daddr);
+		src_info.ipi6_addr = ipv6_hdr(skb)->daddr;
 		put_cmsg(msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 	}
 	if (np->rxopt.bits.rxohlim) {
@@ -584,7 +584,7 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 			 */
 
 			sin6.sin6_family = AF_INET6;
-			ipv6_addr_copy(&sin6.sin6_addr, &ipv6_hdr(skb)->daddr);
+			sin6.sin6_addr = ipv6_hdr(skb)->daddr;
 			sin6.sin6_port = ports[1];
 			sin6.sin6_flowinfo = 0;
 			sin6.sin6_scope_id = 0;
@@ -659,7 +659,7 @@ int datagram_send_ctl(struct net *net, struct sock *sk,
 						   strict ? dev : NULL, 0))
 					err = -EINVAL;
 				else
-					ipv6_addr_copy(&fl6->saddr, &src_info->ipi6_addr);
+					fl6->saddr = src_info->ipi6_addr;
 			}
 
 			rcu_read_unlock();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index bf22a225f422..3d641b6e9b09 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -243,9 +243,9 @@ static int ipv6_dest_hao(struct sk_buff *skb, int optoff)
 	if (skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->ip_summed = CHECKSUM_NONE;
 
-	ipv6_addr_copy(&tmp_addr, &ipv6h->saddr);
-	ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
-	ipv6_addr_copy(&hao->addr, &tmp_addr);
+	tmp_addr = ipv6h->saddr;
+	ipv6h->saddr = hao->addr;
+	hao->addr = tmp_addr;
 
 	if (skb->tstamp.tv64 == 0)
 		__net_timestamp(skb);
@@ -461,9 +461,9 @@ looped_back:
 		return -1;
 	}
 
-	ipv6_addr_copy(&daddr, addr);
-	ipv6_addr_copy(addr, &ipv6_hdr(skb)->daddr);
-	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &daddr);
+	daddr = *addr;
+	*addr = ipv6_hdr(skb)->daddr;
+	ipv6_hdr(skb)->daddr = daddr;
 
 	skb_dst_drop(skb);
 	ip6_route_input(skb);
@@ -690,7 +690,7 @@ static void ipv6_push_rthdr(struct sk_buff *skb, u8 *proto,
 		memcpy(phdr->addr, ihdr->addr + 1,
 		       (hops - 1) * sizeof(struct in6_addr));
 
-	ipv6_addr_copy(phdr->addr + (hops - 1), *addr_p);
+	phdr->addr[hops - 1] = **addr_p;
 	*addr_p = ihdr->addr;
 
 	phdr->rt_hdr.nexthdr = *proto;
@@ -888,8 +888,8 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6,
 	if (!opt || !opt->srcrt)
 		return NULL;
 
-	ipv6_addr_copy(orig, &fl6->daddr);
-	ipv6_addr_copy(&fl6->daddr, ((struct rt0_hdr *)opt->srcrt)->addr);
+	*orig = fl6->daddr;
+	fl6->daddr = *((struct rt0_hdr *)opt->srcrt)->addr;
 	return orig;
 }
 
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 295571576f83..b6c573152067 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -96,7 +96,7 @@ static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp,
 			if (!ipv6_prefix_equal(&saddr, &r->src.addr,
 					       r->src.plen))
 				goto again;
-			ipv6_addr_copy(&flp6->saddr, &saddr);
+			flp6->saddr = saddr;
 		}
 		goto out;
 	}
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 90868fb42757..9e2bdccf9143 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -290,9 +290,9 @@ static void mip6_addr_swap(struct sk_buff *skb)
 		if (likely(off >= 0)) {
 			hao = (struct ipv6_destopt_hao *)
 					(skb_network_header(skb) + off);
-			ipv6_addr_copy(&tmp, &iph->saddr);
-			ipv6_addr_copy(&iph->saddr, &hao->addr);
-			ipv6_addr_copy(&hao->addr, &tmp);
+			tmp = iph->saddr;
+			iph->saddr = hao->addr;
+			hao->addr = tmp;
 		}
 	}
 }
@@ -444,9 +444,9 @@ void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl6.daddr, &hdr->saddr);
+	fl6.daddr = hdr->saddr;
 	if (saddr)
-		ipv6_addr_copy(&fl6.saddr, saddr);
+		fl6.saddr = *saddr;
 	fl6.flowi6_oif = iif;
 	fl6.fl6_icmp_type = type;
 	fl6.fl6_icmp_code = code;
@@ -538,9 +538,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_ICMPV6;
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
+	fl6.daddr = ipv6_hdr(skb)->saddr;
 	if (saddr)
-		ipv6_addr_copy(&fl6.saddr, saddr);
+		fl6.saddr = *saddr;
 	fl6.flowi6_oif = skb->dev->ifindex;
 	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY;
 	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
@@ -786,8 +786,8 @@ void icmpv6_flow_init(struct sock *sk, struct flowi6 *fl6,
 		      int oif)
 {
 	memset(fl6, 0, sizeof(*fl6));
-	ipv6_addr_copy(&fl6->saddr, saddr);
-	ipv6_addr_copy(&fl6->daddr, daddr);
+	fl6->saddr = *saddr;
+	fl6->daddr = *daddr;
 	fl6->flowi6_proto 	= IPPROTO_ICMPV6;
 	fl6->fl6_icmp_type	= type;
 	fl6->fl6_icmp_code	= 0;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index fee46d5a2f12..4d7bfb321c75 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -65,9 +65,9 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
+	fl6.daddr = treq->rmt_addr;
 	final_p = fl6_update_dst(&fl6, np->opt, &final);
-	ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
+	fl6.saddr = treq->loc_addr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = inet_rsk(req)->rmt_port;
@@ -157,7 +157,7 @@ void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
 
 	sin6->sin6_family = AF_INET6;
-	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+	sin6->sin6_addr = np->daddr;
 	sin6->sin6_port	= inet_sk(sk)->inet_dport;
 	/* We do not store received flowlabel for TCP */
 	sin6->sin6_flowinfo = 0;
@@ -215,8 +215,8 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = sk->sk_protocol;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr, &np->saddr);
+	fl6.daddr = np->daddr;
+	fl6.saddr = np->saddr;
 	fl6.flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl6.flowlabel);
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
@@ -246,7 +246,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)
 	skb_dst_set_noref(skb, dst);
 
 	/* Restore final destination back after routing done */
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
+	fl6.daddr = np->daddr;
 
 	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
 	rcu_read_unlock();
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index 4566dbd916d3..b7867a1215b1 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -386,7 +386,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
 		err = -EINVAL;
 		goto done;
 	}
-	ipv6_addr_copy(&fl->dst, &freq->flr_dst);
+	fl->dst = freq->flr_dst;
 	atomic_set(&fl->users, 1);
 	switch (fl->share) {
 	case IPV6_FL_S_EXCL:
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 68ef97f353b6..a24e15557843 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -238,8 +238,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 	hdr->nexthdr = proto;
 	hdr->hop_limit = hlimit;
 
-	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
-	ipv6_addr_copy(&hdr->daddr, first_hop);
+	hdr->saddr = fl6->saddr;
+	hdr->daddr = *first_hop;
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
@@ -290,8 +290,8 @@ int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 	hdr->nexthdr = proto;
 	hdr->hop_limit = np->hop_limit;
 
-	ipv6_addr_copy(&hdr->saddr, saddr);
-	ipv6_addr_copy(&hdr->daddr, daddr);
+	hdr->saddr = *saddr;
+	hdr->daddr = *daddr;
 
 	return 0;
 }
@@ -1063,7 +1063,7 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 	if (err)
 		return ERR_PTR(err);
 	if (final_dst)
-		ipv6_addr_copy(&fl6->daddr, final_dst);
+		fl6->daddr = *final_dst;
 	if (can_sleep)
 		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
 
@@ -1099,7 +1099,7 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
 	if (err)
 		return ERR_PTR(err);
 	if (final_dst)
-		ipv6_addr_copy(&fl6->daddr, final_dst);
+		fl6->daddr = *final_dst;
 	if (can_sleep)
 		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
 
@@ -1592,7 +1592,7 @@ int ip6_push_pending_frames(struct sock *sk)
 	if (np->pmtudisc < IPV6_PMTUDISC_DO)
 		skb->local_df = 1;
 
-	ipv6_addr_copy(final_dst, &fl6->daddr);
+	*final_dst = fl6->daddr;
 	__skb_pull(skb, skb_network_header_len(skb));
 	if (opt && opt->opt_flen)
 		ipv6_push_frag_opts(skb, opt, &proto);
@@ -1608,8 +1608,8 @@ int ip6_push_pending_frames(struct sock *sk)
 
 	hdr->hop_limit = np->cork.hop_limit;
 	hdr->nexthdr = proto;
-	ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
-	ipv6_addr_copy(&hdr->daddr, final_dst);
+	hdr->saddr = fl6->saddr;
+	hdr->daddr = *final_dst;
 
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 83f0e31c5fbd..f5f98f558acb 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -979,8 +979,8 @@ static int ip6_tnl_xmit2(struct sk_buff *skb,
 	ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield);
 	ipv6h->hop_limit = t->parms.hop_limit;
 	ipv6h->nexthdr = proto;
-	ipv6_addr_copy(&ipv6h->saddr, &fl6->saddr);
-	ipv6_addr_copy(&ipv6h->daddr, &fl6->daddr);
+	ipv6h->saddr = fl6->saddr;
+	ipv6h->daddr = fl6->daddr;
 	nf_reset(skb);
 	pkt_len = skb->len;
 	err = ip6_local_out(skb);
@@ -1155,8 +1155,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr));
 
 	/* Set up flowi template */
-	ipv6_addr_copy(&fl6->saddr, &p->laddr);
-	ipv6_addr_copy(&fl6->daddr, &p->raddr);
+	fl6->saddr = p->laddr;
+	fl6->daddr = p->raddr;
 	fl6->flowi6_oif = p->link;
 	fl6->flowlabel = 0;
 
@@ -1212,8 +1212,8 @@ static void ip6_tnl_link_config(struct ip6_tnl *t)
 static int
 ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
 {
-	ipv6_addr_copy(&t->parms.laddr, &p->laddr);
-	ipv6_addr_copy(&t->parms.raddr, &p->raddr);
+	t->parms.laddr = p->laddr;
+	t->parms.raddr = p->raddr;
 	t->parms.flags = p->flags;
 	t->parms.hop_limit = p->hop_limit;
 	t->parms.encap_limit = p->encap_limit;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 449a9185b8f2..c7e95c8c579f 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1105,8 +1105,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
 		msg->im6_mif = mrt->mroute_reg_vif_num;
 		msg->im6_pad = 0;
-		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
-		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+		msg->im6_src = ipv6_hdr(pkt)->saddr;
+		msg->im6_dst = ipv6_hdr(pkt)->daddr;
 
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 	} else
@@ -1131,8 +1131,8 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 	msg->im6_msgtype = assert;
 	msg->im6_mif = mifi;
 	msg->im6_pad = 0;
-	ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
-	ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
+	msg->im6_src = ipv6_hdr(pkt)->saddr;
+	msg->im6_dst = ipv6_hdr(pkt)->daddr;
 
 	skb_dst_set(skb, dst_clone(skb_dst(pkt)));
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
@@ -2181,8 +2181,8 @@ int ip6mr_get_route(struct net *net,
 		iph->payload_len = 0;
 		iph->nexthdr = IPPROTO_NONE;
 		iph->hop_limit = 0;
-		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
-		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
+		iph->saddr = rt->rt6i_src.addr;
+		iph->daddr = rt->rt6i_dst.addr;
 
 		err = ip6mr_cache_unresolved(mrt, vif, skb2);
 		read_unlock(&mrt_lock);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index c99e3ee9781f..29993b7079a5 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -435,7 +435,7 @@ sticky_done:
 			goto e_inval;
 
 		np->sticky_pktinfo.ipi6_ifindex = pkt.ipi6_ifindex;
-		ipv6_addr_copy(&np->sticky_pktinfo.ipi6_addr, &pkt.ipi6_addr);
+		np->sticky_pktinfo.ipi6_addr = pkt.ipi6_addr;
 		retv = 0;
 		break;
 	}
@@ -980,8 +980,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
-					ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxhlim) {
@@ -992,8 +991,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 				struct in6_pktinfo src_info;
 				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :
 					np->sticky_pktinfo.ipi6_ifindex;
-				np->mcast_oif? ipv6_addr_copy(&src_info.ipi6_addr, &np->daddr) :
-					ipv6_addr_copy(&src_info.ipi6_addr, &(np->sticky_pktinfo.ipi6_addr));
+				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr;
 				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);
 			}
 			if (np->rxopt.bits.rxohlim) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 7b94bebb73b1..6cc4d1fb8c13 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -155,7 +155,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr)
 		return -ENOMEM;
 
 	mc_lst->next = NULL;
-	ipv6_addr_copy(&mc_lst->addr, addr);
+	mc_lst->addr = *addr;
 
 	rcu_read_lock();
 	if (ifindex == 0) {
@@ -858,7 +858,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr)
 
 	setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc);
 
-	ipv6_addr_copy(&mc->mca_addr, addr);
+	mc->mca_addr = *addr;
 	mc->idev = idev; /* (reference taken) */
 	mc->mca_users = 1;
 	/* mca_stamp should be updated upon changes */
@@ -1776,7 +1776,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	hdr = (struct mld_msg *) skb_put(skb, sizeof(struct mld_msg));
 	memset(hdr, 0, sizeof(struct mld_msg));
 	hdr->mld_type = type;
-	ipv6_addr_copy(&hdr->mld_mca, addr);
+	hdr->mld_mca = *addr;
 
 	hdr->mld_cksum = csum_ipv6_magic(saddr, snd_addr, len,
 					 IPPROTO_ICMPV6,
diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c
index 43242e6e6103..7e1e0fbfef21 100644
--- a/net/ipv6/mip6.c
+++ b/net/ipv6/mip6.c
@@ -195,8 +195,8 @@ static inline int mip6_report_rl_allow(struct timeval *stamp,
 		mip6_report_rl.stamp.tv_sec = stamp->tv_sec;
 		mip6_report_rl.stamp.tv_usec = stamp->tv_usec;
 		mip6_report_rl.iif = iif;
-		ipv6_addr_copy(&mip6_report_rl.src, src);
-		ipv6_addr_copy(&mip6_report_rl.dst, dst);
+		mip6_report_rl.src = *src;
+		mip6_report_rl.dst = *dst;
 		allow = 1;
 	}
 	spin_unlock_bh(&mip6_report_rl.lock);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index d699ddcad4ce..a4769881c5b5 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -481,7 +481,7 @@ struct sk_buff *ndisc_build_skb(struct net_device *dev,
 
 	opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
 	if (target) {
-		ipv6_addr_copy((struct in6_addr *)opt, target);
+		*(struct in6_addr *)opt = *target;
 		opt += sizeof(*target);
 	}
 
@@ -1622,9 +1622,9 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
 	 */
 
 	addrp = (struct in6_addr *)(icmph + 1);
-	ipv6_addr_copy(addrp, target);
+	*addrp = *target;
 	addrp++;
-	ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
+	*addrp = ipv6_hdr(skb)->daddr;
 
 	opt = (u8*) (addrp + 1);
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index a5a4c5dd5396..b5a2aa58a03a 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -93,8 +93,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.saddr, &oip6h->daddr);
-	ipv6_addr_copy(&fl6.daddr, &oip6h->saddr);
+	fl6.saddr = oip6h->daddr;
+	fl6.daddr = oip6h->saddr;
 	fl6.fl6_sport = otcph.dest;
 	fl6.fl6_dport = otcph.source;
 	security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6));
@@ -129,8 +129,8 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
 	*(__be32 *)ip6h =  htonl(0x60000000 | (tclass << 20));
 	ip6h->hop_limit = ip6_dst_hoplimit(dst);
 	ip6h->nexthdr = IPPROTO_TCP;
-	ipv6_addr_copy(&ip6h->saddr, &oip6h->daddr);
-	ipv6_addr_copy(&ip6h->daddr, &oip6h->saddr);
+	ip6h->saddr = oip6h->daddr;
+	ip6h->daddr = oip6h->saddr;
 
 	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
 	/* Truncate to length (no data) */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a1aa869a9ce7..a4894f4f1944 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -299,9 +299,9 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	}
 
 	inet->inet_rcv_saddr = inet->inet_saddr = v4addr;
-	ipv6_addr_copy(&np->rcv_saddr, &addr->sin6_addr);
+	np->rcv_saddr = addr->sin6_addr;
 	if (!(addr_type & IPV6_ADDR_MULTICAST))
-		ipv6_addr_copy(&np->saddr, &addr->sin6_addr);
+		np->saddr = addr->sin6_addr;
 	err = 0;
 out_unlock:
 	rcu_read_unlock();
@@ -495,7 +495,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (sin6) {
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = 0;
-		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
+		sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 		sin6->sin6_flowinfo = 0;
 		sin6->sin6_scope_id = 0;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -846,11 +846,11 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		goto out;
 
 	if (!ipv6_addr_any(daddr))
-		ipv6_addr_copy(&fl6.daddr, daddr);
+		fl6.daddr = *daddr;
 	else
 		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
 	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&fl6.saddr, &np->saddr);
+		fl6.saddr = np->saddr;
 
 	final_p = fl6_update_dst(&fl6, opt, &final);
 
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index dfb164e9051a..b69fae76a6f1 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -153,8 +153,8 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a)
 
 	fq->id = arg->id;
 	fq->user = arg->user;
-	ipv6_addr_copy(&fq->saddr, arg->src);
-	ipv6_addr_copy(&fq->daddr, arg->dst);
+	fq->saddr = *arg->src;
+	fq->daddr = *arg->dst;
 }
 EXPORT_SYMBOL(ip6_frag_init);
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 05c89be04c9f..2897403fdaff 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -729,14 +729,14 @@ static struct rt6_info *rt6_alloc_cow(const struct rt6_info *ort,
 			if (rt->rt6i_dst.plen != 128 &&
 			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr))
 				rt->rt6i_flags |= RTF_ANYCAST;
-			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+			rt->rt6i_gateway = *daddr;
 		}
 
 		rt->rt6i_flags |= RTF_CACHE;
 
 #ifdef CONFIG_IPV6_SUBTREES
 		if (rt->rt6i_src.plen && saddr) {
-			ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
+			rt->rt6i_src.addr = *saddr;
 			rt->rt6i_src.plen = 128;
 		}
 #endif
@@ -932,7 +932,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori
 			in6_dev_hold(rt->rt6i_idev);
 		rt->rt6i_expires = 0;
 
-		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+		rt->rt6i_gateway = ort->rt6i_gateway;
 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 		rt->rt6i_metric = 0;
 
@@ -1087,7 +1087,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
 	rt->dst.output  = ip6_output;
 	dst_set_neighbour(&rt->dst, neigh);
 	atomic_set(&rt->dst.__refcnt, 1);
-	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_idev     = idev;
 	dst_metric_set(&rt->dst, RTAX_HOPLIMIT, 255);
@@ -1324,7 +1324,7 @@ int ip6_route_add(struct fib6_config *cfg)
 		int gwa_type;
 
 		gw_addr = &cfg->fc_gateway;
-		ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
+		rt->rt6i_gateway = *gw_addr;
 		gwa_type = ipv6_addr_type(gw_addr);
 
 		if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
@@ -1378,7 +1378,7 @@ int ip6_route_add(struct fib6_config *cfg)
 			err = -EINVAL;
 			goto out;
 		}
-		ipv6_addr_copy(&rt->rt6i_prefsrc.addr, &cfg->fc_prefsrc);
+		rt->rt6i_prefsrc.addr = cfg->fc_prefsrc;
 		rt->rt6i_prefsrc.plen = 128;
 	} else
 		rt->rt6i_prefsrc.plen = 0;
@@ -1575,7 +1575,7 @@ static struct rt6_info *ip6_route_redirect(const struct in6_addr *dest,
 		},
 	};
 
-	ipv6_addr_copy(&rdfl.gateway, gateway);
+	rdfl.gateway = *gateway;
 
 	if (rt6_need_strict(dest))
 		flags |= RT6_LOOKUP_F_IFACE;
@@ -1631,7 +1631,7 @@ void rt6_redirect(const struct in6_addr *dest, const struct in6_addr *src,
 	if (on_link)
 		nrt->rt6i_flags &= ~RTF_GATEWAY;
 
-	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
+	nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
 	dst_set_neighbour(&nrt->dst, neigh_clone(neigh));
 
 	if (ip6_ins_rt(nrt))
@@ -1777,7 +1777,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
 		rt->dst.output = ort->dst.output;
 		rt->dst.flags |= DST_HOST;
 
-		ipv6_addr_copy(&rt->rt6i_dst.addr, dest);
+		rt->rt6i_dst.addr = *dest;
 		rt->rt6i_dst.plen = 128;
 		dst_copy_metrics(&rt->dst, &ort->dst);
 		rt->dst.error = ort->dst.error;
@@ -1787,7 +1787,7 @@ static struct rt6_info *ip6_rt_copy(const struct rt6_info *ort,
 		rt->dst.lastuse = jiffies;
 		rt->rt6i_expires = 0;
 
-		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+		rt->rt6i_gateway = ort->rt6i_gateway;
 		rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 		rt->rt6i_metric = 0;
 
@@ -1850,8 +1850,8 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
 		.fc_nlinfo.nl_net = net,
 	};
 
-	ipv6_addr_copy(&cfg.fc_dst, prefix);
-	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+	cfg.fc_dst = *prefix;
+	cfg.fc_gateway = *gwaddr;
 
 	/* We should treat it as a default route if prefix length is 0. */
 	if (!prefixlen)
@@ -1900,7 +1900,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
 		.fc_nlinfo.nl_net = dev_net(dev),
 	};
 
-	ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
+	cfg.fc_gateway = *gwaddr;
 
 	ip6_route_add(&cfg);
 
@@ -1946,9 +1946,9 @@ static void rtmsg_to_fib6_config(struct net *net,
 
 	cfg->fc_nlinfo.nl_net = net;
 
-	ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
-	ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
-	ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
+	cfg->fc_dst = rtmsg->rtmsg_dst;
+	cfg->fc_src = rtmsg->rtmsg_src;
+	cfg->fc_gateway = rtmsg->rtmsg_gateway;
 }
 
 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
@@ -2082,7 +2082,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	}
 	dst_set_neighbour(&rt->dst, neigh);
 
-	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+	rt->rt6i_dst.addr = *addr;
 	rt->rt6i_dst.plen = 128;
 	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
 
@@ -2100,7 +2100,7 @@ int ip6_route_get_saddr(struct net *net,
 	struct inet6_dev *idev = ip6_dst_idev((struct dst_entry*)rt);
 	int err = 0;
 	if (rt->rt6i_prefsrc.plen)
-		ipv6_addr_copy(saddr, &rt->rt6i_prefsrc.addr);
+		*saddr = rt->rt6i_prefsrc.addr;
 	else
 		err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL,
 					 daddr, prefs, saddr);
@@ -2439,7 +2439,7 @@ static int rt6_fill_node(struct net *net,
 
 	if (rt->rt6i_prefsrc.plen) {
 		struct in6_addr saddr_buf;
-		ipv6_addr_copy(&saddr_buf, &rt->rt6i_prefsrc.addr);
+		saddr_buf = rt->rt6i_prefsrc.addr;
 		NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 	}
 
@@ -2513,14 +2513,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
 		if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
 			goto errout;
 
-		ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
+		fl6.saddr = *(struct in6_addr *)nla_data(tb[RTA_SRC]);
 	}
 
 	if (tb[RTA_DST]) {
 		if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
 			goto errout;
 
-		ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
+		fl6.daddr = *(struct in6_addr *)nla_data(tb[RTA_DST]);
 	}
 
 	if (tb[RTA_IIF])
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index cec09382282d..50968f226e75 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -914,7 +914,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 				goto done;
 #ifdef CONFIG_IPV6_SIT_6RD
 		} else {
-			ipv6_addr_copy(&ip6rd.prefix, &t->ip6rd.prefix);
+			ip6rd.prefix = t->ip6rd.prefix;
 			ip6rd.relay_prefix = t->ip6rd.relay_prefix;
 			ip6rd.prefixlen = t->ip6rd.prefixlen;
 			ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
@@ -1082,7 +1082,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
 			if (relay_prefix != ip6rd.relay_prefix)
 				goto done;
 
-			ipv6_addr_copy(&t->ip6rd.prefix, &prefix);
+			t->ip6rd.prefix = prefix;
 			t->ip6rd.relay_prefix = relay_prefix;
 			t->ip6rd.prefixlen = ip6rd.prefixlen;
 			t->ip6rd.relay_prefixlen = ip6rd.relay_prefixlen;
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 5a0d6648bbbc..8e951d8d3b81 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -200,8 +200,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	req->mss = mss;
 	ireq->rmt_port = th->source;
 	ireq->loc_port = th->dest;
-	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
+	ireq6->rmt_addr = ipv6_hdr(skb)->saddr;
+	ireq6->loc_addr = ipv6_hdr(skb)->daddr;
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
 	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
@@ -237,9 +237,9 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 		struct flowi6 fl6;
 		memset(&fl6, 0, sizeof(fl6));
 		fl6.flowi6_proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
+		fl6.daddr = ireq6->rmt_addr;
 		final_p = fl6_update_dst(&fl6, np->opt, &final);
-		ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
+		fl6.saddr = ireq6->loc_addr;
 		fl6.flowi6_oif = sk->sk_bound_dev_if;
 		fl6.flowi6_mark = sk->sk_mark;
 		fl6.fl6_dport = inet_rsk(req)->rmt_port;
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 36131d122a6f..fd98dd010fcb 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -153,7 +153,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
 			if (flowlabel == NULL)
 				return -EINVAL;
-			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
+			usin->sin6_addr = flowlabel->dst;
 			fl6_sock_release(flowlabel);
 		}
 	}
@@ -195,7 +195,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		tp->write_seq = 0;
 	}
 
-	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
+	np->daddr = usin->sin6_addr;
 	np->flow_label = fl6.flowlabel;
 
 	/*
@@ -244,9 +244,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		saddr = &np->rcv_saddr;
 
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.daddr, &np->daddr);
-	ipv6_addr_copy(&fl6.saddr,
-		       (saddr ? saddr : &np->saddr));
+	fl6.daddr = np->daddr;
+	fl6.saddr = saddr ? *saddr : np->saddr;
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
 	fl6.flowi6_mark = sk->sk_mark;
 	fl6.fl6_dport = usin->sin6_port;
@@ -264,11 +263,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	if (saddr == NULL) {
 		saddr = &fl6.saddr;
-		ipv6_addr_copy(&np->rcv_saddr, saddr);
+		np->rcv_saddr = *saddr;
 	}
 
 	/* set the source address */
-	ipv6_addr_copy(&np->saddr, saddr);
+	np->saddr = *saddr;
 	inet->inet_rcv_saddr = LOOPBACK4_IPV6;
 
 	sk->sk_gso_type = SKB_GSO_TCPV6;
@@ -398,8 +397,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			 */
 			memset(&fl6, 0, sizeof(fl6));
 			fl6.flowi6_proto = IPPROTO_TCP;
-			ipv6_addr_copy(&fl6.daddr, &np->daddr);
-			ipv6_addr_copy(&fl6.saddr, &np->saddr);
+			fl6.daddr = np->daddr;
+			fl6.saddr = np->saddr;
 			fl6.flowi6_oif = sk->sk_bound_dev_if;
 			fl6.flowi6_mark = sk->sk_mark;
 			fl6.fl6_dport = inet->inet_dport;
@@ -489,8 +488,8 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 
 	memset(&fl6, 0, sizeof(fl6));
 	fl6.flowi6_proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
-	ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
+	fl6.daddr = treq->rmt_addr;
+	fl6.saddr = treq->loc_addr;
 	fl6.flowlabel = 0;
 	fl6.flowi6_oif = treq->iif;
 	fl6.flowi6_mark = sk->sk_mark;
@@ -512,7 +511,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 	if (skb) {
 		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);
 
-		ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
+		fl6.daddr = treq->rmt_addr;
 		err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
 		err = net_xmit_eval(err);
 	}
@@ -617,8 +616,7 @@ static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
 			tp->md5sig_info->alloced6++;
 		}
 
-		ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
-			       peer);
+		tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr = *peer;
 		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
 		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;
 
@@ -750,8 +748,8 @@ static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
 
 	bp = &hp->md5_blk.ip6;
 	/* 1. TCP pseudo-header (RFC2460) */
-	ipv6_addr_copy(&bp->saddr, saddr);
-	ipv6_addr_copy(&bp->daddr, daddr);
+	bp->saddr = *saddr;
+	bp->daddr = *daddr;
 	bp->protocol = cpu_to_be32(IPPROTO_TCP);
 	bp->len = cpu_to_be32(nbytes);
 
@@ -1039,8 +1037,8 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
 #endif
 
 	memset(&fl6, 0, sizeof(fl6));
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);
+	fl6.daddr = ipv6_hdr(skb)->saddr;
+	fl6.saddr = ipv6_hdr(skb)->daddr;
 
 	buff->ip_summed = CHECKSUM_PARTIAL;
 	buff->csum = 0;
@@ -1250,8 +1248,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_openreq_init(req, &tmp_opt, skb);
 
 	treq = inet6_rsk(req);
-	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
-	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
+	treq->rmt_addr = ipv6_hdr(skb)->saddr;
+	treq->loc_addr = ipv6_hdr(skb)->daddr;
 	if (!want_cookie || tmp_opt.tstamp_ok)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
@@ -1380,7 +1378,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);
 
-		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
+		newnp->rcv_saddr = newnp->saddr;
 
 		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
@@ -1444,9 +1442,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 	memcpy(newnp, np, sizeof(struct ipv6_pinfo));
 
-	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
-	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
-	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
+	newnp->daddr = treq->rmt_addr;
+	newnp->saddr = treq->loc_addr;
+	newnp->rcv_saddr = treq->loc_addr;
 	newsk->sk_bound_dev_if = treq->iif;
 
 	/* Now IPv6 options...
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index ccfb0451b1c3..84ec9db86ee0 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -417,8 +417,7 @@ try_again:
 			ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr,
 					       &sin6->sin6_addr);
 		else {
-			ipv6_addr_copy(&sin6->sin6_addr,
-				       &ipv6_hdr(skb)->saddr);
+			sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 			if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 				sin6->sin6_scope_id = IP6CB(skb)->iif;
 		}
@@ -1115,11 +1114,11 @@ do_udp_sendmsg:
 
 	fl6.flowi6_proto = sk->sk_protocol;
 	if (!ipv6_addr_any(daddr))
-		ipv6_addr_copy(&fl6.daddr, daddr);
+		fl6.daddr = *daddr;
 	else
 		fl6.daddr.s6_addr[15] = 0x1; /* :: means loopback (BSD'ism) */
 	if (ipv6_addr_any(&fl6.saddr) && !ipv6_addr_any(&np->saddr))
-		ipv6_addr_copy(&fl6.saddr, &np->saddr);
+		fl6.saddr = np->saddr;
 	fl6.fl6_sport = inet->inet_sport;
 
 	final_p = fl6_update_dst(&fl6, opt, &final);
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 3437d7d4eed6..a81ce9450750 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -72,8 +72,8 @@ static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
 		top_iph->nexthdr = IPPROTO_BEETPH;
 	}
 
-	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
-	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
 	return 0;
 }
 
@@ -99,8 +99,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(skb->len - size);
-	ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
-	ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
+	ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
+	ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
 	err = 0;
 out:
 	return err;
diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c
index 4d6edff0498f..261e6e6f487e 100644
--- a/net/ipv6/xfrm6_mode_tunnel.c
+++ b/net/ipv6/xfrm6_mode_tunnel.c
@@ -55,8 +55,8 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)
 		dsfield &= ~INET_ECN_MASK;
 	ipv6_change_dsfield(top_iph, 0, dsfield);
 	top_iph->hop_limit = ip6_dst_hoplimit(dst->child);
-	ipv6_addr_copy(&top_iph->saddr, (const struct in6_addr *)&x->props.saddr);
-	ipv6_addr_copy(&top_iph->daddr, (const struct in6_addr *)&x->id.daddr);
+	top_iph->saddr = *(struct in6_addr *)&x->props.saddr;
+	top_iph->daddr = *(struct in6_addr *)&x->id.daddr;
 	return 0;
 }
 
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index faae41737fca..4eeff89c1aaa 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -49,7 +49,7 @@ static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
 	struct sock *sk = skb->sk;
 
 	fl6.flowi6_oif = sk->sk_bound_dev_if;
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+	fl6.daddr = ipv6_hdr(skb)->daddr;
 
 	ipv6_local_rxpmtu(sk, &fl6, mtu);
 }
@@ -60,7 +60,7 @@ static void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
 	struct sock *sk = skb->sk;
 
 	fl6.fl6_dport = inet_sk(sk)->inet_dport;
-	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->daddr);
+	fl6.daddr = ipv6_hdr(skb)->daddr;
 
 	ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
 }
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index d879f7efbd10..8ea65e032733 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -132,8 +132,8 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)
 	memset(fl6, 0, sizeof(struct flowi6));
 	fl6->flowi6_mark = skb->mark;
 
-	ipv6_addr_copy(&fl6->daddr, reverse ? &hdr->saddr : &hdr->daddr);
-	ipv6_addr_copy(&fl6->saddr, reverse ? &hdr->daddr : &hdr->saddr);
+	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;
+	fl6->saddr = reverse ? hdr->daddr : hdr->saddr;
 
 	while (nh + offset + 1 < skb->data ||
 	       pskb_may_pull(skb, nh + offset + 1 - skb->data)) {
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index f2d72b8a3faa..3f2f7c4ab721 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -27,8 +27,8 @@ __xfrm6_init_tempsel(struct xfrm_selector *sel, const struct flowi *fl)
 
 	/* Initialize temporary selector matching only
 	 * to current session. */
-	ipv6_addr_copy((struct in6_addr *)&sel->daddr, &fl6->daddr);
-	ipv6_addr_copy((struct in6_addr *)&sel->saddr, &fl6->saddr);
+	*(struct in6_addr *)&sel->daddr = fl6->daddr;
+	*(struct in6_addr *)&sel->saddr = fl6->saddr;
 	sel->dport = xfrm_flowi_dport(fl, &fl6->uli);
 	sel->dport_mask = htons(0xffff);
 	sel->sport = xfrm_flowi_sport(fl, &fl6->uli);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 1e733e9073d0..bfc0bef170cb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -712,7 +712,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = port;
 		sin6->sin6_flowinfo = 0;
-		ipv6_addr_copy(&sin6->sin6_addr, (const struct in6_addr *)xaddr->a6);
+		sin6->sin6_addr = *(struct in6_addr *)xaddr->a6;
 		sin6->sin6_scope_id = 0;
 		return 128;
 	    }
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index f2d576e6b769..4015fcaf87bc 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -241,7 +241,7 @@ hash_ip6_data_isnull(const struct hash_ip6_elem *elem)
 static inline void
 hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)
 {
-	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+	dst->ip.in6 = src->ip.in6;
 }
 
 static inline void
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 60d016541c58..28988196775e 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -267,7 +267,7 @@ static inline void
 hash_net6_data_copy(struct hash_net6_elem *dst,
 		    const struct hash_net6_elem *src)
 {
-	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6);
+	dst->ip.in6 = src->ip.in6;
 	dst->cidr = src->cidr;
 }
 
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 093cc327020f..611c3359b94d 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -983,7 +983,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,
 	if (!cp)
 		return NF_ACCEPT;
 
-	ipv6_addr_copy(&snet.in6, &iph->saddr);
+	snet.in6 = iph->saddr;
 	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,
 				    pp, offset, sizeof(struct ipv6hdr));
 }
diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c
index 3cdd479f9b5d..bcf5563e4837 100644
--- a/net/netfilter/ipvs/ip_vs_sync.c
+++ b/net/netfilter/ipvs/ip_vs_sync.c
@@ -603,9 +603,9 @@ sloop:
 #ifdef CONFIG_IP_VS_IPV6
 	if (cp->af == AF_INET6) {
 		p += sizeof(struct ip_vs_sync_v6);
-		ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6);
-		ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6);
-		ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6);
+		s->v6.caddr = cp->caddr.in6;
+		s->v6.vaddr = cp->vaddr.in6;
+		s->v6.daddr = cp->daddr.in6;
 	} else
 #endif
 	{
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index aa2d7206ee8a..38a576d05b4b 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -235,7 +235,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,
 			goto out_err;
 		}
 	}
-	ipv6_addr_copy(ret_saddr, &fl6.saddr);
+	*ret_saddr = fl6.saddr;
 	return dst;
 
 out_err:
@@ -279,7 +279,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,
 				  atomic_read(&rt->dst.__refcnt));
 		}
 		if (ret_saddr)
-			ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6);
+			*ret_saddr = dest->dst_saddr.in6;
 		spin_unlock(&dest->dst_lock);
 	} else {
 		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm);
@@ -705,7 +705,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	/* mangle the packet */
 	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
 		goto tx_error;
-	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6);
+	ipv6_hdr(skb)->daddr = cp->daddr.in6;
 
 	if (!local || !skb->dev) {
 		/* drop the old route when skb is not shared */
@@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
 	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));
 	iph->priority		=	old_iph->priority;
 	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl));
-	ipv6_addr_copy(&iph->daddr, &cp->daddr.in6);
-	ipv6_addr_copy(&iph->saddr, &saddr);
+	iph->daddr = cp->daddr.in6;
+	iph->saddr = saddr;
 	iph->hop_limit		=	old_iph->hop_limit;
 
 	/* Another hack: avoid icmp_send in ip_fragment */
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index f03c2d4539f6..f9368f33e7af 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -750,10 +750,10 @@ static int callforward_do_filter(const union nf_inet_addr *src,
 		struct rt6_info *rt1, *rt2;
 
 		memset(&fl1, 0, sizeof(fl1));
-		ipv6_addr_copy(&fl1.daddr, &src->in6);
+		fl1.daddr = src->in6;
 
 		memset(&fl2, 0, sizeof(fl2));
-		ipv6_addr_copy(&fl2.daddr, &dst->in6);
+		fl2.daddr = dst->in6;
 		if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,
 				   flowi6_to_flowi(&fl1), false)) {
 			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2,
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 9e63b43faeed..3ecade3966d5 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -161,7 +161,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 		struct flowi6 *fl6 = &fl.u.ip6;
 
 		memset(fl6, 0, sizeof(*fl6));
-		ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr);
+		fl6->daddr = ipv6_hdr(skb)->saddr;
 	}
 	rcu_read_lock();
 	ai = nf_get_afinfo(family);
diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c
index b77d383cec78..c047de2046ad 100644
--- a/net/netfilter/xt_addrtype.c
+++ b/net/netfilter/xt_addrtype.c
@@ -42,7 +42,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,
 	int route_err;
 
 	memset(&flow, 0, sizeof(flow));
-	ipv6_addr_copy(&flow.daddr, addr);
+	flow.daddr = *addr;
 	if (dev)
 		flow.flowi6_oif = dev->ifindex;
 
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 9c24de10a657..8ed67dccf11d 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -155,12 +155,12 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 			if (map6 == NULL)
 				goto cfg_unlbl_map_add_failure;
 			map6->type = NETLBL_NLTYPE_UNLABELED;
-			ipv6_addr_copy(&map6->list.addr, addr6);
+			map6->list.addr = *addr6;
 			map6->list.addr.s6_addr32[0] &= mask6->s6_addr32[0];
 			map6->list.addr.s6_addr32[1] &= mask6->s6_addr32[1];
 			map6->list.addr.s6_addr32[2] &= mask6->s6_addr32[2];
 			map6->list.addr.s6_addr32[3] &= mask6->s6_addr32[3];
-			ipv6_addr_copy(&map6->list.mask, mask6);
+			map6->list.mask = *mask6;
 			map6->list.valid = 1;
 			ret_val = netlbl_af4list_add(&map4->list,
 						     &addrmap->list4);
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index bfa555869775..9879300beefd 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -216,12 +216,12 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 			ret_val = -ENOMEM;
 			goto add_failure;
 		}
-		ipv6_addr_copy(&map->list.addr, addr);
+		map->list.addr = *addr;
 		map->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
 		map->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
 		map->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
 		map->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
-		ipv6_addr_copy(&map->list.mask, mask);
+		map->list.mask = *mask;
 		map->list.valid = 1;
 		map->type = entry->type;
 
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index e251c2c88521..049ccd2447d7 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -300,12 +300,12 @@ static int netlbl_unlhsh_add_addr6(struct netlbl_unlhsh_iface *iface,
 	if (entry == NULL)
 		return -ENOMEM;
 
-	ipv6_addr_copy(&entry->list.addr, addr);
+	entry->list.addr = *addr;
 	entry->list.addr.s6_addr32[0] &= mask->s6_addr32[0];
 	entry->list.addr.s6_addr32[1] &= mask->s6_addr32[1];
 	entry->list.addr.s6_addr32[2] &= mask->s6_addr32[2];
 	entry->list.addr.s6_addr32[3] &= mask->s6_addr32[3];
-	ipv6_addr_copy(&entry->list.mask, mask);
+	entry->list.mask = *mask;
 	entry->list.valid = 1;
 	entry->secid = secid;
 
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 810427833bcd..91f479121c55 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -107,7 +107,7 @@ static int sctp_inet6addr_event(struct notifier_block *this, unsigned long ev,
 		if (addr) {
 			addr->a.v6.sin6_family = AF_INET6;
 			addr->a.v6.sin6_port = 0;
-			ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifa->addr);
+			addr->a.v6.sin6_addr = ifa->addr;
 			addr->a.v6.sin6_scope_id = ifa->idev->dev->ifindex;
 			addr->valid = 1;
 			spin_lock_bh(&sctp_local_addr_lock);
@@ -219,8 +219,8 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 	/* Fill in the dest address from the route entry passed with the skb
 	 * and the source address from the transport.
 	 */
-	ipv6_addr_copy(&fl6.daddr, &transport->ipaddr.v6.sin6_addr);
-	ipv6_addr_copy(&fl6.saddr, &transport->saddr.v6.sin6_addr);
+	fl6.daddr = transport->ipaddr.v6.sin6_addr;
+	fl6.saddr = transport->saddr.v6.sin6_addr;
 
 	fl6.flowlabel = np->flow_label;
 	IP6_ECN_flow_xmit(sk, fl6.flowlabel);
@@ -231,7 +231,7 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *transport)
 
 	if (np->opt && np->opt->srcrt) {
 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		ipv6_addr_copy(&fl6.daddr, rt0->addr);
+		fl6.daddr = *rt0->addr;
 	}
 
 	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, src:%pI6 dst:%pI6\n",
@@ -265,7 +265,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	sctp_scope_t scope;
 
 	memset(fl6, 0, sizeof(struct flowi6));
-	ipv6_addr_copy(&fl6->daddr, &daddr->v6.sin6_addr);
+	fl6->daddr = daddr->v6.sin6_addr;
 	fl6->fl6_dport = daddr->v6.sin6_port;
 	fl6->flowi6_proto = IPPROTO_SCTP;
 	if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
@@ -277,7 +277,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 		fl6->fl6_sport = htons(asoc->base.bind_addr.port);
 
 	if (saddr) {
-		ipv6_addr_copy(&fl6->saddr, &saddr->v6.sin6_addr);
+		fl6->saddr = saddr->v6.sin6_addr;
 		fl6->fl6_sport = saddr->v6.sin6_port;
 		SCTP_DEBUG_PRINTK("SRC=%pI6 - ", &fl6->saddr);
 	}
@@ -334,7 +334,7 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr,
 	}
 	rcu_read_unlock();
 	if (baddr) {
-		ipv6_addr_copy(&fl6->saddr, &baddr->v6.sin6_addr);
+		fl6->saddr = baddr->v6.sin6_addr;
 		fl6->fl6_sport = baddr->v6.sin6_port;
 		dst = ip6_dst_lookup_flow(sk, fl6, NULL, false);
 	}
@@ -375,7 +375,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk,
 
 	if (t->dst) {
 		saddr->v6.sin6_family = AF_INET6;
-		ipv6_addr_copy(&saddr->v6.sin6_addr, &fl6->saddr);
+		saddr->v6.sin6_addr = fl6->saddr;
 	}
 }
 
@@ -400,7 +400,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 		if (addr) {
 			addr->a.v6.sin6_family = AF_INET6;
 			addr->a.v6.sin6_port = 0;
-			ipv6_addr_copy(&addr->a.v6.sin6_addr, &ifp->addr);
+			addr->a.v6.sin6_addr = ifp->addr;
 			addr->a.v6.sin6_scope_id = dev->ifindex;
 			addr->valid = 1;
 			INIT_LIST_HEAD(&addr->list);
@@ -416,7 +416,6 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist,
 static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
 			     int is_saddr)
 {
-	void *from;
 	__be16 *port;
 	struct sctphdr *sh;
 
@@ -428,12 +427,11 @@ static void sctp_v6_from_skb(union sctp_addr *addr,struct sk_buff *skb,
 	sh = sctp_hdr(skb);
 	if (is_saddr) {
 		*port  = sh->source;
-		from = &ipv6_hdr(skb)->saddr;
+		addr->v6.sin6_addr = ipv6_hdr(skb)->saddr;
 	} else {
 		*port = sh->dest;
-		from = &ipv6_hdr(skb)->daddr;
+		addr->v6.sin6_addr = ipv6_hdr(skb)->daddr;
 	}
-	ipv6_addr_copy(&addr->v6.sin6_addr, from);
 }
 
 /* Initialize an sctp_addr from a socket. */
@@ -441,7 +439,7 @@ static void sctp_v6_from_sk(union sctp_addr *addr, struct sock *sk)
 {
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = 0;
-	ipv6_addr_copy(&addr->v6.sin6_addr, &inet6_sk(sk)->rcv_saddr);
+	addr->v6.sin6_addr = inet6_sk(sk)->rcv_saddr;
 }
 
 /* Initialize sk->sk_rcv_saddr from sctp_addr. */
@@ -454,7 +452,7 @@ static void sctp_v6_to_sk_saddr(union sctp_addr *addr, struct sock *sk)
 		inet6_sk(sk)->rcv_saddr.s6_addr32[3] =
 			addr->v4.sin_addr.s_addr;
 	} else {
-		ipv6_addr_copy(&inet6_sk(sk)->rcv_saddr, &addr->v6.sin6_addr);
+		inet6_sk(sk)->rcv_saddr = addr->v6.sin6_addr;
 	}
 }
 
@@ -467,7 +465,7 @@ static void sctp_v6_to_sk_daddr(union sctp_addr *addr, struct sock *sk)
 		inet6_sk(sk)->daddr.s6_addr32[2] = htonl(0x0000ffff);
 		inet6_sk(sk)->daddr.s6_addr32[3] = addr->v4.sin_addr.s_addr;
 	} else {
-		ipv6_addr_copy(&inet6_sk(sk)->daddr, &addr->v6.sin6_addr);
+		inet6_sk(sk)->daddr = addr->v6.sin6_addr;
 	}
 }
 
@@ -479,7 +477,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr,
 	addr->v6.sin6_family = AF_INET6;
 	addr->v6.sin6_port = port;
 	addr->v6.sin6_flowinfo = 0; /* BUG */
-	ipv6_addr_copy(&addr->v6.sin6_addr, &param->v6.addr);
+	addr->v6.sin6_addr = param->v6.addr;
 	addr->v6.sin6_scope_id = iif;
 }
 
@@ -493,7 +491,7 @@ static int sctp_v6_to_addr_param(const union sctp_addr *addr,
 
 	param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS;
 	param->v6.param_hdr.length = htons(length);
-	ipv6_addr_copy(&param->v6.addr, &addr->v6.sin6_addr);
+	param->v6.addr = addr->v6.sin6_addr;
 
 	return length;
 }
@@ -504,7 +502,7 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
 {
 	addr->sa.sa_family = AF_INET6;
 	addr->v6.sin6_port = port;
-	ipv6_addr_copy(&addr->v6.sin6_addr, saddr);
+	addr->v6.sin6_addr = *saddr;
 }
 
 /* Compare addresses exactly.
@@ -759,7 +757,7 @@ static void sctp_inet6_event_msgname(struct sctp_ulpevent *event,
 		}
 
 		sin6from = &asoc->peer.primary_addr.v6;
-		ipv6_addr_copy(&sin6->sin6_addr, &sin6from->sin6_addr);
+		sin6->sin6_addr = sin6from->sin6_addr;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
 			sin6->sin6_scope_id = sin6from->sin6_scope_id;
 	}
@@ -787,7 +785,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
 		}
 
 		/* Otherwise, just copy the v6 address. */
-		ipv6_addr_copy(&sin6->sin6_addr, &ipv6_hdr(skb)->saddr);
+		sin6->sin6_addr = ipv6_hdr(skb)->saddr;
 		if (ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL) {
 			struct sctp_ulpevent *ev = sctp_skb2event(skb);
 			sin6->sin6_scope_id = ev->iif;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13bf5fcdbff1..d56c07a3d435 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -804,7 +804,7 @@ static int sctp_send_asconf_del_ip(struct sock		*sk,
 				struct sockaddr_in6 *sin6;
 
 				sin6 = (struct sockaddr_in6 *)addrs;
-				ipv6_addr_copy(&asoc->asconf_addr_del_pending->v6.sin6_addr, &sin6->sin6_addr);
+				asoc->asconf_addr_del_pending->v6.sin6_addr = sin6->sin6_addr;
 			}
 			SCTP_DEBUG_PRINTK_IPADDR("send_asconf_del_ip: keep the last address asoc: %p ",
 			    " at %p\n", asoc, asoc->asconf_addr_del_pending,
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index ce136323da8b..fe258fc37f50 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -134,7 +134,7 @@ static void ip_map_init(struct cache_head *cnew, struct cache_head *citem)
 	struct ip_map *item = container_of(citem, struct ip_map, h);
 
 	strcpy(new->m_class, item->m_class);
-	ipv6_addr_copy(&new->m_addr, &item->m_addr);
+	new->m_addr = item->m_addr;
 }
 static void update(struct cache_head *cnew, struct cache_head *citem)
 {
@@ -274,7 +274,7 @@ static int ip_map_show(struct seq_file *m,
 	}
 	im = container_of(h, struct ip_map, h);
 	/* class addr domain */
-	ipv6_addr_copy(&addr, &im->m_addr);
+	addr = im->m_addr;
 
 	if (test_bit(CACHE_VALID, &h->flags) &&
 	    !test_bit(CACHE_NEGATIVE, &h->flags))
@@ -297,7 +297,7 @@ static struct ip_map *__ip_map_lookup(struct cache_detail *cd, char *class,
 	struct cache_head *ch;
 
 	strcpy(ip.m_class, class);
-	ipv6_addr_copy(&ip.m_addr, addr);
+	ip.m_addr = *addr;
 	ch = sunrpc_cache_lookup(cd, &ip.h,
 				 hash_str(class, IP_HASHBITS) ^
 				 hash_ip6(*addr));
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 71bed1c1c77a..4653286fcc9e 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -157,7 +157,7 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
 			cmh->cmsg_level = SOL_IPV6;
 			cmh->cmsg_type = IPV6_PKTINFO;
 			pki->ipi6_ifindex = daddr->sin6_scope_id;
-			ipv6_addr_copy(&pki->ipi6_addr,	&daddr->sin6_addr);
+			pki->ipi6_addr = daddr->sin6_addr;
 			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));
 		}
 		break;
@@ -523,7 +523,7 @@ static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,
 		return 0;
 
 	daddr->sin6_family = AF_INET6;
-	ipv6_addr_copy(&daddr->sin6_addr, &pki->ipi6_addr);
+	daddr->sin6_addr = pki->ipi6_addr;
 	daddr->sin6_scope_id = pki->ipi6_ifindex;
 	return 1;
 }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 9414b9c5b1e4..5b228f97d4b3 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1035,16 +1035,12 @@ static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m,
 			break;
 
 		case AF_INET6:
-			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
-				       (const struct in6_addr *)daddr);
-			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
-				       (const struct in6_addr *)saddr);
+			*(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr;
+			*(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr;
 			x->sel.prefixlen_d = 128;
 			x->sel.prefixlen_s = 128;
-			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
-				       (const struct in6_addr *)saddr);
-			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
-				       (const struct in6_addr *)daddr);
+			*(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr;
+			*(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr;
 			break;
 		}
 
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index 893af8a2fa1e..199616bb68d3 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -118,8 +118,8 @@ int ipv6_skb_to_auditdata(struct sk_buff *skb,
 	ip6 = ipv6_hdr(skb);
 	if (ip6 == NULL)
 		return -EINVAL;
-	ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
-	ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
+	ad->u.net.v6info.saddr = ip6->saddr;
+	ad->u.net.v6info.daddr = ip6->daddr;
 	ret = 0;
 	/* IPv6 can have several extension header before the Transport header
 	 * skip them */
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 1126c10a5e82..7e6c2564e741 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3567,8 +3567,8 @@ static int selinux_parse_skb_ipv6(struct sk_buff *skb,
 	if (ip6 == NULL)
 		goto out;
 
-	ipv6_addr_copy(&ad->u.net.v6info.saddr, &ip6->saddr);
-	ipv6_addr_copy(&ad->u.net.v6info.daddr, &ip6->daddr);
+	ad->u.net.v6info.saddr = ip6->saddr;
+	ad->u.net.v6info.daddr = ip6->daddr;
 	ret = 0;
 
 	nexthdr = ip6->nexthdr;
@@ -3871,7 +3871,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in
 		if (family == PF_INET)
 			ad.u.net.v4info.saddr = addr4->sin_addr.s_addr;
 		else
-			ipv6_addr_copy(&ad.u.net.v6info.saddr, &addr6->sin6_addr);
+			ad.u.net.v6info.saddr = addr6->sin6_addr;
 
 		err = avc_has_perm(sksec->sid, sid,
 				   sksec->sclass, node_perm, &ad);
diff --git a/security/selinux/netnode.c b/security/selinux/netnode.c
index 3bf46abaa688..86365857c088 100644
--- a/security/selinux/netnode.c
+++ b/security/selinux/netnode.c
@@ -220,7 +220,7 @@ static int sel_netnode_sid_slow(void *addr, u16 family, u32 *sid)
 	case PF_INET6:
 		ret = security_node_sid(PF_INET6,
 					addr, sizeof(struct in6_addr), sid);
-		ipv6_addr_copy(&new->nsec.addr.ipv6, addr);
+		new->nsec.addr.ipv6 = *(struct in6_addr *)addr;
 		break;
 	default:
 		BUG();
-- 
cgit v1.2.3


From 1f2149c1df50c8c712950872675f46e6e44629f0 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 22 Nov 2011 10:57:41 +0000
Subject: net: remove netdev_alloc_page and use __GFP_COLD

Given we dont use anymore the struct net_device *dev argument, and this
interface brings litle benefit, remove netdev_{alloc|free}_page(), to
debloat include/linux/skbuff.h a bit.

(Some drivers used a mix of these interfaces and alloc_pages())

When allocating a page given to device for DMA transfer (device to
memory), it makes sense to use a cold one (__GFP_COLD)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
CC: Dimitris Michailidis <dm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/sge.c          |  6 ++---
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c        |  5 ++--
 drivers/net/ethernet/intel/igb/igb_main.c         |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |  2 +-
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  2 +-
 drivers/net/usb/cdc-phonet.c                      | 10 +++----
 drivers/usb/gadget/f_phonet.c                     | 11 ++++----
 include/linux/skbuff.h                            | 32 -----------------------
 8 files changed, 18 insertions(+), 52 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 140254c7cba9..2dae7959f000 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -491,7 +491,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 	__be64 *d = &q->desc[q->pidx];
 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
 
-	gfp |= __GFP_NOWARN;         /* failures are expected */
+	gfp |= __GFP_NOWARN | __GFP_COLD;
 
 #if FL_PG_ORDER > 0
 	/*
@@ -528,7 +528,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 #endif
 
 	while (n--) {
-		pg = __netdev_alloc_page(adap->port[0], gfp);
+		pg = alloc_page(gfp);
 		if (unlikely(!pg)) {
 			q->alloc_failed++;
 			break;
@@ -537,7 +537,7 @@ static unsigned int refill_fl(struct adapter *adap, struct sge_fl *q, int n,
 		mapping = dma_map_page(adap->pdev_dev, pg, 0, PAGE_SIZE,
 				       PCI_DMA_FROMDEVICE);
 		if (unlikely(dma_mapping_error(adap->pdev_dev, mapping))) {
-			netdev_free_page(adap->port[0], pg);
+			put_page(pg);
 			goto out;
 		}
 		*d++ = cpu_to_be64(mapping);
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index 8d5d55ad102d..c381db23e713 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -653,8 +653,7 @@ static unsigned int refill_fl(struct adapter *adapter, struct sge_fl *fl,
 
 alloc_small_pages:
 	while (n--) {
-		page = __netdev_alloc_page(adapter->port[0],
-					   gfp | __GFP_NOWARN);
+		page = alloc_page(gfp | __GFP_NOWARN | __GFP_COLD);
 		if (unlikely(!page)) {
 			fl->alloc_failed++;
 			break;
@@ -664,7 +663,7 @@ alloc_small_pages:
 		dma_addr = dma_map_page(adapter->pdev_dev, page, 0, PAGE_SIZE,
 				       PCI_DMA_FROMDEVICE);
 		if (unlikely(dma_mapping_error(adapter->pdev_dev, dma_addr))) {
-			netdev_free_page(adapter->port[0], page);
+			put_page(page);
 			break;
 		}
 		*d++ = cpu_to_be64(dma_addr);
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index bd9b30e6ae9d..b66b8aa751e7 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -6135,7 +6135,7 @@ static bool igb_alloc_mapped_page(struct igb_ring *rx_ring,
 		return true;
 
 	if (!page) {
-		page = netdev_alloc_page(rx_ring->netdev);
+		page = alloc_page(GFP_ATOMIC | __GFP_COLD);
 		bi->page = page;
 		if (unlikely(!page)) {
 			rx_ring->rx_stats.alloc_failed++;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 820fc040c241..1b28ed9d8cc1 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1140,7 +1140,7 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
 
 		if (ring_is_ps_enabled(rx_ring)) {
 			if (!bi->page) {
-				bi->page = netdev_alloc_page(rx_ring->netdev);
+				bi->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
 				if (!bi->page) {
 					rx_ring->rx_stats.alloc_rx_page_failed++;
 					goto no_buffers;
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 0c39bb1ac3bb..5d1a64398169 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -366,7 +366,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_adapter *adapter,
 		if (!bi->page_dma &&
 		    (adapter->flags & IXGBE_FLAG_RX_PS_ENABLED)) {
 			if (!bi->page) {
-				bi->page = netdev_alloc_page(adapter->netdev);
+				bi->page = alloc_page(GFP_ATOMIC | __GFP_COLD);
 				if (!bi->page) {
 					adapter->alloc_rx_page_failed++;
 					goto no_buffers;
diff --git a/drivers/net/usb/cdc-phonet.c b/drivers/net/usb/cdc-phonet.c
index a60d0069cc45..331e44056f5a 100644
--- a/drivers/net/usb/cdc-phonet.c
+++ b/drivers/net/usb/cdc-phonet.c
@@ -130,7 +130,7 @@ static int rx_submit(struct usbpn_dev *pnd, struct urb *req, gfp_t gfp_flags)
 	struct page *page;
 	int err;
 
-	page = __netdev_alloc_page(dev, gfp_flags);
+	page = alloc_page(gfp_flags);
 	if (!page)
 		return -ENOMEM;
 
@@ -140,7 +140,7 @@ static int rx_submit(struct usbpn_dev *pnd, struct urb *req, gfp_t gfp_flags)
 	err = usb_submit_urb(req, gfp_flags);
 	if (unlikely(err)) {
 		dev_dbg(&dev->dev, "RX submit error (%d)\n", err);
-		netdev_free_page(dev, page);
+		put_page(page);
 	}
 	return err;
 }
@@ -208,9 +208,9 @@ static void rx_complete(struct urb *req)
 	dev->stats.rx_errors++;
 resubmit:
 	if (page)
-		netdev_free_page(dev, page);
+		put_page(page);
 	if (req)
-		rx_submit(pnd, req, GFP_ATOMIC);
+		rx_submit(pnd, req, GFP_ATOMIC | __GFP_COLD);
 }
 
 static int usbpn_close(struct net_device *dev);
@@ -229,7 +229,7 @@ static int usbpn_open(struct net_device *dev)
 	for (i = 0; i < rxq_size; i++) {
 		struct urb *req = usb_alloc_urb(0, GFP_KERNEL);
 
-		if (!req || rx_submit(pnd, req, GFP_KERNEL)) {
+		if (!req || rx_submit(pnd, req, GFP_KERNEL | __GFP_COLD)) {
 			usbpn_close(dev);
 			return -ENOMEM;
 		}
diff --git a/drivers/usb/gadget/f_phonet.c b/drivers/usb/gadget/f_phonet.c
index 16a509ae517b..7cdcb63b21ff 100644
--- a/drivers/usb/gadget/f_phonet.c
+++ b/drivers/usb/gadget/f_phonet.c
@@ -298,11 +298,10 @@ static void pn_net_setup(struct net_device *dev)
 static int
 pn_rx_submit(struct f_phonet *fp, struct usb_request *req, gfp_t gfp_flags)
 {
-	struct net_device *dev = fp->dev;
 	struct page *page;
 	int err;
 
-	page = __netdev_alloc_page(dev, gfp_flags);
+	page = alloc_page(gfp_flags);
 	if (!page)
 		return -ENOMEM;
 
@@ -312,7 +311,7 @@ pn_rx_submit(struct f_phonet *fp, struct usb_request *req, gfp_t gfp_flags)
 
 	err = usb_ep_queue(fp->out_ep, req, gfp_flags);
 	if (unlikely(err))
-		netdev_free_page(dev, page);
+		put_page(page);
 	return err;
 }
 
@@ -374,9 +373,9 @@ static void pn_rx_complete(struct usb_ep *ep, struct usb_request *req)
 	}
 
 	if (page)
-		netdev_free_page(dev, page);
+		put_page(page);
 	if (req)
-		pn_rx_submit(fp, req, GFP_ATOMIC);
+		pn_rx_submit(fp, req, GFP_ATOMIC | __GFP_COLD);
 }
 
 /*-------------------------------------------------------------------------*/
@@ -436,7 +435,7 @@ static int pn_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
 
 			netif_carrier_on(dev);
 			for (i = 0; i < phonet_rxq_size; i++)
-				pn_rx_submit(fp, fp->out_reqv[i], GFP_ATOMIC);
+				pn_rx_submit(fp, fp->out_reqv[i], GFP_ATOMIC | __GFP_COLD);
 		}
 		spin_unlock(&port->lock);
 		return 0;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 09b7ea566d66..cec0657d0d32 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1668,38 +1668,6 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,
 	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);
 }
 
-/**
- *	__netdev_alloc_page - allocate a page for ps-rx on a specific device
- *	@dev: network device to receive on
- *	@gfp_mask: alloc_pages_node mask
- *
- * 	Allocate a new page. dev currently unused.
- *
- * 	%NULL is returned if there is no free memory.
- */
-static inline struct page *__netdev_alloc_page(struct net_device *dev, gfp_t gfp_mask)
-{
-	return alloc_pages_node(NUMA_NO_NODE, gfp_mask, 0);
-}
-
-/**
- *	netdev_alloc_page - allocate a page for ps-rx on a specific device
- *	@dev: network device to receive on
- *
- * 	Allocate a new page. dev currently unused.
- *
- * 	%NULL is returned if there is no free memory.
- */
-static inline struct page *netdev_alloc_page(struct net_device *dev)
-{
-	return __netdev_alloc_page(dev, GFP_ATOMIC);
-}
-
-static inline void netdev_free_page(struct net_device *dev, struct page *page)
-{
-	__free_page(page);
-}
-
 /**
  * skb_frag_page - retrieve the page refered to by a paged fragment
  * @frag: the paged fragment
-- 
cgit v1.2.3


From cf50dcc24f82a6dc2bce523eec2a979eb1b106e2 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 25 Nov 2011 14:32:52 +0000
Subject: dsa: Change dsa_uses_{dsa, trailer}_tags() into inline functions

eth_type_trans() will use these functions if DSA is enabled, which
blocks building DSA as a module.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 include/net/dsa.h         | 53 +++++++++++++++++++++++++++++++++++++++++++++--
 net/dsa/dsa.c             | 23 --------------------
 net/dsa/dsa_priv.h        | 33 -----------------------------
 4 files changed, 52 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 999bb264fe27..63721a69804c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1080,7 +1080,7 @@ struct net_device {
 	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
 #endif
 #ifdef CONFIG_NET_DSA
-	void			*dsa_ptr;	/* dsa specific data */
+	struct dsa_switch_tree	*dsa_ptr;	/* dsa specific data */
 #endif
 	void 			*atalk_ptr;	/* AppleTalk link 	*/
 	struct in_device __rcu	*ip_ptr;	/* IPv4 specific data	*/
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 839f768f9e35..32a1b49e8a8c 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -11,6 +11,9 @@
 #ifndef __LINUX_NET_DSA_H
 #define __LINUX_NET_DSA_H
 
+#include <linux/timer.h>
+#include <linux/workqueue.h>
+
 #define DSA_MAX_SWITCHES	4
 #define DSA_MAX_PORTS		12
 
@@ -54,8 +57,54 @@ struct dsa_platform_data {
 	struct dsa_chip_data	*chip;
 };
 
-extern bool dsa_uses_dsa_tags(void *dsa_ptr);
-extern bool dsa_uses_trailer_tags(void *dsa_ptr);
+struct dsa_switch_tree {
+	/*
+	 * Configuration data for the platform device that owns
+	 * this dsa switch tree instance.
+	 */
+	struct dsa_platform_data	*pd;
+
+	/*
+	 * Reference to network device to use, and which tagging
+	 * protocol to use.
+	 */
+	struct net_device	*master_netdev;
+	__be16			tag_protocol;
+
+	/*
+	 * The switch and port to which the CPU is attached.
+	 */
+	s8			cpu_switch;
+	s8			cpu_port;
+
+	/*
+	 * Link state polling.
+	 */
+	int			link_poll_needed;
+	struct work_struct	link_poll_work;
+	struct timer_list	link_poll_timer;
+
+	/*
+	 * Data for the individual switch chips.
+	 */
+	struct dsa_switch	*ds[DSA_MAX_SWITCHES];
+};
+
+/*
+ * The original DSA tag format and some other tag formats have no
+ * ethertype, which means that we need to add a little hack to the
+ * networking receive path to make sure that received frames get
+ * the right ->protocol assigned to them when one of those tag
+ * formats is in use.
+ */
+static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst)
+{
+	return !!(dst->tag_protocol == htons(ETH_P_DSA));
+}
 
+static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst)
+{
+	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
+}
 
 #endif
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0dc1589343c3..66f5c0460cd3 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -199,29 +199,6 @@ static void dsa_switch_destroy(struct dsa_switch *ds)
 }
 
 
-/* hooks for ethertype-less tagging formats *********************************/
-/*
- * The original DSA tag format and some other tag formats have no
- * ethertype, which means that we need to add a little hack to the
- * networking receive path to make sure that received frames get
- * the right ->protocol assigned to them when one of those tag
- * formats is in use.
- */
-bool dsa_uses_dsa_tags(void *dsa_ptr)
-{
-	struct dsa_switch_tree *dst = dsa_ptr;
-
-	return !!(dst->tag_protocol == htons(ETH_P_DSA));
-}
-
-bool dsa_uses_trailer_tags(void *dsa_ptr)
-{
-	struct dsa_switch_tree *dst = dsa_ptr;
-
-	return !!(dst->tag_protocol == htons(ETH_P_TRAILER));
-}
-
-
 /* link polling *************************************************************/
 static void dsa_link_poll_work(struct work_struct *ugly)
 {
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 4b0ea0540442..a45186cb6daf 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -48,39 +48,6 @@ struct dsa_switch {
 	struct net_device	*ports[DSA_MAX_PORTS];
 };
 
-struct dsa_switch_tree {
-	/*
-	 * Configuration data for the platform device that owns
-	 * this dsa switch tree instance.
-	 */
-	struct dsa_platform_data	*pd;
-
-	/*
-	 * Reference to network device to use, and which tagging
-	 * protocol to use.
-	 */
-	struct net_device	*master_netdev;
-	__be16			tag_protocol;
-
-	/*
-	 * The switch and port to which the CPU is attached.
-	 */
-	s8			cpu_switch;
-	s8			cpu_port;
-
-	/*
-	 * Link state polling.
-	 */
-	int			link_poll_needed;
-	struct work_struct	link_poll_work;
-	struct timer_list	link_poll_timer;
-
-	/*
-	 * Data for the individual switch chips.
-	 */
-	struct dsa_switch	*ds[DSA_MAX_SWITCHES];
-};
-
 static inline bool dsa_is_cpu_port(struct dsa_switch *ds, int p)
 {
 	return !!(ds->index == ds->dst->cpu_switch && p == ds->dst->cpu_port);
-- 
cgit v1.2.3


From 34a430d7bd26b35ca3a7d3fc83663de8ea6e33f6 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 25 Nov 2011 14:38:38 +0000
Subject: dsa: Allow core and drivers to be built as modules

Change the kconfig types to tristate and adjust the condition for
declaring net_device::dsa_ptr to allow for this.

Adjust the makefile so that if NET_DSA_MV88E6123_61_65=y and
NET_DSA_MV88E6131=m or vice versa then both drivers are built-in.  We
could leave these options as bool and make NET_DSA_MV88E6XXX a
user-selected option, but that would break existing configurations.

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 +-
 net/dsa/Kconfig           | 10 +++++-----
 net/dsa/Makefile          |  8 ++++++--
 3 files changed, 12 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 63721a69804c..87f7353a2407 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1079,7 +1079,7 @@ struct net_device {
 #if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
 	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
 #endif
-#ifdef CONFIG_NET_DSA
+#if IS_ENABLED(CONFIG_NET_DSA)
 	struct dsa_switch_tree	*dsa_ptr;	/* dsa specific data */
 #endif
 	void 			*atalk_ptr;	/* AppleTalk link 	*/
diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index c53ded2a98df..7e12303827e8 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -1,5 +1,5 @@
 menuconfig NET_DSA
-	bool "Distributed Switch Architecture support"
+	tristate "Distributed Switch Architecture support"
 	default n
 	depends on EXPERIMENTAL && NETDEVICES && !S390
 	select PHYLIB
@@ -26,11 +26,11 @@ config NET_DSA_TAG_TRAILER
 
 # switch drivers
 config NET_DSA_MV88E6XXX
-	bool
+	tristate
 	default n
 
 config NET_DSA_MV88E6060
-	bool "Marvell 88E6060 ethernet switch chip support"
+	tristate "Marvell 88E6060 ethernet switch chip support"
 	select NET_DSA_TAG_TRAILER
 	---help---
 	  This enables support for the Marvell 88E6060 ethernet switch
@@ -41,7 +41,7 @@ config NET_DSA_MV88E6XXX_NEED_PPU
 	default n
 
 config NET_DSA_MV88E6131
-	bool "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
+	tristate "Marvell 88E6085/6095/6095F/6131 ethernet switch chip support"
 	select NET_DSA_MV88E6XXX
 	select NET_DSA_MV88E6XXX_NEED_PPU
 	select NET_DSA_TAG_DSA
@@ -50,7 +50,7 @@ config NET_DSA_MV88E6131
 	  ethernet switch chips.
 
 config NET_DSA_MV88E6123_61_65
-	bool "Marvell 88E6123/6161/6165 ethernet switch chip support"
+	tristate "Marvell 88E6123/6161/6165 ethernet switch chip support"
 	select NET_DSA_MV88E6XXX
 	select NET_DSA_TAG_EDSA
 	---help---
diff --git a/net/dsa/Makefile b/net/dsa/Makefile
index 5c48ac556997..191dd482e557 100644
--- a/net/dsa/Makefile
+++ b/net/dsa/Makefile
@@ -11,5 +11,9 @@ dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o
 obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o
 obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx_drv.o
 mv88e6xxx_drv-y += mv88e6xxx.o
-mv88e6xxx_drv-$(CONFIG_NET_DSA_MV88E6123_61_65) += mv88e6123_61_65.o
-mv88e6xxx_drv-$(CONFIG_NET_DSA_MV88E6131) += mv88e6131.o
+ifdef CONFIG_NET_DSA_MV88E6123_61_65
+mv88e6xxx_drv-y += mv88e6123_61_65.o
+endif
+ifdef CONFIG_NET_DSA_MV88E6131
+mv88e6xxx_drv-y += mv88e6131.o
+endif
-- 
cgit v1.2.3


From d11ead75672d655652dbfc1b1a2c359e5b65536d Mon Sep 17 00:00:00 2001
From: Ben Hutchings <ben@decadent.org.uk>
Date: Fri, 25 Nov 2011 14:40:26 +0000
Subject: net: Use IS_ENABLED() in netdevice.h as appropriate

Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 87f7353a2407..ac9a4b9344ca 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -144,22 +144,20 @@ static inline bool dev_xmit_complete(int rc)
  *	used.
  */
 
-#if defined(CONFIG_WLAN) || defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE)
+#if defined(CONFIG_WLAN) || IS_ENABLED(CONFIG_AX25)
 # if defined(CONFIG_MAC80211_MESH)
 #  define LL_MAX_HEADER 128
 # else
 #  define LL_MAX_HEADER 96
 # endif
-#elif defined(CONFIG_TR) || defined(CONFIG_TR_MODULE)
+#elif IS_ENABLED(CONFIG_TR)
 # define LL_MAX_HEADER 48
 #else
 # define LL_MAX_HEADER 32
 #endif
 
-#if !defined(CONFIG_NET_IPIP) && !defined(CONFIG_NET_IPIP_MODULE) && \
-    !defined(CONFIG_NET_IPGRE) &&  !defined(CONFIG_NET_IPGRE_MODULE) && \
-    !defined(CONFIG_IPV6_SIT) && !defined(CONFIG_IPV6_SIT_MODULE) && \
-    !defined(CONFIG_IPV6_TUNNEL) && !defined(CONFIG_IPV6_TUNNEL_MODULE)
+#if !IS_ENABLED(CONFIG_NET_IPIP) && !IS_ENABLED(CONFIG_NET_IPGRE) && \
+    !IS_ENABLED(CONFIG_IPV6_SIT) && !IS_ENABLED(CONFIG_IPV6_TUNNEL)
 #define MAX_HEADER LL_MAX_HEADER
 #else
 #define MAX_HEADER (LL_MAX_HEADER + 48)
@@ -922,7 +920,7 @@ struct net_device_ops {
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
 	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
 	int			(*ndo_fcoe_ddp_setup)(struct net_device *dev,
@@ -937,7 +935,7 @@ struct net_device_ops {
 						       unsigned int sgc);
 #endif
 
-#if defined(CONFIG_LIBFCOE) || defined(CONFIG_LIBFCOE_MODULE)
+#if IS_ENABLED(CONFIG_LIBFCOE)
 #define NETDEV_FCOE_WWNN 0
 #define NETDEV_FCOE_WWPN 1
 	int			(*ndo_fcoe_get_wwn)(struct net_device *dev,
@@ -1076,7 +1074,7 @@ struct net_device {
 
 	/* Protocol specific pointers */
 
-#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE)
+#if IS_ENABLED(CONFIG_VLAN_8021Q)
 	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
 #endif
 #if IS_ENABLED(CONFIG_NET_DSA)
@@ -1242,7 +1240,7 @@ struct net_device {
 	struct netdev_tc_txq tc_to_txq[TC_MAX_QUEUE];
 	u8 prio_tc_map[TC_BITMASK + 1];
 
-#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+#if IS_ENABLED(CONFIG_FCOE)
 	/* max exchange id for FCoE LRO by ddp */
 	unsigned int		fcoe_ddp_xid;
 #endif
-- 
cgit v1.2.3


From b30f8bdcfa7dd05f4268348f3388ff903132f28e Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 21 Nov 2011 08:57:56 +0000
Subject: eeprom_93cx6: Add data direction control.

Some devices need to know if the data is to be output or read, so add a
data direction into the eeprom structure to tell the driver whether the
data line should be driven.

The user in this case is the Micrel KS8851 which has a direction
control for the EEPROM data line and thus needs to know whether
to drive it (writing) or to tristate it for receiving.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: Wolfram Sang <w.sang@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/misc/eeprom/eeprom_93cx6.c | 3 +++
 include/linux/eeprom_93cx6.h       | 2 ++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c
index 7b33de95c4bf..a6037af6f076 100644
--- a/drivers/misc/eeprom/eeprom_93cx6.c
+++ b/drivers/misc/eeprom/eeprom_93cx6.c
@@ -63,6 +63,7 @@ static void eeprom_93cx6_startup(struct eeprom_93cx6 *eeprom)
 	eeprom->reg_data_out = 0;
 	eeprom->reg_data_clock = 0;
 	eeprom->reg_chip_select = 1;
+	eeprom->drive_data = 1;
 	eeprom->register_write(eeprom);
 
 	/*
@@ -101,6 +102,7 @@ static void eeprom_93cx6_write_bits(struct eeprom_93cx6 *eeprom,
 	 */
 	eeprom->reg_data_in = 0;
 	eeprom->reg_data_out = 0;
+	eeprom->drive_data = 1;
 
 	/*
 	 * Start writing all bits.
@@ -140,6 +142,7 @@ static void eeprom_93cx6_read_bits(struct eeprom_93cx6 *eeprom,
 	 */
 	eeprom->reg_data_in = 0;
 	eeprom->reg_data_out = 0;
+	eeprom->drive_data = 0;
 
 	/*
 	 * Start reading all bits.
diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h
index c4627cbdb8e0..e04546e9c592 100644
--- a/include/linux/eeprom_93cx6.h
+++ b/include/linux/eeprom_93cx6.h
@@ -46,6 +46,7 @@
  * @register_write(struct eeprom_93cx6 *eeprom): handler to
  * write to the eeprom register by using all reg_* fields.
  * @width: eeprom width, should be one of the PCI_EEPROM_WIDTH_* defines
+ * @drive_data: Set if we're driving the data line.
  * @reg_data_in: register field to indicate data input
  * @reg_data_out: register field to indicate data output
  * @reg_data_clock: register field to set the data clock
@@ -62,6 +63,7 @@ struct eeprom_93cx6 {
 
 	int width;
 
+	char drive_data;
 	char reg_data_in;
 	char reg_data_out;
 	char reg_data_clock;
-- 
cgit v1.2.3


From 072bc80156729f853e8bcafe1b17c48c74462887 Mon Sep 17 00:00:00 2001
From: Ben Dooks <ben@simtec.co.uk>
Date: Mon, 21 Nov 2011 08:57:57 +0000
Subject: eeprom_93cx6: Add write support

Add support for writing data to EEPROM.

Signed-off-by: Ben Dooks <ben@simtec.co.uk>
Cc: Wolfram Sang <w.sang@pengutronix.de>
Cc: Jean Delvare <khali@linux-fr.org>
Cc: Linux Kernel <linux-kernel@vger.kernel.org>
Signed-off-by: Stephen Boyd <sboyd@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/misc/eeprom/eeprom_93cx6.c | 85 ++++++++++++++++++++++++++++++++++++++
 include/linux/eeprom_93cx6.h       |  6 +++
 2 files changed, 91 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/misc/eeprom/eeprom_93cx6.c b/drivers/misc/eeprom/eeprom_93cx6.c
index a6037af6f076..0ff4b02177be 100644
--- a/drivers/misc/eeprom/eeprom_93cx6.c
+++ b/drivers/misc/eeprom/eeprom_93cx6.c
@@ -234,3 +234,88 @@ void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom, const u8 word,
 }
 EXPORT_SYMBOL_GPL(eeprom_93cx6_multiread);
 
+/**
+ * eeprom_93cx6_wren - set the write enable state
+ * @eeprom: Pointer to eeprom structure
+ * @enable: true to enable writes, otherwise disable writes
+ *
+ * Set the EEPROM write enable state to either allow or deny
+ * writes depending on the @enable value.
+ */
+void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable)
+{
+	u16 command;
+
+	/* start the command */
+	eeprom_93cx6_startup(eeprom);
+
+	/* create command to enable/disable */
+
+	command = enable ? PCI_EEPROM_EWEN_OPCODE : PCI_EEPROM_EWDS_OPCODE;
+	command <<= (eeprom->width - 2);
+
+	eeprom_93cx6_write_bits(eeprom, command,
+				PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_wren);
+
+/**
+ * eeprom_93cx6_write - write data to the EEPROM
+ * @eeprom: Pointer to eeprom structure
+ * @addr: Address to write data to.
+ * @data: The data to write to address @addr.
+ *
+ * Write the @data to the specified @addr in the EEPROM and
+ * waiting for the device to finish writing.
+ *
+ * Note, since we do not expect large number of write operations
+ * we delay in between parts of the operation to avoid using excessive
+ * amounts of CPU time busy waiting.
+ */
+void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom, u8 addr, u16 data)
+{
+	int timeout = 100;
+	u16 command;
+
+	/* start the command */
+	eeprom_93cx6_startup(eeprom);
+
+	command = PCI_EEPROM_WRITE_OPCODE << eeprom->width;
+	command |= addr;
+
+	/* send write command */
+	eeprom_93cx6_write_bits(eeprom, command,
+				PCI_EEPROM_WIDTH_OPCODE + eeprom->width);
+
+	/* send data */
+	eeprom_93cx6_write_bits(eeprom, data, 16);
+
+	/* get ready to check for busy */
+	eeprom->drive_data = 0;
+	eeprom->reg_chip_select = 1;
+	eeprom->register_write(eeprom);
+
+	/* wait at-least 250ns to get DO to be the busy signal */
+	usleep_range(1000, 2000);
+
+	/* wait for DO to go high to signify finish */
+
+	while (true) {
+		eeprom->register_read(eeprom);
+
+		if (eeprom->reg_data_out)
+			break;
+
+		usleep_range(1000, 2000);
+
+		if (--timeout <= 0) {
+			printk(KERN_ERR "%s: timeout\n", __func__);
+			break;
+		}
+	}
+
+	eeprom_93cx6_cleanup(eeprom);
+}
+EXPORT_SYMBOL_GPL(eeprom_93cx6_write);
diff --git a/include/linux/eeprom_93cx6.h b/include/linux/eeprom_93cx6.h
index e04546e9c592..e50f98b0297a 100644
--- a/include/linux/eeprom_93cx6.h
+++ b/include/linux/eeprom_93cx6.h
@@ -33,6 +33,7 @@
 #define PCI_EEPROM_WIDTH_93C86	8
 #define PCI_EEPROM_WIDTH_OPCODE	3
 #define PCI_EEPROM_WRITE_OPCODE	0x05
+#define PCI_EEPROM_ERASE_OPCODE 0x07
 #define PCI_EEPROM_READ_OPCODE	0x06
 #define PCI_EEPROM_EWDS_OPCODE	0x10
 #define PCI_EEPROM_EWEN_OPCODE	0x13
@@ -74,3 +75,8 @@ extern void eeprom_93cx6_read(struct eeprom_93cx6 *eeprom,
 	const u8 word, u16 *data);
 extern void eeprom_93cx6_multiread(struct eeprom_93cx6 *eeprom,
 	const u8 word, __le16 *data, const u16 words);
+
+extern void eeprom_93cx6_wren(struct eeprom_93cx6 *eeprom, bool enable);
+
+extern void eeprom_93cx6_write(struct eeprom_93cx6 *eeprom,
+			       u8 addr, u16 data);
-- 
cgit v1.2.3


From 49f5ed4250c757cb19d953fcac2737a35ca14d76 Mon Sep 17 00:00:00 2001
From: chas williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
Date: Tue, 22 Nov 2011 12:51:56 +0000
Subject: atm: eliminate atm_guess_pdu2truesize()

Signed-off-by: Chas Williams - CONTRACTOR <chas@cmf.nrl.navy.mil>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/atm/iphase.c   |  4 ++--
 include/linux/atmdev.h | 10 ----------
 net/atm/atm_misc.c     |  2 +-
 3 files changed, 3 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c
index 3d0c2b0fed9c..9e373ba20308 100644
--- a/drivers/atm/iphase.c
+++ b/drivers/atm/iphase.c
@@ -1320,8 +1320,8 @@ static void rx_dle_intr(struct atm_dev *dev)
           if (ia_vcc == NULL)
           {
              atomic_inc(&vcc->stats->rx_err);
+             atm_return(vcc, skb->truesize);
              dev_kfree_skb_any(skb);
-             atm_return(vcc, atm_guess_pdu2truesize(len));
              goto INCR_DLE;
            }
           // get real pkt length  pwang_test
@@ -1334,8 +1334,8 @@ static void rx_dle_intr(struct atm_dev *dev)
              atomic_inc(&vcc->stats->rx_err);
              IF_ERR(printk("rx_dle_intr: Bad  AAL5 trailer %d (skb len %d)", 
                                                             length, skb->len);)
+             atm_return(vcc, skb->truesize);
              dev_kfree_skb_any(skb);
-             atm_return(vcc, atm_guess_pdu2truesize(len));
              goto INCR_DLE;
           }
           skb_trim(skb, length);
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 43ea1b2de3ee..f4ff882cb2da 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -445,16 +445,6 @@ void vcc_insert_socket(struct sock *sk);
 
 void atm_dev_release_vccs(struct atm_dev *dev);
 
-/*
- * This is approximately the algorithm used by alloc_skb.
- *
- */
-
-static inline int atm_guess_pdu2truesize(int size)
-{
-	return SKB_TRUESIZE(size);
-}
-
 
 static inline void atm_force_charge(struct atm_vcc *vcc,int truesize)
 {
diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c
index f41f02656ff4..876fbe83e2e4 100644
--- a/net/atm/atm_misc.c
+++ b/net/atm/atm_misc.c
@@ -26,7 +26,7 @@ struct sk_buff *atm_alloc_charge(struct atm_vcc *vcc, int pdu_size,
 				 gfp_t gfp_flags)
 {
 	struct sock *sk = sk_atm(vcc);
-	int guess = atm_guess_pdu2truesize(pdu_size);
+	int guess = SKB_TRUESIZE(pdu_size);
 
 	atm_force_charge(vcc, guess);
 	if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
-- 
cgit v1.2.3


From 876f6e67d1c617c098c67934a8d00b065bb9688b Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Sat, 26 Nov 2011 19:54:58 +0000
Subject: net/mlx4: move RSS related definitions to be global

Towards adding RSS support for IB drivers/application who use
the mlx4 HW, make the RSS related definitions global and change
the mlx4_en driver to use them.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Shlomo Pongratz <shlomop@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_resources.c |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c        | 10 +++++----
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h      | 10 ---------
 include/linux/mlx4/qp.h                           | 27 +++++++++++++++++++++++
 4 files changed, 34 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
index 0dfb4ec8a9dd..bcbc54c16947 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c
@@ -44,7 +44,7 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
 	struct mlx4_en_dev *mdev = priv->mdev;
 
 	memset(context, 0, sizeof *context);
-	context->flags = cpu_to_be32(7 << 16 | rss << 13);
+	context->flags = cpu_to_be32(7 << 16 | rss << MLX4_RSS_QPC_FLAG_OFFSET);
 	context->pd = cpu_to_be32(mdev->priv_pdn);
 	context->mtu_msgmax = 0xff;
 	if (!is_tx && !rss)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index c2df6c358603..d4bad5d57fb7 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -837,9 +837,10 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_rss_map *rss_map = &priv->rss_map;
 	struct mlx4_qp_context context;
-	struct mlx4_en_rss_context *rss_context;
+	struct mlx4_rss_context *rss_context;
 	void *ptr;
-	u8 rss_mask = 0x3f;
+	u8 rss_mask = (MLX4_RSS_IPV4 | MLX4_RSS_TCP_IPV4 | MLX4_RSS_IPV6 |
+		MLX4_RSS_TCP_IPV6 | MLX4_RSS_UDP_IPV4 | MLX4_RSS_UDP_IPV6);
 	int i, qpn;
 	int err = 0;
 	int good_qps = 0;
@@ -877,13 +878,14 @@ int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv)
 	mlx4_en_fill_qp_context(priv, 0, 0, 0, 1, priv->base_qpn,
 				priv->rx_ring[0].cqn, &context);
 
-	ptr = ((void *) &context) + 0x3c;
+	ptr = ((void *) &context) + offsetof(struct mlx4_qp_context, pri_path)
+					+ MLX4_RSS_OFFSET_IN_QPC_PRI_PATH;
 	rss_context = ptr;
 	rss_context->base_qpn = cpu_to_be32(ilog2(priv->rx_ring_num) << 24 |
 					    (rss_map->base_qpn));
 	rss_context->default_qpn = cpu_to_be32(rss_map->base_qpn);
 	rss_context->flags = rss_mask;
-	rss_context->hash_fn = 1;
+	rss_context->hash_fn = MLX4_RSS_HASH_TOP;
 	for (i = 0; i < 10; i++)
 		rss_context->rss_key[i] = rsskey[i];
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 207b5add3ca8..ef7dfcff588d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -366,16 +366,6 @@ struct mlx4_en_rss_map {
 	enum mlx4_qp_state indir_state;
 };
 
-struct mlx4_en_rss_context {
-	__be32 base_qpn;
-	__be32 default_qpn;
-	u16 reserved;
-	u8 hash_fn;
-	u8 flags;
-	__be32 rss_key[10];
-	__be32 base_qpn_udp;
-};
-
 struct mlx4_en_port_state {
 	int link_state;
 	int link_speed;
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 48cc4cb97858..6562ff6aa9d6 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -97,6 +97,33 @@ enum {
 	MLX4_QP_BIT_RIC				= 1 <<	4,
 };
 
+enum {
+	MLX4_RSS_HASH_XOR			= 0,
+	MLX4_RSS_HASH_TOP			= 1,
+
+	MLX4_RSS_UDP_IPV6			= 1 << 0,
+	MLX4_RSS_UDP_IPV4			= 1 << 1,
+	MLX4_RSS_TCP_IPV6			= 1 << 2,
+	MLX4_RSS_IPV6				= 1 << 3,
+	MLX4_RSS_TCP_IPV4			= 1 << 4,
+	MLX4_RSS_IPV4				= 1 << 5,
+
+	/* offset of mlx4_rss_context within mlx4_qp_context.pri_path */
+	MLX4_RSS_OFFSET_IN_QPC_PRI_PATH		= 0x24,
+	/* offset of being RSS indirection QP within mlx4_qp_context.flags */
+	MLX4_RSS_QPC_FLAG_OFFSET		= 13,
+};
+
+struct mlx4_rss_context {
+	__be32			base_qpn;
+	__be32			default_qpn;
+	u16			reserved;
+	u8			hash_fn;
+	u8			flags;
+	__be32			rss_key[10];
+	__be32			base_qpn_udp;
+};
+
 struct mlx4_qp_path {
 	u8			fl;
 	u8			reserved1[2];
-- 
cgit v1.2.3


From 559a9f1d354b577af28f84181751820ff7d29feb Mon Sep 17 00:00:00 2001
From: Oren Duer <oren@mellanox.co.il>
Date: Sat, 26 Nov 2011 19:55:15 +0000
Subject: net/mlx4_en: fix WOL handlers were always looking at port2 capability
 bit

There are 2 capability bits for WOL, one for each port.
WOL handlers were looking only on the second bit, regardless of the port.

Signed-off-by: Oren Duer <oren@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 20 ++++++++++++++++++--
 include/linux/mlx4/device.h                     |  3 ++-
 2 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index ee637a200915..7dbc6a230779 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -106,8 +106,17 @@ static void mlx4_en_get_wol(struct net_device *netdev,
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 	int err = 0;
 	u64 config = 0;
+	u64 mask;
 
-	if (!(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_WOL)) {
+	if ((priv->port < 1) || (priv->port > 2)) {
+		en_err(priv, "Failed to get WoL information\n");
+		return;
+	}
+
+	mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+		MLX4_DEV_CAP_FLAG_WOL_PORT2;
+
+	if (!(priv->mdev->dev->caps.flags & mask)) {
 		wol->supported = 0;
 		wol->wolopts = 0;
 		return;
@@ -136,8 +145,15 @@ static int mlx4_en_set_wol(struct net_device *netdev,
 	struct mlx4_en_priv *priv = netdev_priv(netdev);
 	u64 config = 0;
 	int err = 0;
+	u64 mask;
+
+	if ((priv->port < 1) || (priv->port > 2))
+		return -EOPNOTSUPP;
+
+	mask = (priv->port == 1) ? MLX4_DEV_CAP_FLAG_WOL_PORT1 :
+		MLX4_DEV_CAP_FLAG_WOL_PORT2;
 
-	if (!(priv->mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_WOL))
+	if (!(priv->mdev->dev->caps.flags & mask))
 		return -EOPNOTSUPP;
 
 	if (wol->supported & ~WAKE_MAGIC)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 84b0b1848f17..ca2c39771c38 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -77,7 +77,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_IBOE		= 1LL << 30,
 	MLX4_DEV_CAP_FLAG_UC_LOOPBACK	= 1LL << 32,
 	MLX4_DEV_CAP_FLAG_FCS_KEEP	= 1LL << 34,
-	MLX4_DEV_CAP_FLAG_WOL		= 1LL << 38,
+	MLX4_DEV_CAP_FLAG_WOL_PORT1	= 1LL << 37,
+	MLX4_DEV_CAP_FLAG_WOL_PORT2	= 1LL << 38,
 	MLX4_DEV_CAP_FLAG_UDP_RSS	= 1LL << 40,
 	MLX4_DEV_CAP_FLAG_VEP_UC_STEER	= 1LL << 41,
 	MLX4_DEV_CAP_FLAG_VEP_MC_STEER	= 1LL << 42,
-- 
cgit v1.2.3


From 60d6fe99e4a507f77b63c090eb8aacb67e21687a Mon Sep 17 00:00:00 2001
From: Amir Vadai <amirv@mellanox.co.il>
Date: Sat, 26 Nov 2011 19:55:19 +0000
Subject: net/mlx4_en: adding loopback support

Device must be in promiscuous mode or DMAC must be same as the host MAC, or
else packet will be dropped by the HW rx filtering.

Signed-off-by: Amir Vadai <amirv@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 19 +++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     |  3 +--
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  1 +
 include/linux/mlx4/qp.h                        |  1 +
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 78d776bc355c..4c5bbb3aad31 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -974,6 +974,21 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+static int mlx4_en_set_features(struct net_device *netdev,
+		netdev_features_t features)
+{
+	struct mlx4_en_priv *priv = netdev_priv(netdev);
+
+	if (features & NETIF_F_LOOPBACK)
+		priv->ctrl_flags |= cpu_to_be32(MLX4_WQE_CTRL_FORCE_LOOPBACK);
+	else
+		priv->ctrl_flags &=
+			cpu_to_be32(~MLX4_WQE_CTRL_FORCE_LOOPBACK);
+
+	return 0;
+
+}
+
 static const struct net_device_ops mlx4_netdev_ops = {
 	.ndo_open		= mlx4_en_open,
 	.ndo_stop		= mlx4_en_close,
@@ -990,6 +1005,7 @@ static const struct net_device_ops mlx4_netdev_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= mlx4_en_netpoll,
 #endif
+	.ndo_set_features	= mlx4_en_set_features,
 };
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -1022,6 +1038,8 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	priv->port = port;
 	priv->port_up = false;
 	priv->flags = prof->flags;
+	priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
+			MLX4_WQE_CTRL_SOLICITED);
 	priv->tx_ring_num = prof->tx_ring_num;
 	priv->rx_ring_num = prof->rx_ring_num;
 	priv->mac_index = -1;
@@ -1088,6 +1106,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	dev->features = dev->hw_features | NETIF_F_HIGHDMA |
 			NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX |
 			NETIF_F_HW_VLAN_FILTER;
+	dev->hw_features |= NETIF_F_LOOPBACK;
 
 	mdev->pndev[port] = dev;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 3094f940b928..807c2186548c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -679,8 +679,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag);
 	tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * !!vlan_tag;
 	tx_desc->ctrl.fence_size = (real_size / 16) & 0x3f;
-	tx_desc->ctrl.srcrb_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
-						MLX4_WQE_CTRL_SOLICITED);
+	tx_desc->ctrl.srcrb_flags = priv->ctrl_flags;
 	if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) {
 		tx_desc->ctrl.srcrb_flags |= cpu_to_be32(MLX4_WQE_CTRL_IP_CSUM |
 							 MLX4_WQE_CTRL_TCP_UDP_CSUM);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index d2dd97fa091f..ea2ba6899e9a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -453,6 +453,7 @@ struct mlx4_en_priv {
 	int base_qpn;
 
 	struct mlx4_en_rss_map rss_map;
+	u32 ctrl_flags;
 	u32 flags;
 #define MLX4_EN_FLAG_PROMISC	0x1
 #define MLX4_EN_FLAG_MC_PROMISC	0x2
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 6562ff6aa9d6..bee8fa231276 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -210,6 +210,7 @@ struct mlx4_wqe_ctrl_seg {
 	 * [4]   IP checksum
 	 * [3:2] C (generate completion queue entry)
 	 * [1]   SE (solicited event)
+	 * [0]   FL (force loopback)
 	 */
 	__be32			srcrb_flags;
 	/*
-- 
cgit v1.2.3


From 1d9d9213d526f2f4ef9a3aa198a29a0b1a670fa1 Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <simon.wunderlich@s2003.tu-chemnitz.de>
Date: Fri, 18 Nov 2011 14:20:43 +0100
Subject: wireless: Add NoAck per tid support

This patch contains the configuration changes in nl80211/cfg80211.

Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Mathias Kretschmer <mathias.kretschmer@fokus.fraunhofer.de>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 10 ++++++++++
 include/net/cfg80211.h  |  6 ++++++
 net/wireless/nl80211.c  | 28 ++++++++++++++++++++++++++++
 3 files changed, 44 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 97bfebfcce90..1fc04853ec95 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -538,6 +538,9 @@
  *	OLBC handling in hostapd. Beacons are reported in %NL80211_CMD_FRAME
  *	messages. Note that per PHY only one application may register.
  *
+ * @NL80211_CMD_SET_NOACK_MAP: sets a bitmap for the individual TIDs whether
+ *      No Acknowledgement Policy should be applied.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -675,6 +678,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_UNEXPECTED_4ADDR_FRAME,
 
+	NL80211_CMD_SET_NOACK_MAP,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -1185,6 +1190,9 @@ enum nl80211_commands {
  *    abides to when initiating radiation on DFS channels. A country maps
  *    to one DFS region.
  *
+ * @NL80211_ATTR_NOACK_MAP: This u16 bitmap contains the No Ack Policy of
+ *      up to 16 TIDs.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -1428,6 +1436,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_DISABLE_HT,
 	NL80211_ATTR_HT_CAPABILITY_MASK,
 
+	NL80211_ATTR_NOACK_MAP,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index d5e18913f293..38ce452da20f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1465,6 +1465,8 @@ struct cfg80211_gtk_rekey_data {
  *
  * @probe_client: probe an associated client, must return a cookie that it
  *	later passes to cfg80211_probe_status().
+ *
+ * @set_noack_map: Set the NoAck Map for the TIDs.
  */
 struct cfg80211_ops {
 	int	(*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow);
@@ -1658,6 +1660,10 @@ struct cfg80211_ops {
 	int	(*probe_client)(struct wiphy *wiphy, struct net_device *dev,
 				const u8 *peer, u64 *cookie);
 
+	int	(*set_noack_map)(struct wiphy *wiphy,
+				  struct net_device *dev,
+				  u16 noack_map);
+
 	struct ieee80211_channel *(*get_channel)(struct wiphy *wiphy);
 };
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index a1cabde7cb5f..6026c29c338d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -204,6 +204,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_HT_CAPABILITY_MASK] = {
 		.len = NL80211_HT_CAPABILITY_LEN
 	},
+	[NL80211_ATTR_NOACK_MAP] = { .type = NLA_U16 },
 };
 
 /* policy for the key attributes */
@@ -904,6 +905,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	if (dev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN)
 		CMD(sched_scan_start, START_SCHED_SCAN);
 	CMD(probe_client, PROBE_CLIENT);
+	CMD(set_noack_map, SET_NOACK_MAP);
 	if (dev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS) {
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_REGISTER_BEACONS);
@@ -1759,6 +1761,23 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info)
 	return rdev->ops->del_virtual_intf(&rdev->wiphy, dev);
 }
 
+static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev = info->user_ptr[0];
+	struct net_device *dev = info->user_ptr[1];
+	u16 noack_map;
+
+	if (!info->attrs[NL80211_ATTR_NOACK_MAP])
+		return -EINVAL;
+
+	if (!rdev->ops->set_noack_map)
+		return -EOPNOTSUPP;
+
+	noack_map = nla_get_u16(info->attrs[NL80211_ATTR_NOACK_MAP]);
+
+	return rdev->ops->set_noack_map(&rdev->wiphy, dev, noack_map);
+}
+
 struct get_key_cookie {
 	struct sk_buff *msg;
 	int error;
@@ -6604,6 +6623,15 @@ static struct genl_ops nl80211_ops[] = {
 		.internal_flags = NL80211_FLAG_NEED_WIPHY |
 				  NL80211_FLAG_NEED_RTNL,
 	},
+	{
+		.cmd = NL80211_CMD_SET_NOACK_MAP,
+		.doit = nl80211_set_noack_map,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+		.internal_flags = NL80211_FLAG_NEED_NETDEV |
+				  NL80211_FLAG_NEED_RTNL,
+	},
+
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
-- 
cgit v1.2.3


From dca7e9430cb3e492437a5ce891b8b3e315c147ca Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@cozybit.com>
Date: Thu, 24 Nov 2011 17:15:24 -0800
Subject: {nl,cfg,mac}80211: implement dot11MeshHWMPperrMinInterval

As per 802.11mb 13.9.11.3

Signed-off-by: Thomas Pedersen <thomas@cozybit.com>
Signed-off-by: Javier Cardona <javier@cozybit.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h       | 5 +++++
 include/net/cfg80211.h        | 1 +
 net/mac80211/cfg.c            | 3 +++
 net/mac80211/debugfs_netdev.c | 3 +++
 net/mac80211/ieee80211_i.h    | 4 +++-
 net/mac80211/mesh.c           | 1 +
 net/mac80211/mesh_hwmp.c      | 6 ++++++
 net/wireless/mesh.c           | 2 ++
 net/wireless/nl80211.c        | 6 ++++++
 9 files changed, 30 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 1fc04853ec95..f51e3bf93a96 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -2094,6 +2094,10 @@ enum nl80211_mntr_flags {
  * access to a broader network beyond the MBSS.  This is done via Root
  * Announcement frames.
  *
+ * @NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL: The minimum interval of time (in
+ * TUs) during which a mesh STA can send only one Action frame containing a
+ * PERR element.
+ *
  * @NL80211_MESHCONF_ATTR_MAX: highest possible mesh configuration attribute
  *
  * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
@@ -2117,6 +2121,7 @@ enum nl80211_meshconf_params {
 	NL80211_MESHCONF_ELEMENT_TTL,
 	NL80211_MESHCONF_HWMP_RANN_INTERVAL,
 	NL80211_MESHCONF_GATE_ANNOUNCEMENTS,
+	NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
 
 	/* keep last */
 	__NL80211_MESHCONF_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 232d1a5c5672..ce6236b5473d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -782,6 +782,7 @@ struct mesh_config {
 	u16 min_discovery_timeout;
 	u32 dot11MeshHWMPactivePathTimeout;
 	u16 dot11MeshHWMPpreqMinInterval;
+	u16 dot11MeshHWMPperrMinInterval;
 	u16 dot11MeshHWMPnetDiameterTraversalTime;
 	u8  dot11MeshHWMPRootMode;
 	u16 dot11MeshHWMPRannInterval;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 7ccba83dc8c8..393b2a4445b8 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1272,6 +1272,9 @@ static int ieee80211_update_mesh_config(struct wiphy *wiphy,
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL, mask))
 		conf->dot11MeshHWMPpreqMinInterval =
 			nconf->dot11MeshHWMPpreqMinInterval;
+	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL, mask))
+		conf->dot11MeshHWMPperrMinInterval =
+			nconf->dot11MeshHWMPperrMinInterval;
 	if (_chg_mesh_attr(NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 			   mask))
 		conf->dot11MeshHWMPnetDiameterTraversalTime =
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 9352819a986b..8df28910b8ee 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -405,6 +405,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPactivePathTimeout,
 		u.mesh.mshcfg.dot11MeshHWMPactivePathTimeout, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPpreqMinInterval,
 		u.mesh.mshcfg.dot11MeshHWMPpreqMinInterval, DEC);
+IEEE80211_IF_FILE(dot11MeshHWMPperrMinInterval,
+		u.mesh.mshcfg.dot11MeshHWMPperrMinInterval, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPnetDiameterTraversalTime,
 		u.mesh.mshcfg.dot11MeshHWMPnetDiameterTraversalTime, DEC);
 IEEE80211_IF_FILE(dot11MeshHWMPmaxPREQretries,
@@ -534,6 +536,7 @@ static void add_mesh_config(struct ieee80211_sub_if_data *sdata)
 	MESHPARAMS_ADD(dot11MeshMaxPeerLinks);
 	MESHPARAMS_ADD(dot11MeshHWMPactivePathTimeout);
 	MESHPARAMS_ADD(dot11MeshHWMPpreqMinInterval);
+	MESHPARAMS_ADD(dot11MeshHWMPperrMinInterval);
 	MESHPARAMS_ADD(dot11MeshHWMPnetDiameterTraversalTime);
 	MESHPARAMS_ADD(dot11MeshHWMPmaxPREQretries);
 	MESHPARAMS_ADD(path_refresh_time);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 7a757a97ba37..a785d61defe1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -514,7 +514,9 @@ struct ieee80211_if_mesh {
 	atomic_t mpaths;
 	/* Timestamp of last SN update */
 	unsigned long last_sn_update;
-	/* Timestamp of last SN sent */
+	/* Time when it's ok to send next PERR */
+	unsigned long next_perr;
+	/* Timestamp of last PREQ sent */
 	unsigned long last_preq;
 	struct mesh_rmc *rmc;
 	spinlock_t mesh_preq_queue_lock;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index ee82d2f7f114..c707c8bf6d2c 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -749,6 +749,7 @@ void ieee80211_mesh_init_sdata(struct ieee80211_sub_if_data *sdata)
 	atomic_set(&ifmsh->mpaths, 0);
 	mesh_rmc_init(sdata);
 	ifmsh->last_preq = jiffies;
+	ifmsh->next_perr = jiffies;
 	/* Allocate all mesh structures when creating the first mesh interface. */
 	if (!mesh_allocated)
 		ieee80211s_init();
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index 208ba35661f9..fe93386d6aa9 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -241,11 +241,15 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 {
 	struct ieee80211_local *local = sdata->local;
 	struct sk_buff *skb;
+	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_mgmt *mgmt;
 	u8 *pos, ie_len;
 	int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) +
 		      sizeof(mgmt->u.action.u.mesh_action);
 
+	if (time_before(jiffies, ifmsh->next_perr))
+		return -EAGAIN;
+
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
 			    hdr_len +
 			    2 + 15 /* PERR IE */);
@@ -290,6 +294,8 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 
 	/* see note in function header */
 	prepare_frame_for_deferred_tx(sdata, skb);
+	ifmsh->next_perr = TU_TO_EXP_TIME(
+				   ifmsh->mshcfg.dot11MeshHWMPperrMinInterval);
 	ieee80211_add_pending_skb(local, skb);
 	return 0;
 }
diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c
index b7b7868f4128..8c550df13037 100644
--- a/net/wireless/mesh.c
+++ b/net/wireless/mesh.c
@@ -20,6 +20,7 @@
  * interface
  */
 #define MESH_PREQ_MIN_INT	10
+#define MESH_PERR_MIN_INT	100
 #define MESH_DIAM_TRAVERSAL_TIME 50
 
 /*
@@ -47,6 +48,7 @@ const struct mesh_config default_mesh_config = {
 	.dot11MeshMaxPeerLinks = MESH_MAX_ESTAB_PLINKS,
 	.dot11MeshHWMPactivePathTimeout = MESH_PATH_TIMEOUT,
 	.dot11MeshHWMPpreqMinInterval = MESH_PREQ_MIN_INT,
+	.dot11MeshHWMPperrMinInterval = MESH_PERR_MIN_INT,
 	.dot11MeshHWMPnetDiameterTraversalTime = MESH_DIAM_TRAVERSAL_TIME,
 	.dot11MeshHWMPmaxPREQretries = MESH_MAX_PREQ_RETRIES,
 	.path_refresh_time = MESH_PATH_REFRESH_TIME,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 5699c3b1aba4..0ee512b85a1f 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3195,6 +3195,8 @@ static int nl80211_get_mesh_config(struct sk_buff *skb,
 			cur_params.dot11MeshHWMPactivePathTimeout);
 	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
 			cur_params.dot11MeshHWMPpreqMinInterval);
+	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
+			cur_params.dot11MeshHWMPperrMinInterval);
 	NLA_PUT_U16(msg, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
 			cur_params.dot11MeshHWMPnetDiameterTraversalTime);
 	NLA_PUT_U8(msg, NL80211_MESHCONF_HWMP_ROOTMODE,
@@ -3229,6 +3231,7 @@ static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_A
 	[NL80211_MESHCONF_MIN_DISCOVERY_TIMEOUT] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_HWMP_ACTIVE_PATH_TIMEOUT] = { .type = NLA_U32 },
 	[NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL] = { .type = NLA_U16 },
+	[NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME] = { .type = NLA_U16 },
 	[NL80211_MESHCONF_HWMP_ROOTMODE] = { .type = NLA_U8 },
 	[NL80211_MESHCONF_HWMP_RANN_INTERVAL] = { .type = NLA_U16 },
@@ -3303,6 +3306,9 @@ do {\
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPpreqMinInterval,
 			mask, NL80211_MESHCONF_HWMP_PREQ_MIN_INTERVAL,
 			nla_get_u16);
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshHWMPperrMinInterval,
+			mask, NL80211_MESHCONF_HWMP_PERR_MIN_INTERVAL,
+			nla_get_u16);
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg,
 			dot11MeshHWMPnetDiameterTraversalTime,
 			mask, NL80211_MESHCONF_HWMP_NET_DIAM_TRVS_TIME,
-- 
cgit v1.2.3


From 75957ba36c05b979701e9ec64b37819adc12f830 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:35 +0000
Subject: dql: Dynamic queue limits

Implementation of dynamic queue limits (dql).  This is a libary which
allows a queue limit to be dynamically managed.  The goal of dql is
to set the queue limit, number of objects to the queue, to be minimized
without allowing the queue to be starved.

dql would be used with a queue which has these properties:

1) Objects are queued up to some limit which can be expressed as a
   count of objects.
2) Periodically a completion process executes which retires consumed
   objects.
3) Starvation occurs when limit has been reached, all queued data has
   actually been consumed but completion processing has not yet run,
   so queuing new data is blocked.
4) Minimizing the amount of queued data is desirable.

A canonical example of such a queue would be a NIC HW transmit queue.

The queue limit is dynamic, it will increase or decrease over time
depending on the workload.  The queue limit is recalculated each time
completion processing is done.  Increases occur when the queue is
starved and can exponentially increase over successive intervals.
Decreases occur when more data is being maintained in the queue than
needed to prevent starvation.  The number of extra objects, or "slack",
is measured over successive intervals, and to avoid hysteresis the
limit is only reduced by the miminum slack seen over a configurable
time period.

dql API provides routines to manage the queue:
- dql_init is called to intialize the dql structure
- dql_reset is called to reset dynamic values
- dql_queued called when objects are being enqueued
- dql_avail returns availability in the queue
- dql_completed is called when objects have be consumed in the queue

Configuration consists of:
- max_limit, maximum limit
- min_limit, minimum limit
- slack_hold_time, time to measure instances of slack before reducing
  queue limit

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dynamic_queue_limits.h |  97 +++++++++++++++++++++++++
 lib/Kconfig                          |   3 +
 lib/Makefile                         |   2 +
 lib/dynamic_queue_limits.c           | 133 +++++++++++++++++++++++++++++++++++
 4 files changed, 235 insertions(+)
 create mode 100644 include/linux/dynamic_queue_limits.h
 create mode 100644 lib/dynamic_queue_limits.c

(limited to 'include/linux')

diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h
new file mode 100644
index 000000000000..5621547d631b
--- /dev/null
+++ b/include/linux/dynamic_queue_limits.h
@@ -0,0 +1,97 @@
+/*
+ * Dynamic queue limits (dql) - Definitions
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ *
+ * This header file contains the definitions for dynamic queue limits (dql).
+ * dql would be used in conjunction with a producer/consumer type queue
+ * (possibly a HW queue).  Such a queue would have these general properties:
+ *
+ *   1) Objects are queued up to some limit specified as number of objects.
+ *   2) Periodically a completion process executes which retires consumed
+ *      objects.
+ *   3) Starvation occurs when limit has been reached, all queued data has
+ *      actually been consumed, but completion processing has not yet run
+ *      so queuing new data is blocked.
+ *   4) Minimizing the amount of queued data is desirable.
+ *
+ * The goal of dql is to calculate the limit as the minimum number of objects
+ * needed to prevent starvation.
+ *
+ * The primary functions of dql are:
+ *    dql_queued - called when objects are enqueued to record number of objects
+ *    dql_avail - returns how many objects are available to be queued based
+ *      on the object limit and how many objects are already enqueued
+ *    dql_completed - called at completion time to indicate how many objects
+ *      were retired from the queue
+ *
+ * The dql implementation does not implement any locking for the dql data
+ * structures, the higher layer should provide this.  dql_queued should
+ * be serialized to prevent concurrent execution of the function; this
+ * is also true for  dql_completed.  However, dql_queued and dlq_completed  can
+ * be executed concurrently (i.e. they can be protected by different locks).
+ */
+
+#ifndef _LINUX_DQL_H
+#define _LINUX_DQL_H
+
+#ifdef __KERNEL__
+
+struct dql {
+	/* Fields accessed in enqueue path (dql_queued) */
+	unsigned int	num_queued;		/* Total ever queued */
+	unsigned int	adj_limit;		/* limit + num_completed */
+	unsigned int	last_obj_cnt;		/* Count at last queuing */
+
+	/* Fields accessed only by completion path (dql_completed) */
+
+	unsigned int	limit ____cacheline_aligned_in_smp; /* Current limit */
+	unsigned int	num_completed;		/* Total ever completed */
+
+	unsigned int	prev_ovlimit;		/* Previous over limit */
+	unsigned int	prev_num_queued;	/* Previous queue total */
+	unsigned int	prev_last_obj_cnt;	/* Previous queuing cnt */
+
+	unsigned int	lowest_slack;		/* Lowest slack found */
+	unsigned long	slack_start_time;	/* Time slacks seen */
+
+	/* Configuration */
+	unsigned int	max_limit;		/* Max limit */
+	unsigned int	min_limit;		/* Minimum limit */
+	unsigned int	slack_hold_time;	/* Time to measure slack */
+};
+
+/* Set some static maximums */
+#define DQL_MAX_OBJECT (UINT_MAX / 16)
+#define DQL_MAX_LIMIT ((UINT_MAX / 2) - DQL_MAX_OBJECT)
+
+/*
+ * Record number of objects queued. Assumes that caller has already checked
+ * availability in the queue with dql_avail.
+ */
+static inline void dql_queued(struct dql *dql, unsigned int count)
+{
+	BUG_ON(count > DQL_MAX_OBJECT);
+
+	dql->num_queued += count;
+	dql->last_obj_cnt = count;
+}
+
+/* Returns how many objects can be queued, < 0 indicates over limit. */
+static inline int dql_avail(const struct dql *dql)
+{
+	return dql->adj_limit - dql->num_queued;
+}
+
+/* Record number of completed objects and recalculate the limit. */
+void dql_completed(struct dql *dql, unsigned int count);
+
+/* Reset dql state */
+void dql_reset(struct dql *dql);
+
+/* Initialize dql state */
+int dql_init(struct dql *dql, unsigned hold_time);
+
+#endif /* _KERNEL_ */
+
+#endif /* _LINUX_DQL_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index 32f3e5ae2be5..63b5782732ed 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -244,6 +244,9 @@ config CPU_RMAP
 	bool
 	depends on SMP
 
+config DQL
+	bool
+
 #
 # Netlink attribute parsing support is select'ed if needed
 #
diff --git a/lib/Makefile b/lib/Makefile
index a4da283f5dc0..ff00d4dcb7ed 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -115,6 +115,8 @@ obj-$(CONFIG_CPU_RMAP) += cpu_rmap.o
 
 obj-$(CONFIG_CORDIC) += cordic.o
 
+obj-$(CONFIG_DQL) += dynamic_queue_limits.o
+
 hostprogs-y	:= gen_crc32table
 clean-files	:= crc32table.h
 
diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
new file mode 100644
index 000000000000..3d1bdcdd7db4
--- /dev/null
+++ b/lib/dynamic_queue_limits.c
@@ -0,0 +1,133 @@
+/*
+ * Dynamic byte queue limits.  See include/linux/dynamic_queue_limits.h
+ *
+ * Copyright (c) 2011, Tom Herbert <therbert@google.com>
+ */
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/dynamic_queue_limits.h>
+
+#define POSDIFF(A, B) ((A) > (B) ? (A) - (B) : 0)
+
+/* Records completed count and recalculates the queue limit */
+void dql_completed(struct dql *dql, unsigned int count)
+{
+	unsigned int inprogress, prev_inprogress, limit;
+	unsigned int ovlimit, all_prev_completed, completed;
+
+	/* Can't complete more than what's in queue */
+	BUG_ON(count > dql->num_queued - dql->num_completed);
+
+	completed = dql->num_completed + count;
+	limit = dql->limit;
+	ovlimit = POSDIFF(dql->num_queued - dql->num_completed, limit);
+	inprogress = dql->num_queued - completed;
+	prev_inprogress = dql->prev_num_queued - dql->num_completed;
+	all_prev_completed = POSDIFF(completed, dql->prev_num_queued);
+
+	if ((ovlimit && !inprogress) ||
+	    (dql->prev_ovlimit && all_prev_completed)) {
+		/*
+		 * Queue considered starved if:
+		 *   - The queue was over-limit in the last interval,
+		 *     and there is no more data in the queue.
+		 *  OR
+		 *   - The queue was over-limit in the previous interval and
+		 *     when enqueuing it was possible that all queued data
+		 *     had been consumed.  This covers the case when queue
+		 *     may have becomes starved between completion processing
+		 *     running and next time enqueue was scheduled.
+		 *
+		 *     When queue is starved increase the limit by the amount
+		 *     of bytes both sent and completed in the last interval,
+		 *     plus any previous over-limit.
+		 */
+		limit += POSDIFF(completed, dql->prev_num_queued) +
+		     dql->prev_ovlimit;
+		dql->slack_start_time = jiffies;
+		dql->lowest_slack = UINT_MAX;
+	} else if (inprogress && prev_inprogress && !all_prev_completed) {
+		/*
+		 * Queue was not starved, check if the limit can be decreased.
+		 * A decrease is only considered if the queue has been busy in
+		 * the whole interval (the check above).
+		 *
+		 * If there is slack, the amount of execess data queued above
+		 * the the amount needed to prevent starvation, the queue limit
+		 * can be decreased.  To avoid hysteresis we consider the
+		 * minimum amount of slack found over several iterations of the
+		 * completion routine.
+		 */
+		unsigned int slack, slack_last_objs;
+
+		/*
+		 * Slack is the maximum of
+		 *   - The queue limit plus previous over-limit minus twice
+		 *     the number of objects completed.  Note that two times
+		 *     number of completed bytes is a basis for an upper bound
+		 *     of the limit.
+		 *   - Portion of objects in the last queuing operation that
+		 *     was not part of non-zero previous over-limit.  That is
+		 *     "round down" by non-overlimit portion of the last
+		 *     queueing operation.
+		 */
+		slack = POSDIFF(limit + dql->prev_ovlimit,
+		    2 * (completed - dql->num_completed));
+		slack_last_objs = dql->prev_ovlimit ?
+		    POSDIFF(dql->prev_last_obj_cnt, dql->prev_ovlimit) : 0;
+
+		slack = max(slack, slack_last_objs);
+
+		if (slack < dql->lowest_slack)
+			dql->lowest_slack = slack;
+
+		if (time_after(jiffies,
+			       dql->slack_start_time + dql->slack_hold_time)) {
+			limit = POSDIFF(limit, dql->lowest_slack);
+			dql->slack_start_time = jiffies;
+			dql->lowest_slack = UINT_MAX;
+		}
+	}
+
+	/* Enforce bounds on limit */
+	limit = clamp(limit, dql->min_limit, dql->max_limit);
+
+	if (limit != dql->limit) {
+		dql->limit = limit;
+		ovlimit = 0;
+	}
+
+	dql->adj_limit = limit + completed;
+	dql->prev_ovlimit = ovlimit;
+	dql->prev_last_obj_cnt = dql->last_obj_cnt;
+	dql->num_completed = completed;
+	dql->prev_num_queued = dql->num_queued;
+}
+EXPORT_SYMBOL(dql_completed);
+
+void dql_reset(struct dql *dql)
+{
+	/* Reset all dynamic values */
+	dql->limit = 0;
+	dql->num_queued = 0;
+	dql->num_completed = 0;
+	dql->last_obj_cnt = 0;
+	dql->prev_num_queued = 0;
+	dql->prev_last_obj_cnt = 0;
+	dql->prev_ovlimit = 0;
+	dql->lowest_slack = UINT_MAX;
+	dql->slack_start_time = jiffies;
+}
+EXPORT_SYMBOL(dql_reset);
+
+int dql_init(struct dql *dql, unsigned hold_time)
+{
+	dql->max_limit = DQL_MAX_LIMIT;
+	dql->min_limit = 0;
+	dql->slack_hold_time = hold_time;
+	dql_reset(dql);
+	return 0;
+}
+EXPORT_SYMBOL(dql_init);
-- 
cgit v1.2.3


From 7346649826382b769cfadf4a2fe8a84d060c55e9 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:44 +0000
Subject: net: Add queue state xoff flag for stack

Create separate queue state flags so that either the stack or drivers
can turn on XOFF.  Added a set of functions used in the stack to determine
if a queue is really stopped (either by stack or driver)

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 41 ++++++++++++++++++++++++++++++-----------
 net/core/dev.c            |  4 ++--
 net/core/netpoll.c        |  4 ++--
 net/core/pktgen.c         |  2 +-
 net/sched/sch_generic.c   |  8 ++++----
 net/sched/sch_multiq.c    |  6 ++++--
 net/sched/sch_teql.c      |  6 +++---
 7 files changed, 46 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index ac9a4b9344ca..d19f93265cac 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -517,11 +517,23 @@ static inline void napi_synchronize(const struct napi_struct *n)
 #endif
 
 enum netdev_queue_state_t {
-	__QUEUE_STATE_XOFF,
+	__QUEUE_STATE_DRV_XOFF,
+	__QUEUE_STATE_STACK_XOFF,
 	__QUEUE_STATE_FROZEN,
-#define QUEUE_STATE_XOFF_OR_FROZEN ((1 << __QUEUE_STATE_XOFF)		| \
-				    (1 << __QUEUE_STATE_FROZEN))
+#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF)		| \
+			      (1 << __QUEUE_STATE_STACK_XOFF))
+#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF		| \
+					(1 << __QUEUE_STATE_FROZEN))
 };
+/*
+ * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue.  The
+ * netif_tx_* functions below are used to manipulate this flag.  The
+ * __QUEUE_STATE_STACK_XOFF flag is used by the stack to stop the transmit
+ * queue independently.  The netif_xmit_*stopped functions below are called
+ * to check if the queue has been stopped by the driver or stack (either
+ * of the XOFF bits are set in the state).  Drivers should not need to call
+ * netif_xmit*stopped functions, they should only be using netif_tx_*.
+ */
 
 struct netdev_queue {
 /*
@@ -1718,7 +1730,7 @@ extern void __netif_schedule(struct Qdisc *q);
 
 static inline void netif_schedule_queue(struct netdev_queue *txq)
 {
-	if (!test_bit(__QUEUE_STATE_XOFF, &txq->state))
+	if (!(txq->state & QUEUE_STATE_ANY_XOFF))
 		__netif_schedule(txq->qdisc);
 }
 
@@ -1732,7 +1744,7 @@ static inline void netif_tx_schedule_all(struct net_device *dev)
 
 static inline void netif_tx_start_queue(struct netdev_queue *dev_queue)
 {
-	clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1764,7 +1776,7 @@ static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue)
 		return;
 	}
 #endif
-	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &dev_queue->state))
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state))
 		__netif_schedule(dev_queue->qdisc);
 }
 
@@ -1796,7 +1808,7 @@ static inline void netif_tx_stop_queue(struct netdev_queue *dev_queue)
 		pr_info("netif_stop_queue() cannot be called before register_netdev()\n");
 		return;
 	}
-	set_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	set_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1823,7 +1835,7 @@ static inline void netif_tx_stop_all_queues(struct net_device *dev)
 
 static inline int netif_tx_queue_stopped(const struct netdev_queue *dev_queue)
 {
-	return test_bit(__QUEUE_STATE_XOFF, &dev_queue->state);
+	return test_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state);
 }
 
 /**
@@ -1837,9 +1849,16 @@ static inline int netif_queue_stopped(const struct net_device *dev)
 	return netif_tx_queue_stopped(netdev_get_tx_queue(dev, 0));
 }
 
-static inline int netif_tx_queue_frozen_or_stopped(const struct netdev_queue *dev_queue)
+static inline int netif_xmit_stopped(const struct netdev_queue *dev_queue)
 {
-	return dev_queue->state & QUEUE_STATE_XOFF_OR_FROZEN;
+	return dev_queue->state & QUEUE_STATE_ANY_XOFF;
+}
+
+static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue)
+{
+	return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
+}
+
 }
 
 /**
@@ -1926,7 +1945,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index)
 	if (netpoll_trap())
 		return;
 #endif
-	if (test_and_clear_bit(__QUEUE_STATE_XOFF, &txq->state))
+	if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state))
 		__netif_schedule(txq->qdisc);
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index c7ef6c5d3782..cb8f753b4238 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2270,7 +2270,7 @@ gso:
 			return rc;
 		}
 		txq_trans_update(txq);
-		if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
+		if (unlikely(netif_xmit_stopped(txq) && skb->next))
 			return NETDEV_TX_BUSY;
 	} while (skb->next);
 
@@ -2558,7 +2558,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 
 			HARD_TX_LOCK(dev, txq, cpu);
 
-			if (!netif_tx_queue_stopped(txq)) {
+			if (!netif_xmit_stopped(txq)) {
 				__this_cpu_inc(xmit_recursion);
 				rc = dev_hard_start_xmit(skb, dev, txq);
 				__this_cpu_dec(xmit_recursion);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 1a7d8e2c9768..0d38808a2305 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -76,7 +76,7 @@ static void queue_process(struct work_struct *work)
 
 		local_irq_save(flags);
 		__netif_tx_lock(txq, smp_processor_id());
-		if (netif_tx_queue_frozen_or_stopped(txq) ||
+		if (netif_xmit_frozen_or_stopped(txq) ||
 		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
 			skb_queue_head(&npinfo->txq, skb);
 			__netif_tx_unlock(txq);
@@ -317,7 +317,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
-				if (!netif_tx_queue_stopped(txq)) {
+				if (!netif_xmit_stopped(txq)) {
 					status = ops->ndo_start_xmit(skb, dev);
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index aa53a35a631b..449fe0f068f8 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -3342,7 +3342,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev)
 
 	__netif_tx_lock_bh(txq);
 
-	if (unlikely(netif_tx_queue_frozen_or_stopped(txq))) {
+	if (unlikely(netif_xmit_frozen_or_stopped(txq))) {
 		ret = NETDEV_TX_BUSY;
 		pkt_dev->last_ok = 0;
 		goto unlock;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 79ac1458c2ba..67fc573e013a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -60,7 +60,7 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
 
 		/* check the reason of requeuing without tx lock first */
 		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-		if (!netif_tx_queue_frozen_or_stopped(txq)) {
+		if (!netif_xmit_frozen_or_stopped(txq)) {
 			q->gso_skb = NULL;
 			q->q.qlen--;
 		} else
@@ -121,7 +121,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 	spin_unlock(root_lock);
 
 	HARD_TX_LOCK(dev, txq, smp_processor_id());
-	if (!netif_tx_queue_frozen_or_stopped(txq))
+	if (!netif_xmit_frozen_or_stopped(txq))
 		ret = dev_hard_start_xmit(skb, dev, txq);
 
 	HARD_TX_UNLOCK(dev, txq);
@@ -143,7 +143,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
 		ret = dev_requeue_skb(skb, q);
 	}
 
-	if (ret && netif_tx_queue_frozen_or_stopped(txq))
+	if (ret && netif_xmit_frozen_or_stopped(txq))
 		ret = 0;
 
 	return ret;
@@ -242,7 +242,7 @@ static void dev_watchdog(unsigned long arg)
 				 * old device drivers set dev->trans_start
 				 */
 				trans_start = txq->trans_start ? : dev->trans_start;
-				if (netif_tx_queue_stopped(txq) &&
+				if (netif_xmit_stopped(txq) &&
 				    time_after(jiffies, (trans_start +
 							 dev->watchdog_timeo))) {
 					some_queue_timedout = 1;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index edc1950e0e77..49131d7a7446 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -107,7 +107,8 @@ static struct sk_buff *multiq_dequeue(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), q->curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), q->curband))) {
 			qdisc = q->queues[q->curband];
 			skb = qdisc->dequeue(qdisc);
 			if (skb) {
@@ -138,7 +139,8 @@ static struct sk_buff *multiq_peek(struct Qdisc *sch)
 		/* Check that target subqueue is available before
 		 * pulling an skb to avoid head-of-line blocking.
 		 */
-		if (!__netif_subqueue_stopped(qdisc_dev(sch), curband)) {
+		if (!netif_xmit_stopped(
+		    netdev_get_tx_queue(qdisc_dev(sch), curband))) {
 			qdisc = q->queues[curband];
 			skb = qdisc->ops->peek(qdisc);
 			if (skb)
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index a3b7120fcc74..283bfe3de59d 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -301,7 +301,7 @@ restart:
 
 		if (slave_txq->qdisc_sleeping != q)
 			continue;
-		if (__netif_subqueue_stopped(slave, subq) ||
+		if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
 		    !netif_running(slave)) {
 			busy = 1;
 			continue;
@@ -312,7 +312,7 @@ restart:
 			if (__netif_tx_trylock(slave_txq)) {
 				unsigned int length = qdisc_pkt_len(skb);
 
-				if (!netif_tx_queue_frozen_or_stopped(slave_txq) &&
+				if (!netif_xmit_frozen_or_stopped(slave_txq) &&
 				    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
 					txq_trans_update(slave_txq);
 					__netif_tx_unlock(slave_txq);
@@ -324,7 +324,7 @@ restart:
 				}
 				__netif_tx_unlock(slave_txq);
 			}
-			if (netif_queue_stopped(dev))
+			if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
 				busy = 1;
 			break;
 		case 1:
-- 
cgit v1.2.3


From c5d67bd78c5dc540e3461c36fb3d389fbe0de4c3 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:32:52 +0000
Subject: net: Add netdev interfaces for recording sends/comp

Add interfaces for drivers to call for recording number of packets and
bytes at send time and transmit completion.  Also, added a function to
"reset" a queue.  These will be used by Byte Queue Limits.

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d19f93265cac..9b24cc7a54d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1859,6 +1859,34 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu
 	return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN;
 }
 
+static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
+					unsigned int bytes)
+{
+}
+
+static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
+{
+	netdev_tx_sent_queue(netdev_get_tx_queue(dev, 0), bytes);
+}
+
+static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
+					     unsigned pkts, unsigned bytes)
+{
+}
+
+static inline void netdev_completed_queue(struct net_device *dev,
+					  unsigned pkts, unsigned bytes)
+{
+	netdev_tx_completed_queue(netdev_get_tx_queue(dev, 0), pkts, bytes);
+}
+
+static inline void netdev_tx_reset_queue(struct netdev_queue *q)
+{
+}
+
+static inline void netdev_reset_queue(struct net_device *dev_queue)
+{
+	netdev_tx_reset_queue(netdev_get_tx_queue(dev_queue, 0));
 }
 
 /**
-- 
cgit v1.2.3


From 114cf5802165ee93e3ab461c9c505cd94a08b800 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Mon, 28 Nov 2011 16:33:09 +0000
Subject: bql: Byte queue limits

Networking stack support for byte queue limits, uses dynamic queue
limits library.  Byte queue limits are maintained per transmit queue,
and a dql structure has been added to netdev_queue structure for this
purpose.

Configuration of bql is in the tx-<n> sysfs directory for the queue
under the byte_queue_limits directory.  Configuration includes:
limit_min, bql minimum limit
limit_max, bql maximum limit
hold_time, bql slack hold time

Also under the directory are:
limit, current byte limit
inflight, current number of bytes on the queue

Signed-off-by: Tom Herbert <therbert@google.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  32 ++++++++++-
 net/Kconfig               |   6 ++
 net/core/dev.c            |   3 +
 net/core/net-sysfs.c      | 140 +++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 172 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9b24cc7a54d1..97edb3215a5a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -43,6 +43,7 @@
 #include <linux/rculist.h>
 #include <linux/dmaengine.h>
 #include <linux/workqueue.h>
+#include <linux/dynamic_queue_limits.h>
 
 #include <linux/ethtool.h>
 #include <net/net_namespace.h>
@@ -541,7 +542,6 @@ struct netdev_queue {
  */
 	struct net_device	*dev;
 	struct Qdisc		*qdisc;
-	unsigned long		state;
 	struct Qdisc		*qdisc_sleeping;
 #ifdef CONFIG_SYSFS
 	struct kobject		kobj;
@@ -564,6 +564,12 @@ struct netdev_queue {
 	 * (/sys/class/net/DEV/Q/trans_timeout)
 	 */
 	unsigned long		trans_timeout;
+
+	unsigned long		state;
+
+#ifdef CONFIG_BQL
+	struct dql		dql;
+#endif
 } ____cacheline_aligned_in_smp;
 
 static inline int netdev_queue_numa_node_read(const struct netdev_queue *q)
@@ -1862,6 +1868,15 @@ static inline int netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_qu
 static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue,
 					unsigned int bytes)
 {
+#ifdef CONFIG_BQL
+	dql_queued(&dev_queue->dql, bytes);
+	if (unlikely(dql_avail(&dev_queue->dql) < 0)) {
+		set_bit(__QUEUE_STATE_STACK_XOFF, &dev_queue->state);
+		if (unlikely(dql_avail(&dev_queue->dql) >= 0))
+			clear_bit(__QUEUE_STATE_STACK_XOFF,
+			    &dev_queue->state);
+	}
+#endif
 }
 
 static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
@@ -1872,6 +1887,18 @@ static inline void netdev_sent_queue(struct net_device *dev, unsigned int bytes)
 static inline void netdev_tx_completed_queue(struct netdev_queue *dev_queue,
 					     unsigned pkts, unsigned bytes)
 {
+#ifdef CONFIG_BQL
+	if (likely(bytes)) {
+		dql_completed(&dev_queue->dql, bytes);
+		if (unlikely(test_bit(__QUEUE_STATE_STACK_XOFF,
+		    &dev_queue->state) &&
+		    dql_avail(&dev_queue->dql) >= 0)) {
+			if (test_and_clear_bit(__QUEUE_STATE_STACK_XOFF,
+			     &dev_queue->state))
+				netif_schedule_queue(dev_queue);
+		}
+	}
+#endif
 }
 
 static inline void netdev_completed_queue(struct net_device *dev,
@@ -1882,6 +1909,9 @@ static inline void netdev_completed_queue(struct net_device *dev,
 
 static inline void netdev_tx_reset_queue(struct netdev_queue *q)
 {
+#ifdef CONFIG_BQL
+	dql_reset(&q->dql);
+#endif
 }
 
 static inline void netdev_reset_queue(struct net_device *dev_queue)
diff --git a/net/Kconfig b/net/Kconfig
index 63d2c5dc36ff..2d998735c4d8 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -239,6 +239,12 @@ config NETPRIO_CGROUP
 	  Cgroup subsystem for use in assigning processes to network priorities on
 	  a per-interface basis
 
+config BQL
+	boolean
+	depends on SYSFS
+	select DQL
+	default y
+
 config HAVE_BPF_JIT
 	bool
 
diff --git a/net/core/dev.c b/net/core/dev.c
index cb8f753b4238..91a599122074 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5470,6 +5470,9 @@ static void netdev_init_one_queue(struct net_device *dev,
 	queue->xmit_lock_owner = -1;
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
+#ifdef CONFIG_BQL
+	dql_init(&queue->dql, HZ);
+#endif
 }
 
 static int netif_alloc_netdev_queues(struct net_device *dev)
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index b17c14a0fce9..3bf72b638d34 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -21,6 +21,7 @@
 #include <linux/wireless.h>
 #include <linux/vmalloc.h>
 #include <linux/export.h>
+#include <linux/jiffies.h>
 #include <net/wext.h>
 
 #include "net-sysfs.h"
@@ -845,6 +846,116 @@ static ssize_t show_trans_timeout(struct netdev_queue *queue,
 static struct netdev_queue_attribute queue_trans_timeout =
 	__ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL);
 
+#ifdef CONFIG_BQL
+/*
+ * Byte queue limits sysfs structures and functions.
+ */
+static ssize_t bql_show(char *buf, unsigned int value)
+{
+	return sprintf(buf, "%u\n", value);
+}
+
+static ssize_t bql_set(const char *buf, const size_t count,
+		       unsigned int *pvalue)
+{
+	unsigned int value;
+	int err;
+
+	if (!strcmp(buf, "max") || !strcmp(buf, "max\n"))
+		value = DQL_MAX_LIMIT;
+	else {
+		err = kstrtouint(buf, 10, &value);
+		if (err < 0)
+			return err;
+		if (value > DQL_MAX_LIMIT)
+			return -EINVAL;
+	}
+
+	*pvalue = value;
+
+	return count;
+}
+
+static ssize_t bql_show_hold_time(struct netdev_queue *queue,
+				  struct netdev_queue_attribute *attr,
+				  char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time));
+}
+
+static ssize_t bql_set_hold_time(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attribute,
+				 const char *buf, size_t len)
+{
+	struct dql *dql = &queue->dql;
+	unsigned value;
+	int err;
+
+	err = kstrtouint(buf, 10, &value);
+	if (err < 0)
+		return err;
+
+	dql->slack_hold_time = msecs_to_jiffies(value);
+
+	return len;
+}
+
+static struct netdev_queue_attribute bql_hold_time_attribute =
+	__ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time,
+	    bql_set_hold_time);
+
+static ssize_t bql_show_inflight(struct netdev_queue *queue,
+				 struct netdev_queue_attribute *attr,
+				 char *buf)
+{
+	struct dql *dql = &queue->dql;
+
+	return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed);
+}
+
+static struct netdev_queue_attribute bql_inflight_attribute =
+	__ATTR(inflight, S_IRUGO | S_IWUSR, bql_show_inflight, NULL);
+
+#define BQL_ATTR(NAME, FIELD)						\
+static ssize_t bql_show_ ## NAME(struct netdev_queue *queue,		\
+				 struct netdev_queue_attribute *attr,	\
+				 char *buf)				\
+{									\
+	return bql_show(buf, queue->dql.FIELD);				\
+}									\
+									\
+static ssize_t bql_set_ ## NAME(struct netdev_queue *queue,		\
+				struct netdev_queue_attribute *attr,	\
+				const char *buf, size_t len)		\
+{									\
+	return bql_set(buf, len, &queue->dql.FIELD);			\
+}									\
+									\
+static struct netdev_queue_attribute bql_ ## NAME ## _attribute =	\
+	__ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME,		\
+	    bql_set_ ## NAME);
+
+BQL_ATTR(limit, limit)
+BQL_ATTR(limit_max, max_limit)
+BQL_ATTR(limit_min, min_limit)
+
+static struct attribute *dql_attrs[] = {
+	&bql_limit_attribute.attr,
+	&bql_limit_max_attribute.attr,
+	&bql_limit_min_attribute.attr,
+	&bql_hold_time_attribute.attr,
+	&bql_inflight_attribute.attr,
+	NULL
+};
+
+static struct attribute_group dql_group = {
+	.name  = "byte_queue_limits",
+	.attrs  = dql_attrs,
+};
+#endif /* CONFIG_BQL */
+
 #ifdef CONFIG_XPS
 static inline unsigned int get_netdev_queue_index(struct netdev_queue *queue)
 {
@@ -1096,17 +1207,17 @@ static struct attribute *netdev_queue_default_attrs[] = {
 	NULL
 };
 
-#ifdef CONFIG_XPS
 static void netdev_queue_release(struct kobject *kobj)
 {
 	struct netdev_queue *queue = to_netdev_queue(kobj);
 
+#ifdef CONFIG_XPS
 	xps_queue_release(queue);
+#endif
 
 	memset(kobj, 0, sizeof(*kobj));
 	dev_put(queue->dev);
 }
-#endif /* CONFIG_XPS */
 
 static struct kobj_type netdev_queue_ktype = {
 	.sysfs_ops = &netdev_queue_sysfs_ops,
@@ -1125,14 +1236,21 @@ static int netdev_queue_add_kobject(struct net_device *net, int index)
 	kobj->kset = net->queues_kset;
 	error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL,
 	    "tx-%u", index);
-	if (error) {
-		kobject_put(kobj);
-		return error;
-	}
+	if (error)
+		goto exit;
+
+#ifdef CONFIG_BQL
+	error = sysfs_create_group(kobj, &dql_group);
+	if (error)
+		goto exit;
+#endif
 
 	kobject_uevent(kobj, KOBJ_ADD);
 	dev_hold(queue->dev);
 
+	return 0;
+exit:
+	kobject_put(kobj);
 	return error;
 }
 #endif /* CONFIG_SYSFS */
@@ -1152,8 +1270,14 @@ netdev_queue_update_kobjects(struct net_device *net, int old_num, int new_num)
 		}
 	}
 
-	while (--i >= new_num)
-		kobject_put(&net->_tx[i].kobj);
+	while (--i >= new_num) {
+		struct netdev_queue *queue = net->_tx + i;
+
+#ifdef CONFIG_BQL
+		sysfs_remove_group(&queue->kobj, &dql_group);
+#endif
+		kobject_put(&queue->kobj);
+	}
 
 	return error;
 #else
-- 
cgit v1.2.3


From 596b9b68ef118f7409afbc78487263e08ef96261 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 25 Jul 2011 00:01:25 +0000
Subject: neigh: Add infrastructure for allocating device neigh privates.

netdev->neigh_priv_len records the private area length.

This will trigger for neigh_table objects which set tbl->entry_size
to zero, and the first instances of this will be forthcoming.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c |  2 ++
 include/linux/netdevice.h                 |  1 +
 net/atm/clip.c                            |  1 +
 net/core/neighbour.c                      | 14 +++++++++++---
 4 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index efd7a9636aff..57ae9b9265e3 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -1218,6 +1218,8 @@ static struct net_device *ipoib_add_port(const char *format,
 	priv->dev->mtu  = IPOIB_UD_MTU(priv->max_ib_mtu);
 	priv->mcast_mtu  = priv->admin_mtu = priv->dev->mtu;
 
+	priv->dev->neigh_priv_len = sizeof(struct ipoib_neigh);
+
 	result = ib_query_pkey(hca, port, 0, &priv->pkey);
 	if (result) {
 		printk(KERN_WARNING "%s: ib_query_pkey port %d failed (ret = %d)\n",
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97edb3215a5a..5462c2cd5eab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1080,6 +1080,7 @@ struct net_device {
 	unsigned char		perm_addr[MAX_ADDR_LEN]; /* permanent hw address */
 	unsigned char		addr_assign_type; /* hw address assignment type */
 	unsigned char		addr_len;	/* hardware address length	*/
+	unsigned char		neigh_priv_len;
 	unsigned short          dev_id;		/* for shared network cards */
 
 	spinlock_t		addr_list_lock;
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 11439a7f6782..aea7cad2ece1 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -535,6 +535,7 @@ static void clip_setup(struct net_device *dev)
 {
 	dev->netdev_ops = &clip_netdev_ops;
 	dev->type = ARPHRD_ATM;
+	dev->neigh_priv_len = sizeof(struct atmarp_entry);
 	dev->hard_header_len = RFC1483LLC_LEN;
 	dev->mtu = RFC1626_MTU;
 	dev->tx_queue_len = 100;	/* "normal" queue (packets) */
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 661ad12e0cc9..ef750ff7497e 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -273,7 +273,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
 }
 EXPORT_SYMBOL(neigh_ifdown);
 
-static struct neighbour *neigh_alloc(struct neigh_table *tbl)
+static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
 {
 	struct neighbour *n = NULL;
 	unsigned long now = jiffies;
@@ -288,7 +288,15 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
 			goto out_entries;
 	}
 
-	n = kzalloc(tbl->entry_size, GFP_ATOMIC);
+	if (tbl->entry_size)
+		n = kzalloc(tbl->entry_size, GFP_ATOMIC);
+	else {
+		int sz = sizeof(*n) + tbl->key_len;
+
+		sz = ALIGN(sz, NEIGH_PRIV_ALIGN);
+		sz += dev->neigh_priv_len;
+		n = kzalloc(sz, GFP_ATOMIC);
+	}
 	if (!n)
 		goto out_entries;
 
@@ -463,7 +471,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 	u32 hash_val;
 	int key_len = tbl->key_len;
 	int error;
-	struct neighbour *n1, *rc, *n = neigh_alloc(tbl);
+	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
 	struct neigh_hash_table *nht;
 
 	if (!n) {
-- 
cgit v1.2.3


From da6a8fa0275e2178c44a875374cae80d057538d1 Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Mon, 25 Jul 2011 00:01:38 +0000
Subject: neigh: Add device constructor/destructor capability.

If the neigh entry has device private state, it will need
constructor/destructor ops.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/neighbour.c      | 15 ++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5462c2cd5eab..1c4ddb37f2b5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -974,6 +974,8 @@ struct net_device_ops {
 						    netdev_features_t features);
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
+	int			(*ndo_neigh_construct)(struct neighbour *n);
+	int			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index ef750ff7497e..cdf8dc34f0ba 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -489,6 +489,14 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 		goto out_neigh_release;
 	}
 
+	if (dev->netdev_ops->ndo_neigh_construct) {
+		error = dev->netdev_ops->ndo_neigh_construct(n);
+		if (error < 0) {
+			rc = ERR_PTR(error);
+			goto out_neigh_release;
+		}
+	}
+
 	/* Device specific setup. */
 	if (n->parms->neigh_setup &&
 	    (error = n->parms->neigh_setup(n)) < 0) {
@@ -692,6 +700,8 @@ static inline void neigh_parms_put(struct neigh_parms *parms)
  */
 void neigh_destroy(struct neighbour *neigh)
 {
+	struct net_device *dev = neigh->dev;
+
 	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
 
 	if (!neigh->dead) {
@@ -707,7 +717,10 @@ void neigh_destroy(struct neighbour *neigh)
 	skb_queue_purge(&neigh->arp_queue);
 	neigh->arp_queue_len_bytes = 0;
 
-	dev_put(neigh->dev);
+	if (dev->netdev_ops->ndo_neigh_destroy)
+		dev->netdev_ops->ndo_neigh_destroy(neigh);
+
+	dev_put(dev);
 	neigh_parms_put(neigh->parms);
 
 	NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
-- 
cgit v1.2.3


From 7bc0f28c7a0cd19f40e5a6e4d0a117db9a4e4cd5 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Wed, 30 Nov 2011 12:20:26 +0000
Subject: netem: rate extension

Currently netem is not in the ability to emulate channel bandwidth. Only static
delay (and optional random jitter) can be configured.

To emulate the channel rate the token bucket filter (sch_tbf) can be used.  But
TBF has some major emulation flaws. The buffer (token bucket depth/rate) cannot
be 0. Also the idea behind TBF is that the credit (token in buckets) fills if
no packet is transmitted. So that there is always a "positive" credit for new
packets. In real life this behavior contradicts the law of nature where
nothing can travel faster as speed of light. E.g.: on an emulated 1000 byte/s
link a small IPv4/TCP SYN packet with ~50 byte require ~0.05 seconds - not 0
seconds.

Netem is an excellent place to implement a rate limiting feature: static
delay is already implemented, tfifo already has time information and the
user can skip TBF configuration completely.

This patch implement rate feature which can be configured via tc. e.g:

	tc qdisc add dev eth0 root netem rate 10kbit

To emulate a link of 5000byte/s and add an additional static delay of 10ms:

	tc qdisc add dev eth0 root netem delay 10ms rate 5KBps

Note: similar to TBF the rate extension is bounded to the kernel timing
system. Depending on the architecture timer granularity, higher rates (e.g.
10mbit/s and higher) tend to transmission bursts. Also note: further queues
living in network adaptors; see ethtool(8).

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@drr.davemloft.net>
---
 include/linux/pkt_sched.h |  5 +++++
 net/sched/sch_netem.c     | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 7281d5acf2f9..fb556dc594d3 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -465,6 +465,7 @@ enum {
 	TCA_NETEM_REORDER,
 	TCA_NETEM_CORRUPT,
 	TCA_NETEM_LOSS,
+	TCA_NETEM_RATE,
 	__TCA_NETEM_MAX,
 };
 
@@ -495,6 +496,10 @@ struct tc_netem_corrupt {
 	__u32	correlation;
 };
 
+struct tc_netem_rate {
+	__u32	rate;	/* byte/s */
+};
+
 enum {
 	NETEM_LOSS_UNSPEC,
 	NETEM_LOSS_GI,		/* General Intuitive - 4 state model */
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index eb3b9a86c6ed..9b7af9f1272f 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -79,6 +79,7 @@ struct netem_sched_data {
 	u32 duplicate;
 	u32 reorder;
 	u32 corrupt;
+	u32 rate;
 
 	struct crndstate {
 		u32 last;
@@ -298,6 +299,11 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
+static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
+{
+	return PSCHED_NS2TICKS((u64)len * NSEC_PER_SEC / rate);
+}
+
 /*
  * Insert one skb into qdisc.
  * Note: parent depends on return value to account for queue length.
@@ -371,6 +377,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 				  &q->delay_cor, q->delay_dist);
 
 		now = psched_get_time();
+
+		if (q->rate) {
+			struct sk_buff_head *list = &q->qdisc->q;
+
+			delay += packet_len_2_sched_time(skb->len, q->rate);
+
+			if (!skb_queue_empty(list)) {
+				/*
+				 * Last packet in queue is reference point (now).
+				 * First packet in queue is already in flight,
+				 * calculate this time bonus and substract
+				 * from delay.
+				 */
+				delay -= now - netem_skb_cb(skb_peek(list))->time_to_send;
+				now = netem_skb_cb(skb_peek_tail(list))->time_to_send;
+			}
+		}
+
 		cb->time_to_send = now + delay;
 		++q->counter;
 		ret = qdisc_enqueue(skb, q->qdisc);
@@ -535,6 +559,14 @@ static void get_corrupt(struct Qdisc *sch, const struct nlattr *attr)
 	init_crandom(&q->corrupt_cor, r->correlation);
 }
 
+static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_rate *r = nla_data(attr);
+
+	q->rate = r->rate;
+}
+
 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -594,6 +626,7 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = {
 	[TCA_NETEM_CORR]	= { .len = sizeof(struct tc_netem_corr) },
 	[TCA_NETEM_REORDER]	= { .len = sizeof(struct tc_netem_reorder) },
 	[TCA_NETEM_CORRUPT]	= { .len = sizeof(struct tc_netem_corrupt) },
+	[TCA_NETEM_RATE]	= { .len = sizeof(struct tc_netem_rate) },
 	[TCA_NETEM_LOSS]	= { .type = NLA_NESTED },
 };
 
@@ -666,6 +699,9 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_NETEM_CORRUPT])
 		get_corrupt(sch, tb[TCA_NETEM_CORRUPT]);
 
+	if (tb[TCA_NETEM_RATE])
+		get_rate(sch, tb[TCA_NETEM_RATE]);
+
 	q->loss_model = CLG_RANDOM;
 	if (tb[TCA_NETEM_LOSS])
 		ret = get_loss_clg(sch, tb[TCA_NETEM_LOSS]);
@@ -846,6 +882,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_corr cor;
 	struct tc_netem_reorder reorder;
 	struct tc_netem_corrupt corrupt;
+	struct tc_netem_rate rate;
 
 	qopt.latency = q->latency;
 	qopt.jitter = q->jitter;
@@ -868,6 +905,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	corrupt.correlation = q->corrupt_cor.rho;
 	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
+	rate.rate = q->rate;
+	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
+
 	if (dump_loss_model(q, skb) != 0)
 		goto nla_put_failure;
 
-- 
cgit v1.2.3


From 2a367c3a82557cd11a04949ef2160658987fa772 Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Wed, 30 Nov 2011 23:41:18 +0000
Subject: can: cc770: add driver core for the Bosch CC770 and Intel AN82527

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/Kconfig            |   2 +
 drivers/net/can/Makefile           |   1 +
 drivers/net/can/cc770/Kconfig      |   3 +
 drivers/net/can/cc770/Makefile     |   7 +
 drivers/net/can/cc770/cc770.c      | 881 +++++++++++++++++++++++++++++++++++++
 drivers/net/can/cc770/cc770.h      | 203 +++++++++
 include/linux/can/platform/cc770.h |  33 ++
 7 files changed, 1130 insertions(+)
 create mode 100644 drivers/net/can/cc770/Kconfig
 create mode 100644 drivers/net/can/cc770/Makefile
 create mode 100644 drivers/net/can/cc770/cc770.c
 create mode 100644 drivers/net/can/cc770/cc770.h
 create mode 100644 include/linux/can/platform/cc770.h

(limited to 'include/linux')

diff --git a/drivers/net/can/Kconfig b/drivers/net/can/Kconfig
index f6c98fb4a517..ab45758c49a4 100644
--- a/drivers/net/can/Kconfig
+++ b/drivers/net/can/Kconfig
@@ -116,6 +116,8 @@ source "drivers/net/can/sja1000/Kconfig"
 
 source "drivers/net/can/c_can/Kconfig"
 
+source "drivers/net/can/cc770/Kconfig"
+
 source "drivers/net/can/usb/Kconfig"
 
 source "drivers/net/can/softing/Kconfig"
diff --git a/drivers/net/can/Makefile b/drivers/net/can/Makefile
index 24ebfe8d758a..938be37b670c 100644
--- a/drivers/net/can/Makefile
+++ b/drivers/net/can/Makefile
@@ -14,6 +14,7 @@ obj-y				+= softing/
 obj-$(CONFIG_CAN_SJA1000)	+= sja1000/
 obj-$(CONFIG_CAN_MSCAN)		+= mscan/
 obj-$(CONFIG_CAN_C_CAN)		+= c_can/
+obj-$(CONFIG_CAN_CC770)		+= cc770/
 obj-$(CONFIG_CAN_AT91)		+= at91_can.o
 obj-$(CONFIG_CAN_TI_HECC)	+= ti_hecc.o
 obj-$(CONFIG_CAN_MCP251X)	+= mcp251x.o
diff --git a/drivers/net/can/cc770/Kconfig b/drivers/net/can/cc770/Kconfig
new file mode 100644
index 000000000000..225131b7ac93
--- /dev/null
+++ b/drivers/net/can/cc770/Kconfig
@@ -0,0 +1,3 @@
+menuconfig CAN_CC770
+	tristate "Bosch CC770 and Intel AN82527 devices"
+	depends on CAN_DEV && HAS_IOMEM
diff --git a/drivers/net/can/cc770/Makefile b/drivers/net/can/cc770/Makefile
new file mode 100644
index 000000000000..34e818026157
--- /dev/null
+++ b/drivers/net/can/cc770/Makefile
@@ -0,0 +1,7 @@
+#
+#  Makefile for the Bosch CC770 CAN controller drivers.
+#
+
+obj-$(CONFIG_CAN_CC770) += cc770.o
+
+ccflags-$(CONFIG_CAN_DEBUG_DEVICES) := -DDEBUG
diff --git a/drivers/net/can/cc770/cc770.c b/drivers/net/can/cc770/cc770.c
new file mode 100644
index 000000000000..766896747643
--- /dev/null
+++ b/drivers/net/can/cc770/cc770.c
@@ -0,0 +1,881 @@
+/*
+ * Core driver for the CC770 and AN82527 CAN controllers
+ *
+ * Copyright (C) 2009, 2011 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/interrupt.h>
+#include <linux/ptrace.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/skbuff.h>
+#include <linux/delay.h>
+
+#include <linux/can.h>
+#include <linux/can/dev.h>
+#include <linux/can/error.h>
+#include <linux/can/dev.h>
+#include <linux/can/platform/cc770.h>
+
+#include "cc770.h"
+
+MODULE_AUTHOR("Wolfgang Grandegger <wg@grandegger.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION(KBUILD_MODNAME "CAN netdevice driver");
+
+/*
+ * The CC770 is a CAN controller from Bosch, which is 100% compatible
+ * with the AN82527 from Intel, but with "bugs" being fixed and some
+ * additional functionality, mainly:
+ *
+ * 1. RX and TX error counters are readable.
+ * 2. Support of silent (listen-only) mode.
+ * 3. Message object 15 can receive all types of frames, also RTR and EFF.
+ *
+ * Details are available from Bosch's "CC770_Product_Info_2007-01.pdf",
+ * which explains in detail the compatibility between the CC770 and the
+ * 82527. This driver use the additional functionality 3. on real CC770
+ * devices. Unfortunately, the CC770 does still not store the message
+ * identifier of received remote transmission request frames and
+ * therefore it's set to 0.
+ *
+ * The message objects 1..14 can be used for TX and RX while the message
+ * objects 15 is optimized for RX. It has a shadow register for reliable
+ * data receiption under heavy bus load. Therefore it makes sense to use
+ * this message object for the needed use case. The frame type (EFF/SFF)
+ * for the message object 15 can be defined via kernel module parameter
+ * "msgobj15_eff". If not equal 0, it will receive 29-bit EFF frames,
+ * otherwise 11 bit SFF messages.
+ */
+static int msgobj15_eff;
+module_param(msgobj15_eff, int, S_IRUGO);
+MODULE_PARM_DESC(msgobj15_eff, "Extended 29-bit frames for message object 15 "
+		 "(default: 11-bit standard frames)");
+
+static int i82527_compat;
+module_param(i82527_compat, int, S_IRUGO);
+MODULE_PARM_DESC(i82527_compat, "Strict Intel 82527 comptibility mode "
+		 "without using additional functions");
+
+/*
+ * This driver uses the last 5 message objects 11..15. The definitions
+ * and structure below allows to configure and assign them to the real
+ * message object.
+ */
+static unsigned char cc770_obj_flags[CC770_OBJ_MAX] = {
+	[CC770_OBJ_RX0] = CC770_OBJ_FLAG_RX,
+	[CC770_OBJ_RX1] = CC770_OBJ_FLAG_RX | CC770_OBJ_FLAG_EFF,
+	[CC770_OBJ_RX_RTR0] = CC770_OBJ_FLAG_RX | CC770_OBJ_FLAG_RTR,
+	[CC770_OBJ_RX_RTR1] = CC770_OBJ_FLAG_RX | CC770_OBJ_FLAG_RTR |
+			      CC770_OBJ_FLAG_EFF,
+	[CC770_OBJ_TX] = 0,
+};
+
+static struct can_bittiming_const cc770_bittiming_const = {
+	.name = KBUILD_MODNAME,
+	.tseg1_min = 1,
+	.tseg1_max = 16,
+	.tseg2_min = 1,
+	.tseg2_max = 8,
+	.sjw_max = 4,
+	.brp_min = 1,
+	.brp_max = 64,
+	.brp_inc = 1,
+};
+
+static inline int intid2obj(unsigned int intid)
+{
+	if (intid == 2)
+		return 0;
+	else
+		return MSGOBJ_LAST + 2 - intid;
+}
+
+static void enable_all_objs(const struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	u8 msgcfg;
+	unsigned char obj_flags;
+	unsigned int o, mo;
+
+	for (o = 0; o < ARRAY_SIZE(priv->obj_flags); o++) {
+		obj_flags = priv->obj_flags[o];
+		mo = obj2msgobj(o);
+
+		if (obj_flags & CC770_OBJ_FLAG_RX) {
+			/*
+			 * We don't need extra objects for RTR and EFF if
+			 * the additional CC770 functions are enabled.
+			 */
+			if (priv->control_normal_mode & CTRL_EAF) {
+				if (o > 0)
+					continue;
+				netdev_dbg(dev, "Message object %d for "
+					   "RX data, RTR, SFF and EFF\n", mo);
+			} else {
+				netdev_dbg(dev,
+					   "Message object %d for RX %s %s\n",
+					   mo, obj_flags & CC770_OBJ_FLAG_RTR ?
+					   "RTR" : "data",
+					   obj_flags & CC770_OBJ_FLAG_EFF ?
+					   "EFF" : "SFF");
+			}
+
+			if (obj_flags & CC770_OBJ_FLAG_EFF)
+				msgcfg = MSGCFG_XTD;
+			else
+				msgcfg = 0;
+			if (obj_flags & CC770_OBJ_FLAG_RTR)
+				msgcfg |= MSGCFG_DIR;
+
+			cc770_write_reg(priv, msgobj[mo].config, msgcfg);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_SET | TXIE_RES |
+					RXIE_SET | INTPND_RES);
+
+			if (obj_flags & CC770_OBJ_FLAG_RTR)
+				cc770_write_reg(priv, msgobj[mo].ctrl1,
+						NEWDAT_RES | CPUUPD_SET |
+						TXRQST_RES | RMTPND_RES);
+			else
+				cc770_write_reg(priv, msgobj[mo].ctrl1,
+						NEWDAT_RES | MSGLST_RES |
+						TXRQST_RES | RMTPND_RES);
+		} else {
+			netdev_dbg(dev, "Message object %d for "
+				   "TX data, RTR, SFF and EFF\n", mo);
+
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					RMTPND_RES | TXRQST_RES |
+					CPUUPD_RES | NEWDAT_RES);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_RES | TXIE_RES |
+					RXIE_RES | INTPND_RES);
+		}
+	}
+}
+
+static void disable_all_objs(const struct cc770_priv *priv)
+{
+	int o, mo;
+
+	for (o = 0; o <  ARRAY_SIZE(priv->obj_flags); o++) {
+		mo = obj2msgobj(o);
+
+		if (priv->obj_flags[o] & CC770_OBJ_FLAG_RX) {
+			if (o > 0 && priv->control_normal_mode & CTRL_EAF)
+				continue;
+
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					NEWDAT_RES | MSGLST_RES |
+					TXRQST_RES | RMTPND_RES);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_RES | TXIE_RES |
+					RXIE_RES | INTPND_RES);
+		} else {
+			/* Clear message object for send */
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					RMTPND_RES | TXRQST_RES |
+					CPUUPD_RES | NEWDAT_RES);
+			cc770_write_reg(priv, msgobj[mo].ctrl0,
+					MSGVAL_RES | TXIE_RES |
+					RXIE_RES | INTPND_RES);
+		}
+	}
+}
+
+static void set_reset_mode(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* Enable configuration and puts chip in bus-off, disable interrupts */
+	cc770_write_reg(priv, control, CTRL_CCE | CTRL_INI);
+
+	priv->can.state = CAN_STATE_STOPPED;
+
+	/* Clear interrupts */
+	cc770_read_reg(priv, interrupt);
+
+	/* Clear status register */
+	cc770_write_reg(priv, status, 0);
+
+	/* Disable all used message objects */
+	disable_all_objs(priv);
+}
+
+static void set_normal_mode(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* Clear interrupts */
+	cc770_read_reg(priv, interrupt);
+
+	/* Clear status register and pre-set last error code */
+	cc770_write_reg(priv, status, STAT_LEC_MASK);
+
+	/* Enable all used message objects*/
+	enable_all_objs(dev);
+
+	/*
+	 * Clear bus-off, interrupts only for errors,
+	 * not for status change
+	 */
+	cc770_write_reg(priv, control, priv->control_normal_mode);
+
+	priv->can.state = CAN_STATE_ERROR_ACTIVE;
+}
+
+static void chipset_init(struct cc770_priv *priv)
+{
+	int mo, id, data;
+
+	/* Enable configuration and put chip in bus-off, disable interrupts */
+	cc770_write_reg(priv, control, (CTRL_CCE | CTRL_INI));
+
+	/* Set CLKOUT divider and slew rates */
+	cc770_write_reg(priv, clkout, priv->clkout);
+
+	/* Configure CPU interface / CLKOUT enable */
+	cc770_write_reg(priv, cpu_interface, priv->cpu_interface);
+
+	/* Set bus configuration  */
+	cc770_write_reg(priv, bus_config, priv->bus_config);
+
+	/* Clear interrupts */
+	cc770_read_reg(priv, interrupt);
+
+	/* Clear status register */
+	cc770_write_reg(priv, status, 0);
+
+	/* Clear and invalidate message objects */
+	for (mo = MSGOBJ_FIRST; mo <= MSGOBJ_LAST; mo++) {
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				INTPND_UNC | RXIE_RES |
+				TXIE_RES | MSGVAL_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				INTPND_RES | RXIE_RES |
+				TXIE_RES | MSGVAL_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl1,
+				NEWDAT_RES | MSGLST_RES |
+				TXRQST_RES | RMTPND_RES);
+		for (data = 0; data < 8; data++)
+			cc770_write_reg(priv, msgobj[mo].data[data], 0);
+		for (id = 0; id < 4; id++)
+			cc770_write_reg(priv, msgobj[mo].id[id], 0);
+		cc770_write_reg(priv, msgobj[mo].config, 0);
+	}
+
+	/* Set all global ID masks to "don't care" */
+	cc770_write_reg(priv, global_mask_std[0], 0);
+	cc770_write_reg(priv, global_mask_std[1], 0);
+	cc770_write_reg(priv, global_mask_ext[0], 0);
+	cc770_write_reg(priv, global_mask_ext[1], 0);
+	cc770_write_reg(priv, global_mask_ext[2], 0);
+	cc770_write_reg(priv, global_mask_ext[3], 0);
+
+}
+
+static int cc770_probe_chip(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* Enable configuration, put chip in bus-off, disable ints */
+	cc770_write_reg(priv, control, CTRL_CCE | CTRL_EAF | CTRL_INI);
+	/* Configure cpu interface / CLKOUT disable */
+	cc770_write_reg(priv, cpu_interface, priv->cpu_interface);
+
+	/*
+	 * Check if hardware reset is still inactive or maybe there
+	 * is no chip in this address space
+	 */
+	if (cc770_read_reg(priv, cpu_interface) & CPUIF_RST) {
+		netdev_info(dev, "probing @0x%p failed (reset)\n",
+			    priv->reg_base);
+		return -ENODEV;
+	}
+
+	/* Write and read back test pattern (some arbitrary values) */
+	cc770_write_reg(priv, msgobj[1].data[1], 0x25);
+	cc770_write_reg(priv, msgobj[2].data[3], 0x52);
+	cc770_write_reg(priv, msgobj[10].data[6], 0xc3);
+	if ((cc770_read_reg(priv, msgobj[1].data[1]) != 0x25) ||
+	    (cc770_read_reg(priv, msgobj[2].data[3]) != 0x52) ||
+	    (cc770_read_reg(priv, msgobj[10].data[6]) != 0xc3)) {
+		netdev_info(dev, "probing @0x%p failed (pattern)\n",
+			    priv->reg_base);
+		return -ENODEV;
+	}
+
+	/* Check if this chip is a CC770 supporting additional functions */
+	if (cc770_read_reg(priv, control) & CTRL_EAF)
+		priv->control_normal_mode |= CTRL_EAF;
+
+	return 0;
+}
+
+static void cc770_start(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	/* leave reset mode */
+	if (priv->can.state != CAN_STATE_STOPPED)
+		set_reset_mode(dev);
+
+	/* leave reset mode */
+	set_normal_mode(dev);
+}
+
+static int cc770_set_mode(struct net_device *dev, enum can_mode mode)
+{
+	switch (mode) {
+	case CAN_MODE_START:
+		cc770_start(dev);
+		netif_wake_queue(dev);
+		break;
+
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int cc770_set_bittiming(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct can_bittiming *bt = &priv->can.bittiming;
+	u8 btr0, btr1;
+
+	btr0 = ((bt->brp - 1) & 0x3f) | (((bt->sjw - 1) & 0x3) << 6);
+	btr1 = ((bt->prop_seg + bt->phase_seg1 - 1) & 0xf) |
+		(((bt->phase_seg2 - 1) & 0x7) << 4);
+	if (priv->can.ctrlmode & CAN_CTRLMODE_3_SAMPLES)
+		btr1 |= 0x80;
+
+	netdev_info(dev, "setting BTR0=0x%02x BTR1=0x%02x\n", btr0, btr1);
+
+	cc770_write_reg(priv, bit_timing_0, btr0);
+	cc770_write_reg(priv, bit_timing_1, btr1);
+
+	return 0;
+}
+
+static int cc770_get_berr_counter(const struct net_device *dev,
+				  struct can_berr_counter *bec)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+
+	bec->txerr = cc770_read_reg(priv, tx_error_counter);
+	bec->rxerr = cc770_read_reg(priv, rx_error_counter);
+
+	return 0;
+}
+
+static netdev_tx_t cc770_start_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct can_frame *cf = (struct can_frame *)skb->data;
+	unsigned int mo = obj2msgobj(CC770_OBJ_TX);
+	u8 dlc, rtr;
+	u32 id;
+	int i;
+
+	if (can_dropped_invalid_skb(dev, skb))
+		return NETDEV_TX_OK;
+
+	if ((cc770_read_reg(priv,
+			    msgobj[mo].ctrl1) & TXRQST_UNC) == TXRQST_SET) {
+		netdev_err(dev, "TX register is still occupied!\n");
+		return NETDEV_TX_BUSY;
+	}
+
+	netif_stop_queue(dev);
+
+	dlc = cf->can_dlc;
+	id = cf->can_id;
+	if (cf->can_id & CAN_RTR_FLAG)
+		rtr = 0;
+	else
+		rtr = MSGCFG_DIR;
+	cc770_write_reg(priv, msgobj[mo].ctrl1,
+			RMTPND_RES | TXRQST_RES | CPUUPD_SET | NEWDAT_RES);
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_SET | TXIE_SET | RXIE_RES | INTPND_RES);
+	if (id & CAN_EFF_FLAG) {
+		id &= CAN_EFF_MASK;
+		cc770_write_reg(priv, msgobj[mo].config,
+				(dlc << 4) | rtr | MSGCFG_XTD);
+		cc770_write_reg(priv, msgobj[mo].id[3], id << 3);
+		cc770_write_reg(priv, msgobj[mo].id[2], id >> 5);
+		cc770_write_reg(priv, msgobj[mo].id[1], id >> 13);
+		cc770_write_reg(priv, msgobj[mo].id[0], id >> 21);
+	} else {
+		id &= CAN_SFF_MASK;
+		cc770_write_reg(priv, msgobj[mo].config, (dlc << 4) | rtr);
+		cc770_write_reg(priv, msgobj[mo].id[0], id >> 3);
+		cc770_write_reg(priv, msgobj[mo].id[1], id << 5);
+	}
+
+	for (i = 0; i < dlc; i++)
+		cc770_write_reg(priv, msgobj[mo].data[i], cf->data[i]);
+
+	cc770_write_reg(priv, msgobj[mo].ctrl1,
+			RMTPND_RES | TXRQST_SET | CPUUPD_RES | NEWDAT_UNC);
+
+	stats->tx_bytes += dlc;
+
+	can_put_echo_skb(skb, dev, 0);
+
+	/*
+	 * HM: We had some cases of repeated IRQs so make sure the
+	 * INT is acknowledged I know it's already further up, but
+	 * doing again fixed the issue
+	 */
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+
+	return NETDEV_TX_OK;
+}
+
+static void cc770_rx(struct net_device *dev, unsigned int mo, u8 ctrl1)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u8 config;
+	u32 id;
+	int i;
+
+	skb = alloc_can_skb(dev, &cf);
+	if (!skb)
+		return;
+
+	config = cc770_read_reg(priv, msgobj[mo].config);
+
+	if (ctrl1 & RMTPND_SET) {
+		/*
+		 * Unfortunately, the chip does not store the real message
+		 * identifier of the received remote transmission request
+		 * frame. Therefore we set it to 0.
+		 */
+		cf->can_id = CAN_RTR_FLAG;
+		if (config & MSGCFG_XTD)
+			cf->can_id |= CAN_EFF_FLAG;
+		cf->can_dlc = 0;
+	} else {
+		if (config & MSGCFG_XTD) {
+			id = cc770_read_reg(priv, msgobj[mo].id[3]);
+			id |= cc770_read_reg(priv, msgobj[mo].id[2]) << 8;
+			id |= cc770_read_reg(priv, msgobj[mo].id[1]) << 16;
+			id |= cc770_read_reg(priv, msgobj[mo].id[0]) << 24;
+			id >>= 3;
+			id |= CAN_EFF_FLAG;
+		} else {
+			id = cc770_read_reg(priv, msgobj[mo].id[1]);
+			id |= cc770_read_reg(priv, msgobj[mo].id[0]) << 8;
+			id >>= 5;
+		}
+
+		cf->can_id = id;
+		cf->can_dlc = get_can_dlc((config & 0xf0) >> 4);
+		for (i = 0; i < cf->can_dlc; i++)
+			cf->data[i] = cc770_read_reg(priv, msgobj[mo].data[i]);
+	}
+	netif_rx(skb);
+
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+}
+
+static int cc770_err(struct net_device *dev, u8 status)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	struct can_frame *cf;
+	struct sk_buff *skb;
+	u8 lec;
+
+	netdev_dbg(dev, "status interrupt (%#x)\n", status);
+
+	skb = alloc_can_err_skb(dev, &cf);
+	if (!skb)
+		return -ENOMEM;
+
+	/* Use extended functions of the CC770 */
+	if (priv->control_normal_mode & CTRL_EAF) {
+		cf->data[6] = cc770_read_reg(priv, tx_error_counter);
+		cf->data[7] = cc770_read_reg(priv, rx_error_counter);
+	}
+
+	if (status & STAT_BOFF) {
+		/* Disable interrupts */
+		cc770_write_reg(priv, control, CTRL_INI);
+		cf->can_id |= CAN_ERR_BUSOFF;
+		priv->can.state = CAN_STATE_BUS_OFF;
+		can_bus_off(dev);
+	} else if (status & STAT_WARN) {
+		cf->can_id |= CAN_ERR_CRTL;
+		/* Only the CC770 does show error passive */
+		if (cf->data[7] > 127) {
+			cf->data[1] = CAN_ERR_CRTL_RX_PASSIVE |
+				CAN_ERR_CRTL_TX_PASSIVE;
+			priv->can.state = CAN_STATE_ERROR_PASSIVE;
+			priv->can.can_stats.error_passive++;
+		} else {
+			cf->data[1] = CAN_ERR_CRTL_RX_WARNING |
+				CAN_ERR_CRTL_TX_WARNING;
+			priv->can.state = CAN_STATE_ERROR_WARNING;
+			priv->can.can_stats.error_warning++;
+		}
+	} else {
+		/* Back to error avtive */
+		cf->can_id |= CAN_ERR_PROT;
+		cf->data[2] = CAN_ERR_PROT_ACTIVE;
+		priv->can.state = CAN_STATE_ERROR_ACTIVE;
+	}
+
+	lec = status & STAT_LEC_MASK;
+	if (lec < 7 && lec > 0) {
+		if (lec == STAT_LEC_ACK) {
+			cf->can_id |= CAN_ERR_ACK;
+		} else {
+			cf->can_id |= CAN_ERR_PROT;
+			switch (lec) {
+			case STAT_LEC_STUFF:
+				cf->data[2] |= CAN_ERR_PROT_STUFF;
+				break;
+			case STAT_LEC_FORM:
+				cf->data[2] |= CAN_ERR_PROT_FORM;
+				break;
+			case STAT_LEC_BIT1:
+				cf->data[2] |= CAN_ERR_PROT_BIT1;
+				break;
+			case STAT_LEC_BIT0:
+				cf->data[2] |= CAN_ERR_PROT_BIT0;
+				break;
+			case STAT_LEC_CRC:
+				cf->data[3] |= CAN_ERR_PROT_LOC_CRC_SEQ;
+				break;
+			}
+		}
+	}
+
+	netif_rx(skb);
+
+	stats->rx_packets++;
+	stats->rx_bytes += cf->can_dlc;
+
+	return 0;
+}
+
+static int cc770_status_interrupt(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	u8 status;
+
+	status = cc770_read_reg(priv, status);
+	/* Reset the status register including RXOK and TXOK */
+	cc770_write_reg(priv, status, STAT_LEC_MASK);
+
+	if (status & (STAT_WARN | STAT_BOFF) ||
+	    (status & STAT_LEC_MASK) != STAT_LEC_MASK) {
+		cc770_err(dev, status);
+		return status & STAT_BOFF;
+	}
+
+	return 0;
+}
+
+static void cc770_rx_interrupt(struct net_device *dev, unsigned int o)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	unsigned int mo = obj2msgobj(o);
+	u8 ctrl1;
+	int n = CC770_MAX_MSG;
+
+	while (n--) {
+		ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
+
+		if (!(ctrl1 & NEWDAT_SET))  {
+			/* Check for RTR if additional functions are enabled */
+			if (priv->control_normal_mode & CTRL_EAF) {
+				if (!(cc770_read_reg(priv, msgobj[mo].ctrl0) &
+				      INTPND_SET))
+					break;
+			} else {
+				break;
+			}
+		}
+
+		if (ctrl1 & MSGLST_SET) {
+			stats->rx_over_errors++;
+			stats->rx_errors++;
+		}
+		if (mo < MSGOBJ_LAST)
+			cc770_write_reg(priv, msgobj[mo].ctrl1,
+					NEWDAT_RES | MSGLST_RES |
+					TXRQST_UNC | RMTPND_UNC);
+		cc770_rx(dev, mo, ctrl1);
+
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				MSGVAL_SET | TXIE_RES |
+				RXIE_SET | INTPND_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl1,
+				NEWDAT_RES | MSGLST_RES |
+				TXRQST_RES | RMTPND_RES);
+	}
+}
+
+static void cc770_rtr_interrupt(struct net_device *dev, unsigned int o)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	unsigned int mo = obj2msgobj(o);
+	u8 ctrl0, ctrl1;
+	int n = CC770_MAX_MSG;
+
+	while (n--) {
+		ctrl0 = cc770_read_reg(priv, msgobj[mo].ctrl0);
+		if (!(ctrl0 & INTPND_SET))
+			break;
+
+		ctrl1 = cc770_read_reg(priv, msgobj[mo].ctrl1);
+		cc770_rx(dev, mo, ctrl1);
+
+		cc770_write_reg(priv, msgobj[mo].ctrl0,
+				MSGVAL_SET | TXIE_RES |
+				RXIE_SET | INTPND_RES);
+		cc770_write_reg(priv, msgobj[mo].ctrl1,
+				NEWDAT_RES | CPUUPD_SET |
+				TXRQST_RES | RMTPND_RES);
+	}
+}
+
+static void cc770_tx_interrupt(struct net_device *dev, unsigned int o)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	struct net_device_stats *stats = &dev->stats;
+	unsigned int mo = obj2msgobj(o);
+
+	/* Nothing more to send, switch off interrupts */
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_RES | TXIE_RES | RXIE_RES | INTPND_RES);
+	/*
+	 * We had some cases of repeated IRQ so make sure the
+	 * INT is acknowledged
+	 */
+	cc770_write_reg(priv, msgobj[mo].ctrl0,
+			MSGVAL_UNC | TXIE_UNC | RXIE_UNC | INTPND_RES);
+
+	stats->tx_packets++;
+	can_get_echo_skb(dev, 0);
+	netif_wake_queue(dev);
+}
+
+irqreturn_t cc770_interrupt(int irq, void *dev_id)
+{
+	struct net_device *dev = (struct net_device *)dev_id;
+	struct cc770_priv *priv = netdev_priv(dev);
+	u8 intid;
+	int o, n = 0;
+
+	/* Shared interrupts and IRQ off? */
+	if (priv->can.state == CAN_STATE_STOPPED)
+		return IRQ_NONE;
+
+	if (priv->pre_irq)
+		priv->pre_irq(priv);
+
+	while (n < CC770_MAX_IRQ) {
+		/* Read the highest pending interrupt request */
+		intid = cc770_read_reg(priv, interrupt);
+		if (!intid)
+			break;
+		n++;
+
+		if (intid == 1) {
+			/* Exit in case of bus-off */
+			if (cc770_status_interrupt(dev))
+				break;
+		} else {
+			o = intid2obj(intid);
+
+			if (o >= CC770_OBJ_MAX) {
+				netdev_err(dev, "Unexpected interrupt id %d\n",
+					   intid);
+				continue;
+			}
+
+			if (priv->obj_flags[o] & CC770_OBJ_FLAG_RTR)
+				cc770_rtr_interrupt(dev, o);
+			else if (priv->obj_flags[o] & CC770_OBJ_FLAG_RX)
+				cc770_rx_interrupt(dev, o);
+			else
+				cc770_tx_interrupt(dev, o);
+		}
+	}
+
+	if (priv->post_irq)
+		priv->post_irq(priv);
+
+	if (n >= CC770_MAX_IRQ)
+		netdev_dbg(dev, "%d messages handled in ISR", n);
+
+	return (n) ? IRQ_HANDLED : IRQ_NONE;
+}
+
+static int cc770_open(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	int err;
+
+	/* set chip into reset mode */
+	set_reset_mode(dev);
+
+	/* common open */
+	err = open_candev(dev);
+	if (err)
+		return err;
+
+	err = request_irq(dev->irq, &cc770_interrupt, priv->irq_flags,
+			  dev->name, dev);
+	if (err) {
+		close_candev(dev);
+		return -EAGAIN;
+	}
+
+	/* init and start chip */
+	cc770_start(dev);
+
+	netif_start_queue(dev);
+
+	return 0;
+}
+
+static int cc770_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	set_reset_mode(dev);
+
+	free_irq(dev->irq, dev);
+	close_candev(dev);
+
+	return 0;
+}
+
+struct net_device *alloc_cc770dev(int sizeof_priv)
+{
+	struct net_device *dev;
+	struct cc770_priv *priv;
+
+	dev = alloc_candev(sizeof(struct cc770_priv) + sizeof_priv,
+			   CC770_ECHO_SKB_MAX);
+	if (!dev)
+		return NULL;
+
+	priv = netdev_priv(dev);
+
+	priv->dev = dev;
+	priv->can.bittiming_const = &cc770_bittiming_const;
+	priv->can.do_set_bittiming = cc770_set_bittiming;
+	priv->can.do_set_mode = cc770_set_mode;
+	priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES;
+
+	memcpy(priv->obj_flags, cc770_obj_flags, sizeof(cc770_obj_flags));
+
+	if (sizeof_priv)
+		priv->priv = (void *)priv + sizeof(struct cc770_priv);
+
+	return dev;
+}
+EXPORT_SYMBOL_GPL(alloc_cc770dev);
+
+void free_cc770dev(struct net_device *dev)
+{
+	free_candev(dev);
+}
+EXPORT_SYMBOL_GPL(free_cc770dev);
+
+static const struct net_device_ops cc770_netdev_ops = {
+	.ndo_open = cc770_open,
+	.ndo_stop = cc770_close,
+	.ndo_start_xmit = cc770_start_xmit,
+};
+
+int register_cc770dev(struct net_device *dev)
+{
+	struct cc770_priv *priv = netdev_priv(dev);
+	int err;
+
+	err = cc770_probe_chip(dev);
+	if (err)
+		return err;
+
+	dev->netdev_ops = &cc770_netdev_ops;
+
+	dev->flags |= IFF_ECHO;	/* we support local echo */
+
+	/* Should we use additional functions? */
+	if (!i82527_compat && priv->control_normal_mode & CTRL_EAF) {
+		priv->can.do_get_berr_counter = cc770_get_berr_counter;
+		priv->control_normal_mode = CTRL_IE | CTRL_EAF | CTRL_EIE;
+		netdev_dbg(dev, "i82527 mode with additional functions\n");
+	} else {
+		priv->control_normal_mode = CTRL_IE | CTRL_EIE;
+		netdev_dbg(dev, "strict i82527 compatibility mode\n");
+	}
+
+	chipset_init(priv);
+	set_reset_mode(dev);
+
+	return register_candev(dev);
+}
+EXPORT_SYMBOL_GPL(register_cc770dev);
+
+void unregister_cc770dev(struct net_device *dev)
+{
+	set_reset_mode(dev);
+	unregister_candev(dev);
+}
+EXPORT_SYMBOL_GPL(unregister_cc770dev);
+
+static __init int cc770_init(void)
+{
+	if (msgobj15_eff) {
+		cc770_obj_flags[CC770_OBJ_RX0] |= CC770_OBJ_FLAG_EFF;
+		cc770_obj_flags[CC770_OBJ_RX1] &= ~CC770_OBJ_FLAG_EFF;
+	}
+
+	pr_info("CAN netdevice driver\n");
+
+	return 0;
+}
+module_init(cc770_init);
+
+static __exit void cc770_exit(void)
+{
+	pr_info("driver removed\n");
+}
+module_exit(cc770_exit);
diff --git a/drivers/net/can/cc770/cc770.h b/drivers/net/can/cc770/cc770.h
new file mode 100644
index 000000000000..a1739db98d91
--- /dev/null
+++ b/drivers/net/can/cc770/cc770.h
@@ -0,0 +1,203 @@
+/*
+ * Core driver for the CC770 and AN82527 CAN controllers
+ *
+ * Copyright (C) 2009, 2011 Wolfgang Grandegger <wg@grandegger.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the version 2 of the GNU General Public License
+ * as published by the Free Software Foundation
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef CC770_DEV_H
+#define CC770_DEV_H
+
+#include <linux/can/dev.h>
+
+struct cc770_msgobj {
+	u8 ctrl0;
+	u8 ctrl1;
+	u8 id[4];
+	u8 config;
+	u8 data[8];
+	u8 dontuse;		/* padding */
+} __packed;
+
+struct cc770_regs {
+	union {
+		struct cc770_msgobj msgobj[16]; /* Message object 1..15 */
+		struct {
+			u8 control;		/* Control Register */
+			u8 status;		/* Status Register */
+			u8 cpu_interface;	/* CPU Interface Register */
+			u8 dontuse1;
+			u8 high_speed_read[2];	/* High Speed Read */
+			u8 global_mask_std[2];	/* Standard Global Mask */
+			u8 global_mask_ext[4];	/* Extended Global Mask */
+			u8 msg15_mask[4];	/* Message 15 Mask */
+			u8 dontuse2[15];
+			u8 clkout;		/* Clock Out Register */
+			u8 dontuse3[15];
+			u8 bus_config;		/* Bus Configuration Register */
+			u8 dontuse4[15];
+			u8 bit_timing_0;	/* Bit Timing Register byte 0 */
+			u8 dontuse5[15];
+			u8 bit_timing_1;	/* Bit Timing Register byte 1 */
+			u8 dontuse6[15];
+			u8 interrupt;		/* Interrupt Register */
+			u8 dontuse7[15];
+			u8 rx_error_counter;	/* Receive Error Counter */
+			u8 dontuse8[15];
+			u8 tx_error_counter;	/* Transmit Error Counter */
+			u8 dontuse9[31];
+			u8 p1_conf;
+			u8 dontuse10[15];
+			u8 p2_conf;
+			u8 dontuse11[15];
+			u8 p1_in;
+			u8 dontuse12[15];
+			u8 p2_in;
+			u8 dontuse13[15];
+			u8 p1_out;
+			u8 dontuse14[15];
+			u8 p2_out;
+			u8 dontuse15[15];
+			u8 serial_reset_addr;
+		};
+	};
+} __packed;
+
+/* Control Register (0x00) */
+#define CTRL_INI	0x01	/* Initialization */
+#define CTRL_IE		0x02	/* Interrupt Enable */
+#define CTRL_SIE	0x04	/* Status Interrupt Enable */
+#define CTRL_EIE	0x08	/* Error Interrupt Enable */
+#define CTRL_EAF	0x20	/* Enable additional functions */
+#define CTRL_CCE	0x40	/* Change Configuration Enable */
+
+/* Status Register (0x01) */
+#define STAT_LEC_STUFF	0x01	/* Stuff error */
+#define STAT_LEC_FORM	0x02	/* Form error */
+#define STAT_LEC_ACK	0x03	/* Acknowledgement error */
+#define STAT_LEC_BIT1	0x04	/* Bit1 error */
+#define STAT_LEC_BIT0	0x05	/* Bit0 error */
+#define STAT_LEC_CRC	0x06	/* CRC error */
+#define STAT_LEC_MASK	0x07	/* Last Error Code mask */
+#define STAT_TXOK	0x08	/* Transmit Message Successfully */
+#define STAT_RXOK	0x10	/* Receive Message Successfully */
+#define STAT_WAKE	0x20	/* Wake Up Status */
+#define STAT_WARN	0x40	/* Warning Status */
+#define STAT_BOFF	0x80	/* Bus Off Status */
+
+/*
+ * CPU Interface Register (0x02)
+ * Clock Out Register (0x1f)
+ * Bus Configuration Register (0x2f)
+ *
+ * see include/linux/can/platform/cc770.h
+ */
+
+/* Message Control Register 0 (Base Address + 0x0) */
+#define INTPND_RES	0x01	/* No Interrupt pending */
+#define INTPND_SET	0x02	/* Interrupt pending */
+#define INTPND_UNC	0x03
+#define RXIE_RES	0x04	/* Receive Interrupt Disable */
+#define RXIE_SET	0x08	/* Receive Interrupt Enable */
+#define RXIE_UNC	0x0c
+#define TXIE_RES	0x10	/* Transmit Interrupt Disable */
+#define TXIE_SET	0x20	/* Transmit Interrupt Enable */
+#define TXIE_UNC	0x30
+#define MSGVAL_RES	0x40	/* Message Invalid */
+#define MSGVAL_SET	0x80	/* Message Valid */
+#define MSGVAL_UNC	0xc0
+
+/* Message Control Register 1 (Base Address + 0x01) */
+#define NEWDAT_RES	0x01	/* No New Data */
+#define NEWDAT_SET	0x02	/* New Data */
+#define NEWDAT_UNC	0x03
+#define MSGLST_RES	0x04	/* No Message Lost */
+#define MSGLST_SET	0x08	/* Message Lost */
+#define MSGLST_UNC	0x0c
+#define CPUUPD_RES	0x04	/* No CPU Updating */
+#define CPUUPD_SET	0x08	/* CPU Updating */
+#define CPUUPD_UNC	0x0c
+#define TXRQST_RES	0x10	/* No Transmission Request */
+#define TXRQST_SET	0x20	/* Transmission Request */
+#define TXRQST_UNC	0x30
+#define RMTPND_RES	0x40	/* No Remote Request Pending */
+#define RMTPND_SET	0x80	/* Remote Request Pending */
+#define RMTPND_UNC	0xc0
+
+/* Message Configuration Register (Base Address + 0x06) */
+#define MSGCFG_XTD	0x04	/* Extended Identifier */
+#define MSGCFG_DIR	0x08	/* Direction is Transmit */
+
+#define MSGOBJ_FIRST	1
+#define MSGOBJ_LAST	15
+
+#define CC770_IO_SIZE	0x100
+#define CC770_MAX_IRQ	20	/* max. number of interrupts handled in ISR */
+#define CC770_MAX_MSG	4	/* max. number of messages handled in ISR */
+
+#define CC770_ECHO_SKB_MAX	1
+
+#define cc770_read_reg(priv, member)					\
+	priv->read_reg(priv, offsetof(struct cc770_regs, member))
+
+#define cc770_write_reg(priv, member, value)				\
+	priv->write_reg(priv, offsetof(struct cc770_regs, member), value)
+
+/*
+ * Message objects and flags used by this driver
+ */
+#define CC770_OBJ_FLAG_RX	0x01
+#define CC770_OBJ_FLAG_RTR	0x02
+#define CC770_OBJ_FLAG_EFF	0x04
+
+enum {
+	CC770_OBJ_RX0 = 0,	/* for receiving normal messages */
+	CC770_OBJ_RX1,		/* for receiving normal messages */
+	CC770_OBJ_RX_RTR0,	/* for receiving remote transmission requests */
+	CC770_OBJ_RX_RTR1,	/* for receiving remote transmission requests */
+	CC770_OBJ_TX,		/* for sending messages */
+	CC770_OBJ_MAX
+};
+
+#define obj2msgobj(o)	(MSGOBJ_LAST - (o)) /* message object 11..15 */
+
+/*
+ * CC770 private data structure
+ */
+struct cc770_priv {
+	struct can_priv can;	/* must be the first member */
+	struct sk_buff *echo_skb;
+
+	/* the lower-layer is responsible for appropriate locking */
+	u8 (*read_reg)(const struct cc770_priv *priv, int reg);
+	void (*write_reg)(const struct cc770_priv *priv, int reg, u8 val);
+	void (*pre_irq)(const struct cc770_priv *priv);
+	void (*post_irq)(const struct cc770_priv *priv);
+
+	void *priv;		/* for board-specific data */
+	struct net_device *dev;
+
+	void __iomem *reg_base;	 /* ioremap'ed address to registers */
+	unsigned long irq_flags; /* for request_irq() */
+
+	unsigned char obj_flags[CC770_OBJ_MAX];
+	u8 control_normal_mode;	/* Control register for normal mode */
+	u8 cpu_interface;	/* CPU interface register */
+	u8 clkout;		/* Clock out register */
+	u8 bus_config;		/* Bus conffiguration register */
+};
+
+struct net_device *alloc_cc770dev(int sizeof_priv);
+void free_cc770dev(struct net_device *dev);
+int register_cc770dev(struct net_device *dev);
+void unregister_cc770dev(struct net_device *dev);
+
+#endif /* CC770_DEV_H */
diff --git a/include/linux/can/platform/cc770.h b/include/linux/can/platform/cc770.h
new file mode 100644
index 000000000000..7702641f87ee
--- /dev/null
+++ b/include/linux/can/platform/cc770.h
@@ -0,0 +1,33 @@
+#ifndef _CAN_PLATFORM_CC770_H_
+#define _CAN_PLATFORM_CC770_H_
+
+/* CPU Interface Register (0x02) */
+#define CPUIF_CEN	0x01	/* Clock Out Enable */
+#define CPUIF_MUX	0x04	/* Multiplex */
+#define CPUIF_SLP	0x08	/* Sleep */
+#define CPUIF_PWD	0x10	/* Power Down Mode */
+#define CPUIF_DMC	0x20	/* Divide Memory Clock */
+#define CPUIF_DSC	0x40	/* Divide System Clock */
+#define CPUIF_RST	0x80	/* Hardware Reset Status */
+
+/* Clock Out Register (0x1f) */
+#define CLKOUT_CD_MASK  0x0f	/* Clock Divider mask */
+#define CLKOUT_SL_MASK	0x30	/* Slew Rate mask */
+#define CLKOUT_SL_SHIFT	4
+
+/* Bus Configuration Register (0x2f) */
+#define BUSCFG_DR0	0x01	/* Disconnect RX0 Input / Select RX input */
+#define BUSCFG_DR1	0x02	/* Disconnect RX1 Input / Silent mode */
+#define BUSCFG_DT1	0x08	/* Disconnect TX1 Output */
+#define BUSCFG_POL	0x20	/* Polarity dominant or recessive */
+#define BUSCFG_CBY	0x40	/* Input Comparator Bypass */
+
+struct cc770_platform_data {
+	u32 osc_freq;	/* CAN bus oscillator frequency in Hz */
+
+	u8 cir;		/* CPU Interface Register */
+	u8 cor;		/* Clock Out Register */
+	u8 bcr;		/* Bus Configuration Register */
+};
+
+#endif	/* !_CAN_PLATFORM_CC770_H_ */
-- 
cgit v1.2.3


From 65698610f58153eb7621be3fb4f57ca318b19c60 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 1 Dec 2011 14:16:04 -0500
Subject: net: Make ndo_neigh_destroy return void.

The return value isn't used.

Suggested by Ben Hucthings.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1c4ddb37f2b5..21440e31fdab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -975,7 +975,7 @@ struct net_device_ops {
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
 	int			(*ndo_neigh_construct)(struct neighbour *n);
-	int			(*ndo_neigh_destroy)(struct neighbour *n);
+	void			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
-- 
cgit v1.2.3


From 86b1309c7e411b7c25dc0dc7a092582a4d291044 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 10 Nov 2011 19:14:51 -0800
Subject: genetlink: Add lockdep_genl_is_held().

Open vSwitch uses genl_mutex locking to protect datapath
data-structures like flow-table, flow-actions. Following patch adds
lockdep_genl_is_held() which is used for rcu annotation to prove
locking.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/genetlink.h | 3 +++
 net/netlink/genetlink.c   | 8 ++++++++
 2 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 61549b26ad6f..59311adfb0e0 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -85,6 +85,9 @@ enum {
 /* All generic netlink requests are serialized by a global lock.  */
 extern void genl_lock(void);
 extern void genl_unlock(void);
+#ifdef CONFIG_PROVE_LOCKING
+extern int lockdep_genl_is_held(void);
+#endif
 
 #endif /* __KERNEL__ */
 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8a36599d3555..28453ae2a97b 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -33,6 +33,14 @@ void genl_unlock(void)
 }
 EXPORT_SYMBOL(genl_unlock);
 
+#ifdef CONFIG_PROVE_LOCKING
+int lockdep_genl_is_held(void)
+{
+	return lockdep_is_held(&genl_mutex);
+}
+EXPORT_SYMBOL(lockdep_genl_is_held);
+#endif
+
 #define GENL_FAM_TAB_SIZE	16
 #define GENL_FAM_TAB_MASK	(GENL_FAM_TAB_SIZE - 1)
 
-- 
cgit v1.2.3


From b4e16611c4e1cd98765269c8fdaf43f96baa57b1 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Sat, 19 Nov 2011 16:21:37 -0800
Subject: genetlink: Add rcu_dereference_genl and genl_dereference.

This adds rcu_dereference_genl and genl_dereference, which are genl
variants of the RTNL functions to enforce proper locking with lockdep
and sparse.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/genetlink.h | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 59311adfb0e0..73c28dea10ae 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -89,6 +89,27 @@ extern void genl_unlock(void);
 extern int lockdep_genl_is_held(void);
 #endif
 
+/**
+ * rcu_dereference_genl - rcu_dereference with debug checking
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Do an rcu_dereference(p), but check caller either holds rcu_read_lock()
+ * or genl mutex. Note : Please prefer genl_dereference() or rcu_dereference()
+ */
+#define rcu_dereference_genl(p)					\
+	rcu_dereference_check(p, lockdep_genl_is_held())
+
+/**
+ * genl_dereference - fetch RCU pointer when updates are prevented by genl mutex
+ * @p: The pointer to read, prior to dereferencing
+ *
+ * Return the value of the specified RCU-protected pointer, but omit
+ * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because
+ * caller holds genl mutex.
+ */
+#define genl_dereference(p)					\
+	rcu_dereference_protected(p, lockdep_genl_is_held())
+
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_GENERIC_NETLINK_H */
-- 
cgit v1.2.3


From 396cf9430505cfba529a2f2a037d782719fa5844 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Fri, 18 Nov 2011 13:15:54 -0800
Subject: vlan: Move vlan_set_encap_proto() to vlan header file

Open vSwitch needs this function for vlan handling.

Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 include/linux/if_vlan.h | 34 ++++++++++++++++++++++++++++++++++
 net/8021q/vlan_core.c   | 33 ---------------------------------
 2 files changed, 34 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 12d5543b14f2..070ac50c1d2d 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -310,6 +310,40 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb)
 
 	return protocol;
 }
+
+static inline void vlan_set_encap_proto(struct sk_buff *skb,
+					struct vlan_hdr *vhdr)
+{
+	__be16 proto;
+	unsigned char *rawp;
+
+	/*
+	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
+	 * three protocols care about.
+	 */
+
+	proto = vhdr->h_vlan_encapsulated_proto;
+	if (ntohs(proto) >= 1536) {
+		skb->protocol = proto;
+		return;
+	}
+
+	rawp = skb->data;
+	if (*(unsigned short *) rawp == 0xFFFF)
+		/*
+		 * This is a magic hack to spot IPX packets. Older Novell
+		 * breaks the protocol design and runs IPX over 802.3 without
+		 * an 802.2 LLC layer. We look for FFFF which isn't a used
+		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
+		 * but does for the rest.
+		 */
+		skb->protocol = htons(ETH_P_802_3);
+	else
+		/*
+		 * Real 802.2 LLC
+		 */
+		skb->protocol = htons(ETH_P_802_2);
+}
 #endif /* __KERNEL__ */
 
 /* VLAN IOCTLs are found in sockios.h */
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index f5ffc02729d6..9c95e8e054f9 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -110,39 +110,6 @@ static struct sk_buff *vlan_reorder_header(struct sk_buff *skb)
 	return skb;
 }
 
-static void vlan_set_encap_proto(struct sk_buff *skb, struct vlan_hdr *vhdr)
-{
-	__be16 proto;
-	unsigned char *rawp;
-
-	/*
-	 * Was a VLAN packet, grab the encapsulated protocol, which the layer
-	 * three protocols care about.
-	 */
-
-	proto = vhdr->h_vlan_encapsulated_proto;
-	if (ntohs(proto) >= 1536) {
-		skb->protocol = proto;
-		return;
-	}
-
-	rawp = skb->data;
-	if (*(unsigned short *) rawp == 0xFFFF)
-		/*
-		 * This is a magic hack to spot IPX packets. Older Novell
-		 * breaks the protocol design and runs IPX over 802.3 without
-		 * an 802.2 LLC layer. We look for FFFF which isn't a used
-		 * 802.2 SSAP/DSAP. This won't work for fault tolerant netware
-		 * but does for the rest.
-		 */
-		skb->protocol = htons(ETH_P_802_3);
-	else
-		/*
-		 * Real 802.2 LLC
-		 */
-		skb->protocol = htons(ETH_P_802_2);
-}
-
 struct sk_buff *vlan_untag(struct sk_buff *skb)
 {
 	struct vlan_hdr *vhdr;
-- 
cgit v1.2.3


From ccb1352e76cff0524e7ccb2074826a092dd13016 Mon Sep 17 00:00:00 2001
From: Jesse Gross <jesse@nicira.com>
Date: Tue, 25 Oct 2011 19:26:31 -0700
Subject: net: Add Open vSwitch kernel components.

Open vSwitch is a multilayer Ethernet switch targeted at virtualized
environments.  In addition to supporting a variety of features
expected in a traditional hardware switch, it enables fine-grained
programmatic extension and flow-based control of the network.
This control is useful in a wide variety of applications but is
particularly important in multi-server virtualization deployments,
which are often characterized by highly dynamic endpoints and the need
to maintain logical abstractions for multiple tenants.

The Open vSwitch datapath provides an in-kernel fast path for packet
forwarding.  It is complemented by a userspace daemon, ovs-vswitchd,
which is able to accept configuration from a variety of sources and
translate it into packet processing rules.

See http://openvswitch.org for more information and userspace
utilities.

Signed-off-by: Jesse Gross <jesse@nicira.com>
---
 Documentation/networking/00-INDEX        |    2 +
 Documentation/networking/openvswitch.txt |  195 +++
 MAINTAINERS                              |    8 +
 include/linux/openvswitch.h              |  452 +++++++
 net/Kconfig                              |    1 +
 net/Makefile                             |    1 +
 net/openvswitch/Kconfig                  |   28 +
 net/openvswitch/Makefile                 |   14 +
 net/openvswitch/actions.c                |  415 +++++++
 net/openvswitch/datapath.c               | 1912 ++++++++++++++++++++++++++++++
 net/openvswitch/datapath.h               |  125 ++
 net/openvswitch/dp_notify.c              |   66 ++
 net/openvswitch/flow.c                   | 1346 +++++++++++++++++++++
 net/openvswitch/flow.h                   |  199 ++++
 net/openvswitch/vport-internal_dev.c     |  241 ++++
 net/openvswitch/vport-internal_dev.h     |   28 +
 net/openvswitch/vport-netdev.c           |  198 ++++
 net/openvswitch/vport-netdev.h           |   42 +
 net/openvswitch/vport.c                  |  396 +++++++
 net/openvswitch/vport.h                  |  205 ++++
 20 files changed, 5874 insertions(+)
 create mode 100644 Documentation/networking/openvswitch.txt
 create mode 100644 include/linux/openvswitch.h
 create mode 100644 net/openvswitch/Kconfig
 create mode 100644 net/openvswitch/Makefile
 create mode 100644 net/openvswitch/actions.c
 create mode 100644 net/openvswitch/datapath.c
 create mode 100644 net/openvswitch/datapath.h
 create mode 100644 net/openvswitch/dp_notify.c
 create mode 100644 net/openvswitch/flow.c
 create mode 100644 net/openvswitch/flow.h
 create mode 100644 net/openvswitch/vport-internal_dev.c
 create mode 100644 net/openvswitch/vport-internal_dev.h
 create mode 100644 net/openvswitch/vport-netdev.c
 create mode 100644 net/openvswitch/vport-netdev.h
 create mode 100644 net/openvswitch/vport.c
 create mode 100644 net/openvswitch/vport.h

(limited to 'include/linux')

diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX
index bbce1215434a..9ad9ddeb384c 100644
--- a/Documentation/networking/00-INDEX
+++ b/Documentation/networking/00-INDEX
@@ -144,6 +144,8 @@ nfc.txt
 	- The Linux Near Field Communication (NFS) subsystem.
 olympic.txt
 	- IBM PCI Pit/Pit-Phy/Olympic Token Ring driver info.
+openvswitch.txt
+	- Open vSwitch developer documentation.
 operstates.txt
 	- Overview of network interface operational states.
 packet_mmap.txt
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
new file mode 100644
index 000000000000..b8a048b8df3a
--- /dev/null
+++ b/Documentation/networking/openvswitch.txt
@@ -0,0 +1,195 @@
+Open vSwitch datapath developer documentation
+=============================================
+
+The Open vSwitch kernel module allows flexible userspace control over
+flow-level packet processing on selected network devices.  It can be
+used to implement a plain Ethernet switch, network device bonding,
+VLAN processing, network access control, flow-based network control,
+and so on.
+
+The kernel module implements multiple "datapaths" (analogous to
+bridges), each of which can have multiple "vports" (analogous to ports
+within a bridge).  Each datapath also has associated with it a "flow
+table" that userspace populates with "flows" that map from keys based
+on packet headers and metadata to sets of actions.  The most common
+action forwards the packet to another vport; other actions are also
+implemented.
+
+When a packet arrives on a vport, the kernel module processes it by
+extracting its flow key and looking it up in the flow table.  If there
+is a matching flow, it executes the associated actions.  If there is
+no match, it queues the packet to userspace for processing (as part of
+its processing, userspace will likely set up a flow to handle further
+packets of the same type entirely in-kernel).
+
+
+Flow key compatibility
+----------------------
+
+Network protocols evolve over time.  New protocols become important
+and existing protocols lose their prominence.  For the Open vSwitch
+kernel module to remain relevant, it must be possible for newer
+versions to parse additional protocols as part of the flow key.  It
+might even be desirable, someday, to drop support for parsing
+protocols that have become obsolete.  Therefore, the Netlink interface
+to Open vSwitch is designed to allow carefully written userspace
+applications to work with any version of the flow key, past or future.
+
+To support this forward and backward compatibility, whenever the
+kernel module passes a packet to userspace, it also passes along the
+flow key that it parsed from the packet.  Userspace then extracts its
+own notion of a flow key from the packet and compares it against the
+kernel-provided version:
+
+    - If userspace's notion of the flow key for the packet matches the
+      kernel's, then nothing special is necessary.
+
+    - If the kernel's flow key includes more fields than the userspace
+      version of the flow key, for example if the kernel decoded IPv6
+      headers but userspace stopped at the Ethernet type (because it
+      does not understand IPv6), then again nothing special is
+      necessary.  Userspace can still set up a flow in the usual way,
+      as long as it uses the kernel-provided flow key to do it.
+
+    - If the userspace flow key includes more fields than the
+      kernel's, for example if userspace decoded an IPv6 header but
+      the kernel stopped at the Ethernet type, then userspace can
+      forward the packet manually, without setting up a flow in the
+      kernel.  This case is bad for performance because every packet
+      that the kernel considers part of the flow must go to userspace,
+      but the forwarding behavior is correct.  (If userspace can
+      determine that the values of the extra fields would not affect
+      forwarding behavior, then it could set up a flow anyway.)
+
+How flow keys evolve over time is important to making this work, so
+the following sections go into detail.
+
+
+Flow key format
+---------------
+
+A flow key is passed over a Netlink socket as a sequence of Netlink
+attributes.  Some attributes represent packet metadata, defined as any
+information about a packet that cannot be extracted from the packet
+itself, e.g. the vport on which the packet was received.  Most
+attributes, however, are extracted from headers within the packet,
+e.g. source and destination addresses from Ethernet, IP, or TCP
+headers.
+
+The <linux/openvswitch.h> header file defines the exact format of the
+flow key attributes.  For informal explanatory purposes here, we write
+them as comma-separated strings, with parentheses indicating arguments
+and nesting.  For example, the following could represent a flow key
+corresponding to a TCP packet that arrived on vport 1:
+
+    in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4),
+    eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=17, tos=0,
+    frag=no), tcp(src=49163, dst=80)
+
+Often we ellipsize arguments not important to the discussion, e.g.:
+
+    in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
+
+
+Basic rule for evolving flow keys
+---------------------------------
+
+Some care is needed to really maintain forward and backward
+compatibility for applications that follow the rules listed under
+"Flow key compatibility" above.
+
+The basic rule is obvious:
+
+    ------------------------------------------------------------------
+    New network protocol support must only supplement existing flow
+    key attributes.  It must not change the meaning of already defined
+    flow key attributes.
+    ------------------------------------------------------------------
+
+This rule does have less-obvious consequences so it is worth working
+through a few examples.  Suppose, for example, that the kernel module
+did not already implement VLAN parsing.  Instead, it just interpreted
+the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the
+packet.  The flow key for any packet with an 802.1Q header would look
+essentially like this, ignoring metadata:
+
+    eth(...), eth_type(0x8100)
+
+Naively, to add VLAN support, it makes sense to add a new "vlan" flow
+key attribute to contain the VLAN tag, then continue to decode the
+encapsulated headers beyond the VLAN tag using the existing field
+definitions.  With this change, an TCP packet in VLAN 10 would have a
+flow key much like this:
+
+    eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...)
+
+But this change would negatively affect a userspace application that
+has not been updated to understand the new "vlan" flow key attribute.
+The application could, following the flow compatibility rules above,
+ignore the "vlan" attribute that it does not understand and therefore
+assume that the flow contained IP packets.  This is a bad assumption
+(the flow only contains IP packets if one parses and skips over the
+802.1Q header) and it could cause the application's behavior to change
+across kernel versions even though it follows the compatibility rules.
+
+The solution is to use a set of nested attributes.  This is, for
+example, why 802.1Q support uses nested attributes.  A TCP packet in
+VLAN 10 is actually expressed as:
+
+    eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800),
+    ip(proto=6, ...), tcp(...)))
+
+Notice how the "eth_type", "ip", and "tcp" flow key attributes are
+nested inside the "encap" attribute.  Thus, an application that does
+not understand the "vlan" key will not see either of those attributes
+and therefore will not misinterpret them.  (Also, the outer eth_type
+is still 0x8100, not changed to 0x0800.)
+
+Handling malformed packets
+--------------------------
+
+Don't drop packets in the kernel for malformed protocol headers, bad
+checksums, etc.  This would prevent userspace from implementing a
+simple Ethernet switch that forwards every packet.
+
+Instead, in such a case, include an attribute with "empty" content.
+It doesn't matter if the empty content could be valid protocol values,
+as long as those values are rarely seen in practice, because userspace
+can always forward all packets with those values to userspace and
+handle them individually.
+
+For example, consider a packet that contains an IP header that
+indicates protocol 6 for TCP, but which is truncated just after the IP
+header, so that the TCP header is missing.  The flow key for this
+packet would include a tcp attribute with all-zero src and dst, like
+this:
+
+    eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0)
+
+As another example, consider a packet with an Ethernet type of 0x8100,
+indicating that a VLAN TCI should follow, but which is truncated just
+after the Ethernet type.  The flow key for this packet would include
+an all-zero-bits vlan and an empty encap attribute, like this:
+
+    eth(...), eth_type(0x8100), vlan(0), encap()
+
+Unlike a TCP packet with source and destination ports 0, an
+all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka
+VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan
+attribute expressly to allow this situation to be distinguished.
+Thus, the flow key in this second example unambiguously indicates a
+missing or malformed VLAN TCI.
+
+Other rules
+-----------
+
+The other rules for flow keys are much less subtle:
+
+    - Duplicate attributes are not allowed at a given nesting level.
+
+    - Ordering of attributes is not significant.
+
+    - When the kernel sends a given flow key to userspace, it always
+      composes it the same way.  This allows userspace to hash and
+      compare entire flow keys that it may not be able to fully
+      interpret.
diff --git a/MAINTAINERS b/MAINTAINERS
index c88eb7bb3a69..209ad0695ba2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4868,6 +4868,14 @@ S:	Maintained
 T:	git git://openrisc.net/~jonas/linux
 F:	arch/openrisc
 
+OPENVSWITCH
+M:	Jesse Gross <jesse@nicira.com>
+L:	dev@openvswitch.org
+W:	http://openvswitch.org
+T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jesse/openvswitch.git
+S:	Maintained
+F:	net/openvswitch/
+
 OPL4 DRIVER
 M:	Clemens Ladisch <clemens@ladisch.de>
 L:	alsa-devel@alsa-project.org (moderated for non-subscribers)
diff --git a/include/linux/openvswitch.h b/include/linux/openvswitch.h
new file mode 100644
index 000000000000..eb1efa54fe84
--- /dev/null
+++ b/include/linux/openvswitch.h
@@ -0,0 +1,452 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef _LINUX_OPENVSWITCH_H
+#define _LINUX_OPENVSWITCH_H 1
+
+#include <linux/types.h>
+
+/**
+ * struct ovs_header - header for OVS Generic Netlink messages.
+ * @dp_ifindex: ifindex of local port for datapath (0 to make a request not
+ * specific to a datapath).
+ *
+ * Attributes following the header are specific to a particular OVS Generic
+ * Netlink family, but all of the OVS families use this header.
+ */
+
+struct ovs_header {
+	int dp_ifindex;
+};
+
+/* Datapaths. */
+
+#define OVS_DATAPATH_FAMILY  "ovs_datapath"
+#define OVS_DATAPATH_MCGROUP "ovs_datapath"
+#define OVS_DATAPATH_VERSION 0x1
+
+enum ovs_datapath_cmd {
+	OVS_DP_CMD_UNSPEC,
+	OVS_DP_CMD_NEW,
+	OVS_DP_CMD_DEL,
+	OVS_DP_CMD_GET,
+	OVS_DP_CMD_SET
+};
+
+/**
+ * enum ovs_datapath_attr - attributes for %OVS_DP_* commands.
+ * @OVS_DP_ATTR_NAME: Name of the network device that serves as the "local
+ * port".  This is the name of the network device whose dp_ifindex is given in
+ * the &struct ovs_header.  Always present in notifications.  Required in
+ * %OVS_DP_NEW requests.  May be used as an alternative to specifying
+ * dp_ifindex in other requests (with a dp_ifindex of 0).
+ * @OVS_DP_ATTR_UPCALL_PID: The Netlink socket in userspace that is initially
+ * set on the datapath port (for OVS_ACTION_ATTR_MISS).  Only valid on
+ * %OVS_DP_CMD_NEW requests. A value of zero indicates that upcalls should
+ * not be sent.
+ * @OVS_DP_ATTR_STATS: Statistics about packets that have passed through the
+ * datapath.  Always present in notifications.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_DP_* commands.
+ */
+enum ovs_datapath_attr {
+	OVS_DP_ATTR_UNSPEC,
+	OVS_DP_ATTR_NAME,       /* name of dp_ifindex netdev */
+	OVS_DP_ATTR_UPCALL_PID, /* Netlink PID to receive upcalls */
+	OVS_DP_ATTR_STATS,      /* struct ovs_dp_stats */
+	__OVS_DP_ATTR_MAX
+};
+
+#define OVS_DP_ATTR_MAX (__OVS_DP_ATTR_MAX - 1)
+
+struct ovs_dp_stats {
+	__u64 n_hit;             /* Number of flow table matches. */
+	__u64 n_missed;          /* Number of flow table misses. */
+	__u64 n_lost;            /* Number of misses not sent to userspace. */
+	__u64 n_flows;           /* Number of flows present */
+};
+
+struct ovs_vport_stats {
+	__u64   rx_packets;		/* total packets received       */
+	__u64   tx_packets;		/* total packets transmitted    */
+	__u64   rx_bytes;		/* total bytes received         */
+	__u64   tx_bytes;		/* total bytes transmitted      */
+	__u64   rx_errors;		/* bad packets received         */
+	__u64   tx_errors;		/* packet transmit problems     */
+	__u64   rx_dropped;		/* no space in linux buffers    */
+	__u64   tx_dropped;		/* no space available in linux  */
+};
+
+/* Fixed logical ports. */
+#define OVSP_LOCAL      ((__u16)0)
+
+/* Packet transfer. */
+
+#define OVS_PACKET_FAMILY "ovs_packet"
+#define OVS_PACKET_VERSION 0x1
+
+enum ovs_packet_cmd {
+	OVS_PACKET_CMD_UNSPEC,
+
+	/* Kernel-to-user notifications. */
+	OVS_PACKET_CMD_MISS,    /* Flow table miss. */
+	OVS_PACKET_CMD_ACTION,  /* OVS_ACTION_ATTR_USERSPACE action. */
+
+	/* Userspace commands. */
+	OVS_PACKET_CMD_EXECUTE  /* Apply actions to a packet. */
+};
+
+/**
+ * enum ovs_packet_attr - attributes for %OVS_PACKET_* commands.
+ * @OVS_PACKET_ATTR_PACKET: Present for all notifications.  Contains the entire
+ * packet as received, from the start of the Ethernet header onward.  For
+ * %OVS_PACKET_CMD_ACTION, %OVS_PACKET_ATTR_PACKET reflects changes made by
+ * actions preceding %OVS_ACTION_ATTR_USERSPACE, but %OVS_PACKET_ATTR_KEY is
+ * the flow key extracted from the packet as originally received.
+ * @OVS_PACKET_ATTR_KEY: Present for all notifications.  Contains the flow key
+ * extracted from the packet as nested %OVS_KEY_ATTR_* attributes.  This allows
+ * userspace to adapt its flow setup strategy by comparing its notion of the
+ * flow key against the kernel's.
+ * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet.  Used
+ * for %OVS_PACKET_CMD_EXECUTE.  It has nested %OVS_ACTION_ATTR_* attributes.
+ * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
+ * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
+ * %OVS_USERSPACE_ATTR_USERDATA attribute.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_PACKET_* commands.
+ */
+enum ovs_packet_attr {
+	OVS_PACKET_ATTR_UNSPEC,
+	OVS_PACKET_ATTR_PACKET,      /* Packet data. */
+	OVS_PACKET_ATTR_KEY,         /* Nested OVS_KEY_ATTR_* attributes. */
+	OVS_PACKET_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_PACKET_ATTR_USERDATA,    /* u64 OVS_ACTION_ATTR_USERSPACE arg. */
+	__OVS_PACKET_ATTR_MAX
+};
+
+#define OVS_PACKET_ATTR_MAX (__OVS_PACKET_ATTR_MAX - 1)
+
+/* Virtual ports. */
+
+#define OVS_VPORT_FAMILY  "ovs_vport"
+#define OVS_VPORT_MCGROUP "ovs_vport"
+#define OVS_VPORT_VERSION 0x1
+
+enum ovs_vport_cmd {
+	OVS_VPORT_CMD_UNSPEC,
+	OVS_VPORT_CMD_NEW,
+	OVS_VPORT_CMD_DEL,
+	OVS_VPORT_CMD_GET,
+	OVS_VPORT_CMD_SET
+};
+
+enum ovs_vport_type {
+	OVS_VPORT_TYPE_UNSPEC,
+	OVS_VPORT_TYPE_NETDEV,   /* network device */
+	OVS_VPORT_TYPE_INTERNAL, /* network device implemented by datapath */
+	__OVS_VPORT_TYPE_MAX
+};
+
+#define OVS_VPORT_TYPE_MAX (__OVS_VPORT_TYPE_MAX - 1)
+
+/**
+ * enum ovs_vport_attr - attributes for %OVS_VPORT_* commands.
+ * @OVS_VPORT_ATTR_PORT_NO: 32-bit port number within datapath.
+ * @OVS_VPORT_ATTR_TYPE: 32-bit %OVS_VPORT_TYPE_* constant describing the type
+ * of vport.
+ * @OVS_VPORT_ATTR_NAME: Name of vport.  For a vport based on a network device
+ * this is the name of the network device.  Maximum length %IFNAMSIZ-1 bytes
+ * plus a null terminator.
+ * @OVS_VPORT_ATTR_OPTIONS: Vport-specific configuration information.
+ * @OVS_VPORT_ATTR_UPCALL_PID: The Netlink socket in userspace that
+ * OVS_PACKET_CMD_MISS upcalls will be directed to for packets received on
+ * this port.  A value of zero indicates that upcalls should not be sent.
+ * @OVS_VPORT_ATTR_STATS: A &struct ovs_vport_stats giving statistics for
+ * packets sent or received through the vport.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_VPORT_* commands.
+ *
+ * For %OVS_VPORT_CMD_NEW requests, the %OVS_VPORT_ATTR_TYPE and
+ * %OVS_VPORT_ATTR_NAME attributes are required.  %OVS_VPORT_ATTR_PORT_NO is
+ * optional; if not specified a free port number is automatically selected.
+ * Whether %OVS_VPORT_ATTR_OPTIONS is required or optional depends on the type
+ * of vport.
+ * and other attributes are ignored.
+ *
+ * For other requests, if %OVS_VPORT_ATTR_NAME is specified then it is used to
+ * look up the vport to operate on; otherwise dp_idx from the &struct
+ * ovs_header plus %OVS_VPORT_ATTR_PORT_NO determine the vport.
+ */
+enum ovs_vport_attr {
+	OVS_VPORT_ATTR_UNSPEC,
+	OVS_VPORT_ATTR_PORT_NO,	/* u32 port number within datapath */
+	OVS_VPORT_ATTR_TYPE,	/* u32 OVS_VPORT_TYPE_* constant. */
+	OVS_VPORT_ATTR_NAME,	/* string name, up to IFNAMSIZ bytes long */
+	OVS_VPORT_ATTR_OPTIONS, /* nested attributes, varies by vport type */
+	OVS_VPORT_ATTR_UPCALL_PID, /* u32 Netlink PID to receive upcalls */
+	OVS_VPORT_ATTR_STATS,	/* struct ovs_vport_stats */
+	__OVS_VPORT_ATTR_MAX
+};
+
+#define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1)
+
+/* Flows. */
+
+#define OVS_FLOW_FAMILY  "ovs_flow"
+#define OVS_FLOW_MCGROUP "ovs_flow"
+#define OVS_FLOW_VERSION 0x1
+
+enum ovs_flow_cmd {
+	OVS_FLOW_CMD_UNSPEC,
+	OVS_FLOW_CMD_NEW,
+	OVS_FLOW_CMD_DEL,
+	OVS_FLOW_CMD_GET,
+	OVS_FLOW_CMD_SET
+};
+
+struct ovs_flow_stats {
+	__u64 n_packets;         /* Number of matched packets. */
+	__u64 n_bytes;           /* Number of matched bytes. */
+};
+
+enum ovs_key_attr {
+	OVS_KEY_ATTR_UNSPEC,
+	OVS_KEY_ATTR_ENCAP,	/* Nested set of encapsulated attributes. */
+	OVS_KEY_ATTR_PRIORITY,  /* u32 skb->priority */
+	OVS_KEY_ATTR_IN_PORT,   /* u32 OVS dp port number */
+	OVS_KEY_ATTR_ETHERNET,  /* struct ovs_key_ethernet */
+	OVS_KEY_ATTR_VLAN,	/* be16 VLAN TCI */
+	OVS_KEY_ATTR_ETHERTYPE,	/* be16 Ethernet type */
+	OVS_KEY_ATTR_IPV4,      /* struct ovs_key_ipv4 */
+	OVS_KEY_ATTR_IPV6,      /* struct ovs_key_ipv6 */
+	OVS_KEY_ATTR_TCP,       /* struct ovs_key_tcp */
+	OVS_KEY_ATTR_UDP,       /* struct ovs_key_udp */
+	OVS_KEY_ATTR_ICMP,      /* struct ovs_key_icmp */
+	OVS_KEY_ATTR_ICMPV6,    /* struct ovs_key_icmpv6 */
+	OVS_KEY_ATTR_ARP,       /* struct ovs_key_arp */
+	OVS_KEY_ATTR_ND,        /* struct ovs_key_nd */
+	__OVS_KEY_ATTR_MAX
+};
+
+#define OVS_KEY_ATTR_MAX (__OVS_KEY_ATTR_MAX - 1)
+
+/**
+ * enum ovs_frag_type - IPv4 and IPv6 fragment type
+ * @OVS_FRAG_TYPE_NONE: Packet is not a fragment.
+ * @OVS_FRAG_TYPE_FIRST: Packet is a fragment with offset 0.
+ * @OVS_FRAG_TYPE_LATER: Packet is a fragment with nonzero offset.
+ *
+ * Used as the @ipv4_frag in &struct ovs_key_ipv4 and as @ipv6_frag &struct
+ * ovs_key_ipv6.
+ */
+enum ovs_frag_type {
+	OVS_FRAG_TYPE_NONE,
+	OVS_FRAG_TYPE_FIRST,
+	OVS_FRAG_TYPE_LATER,
+	__OVS_FRAG_TYPE_MAX
+};
+
+#define OVS_FRAG_TYPE_MAX (__OVS_FRAG_TYPE_MAX - 1)
+
+struct ovs_key_ethernet {
+	__u8	 eth_src[6];
+	__u8	 eth_dst[6];
+};
+
+struct ovs_key_ipv4 {
+	__be32 ipv4_src;
+	__be32 ipv4_dst;
+	__u8   ipv4_proto;
+	__u8   ipv4_tos;
+	__u8   ipv4_ttl;
+	__u8   ipv4_frag;	/* One of OVS_FRAG_TYPE_*. */
+};
+
+struct ovs_key_ipv6 {
+	__be32 ipv6_src[4];
+	__be32 ipv6_dst[4];
+	__be32 ipv6_label;	/* 20-bits in least-significant bits. */
+	__u8   ipv6_proto;
+	__u8   ipv6_tclass;
+	__u8   ipv6_hlimit;
+	__u8   ipv6_frag;	/* One of OVS_FRAG_TYPE_*. */
+};
+
+struct ovs_key_tcp {
+	__be16 tcp_src;
+	__be16 tcp_dst;
+};
+
+struct ovs_key_udp {
+	__be16 udp_src;
+	__be16 udp_dst;
+};
+
+struct ovs_key_icmp {
+	__u8 icmp_type;
+	__u8 icmp_code;
+};
+
+struct ovs_key_icmpv6 {
+	__u8 icmpv6_type;
+	__u8 icmpv6_code;
+};
+
+struct ovs_key_arp {
+	__be32 arp_sip;
+	__be32 arp_tip;
+	__be16 arp_op;
+	__u8   arp_sha[6];
+	__u8   arp_tha[6];
+};
+
+struct ovs_key_nd {
+	__u32 nd_target[4];
+	__u8  nd_sll[6];
+	__u8  nd_tll[6];
+};
+
+/**
+ * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
+ * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
+ * key.  Always present in notifications.  Required for all requests (except
+ * dumps).
+ * @OVS_FLOW_ATTR_ACTIONS: Nested %OVS_ACTION_ATTR_* attributes specifying
+ * the actions to take for packets that match the key.  Always present in
+ * notifications.  Required for %OVS_FLOW_CMD_NEW requests, optional for
+ * %OVS_FLOW_CMD_SET requests.
+ * @OVS_FLOW_ATTR_STATS: &struct ovs_flow_stats giving statistics for this
+ * flow.  Present in notifications if the stats would be nonzero.  Ignored in
+ * requests.
+ * @OVS_FLOW_ATTR_TCP_FLAGS: An 8-bit value giving the OR'd value of all of the
+ * TCP flags seen on packets in this flow.  Only present in notifications for
+ * TCP flows, and only if it would be nonzero.  Ignored in requests.
+ * @OVS_FLOW_ATTR_USED: A 64-bit integer giving the time, in milliseconds on
+ * the system monotonic clock, at which a packet was last processed for this
+ * flow.  Only present in notifications if a packet has been processed for this
+ * flow.  Ignored in requests.
+ * @OVS_FLOW_ATTR_CLEAR: If present in a %OVS_FLOW_CMD_SET request, clears the
+ * last-used time, accumulated TCP flags, and statistics for this flow.
+ * Otherwise ignored in requests.  Never present in notifications.
+ *
+ * These attributes follow the &struct ovs_header within the Generic Netlink
+ * payload for %OVS_FLOW_* commands.
+ */
+enum ovs_flow_attr {
+	OVS_FLOW_ATTR_UNSPEC,
+	OVS_FLOW_ATTR_KEY,       /* Sequence of OVS_KEY_ATTR_* attributes. */
+	OVS_FLOW_ATTR_ACTIONS,   /* Nested OVS_ACTION_ATTR_* attributes. */
+	OVS_FLOW_ATTR_STATS,     /* struct ovs_flow_stats. */
+	OVS_FLOW_ATTR_TCP_FLAGS, /* 8-bit OR'd TCP flags. */
+	OVS_FLOW_ATTR_USED,      /* u64 msecs last used in monotonic time. */
+	OVS_FLOW_ATTR_CLEAR,     /* Flag to clear stats, tcp_flags, used. */
+	__OVS_FLOW_ATTR_MAX
+};
+
+#define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1)
+
+/**
+ * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action.
+ * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with
+ * @OVS_ACTION_ATTR_SAMPLE.  A value of 0 samples no packets, a value of
+ * %UINT32_MAX samples all packets and intermediate values sample intermediate
+ * fractions of packets.
+ * @OVS_SAMPLE_ATTR_ACTIONS: Set of actions to execute in sampling event.
+ * Actions are passed as nested attributes.
+ *
+ * Executes the specified actions with the given probability on a per-packet
+ * basis.
+ */
+enum ovs_sample_attr {
+	OVS_SAMPLE_ATTR_UNSPEC,
+	OVS_SAMPLE_ATTR_PROBABILITY, /* u32 number */
+	OVS_SAMPLE_ATTR_ACTIONS,     /* Nested OVS_ACTION_ATTR_* attributes. */
+	__OVS_SAMPLE_ATTR_MAX,
+};
+
+#define OVS_SAMPLE_ATTR_MAX (__OVS_SAMPLE_ATTR_MAX - 1)
+
+/**
+ * enum ovs_userspace_attr - Attributes for %OVS_ACTION_ATTR_USERSPACE action.
+ * @OVS_USERSPACE_ATTR_PID: u32 Netlink PID to which the %OVS_PACKET_CMD_ACTION
+ * message should be sent.  Required.
+ * @OVS_USERSPACE_ATTR_USERDATA: If present, its u64 argument is copied to the
+ * %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA,
+ */
+enum ovs_userspace_attr {
+	OVS_USERSPACE_ATTR_UNSPEC,
+	OVS_USERSPACE_ATTR_PID,	      /* u32 Netlink PID to receive upcalls. */
+	OVS_USERSPACE_ATTR_USERDATA,  /* u64 optional user-specified cookie. */
+	__OVS_USERSPACE_ATTR_MAX
+};
+
+#define OVS_USERSPACE_ATTR_MAX (__OVS_USERSPACE_ATTR_MAX - 1)
+
+/**
+ * struct ovs_action_push_vlan - %OVS_ACTION_ATTR_PUSH_VLAN action argument.
+ * @vlan_tpid: Tag protocol identifier (TPID) to push.
+ * @vlan_tci: Tag control identifier (TCI) to push.  The CFI bit must be set
+ * (but it will not be set in the 802.1Q header that is pushed).
+ *
+ * The @vlan_tpid value is typically %ETH_P_8021Q.  The only acceptable TPID
+ * values are those that the kernel module also parses as 802.1Q headers, to
+ * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN
+ * from having surprising results.
+ */
+struct ovs_action_push_vlan {
+	__be16 vlan_tpid;	/* 802.1Q TPID. */
+	__be16 vlan_tci;	/* 802.1Q TCI (VLAN ID and priority). */
+};
+
+/**
+ * enum ovs_action_attr - Action types.
+ *
+ * @OVS_ACTION_ATTR_OUTPUT: Output packet to port.
+ * @OVS_ACTION_ATTR_USERSPACE: Send packet to userspace according to nested
+ * %OVS_USERSPACE_ATTR_* attributes.
+ * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header.  The
+ * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its
+ * value.
+ * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the
+ * packet.
+ * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet.
+ * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in
+ * the nested %OVS_SAMPLE_ATTR_* attributes.
+ *
+ * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
+ * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
+ * type may not be changed.
+ */
+
+enum ovs_action_attr {
+	OVS_ACTION_ATTR_UNSPEC,
+	OVS_ACTION_ATTR_OUTPUT,	      /* u32 port number. */
+	OVS_ACTION_ATTR_USERSPACE,    /* Nested OVS_USERSPACE_ATTR_*. */
+	OVS_ACTION_ATTR_SET,          /* One nested OVS_KEY_ATTR_*. */
+	OVS_ACTION_ATTR_PUSH_VLAN,    /* struct ovs_action_push_vlan. */
+	OVS_ACTION_ATTR_POP_VLAN,     /* No argument. */
+	OVS_ACTION_ATTR_SAMPLE,       /* Nested OVS_SAMPLE_ATTR_*. */
+	__OVS_ACTION_ATTR_MAX
+};
+
+#define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1)
+
+#endif /* _LINUX_OPENVSWITCH_H */
diff --git a/net/Kconfig b/net/Kconfig
index 2d998735c4d8..e07272d0bb2d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -215,6 +215,7 @@ source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 source "net/dns_resolver/Kconfig"
 source "net/batman-adv/Kconfig"
+source "net/openvswitch/Kconfig"
 
 config RPS
 	boolean
diff --git a/net/Makefile b/net/Makefile
index acdde4950de4..ad432fa4d934 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -69,3 +69,4 @@ obj-$(CONFIG_DNS_RESOLVER)	+= dns_resolver/
 obj-$(CONFIG_CEPH_LIB)		+= ceph/
 obj-$(CONFIG_BATMAN_ADV)	+= batman-adv/
 obj-$(CONFIG_NFC)		+= nfc/
+obj-$(CONFIG_OPENVSWITCH)	+= openvswitch/
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
new file mode 100644
index 000000000000..d9ea33c361be
--- /dev/null
+++ b/net/openvswitch/Kconfig
@@ -0,0 +1,28 @@
+#
+# Open vSwitch
+#
+
+config OPENVSWITCH
+	tristate "Open vSwitch"
+	---help---
+	  Open vSwitch is a multilayer Ethernet switch targeted at virtualized
+	  environments.  In addition to supporting a variety of features
+	  expected in a traditional hardware switch, it enables fine-grained
+	  programmatic extension and flow-based control of the network.  This
+	  control is useful in a wide variety of applications but is
+	  particularly important in multi-server virtualization deployments,
+	  which are often characterized by highly dynamic endpoints and the
+	  need to maintain logical abstractions for multiple tenants.
+
+	  The Open vSwitch datapath provides an in-kernel fast path for packet
+	  forwarding.  It is complemented by a userspace daemon, ovs-vswitchd,
+	  which is able to accept configuration from a variety of sources and
+	  translate it into packet processing rules.
+
+	  See http://openvswitch.org for more information and userspace
+	  utilities.
+
+	  To compile this code as a module, choose M here: the module will be
+	  called openvswitch.
+
+	  If unsure, say N.
diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile
new file mode 100644
index 000000000000..15e7384745c1
--- /dev/null
+++ b/net/openvswitch/Makefile
@@ -0,0 +1,14 @@
+#
+# Makefile for Open vSwitch.
+#
+
+obj-$(CONFIG_OPENVSWITCH) += openvswitch.o
+
+openvswitch-y := \
+	actions.o \
+	datapath.o \
+	dp_notify.o \
+	flow.o \
+	vport.o \
+	vport-internal_dev.o \
+	vport-netdev.o \
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
new file mode 100644
index 000000000000..2725d1bdf291
--- /dev/null
+++ b/net/openvswitch/actions.c
@@ -0,0 +1,415 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/skbuff.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/openvswitch.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/in6.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include <net/dsfield.h>
+
+#include "datapath.h"
+#include "vport.h"
+
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct nlattr *attr, int len, bool keep_skb);
+
+static int make_writable(struct sk_buff *skb, int write_len)
+{
+	if (!skb_cloned(skb) || skb_clone_writable(skb, write_len))
+		return 0;
+
+	return pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+}
+
+/* remove VLAN header from packet and update csum accrodingly. */
+static int __pop_vlan_tci(struct sk_buff *skb, __be16 *current_tci)
+{
+	struct vlan_hdr *vhdr;
+	int err;
+
+	err = make_writable(skb, VLAN_ETH_HLEN);
+	if (unlikely(err))
+		return err;
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_sub(skb->csum, csum_partial(skb->data
+					+ ETH_HLEN, VLAN_HLEN, 0));
+
+	vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
+	*current_tci = vhdr->h_vlan_TCI;
+
+	memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
+	__skb_pull(skb, VLAN_HLEN);
+
+	vlan_set_encap_proto(skb, vhdr);
+	skb->mac_header += VLAN_HLEN;
+	skb_reset_mac_len(skb);
+
+	return 0;
+}
+
+static int pop_vlan(struct sk_buff *skb)
+{
+	__be16 tci;
+	int err;
+
+	if (likely(vlan_tx_tag_present(skb))) {
+		skb->vlan_tci = 0;
+	} else {
+		if (unlikely(skb->protocol != htons(ETH_P_8021Q) ||
+			     skb->len < VLAN_ETH_HLEN))
+			return 0;
+
+		err = __pop_vlan_tci(skb, &tci);
+		if (err)
+			return err;
+	}
+	/* move next vlan tag to hw accel tag */
+	if (likely(skb->protocol != htons(ETH_P_8021Q) ||
+		   skb->len < VLAN_ETH_HLEN))
+		return 0;
+
+	err = __pop_vlan_tci(skb, &tci);
+	if (unlikely(err))
+		return err;
+
+	__vlan_hwaccel_put_tag(skb, ntohs(tci));
+	return 0;
+}
+
+static int push_vlan(struct sk_buff *skb, const struct ovs_action_push_vlan *vlan)
+{
+	if (unlikely(vlan_tx_tag_present(skb))) {
+		u16 current_tag;
+
+		/* push down current VLAN tag */
+		current_tag = vlan_tx_tag_get(skb);
+
+		if (!__vlan_put_tag(skb, current_tag))
+			return -ENOMEM;
+
+		if (skb->ip_summed == CHECKSUM_COMPLETE)
+			skb->csum = csum_add(skb->csum, csum_partial(skb->data
+					+ ETH_HLEN, VLAN_HLEN, 0));
+
+	}
+	__vlan_hwaccel_put_tag(skb, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT);
+	return 0;
+}
+
+static int set_eth_addr(struct sk_buff *skb,
+			const struct ovs_key_ethernet *eth_key)
+{
+	int err;
+	err = make_writable(skb, ETH_HLEN);
+	if (unlikely(err))
+		return err;
+
+	memcpy(eth_hdr(skb)->h_source, eth_key->eth_src, ETH_ALEN);
+	memcpy(eth_hdr(skb)->h_dest, eth_key->eth_dst, ETH_ALEN);
+
+	return 0;
+}
+
+static void set_ip_addr(struct sk_buff *skb, struct iphdr *nh,
+				__be32 *addr, __be32 new_addr)
+{
+	int transport_len = skb->len - skb_transport_offset(skb);
+
+	if (nh->protocol == IPPROTO_TCP) {
+		if (likely(transport_len >= sizeof(struct tcphdr)))
+			inet_proto_csum_replace4(&tcp_hdr(skb)->check, skb,
+						 *addr, new_addr, 1);
+	} else if (nh->protocol == IPPROTO_UDP) {
+		if (likely(transport_len >= sizeof(struct udphdr)))
+			inet_proto_csum_replace4(&udp_hdr(skb)->check, skb,
+						 *addr, new_addr, 1);
+	}
+
+	csum_replace4(&nh->check, *addr, new_addr);
+	skb->rxhash = 0;
+	*addr = new_addr;
+}
+
+static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl)
+{
+	csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8));
+	nh->ttl = new_ttl;
+}
+
+static int set_ipv4(struct sk_buff *skb, const struct ovs_key_ipv4 *ipv4_key)
+{
+	struct iphdr *nh;
+	int err;
+
+	err = make_writable(skb, skb_network_offset(skb) +
+				 sizeof(struct iphdr));
+	if (unlikely(err))
+		return err;
+
+	nh = ip_hdr(skb);
+
+	if (ipv4_key->ipv4_src != nh->saddr)
+		set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src);
+
+	if (ipv4_key->ipv4_dst != nh->daddr)
+		set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst);
+
+	if (ipv4_key->ipv4_tos != nh->tos)
+		ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos);
+
+	if (ipv4_key->ipv4_ttl != nh->ttl)
+		set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl);
+
+	return 0;
+}
+
+/* Must follow make_writable() since that can move the skb data. */
+static void set_tp_port(struct sk_buff *skb, __be16 *port,
+			 __be16 new_port, __sum16 *check)
+{
+	inet_proto_csum_replace2(check, skb, *port, new_port, 0);
+	*port = new_port;
+	skb->rxhash = 0;
+}
+
+static int set_udp_port(struct sk_buff *skb,
+			const struct ovs_key_udp *udp_port_key)
+{
+	struct udphdr *uh;
+	int err;
+
+	err = make_writable(skb, skb_transport_offset(skb) +
+				 sizeof(struct udphdr));
+	if (unlikely(err))
+		return err;
+
+	uh = udp_hdr(skb);
+	if (udp_port_key->udp_src != uh->source)
+		set_tp_port(skb, &uh->source, udp_port_key->udp_src, &uh->check);
+
+	if (udp_port_key->udp_dst != uh->dest)
+		set_tp_port(skb, &uh->dest, udp_port_key->udp_dst, &uh->check);
+
+	return 0;
+}
+
+static int set_tcp_port(struct sk_buff *skb,
+			const struct ovs_key_tcp *tcp_port_key)
+{
+	struct tcphdr *th;
+	int err;
+
+	err = make_writable(skb, skb_transport_offset(skb) +
+				 sizeof(struct tcphdr));
+	if (unlikely(err))
+		return err;
+
+	th = tcp_hdr(skb);
+	if (tcp_port_key->tcp_src != th->source)
+		set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check);
+
+	if (tcp_port_key->tcp_dst != th->dest)
+		set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check);
+
+	return 0;
+}
+
+static int do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
+{
+	struct vport *vport;
+
+	if (unlikely(!skb))
+		return -ENOMEM;
+
+	vport = rcu_dereference(dp->ports[out_port]);
+	if (unlikely(!vport)) {
+		kfree_skb(skb);
+		return -ENODEV;
+	}
+
+	ovs_vport_send(vport, skb);
+	return 0;
+}
+
+static int output_userspace(struct datapath *dp, struct sk_buff *skb,
+			    const struct nlattr *attr)
+{
+	struct dp_upcall_info upcall;
+	const struct nlattr *a;
+	int rem;
+
+	upcall.cmd = OVS_PACKET_CMD_ACTION;
+	upcall.key = &OVS_CB(skb)->flow->key;
+	upcall.userdata = NULL;
+	upcall.pid = 0;
+
+	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+		 a = nla_next(a, &rem)) {
+		switch (nla_type(a)) {
+		case OVS_USERSPACE_ATTR_USERDATA:
+			upcall.userdata = a;
+			break;
+
+		case OVS_USERSPACE_ATTR_PID:
+			upcall.pid = nla_get_u32(a);
+			break;
+		}
+	}
+
+	return ovs_dp_upcall(dp, skb, &upcall);
+}
+
+static int sample(struct datapath *dp, struct sk_buff *skb,
+		  const struct nlattr *attr)
+{
+	const struct nlattr *acts_list = NULL;
+	const struct nlattr *a;
+	int rem;
+
+	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
+		 a = nla_next(a, &rem)) {
+		switch (nla_type(a)) {
+		case OVS_SAMPLE_ATTR_PROBABILITY:
+			if (net_random() >= nla_get_u32(a))
+				return 0;
+			break;
+
+		case OVS_SAMPLE_ATTR_ACTIONS:
+			acts_list = a;
+			break;
+		}
+	}
+
+	return do_execute_actions(dp, skb, nla_data(acts_list),
+						 nla_len(acts_list), true);
+}
+
+static int execute_set_action(struct sk_buff *skb,
+				 const struct nlattr *nested_attr)
+{
+	int err = 0;
+
+	switch (nla_type(nested_attr)) {
+	case OVS_KEY_ATTR_PRIORITY:
+		skb->priority = nla_get_u32(nested_attr);
+		break;
+
+	case OVS_KEY_ATTR_ETHERNET:
+		err = set_eth_addr(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_IPV4:
+		err = set_ipv4(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_TCP:
+		err = set_tcp_port(skb, nla_data(nested_attr));
+		break;
+
+	case OVS_KEY_ATTR_UDP:
+		err = set_udp_port(skb, nla_data(nested_attr));
+		break;
+	}
+
+	return err;
+}
+
+/* Execute a list of actions against 'skb'. */
+static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
+			const struct nlattr *attr, int len, bool keep_skb)
+{
+	/* Every output action needs a separate clone of 'skb', but the common
+	 * case is just a single output action, so that doing a clone and
+	 * then freeing the original skbuff is wasteful.  So the following code
+	 * is slightly obscure just to avoid that. */
+	int prev_port = -1;
+	const struct nlattr *a;
+	int rem;
+
+	for (a = attr, rem = len; rem > 0;
+	     a = nla_next(a, &rem)) {
+		int err = 0;
+
+		if (prev_port != -1) {
+			do_output(dp, skb_clone(skb, GFP_ATOMIC), prev_port);
+			prev_port = -1;
+		}
+
+		switch (nla_type(a)) {
+		case OVS_ACTION_ATTR_OUTPUT:
+			prev_port = nla_get_u32(a);
+			break;
+
+		case OVS_ACTION_ATTR_USERSPACE:
+			output_userspace(dp, skb, a);
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			err = push_vlan(skb, nla_data(a));
+			if (unlikely(err)) /* skb already freed. */
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			err = pop_vlan(skb);
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			err = execute_set_action(skb, nla_data(a));
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = sample(dp, skb, a);
+			break;
+		}
+
+		if (unlikely(err)) {
+			kfree_skb(skb);
+			return err;
+		}
+	}
+
+	if (prev_port != -1) {
+		if (keep_skb)
+			skb = skb_clone(skb, GFP_ATOMIC);
+
+		do_output(dp, skb, prev_port);
+	} else if (!keep_skb)
+		consume_skb(skb);
+
+	return 0;
+}
+
+/* Execute a list of actions against 'skb'. */
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb)
+{
+	struct sw_flow_actions *acts = rcu_dereference(OVS_CB(skb)->flow->sf_acts);
+
+	return do_execute_actions(dp, skb, acts->actions,
+					 acts->actions_len, false);
+}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
new file mode 100644
index 000000000000..9a2725114e99
--- /dev/null
+++ b/net/openvswitch/datapath.c
@@ -0,0 +1,1912 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/jhash.h>
+#include <linux/delay.h>
+#include <linux/time.h>
+#include <linux/etherdevice.h>
+#include <linux/genetlink.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/version.h>
+#include <linux/ethtool.h>
+#include <linux/wait.h>
+#include <asm/system.h>
+#include <asm/div64.h>
+#include <linux/highmem.h>
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/inetdevice.h>
+#include <linux/list.h>
+#include <linux/openvswitch.h>
+#include <linux/rculist.h>
+#include <linux/dmi.h>
+#include <linux/workqueue.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "flow.h"
+#include "vport-internal_dev.h"
+
+/**
+ * DOC: Locking:
+ *
+ * Writes to device state (add/remove datapath, port, set operations on vports,
+ * etc.) are protected by RTNL.
+ *
+ * Writes to other state (flow table modifications, set miscellaneous datapath
+ * parameters, etc.) are protected by genl_mutex.  The RTNL lock nests inside
+ * genl_mutex.
+ *
+ * Reads are protected by RCU.
+ *
+ * There are a few special cases (mostly stats) that have their own
+ * synchronization but they nest under all of above and don't interact with
+ * each other.
+ */
+
+/* Global list of datapaths to enable dumping them all out.
+ * Protected by genl_mutex.
+ */
+static LIST_HEAD(dps);
+
+#define REHASH_FLOW_INTERVAL (10 * 60 * HZ)
+static void rehash_flow_table(struct work_struct *work);
+static DECLARE_DELAYED_WORK(rehash_flow_wq, rehash_flow_table);
+
+static struct vport *new_vport(const struct vport_parms *);
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *,
+			     const struct dp_upcall_info *);
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *,
+				  const struct dp_upcall_info *);
+
+/* Must be called with rcu_read_lock, genl_mutex, or RTNL lock. */
+static struct datapath *get_dp(int dp_ifindex)
+{
+	struct datapath *dp = NULL;
+	struct net_device *dev;
+
+	rcu_read_lock();
+	dev = dev_get_by_index_rcu(&init_net, dp_ifindex);
+	if (dev) {
+		struct vport *vport = ovs_internal_dev_get_vport(dev);
+		if (vport)
+			dp = vport->dp;
+	}
+	rcu_read_unlock();
+
+	return dp;
+}
+
+/* Must be called with rcu_read_lock or RTNL lock. */
+const char *ovs_dp_name(const struct datapath *dp)
+{
+	struct vport *vport = rcu_dereference_rtnl(dp->ports[OVSP_LOCAL]);
+	return vport->ops->get_name(vport);
+}
+
+static int get_dpifindex(struct datapath *dp)
+{
+	struct vport *local;
+	int ifindex;
+
+	rcu_read_lock();
+
+	local = rcu_dereference(dp->ports[OVSP_LOCAL]);
+	if (local)
+		ifindex = local->ops->get_ifindex(local);
+	else
+		ifindex = 0;
+
+	rcu_read_unlock();
+
+	return ifindex;
+}
+
+static void destroy_dp_rcu(struct rcu_head *rcu)
+{
+	struct datapath *dp = container_of(rcu, struct datapath, rcu);
+
+	ovs_flow_tbl_destroy((__force struct flow_table *)dp->table);
+	free_percpu(dp->stats_percpu);
+	kfree(dp);
+}
+
+/* Called with RTNL lock and genl_lock. */
+static struct vport *new_vport(const struct vport_parms *parms)
+{
+	struct vport *vport;
+
+	vport = ovs_vport_add(parms);
+	if (!IS_ERR(vport)) {
+		struct datapath *dp = parms->dp;
+
+		rcu_assign_pointer(dp->ports[parms->port_no], vport);
+		list_add(&vport->node, &dp->port_list);
+	}
+
+	return vport;
+}
+
+/* Called with RTNL lock. */
+void ovs_dp_detach_port(struct vport *p)
+{
+	ASSERT_RTNL();
+
+	/* First drop references to device. */
+	list_del(&p->node);
+	rcu_assign_pointer(p->dp->ports[p->port_no], NULL);
+
+	/* Then destroy it. */
+	ovs_vport_del(p);
+}
+
+/* Must be called with rcu_read_lock. */
+void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb)
+{
+	struct datapath *dp = p->dp;
+	struct sw_flow *flow;
+	struct dp_stats_percpu *stats;
+	struct sw_flow_key key;
+	u64 *stats_counter;
+	int error;
+	int key_len;
+
+	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+	/* Extract flow from 'skb' into 'key'. */
+	error = ovs_flow_extract(skb, p->port_no, &key, &key_len);
+	if (unlikely(error)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* Look up flow. */
+	flow = ovs_flow_tbl_lookup(rcu_dereference(dp->table), &key, key_len);
+	if (unlikely(!flow)) {
+		struct dp_upcall_info upcall;
+
+		upcall.cmd = OVS_PACKET_CMD_MISS;
+		upcall.key = &key;
+		upcall.userdata = NULL;
+		upcall.pid = p->upcall_pid;
+		ovs_dp_upcall(dp, skb, &upcall);
+		consume_skb(skb);
+		stats_counter = &stats->n_missed;
+		goto out;
+	}
+
+	OVS_CB(skb)->flow = flow;
+
+	stats_counter = &stats->n_hit;
+	ovs_flow_used(OVS_CB(skb)->flow, skb);
+	ovs_execute_actions(dp, skb);
+
+out:
+	/* Update datapath statistics. */
+	u64_stats_update_begin(&stats->sync);
+	(*stats_counter)++;
+	u64_stats_update_end(&stats->sync);
+}
+
+static struct genl_family dp_packet_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_PACKET_FAMILY,
+	.version = OVS_PACKET_VERSION,
+	.maxattr = OVS_PACKET_ATTR_MAX
+};
+
+int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
+	      const struct dp_upcall_info *upcall_info)
+{
+	struct dp_stats_percpu *stats;
+	int dp_ifindex;
+	int err;
+
+	if (upcall_info->pid == 0) {
+		err = -ENOTCONN;
+		goto err;
+	}
+
+	dp_ifindex = get_dpifindex(dp);
+	if (!dp_ifindex) {
+		err = -ENODEV;
+		goto err;
+	}
+
+	if (!skb_is_gso(skb))
+		err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+	else
+		err = queue_gso_packets(dp_ifindex, skb, upcall_info);
+	if (err)
+		goto err;
+
+	return 0;
+
+err:
+	stats = per_cpu_ptr(dp->stats_percpu, smp_processor_id());
+
+	u64_stats_update_begin(&stats->sync);
+	stats->n_lost++;
+	u64_stats_update_end(&stats->sync);
+
+	return err;
+}
+
+static int queue_gso_packets(int dp_ifindex, struct sk_buff *skb,
+			     const struct dp_upcall_info *upcall_info)
+{
+	struct dp_upcall_info later_info;
+	struct sw_flow_key later_key;
+	struct sk_buff *segs, *nskb;
+	int err;
+
+	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	/* Queue all of the segments. */
+	skb = segs;
+	do {
+		err = queue_userspace_packet(dp_ifindex, skb, upcall_info);
+		if (err)
+			break;
+
+		if (skb == segs && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) {
+			/* The initial flow key extracted by ovs_flow_extract()
+			 * in this case is for a first fragment, so we need to
+			 * properly mark later fragments.
+			 */
+			later_key = *upcall_info->key;
+			later_key.ip.frag = OVS_FRAG_TYPE_LATER;
+
+			later_info = *upcall_info;
+			later_info.key = &later_key;
+			upcall_info = &later_info;
+		}
+	} while ((skb = skb->next));
+
+	/* Free all of the segments. */
+	skb = segs;
+	do {
+		nskb = skb->next;
+		if (err)
+			kfree_skb(skb);
+		else
+			consume_skb(skb);
+	} while ((skb = nskb));
+	return err;
+}
+
+static int queue_userspace_packet(int dp_ifindex, struct sk_buff *skb,
+				  const struct dp_upcall_info *upcall_info)
+{
+	struct ovs_header *upcall;
+	struct sk_buff *nskb = NULL;
+	struct sk_buff *user_skb; /* to be queued to userspace */
+	struct nlattr *nla;
+	unsigned int len;
+	int err;
+
+	if (vlan_tx_tag_present(skb)) {
+		nskb = skb_clone(skb, GFP_ATOMIC);
+		if (!nskb)
+			return -ENOMEM;
+
+		nskb = __vlan_put_tag(nskb, vlan_tx_tag_get(nskb));
+		if (!skb)
+			return -ENOMEM;
+
+		nskb->vlan_tci = 0;
+		skb = nskb;
+	}
+
+	if (nla_attr_size(skb->len) > USHRT_MAX) {
+		err = -EFBIG;
+		goto out;
+	}
+
+	len = sizeof(struct ovs_header);
+	len += nla_total_size(skb->len);
+	len += nla_total_size(FLOW_BUFSIZE);
+	if (upcall_info->cmd == OVS_PACKET_CMD_ACTION)
+		len += nla_total_size(8);
+
+	user_skb = genlmsg_new(len, GFP_ATOMIC);
+	if (!user_skb) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
+			     0, upcall_info->cmd);
+	upcall->dp_ifindex = dp_ifindex;
+
+	nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_KEY);
+	ovs_flow_to_nlattrs(upcall_info->key, user_skb);
+	nla_nest_end(user_skb, nla);
+
+	if (upcall_info->userdata)
+		nla_put_u64(user_skb, OVS_PACKET_ATTR_USERDATA,
+			    nla_get_u64(upcall_info->userdata));
+
+	nla = __nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, skb->len);
+
+	skb_copy_and_csum_dev(skb, nla_data(nla));
+
+	err = genlmsg_unicast(&init_net, user_skb, upcall_info->pid);
+
+out:
+	kfree_skb(nskb);
+	return err;
+}
+
+/* Called with genl_mutex. */
+static int flush_flows(int dp_ifindex)
+{
+	struct flow_table *old_table;
+	struct flow_table *new_table;
+	struct datapath *dp;
+
+	dp = get_dp(dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	old_table = genl_dereference(dp->table);
+	new_table = ovs_flow_tbl_alloc(TBL_MIN_BUCKETS);
+	if (!new_table)
+		return -ENOMEM;
+
+	rcu_assign_pointer(dp->table, new_table);
+
+	ovs_flow_tbl_deferred_destroy(old_table);
+	return 0;
+}
+
+static int validate_actions(const struct nlattr *attr,
+				const struct sw_flow_key *key, int depth);
+
+static int validate_sample(const struct nlattr *attr,
+				const struct sw_flow_key *key, int depth)
+{
+	const struct nlattr *attrs[OVS_SAMPLE_ATTR_MAX + 1];
+	const struct nlattr *probability, *actions;
+	const struct nlattr *a;
+	int rem;
+
+	memset(attrs, 0, sizeof(attrs));
+	nla_for_each_nested(a, attr, rem) {
+		int type = nla_type(a);
+		if (!type || type > OVS_SAMPLE_ATTR_MAX || attrs[type])
+			return -EINVAL;
+		attrs[type] = a;
+	}
+	if (rem)
+		return -EINVAL;
+
+	probability = attrs[OVS_SAMPLE_ATTR_PROBABILITY];
+	if (!probability || nla_len(probability) != sizeof(u32))
+		return -EINVAL;
+
+	actions = attrs[OVS_SAMPLE_ATTR_ACTIONS];
+	if (!actions || (nla_len(actions) && nla_len(actions) < NLA_HDRLEN))
+		return -EINVAL;
+	return validate_actions(actions, key, depth + 1);
+}
+
+static int validate_set(const struct nlattr *a,
+			const struct sw_flow_key *flow_key)
+{
+	const struct nlattr *ovs_key = nla_data(a);
+	int key_type = nla_type(ovs_key);
+
+	/* There can be only one key in a action */
+	if (nla_total_size(nla_len(ovs_key)) != nla_len(a))
+		return -EINVAL;
+
+	if (key_type > OVS_KEY_ATTR_MAX ||
+	    nla_len(ovs_key) != ovs_key_lens[key_type])
+		return -EINVAL;
+
+	switch (key_type) {
+	const struct ovs_key_ipv4 *ipv4_key;
+
+	case OVS_KEY_ATTR_PRIORITY:
+	case OVS_KEY_ATTR_ETHERNET:
+		break;
+
+	case OVS_KEY_ATTR_IPV4:
+		if (flow_key->eth.type != htons(ETH_P_IP))
+			return -EINVAL;
+
+		if (!flow_key->ipv4.addr.src || !flow_key->ipv4.addr.dst)
+			return -EINVAL;
+
+		ipv4_key = nla_data(ovs_key);
+		if (ipv4_key->ipv4_proto != flow_key->ip.proto)
+			return -EINVAL;
+
+		if (ipv4_key->ipv4_frag != flow_key->ip.frag)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_TCP:
+		if (flow_key->ip.proto != IPPROTO_TCP)
+			return -EINVAL;
+
+		if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+			return -EINVAL;
+
+		break;
+
+	case OVS_KEY_ATTR_UDP:
+		if (flow_key->ip.proto != IPPROTO_UDP)
+			return -EINVAL;
+
+		if (!flow_key->ipv4.tp.src || !flow_key->ipv4.tp.dst)
+			return -EINVAL;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int validate_userspace(const struct nlattr *attr)
+{
+	static const struct nla_policy userspace_policy[OVS_USERSPACE_ATTR_MAX + 1] =	{
+		[OVS_USERSPACE_ATTR_PID] = {.type = NLA_U32 },
+		[OVS_USERSPACE_ATTR_USERDATA] = {.type = NLA_U64 },
+	};
+	struct nlattr *a[OVS_USERSPACE_ATTR_MAX + 1];
+	int error;
+
+	error = nla_parse_nested(a, OVS_USERSPACE_ATTR_MAX,
+				 attr, userspace_policy);
+	if (error)
+		return error;
+
+	if (!a[OVS_USERSPACE_ATTR_PID] ||
+	    !nla_get_u32(a[OVS_USERSPACE_ATTR_PID]))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int validate_actions(const struct nlattr *attr,
+				const struct sw_flow_key *key,  int depth)
+{
+	const struct nlattr *a;
+	int rem, err;
+
+	if (depth >= SAMPLE_ACTION_DEPTH)
+		return -EOVERFLOW;
+
+	nla_for_each_nested(a, attr, rem) {
+		/* Expected argument lengths, (u32)-1 for variable length. */
+		static const u32 action_lens[OVS_ACTION_ATTR_MAX + 1] = {
+			[OVS_ACTION_ATTR_OUTPUT] = sizeof(u32),
+			[OVS_ACTION_ATTR_USERSPACE] = (u32)-1,
+			[OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan),
+			[OVS_ACTION_ATTR_POP_VLAN] = 0,
+			[OVS_ACTION_ATTR_SET] = (u32)-1,
+			[OVS_ACTION_ATTR_SAMPLE] = (u32)-1
+		};
+		const struct ovs_action_push_vlan *vlan;
+		int type = nla_type(a);
+
+		if (type > OVS_ACTION_ATTR_MAX ||
+		    (action_lens[type] != nla_len(a) &&
+		     action_lens[type] != (u32)-1))
+			return -EINVAL;
+
+		switch (type) {
+		case OVS_ACTION_ATTR_UNSPEC:
+			return -EINVAL;
+
+		case OVS_ACTION_ATTR_USERSPACE:
+			err = validate_userspace(a);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_OUTPUT:
+			if (nla_get_u32(a) >= DP_MAX_PORTS)
+				return -EINVAL;
+			break;
+
+
+		case OVS_ACTION_ATTR_POP_VLAN:
+			break;
+
+		case OVS_ACTION_ATTR_PUSH_VLAN:
+			vlan = nla_data(a);
+			if (vlan->vlan_tpid != htons(ETH_P_8021Q))
+				return -EINVAL;
+			if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT)))
+				return -EINVAL;
+			break;
+
+		case OVS_ACTION_ATTR_SET:
+			err = validate_set(a, key);
+			if (err)
+				return err;
+			break;
+
+		case OVS_ACTION_ATTR_SAMPLE:
+			err = validate_sample(a, key, depth);
+			if (err)
+				return err;
+			break;
+
+		default:
+			return -EINVAL;
+		}
+	}
+
+	if (rem > 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void clear_stats(struct sw_flow *flow)
+{
+	flow->used = 0;
+	flow->tcp_flags = 0;
+	flow->packet_count = 0;
+	flow->byte_count = 0;
+}
+
+static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
+{
+	struct ovs_header *ovs_header = info->userhdr;
+	struct nlattr **a = info->attrs;
+	struct sw_flow_actions *acts;
+	struct sk_buff *packet;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct ethhdr *eth;
+	int len;
+	int err;
+	int key_len;
+
+	err = -EINVAL;
+	if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
+	    !a[OVS_PACKET_ATTR_ACTIONS] ||
+	    nla_len(a[OVS_PACKET_ATTR_PACKET]) < ETH_HLEN)
+		goto err;
+
+	len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
+	packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
+	err = -ENOMEM;
+	if (!packet)
+		goto err;
+	skb_reserve(packet, NET_IP_ALIGN);
+
+	memcpy(__skb_put(packet, len), nla_data(a[OVS_PACKET_ATTR_PACKET]), len);
+
+	skb_reset_mac_header(packet);
+	eth = eth_hdr(packet);
+
+	/* Normally, setting the skb 'protocol' field would be handled by a
+	 * call to eth_type_trans(), but it assumes there's a sending
+	 * device, which we may not have. */
+	if (ntohs(eth->h_proto) >= 1536)
+		packet->protocol = eth->h_proto;
+	else
+		packet->protocol = htons(ETH_P_802_2);
+
+	/* Build an sw_flow for sending this packet. */
+	flow = ovs_flow_alloc();
+	err = PTR_ERR(flow);
+	if (IS_ERR(flow))
+		goto err_kfree_skb;
+
+	err = ovs_flow_extract(packet, -1, &flow->key, &key_len);
+	if (err)
+		goto err_flow_free;
+
+	err = ovs_flow_metadata_from_nlattrs(&flow->key.phy.priority,
+					     &flow->key.phy.in_port,
+					     a[OVS_PACKET_ATTR_KEY]);
+	if (err)
+		goto err_flow_free;
+
+	err = validate_actions(a[OVS_PACKET_ATTR_ACTIONS], &flow->key, 0);
+	if (err)
+		goto err_flow_free;
+
+	flow->hash = ovs_flow_hash(&flow->key, key_len);
+
+	acts = ovs_flow_actions_alloc(a[OVS_PACKET_ATTR_ACTIONS]);
+	err = PTR_ERR(acts);
+	if (IS_ERR(acts))
+		goto err_flow_free;
+	rcu_assign_pointer(flow->sf_acts, acts);
+
+	OVS_CB(packet)->flow = flow;
+	packet->priority = flow->key.phy.priority;
+
+	rcu_read_lock();
+	dp = get_dp(ovs_header->dp_ifindex);
+	err = -ENODEV;
+	if (!dp)
+		goto err_unlock;
+
+	local_bh_disable();
+	err = ovs_execute_actions(dp, packet);
+	local_bh_enable();
+	rcu_read_unlock();
+
+	ovs_flow_free(flow);
+	return err;
+
+err_unlock:
+	rcu_read_unlock();
+err_flow_free:
+	ovs_flow_free(flow);
+err_kfree_skb:
+	kfree_skb(packet);
+err:
+	return err;
+}
+
+static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
+	[OVS_PACKET_ATTR_PACKET] = { .type = NLA_UNSPEC },
+	[OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_ops dp_packet_genl_ops[] = {
+	{ .cmd = OVS_PACKET_CMD_EXECUTE,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = packet_policy,
+	  .doit = ovs_packet_cmd_execute
+	}
+};
+
+static void get_dp_stats(struct datapath *dp, struct ovs_dp_stats *stats)
+{
+	int i;
+	struct flow_table *table = genl_dereference(dp->table);
+
+	stats->n_flows = ovs_flow_tbl_count(table);
+
+	stats->n_hit = stats->n_missed = stats->n_lost = 0;
+	for_each_possible_cpu(i) {
+		const struct dp_stats_percpu *percpu_stats;
+		struct dp_stats_percpu local_stats;
+		unsigned int start;
+
+		percpu_stats = per_cpu_ptr(dp->stats_percpu, i);
+
+		do {
+			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+			local_stats = *percpu_stats;
+		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+
+		stats->n_hit += local_stats.n_hit;
+		stats->n_missed += local_stats.n_missed;
+		stats->n_lost += local_stats.n_lost;
+	}
+}
+
+static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
+	[OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
+	[OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
+};
+
+static struct genl_family dp_flow_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_FLOW_FAMILY,
+	.version = OVS_FLOW_VERSION,
+	.maxattr = OVS_FLOW_ATTR_MAX
+};
+
+static struct genl_multicast_group ovs_dp_flow_multicast_group = {
+	.name = OVS_FLOW_MCGROUP
+};
+
+/* Called with genl_lock. */
+static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp,
+				  struct sk_buff *skb, u32 pid,
+				  u32 seq, u32 flags, u8 cmd)
+{
+	const int skb_orig_len = skb->len;
+	const struct sw_flow_actions *sf_acts;
+	struct ovs_flow_stats stats;
+	struct ovs_header *ovs_header;
+	struct nlattr *nla;
+	unsigned long used;
+	u8 tcp_flags;
+	int err;
+
+	sf_acts = rcu_dereference_protected(flow->sf_acts,
+					    lockdep_genl_is_held());
+
+	ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = get_dpifindex(dp);
+
+	nla = nla_nest_start(skb, OVS_FLOW_ATTR_KEY);
+	if (!nla)
+		goto nla_put_failure;
+	err = ovs_flow_to_nlattrs(&flow->key, skb);
+	if (err)
+		goto error;
+	nla_nest_end(skb, nla);
+
+	spin_lock_bh(&flow->lock);
+	used = flow->used;
+	stats.n_packets = flow->packet_count;
+	stats.n_bytes = flow->byte_count;
+	tcp_flags = flow->tcp_flags;
+	spin_unlock_bh(&flow->lock);
+
+	if (used)
+		NLA_PUT_U64(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used));
+
+	if (stats.n_packets)
+		NLA_PUT(skb, OVS_FLOW_ATTR_STATS,
+			sizeof(struct ovs_flow_stats), &stats);
+
+	if (tcp_flags)
+		NLA_PUT_U8(skb, OVS_FLOW_ATTR_TCP_FLAGS, tcp_flags);
+
+	/* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
+	 * this is the first flow to be dumped into 'skb'.  This is unusual for
+	 * Netlink but individual action lists can be longer than
+	 * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
+	 * The userspace caller can always fetch the actions separately if it
+	 * really wants them.  (Most userspace callers in fact don't care.)
+	 *
+	 * This can only fail for dump operations because the skb is always
+	 * properly sized for single flows.
+	 */
+	err = nla_put(skb, OVS_FLOW_ATTR_ACTIONS, sf_acts->actions_len,
+		      sf_acts->actions);
+	if (err < 0 && skb_orig_len)
+		goto error;
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+error:
+	genlmsg_cancel(skb, ovs_header);
+	return err;
+}
+
+static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow)
+{
+	const struct sw_flow_actions *sf_acts;
+	int len;
+
+	sf_acts = rcu_dereference_protected(flow->sf_acts,
+					    lockdep_genl_is_held());
+
+	/* OVS_FLOW_ATTR_KEY */
+	len = nla_total_size(FLOW_BUFSIZE);
+	/* OVS_FLOW_ATTR_ACTIONS */
+	len += nla_total_size(sf_acts->actions_len);
+	/* OVS_FLOW_ATTR_STATS */
+	len += nla_total_size(sizeof(struct ovs_flow_stats));
+	/* OVS_FLOW_ATTR_TCP_FLAGS */
+	len += nla_total_size(1);
+	/* OVS_FLOW_ATTR_USED */
+	len += nla_total_size(8);
+
+	len += NLMSG_ALIGN(sizeof(struct ovs_header));
+
+	return genlmsg_new(len, GFP_KERNEL);
+}
+
+static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow,
+					       struct datapath *dp,
+					       u32 pid, u32 seq, u8 cmd)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = ovs_flow_cmd_alloc_info(flow);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd);
+	BUG_ON(retval < 0);
+	return skb;
+}
+
+static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sw_flow *flow;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	struct flow_table *table;
+	int error;
+	int key_len;
+
+	/* Extract key. */
+	error = -EINVAL;
+	if (!a[OVS_FLOW_ATTR_KEY])
+		goto error;
+	error = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	if (error)
+		goto error;
+
+	/* Validate actions. */
+	if (a[OVS_FLOW_ATTR_ACTIONS]) {
+		error = validate_actions(a[OVS_FLOW_ATTR_ACTIONS], &key,  0);
+		if (error)
+			goto error;
+	} else if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW) {
+		error = -EINVAL;
+		goto error;
+	}
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	error = -ENODEV;
+	if (!dp)
+		goto error;
+
+	table = genl_dereference(dp->table);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	if (!flow) {
+		struct sw_flow_actions *acts;
+
+		/* Bail out if we're not allowed to create a new flow. */
+		error = -ENOENT;
+		if (info->genlhdr->cmd == OVS_FLOW_CMD_SET)
+			goto error;
+
+		/* Expand table, if necessary, to make room. */
+		if (ovs_flow_tbl_need_to_expand(table)) {
+			struct flow_table *new_table;
+
+			new_table = ovs_flow_tbl_expand(table);
+			if (!IS_ERR(new_table)) {
+				rcu_assign_pointer(dp->table, new_table);
+				ovs_flow_tbl_deferred_destroy(table);
+				table = genl_dereference(dp->table);
+			}
+		}
+
+		/* Allocate flow. */
+		flow = ovs_flow_alloc();
+		if (IS_ERR(flow)) {
+			error = PTR_ERR(flow);
+			goto error;
+		}
+		flow->key = key;
+		clear_stats(flow);
+
+		/* Obtain actions. */
+		acts = ovs_flow_actions_alloc(a[OVS_FLOW_ATTR_ACTIONS]);
+		error = PTR_ERR(acts);
+		if (IS_ERR(acts))
+			goto error_free_flow;
+		rcu_assign_pointer(flow->sf_acts, acts);
+
+		/* Put flow in bucket. */
+		flow->hash = ovs_flow_hash(&key, key_len);
+		ovs_flow_tbl_insert(table, flow);
+
+		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+						info->snd_seq,
+						OVS_FLOW_CMD_NEW);
+	} else {
+		/* We found a matching flow. */
+		struct sw_flow_actions *old_acts;
+		struct nlattr *acts_attrs;
+
+		/* Bail out if we're not allowed to modify an existing flow.
+		 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
+		 * because Generic Netlink treats the latter as a dump
+		 * request.  We also accept NLM_F_EXCL in case that bug ever
+		 * gets fixed.
+		 */
+		error = -EEXIST;
+		if (info->genlhdr->cmd == OVS_FLOW_CMD_NEW &&
+		    info->nlhdr->nlmsg_flags & (NLM_F_CREATE | NLM_F_EXCL))
+			goto error;
+
+		/* Update actions. */
+		old_acts = rcu_dereference_protected(flow->sf_acts,
+						     lockdep_genl_is_held());
+		acts_attrs = a[OVS_FLOW_ATTR_ACTIONS];
+		if (acts_attrs &&
+		   (old_acts->actions_len != nla_len(acts_attrs) ||
+		   memcmp(old_acts->actions, nla_data(acts_attrs),
+			  old_acts->actions_len))) {
+			struct sw_flow_actions *new_acts;
+
+			new_acts = ovs_flow_actions_alloc(acts_attrs);
+			error = PTR_ERR(new_acts);
+			if (IS_ERR(new_acts))
+				goto error;
+
+			rcu_assign_pointer(flow->sf_acts, new_acts);
+			ovs_flow_deferred_free_acts(old_acts);
+		}
+
+		reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+					       info->snd_seq, OVS_FLOW_CMD_NEW);
+
+		/* Clear stats. */
+		if (a[OVS_FLOW_ATTR_CLEAR]) {
+			spin_lock_bh(&flow->lock);
+			clear_stats(flow);
+			spin_unlock_bh(&flow->lock);
+		}
+	}
+
+	if (!IS_ERR(reply))
+		genl_notify(reply, genl_info_net(info), info->snd_pid,
+			   ovs_dp_flow_multicast_group.id, info->nlhdr,
+			   GFP_KERNEL);
+	else
+		netlink_set_err(init_net.genl_sock, 0,
+				ovs_dp_flow_multicast_group.id, PTR_ERR(reply));
+	return 0;
+
+error_free_flow:
+	ovs_flow_free(flow);
+error:
+	return error;
+}
+
+static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sk_buff *reply;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct flow_table *table;
+	int err;
+	int key_len;
+
+	if (!a[OVS_FLOW_ATTR_KEY])
+		return -EINVAL;
+	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	if (err)
+		return err;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	table = genl_dereference(dp->table);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	if (!flow)
+		return -ENOENT;
+
+	reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid,
+					info->snd_seq, OVS_FLOW_CMD_NEW);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	return genlmsg_reply(reply, info);
+}
+
+static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sw_flow_key key;
+	struct sk_buff *reply;
+	struct sw_flow *flow;
+	struct datapath *dp;
+	struct flow_table *table;
+	int err;
+	int key_len;
+
+	if (!a[OVS_FLOW_ATTR_KEY])
+		return flush_flows(ovs_header->dp_ifindex);
+	err = ovs_flow_from_nlattrs(&key, &key_len, a[OVS_FLOW_ATTR_KEY]);
+	if (err)
+		return err;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	table = genl_dereference(dp->table);
+	flow = ovs_flow_tbl_lookup(table, &key, key_len);
+	if (!flow)
+		return -ENOENT;
+
+	reply = ovs_flow_cmd_alloc_info(flow);
+	if (!reply)
+		return -ENOMEM;
+
+	ovs_flow_tbl_remove(table, flow);
+
+	err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid,
+				     info->snd_seq, 0, OVS_FLOW_CMD_DEL);
+	BUG_ON(err < 0);
+
+	ovs_flow_deferred_free(flow);
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL);
+	return 0;
+}
+
+static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct datapath *dp;
+	struct flow_table *table;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	table = genl_dereference(dp->table);
+
+	for (;;) {
+		struct sw_flow *flow;
+		u32 bucket, obj;
+
+		bucket = cb->args[0];
+		obj = cb->args[1];
+		flow = ovs_flow_tbl_next(table, &bucket, &obj);
+		if (!flow)
+			break;
+
+		if (ovs_flow_cmd_fill_info(flow, dp, skb,
+					   NETLINK_CB(cb->skb).pid,
+					   cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					   OVS_FLOW_CMD_NEW) < 0)
+			break;
+
+		cb->args[0] = bucket;
+		cb->args[1] = obj;
+	}
+	return skb->len;
+}
+
+static struct genl_ops dp_flow_genl_ops[] = {
+	{ .cmd = OVS_FLOW_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_new_or_set
+	},
+	{ .cmd = OVS_FLOW_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_del
+	},
+	{ .cmd = OVS_FLOW_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_get,
+	  .dumpit = ovs_flow_cmd_dump
+	},
+	{ .cmd = OVS_FLOW_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = flow_policy,
+	  .doit = ovs_flow_cmd_new_or_set,
+	},
+};
+
+static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
+	[OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+};
+
+static struct genl_family dp_datapath_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_DATAPATH_FAMILY,
+	.version = OVS_DATAPATH_VERSION,
+	.maxattr = OVS_DP_ATTR_MAX
+};
+
+static struct genl_multicast_group ovs_dp_datapath_multicast_group = {
+	.name = OVS_DATAPATH_MCGROUP
+};
+
+static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
+				u32 pid, u32 seq, u32 flags, u8 cmd)
+{
+	struct ovs_header *ovs_header;
+	struct ovs_dp_stats dp_stats;
+	int err;
+
+	ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family,
+				   flags, cmd);
+	if (!ovs_header)
+		goto error;
+
+	ovs_header->dp_ifindex = get_dpifindex(dp);
+
+	rcu_read_lock();
+	err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
+	rcu_read_unlock();
+	if (err)
+		goto nla_put_failure;
+
+	get_dp_stats(dp, &dp_stats);
+	NLA_PUT(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats), &dp_stats);
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	genlmsg_cancel(skb, ovs_header);
+error:
+	return -EMSGSIZE;
+}
+
+static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid,
+					     u32 seq, u8 cmd)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd);
+	if (retval < 0) {
+		kfree_skb(skb);
+		return ERR_PTR(retval);
+	}
+	return skb;
+}
+
+/* Called with genl_mutex and optionally with RTNL lock also. */
+static struct datapath *lookup_datapath(struct ovs_header *ovs_header,
+					struct nlattr *a[OVS_DP_ATTR_MAX + 1])
+{
+	struct datapath *dp;
+
+	if (!a[OVS_DP_ATTR_NAME])
+		dp = get_dp(ovs_header->dp_ifindex);
+	else {
+		struct vport *vport;
+
+		rcu_read_lock();
+		vport = ovs_vport_locate(nla_data(a[OVS_DP_ATTR_NAME]));
+		dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
+		rcu_read_unlock();
+	}
+	return dp ? dp : ERR_PTR(-ENODEV);
+}
+
+static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct vport_parms parms;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	struct vport *vport;
+	int err;
+
+	err = -EINVAL;
+	if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
+		goto err;
+
+	rtnl_lock();
+	err = -ENODEV;
+	if (!try_module_get(THIS_MODULE))
+		goto err_unlock_rtnl;
+
+	err = -ENOMEM;
+	dp = kzalloc(sizeof(*dp), GFP_KERNEL);
+	if (dp == NULL)
+		goto err_put_module;
+	INIT_LIST_HEAD(&dp->port_list);
+
+	/* Allocate table. */
+	err = -ENOMEM;
+	rcu_assign_pointer(dp->table, ovs_flow_tbl_alloc(TBL_MIN_BUCKETS));
+	if (!dp->table)
+		goto err_free_dp;
+
+	dp->stats_percpu = alloc_percpu(struct dp_stats_percpu);
+	if (!dp->stats_percpu) {
+		err = -ENOMEM;
+		goto err_destroy_table;
+	}
+
+	/* Set up our datapath device. */
+	parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
+	parms.type = OVS_VPORT_TYPE_INTERNAL;
+	parms.options = NULL;
+	parms.dp = dp;
+	parms.port_no = OVSP_LOCAL;
+	parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]);
+
+	vport = new_vport(&parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		if (err == -EBUSY)
+			err = -EEXIST;
+
+		goto err_destroy_percpu;
+	}
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_NEW);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto err_destroy_local_port;
+
+	list_add_tail(&dp->list_node, &dps);
+	rtnl_unlock();
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
+		    GFP_KERNEL);
+	return 0;
+
+err_destroy_local_port:
+	ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+err_destroy_percpu:
+	free_percpu(dp->stats_percpu);
+err_destroy_table:
+	ovs_flow_tbl_destroy(genl_dereference(dp->table));
+err_free_dp:
+	kfree(dp);
+err_put_module:
+	module_put(THIS_MODULE);
+err_unlock_rtnl:
+	rtnl_unlock();
+err:
+	return err;
+}
+
+static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct vport *vport, *next_vport;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err;
+
+	rtnl_lock();
+	dp = lookup_datapath(info->userhdr, info->attrs);
+	err = PTR_ERR(dp);
+	if (IS_ERR(dp))
+		goto exit_unlock;
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_DEL);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto exit_unlock;
+
+	list_for_each_entry_safe(vport, next_vport, &dp->port_list, node)
+		if (vport->port_no != OVSP_LOCAL)
+			ovs_dp_detach_port(vport);
+
+	list_del(&dp->list_node);
+	ovs_dp_detach_port(rtnl_dereference(dp->ports[OVSP_LOCAL]));
+
+	/* rtnl_unlock() will wait until all the references to devices that
+	 * are pending unregistration have been dropped.  We do it here to
+	 * ensure that any internal devices (which contain DP pointers) are
+	 * fully destroyed before freeing the datapath.
+	 */
+	rtnl_unlock();
+
+	call_rcu(&dp->rcu, destroy_dp_rcu);
+	module_put(THIS_MODULE);
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
+		    GFP_KERNEL);
+
+	return 0;
+
+exit_unlock:
+	rtnl_unlock();
+	return err;
+}
+
+static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err;
+
+	dp = lookup_datapath(info->userhdr, info->attrs);
+	if (IS_ERR(dp))
+		return PTR_ERR(dp);
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_NEW);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		netlink_set_err(init_net.genl_sock, 0,
+				ovs_dp_datapath_multicast_group.id, err);
+		return 0;
+	}
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_datapath_multicast_group.id, info->nlhdr,
+		    GFP_KERNEL);
+
+	return 0;
+}
+
+static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *reply;
+	struct datapath *dp;
+
+	dp = lookup_datapath(info->userhdr, info->attrs);
+	if (IS_ERR(dp))
+		return PTR_ERR(dp);
+
+	reply = ovs_dp_cmd_build_info(dp, info->snd_pid,
+				      info->snd_seq, OVS_DP_CMD_NEW);
+	if (IS_ERR(reply))
+		return PTR_ERR(reply);
+
+	return genlmsg_reply(reply, info);
+}
+
+static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct datapath *dp;
+	int skip = cb->args[0];
+	int i = 0;
+
+	list_for_each_entry(dp, &dps, list_node) {
+		if (i < skip)
+			continue;
+		if (ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 OVS_DP_CMD_NEW) < 0)
+			break;
+		i++;
+	}
+
+	cb->args[0] = i;
+
+	return skb->len;
+}
+
+static struct genl_ops dp_datapath_genl_ops[] = {
+	{ .cmd = OVS_DP_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_new
+	},
+	{ .cmd = OVS_DP_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_del
+	},
+	{ .cmd = OVS_DP_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_get,
+	  .dumpit = ovs_dp_cmd_dump
+	},
+	{ .cmd = OVS_DP_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = datapath_policy,
+	  .doit = ovs_dp_cmd_set,
+	},
+};
+
+static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
+	[OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
+	[OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
+	[OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 },
+	[OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
+};
+
+static struct genl_family dp_vport_genl_family = {
+	.id = GENL_ID_GENERATE,
+	.hdrsize = sizeof(struct ovs_header),
+	.name = OVS_VPORT_FAMILY,
+	.version = OVS_VPORT_VERSION,
+	.maxattr = OVS_VPORT_ATTR_MAX
+};
+
+struct genl_multicast_group ovs_dp_vport_multicast_group = {
+	.name = OVS_VPORT_MCGROUP
+};
+
+/* Called with RTNL lock or RCU read lock. */
+static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
+				   u32 pid, u32 seq, u32 flags, u8 cmd)
+{
+	struct ovs_header *ovs_header;
+	struct ovs_vport_stats vport_stats;
+	int err;
+
+	ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family,
+				 flags, cmd);
+	if (!ovs_header)
+		return -EMSGSIZE;
+
+	ovs_header->dp_ifindex = get_dpifindex(vport->dp);
+
+	NLA_PUT_U32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no);
+	NLA_PUT_U32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type);
+	NLA_PUT_STRING(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport));
+	NLA_PUT_U32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid);
+
+	ovs_vport_get_stats(vport, &vport_stats);
+	NLA_PUT(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats),
+		&vport_stats);
+
+	err = ovs_vport_get_options(vport, skb);
+	if (err == -EMSGSIZE)
+		goto error;
+
+	return genlmsg_end(skb, ovs_header);
+
+nla_put_failure:
+	err = -EMSGSIZE;
+error:
+	genlmsg_cancel(skb, ovs_header);
+	return err;
+}
+
+/* Called with RTNL lock or RCU read lock. */
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid,
+					 u32 seq, u8 cmd)
+{
+	struct sk_buff *skb;
+	int retval;
+
+	skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd);
+	if (retval < 0) {
+		kfree_skb(skb);
+		return ERR_PTR(retval);
+	}
+	return skb;
+}
+
+/* Called with RTNL lock or RCU read lock. */
+static struct vport *lookup_vport(struct ovs_header *ovs_header,
+				  struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
+{
+	struct datapath *dp;
+	struct vport *vport;
+
+	if (a[OVS_VPORT_ATTR_NAME]) {
+		vport = ovs_vport_locate(nla_data(a[OVS_VPORT_ATTR_NAME]));
+		if (!vport)
+			return ERR_PTR(-ENODEV);
+		return vport;
+	} else if (a[OVS_VPORT_ATTR_PORT_NO]) {
+		u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
+
+		if (port_no >= DP_MAX_PORTS)
+			return ERR_PTR(-EFBIG);
+
+		dp = get_dp(ovs_header->dp_ifindex);
+		if (!dp)
+			return ERR_PTR(-ENODEV);
+
+		vport = rcu_dereference_rtnl(dp->ports[port_no]);
+		if (!vport)
+			return ERR_PTR(-ENOENT);
+		return vport;
+	} else
+		return ERR_PTR(-EINVAL);
+}
+
+static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct vport_parms parms;
+	struct sk_buff *reply;
+	struct vport *vport;
+	struct datapath *dp;
+	u32 port_no;
+	int err;
+
+	err = -EINVAL;
+	if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
+	    !a[OVS_VPORT_ATTR_UPCALL_PID])
+		goto exit;
+
+	rtnl_lock();
+	dp = get_dp(ovs_header->dp_ifindex);
+	err = -ENODEV;
+	if (!dp)
+		goto exit_unlock;
+
+	if (a[OVS_VPORT_ATTR_PORT_NO]) {
+		port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
+
+		err = -EFBIG;
+		if (port_no >= DP_MAX_PORTS)
+			goto exit_unlock;
+
+		vport = rtnl_dereference(dp->ports[port_no]);
+		err = -EBUSY;
+		if (vport)
+			goto exit_unlock;
+	} else {
+		for (port_no = 1; ; port_no++) {
+			if (port_no >= DP_MAX_PORTS) {
+				err = -EFBIG;
+				goto exit_unlock;
+			}
+			vport = rtnl_dereference(dp->ports[port_no]);
+			if (!vport)
+				break;
+		}
+	}
+
+	parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
+	parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
+	parms.options = a[OVS_VPORT_ATTR_OPTIONS];
+	parms.dp = dp;
+	parms.port_no = port_no;
+	parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+	vport = new_vport(&parms);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_NEW);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		ovs_dp_detach_port(vport);
+		goto exit_unlock;
+	}
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+	rtnl_unlock();
+exit:
+	return err;
+}
+
+static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	rtnl_lock();
+	vport = lookup_vport(info->userhdr, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	err = 0;
+	if (a[OVS_VPORT_ATTR_TYPE] &&
+	    nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type)
+		err = -EINVAL;
+
+	if (!err && a[OVS_VPORT_ATTR_OPTIONS])
+		err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
+	if (!err && a[OVS_VPORT_ATTR_UPCALL_PID])
+		vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]);
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_NEW);
+	if (IS_ERR(reply)) {
+		err = PTR_ERR(reply);
+		netlink_set_err(init_net.genl_sock, 0,
+				ovs_dp_vport_multicast_group.id, err);
+		return 0;
+	}
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+	rtnl_unlock();
+	return err;
+}
+
+static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	rtnl_lock();
+	vport = lookup_vport(info->userhdr, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	if (vport->port_no == OVSP_LOCAL) {
+		err = -EINVAL;
+		goto exit_unlock;
+	}
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_DEL);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto exit_unlock;
+
+	ovs_dp_detach_port(vport);
+
+	genl_notify(reply, genl_info_net(info), info->snd_pid,
+		    ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL);
+
+exit_unlock:
+	rtnl_unlock();
+	return err;
+}
+
+static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr **a = info->attrs;
+	struct ovs_header *ovs_header = info->userhdr;
+	struct sk_buff *reply;
+	struct vport *vport;
+	int err;
+
+	rcu_read_lock();
+	vport = lookup_vport(ovs_header, a);
+	err = PTR_ERR(vport);
+	if (IS_ERR(vport))
+		goto exit_unlock;
+
+	reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq,
+					 OVS_VPORT_CMD_NEW);
+	err = PTR_ERR(reply);
+	if (IS_ERR(reply))
+		goto exit_unlock;
+
+	rcu_read_unlock();
+
+	return genlmsg_reply(reply, info);
+
+exit_unlock:
+	rcu_read_unlock();
+	return err;
+}
+
+static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
+	struct datapath *dp;
+	u32 port_no;
+	int retval;
+
+	dp = get_dp(ovs_header->dp_ifindex);
+	if (!dp)
+		return -ENODEV;
+
+	rcu_read_lock();
+	for (port_no = cb->args[0]; port_no < DP_MAX_PORTS; port_no++) {
+		struct vport *vport;
+
+		vport = rcu_dereference(dp->ports[port_no]);
+		if (!vport)
+			continue;
+
+		if (ovs_vport_cmd_fill_info(vport, skb, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					    OVS_VPORT_CMD_NEW) < 0)
+			break;
+	}
+	rcu_read_unlock();
+
+	cb->args[0] = port_no;
+	retval = skb->len;
+
+	return retval;
+}
+
+static void rehash_flow_table(struct work_struct *work)
+{
+	struct datapath *dp;
+
+	genl_lock();
+
+	list_for_each_entry(dp, &dps, list_node) {
+		struct flow_table *old_table = genl_dereference(dp->table);
+		struct flow_table *new_table;
+
+		new_table = ovs_flow_tbl_rehash(old_table);
+		if (!IS_ERR(new_table)) {
+			rcu_assign_pointer(dp->table, new_table);
+			ovs_flow_tbl_deferred_destroy(old_table);
+		}
+	}
+
+	genl_unlock();
+
+	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+}
+
+static struct genl_ops dp_vport_genl_ops[] = {
+	{ .cmd = OVS_VPORT_CMD_NEW,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_new
+	},
+	{ .cmd = OVS_VPORT_CMD_DEL,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_del
+	},
+	{ .cmd = OVS_VPORT_CMD_GET,
+	  .flags = 0,		    /* OK for unprivileged users. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_get,
+	  .dumpit = ovs_vport_cmd_dump
+	},
+	{ .cmd = OVS_VPORT_CMD_SET,
+	  .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
+	  .policy = vport_policy,
+	  .doit = ovs_vport_cmd_set,
+	},
+};
+
+struct genl_family_and_ops {
+	struct genl_family *family;
+	struct genl_ops *ops;
+	int n_ops;
+	struct genl_multicast_group *group;
+};
+
+static const struct genl_family_and_ops dp_genl_families[] = {
+	{ &dp_datapath_genl_family,
+	  dp_datapath_genl_ops, ARRAY_SIZE(dp_datapath_genl_ops),
+	  &ovs_dp_datapath_multicast_group },
+	{ &dp_vport_genl_family,
+	  dp_vport_genl_ops, ARRAY_SIZE(dp_vport_genl_ops),
+	  &ovs_dp_vport_multicast_group },
+	{ &dp_flow_genl_family,
+	  dp_flow_genl_ops, ARRAY_SIZE(dp_flow_genl_ops),
+	  &ovs_dp_flow_multicast_group },
+	{ &dp_packet_genl_family,
+	  dp_packet_genl_ops, ARRAY_SIZE(dp_packet_genl_ops),
+	  NULL },
+};
+
+static void dp_unregister_genl(int n_families)
+{
+	int i;
+
+	for (i = 0; i < n_families; i++)
+		genl_unregister_family(dp_genl_families[i].family);
+}
+
+static int dp_register_genl(void)
+{
+	int n_registered;
+	int err;
+	int i;
+
+	n_registered = 0;
+	for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
+		const struct genl_family_and_ops *f = &dp_genl_families[i];
+
+		err = genl_register_family_with_ops(f->family, f->ops,
+						    f->n_ops);
+		if (err)
+			goto error;
+		n_registered++;
+
+		if (f->group) {
+			err = genl_register_mc_group(f->family, f->group);
+			if (err)
+				goto error;
+		}
+	}
+
+	return 0;
+
+error:
+	dp_unregister_genl(n_registered);
+	return err;
+}
+
+static int __init dp_init(void)
+{
+	struct sk_buff *dummy_skb;
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct ovs_skb_cb) > sizeof(dummy_skb->cb));
+
+	pr_info("Open vSwitch switching datapath\n");
+
+	err = ovs_flow_init();
+	if (err)
+		goto error;
+
+	err = ovs_vport_init();
+	if (err)
+		goto error_flow_exit;
+
+	err = register_netdevice_notifier(&ovs_dp_device_notifier);
+	if (err)
+		goto error_vport_exit;
+
+	err = dp_register_genl();
+	if (err < 0)
+		goto error_unreg_notifier;
+
+	schedule_delayed_work(&rehash_flow_wq, REHASH_FLOW_INTERVAL);
+
+	return 0;
+
+error_unreg_notifier:
+	unregister_netdevice_notifier(&ovs_dp_device_notifier);
+error_vport_exit:
+	ovs_vport_exit();
+error_flow_exit:
+	ovs_flow_exit();
+error:
+	return err;
+}
+
+static void dp_cleanup(void)
+{
+	cancel_delayed_work_sync(&rehash_flow_wq);
+	rcu_barrier();
+	dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
+	unregister_netdevice_notifier(&ovs_dp_device_notifier);
+	ovs_vport_exit();
+	ovs_flow_exit();
+}
+
+module_init(dp_init);
+module_exit(dp_cleanup);
+
+MODULE_DESCRIPTION("Open vSwitch switching datapath");
+MODULE_LICENSE("GPL");
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
new file mode 100644
index 000000000000..5b9f884b7055
--- /dev/null
+++ b/net/openvswitch/datapath.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef DATAPATH_H
+#define DATAPATH_H 1
+
+#include <asm/page.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/skbuff.h>
+#include <linux/u64_stats_sync.h>
+#include <linux/version.h>
+
+#include "flow.h"
+
+struct vport;
+
+#define DP_MAX_PORTS 1024
+#define SAMPLE_ACTION_DEPTH 3
+
+/**
+ * struct dp_stats_percpu - per-cpu packet processing statistics for a given
+ * datapath.
+ * @n_hit: Number of received packets for which a matching flow was found in
+ * the flow table.
+ * @n_miss: Number of received packets that had no matching flow in the flow
+ * table.  The sum of @n_hit and @n_miss is the number of packets that have
+ * been received by the datapath.
+ * @n_lost: Number of received packets that had no matching flow in the flow
+ * table that could not be sent to userspace (normally due to an overflow in
+ * one of the datapath's queues).
+ */
+struct dp_stats_percpu {
+	u64 n_hit;
+	u64 n_missed;
+	u64 n_lost;
+	struct u64_stats_sync sync;
+};
+
+/**
+ * struct datapath - datapath for flow-based packet switching
+ * @rcu: RCU callback head for deferred destruction.
+ * @list_node: Element in global 'dps' list.
+ * @n_flows: Number of flows currently in flow table.
+ * @table: Current flow table.  Protected by genl_lock and RCU.
+ * @ports: Map from port number to &struct vport.  %OVSP_LOCAL port
+ * always exists, other ports may be %NULL.  Protected by RTNL and RCU.
+ * @port_list: List of all ports in @ports in arbitrary order.  RTNL required
+ * to iterate or modify.
+ * @stats_percpu: Per-CPU datapath statistics.
+ *
+ * Context: See the comment on locking at the top of datapath.c for additional
+ * locking information.
+ */
+struct datapath {
+	struct rcu_head rcu;
+	struct list_head list_node;
+
+	/* Flow table. */
+	struct flow_table __rcu *table;
+
+	/* Switch ports. */
+	struct vport __rcu *ports[DP_MAX_PORTS];
+	struct list_head port_list;
+
+	/* Stats. */
+	struct dp_stats_percpu __percpu *stats_percpu;
+};
+
+/**
+ * struct ovs_skb_cb - OVS data in skb CB
+ * @flow: The flow associated with this packet.  May be %NULL if no flow.
+ */
+struct ovs_skb_cb {
+	struct sw_flow		*flow;
+};
+#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
+
+/**
+ * struct dp_upcall - metadata to include with a packet to send to userspace
+ * @cmd: One of %OVS_PACKET_CMD_*.
+ * @key: Becomes %OVS_PACKET_ATTR_KEY.  Must be nonnull.
+ * @userdata: If nonnull, its u64 value is extracted and passed to userspace as
+ * %OVS_PACKET_ATTR_USERDATA.
+ * @pid: Netlink PID to which packet should be sent.  If @pid is 0 then no
+ * packet is sent and the packet is accounted in the datapath's @n_lost
+ * counter.
+ */
+struct dp_upcall_info {
+	u8 cmd;
+	const struct sw_flow_key *key;
+	const struct nlattr *userdata;
+	u32 pid;
+};
+
+extern struct notifier_block ovs_dp_device_notifier;
+extern struct genl_multicast_group ovs_dp_vport_multicast_group;
+
+void ovs_dp_process_received_packet(struct vport *, struct sk_buff *);
+void ovs_dp_detach_port(struct vport *);
+int ovs_dp_upcall(struct datapath *, struct sk_buff *,
+		  const struct dp_upcall_info *);
+
+const char *ovs_dp_name(const struct datapath *dp);
+struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq,
+					 u8 cmd);
+
+int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb);
+#endif /* datapath.h */
diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c
new file mode 100644
index 000000000000..46736518c453
--- /dev/null
+++ b/net/openvswitch/dp_notify.c
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/netdevice.h>
+#include <net/genetlink.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+static int dp_device_event(struct notifier_block *unused, unsigned long event,
+			   void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct vport *vport;
+
+	if (ovs_is_internal_dev(dev))
+		vport = ovs_internal_dev_get_vport(dev);
+	else
+		vport = ovs_netdev_get_vport(dev);
+
+	if (!vport)
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case NETDEV_UNREGISTER:
+		if (!ovs_is_internal_dev(dev)) {
+			struct sk_buff *notify;
+
+			notify = ovs_vport_cmd_build_info(vport, 0, 0,
+							  OVS_VPORT_CMD_DEL);
+			ovs_dp_detach_port(vport);
+			if (IS_ERR(notify)) {
+				netlink_set_err(init_net.genl_sock, 0,
+						ovs_dp_vport_multicast_group.id,
+						PTR_ERR(notify));
+				break;
+			}
+
+			genlmsg_multicast(notify, 0, ovs_dp_vport_multicast_group.id,
+					  GFP_KERNEL);
+		}
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
+struct notifier_block ovs_dp_device_notifier = {
+	.notifier_call = dp_device_event
+};
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
new file mode 100644
index 000000000000..fe7f020a843e
--- /dev/null
+++ b/net/openvswitch/flow.c
@@ -0,0 +1,1346 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include "flow.h"
+#include "datapath.h"
+#include <linux/uaccess.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <net/llc_pdu.h>
+#include <linux/kernel.h>
+#include <linux/jhash.h>
+#include <linux/jiffies.h>
+#include <linux/llc.h>
+#include <linux/module.h>
+#include <linux/in.h>
+#include <linux/rcupdate.h>
+#include <linux/if_arp.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/rculist.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ndisc.h>
+
+static struct kmem_cache *flow_cache;
+
+static int check_header(struct sk_buff *skb, int len)
+{
+	if (unlikely(skb->len < len))
+		return -EINVAL;
+	if (unlikely(!pskb_may_pull(skb, len)))
+		return -ENOMEM;
+	return 0;
+}
+
+static bool arphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_network_offset(skb) +
+				  sizeof(struct arp_eth_header));
+}
+
+static int check_iphdr(struct sk_buff *skb)
+{
+	unsigned int nh_ofs = skb_network_offset(skb);
+	unsigned int ip_len;
+	int err;
+
+	err = check_header(skb, nh_ofs + sizeof(struct iphdr));
+	if (unlikely(err))
+		return err;
+
+	ip_len = ip_hdrlen(skb);
+	if (unlikely(ip_len < sizeof(struct iphdr) ||
+		     skb->len < nh_ofs + ip_len))
+		return -EINVAL;
+
+	skb_set_transport_header(skb, nh_ofs + ip_len);
+	return 0;
+}
+
+static bool tcphdr_ok(struct sk_buff *skb)
+{
+	int th_ofs = skb_transport_offset(skb);
+	int tcp_len;
+
+	if (unlikely(!pskb_may_pull(skb, th_ofs + sizeof(struct tcphdr))))
+		return false;
+
+	tcp_len = tcp_hdrlen(skb);
+	if (unlikely(tcp_len < sizeof(struct tcphdr) ||
+		     skb->len < th_ofs + tcp_len))
+		return false;
+
+	return true;
+}
+
+static bool udphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct udphdr));
+}
+
+static bool icmphdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmphdr));
+}
+
+u64 ovs_flow_used_time(unsigned long flow_jiffies)
+{
+	struct timespec cur_ts;
+	u64 cur_ms, idle_ms;
+
+	ktime_get_ts(&cur_ts);
+	idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
+	cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+		 cur_ts.tv_nsec / NSEC_PER_MSEC;
+
+	return cur_ms - idle_ms;
+}
+
+#define SW_FLOW_KEY_OFFSET(field)		\
+	(offsetof(struct sw_flow_key, field) +	\
+	 FIELD_SIZEOF(struct sw_flow_key, field))
+
+static int parse_ipv6hdr(struct sk_buff *skb, struct sw_flow_key *key,
+			 int *key_lenp)
+{
+	unsigned int nh_ofs = skb_network_offset(skb);
+	unsigned int nh_len;
+	int payload_ofs;
+	struct ipv6hdr *nh;
+	uint8_t nexthdr;
+	__be16 frag_off;
+	int err;
+
+	*key_lenp = SW_FLOW_KEY_OFFSET(ipv6.label);
+
+	err = check_header(skb, nh_ofs + sizeof(*nh));
+	if (unlikely(err))
+		return err;
+
+	nh = ipv6_hdr(skb);
+	nexthdr = nh->nexthdr;
+	payload_ofs = (u8 *)(nh + 1) - skb->data;
+
+	key->ip.proto = NEXTHDR_NONE;
+	key->ip.tos = ipv6_get_dsfield(nh);
+	key->ip.ttl = nh->hop_limit;
+	key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL);
+	key->ipv6.addr.src = nh->saddr;
+	key->ipv6.addr.dst = nh->daddr;
+
+	payload_ofs = ipv6_skip_exthdr(skb, payload_ofs, &nexthdr, &frag_off);
+	if (unlikely(payload_ofs < 0))
+		return -EINVAL;
+
+	if (frag_off) {
+		if (frag_off & htons(~0x7))
+			key->ip.frag = OVS_FRAG_TYPE_LATER;
+		else
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+	}
+
+	nh_len = payload_ofs - nh_ofs;
+	skb_set_transport_header(skb, nh_ofs + nh_len);
+	key->ip.proto = nexthdr;
+	return nh_len;
+}
+
+static bool icmp6hdr_ok(struct sk_buff *skb)
+{
+	return pskb_may_pull(skb, skb_transport_offset(skb) +
+				  sizeof(struct icmp6hdr));
+}
+
+#define TCP_FLAGS_OFFSET 13
+#define TCP_FLAG_MASK 0x3f
+
+void ovs_flow_used(struct sw_flow *flow, struct sk_buff *skb)
+{
+	u8 tcp_flags = 0;
+
+	if (flow->key.eth.type == htons(ETH_P_IP) &&
+	    flow->key.ip.proto == IPPROTO_TCP) {
+		u8 *tcp = (u8 *)tcp_hdr(skb);
+		tcp_flags = *(tcp + TCP_FLAGS_OFFSET) & TCP_FLAG_MASK;
+	}
+
+	spin_lock(&flow->lock);
+	flow->used = jiffies;
+	flow->packet_count++;
+	flow->byte_count += skb->len;
+	flow->tcp_flags |= tcp_flags;
+	spin_unlock(&flow->lock);
+}
+
+struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *actions)
+{
+	int actions_len = nla_len(actions);
+	struct sw_flow_actions *sfa;
+
+	/* At least DP_MAX_PORTS actions are required to be able to flood a
+	 * packet to every port.  Factor of 2 allows for setting VLAN tags,
+	 * etc. */
+	if (actions_len > 2 * DP_MAX_PORTS * nla_total_size(4))
+		return ERR_PTR(-EINVAL);
+
+	sfa = kmalloc(sizeof(*sfa) + actions_len, GFP_KERNEL);
+	if (!sfa)
+		return ERR_PTR(-ENOMEM);
+
+	sfa->actions_len = actions_len;
+	memcpy(sfa->actions, nla_data(actions), actions_len);
+	return sfa;
+}
+
+struct sw_flow *ovs_flow_alloc(void)
+{
+	struct sw_flow *flow;
+
+	flow = kmem_cache_alloc(flow_cache, GFP_KERNEL);
+	if (!flow)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&flow->lock);
+	flow->sf_acts = NULL;
+
+	return flow;
+}
+
+static struct hlist_head *find_bucket(struct flow_table *table, u32 hash)
+{
+	hash = jhash_1word(hash, table->hash_seed);
+	return flex_array_get(table->buckets,
+				(hash & (table->n_buckets - 1)));
+}
+
+static struct flex_array *alloc_buckets(unsigned int n_buckets)
+{
+	struct flex_array *buckets;
+	int i, err;
+
+	buckets = flex_array_alloc(sizeof(struct hlist_head *),
+				   n_buckets, GFP_KERNEL);
+	if (!buckets)
+		return NULL;
+
+	err = flex_array_prealloc(buckets, 0, n_buckets, GFP_KERNEL);
+	if (err) {
+		flex_array_free(buckets);
+		return NULL;
+	}
+
+	for (i = 0; i < n_buckets; i++)
+		INIT_HLIST_HEAD((struct hlist_head *)
+					flex_array_get(buckets, i));
+
+	return buckets;
+}
+
+static void free_buckets(struct flex_array *buckets)
+{
+	flex_array_free(buckets);
+}
+
+struct flow_table *ovs_flow_tbl_alloc(int new_size)
+{
+	struct flow_table *table = kmalloc(sizeof(*table), GFP_KERNEL);
+
+	if (!table)
+		return NULL;
+
+	table->buckets = alloc_buckets(new_size);
+
+	if (!table->buckets) {
+		kfree(table);
+		return NULL;
+	}
+	table->n_buckets = new_size;
+	table->count = 0;
+	table->node_ver = 0;
+	table->keep_flows = false;
+	get_random_bytes(&table->hash_seed, sizeof(u32));
+
+	return table;
+}
+
+void ovs_flow_tbl_destroy(struct flow_table *table)
+{
+	int i;
+
+	if (!table)
+		return;
+
+	if (table->keep_flows)
+		goto skip_flows;
+
+	for (i = 0; i < table->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head = flex_array_get(table->buckets, i);
+		struct hlist_node *node, *n;
+		int ver = table->node_ver;
+
+		hlist_for_each_entry_safe(flow, node, n, head, hash_node[ver]) {
+			hlist_del_rcu(&flow->hash_node[ver]);
+			ovs_flow_free(flow);
+		}
+	}
+
+skip_flows:
+	free_buckets(table->buckets);
+	kfree(table);
+}
+
+static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
+{
+	struct flow_table *table = container_of(rcu, struct flow_table, rcu);
+
+	ovs_flow_tbl_destroy(table);
+}
+
+void ovs_flow_tbl_deferred_destroy(struct flow_table *table)
+{
+	if (!table)
+		return;
+
+	call_rcu(&table->rcu, flow_tbl_destroy_rcu_cb);
+}
+
+struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *last)
+{
+	struct sw_flow *flow;
+	struct hlist_head *head;
+	struct hlist_node *n;
+	int ver;
+	int i;
+
+	ver = table->node_ver;
+	while (*bucket < table->n_buckets) {
+		i = 0;
+		head = flex_array_get(table->buckets, *bucket);
+		hlist_for_each_entry_rcu(flow, n, head, hash_node[ver]) {
+			if (i < *last) {
+				i++;
+				continue;
+			}
+			*last = i + 1;
+			return flow;
+		}
+		(*bucket)++;
+		*last = 0;
+	}
+
+	return NULL;
+}
+
+static void flow_table_copy_flows(struct flow_table *old, struct flow_table *new)
+{
+	int old_ver;
+	int i;
+
+	old_ver = old->node_ver;
+	new->node_ver = !old_ver;
+
+	/* Insert in new table. */
+	for (i = 0; i < old->n_buckets; i++) {
+		struct sw_flow *flow;
+		struct hlist_head *head;
+		struct hlist_node *n;
+
+		head = flex_array_get(old->buckets, i);
+
+		hlist_for_each_entry(flow, n, head, hash_node[old_ver])
+			ovs_flow_tbl_insert(new, flow);
+	}
+	old->keep_flows = true;
+}
+
+static struct flow_table *__flow_tbl_rehash(struct flow_table *table, int n_buckets)
+{
+	struct flow_table *new_table;
+
+	new_table = ovs_flow_tbl_alloc(n_buckets);
+	if (!new_table)
+		return ERR_PTR(-ENOMEM);
+
+	flow_table_copy_flows(table, new_table);
+
+	return new_table;
+}
+
+struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table)
+{
+	return __flow_tbl_rehash(table, table->n_buckets);
+}
+
+struct flow_table *ovs_flow_tbl_expand(struct flow_table *table)
+{
+	return __flow_tbl_rehash(table, table->n_buckets * 2);
+}
+
+void ovs_flow_free(struct sw_flow *flow)
+{
+	if (unlikely(!flow))
+		return;
+
+	kfree((struct sf_flow_acts __force *)flow->sf_acts);
+	kmem_cache_free(flow_cache, flow);
+}
+
+/* RCU callback used by ovs_flow_deferred_free. */
+static void rcu_free_flow_callback(struct rcu_head *rcu)
+{
+	struct sw_flow *flow = container_of(rcu, struct sw_flow, rcu);
+
+	ovs_flow_free(flow);
+}
+
+/* Schedules 'flow' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_flow_deferred_free(struct sw_flow *flow)
+{
+	call_rcu(&flow->rcu, rcu_free_flow_callback);
+}
+
+/* RCU callback used by ovs_flow_deferred_free_acts. */
+static void rcu_free_acts_callback(struct rcu_head *rcu)
+{
+	struct sw_flow_actions *sf_acts = container_of(rcu,
+			struct sw_flow_actions, rcu);
+	kfree(sf_acts);
+}
+
+/* Schedules 'sf_acts' to be freed after the next RCU grace period.
+ * The caller must hold rcu_read_lock for this to be sensible. */
+void ovs_flow_deferred_free_acts(struct sw_flow_actions *sf_acts)
+{
+	call_rcu(&sf_acts->rcu, rcu_free_acts_callback);
+}
+
+static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key)
+{
+	struct qtag_prefix {
+		__be16 eth_type; /* ETH_P_8021Q */
+		__be16 tci;
+	};
+	struct qtag_prefix *qp;
+
+	if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16)))
+		return 0;
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) +
+					 sizeof(__be16))))
+		return -ENOMEM;
+
+	qp = (struct qtag_prefix *) skb->data;
+	key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT);
+	__skb_pull(skb, sizeof(struct qtag_prefix));
+
+	return 0;
+}
+
+static __be16 parse_ethertype(struct sk_buff *skb)
+{
+	struct llc_snap_hdr {
+		u8  dsap;  /* Always 0xAA */
+		u8  ssap;  /* Always 0xAA */
+		u8  ctrl;
+		u8  oui[3];
+		__be16 ethertype;
+	};
+	struct llc_snap_hdr *llc;
+	__be16 proto;
+
+	proto = *(__be16 *) skb->data;
+	__skb_pull(skb, sizeof(__be16));
+
+	if (ntohs(proto) >= 1536)
+		return proto;
+
+	if (skb->len < sizeof(struct llc_snap_hdr))
+		return htons(ETH_P_802_2);
+
+	if (unlikely(!pskb_may_pull(skb, sizeof(struct llc_snap_hdr))))
+		return htons(0);
+
+	llc = (struct llc_snap_hdr *) skb->data;
+	if (llc->dsap != LLC_SAP_SNAP ||
+	    llc->ssap != LLC_SAP_SNAP ||
+	    (llc->oui[0] | llc->oui[1] | llc->oui[2]) != 0)
+		return htons(ETH_P_802_2);
+
+	__skb_pull(skb, sizeof(struct llc_snap_hdr));
+	return llc->ethertype;
+}
+
+static int parse_icmpv6(struct sk_buff *skb, struct sw_flow_key *key,
+			int *key_lenp, int nh_len)
+{
+	struct icmp6hdr *icmp = icmp6_hdr(skb);
+	int error = 0;
+	int key_len;
+
+	/* The ICMPv6 type and code fields use the 16-bit transport port
+	 * fields, so we need to store them in 16-bit network byte order.
+	 */
+	key->ipv6.tp.src = htons(icmp->icmp6_type);
+	key->ipv6.tp.dst = htons(icmp->icmp6_code);
+	key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+
+	if (icmp->icmp6_code == 0 &&
+	    (icmp->icmp6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+	     icmp->icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+		int icmp_len = skb->len - skb_transport_offset(skb);
+		struct nd_msg *nd;
+		int offset;
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+
+		/* In order to process neighbor discovery options, we need the
+		 * entire packet.
+		 */
+		if (unlikely(icmp_len < sizeof(*nd)))
+			goto out;
+		if (unlikely(skb_linearize(skb))) {
+			error = -ENOMEM;
+			goto out;
+		}
+
+		nd = (struct nd_msg *)skb_transport_header(skb);
+		key->ipv6.nd.target = nd->target;
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+
+		icmp_len -= sizeof(*nd);
+		offset = 0;
+		while (icmp_len >= 8) {
+			struct nd_opt_hdr *nd_opt =
+				 (struct nd_opt_hdr *)(nd->opt + offset);
+			int opt_len = nd_opt->nd_opt_len * 8;
+
+			if (unlikely(!opt_len || opt_len > icmp_len))
+				goto invalid;
+
+			/* Store the link layer address if the appropriate
+			 * option is provided.  It is considered an error if
+			 * the same link layer option is specified twice.
+			 */
+			if (nd_opt->nd_opt_type == ND_OPT_SOURCE_LL_ADDR
+			    && opt_len == 8) {
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.sll)))
+					goto invalid;
+				memcpy(key->ipv6.nd.sll,
+				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+			} else if (nd_opt->nd_opt_type == ND_OPT_TARGET_LL_ADDR
+				   && opt_len == 8) {
+				if (unlikely(!is_zero_ether_addr(key->ipv6.nd.tll)))
+					goto invalid;
+				memcpy(key->ipv6.nd.tll,
+				    &nd->opt[offset+sizeof(*nd_opt)], ETH_ALEN);
+			}
+
+			icmp_len -= opt_len;
+			offset += opt_len;
+		}
+	}
+
+	goto out;
+
+invalid:
+	memset(&key->ipv6.nd.target, 0, sizeof(key->ipv6.nd.target));
+	memset(key->ipv6.nd.sll, 0, sizeof(key->ipv6.nd.sll));
+	memset(key->ipv6.nd.tll, 0, sizeof(key->ipv6.nd.tll));
+
+out:
+	*key_lenp = key_len;
+	return error;
+}
+
+/**
+ * ovs_flow_extract - extracts a flow key from an Ethernet frame.
+ * @skb: sk_buff that contains the frame, with skb->data pointing to the
+ * Ethernet header
+ * @in_port: port number on which @skb was received.
+ * @key: output flow key
+ * @key_lenp: length of output flow key
+ *
+ * The caller must ensure that skb->len >= ETH_HLEN.
+ *
+ * Returns 0 if successful, otherwise a negative errno value.
+ *
+ * Initializes @skb header pointers as follows:
+ *
+ *    - skb->mac_header: the Ethernet header.
+ *
+ *    - skb->network_header: just past the Ethernet header, or just past the
+ *      VLAN header, to the first byte of the Ethernet payload.
+ *
+ *    - skb->transport_header: If key->dl_type is ETH_P_IP or ETH_P_IPV6
+ *      on output, then just past the IP header, if one is present and
+ *      of a correct length, otherwise the same as skb->network_header.
+ *      For other key->dl_type values it is left untouched.
+ */
+int ovs_flow_extract(struct sk_buff *skb, u16 in_port, struct sw_flow_key *key,
+		 int *key_lenp)
+{
+	int error = 0;
+	int key_len = SW_FLOW_KEY_OFFSET(eth);
+	struct ethhdr *eth;
+
+	memset(key, 0, sizeof(*key));
+
+	key->phy.priority = skb->priority;
+	key->phy.in_port = in_port;
+
+	skb_reset_mac_header(skb);
+
+	/* Link layer.  We are guaranteed to have at least the 14 byte Ethernet
+	 * header in the linear data area.
+	 */
+	eth = eth_hdr(skb);
+	memcpy(key->eth.src, eth->h_source, ETH_ALEN);
+	memcpy(key->eth.dst, eth->h_dest, ETH_ALEN);
+
+	__skb_pull(skb, 2 * ETH_ALEN);
+
+	if (vlan_tx_tag_present(skb))
+		key->eth.tci = htons(skb->vlan_tci);
+	else if (eth->h_proto == htons(ETH_P_8021Q))
+		if (unlikely(parse_vlan(skb, key)))
+			return -ENOMEM;
+
+	key->eth.type = parse_ethertype(skb);
+	if (unlikely(key->eth.type == htons(0)))
+		return -ENOMEM;
+
+	skb_reset_network_header(skb);
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+
+	/* Network layer. */
+	if (key->eth.type == htons(ETH_P_IP)) {
+		struct iphdr *nh;
+		__be16 offset;
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
+
+		error = check_iphdr(skb);
+		if (unlikely(error)) {
+			if (error == -EINVAL) {
+				skb->transport_header = skb->network_header;
+				error = 0;
+			}
+			goto out;
+		}
+
+		nh = ip_hdr(skb);
+		key->ipv4.addr.src = nh->saddr;
+		key->ipv4.addr.dst = nh->daddr;
+
+		key->ip.proto = nh->protocol;
+		key->ip.tos = nh->tos;
+		key->ip.ttl = nh->ttl;
+
+		offset = nh->frag_off & htons(IP_OFFSET);
+		if (offset) {
+			key->ip.frag = OVS_FRAG_TYPE_LATER;
+			goto out;
+		}
+		if (nh->frag_off & htons(IP_MF) ||
+			 skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+		/* Transport layer. */
+		if (key->ip.proto == IPPROTO_TCP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (tcphdr_ok(skb)) {
+				struct tcphdr *tcp = tcp_hdr(skb);
+				key->ipv4.tp.src = tcp->source;
+				key->ipv4.tp.dst = tcp->dest;
+			}
+		} else if (key->ip.proto == IPPROTO_UDP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (udphdr_ok(skb)) {
+				struct udphdr *udp = udp_hdr(skb);
+				key->ipv4.tp.src = udp->source;
+				key->ipv4.tp.dst = udp->dest;
+			}
+		} else if (key->ip.proto == IPPROTO_ICMP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+			if (icmphdr_ok(skb)) {
+				struct icmphdr *icmp = icmp_hdr(skb);
+				/* The ICMP type and code fields use the 16-bit
+				 * transport port fields, so we need to store
+				 * them in 16-bit network byte order. */
+				key->ipv4.tp.src = htons(icmp->type);
+				key->ipv4.tp.dst = htons(icmp->code);
+			}
+		}
+
+	} else if (key->eth.type == htons(ETH_P_ARP) && arphdr_ok(skb)) {
+		struct arp_eth_header *arp;
+
+		arp = (struct arp_eth_header *)skb_network_header(skb);
+
+		if (arp->ar_hrd == htons(ARPHRD_ETHER)
+				&& arp->ar_pro == htons(ETH_P_IP)
+				&& arp->ar_hln == ETH_ALEN
+				&& arp->ar_pln == 4) {
+
+			/* We only match on the lower 8 bits of the opcode. */
+			if (ntohs(arp->ar_op) <= 0xff)
+				key->ip.proto = ntohs(arp->ar_op);
+
+			if (key->ip.proto == ARPOP_REQUEST
+					|| key->ip.proto == ARPOP_REPLY) {
+				memcpy(&key->ipv4.addr.src, arp->ar_sip, sizeof(key->ipv4.addr.src));
+				memcpy(&key->ipv4.addr.dst, arp->ar_tip, sizeof(key->ipv4.addr.dst));
+				memcpy(key->ipv4.arp.sha, arp->ar_sha, ETH_ALEN);
+				memcpy(key->ipv4.arp.tha, arp->ar_tha, ETH_ALEN);
+				key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+			}
+		}
+	} else if (key->eth.type == htons(ETH_P_IPV6)) {
+		int nh_len;             /* IPv6 Header + Extensions */
+
+		nh_len = parse_ipv6hdr(skb, key, &key_len);
+		if (unlikely(nh_len < 0)) {
+			if (nh_len == -EINVAL)
+				skb->transport_header = skb->network_header;
+			else
+				error = nh_len;
+			goto out;
+		}
+
+		if (key->ip.frag == OVS_FRAG_TYPE_LATER)
+			goto out;
+		if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP)
+			key->ip.frag = OVS_FRAG_TYPE_FIRST;
+
+		/* Transport layer. */
+		if (key->ip.proto == NEXTHDR_TCP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (tcphdr_ok(skb)) {
+				struct tcphdr *tcp = tcp_hdr(skb);
+				key->ipv6.tp.src = tcp->source;
+				key->ipv6.tp.dst = tcp->dest;
+			}
+		} else if (key->ip.proto == NEXTHDR_UDP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (udphdr_ok(skb)) {
+				struct udphdr *udp = udp_hdr(skb);
+				key->ipv6.tp.src = udp->source;
+				key->ipv6.tp.dst = udp->dest;
+			}
+		} else if (key->ip.proto == NEXTHDR_ICMP) {
+			key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+			if (icmp6hdr_ok(skb)) {
+				error = parse_icmpv6(skb, key, &key_len, nh_len);
+				if (error < 0)
+					goto out;
+			}
+		}
+	}
+
+out:
+	*key_lenp = key_len;
+	return error;
+}
+
+u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len)
+{
+	return jhash2((u32 *)key, DIV_ROUND_UP(key_len, sizeof(u32)), 0);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
+				struct sw_flow_key *key, int key_len)
+{
+	struct sw_flow *flow;
+	struct hlist_node *n;
+	struct hlist_head *head;
+	u32 hash;
+
+	hash = ovs_flow_hash(key, key_len);
+
+	head = find_bucket(table, hash);
+	hlist_for_each_entry_rcu(flow, n, head, hash_node[table->node_ver]) {
+
+		if (flow->hash == hash &&
+		    !memcmp(&flow->key, key, key_len)) {
+			return flow;
+		}
+	}
+	return NULL;
+}
+
+void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow)
+{
+	struct hlist_head *head;
+
+	head = find_bucket(table, flow->hash);
+	hlist_add_head_rcu(&flow->hash_node[table->node_ver], head);
+	table->count++;
+}
+
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
+{
+	hlist_del_rcu(&flow->hash_node[table->node_ver]);
+	table->count--;
+	BUG_ON(table->count < 0);
+}
+
+/* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute.  */
+const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = {
+	[OVS_KEY_ATTR_ENCAP] = -1,
+	[OVS_KEY_ATTR_PRIORITY] = sizeof(u32),
+	[OVS_KEY_ATTR_IN_PORT] = sizeof(u32),
+	[OVS_KEY_ATTR_ETHERNET] = sizeof(struct ovs_key_ethernet),
+	[OVS_KEY_ATTR_VLAN] = sizeof(__be16),
+	[OVS_KEY_ATTR_ETHERTYPE] = sizeof(__be16),
+	[OVS_KEY_ATTR_IPV4] = sizeof(struct ovs_key_ipv4),
+	[OVS_KEY_ATTR_IPV6] = sizeof(struct ovs_key_ipv6),
+	[OVS_KEY_ATTR_TCP] = sizeof(struct ovs_key_tcp),
+	[OVS_KEY_ATTR_UDP] = sizeof(struct ovs_key_udp),
+	[OVS_KEY_ATTR_ICMP] = sizeof(struct ovs_key_icmp),
+	[OVS_KEY_ATTR_ICMPV6] = sizeof(struct ovs_key_icmpv6),
+	[OVS_KEY_ATTR_ARP] = sizeof(struct ovs_key_arp),
+	[OVS_KEY_ATTR_ND] = sizeof(struct ovs_key_nd),
+};
+
+static int ipv4_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
+				  const struct nlattr *a[], u32 *attrs)
+{
+	const struct ovs_key_icmp *icmp_key;
+	const struct ovs_key_tcp *tcp_key;
+	const struct ovs_key_udp *udp_key;
+
+	switch (swkey->ip.proto) {
+	case IPPROTO_TCP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+		swkey->ipv4.tp.src = tcp_key->tcp_src;
+		swkey->ipv4.tp.dst = tcp_key->tcp_dst;
+		break;
+
+	case IPPROTO_UDP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+		swkey->ipv4.tp.src = udp_key->udp_src;
+		swkey->ipv4.tp.dst = udp_key->udp_dst;
+		break;
+
+	case IPPROTO_ICMP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_ICMP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv4.tp);
+		icmp_key = nla_data(a[OVS_KEY_ATTR_ICMP]);
+		swkey->ipv4.tp.src = htons(icmp_key->icmp_type);
+		swkey->ipv4.tp.dst = htons(icmp_key->icmp_code);
+		break;
+	}
+
+	return 0;
+}
+
+static int ipv6_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_len,
+				  const struct nlattr *a[], u32 *attrs)
+{
+	const struct ovs_key_icmpv6 *icmpv6_key;
+	const struct ovs_key_tcp *tcp_key;
+	const struct ovs_key_udp *udp_key;
+
+	switch (swkey->ip.proto) {
+	case IPPROTO_TCP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_TCP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_TCP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+		tcp_key = nla_data(a[OVS_KEY_ATTR_TCP]);
+		swkey->ipv6.tp.src = tcp_key->tcp_src;
+		swkey->ipv6.tp.dst = tcp_key->tcp_dst;
+		break;
+
+	case IPPROTO_UDP:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_UDP)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_UDP);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+		udp_key = nla_data(a[OVS_KEY_ATTR_UDP]);
+		swkey->ipv6.tp.src = udp_key->udp_src;
+		swkey->ipv6.tp.dst = udp_key->udp_dst;
+		break;
+
+	case IPPROTO_ICMPV6:
+		if (!(*attrs & (1 << OVS_KEY_ATTR_ICMPV6)))
+			return -EINVAL;
+		*attrs &= ~(1 << OVS_KEY_ATTR_ICMPV6);
+
+		*key_len = SW_FLOW_KEY_OFFSET(ipv6.tp);
+		icmpv6_key = nla_data(a[OVS_KEY_ATTR_ICMPV6]);
+		swkey->ipv6.tp.src = htons(icmpv6_key->icmpv6_type);
+		swkey->ipv6.tp.dst = htons(icmpv6_key->icmpv6_code);
+
+		if (swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_SOLICITATION) ||
+		    swkey->ipv6.tp.src == htons(NDISC_NEIGHBOUR_ADVERTISEMENT)) {
+			const struct ovs_key_nd *nd_key;
+
+			if (!(*attrs & (1 << OVS_KEY_ATTR_ND)))
+				return -EINVAL;
+			*attrs &= ~(1 << OVS_KEY_ATTR_ND);
+
+			*key_len = SW_FLOW_KEY_OFFSET(ipv6.nd);
+			nd_key = nla_data(a[OVS_KEY_ATTR_ND]);
+			memcpy(&swkey->ipv6.nd.target, nd_key->nd_target,
+			       sizeof(swkey->ipv6.nd.target));
+			memcpy(swkey->ipv6.nd.sll, nd_key->nd_sll, ETH_ALEN);
+			memcpy(swkey->ipv6.nd.tll, nd_key->nd_tll, ETH_ALEN);
+		}
+		break;
+	}
+
+	return 0;
+}
+
+static int parse_flow_nlattrs(const struct nlattr *attr,
+			      const struct nlattr *a[], u32 *attrsp)
+{
+	const struct nlattr *nla;
+	u32 attrs;
+	int rem;
+
+	attrs = 0;
+	nla_for_each_nested(nla, attr, rem) {
+		u16 type = nla_type(nla);
+		int expected_len;
+
+		if (type > OVS_KEY_ATTR_MAX || attrs & (1 << type))
+			return -EINVAL;
+
+		expected_len = ovs_key_lens[type];
+		if (nla_len(nla) != expected_len && expected_len != -1)
+			return -EINVAL;
+
+		attrs |= 1 << type;
+		a[type] = nla;
+	}
+	if (rem)
+		return -EINVAL;
+
+	*attrsp = attrs;
+	return 0;
+}
+
+/**
+ * ovs_flow_from_nlattrs - parses Netlink attributes into a flow key.
+ * @swkey: receives the extracted flow key.
+ * @key_lenp: number of bytes used in @swkey.
+ * @attr: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ */
+int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+		      const struct nlattr *attr)
+{
+	const struct nlattr *a[OVS_KEY_ATTR_MAX + 1];
+	const struct ovs_key_ethernet *eth_key;
+	int key_len;
+	u32 attrs;
+	int err;
+
+	memset(swkey, 0, sizeof(struct sw_flow_key));
+	key_len = SW_FLOW_KEY_OFFSET(eth);
+
+	err = parse_flow_nlattrs(attr, a, &attrs);
+	if (err)
+		return err;
+
+	/* Metadata attributes. */
+	if (attrs & (1 << OVS_KEY_ATTR_PRIORITY)) {
+		swkey->phy.priority = nla_get_u32(a[OVS_KEY_ATTR_PRIORITY]);
+		attrs &= ~(1 << OVS_KEY_ATTR_PRIORITY);
+	}
+	if (attrs & (1 << OVS_KEY_ATTR_IN_PORT)) {
+		u32 in_port = nla_get_u32(a[OVS_KEY_ATTR_IN_PORT]);
+		if (in_port >= DP_MAX_PORTS)
+			return -EINVAL;
+		swkey->phy.in_port = in_port;
+		attrs &= ~(1 << OVS_KEY_ATTR_IN_PORT);
+	} else {
+		swkey->phy.in_port = USHRT_MAX;
+	}
+
+	/* Data attributes. */
+	if (!(attrs & (1 << OVS_KEY_ATTR_ETHERNET)))
+		return -EINVAL;
+	attrs &= ~(1 << OVS_KEY_ATTR_ETHERNET);
+
+	eth_key = nla_data(a[OVS_KEY_ATTR_ETHERNET]);
+	memcpy(swkey->eth.src, eth_key->eth_src, ETH_ALEN);
+	memcpy(swkey->eth.dst, eth_key->eth_dst, ETH_ALEN);
+
+	if (attrs & (1u << OVS_KEY_ATTR_ETHERTYPE) &&
+	    nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q)) {
+		const struct nlattr *encap;
+		__be16 tci;
+
+		if (attrs != ((1 << OVS_KEY_ATTR_VLAN) |
+			      (1 << OVS_KEY_ATTR_ETHERTYPE) |
+			      (1 << OVS_KEY_ATTR_ENCAP)))
+			return -EINVAL;
+
+		encap = a[OVS_KEY_ATTR_ENCAP];
+		tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]);
+		if (tci & htons(VLAN_TAG_PRESENT)) {
+			swkey->eth.tci = tci;
+
+			err = parse_flow_nlattrs(encap, a, &attrs);
+			if (err)
+				return err;
+		} else if (!tci) {
+			/* Corner case for truncated 802.1Q header. */
+			if (nla_len(encap))
+				return -EINVAL;
+
+			swkey->eth.type = htons(ETH_P_8021Q);
+			*key_lenp = key_len;
+			return 0;
+		} else {
+			return -EINVAL;
+		}
+	}
+
+	if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) {
+		swkey->eth.type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]);
+		if (ntohs(swkey->eth.type) < 1536)
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE);
+	} else {
+		swkey->eth.type = htons(ETH_P_802_2);
+	}
+
+	if (swkey->eth.type == htons(ETH_P_IP)) {
+		const struct ovs_key_ipv4 *ipv4_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_IPV4)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV4);
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv4.addr);
+		ipv4_key = nla_data(a[OVS_KEY_ATTR_IPV4]);
+		if (ipv4_key->ipv4_frag > OVS_FRAG_TYPE_MAX)
+			return -EINVAL;
+		swkey->ip.proto = ipv4_key->ipv4_proto;
+		swkey->ip.tos = ipv4_key->ipv4_tos;
+		swkey->ip.ttl = ipv4_key->ipv4_ttl;
+		swkey->ip.frag = ipv4_key->ipv4_frag;
+		swkey->ipv4.addr.src = ipv4_key->ipv4_src;
+		swkey->ipv4.addr.dst = ipv4_key->ipv4_dst;
+
+		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+			err = ipv4_flow_from_nlattrs(swkey, &key_len, a, &attrs);
+			if (err)
+				return err;
+		}
+	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+		const struct ovs_key_ipv6 *ipv6_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_IPV6)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_IPV6);
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv6.label);
+		ipv6_key = nla_data(a[OVS_KEY_ATTR_IPV6]);
+		if (ipv6_key->ipv6_frag > OVS_FRAG_TYPE_MAX)
+			return -EINVAL;
+		swkey->ipv6.label = ipv6_key->ipv6_label;
+		swkey->ip.proto = ipv6_key->ipv6_proto;
+		swkey->ip.tos = ipv6_key->ipv6_tclass;
+		swkey->ip.ttl = ipv6_key->ipv6_hlimit;
+		swkey->ip.frag = ipv6_key->ipv6_frag;
+		memcpy(&swkey->ipv6.addr.src, ipv6_key->ipv6_src,
+		       sizeof(swkey->ipv6.addr.src));
+		memcpy(&swkey->ipv6.addr.dst, ipv6_key->ipv6_dst,
+		       sizeof(swkey->ipv6.addr.dst));
+
+		if (swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+			err = ipv6_flow_from_nlattrs(swkey, &key_len, a, &attrs);
+			if (err)
+				return err;
+		}
+	} else if (swkey->eth.type == htons(ETH_P_ARP)) {
+		const struct ovs_key_arp *arp_key;
+
+		if (!(attrs & (1 << OVS_KEY_ATTR_ARP)))
+			return -EINVAL;
+		attrs &= ~(1 << OVS_KEY_ATTR_ARP);
+
+		key_len = SW_FLOW_KEY_OFFSET(ipv4.arp);
+		arp_key = nla_data(a[OVS_KEY_ATTR_ARP]);
+		swkey->ipv4.addr.src = arp_key->arp_sip;
+		swkey->ipv4.addr.dst = arp_key->arp_tip;
+		if (arp_key->arp_op & htons(0xff00))
+			return -EINVAL;
+		swkey->ip.proto = ntohs(arp_key->arp_op);
+		memcpy(swkey->ipv4.arp.sha, arp_key->arp_sha, ETH_ALEN);
+		memcpy(swkey->ipv4.arp.tha, arp_key->arp_tha, ETH_ALEN);
+	}
+
+	if (attrs)
+		return -EINVAL;
+	*key_lenp = key_len;
+
+	return 0;
+}
+
+/**
+ * ovs_flow_metadata_from_nlattrs - parses Netlink attributes into a flow key.
+ * @in_port: receives the extracted input port.
+ * @key: Netlink attribute holding nested %OVS_KEY_ATTR_* Netlink attribute
+ * sequence.
+ *
+ * This parses a series of Netlink attributes that form a flow key, which must
+ * take the same form accepted by flow_from_nlattrs(), but only enough of it to
+ * get the metadata, that is, the parts of the flow key that cannot be
+ * extracted from the packet itself.
+ */
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+			       const struct nlattr *attr)
+{
+	const struct nlattr *nla;
+	int rem;
+
+	*in_port = USHRT_MAX;
+	*priority = 0;
+
+	nla_for_each_nested(nla, attr, rem) {
+		int type = nla_type(nla);
+
+		if (type <= OVS_KEY_ATTR_MAX && ovs_key_lens[type] > 0) {
+			if (nla_len(nla) != ovs_key_lens[type])
+				return -EINVAL;
+
+			switch (type) {
+			case OVS_KEY_ATTR_PRIORITY:
+				*priority = nla_get_u32(nla);
+				break;
+
+			case OVS_KEY_ATTR_IN_PORT:
+				if (nla_get_u32(nla) >= DP_MAX_PORTS)
+					return -EINVAL;
+				*in_port = nla_get_u32(nla);
+				break;
+			}
+		}
+	}
+	if (rem)
+		return -EINVAL;
+	return 0;
+}
+
+int ovs_flow_to_nlattrs(const struct sw_flow_key *swkey, struct sk_buff *skb)
+{
+	struct ovs_key_ethernet *eth_key;
+	struct nlattr *nla, *encap;
+
+	if (swkey->phy.priority)
+		NLA_PUT_U32(skb, OVS_KEY_ATTR_PRIORITY, swkey->phy.priority);
+
+	if (swkey->phy.in_port != USHRT_MAX)
+		NLA_PUT_U32(skb, OVS_KEY_ATTR_IN_PORT, swkey->phy.in_port);
+
+	nla = nla_reserve(skb, OVS_KEY_ATTR_ETHERNET, sizeof(*eth_key));
+	if (!nla)
+		goto nla_put_failure;
+	eth_key = nla_data(nla);
+	memcpy(eth_key->eth_src, swkey->eth.src, ETH_ALEN);
+	memcpy(eth_key->eth_dst, swkey->eth.dst, ETH_ALEN);
+
+	if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) {
+		NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, htons(ETH_P_8021Q));
+		NLA_PUT_BE16(skb, OVS_KEY_ATTR_VLAN, swkey->eth.tci);
+		encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP);
+		if (!swkey->eth.tci)
+			goto unencap;
+	} else {
+		encap = NULL;
+	}
+
+	if (swkey->eth.type == htons(ETH_P_802_2))
+		goto unencap;
+
+	NLA_PUT_BE16(skb, OVS_KEY_ATTR_ETHERTYPE, swkey->eth.type);
+
+	if (swkey->eth.type == htons(ETH_P_IP)) {
+		struct ovs_key_ipv4 *ipv4_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV4, sizeof(*ipv4_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv4_key = nla_data(nla);
+		ipv4_key->ipv4_src = swkey->ipv4.addr.src;
+		ipv4_key->ipv4_dst = swkey->ipv4.addr.dst;
+		ipv4_key->ipv4_proto = swkey->ip.proto;
+		ipv4_key->ipv4_tos = swkey->ip.tos;
+		ipv4_key->ipv4_ttl = swkey->ip.ttl;
+		ipv4_key->ipv4_frag = swkey->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+		struct ovs_key_ipv6 *ipv6_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_IPV6, sizeof(*ipv6_key));
+		if (!nla)
+			goto nla_put_failure;
+		ipv6_key = nla_data(nla);
+		memcpy(ipv6_key->ipv6_src, &swkey->ipv6.addr.src,
+				sizeof(ipv6_key->ipv6_src));
+		memcpy(ipv6_key->ipv6_dst, &swkey->ipv6.addr.dst,
+				sizeof(ipv6_key->ipv6_dst));
+		ipv6_key->ipv6_label = swkey->ipv6.label;
+		ipv6_key->ipv6_proto = swkey->ip.proto;
+		ipv6_key->ipv6_tclass = swkey->ip.tos;
+		ipv6_key->ipv6_hlimit = swkey->ip.ttl;
+		ipv6_key->ipv6_frag = swkey->ip.frag;
+	} else if (swkey->eth.type == htons(ETH_P_ARP)) {
+		struct ovs_key_arp *arp_key;
+
+		nla = nla_reserve(skb, OVS_KEY_ATTR_ARP, sizeof(*arp_key));
+		if (!nla)
+			goto nla_put_failure;
+		arp_key = nla_data(nla);
+		memset(arp_key, 0, sizeof(struct ovs_key_arp));
+		arp_key->arp_sip = swkey->ipv4.addr.src;
+		arp_key->arp_tip = swkey->ipv4.addr.dst;
+		arp_key->arp_op = htons(swkey->ip.proto);
+		memcpy(arp_key->arp_sha, swkey->ipv4.arp.sha, ETH_ALEN);
+		memcpy(arp_key->arp_tha, swkey->ipv4.arp.tha, ETH_ALEN);
+	}
+
+	if ((swkey->eth.type == htons(ETH_P_IP) ||
+	     swkey->eth.type == htons(ETH_P_IPV6)) &&
+	     swkey->ip.frag != OVS_FRAG_TYPE_LATER) {
+
+		if (swkey->ip.proto == IPPROTO_TCP) {
+			struct ovs_key_tcp *tcp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_TCP, sizeof(*tcp_key));
+			if (!nla)
+				goto nla_put_failure;
+			tcp_key = nla_data(nla);
+			if (swkey->eth.type == htons(ETH_P_IP)) {
+				tcp_key->tcp_src = swkey->ipv4.tp.src;
+				tcp_key->tcp_dst = swkey->ipv4.tp.dst;
+			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+				tcp_key->tcp_src = swkey->ipv6.tp.src;
+				tcp_key->tcp_dst = swkey->ipv6.tp.dst;
+			}
+		} else if (swkey->ip.proto == IPPROTO_UDP) {
+			struct ovs_key_udp *udp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_UDP, sizeof(*udp_key));
+			if (!nla)
+				goto nla_put_failure;
+			udp_key = nla_data(nla);
+			if (swkey->eth.type == htons(ETH_P_IP)) {
+				udp_key->udp_src = swkey->ipv4.tp.src;
+				udp_key->udp_dst = swkey->ipv4.tp.dst;
+			} else if (swkey->eth.type == htons(ETH_P_IPV6)) {
+				udp_key->udp_src = swkey->ipv6.tp.src;
+				udp_key->udp_dst = swkey->ipv6.tp.dst;
+			}
+		} else if (swkey->eth.type == htons(ETH_P_IP) &&
+			   swkey->ip.proto == IPPROTO_ICMP) {
+			struct ovs_key_icmp *icmp_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMP, sizeof(*icmp_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmp_key = nla_data(nla);
+			icmp_key->icmp_type = ntohs(swkey->ipv4.tp.src);
+			icmp_key->icmp_code = ntohs(swkey->ipv4.tp.dst);
+		} else if (swkey->eth.type == htons(ETH_P_IPV6) &&
+			   swkey->ip.proto == IPPROTO_ICMPV6) {
+			struct ovs_key_icmpv6 *icmpv6_key;
+
+			nla = nla_reserve(skb, OVS_KEY_ATTR_ICMPV6,
+						sizeof(*icmpv6_key));
+			if (!nla)
+				goto nla_put_failure;
+			icmpv6_key = nla_data(nla);
+			icmpv6_key->icmpv6_type = ntohs(swkey->ipv6.tp.src);
+			icmpv6_key->icmpv6_code = ntohs(swkey->ipv6.tp.dst);
+
+			if (icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_SOLICITATION ||
+			    icmpv6_key->icmpv6_type == NDISC_NEIGHBOUR_ADVERTISEMENT) {
+				struct ovs_key_nd *nd_key;
+
+				nla = nla_reserve(skb, OVS_KEY_ATTR_ND, sizeof(*nd_key));
+				if (!nla)
+					goto nla_put_failure;
+				nd_key = nla_data(nla);
+				memcpy(nd_key->nd_target, &swkey->ipv6.nd.target,
+							sizeof(nd_key->nd_target));
+				memcpy(nd_key->nd_sll, swkey->ipv6.nd.sll, ETH_ALEN);
+				memcpy(nd_key->nd_tll, swkey->ipv6.nd.tll, ETH_ALEN);
+			}
+		}
+	}
+
+unencap:
+	if (encap)
+		nla_nest_end(skb, encap);
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+/* Initializes the flow module.
+ * Returns zero if successful or a negative error code. */
+int ovs_flow_init(void)
+{
+	flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow), 0,
+					0, NULL);
+	if (flow_cache == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Uninitializes the flow module. */
+void ovs_flow_exit(void)
+{
+	kmem_cache_destroy(flow_cache);
+}
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
new file mode 100644
index 000000000000..2747dc2c4ac1
--- /dev/null
+++ b/net/openvswitch/flow.h
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef FLOW_H
+#define FLOW_H 1
+
+#include <linux/kernel.h>
+#include <linux/netlink.h>
+#include <linux/openvswitch.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+#include <linux/rcupdate.h>
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/jiffies.h>
+#include <linux/time.h>
+#include <linux/flex_array.h>
+#include <net/inet_ecn.h>
+
+struct sk_buff;
+
+struct sw_flow_actions {
+	struct rcu_head rcu;
+	u32 actions_len;
+	struct nlattr actions[];
+};
+
+struct sw_flow_key {
+	struct {
+		u32	priority;	/* Packet QoS priority. */
+		u16	in_port;	/* Input switch port (or USHRT_MAX). */
+	} phy;
+	struct {
+		u8     src[ETH_ALEN];	/* Ethernet source address. */
+		u8     dst[ETH_ALEN];	/* Ethernet destination address. */
+		__be16 tci;		/* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */
+		__be16 type;		/* Ethernet frame type. */
+	} eth;
+	struct {
+		u8     proto;		/* IP protocol or lower 8 bits of ARP opcode. */
+		u8     tos;		/* IP ToS. */
+		u8     ttl;		/* IP TTL/hop limit. */
+		u8     frag;		/* One of OVS_FRAG_TYPE_*. */
+	} ip;
+	union {
+		struct {
+			struct {
+				__be32 src;	/* IP source address. */
+				__be32 dst;	/* IP destination address. */
+			} addr;
+			union {
+				struct {
+					__be16 src;		/* TCP/UDP source port. */
+					__be16 dst;		/* TCP/UDP destination port. */
+				} tp;
+				struct {
+					u8 sha[ETH_ALEN];	/* ARP source hardware address. */
+					u8 tha[ETH_ALEN];	/* ARP target hardware address. */
+				} arp;
+			};
+		} ipv4;
+		struct {
+			struct {
+				struct in6_addr src;	/* IPv6 source address. */
+				struct in6_addr dst;	/* IPv6 destination address. */
+			} addr;
+			__be32 label;			/* IPv6 flow label. */
+			struct {
+				__be16 src;		/* TCP/UDP source port. */
+				__be16 dst;		/* TCP/UDP destination port. */
+			} tp;
+			struct {
+				struct in6_addr target;	/* ND target address. */
+				u8 sll[ETH_ALEN];	/* ND source link layer address. */
+				u8 tll[ETH_ALEN];	/* ND target link layer address. */
+			} nd;
+		} ipv6;
+	};
+};
+
+struct sw_flow {
+	struct rcu_head rcu;
+	struct hlist_node hash_node[2];
+	u32 hash;
+
+	struct sw_flow_key key;
+	struct sw_flow_actions __rcu *sf_acts;
+
+	spinlock_t lock;	/* Lock for values below. */
+	unsigned long used;	/* Last used time (in jiffies). */
+	u64 packet_count;	/* Number of packets matched. */
+	u64 byte_count;		/* Number of bytes matched. */
+	u8 tcp_flags;		/* Union of seen TCP flags. */
+};
+
+struct arp_eth_header {
+	__be16      ar_hrd;	/* format of hardware address   */
+	__be16      ar_pro;	/* format of protocol address   */
+	unsigned char   ar_hln;	/* length of hardware address   */
+	unsigned char   ar_pln;	/* length of protocol address   */
+	__be16      ar_op;	/* ARP opcode (command)     */
+
+	/* Ethernet+IPv4 specific members. */
+	unsigned char       ar_sha[ETH_ALEN];	/* sender hardware address  */
+	unsigned char       ar_sip[4];		/* sender IP address        */
+	unsigned char       ar_tha[ETH_ALEN];	/* target hardware address  */
+	unsigned char       ar_tip[4];		/* target IP address        */
+} __packed;
+
+int ovs_flow_init(void);
+void ovs_flow_exit(void);
+
+struct sw_flow *ovs_flow_alloc(void);
+void ovs_flow_deferred_free(struct sw_flow *);
+void ovs_flow_free(struct sw_flow *flow);
+
+struct sw_flow_actions *ovs_flow_actions_alloc(const struct nlattr *);
+void ovs_flow_deferred_free_acts(struct sw_flow_actions *);
+
+int ovs_flow_extract(struct sk_buff *, u16 in_port, struct sw_flow_key *,
+		     int *key_lenp);
+void ovs_flow_used(struct sw_flow *, struct sk_buff *);
+u64 ovs_flow_used_time(unsigned long flow_jiffies);
+
+/* Upper bound on the length of a nlattr-formatted flow key.  The longest
+ * nlattr-formatted flow key would be:
+ *
+ *                         struct  pad  nl hdr  total
+ *                         ------  ---  ------  -----
+ *  OVS_KEY_ATTR_PRIORITY      4    --     4      8
+ *  OVS_KEY_ATTR_IN_PORT       4    --     4      8
+ *  OVS_KEY_ATTR_ETHERNET     12    --     4     16
+ *  OVS_KEY_ATTR_8021Q         4    --     4      8
+ *  OVS_KEY_ATTR_ETHERTYPE     2     2     4      8
+ *  OVS_KEY_ATTR_IPV6         40    --     4     44
+ *  OVS_KEY_ATTR_ICMPV6        2     2     4      8
+ *  OVS_KEY_ATTR_ND           28    --     4     32
+ *  -------------------------------------------------
+ *  total                                       132
+ */
+#define FLOW_BUFSIZE 132
+
+int ovs_flow_to_nlattrs(const struct sw_flow_key *, struct sk_buff *);
+int ovs_flow_from_nlattrs(struct sw_flow_key *swkey, int *key_lenp,
+		      const struct nlattr *);
+int ovs_flow_metadata_from_nlattrs(u32 *priority, u16 *in_port,
+			       const struct nlattr *);
+
+#define TBL_MIN_BUCKETS		1024
+
+struct flow_table {
+	struct flex_array *buckets;
+	unsigned int count, n_buckets;
+	struct rcu_head rcu;
+	int node_ver;
+	u32 hash_seed;
+	bool keep_flows;
+};
+
+static inline int ovs_flow_tbl_count(struct flow_table *table)
+{
+	return table->count;
+}
+
+static inline int ovs_flow_tbl_need_to_expand(struct flow_table *table)
+{
+	return (table->count > table->n_buckets);
+}
+
+struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *table,
+				    struct sw_flow_key *key, int len);
+void ovs_flow_tbl_destroy(struct flow_table *table);
+void ovs_flow_tbl_deferred_destroy(struct flow_table *table);
+struct flow_table *ovs_flow_tbl_alloc(int new_size);
+struct flow_table *ovs_flow_tbl_expand(struct flow_table *table);
+struct flow_table *ovs_flow_tbl_rehash(struct flow_table *table);
+void ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow);
+void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow);
+u32 ovs_flow_hash(const struct sw_flow_key *key, int key_len);
+
+struct sw_flow *ovs_flow_tbl_next(struct flow_table *table, u32 *bucket, u32 *idx);
+extern const int ovs_key_lens[OVS_KEY_ATTR_MAX + 1];
+
+#endif /* flow.h */
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
new file mode 100644
index 000000000000..8fc28b86f2b3
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/hardirq.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/skbuff.h>
+#include <linux/version.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+struct internal_dev {
+	struct vport *vport;
+};
+
+static struct internal_dev *internal_dev_priv(struct net_device *netdev)
+{
+	return netdev_priv(netdev);
+}
+
+/* This function is only called by the kernel network layer.*/
+static struct rtnl_link_stats64 *internal_dev_get_stats(struct net_device *netdev,
+							struct rtnl_link_stats64 *stats)
+{
+	struct vport *vport = ovs_internal_dev_get_vport(netdev);
+	struct ovs_vport_stats vport_stats;
+
+	ovs_vport_get_stats(vport, &vport_stats);
+
+	/* The tx and rx stats need to be swapped because the
+	 * switch and host OS have opposite perspectives. */
+	stats->rx_packets	= vport_stats.tx_packets;
+	stats->tx_packets	= vport_stats.rx_packets;
+	stats->rx_bytes		= vport_stats.tx_bytes;
+	stats->tx_bytes		= vport_stats.rx_bytes;
+	stats->rx_errors	= vport_stats.tx_errors;
+	stats->tx_errors	= vport_stats.rx_errors;
+	stats->rx_dropped	= vport_stats.tx_dropped;
+	stats->tx_dropped	= vport_stats.rx_dropped;
+
+	return stats;
+}
+
+static int internal_dev_mac_addr(struct net_device *dev, void *p)
+{
+	struct sockaddr *addr = p;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+	memcpy(dev->dev_addr, addr->sa_data, dev->addr_len);
+	return 0;
+}
+
+/* Called with rcu_read_lock_bh. */
+static int internal_dev_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	rcu_read_lock();
+	ovs_vport_receive(internal_dev_priv(netdev)->vport, skb);
+	rcu_read_unlock();
+	return 0;
+}
+
+static int internal_dev_open(struct net_device *netdev)
+{
+	netif_start_queue(netdev);
+	return 0;
+}
+
+static int internal_dev_stop(struct net_device *netdev)
+{
+	netif_stop_queue(netdev);
+	return 0;
+}
+
+static void internal_dev_getinfo(struct net_device *netdev,
+				 struct ethtool_drvinfo *info)
+{
+	strcpy(info->driver, "openvswitch");
+}
+
+static const struct ethtool_ops internal_dev_ethtool_ops = {
+	.get_drvinfo	= internal_dev_getinfo,
+	.get_link	= ethtool_op_get_link,
+};
+
+static int internal_dev_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	if (new_mtu < 68)
+		return -EINVAL;
+
+	netdev->mtu = new_mtu;
+	return 0;
+}
+
+static void internal_dev_destructor(struct net_device *dev)
+{
+	struct vport *vport = ovs_internal_dev_get_vport(dev);
+
+	ovs_vport_free(vport);
+	free_netdev(dev);
+}
+
+static const struct net_device_ops internal_dev_netdev_ops = {
+	.ndo_open = internal_dev_open,
+	.ndo_stop = internal_dev_stop,
+	.ndo_start_xmit = internal_dev_xmit,
+	.ndo_set_mac_address = internal_dev_mac_addr,
+	.ndo_change_mtu = internal_dev_change_mtu,
+	.ndo_get_stats64 = internal_dev_get_stats,
+};
+
+static void do_setup(struct net_device *netdev)
+{
+	ether_setup(netdev);
+
+	netdev->netdev_ops = &internal_dev_netdev_ops;
+
+	netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
+	netdev->destructor = internal_dev_destructor;
+	SET_ETHTOOL_OPS(netdev, &internal_dev_ethtool_ops);
+	netdev->tx_queue_len = 0;
+
+	netdev->features = NETIF_F_LLTX | NETIF_F_SG | NETIF_F_FRAGLIST |
+				NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | NETIF_F_TSO;
+
+	netdev->vlan_features = netdev->features;
+	netdev->features |= NETIF_F_HW_VLAN_TX;
+	netdev->hw_features = netdev->features & ~NETIF_F_LLTX;
+	random_ether_addr(netdev->dev_addr);
+}
+
+static struct vport *internal_dev_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	struct netdev_vport *netdev_vport;
+	struct internal_dev *internal_dev;
+	int err;
+
+	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+				&ovs_internal_vport_ops, parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		goto error;
+	}
+
+	netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev = alloc_netdev(sizeof(struct internal_dev),
+					 parms->name, do_setup);
+	if (!netdev_vport->dev) {
+		err = -ENOMEM;
+		goto error_free_vport;
+	}
+
+	internal_dev = internal_dev_priv(netdev_vport->dev);
+	internal_dev->vport = vport;
+
+	err = register_netdevice(netdev_vport->dev);
+	if (err)
+		goto error_free_netdev;
+
+	dev_set_promiscuity(netdev_vport->dev, 1);
+	netif_start_queue(netdev_vport->dev);
+
+	return vport;
+
+error_free_netdev:
+	free_netdev(netdev_vport->dev);
+error_free_vport:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+static void internal_dev_destroy(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	netif_stop_queue(netdev_vport->dev);
+	dev_set_promiscuity(netdev_vport->dev, -1);
+
+	/* unregister_netdevice() waits for an RCU grace period. */
+	unregister_netdevice(netdev_vport->dev);
+}
+
+static int internal_dev_recv(struct vport *vport, struct sk_buff *skb)
+{
+	struct net_device *netdev = netdev_vport_priv(vport)->dev;
+	int len;
+
+	len = skb->len;
+	skb->dev = netdev;
+	skb->pkt_type = PACKET_HOST;
+	skb->protocol = eth_type_trans(skb, netdev);
+
+	netif_rx(skb);
+
+	return len;
+}
+
+const struct vport_ops ovs_internal_vport_ops = {
+	.type		= OVS_VPORT_TYPE_INTERNAL,
+	.create		= internal_dev_create,
+	.destroy	= internal_dev_destroy,
+	.get_name	= ovs_netdev_get_name,
+	.get_ifindex	= ovs_netdev_get_ifindex,
+	.send		= internal_dev_recv,
+};
+
+int ovs_is_internal_dev(const struct net_device *netdev)
+{
+	return netdev->netdev_ops == &internal_dev_netdev_ops;
+}
+
+struct vport *ovs_internal_dev_get_vport(struct net_device *netdev)
+{
+	if (!ovs_is_internal_dev(netdev))
+		return NULL;
+
+	return internal_dev_priv(netdev)->vport;
+}
diff --git a/net/openvswitch/vport-internal_dev.h b/net/openvswitch/vport-internal_dev.h
new file mode 100644
index 000000000000..3454447c5f11
--- /dev/null
+++ b/net/openvswitch/vport-internal_dev.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_INTERNAL_DEV_H
+#define VPORT_INTERNAL_DEV_H 1
+
+#include "datapath.h"
+#include "vport.h"
+
+int ovs_is_internal_dev(const struct net_device *);
+struct vport *ovs_internal_dev_get_vport(struct net_device *);
+
+#endif /* vport-internal_dev.h */
diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c
new file mode 100644
index 000000000000..c1068aed03d1
--- /dev/null
+++ b/net/openvswitch/vport-netdev.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/if_arp.h>
+#include <linux/if_bridge.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/llc.h>
+#include <linux/rtnetlink.h>
+#include <linux/skbuff.h>
+
+#include <net/llc.h>
+
+#include "datapath.h"
+#include "vport-internal_dev.h"
+#include "vport-netdev.h"
+
+/* Must be called with rcu_read_lock. */
+static void netdev_port_receive(struct vport *vport, struct sk_buff *skb)
+{
+	if (unlikely(!vport)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	/* Make our own copy of the packet.  Otherwise we will mangle the
+	 * packet for anyone who came before us (e.g. tcpdump via AF_PACKET).
+	 * (No one comes after us, since we tell handle_bridge() that we took
+	 * the packet.) */
+	skb = skb_share_check(skb, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return;
+
+	skb_push(skb, ETH_HLEN);
+	ovs_vport_receive(vport, skb);
+}
+
+/* Called with rcu_read_lock and bottom-halves disabled. */
+static rx_handler_result_t netdev_frame_hook(struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	struct vport *vport;
+
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		return RX_HANDLER_PASS;
+
+	vport = ovs_netdev_get_vport(skb->dev);
+
+	netdev_port_receive(vport, skb);
+
+	return RX_HANDLER_CONSUMED;
+}
+
+static struct vport *netdev_create(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	struct netdev_vport *netdev_vport;
+	int err;
+
+	vport = ovs_vport_alloc(sizeof(struct netdev_vport),
+				&ovs_netdev_vport_ops, parms);
+	if (IS_ERR(vport)) {
+		err = PTR_ERR(vport);
+		goto error;
+	}
+
+	netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev = dev_get_by_name(&init_net, parms->name);
+	if (!netdev_vport->dev) {
+		err = -ENODEV;
+		goto error_free_vport;
+	}
+
+	if (netdev_vport->dev->flags & IFF_LOOPBACK ||
+	    netdev_vport->dev->type != ARPHRD_ETHER ||
+	    ovs_is_internal_dev(netdev_vport->dev)) {
+		err = -EINVAL;
+		goto error_put;
+	}
+
+	err = netdev_rx_handler_register(netdev_vport->dev, netdev_frame_hook,
+					 vport);
+	if (err)
+		goto error_put;
+
+	dev_set_promiscuity(netdev_vport->dev, 1);
+	netdev_vport->dev->priv_flags |= IFF_OVS_DATAPATH;
+
+	return vport;
+
+error_put:
+	dev_put(netdev_vport->dev);
+error_free_vport:
+	ovs_vport_free(vport);
+error:
+	return ERR_PTR(err);
+}
+
+static void netdev_destroy(struct vport *vport)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+
+	netdev_vport->dev->priv_flags &= ~IFF_OVS_DATAPATH;
+	netdev_rx_handler_unregister(netdev_vport->dev);
+	dev_set_promiscuity(netdev_vport->dev, -1);
+
+	synchronize_rcu();
+
+	dev_put(netdev_vport->dev);
+	ovs_vport_free(vport);
+}
+
+const char *ovs_netdev_get_name(const struct vport *vport)
+{
+	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	return netdev_vport->dev->name;
+}
+
+int ovs_netdev_get_ifindex(const struct vport *vport)
+{
+	const struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	return netdev_vport->dev->ifindex;
+}
+
+static unsigned packet_length(const struct sk_buff *skb)
+{
+	unsigned length = skb->len - ETH_HLEN;
+
+	if (skb->protocol == htons(ETH_P_8021Q))
+		length -= VLAN_HLEN;
+
+	return length;
+}
+
+static int netdev_send(struct vport *vport, struct sk_buff *skb)
+{
+	struct netdev_vport *netdev_vport = netdev_vport_priv(vport);
+	int mtu = netdev_vport->dev->mtu;
+	int len;
+
+	if (unlikely(packet_length(skb) > mtu && !skb_is_gso(skb))) {
+		if (net_ratelimit())
+			pr_warn("%s: dropped over-mtu packet: %d > %d\n",
+				ovs_dp_name(vport->dp), packet_length(skb), mtu);
+		goto error;
+	}
+
+	if (unlikely(skb_warn_if_lro(skb)))
+		goto error;
+
+	skb->dev = netdev_vport->dev;
+	len = skb->len;
+	dev_queue_xmit(skb);
+
+	return len;
+
+error:
+	kfree_skb(skb);
+	ovs_vport_record_error(vport, VPORT_E_TX_DROPPED);
+	return 0;
+}
+
+/* Returns null if this device is not attached to a datapath. */
+struct vport *ovs_netdev_get_vport(struct net_device *dev)
+{
+	if (likely(dev->priv_flags & IFF_OVS_DATAPATH))
+		return (struct vport *)
+			rcu_dereference_rtnl(dev->rx_handler_data);
+	else
+		return NULL;
+}
+
+const struct vport_ops ovs_netdev_vport_ops = {
+	.type		= OVS_VPORT_TYPE_NETDEV,
+	.create		= netdev_create,
+	.destroy	= netdev_destroy,
+	.get_name	= ovs_netdev_get_name,
+	.get_ifindex	= ovs_netdev_get_ifindex,
+	.send		= netdev_send,
+};
diff --git a/net/openvswitch/vport-netdev.h b/net/openvswitch/vport-netdev.h
new file mode 100644
index 000000000000..fd9b008a0e6e
--- /dev/null
+++ b/net/openvswitch/vport-netdev.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_NETDEV_H
+#define VPORT_NETDEV_H 1
+
+#include <linux/netdevice.h>
+
+#include "vport.h"
+
+struct vport *ovs_netdev_get_vport(struct net_device *dev);
+
+struct netdev_vport {
+	struct net_device *dev;
+};
+
+static inline struct netdev_vport *
+netdev_vport_priv(const struct vport *vport)
+{
+	return vport_priv(vport);
+}
+
+const char *ovs_netdev_get_name(const struct vport *);
+const char *ovs_netdev_get_config(const struct vport *);
+int ovs_netdev_get_ifindex(const struct vport *);
+
+#endif /* vport_netdev.h */
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
new file mode 100644
index 000000000000..6cd760131f15
--- /dev/null
+++ b/net/openvswitch/vport.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#include <linux/dcache.h>
+#include <linux/etherdevice.h>
+#include <linux/if.h>
+#include <linux/if_vlan.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rtnetlink.h>
+#include <linux/compat.h>
+#include <linux/version.h>
+
+#include "vport.h"
+#include "vport-internal_dev.h"
+
+/* List of statically compiled vport implementations.  Don't forget to also
+ * add yours to the list at the bottom of vport.h. */
+static const struct vport_ops *vport_ops_list[] = {
+	&ovs_netdev_vport_ops,
+	&ovs_internal_vport_ops,
+};
+
+/* Protected by RCU read lock for reading, RTNL lock for writing. */
+static struct hlist_head *dev_table;
+#define VPORT_HASH_BUCKETS 1024
+
+/**
+ *	ovs_vport_init - initialize vport subsystem
+ *
+ * Called at module load time to initialize the vport subsystem.
+ */
+int ovs_vport_init(void)
+{
+	dev_table = kzalloc(VPORT_HASH_BUCKETS * sizeof(struct hlist_head),
+			    GFP_KERNEL);
+	if (!dev_table)
+		return -ENOMEM;
+
+	return 0;
+}
+
+/**
+ *	ovs_vport_exit - shutdown vport subsystem
+ *
+ * Called at module exit time to shutdown the vport subsystem.
+ */
+void ovs_vport_exit(void)
+{
+	kfree(dev_table);
+}
+
+static struct hlist_head *hash_bucket(const char *name)
+{
+	unsigned int hash = full_name_hash(name, strlen(name));
+	return &dev_table[hash & (VPORT_HASH_BUCKETS - 1)];
+}
+
+/**
+ *	ovs_vport_locate - find a port that has already been created
+ *
+ * @name: name of port to find
+ *
+ * Must be called with RTNL or RCU read lock.
+ */
+struct vport *ovs_vport_locate(const char *name)
+{
+	struct hlist_head *bucket = hash_bucket(name);
+	struct vport *vport;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_rcu(vport, node, bucket, hash_node)
+		if (!strcmp(name, vport->ops->get_name(vport)))
+			return vport;
+
+	return NULL;
+}
+
+/**
+ *	ovs_vport_alloc - allocate and initialize new vport
+ *
+ * @priv_size: Size of private data area to allocate.
+ * @ops: vport device ops
+ *
+ * Allocate and initialize a new vport defined by @ops.  The vport will contain
+ * a private data area of size @priv_size that can be accessed using
+ * vport_priv().  vports that are no longer needed should be released with
+ * vport_free().
+ */
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops,
+			  const struct vport_parms *parms)
+{
+	struct vport *vport;
+	size_t alloc_size;
+
+	alloc_size = sizeof(struct vport);
+	if (priv_size) {
+		alloc_size = ALIGN(alloc_size, VPORT_ALIGN);
+		alloc_size += priv_size;
+	}
+
+	vport = kzalloc(alloc_size, GFP_KERNEL);
+	if (!vport)
+		return ERR_PTR(-ENOMEM);
+
+	vport->dp = parms->dp;
+	vport->port_no = parms->port_no;
+	vport->upcall_pid = parms->upcall_pid;
+	vport->ops = ops;
+
+	vport->percpu_stats = alloc_percpu(struct vport_percpu_stats);
+	if (!vport->percpu_stats)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&vport->stats_lock);
+
+	return vport;
+}
+
+/**
+ *	ovs_vport_free - uninitialize and free vport
+ *
+ * @vport: vport to free
+ *
+ * Frees a vport allocated with vport_alloc() when it is no longer needed.
+ *
+ * The caller must ensure that an RCU grace period has passed since the last
+ * time @vport was in a datapath.
+ */
+void ovs_vport_free(struct vport *vport)
+{
+	free_percpu(vport->percpu_stats);
+	kfree(vport);
+}
+
+/**
+ *	ovs_vport_add - add vport device (for kernel callers)
+ *
+ * @parms: Information about new vport.
+ *
+ * Creates a new vport with the specified configuration (which is dependent on
+ * device type).  RTNL lock must be held.
+ */
+struct vport *ovs_vport_add(const struct vport_parms *parms)
+{
+	struct vport *vport;
+	int err = 0;
+	int i;
+
+	ASSERT_RTNL();
+
+	for (i = 0; i < ARRAY_SIZE(vport_ops_list); i++) {
+		if (vport_ops_list[i]->type == parms->type) {
+			vport = vport_ops_list[i]->create(parms);
+			if (IS_ERR(vport)) {
+				err = PTR_ERR(vport);
+				goto out;
+			}
+
+			hlist_add_head_rcu(&vport->hash_node,
+					   hash_bucket(vport->ops->get_name(vport)));
+			return vport;
+		}
+	}
+
+	err = -EAFNOSUPPORT;
+
+out:
+	return ERR_PTR(err);
+}
+
+/**
+ *	ovs_vport_set_options - modify existing vport device (for kernel callers)
+ *
+ * @vport: vport to modify.
+ * @port: New configuration.
+ *
+ * Modifies an existing device with the specified configuration (which is
+ * dependent on device type).  RTNL lock must be held.
+ */
+int ovs_vport_set_options(struct vport *vport, struct nlattr *options)
+{
+	ASSERT_RTNL();
+
+	if (!vport->ops->set_options)
+		return -EOPNOTSUPP;
+	return vport->ops->set_options(vport, options);
+}
+
+/**
+ *	ovs_vport_del - delete existing vport device
+ *
+ * @vport: vport to delete.
+ *
+ * Detaches @vport from its datapath and destroys it.  It is possible to fail
+ * for reasons such as lack of memory.  RTNL lock must be held.
+ */
+void ovs_vport_del(struct vport *vport)
+{
+	ASSERT_RTNL();
+
+	hlist_del_rcu(&vport->hash_node);
+
+	vport->ops->destroy(vport);
+}
+
+/**
+ *	ovs_vport_get_stats - retrieve device stats
+ *
+ * @vport: vport from which to retrieve the stats
+ * @stats: location to store stats
+ *
+ * Retrieves transmit, receive, and error stats for the given device.
+ *
+ * Must be called with RTNL lock or rcu_read_lock.
+ */
+void ovs_vport_get_stats(struct vport *vport, struct ovs_vport_stats *stats)
+{
+	int i;
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* We potentially have 2 sources of stats that need to be combined:
+	 * those we have collected (split into err_stats and percpu_stats) from
+	 * set_stats() and device error stats from netdev->get_stats() (for
+	 * errors that happen  downstream and therefore aren't reported through
+	 * our vport_record_error() function).
+	 * Stats from first source are reported by ovs (OVS_VPORT_ATTR_STATS).
+	 * netdev-stats can be directly read over netlink-ioctl.
+	 */
+
+	spin_lock_bh(&vport->stats_lock);
+
+	stats->rx_errors	= vport->err_stats.rx_errors;
+	stats->tx_errors	= vport->err_stats.tx_errors;
+	stats->tx_dropped	= vport->err_stats.tx_dropped;
+	stats->rx_dropped	= vport->err_stats.rx_dropped;
+
+	spin_unlock_bh(&vport->stats_lock);
+
+	for_each_possible_cpu(i) {
+		const struct vport_percpu_stats *percpu_stats;
+		struct vport_percpu_stats local_stats;
+		unsigned int start;
+
+		percpu_stats = per_cpu_ptr(vport->percpu_stats, i);
+
+		do {
+			start = u64_stats_fetch_begin_bh(&percpu_stats->sync);
+			local_stats = *percpu_stats;
+		} while (u64_stats_fetch_retry_bh(&percpu_stats->sync, start));
+
+		stats->rx_bytes		+= local_stats.rx_bytes;
+		stats->rx_packets	+= local_stats.rx_packets;
+		stats->tx_bytes		+= local_stats.tx_bytes;
+		stats->tx_packets	+= local_stats.tx_packets;
+	}
+}
+
+/**
+ *	ovs_vport_get_options - retrieve device options
+ *
+ * @vport: vport from which to retrieve the options.
+ * @skb: sk_buff where options should be appended.
+ *
+ * Retrieves the configuration of the given device, appending an
+ * %OVS_VPORT_ATTR_OPTIONS attribute that in turn contains nested
+ * vport-specific attributes to @skb.
+ *
+ * Returns 0 if successful, -EMSGSIZE if @skb has insufficient room, or another
+ * negative error code if a real error occurred.  If an error occurs, @skb is
+ * left unmodified.
+ *
+ * Must be called with RTNL lock or rcu_read_lock.
+ */
+int ovs_vport_get_options(const struct vport *vport, struct sk_buff *skb)
+{
+	struct nlattr *nla;
+
+	nla = nla_nest_start(skb, OVS_VPORT_ATTR_OPTIONS);
+	if (!nla)
+		return -EMSGSIZE;
+
+	if (vport->ops->get_options) {
+		int err = vport->ops->get_options(vport, skb);
+		if (err) {
+			nla_nest_cancel(skb, nla);
+			return err;
+		}
+	}
+
+	nla_nest_end(skb, nla);
+	return 0;
+}
+
+/**
+ *	ovs_vport_receive - pass up received packet to the datapath for processing
+ *
+ * @vport: vport that received the packet
+ * @skb: skb that was received
+ *
+ * Must be called with rcu_read_lock.  The packet cannot be shared and
+ * skb->data should point to the Ethernet header.  The caller must have already
+ * called compute_ip_summed() to initialize the checksumming fields.
+ */
+void ovs_vport_receive(struct vport *vport, struct sk_buff *skb)
+{
+	struct vport_percpu_stats *stats;
+
+	stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+	u64_stats_update_begin(&stats->sync);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->sync);
+
+	ovs_dp_process_received_packet(vport, skb);
+}
+
+/**
+ *	ovs_vport_send - send a packet on a device
+ *
+ * @vport: vport on which to send the packet
+ * @skb: skb to send
+ *
+ * Sends the given packet and returns the length of data sent.  Either RTNL
+ * lock or rcu_read_lock must be held.
+ */
+int ovs_vport_send(struct vport *vport, struct sk_buff *skb)
+{
+	int sent = vport->ops->send(vport, skb);
+
+	if (likely(sent)) {
+		struct vport_percpu_stats *stats;
+
+		stats = per_cpu_ptr(vport->percpu_stats, smp_processor_id());
+
+		u64_stats_update_begin(&stats->sync);
+		stats->tx_packets++;
+		stats->tx_bytes += sent;
+		u64_stats_update_end(&stats->sync);
+	}
+	return sent;
+}
+
+/**
+ *	ovs_vport_record_error - indicate device error to generic stats layer
+ *
+ * @vport: vport that encountered the error
+ * @err_type: one of enum vport_err_type types to indicate the error type
+ *
+ * If using the vport generic stats layer indicate that an error of the given
+ * type has occured.
+ */
+void ovs_vport_record_error(struct vport *vport, enum vport_err_type err_type)
+{
+	spin_lock(&vport->stats_lock);
+
+	switch (err_type) {
+	case VPORT_E_RX_DROPPED:
+		vport->err_stats.rx_dropped++;
+		break;
+
+	case VPORT_E_RX_ERROR:
+		vport->err_stats.rx_errors++;
+		break;
+
+	case VPORT_E_TX_DROPPED:
+		vport->err_stats.tx_dropped++;
+		break;
+
+	case VPORT_E_TX_ERROR:
+		vport->err_stats.tx_errors++;
+		break;
+	};
+
+	spin_unlock(&vport->stats_lock);
+}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
new file mode 100644
index 000000000000..19609629dabd
--- /dev/null
+++ b/net/openvswitch/vport.h
@@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2007-2011 Nicira Networks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA
+ */
+
+#ifndef VPORT_H
+#define VPORT_H 1
+
+#include <linux/list.h>
+#include <linux/openvswitch.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/u64_stats_sync.h>
+
+#include "datapath.h"
+
+struct vport;
+struct vport_parms;
+
+/* The following definitions are for users of the vport subsytem: */
+
+int ovs_vport_init(void);
+void ovs_vport_exit(void);
+
+struct vport *ovs_vport_add(const struct vport_parms *);
+void ovs_vport_del(struct vport *);
+
+struct vport *ovs_vport_locate(const char *name);
+
+void ovs_vport_get_stats(struct vport *, struct ovs_vport_stats *);
+
+int ovs_vport_set_options(struct vport *, struct nlattr *options);
+int ovs_vport_get_options(const struct vport *, struct sk_buff *);
+
+int ovs_vport_send(struct vport *, struct sk_buff *);
+
+/* The following definitions are for implementers of vport devices: */
+
+struct vport_percpu_stats {
+	u64 rx_bytes;
+	u64 rx_packets;
+	u64 tx_bytes;
+	u64 tx_packets;
+	struct u64_stats_sync sync;
+};
+
+struct vport_err_stats {
+	u64 rx_dropped;
+	u64 rx_errors;
+	u64 tx_dropped;
+	u64 tx_errors;
+};
+
+/**
+ * struct vport - one port within a datapath
+ * @rcu: RCU callback head for deferred destruction.
+ * @port_no: Index into @dp's @ports array.
+ * @dp: Datapath to which this port belongs.
+ * @node: Element in @dp's @port_list.
+ * @upcall_pid: The Netlink port to use for packets received on this port that
+ * miss the flow table.
+ * @hash_node: Element in @dev_table hash table in vport.c.
+ * @ops: Class structure.
+ * @percpu_stats: Points to per-CPU statistics used and maintained by vport
+ * @stats_lock: Protects @err_stats;
+ * @err_stats: Points to error statistics used and maintained by vport
+ */
+struct vport {
+	struct rcu_head rcu;
+	u16 port_no;
+	struct datapath	*dp;
+	struct list_head node;
+	u32 upcall_pid;
+
+	struct hlist_node hash_node;
+	const struct vport_ops *ops;
+
+	struct vport_percpu_stats __percpu *percpu_stats;
+
+	spinlock_t stats_lock;
+	struct vport_err_stats err_stats;
+};
+
+/**
+ * struct vport_parms - parameters for creating a new vport
+ *
+ * @name: New vport's name.
+ * @type: New vport's type.
+ * @options: %OVS_VPORT_ATTR_OPTIONS attribute from Netlink message, %NULL if
+ * none was supplied.
+ * @dp: New vport's datapath.
+ * @port_no: New vport's port number.
+ */
+struct vport_parms {
+	const char *name;
+	enum ovs_vport_type type;
+	struct nlattr *options;
+
+	/* For ovs_vport_alloc(). */
+	struct datapath *dp;
+	u16 port_no;
+	u32 upcall_pid;
+};
+
+/**
+ * struct vport_ops - definition of a type of virtual port
+ *
+ * @type: %OVS_VPORT_TYPE_* value for this type of virtual port.
+ * @create: Create a new vport configured as specified.  On success returns
+ * a new vport allocated with ovs_vport_alloc(), otherwise an ERR_PTR() value.
+ * @destroy: Destroys a vport.  Must call vport_free() on the vport but not
+ * before an RCU grace period has elapsed.
+ * @set_options: Modify the configuration of an existing vport.  May be %NULL
+ * if modification is not supported.
+ * @get_options: Appends vport-specific attributes for the configuration of an
+ * existing vport to a &struct sk_buff.  May be %NULL for a vport that does not
+ * have any configuration.
+ * @get_name: Get the device's name.
+ * @get_config: Get the device's configuration.
+ * @get_ifindex: Get the system interface index associated with the device.
+ * May be null if the device does not have an ifindex.
+ * @send: Send a packet on the device.  Returns the length of the packet sent.
+ */
+struct vport_ops {
+	enum ovs_vport_type type;
+
+	/* Called with RTNL lock. */
+	struct vport *(*create)(const struct vport_parms *);
+	void (*destroy)(struct vport *);
+
+	int (*set_options)(struct vport *, struct nlattr *);
+	int (*get_options)(const struct vport *, struct sk_buff *);
+
+	/* Called with rcu_read_lock or RTNL lock. */
+	const char *(*get_name)(const struct vport *);
+	void (*get_config)(const struct vport *, void *);
+	int (*get_ifindex)(const struct vport *);
+
+	int (*send)(struct vport *, struct sk_buff *);
+};
+
+enum vport_err_type {
+	VPORT_E_RX_DROPPED,
+	VPORT_E_RX_ERROR,
+	VPORT_E_TX_DROPPED,
+	VPORT_E_TX_ERROR,
+};
+
+struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *,
+			      const struct vport_parms *);
+void ovs_vport_free(struct vport *);
+
+#define VPORT_ALIGN 8
+
+/**
+ *	vport_priv - access private data area of vport
+ *
+ * @vport: vport to access
+ *
+ * If a nonzero size was passed in priv_size of vport_alloc() a private data
+ * area was allocated on creation.  This allows that area to be accessed and
+ * used for any purpose needed by the vport implementer.
+ */
+static inline void *vport_priv(const struct vport *vport)
+{
+	return (u8 *)vport + ALIGN(sizeof(struct vport), VPORT_ALIGN);
+}
+
+/**
+ *	vport_from_priv - lookup vport from private data pointer
+ *
+ * @priv: Start of private data area.
+ *
+ * It is sometimes useful to translate from a pointer to the private data
+ * area to the vport, such as in the case where the private data pointer is
+ * the result of a hash table lookup.  @priv must point to the start of the
+ * private data area.
+ */
+static inline struct vport *vport_from_priv(const void *priv)
+{
+	return (struct vport *)(priv - ALIGN(sizeof(struct vport), VPORT_ALIGN));
+}
+
+void ovs_vport_receive(struct vport *, struct sk_buff *);
+void ovs_vport_record_error(struct vport *, enum vport_err_type err_type);
+
+/* List of statically compiled vport implementations.  Don't forget to also
+ * add yours to the list at the top of vport.c. */
+extern const struct vport_ops ovs_netdev_vport_ops;
+extern const struct vport_ops ovs_internal_vport_ops;
+
+#endif /* vport.h */
-- 
cgit v1.2.3


From 117632e64d2a5f464e491fe221d7169a3814a77b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 3 Dec 2011 21:39:53 +0000
Subject: tcp: take care of misalignments

We discovered that TCP stack could retransmit misaligned skbs if a
malicious peer acknowledged sub MSS frame. This currently can happen
only if output interface is non SG enabled : If SG is enabled, tcp
builds headless skbs (all payload is included in fragments), so the tcp
trimming process only removes parts of skb fragments, header stay
aligned.

Some arches cant handle misalignments, so force a head reallocation and
shrink headroom to MAX_TCP_HEADER.

Dont care about misaligments on x86 and PPC (or other arches setting
NET_IP_ALIGN to 0)

This patch introduces __pskb_copy() which can specify the headroom of
new head, and pskb_copy() becomes a wrapper on top of __pskb_copy()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 11 +++++++++--
 net/core/skbuff.c      | 11 ++++++-----
 net/ipv4/tcp_output.c  | 10 +++++++++-
 3 files changed, 24 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cec0657d0d32..12e6fed73f8e 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -568,8 +568,9 @@ extern struct sk_buff *skb_clone(struct sk_buff *skb,
 				 gfp_t priority);
 extern struct sk_buff *skb_copy(const struct sk_buff *skb,
 				gfp_t priority);
-extern struct sk_buff *pskb_copy(struct sk_buff *skb,
-				 gfp_t gfp_mask);
+extern struct sk_buff *__pskb_copy(struct sk_buff *skb,
+				 int headroom, gfp_t gfp_mask);
+
 extern int	       pskb_expand_head(struct sk_buff *skb,
 					int nhead, int ntail,
 					gfp_t gfp_mask);
@@ -1799,6 +1800,12 @@ static inline dma_addr_t skb_frag_dma_map(struct device *dev,
 			    frag->page_offset + offset, size, dir);
 }
 
+static inline struct sk_buff *pskb_copy(struct sk_buff *skb,
+					gfp_t gfp_mask)
+{
+	return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
+}
+
 /**
  *	skb_clone_writable - is the header of a clone writable
  *	@skb: buffer to check
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 678ae4e783aa..fd3646209b65 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -840,8 +840,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask)
 EXPORT_SYMBOL(skb_copy);
 
 /**
- *	pskb_copy	-	create copy of an sk_buff with private head.
+ *	__pskb_copy	-	create copy of an sk_buff with private head.
  *	@skb: buffer to copy
+ *	@headroom: headroom of new skb
  *	@gfp_mask: allocation priority
  *
  *	Make a copy of both an &sk_buff and part of its data, located
@@ -852,16 +853,16 @@ EXPORT_SYMBOL(skb_copy);
  *	The returned buffer has a reference count of 1.
  */
 
-struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
+struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask)
 {
-	unsigned int size = skb_end_pointer(skb) - skb->head;
+	unsigned int size = skb_headlen(skb) + headroom;
 	struct sk_buff *n = alloc_skb(size, gfp_mask);
 
 	if (!n)
 		goto out;
 
 	/* Set the data pointer */
-	skb_reserve(n, skb_headroom(skb));
+	skb_reserve(n, headroom);
 	/* Set the tail pointer and length */
 	skb_put(n, skb_headlen(skb));
 	/* Copy the bytes */
@@ -897,7 +898,7 @@ struct sk_buff *pskb_copy(struct sk_buff *skb, gfp_t gfp_mask)
 out:
 	return n;
 }
-EXPORT_SYMBOL(pskb_copy);
+EXPORT_SYMBOL(__pskb_copy);
 
 /**
  *	pskb_expand_head - reallocate header of &sk_buff
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 58f69acd3d22..50788d67bdb7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2147,7 +2147,15 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
 
-	err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+	/* make sure skb->data is aligned on arches that require it */
+	if (unlikely(NET_IP_ALIGN && ((unsigned long)skb->data & 3))) {
+		struct sk_buff *nskb = __pskb_copy(skb, MAX_TCP_HEADER,
+						   GFP_ATOMIC);
+		err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+			     -ENOBUFS;
+	} else {
+		err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+	}
 
 	if (err == 0) {
 		/* Update global TCP statistics. */
-- 
cgit v1.2.3


From 8f97339d3feb662037b86a925e692017c0b32323 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 Jul 2011 22:48:10 +0100
Subject: netfilter: add ipv4 reverse path filter match

This tries to do the same thing as fib_validate_source(), but differs
in several aspects.

The most important difference is that the reverse path filter built into
fib_validate_source uses the oif as iif when performing the reverse
lookup.  We do not do this, as the oif is not yet known by the time the
PREROUTING hook is invoked.

We can't wait until FORWARD chain because by the time FORWARD is invoked
ipv4 forward path may have already sent icmp messages is response
to to-be-discarded-via-rpfilter packets.

To avoid the such an additional lookup in PREROUTING, Patrick McHardy
suggested to attach the path information directly in the match
(i.e., just do what the standard ipv4 path does a bit earlier in PREROUTING).

This works, but it also has a few caveats. Most importantly, when using
marks in PREROUTING to re-route traffic based on the nfmark, -m rpfilter
would have to be used after the nfmark has been set; otherwise the nfmark
would have no effect (because the route is already attached).

Another problem would be interaction with -j TPROXY, as this target sets an
nfmark and uses ACCEPT instead of continue, i.e. such a version of
-m rpfilter cannot be used for the initial to-be-intercepted packets.

In case in turns out that the oif is required, we can add Patricks
suggestion with a new match option (e.g. --rpf-use-oif) to keep ruleset
compatibility.

Another difference to current builtin ipv4 rpfilter is that packets subject to ipsec
transformation are not automatically excluded. If you want this, simply
combine -m rpfilter with the policy match.

Packets arriving on loopback interfaces always match.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_rpfilter.h |  23 ++++++
 net/ipv4/netfilter/Kconfig            |  10 +++
 net/ipv4/netfilter/Makefile           |   1 +
 net/ipv4/netfilter/ipt_rpfilter.c     | 141 ++++++++++++++++++++++++++++++++++
 4 files changed, 175 insertions(+)
 create mode 100644 include/linux/netfilter/xt_rpfilter.h
 create mode 100644 net/ipv4/netfilter/ipt_rpfilter.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_rpfilter.h b/include/linux/netfilter/xt_rpfilter.h
new file mode 100644
index 000000000000..8358d4f71952
--- /dev/null
+++ b/include/linux/netfilter/xt_rpfilter.h
@@ -0,0 +1,23 @@
+#ifndef _XT_RPATH_H
+#define _XT_RPATH_H
+
+#include <linux/types.h>
+
+enum {
+	XT_RPFILTER_LOOSE = 1 << 0,
+	XT_RPFILTER_VALID_MARK = 1 << 1,
+	XT_RPFILTER_ACCEPT_LOCAL = 1 << 2,
+	XT_RPFILTER_INVERT = 1 << 3,
+#ifdef __KERNEL__
+	XT_RPFILTER_OPTION_MASK = XT_RPFILTER_LOOSE |
+				  XT_RPFILTER_VALID_MARK |
+				  XT_RPFILTER_ACCEPT_LOCAL |
+				  XT_RPFILTER_INVERT,
+#endif
+};
+
+struct xt_rpfilter_info {
+	__u8 flags;
+};
+
+#endif
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index f19f2182894c..7e1f5cdaf11e 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -82,6 +82,16 @@ config IP_NF_MATCH_ECN
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config IP_NF_MATCH_RPFILTER
+	tristate '"rpfilter" reverse path filter match support'
+	depends on NETFILTER_ADVANCED
+	---help---
+	  This option allows you to match packets whose replies would
+	  go out via the interface the packet came in.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+	  The module will be called ipt_rpfilter.
+
 config IP_NF_MATCH_TTL
 	tristate '"ttl" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index dca2082ec683..123dd88cea53 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -50,6 +50,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 # matches
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
+obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
 
 # targets
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
new file mode 100644
index 000000000000..31371be8174b
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -0,0 +1,141 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match");
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 rpfilter_get_saddr(__be32 addr)
+{
+	if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+	    ipv4_is_zeronet(addr))
+		return 0;
+	return addr;
+}
+
+static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+				const struct net_device *dev, u8 flags)
+{
+	struct fib_result res;
+	bool dev_match;
+	struct net *net = dev_net(dev);
+	int ret __maybe_unused;
+
+	if (fib_lookup(net, fl4, &res))
+		return false;
+
+	if (res.type != RTN_UNICAST) {
+		if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
+			return false;
+	}
+	dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+	for (ret = 0; ret < res.fi->fib_nhs; ret++) {
+		struct fib_nh *nh = &res.fi->fib_nh[ret];
+
+		if (nh->nh_dev == dev) {
+			dev_match = true;
+			break;
+		}
+	}
+#else
+	if (FIB_RES_DEV(res) == dev)
+		dev_match = true;
+#endif
+	if (dev_match || flags & XT_RPFILTER_LOOSE)
+		return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
+	return dev_match;
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_rpfilter_info *info;
+	const struct iphdr *iph;
+	struct flowi4 flow;
+	bool invert;
+
+	info = par->matchinfo;
+	invert = info->flags & XT_RPFILTER_INVERT;
+
+	if (par->in->flags & IFF_LOOPBACK)
+		return true ^ invert;
+
+	iph = ip_hdr(skb);
+	if (ipv4_is_multicast(iph->daddr)) {
+		if (ipv4_is_zeronet(iph->saddr))
+			return ipv4_is_local_multicast(iph->daddr) ^ invert;
+		flow.flowi4_iif = 0;
+	} else {
+		flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex;
+	}
+
+	flow.daddr = iph->saddr;
+	flow.saddr = rpfilter_get_saddr(iph->daddr);
+	flow.flowi4_oif = 0;
+	flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+	flow.flowi4_tos = RT_TOS(iph->tos);
+	flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+
+	return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_rpfilter_info *info = par->matchinfo;
+	unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+	if (info->flags & options) {
+		pr_info("unknown options encountered");
+		return -EINVAL;
+	}
+
+	if (strcmp(par->table, "mangle") != 0 &&
+	    strcmp(par->table, "raw") != 0) {
+		pr_info("match only valid in the \'raw\' "
+			"or \'mangle\' tables, not \'%s\'.\n", par->table);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+	.name		= "rpfilter",
+	.family		= NFPROTO_IPV4,
+	.checkentry	= rpfilter_check,
+	.match		= rpfilter_mt,
+	.matchsize	= sizeof(struct xt_rpfilter_info),
+	.hooks		= (1 << NF_INET_PRE_ROUTING),
+	.me		= THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+	return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+	xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
-- 
cgit v1.2.3


From 63afe12f4be3b08597ae41ce7c0837bfc106b0ac Mon Sep 17 00:00:00 2001
From: "sjur.brandeland@stericsson.com" <sjur.brandeland@stericsson.com>
Date: Sun, 4 Dec 2011 11:22:52 +0000
Subject: if_ether.h: Add IEEE 802.1 Local Experimental Ethertype 1.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add EthType 0x88b5.
This Ethertype value is available for public use for prototype and
vendor-specific protocol development,as defined in Amendment 802a
to IEEE Std 802.

Signed-off-by: Sjur Brændeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_ether.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index e473003e4bda..56d907a2c804 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -79,6 +79,7 @@
 #define ETH_P_PAE	0x888E		/* Port Access Entity (IEEE 802.1X) */
 #define ETH_P_AOE	0x88A2		/* ATA over Ethernet		*/
 #define ETH_P_8021AD	0x88A8          /* 802.1ad Service VLAN		*/
+#define ETH_P_802_EX1	0x88B5		/* 802.1 Local Experimental 1.  */
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_8021AH	0x88E7          /* 802.1ah Backbone Service Tag */
 #define ETH_P_1588	0x88F7		/* IEEE 1588 Timesync */
-- 
cgit v1.2.3


From 7f1fb60c4fc9fb29fbb406ac8c4cfb4e59e168d6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:56:43 +0000
Subject: inet_diag: Partly rename inet_ to sock_

The ultimate goal is to get the sock_diag module, that works in
family+protocol terms. Currently this is suitable to do on the
inet_diag basis, so rename parts of the code. It will be moved
to sock_diag.c later.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h  |  4 +++-
 net/dccp/diag.c          |  2 +-
 net/ipv4/inet_diag.c     | 33 +++++++++++++++++++--------------
 net/ipv4/tcp_diag.c      |  2 +-
 security/selinux/hooks.c |  2 +-
 5 files changed, 25 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 8374d2967362..52e48959cfa1 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -8,7 +8,7 @@
 #define NETLINK_UNUSED		1	/* Unused number				*/
 #define NETLINK_USERSOCK	2	/* Reserved for user mode socket protocols 	*/
 #define NETLINK_FIREWALL	3	/* Firewalling hook				*/
-#define NETLINK_INET_DIAG	4	/* INET socket monitoring			*/
+#define NETLINK_SOCK_DIAG	4	/* socket monitoring				*/
 #define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
 #define NETLINK_XFRM		6	/* ipsec */
 #define NETLINK_SELINUX		7	/* SELinux event notifications */
@@ -27,6 +27,8 @@
 #define NETLINK_RDMA		20
 #define NETLINK_CRYPTO		21	/* Crypto layer */
 
+#define NETLINK_INET_DIAG	NETLINK_SOCK_DIAG
+
 #define MAX_LINKS 32		
 
 struct sockaddr_nl {
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index b21f261da75e..d92ba7d1c351 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -71,4 +71,4 @@ module_exit(dccp_diag_fini);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
 MODULE_DESCRIPTION("DCCP inet_diag handler");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, DCCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, DCCPDIAG_GETSOCK);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0a46c541b477..a5f3c40ac3c5 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -45,7 +45,7 @@ struct inet_diag_entry {
 	u16 userlocks;
 };
 
-static struct sock *idiagnl;
+static struct sock *sdiagnl;
 
 #define INET_DIAG_PUT(skb, attrtype, attrlen) \
 	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
@@ -56,7 +56,7 @@ static const struct inet_diag_handler *inet_diag_lock_handler(int type)
 {
 	if (!inet_diag_table[type])
 		request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
-			       NETLINK_INET_DIAG, type);
+			       NETLINK_SOCK_DIAG, type);
 
 	mutex_lock(&inet_diag_table_mutex);
 	if (!inet_diag_table[type])
@@ -312,7 +312,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 		kfree_skb(rep);
 		goto out;
 	}
-	err = netlink_unicast(idiagnl, rep, NETLINK_CB(in_skb).pid,
+	err = netlink_unicast(sdiagnl, rep, NETLINK_CB(in_skb).pid,
 			      MSG_DONTWAIT);
 	if (err > 0)
 		err = 0;
@@ -870,20 +870,25 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 				return -EINVAL;
 		}
 
-		return netlink_dump_start(idiagnl, skb, nlh,
+		return netlink_dump_start(sdiagnl, skb, nlh,
 					  inet_diag_dump, NULL, 0);
 	}
 
 	return inet_diag_get_exact(skb, nlh);
 }
 
-static DEFINE_MUTEX(inet_diag_mutex);
+static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	return inet_diag_rcv_msg(skb, nlh);
+}
+
+static DEFINE_MUTEX(sock_diag_mutex);
 
-static void inet_diag_rcv(struct sk_buff *skb)
+static void sock_diag_rcv(struct sk_buff *skb)
 {
-	mutex_lock(&inet_diag_mutex);
-	netlink_rcv_skb(skb, &inet_diag_rcv_msg);
-	mutex_unlock(&inet_diag_mutex);
+	mutex_lock(&sock_diag_mutex);
+	netlink_rcv_skb(skb, &sock_diag_rcv_msg);
+	mutex_unlock(&sock_diag_mutex);
 }
 
 int inet_diag_register(const struct inet_diag_handler *h)
@@ -929,9 +934,9 @@ static int __init inet_diag_init(void)
 	if (!inet_diag_table)
 		goto out;
 
-	idiagnl = netlink_kernel_create(&init_net, NETLINK_INET_DIAG, 0,
-					inet_diag_rcv, NULL, THIS_MODULE);
-	if (idiagnl == NULL)
+	sdiagnl = netlink_kernel_create(&init_net, NETLINK_SOCK_DIAG, 0,
+					sock_diag_rcv, NULL, THIS_MODULE);
+	if (sdiagnl == NULL)
 		goto out_free_table;
 	err = 0;
 out:
@@ -943,11 +948,11 @@ out_free_table:
 
 static void __exit inet_diag_exit(void)
 {
-	netlink_kernel_release(idiagnl);
+	netlink_kernel_release(sdiagnl);
 	kfree(inet_diag_table);
 }
 
 module_init(inet_diag_init);
 module_exit(inet_diag_exit);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_INET_DIAG);
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_SOCK_DIAG);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 939edb3b8e4d..9e276b868ce8 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -54,4 +54,4 @@ static void __exit tcp_diag_exit(void)
 module_init(tcp_diag_init);
 module_exit(tcp_diag_exit);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_INET_DIAG, TCPDIAG_GETSOCK);
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, TCPDIAG_GETSOCK);
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index cca09bb46502..86305c2f555a 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -1090,7 +1090,7 @@ static inline u16 socket_type_to_security_class(int family, int type, int protoc
 			return SECCLASS_NETLINK_ROUTE_SOCKET;
 		case NETLINK_FIREWALL:
 			return SECCLASS_NETLINK_FIREWALL_SOCKET;
-		case NETLINK_INET_DIAG:
+		case NETLINK_SOCK_DIAG:
 			return SECCLASS_NETLINK_TCPDIAG_SOCKET;
 		case NETLINK_NFLOG:
 			return SECCLASS_NETLINK_NFLOG_SOCKET;
-- 
cgit v1.2.3


From 8d34172dfdb762a306cdf58b547aa10d798622ec Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:57:06 +0000
Subject: sock_diag: Introduce new message type

This type will run the family+protocol based socket dumping.
Also prepare the stub function for it.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  1 +
 net/ipv4/inet_diag.c      | 17 +++++++++++++++--
 2 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index abf5028db981..f7baaf637426 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -6,6 +6,7 @@
 /* Just some random number */
 #define TCPDIAG_GETSOCK 18
 #define DCCPDIAG_GETSOCK 19
+#define SOCK_DIAG_BY_FAMILY 20
 
 #define INET_DIAG_GETSOCK_MAX 24
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index a5f3c40ac3c5..eb6bdfa9480c 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -850,7 +850,7 @@ unlock:
 	return skb->len;
 }
 
-static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	int hdrlen = sizeof(struct inet_diag_req);
 
@@ -877,9 +877,22 @@ static int inet_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact(skb, nlh);
 }
 
+static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	return -EOPNOTSUPP;
+}
+
 static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	return inet_diag_rcv_msg(skb, nlh);
+	switch (nlh->nlmsg_type) {
+	case TCPDIAG_GETSOCK:
+	case DCCPDIAG_GETSOCK:
+		return inet_diag_rcv_msg_compat(skb, nlh);
+	case SOCK_DIAG_BY_FAMILY:
+		return __sock_diag_rcv_msg(skb, nlh);
+	default:
+		return -EINVAL;
+	}
 }
 
 static DEFINE_MUTEX(sock_diag_mutex);
-- 
cgit v1.2.3


From d366477a52f1df29fa066ffb18e4e6101ee2ad04 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:58:03 +0000
Subject: sock_diag: Initial skeleton

When receiving the SOCK_DIAG_BY_FAMILY message we have to find the
handler for provided family and pass the nl message to it.

This patch describes an infrastructure to work with such nandlers
and implements stubs for AF_INET(6) ones.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h |  23 +++++++++++
 net/ipv4/inet_diag.c      | 102 +++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 123 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/sock_diag.h

(limited to 'include/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
new file mode 100644
index 000000000000..ba4933b1213b
--- /dev/null
+++ b/include/linux/sock_diag.h
@@ -0,0 +1,23 @@
+#ifndef __SOCK_DIAG_H__
+#define __SOCK_DIAG_H__
+struct sk_buff;
+struct nlmsghdr;
+
+struct sock_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+};
+
+struct sock_diag_handler {
+	__u8 family;
+	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
+};
+
+int sock_diag_register(struct sock_diag_handler *h);
+void sock_diag_unregister(struct sock_diag_handler *h);
+
+void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
+void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
+
+extern struct sock *sock_diag_nlsk;
+#endif
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 58caecc343b1..877875ea3d71 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -33,6 +33,7 @@
 #include <linux/stddef.h>
 
 #include <linux/inet_diag.h>
+#include <linux/sock_diag.h>
 
 static const struct inet_diag_handler **inet_diag_table;
 
@@ -887,9 +888,91 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 	return inet_diag_get_exact(skb, nlh);
 }
 
+static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct inet_diag_req);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP) {
+		return -EAFNOSUPPORT;
+	}
+
+	return -EAFNOSUPPORT;
+}
+
+static struct sock_diag_handler inet_diag_handler = {
+	.family = AF_INET,
+	.dump = inet_diag_handler_dump,
+};
+
+static struct sock_diag_handler inet6_diag_handler = {
+	.family = AF_INET6,
+	.dump = inet_diag_handler_dump,
+};
+
+static struct sock_diag_handler *sock_diag_handlers[AF_MAX];
+static DEFINE_MUTEX(sock_diag_table_mutex);
+
+int sock_diag_register(struct sock_diag_handler *hndl)
+{
+	int err = 0;
+
+	if (hndl->family > AF_MAX)
+		return -EINVAL;
+
+	mutex_lock(&sock_diag_table_mutex);
+	if (sock_diag_handlers[hndl->family])
+		err = -EBUSY;
+	else
+		sock_diag_handlers[hndl->family] = hndl;
+	mutex_unlock(&sock_diag_table_mutex);
+
+	return err;
+}
+
+void sock_diag_unregister(struct sock_diag_handler *hnld)
+{
+	int family = hnld->family;
+
+	if (family > AF_MAX)
+		return;
+
+	mutex_lock(&sock_diag_table_mutex);
+	BUG_ON(sock_diag_handlers[family] != hnld);
+	sock_diag_handlers[family] = NULL;
+	mutex_unlock(&sock_diag_table_mutex);
+}
+
+static inline struct sock_diag_handler *sock_diag_lock_handler(int family)
+{
+	mutex_lock(&sock_diag_table_mutex);
+	return sock_diag_handlers[family];
+}
+
+static inline void sock_diag_unlock_handler(struct sock_diag_handler *h)
+{
+	mutex_unlock(&sock_diag_table_mutex);
+}
+
 static int __sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	return -EOPNOTSUPP;
+	int err;
+	struct sock_diag_req *req = NLMSG_DATA(nlh);
+	struct sock_diag_handler *hndl;
+
+	if (nlmsg_len(nlh) < sizeof(*req))
+		return -EINVAL;
+
+	hndl = sock_diag_lock_handler(req->sdiag_family);
+	if (hndl == NULL)
+		err = -ENOENT;
+	else
+		err = hndl->dump(skb, nlh);
+	sock_diag_unlock_handler(hndl);
+
+	return err;
 }
 
 static int sock_diag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
@@ -961,9 +1044,22 @@ static int __init inet_diag_init(void)
 					sock_diag_rcv, NULL, THIS_MODULE);
 	if (sdiagnl == NULL)
 		goto out_free_table;
-	err = 0;
+
+	err = sock_diag_register(&inet_diag_handler);
+	if (err)
+		goto out_free_nl;
+
+	err = sock_diag_register(&inet6_diag_handler);
+	if (err)
+		goto out_free_inet;
+
 out:
 	return err;
+
+out_free_inet:
+	sock_diag_unregister(&inet_diag_handler);
+out_free_nl:
+	netlink_kernel_release(sdiagnl);
 out_free_table:
 	kfree(inet_diag_table);
 	goto out;
@@ -971,6 +1067,8 @@ out_free_table:
 
 static void __exit inet_diag_exit(void)
 {
+	sock_diag_unregister(&inet6_diag_handler);
+	sock_diag_unregister(&inet_diag_handler);
 	netlink_kernel_release(sdiagnl);
 	kfree(inet_diag_table);
 }
-- 
cgit v1.2.3


From 126fdc3249c9ced2a0d20f916858fec26a445f61 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Tue, 6 Dec 2011 07:58:21 +0000
Subject: inet_diag: Introduce new inet_diag_req header

This one coinsides with the sock_diag_req in the beginning and
contains only used fields from its previous analogue.

The existing code is patched to use the _compat version of it
for now.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 11 ++++++++++-
 net/ipv4/inet_diag.c      | 14 +++++++-------
 2 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index f7baaf637426..defe8ff36df8 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -23,7 +23,7 @@ struct inet_diag_sockid {
 
 /* Request structure */
 
-struct inet_diag_req {
+struct inet_diag_req_compat {
 	__u8	idiag_family;		/* Family of addresses. */
 	__u8	idiag_src_len;
 	__u8	idiag_dst_len;
@@ -35,6 +35,15 @@ struct inet_diag_req {
 	__u32	idiag_dbs;		/* Tables to dump (NI) */
 };
 
+struct inet_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u8	idiag_ext;
+	__u8	pad;
+	__u32	idiag_states;
+	struct inet_diag_sockid id;
+};
+
 enum {
 	INET_DIAG_REQ_NONE,
 	INET_DIAG_REQ_BYTECODE,
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 877875ea3d71..f37b1284b46b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -265,7 +265,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 {
 	int err;
 	struct sock *sk;
-	struct inet_diag_req *req = NLMSG_DATA(nlh);
+	struct inet_diag_req_compat *req = NLMSG_DATA(nlh);
 	struct sk_buff *rep;
 	struct inet_hashinfo *hashinfo;
 	const struct inet_diag_handler *handler;
@@ -504,7 +504,7 @@ static int inet_csk_diag_dump(struct sock *sk,
 			      struct netlink_callback *cb,
 			      const struct nlattr *bc)
 {
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 
 	if (bc != NULL) {
 		struct inet_diag_entry entry;
@@ -541,7 +541,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 			       struct netlink_callback *cb,
 			       const struct nlattr *bc)
 {
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 
 	if (bc != NULL) {
 		struct inet_diag_entry entry;
@@ -629,7 +629,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 			       const struct nlattr *bc)
 {
 	struct inet_diag_entry entry;
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt;
 	struct inet_sock *inet = inet_sk(sk);
@@ -712,12 +712,12 @@ static int inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	int i, num;
 	int s_i, s_num;
-	struct inet_diag_req *r = NLMSG_DATA(cb->nlh);
+	struct inet_diag_req_compat *r = NLMSG_DATA(cb->nlh);
 	const struct inet_diag_handler *handler;
 	struct inet_hashinfo *hashinfo;
 	const struct nlattr *bc = NULL;
 
-	if (nlmsg_attrlen(cb->nlh, sizeof(struct inet_diag_req)))
+	if (nlmsg_attrlen(cb->nlh, sizeof(struct inet_diag_req_compat)))
 		bc = nlmsg_find_attr(cb->nlh, sizeof(*r), INET_DIAG_REQ_BYTECODE);
 
 	handler = inet_diag_lock_handler(inet_diag_type2proto(cb->nlh->nlmsg_type));
@@ -863,7 +863,7 @@ unlock:
 
 static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	int hdrlen = sizeof(struct inet_diag_req);
+	int hdrlen = sizeof(struct inet_diag_req_compat);
 
 	if (nlh->nlmsg_type >= INET_DIAG_GETSOCK_MAX ||
 	    nlmsg_len(nlh) < hdrlen)
-- 
cgit v1.2.3


From 54858ee5bf659f80a784303e41ee8898fd163f98 Mon Sep 17 00:00:00 2001
From: Alexander Simon <an.alexsimon@googlemail.com>
Date: Wed, 30 Nov 2011 16:56:32 +0100
Subject: nl80211: Parse channel type attribute in an ibss join request

Prepare cfg80211 for IBSS HT:
 * extend cfg80211 ibss struct with channel_type
 * Check if extension channel can be used
 * Export can_beacon_sec_chan for use in mac80211 (will be called
   from ibss.c later).

Signed-off-by: Alexander Simon <an.alexsimon@googlemail.com>
[siwu@hrz.tu-chemnitz.de: Updates]
* fix cfg80211_can_beacon_ext_chan comment
* remove implicit channel_type enum assumptions
* remove radar channel flags check
* add HT IBSS feature flag
* reword commit message

Signed-off-by: Simon Wunderlich <siwu@hrz.tu-chemnitz.de>
Signed-off-by: Mathias Kretschmer <mathias.kretschmer@fokus.fraunhofer.de>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  2 ++
 include/net/cfg80211.h  | 11 +++++++++++
 net/wireless/chan.c     | 12 +++++++-----
 net/wireless/nl80211.c  | 32 ++++++++++++++++++++++++++++++--
 4 files changed, 50 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f51e3bf93a96..a18760684fc9 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -2785,9 +2785,11 @@ enum nl80211_ap_sme_features {
  * @NL80211_FEATURE_SK_TX_STATUS: This driver supports reflecting back
  *	TX status to the socket error queue when requested with the
  *	socket option.
+ * @NL80211_FEATURE_HT_IBSS: This driver supports IBSS with HT datarates.
  */
 enum nl80211_feature_flags {
 	NL80211_FEATURE_SK_TX_STATUS	= 1 << 0,
+	NL80211_FEATURE_HT_IBSS		= 1 << 1,
 };
 
 /**
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f0e82b2e4227..3de1c39d03e5 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1149,6 +1149,7 @@ struct cfg80211_ibss_params {
 	u8 *ssid;
 	u8 *bssid;
 	struct ieee80211_channel *channel;
+	enum nl80211_channel_type channel_type;
 	u8 *ie;
 	u8 ssid_len, ie_len;
 	u16 beacon_interval;
@@ -3267,6 +3268,16 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 				 const u8 *frame, size_t len,
 				 int freq, gfp_t gfp);
 
+/*
+ * cfg80211_can_beacon_sec_chan - test if ht40 on extension channel can be used
+ * @wiphy: the wiphy
+ * @chan: main channel
+ * @channel_type: HT mode
+ */
+int cfg80211_can_beacon_sec_chan(struct wiphy *wiphy,
+				 struct ieee80211_channel *chan,
+				 enum nl80211_channel_type channel_type);
+
 /* Logging, debugging and troubleshooting/diagnostic helpers. */
 
 /* wiphy_printk helpers, similar to dev_printk */
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 17cd0c04d139..2fcfe0993ca2 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,6 +6,7 @@
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
  */
 
+#include <linux/export.h>
 #include <net/cfg80211.h>
 #include "core.h"
 
@@ -44,9 +45,9 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 	return chan;
 }
 
-static bool can_beacon_sec_chan(struct wiphy *wiphy,
-				struct ieee80211_channel *chan,
-				enum nl80211_channel_type channel_type)
+int cfg80211_can_beacon_sec_chan(struct wiphy *wiphy,
+				  struct ieee80211_channel *chan,
+				  enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_channel *sec_chan;
 	int diff;
@@ -75,6 +76,7 @@ static bool can_beacon_sec_chan(struct wiphy *wiphy,
 
 	return true;
 }
+EXPORT_SYMBOL(cfg80211_can_beacon_sec_chan);
 
 int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
 		      struct wireless_dev *wdev, int freq,
@@ -109,8 +111,8 @@ int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
 		switch (channel_type) {
 		case NL80211_CHAN_HT40PLUS:
 		case NL80211_CHAN_HT40MINUS:
-			if (!can_beacon_sec_chan(&rdev->wiphy, chan,
-						 channel_type)) {
+			if (!cfg80211_can_beacon_sec_chan(&rdev->wiphy, chan,
+							  channel_type)) {
 				printk(KERN_DEBUG
 				       "cfg80211: Secondary channel not "
 				       "allowed to initiate communication\n");
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8a9b4d817ae6..ba439664c2e0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4682,13 +4682,41 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 		ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
-	ibss.channel = ieee80211_get_channel(wiphy,
-		nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+		enum nl80211_channel_type channel_type;
+
+		channel_type = nla_get_u32(
+				info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    channel_type != NL80211_CHAN_HT20 &&
+		    channel_type != NL80211_CHAN_HT40MINUS &&
+		    channel_type != NL80211_CHAN_HT40PLUS)
+			return -EINVAL;
+
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    !(wiphy->features & NL80211_FEATURE_HT_IBSS))
+			return -EINVAL;
+
+		ibss.channel_type = channel_type;
+	} else {
+		ibss.channel_type = NL80211_CHAN_NO_HT;
+	}
+
+	ibss.channel = rdev_freq_to_chan(rdev,
+		nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]),
+		ibss.channel_type);
 	if (!ibss.channel ||
 	    ibss.channel->flags & IEEE80211_CHAN_NO_IBSS ||
 	    ibss.channel->flags & IEEE80211_CHAN_DISABLED)
 		return -EINVAL;
 
+	/* Both channels should be able to initiate communication */
+	if ((ibss.channel_type == NL80211_CHAN_HT40PLUS ||
+	     ibss.channel_type == NL80211_CHAN_HT40MINUS) &&
+	    !cfg80211_can_beacon_sec_chan(&rdev->wiphy, ibss.channel,
+					  ibss.channel_type))
+		return -EINVAL;
+
 	ibss.channel_fixed = !!info->attrs[NL80211_ATTR_FREQ_FIXED];
 	ibss.privacy = !!info->attrs[NL80211_ATTR_PRIVACY];
 
-- 
cgit v1.2.3


From 3df6eaea76a9e1351b539541c0314129a0e4b10c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 6 Dec 2011 10:39:40 +0100
Subject: mac80211: accept public action frames with mismatched BSSID

Arik's patch "mac80211: allow action frames with unknown
BSSID in GO mode" allowed any action frames in P2P mode
to go through, but only to cooked monitor interfaces as
the IEEE80211_RX_RA_MATCH was still cleared. As a result
my no-monitor patches broke invitation responses.

Instead of allowing any action frames in P2P GO mode to
go through with a wrong BSSID like that patch did, allow
all public action frames. They will never be processed
by mac80211, but can be reported via nl80211 then.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 17 +++++++++++++++++
 net/mac80211/rx.c         | 13 ++++++++++---
 2 files changed, 27 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 66cedf6eb5c2..17f2a768e2ad 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1694,6 +1694,23 @@ static inline bool ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr)
 	return false;
 }
 
+/**
+ * ieee80211_is_public_action - check if frame is a public action frame
+ * @hdr: the frame
+ * @len: length of the frame
+ */
+static inline bool ieee80211_is_public_action(struct ieee80211_hdr *hdr,
+					      size_t len)
+{
+	struct ieee80211_mgmt *mgmt = (void *)hdr;
+
+	if (len < IEEE80211_MIN_ACTION_SIZE)
+		return false;
+	if (!ieee80211_is_action(hdr->frame_control))
+		return false;
+	return mgmt->u.action.category == WLAN_CATEGORY_PUBLIC;
+}
+
 /**
  * ieee80211_fhss_chan_to_freq - get channel frequency
  * @channel: the FHSS channel
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 2a85fdfebde2..7d226417ef46 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2797,10 +2797,17 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx,
 				return 0;
 		} else if (!ieee80211_bssid_match(bssid,
 					sdata->vif.addr)) {
+			/*
+			 * Accept public action frames even when the
+			 * BSSID doesn't match, this is used for P2P
+			 * and location updates. Note that mac80211
+			 * itself never looks at these frames.
+			 */
+			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
+			    ieee80211_is_public_action(hdr, skb->len))
+				return 1;
 			if (!(status->rx_flags & IEEE80211_RX_IN_SCAN) &&
-			    !ieee80211_is_beacon(hdr->frame_control) &&
-			    !(ieee80211_is_action(hdr->frame_control) &&
-			      sdata->vif.p2p))
+			    !ieee80211_is_beacon(hdr->frame_control))
 				return 0;
 			status->rx_flags &= ~IEEE80211_RX_RA_MATCH;
 		}
-- 
cgit v1.2.3


From 7da82c06ded105bf601bfa0eafc92e84eb0ceeed Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:15 +0000
Subject: vlan: rename vlan_dev_info to vlan_dev_priv

As this structure is priv, name it approprietely. Also for pointer to it
use name "vlan".

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h  |  2 +-
 net/8021q/vlan.c         | 24 ++++++++--------
 net/8021q/vlan.h         |  8 +++---
 net/8021q/vlan_core.c    |  8 +++---
 net/8021q/vlan_dev.c     | 72 ++++++++++++++++++++++++------------------------
 net/8021q/vlan_gvrp.c    |  4 +--
 net/8021q/vlan_netlink.c | 10 +++----
 net/8021q/vlanproc.c     | 42 ++++++++++++++--------------
 8 files changed, 85 insertions(+), 85 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 070ac50c1d2d..31d7c976f063 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -386,7 +386,7 @@ struct vlan_ioctl_args {
 		unsigned int skb_priority;
 		unsigned int name_type;
 		unsigned int bind_type;
-		unsigned int flag; /* Matches vlan_dev_info flags */
+		unsigned int flag; /* Matches vlan_dev_priv flags */
         } u;
 
 	short vlan_qos;   
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 5471628d3ffe..e075625efeeb 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -99,7 +99,7 @@ static void vlan_rcu_free(struct rcu_head *rcu)
 
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct vlan_group *grp;
@@ -167,7 +167,7 @@ int vlan_check_real_dev(struct net_device *real_dev, u16 vlan_id)
 
 int register_vlan_dev(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	u16 vlan_id = vlan->vlan_id;
@@ -192,7 +192,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (err < 0)
 		goto out_uninit_applicant;
 
-	/* Account for reference in struct vlan_dev_info */
+	/* Account for reference in struct vlan_dev_priv */
 	dev_hold(real_dev);
 
 	netif_stacked_transfer_operstate(real_dev, dev);
@@ -267,7 +267,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 		snprintf(name, IFNAMSIZ, "vlan%.4i", vlan_id);
 	}
 
-	new_dev = alloc_netdev(sizeof(struct vlan_dev_info), name, vlan_setup);
+	new_dev = alloc_netdev(sizeof(struct vlan_dev_priv), name, vlan_setup);
 
 	if (new_dev == NULL)
 		return -ENOBUFS;
@@ -278,10 +278,10 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id)
 	 */
 	new_dev->mtu = real_dev->mtu;
 
-	vlan_dev_info(new_dev)->vlan_id = vlan_id;
-	vlan_dev_info(new_dev)->real_dev = real_dev;
-	vlan_dev_info(new_dev)->dent = NULL;
-	vlan_dev_info(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
+	vlan_dev_priv(new_dev)->vlan_id = vlan_id;
+	vlan_dev_priv(new_dev)->real_dev = real_dev;
+	vlan_dev_priv(new_dev)->dent = NULL;
+	vlan_dev_priv(new_dev)->flags = VLAN_FLAG_REORDER_HDR;
 
 	new_dev->rtnl_link_ops = &vlan_link_ops;
 	err = register_vlan_dev(new_dev);
@@ -298,7 +298,7 @@ out_free_newdev:
 static void vlan_sync_address(struct net_device *dev,
 			      struct net_device *vlandev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(vlandev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 
 	/* May be called without an actual change */
 	if (!compare_ether_addr(vlan->real_dev_addr, dev->dev_addr))
@@ -362,7 +362,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 	struct vlan_group *grp;
 	int i, flgs;
 	struct net_device *vlandev;
-	struct vlan_dev_info *vlan;
+	struct vlan_dev_priv *vlan;
 	LIST_HEAD(list);
 
 	if (is_vlan_dev(dev))
@@ -447,7 +447,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (!(flgs & IFF_UP))
 				continue;
 
-			vlan = vlan_dev_info(vlandev);
+			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs & ~IFF_UP);
 			netif_stacked_transfer_operstate(dev, vlandev);
@@ -465,7 +465,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (flgs & IFF_UP)
 				continue;
 
-			vlan = vlan_dev_info(vlandev);
+			vlan = vlan_dev_priv(vlandev);
 			if (!(vlan->flags & VLAN_FLAG_LOOSE_BINDING))
 				dev_change_flags(vlandev, flgs | IFF_UP);
 			netif_stacked_transfer_operstate(dev, vlandev);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 9fd45f3571f9..d3c4ea4a3836 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -41,7 +41,7 @@ struct vlan_pcpu_stats {
 };
 
 /**
- *	struct vlan_dev_info - VLAN private device data
+ *	struct vlan_dev_priv - VLAN private device data
  *	@nr_ingress_mappings: number of ingress priority mappings
  *	@ingress_priority_map: ingress priority mappings
  *	@nr_egress_mappings: number of egress priority mappings
@@ -53,7 +53,7 @@ struct vlan_pcpu_stats {
  *	@dent: proc dir entry
  *	@vlan_pcpu_stats: ptr to percpu rx stats
  */
-struct vlan_dev_info {
+struct vlan_dev_priv {
 	unsigned int				nr_ingress_mappings;
 	u32					ingress_priority_map[8];
 	unsigned int				nr_egress_mappings;
@@ -69,7 +69,7 @@ struct vlan_dev_info {
 	struct vlan_pcpu_stats __percpu		*vlan_pcpu_stats;
 };
 
-static inline struct vlan_dev_info *vlan_dev_info(const struct net_device *dev)
+static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 {
 	return netdev_priv(dev);
 }
@@ -121,7 +121,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head);
 static inline u32 vlan_get_ingress_priority(struct net_device *dev,
 					    u16 vlan_tci)
 {
-	struct vlan_dev_info *vip = vlan_dev_info(dev);
+	struct vlan_dev_priv *vip = vlan_dev_priv(dev);
 
 	return vip->ingress_priority_map[(vlan_tci >> VLAN_PRIO_SHIFT) & 0x7];
 }
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 9c95e8e054f9..85241f044294 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -36,7 +36,7 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
 			skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_info(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*
@@ -55,7 +55,7 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
 	skb->priority = vlan_get_ingress_priority(vlan_dev, skb->vlan_tci);
 	skb->vlan_tci = 0;
 
-	rx_stats = this_cpu_ptr(vlan_dev_info(vlan_dev)->vlan_pcpu_stats);
+	rx_stats = this_cpu_ptr(vlan_dev_priv(vlan_dev)->vlan_pcpu_stats);
 
 	u64_stats_update_begin(&rx_stats->syncp);
 	rx_stats->rx_packets++;
@@ -90,13 +90,13 @@ EXPORT_SYMBOL(__vlan_find_dev_deep);
 
 struct net_device *vlan_dev_real_dev(const struct net_device *dev)
 {
-	return vlan_dev_info(dev)->real_dev;
+	return vlan_dev_priv(dev)->real_dev;
 }
 EXPORT_SYMBOL(vlan_dev_real_dev);
 
 u16 vlan_dev_vlan_id(const struct net_device *dev)
 {
-	return vlan_dev_info(dev)->vlan_id;
+	return vlan_dev_priv(dev)->vlan_id;
 }
 EXPORT_SYMBOL(vlan_dev_vlan_id);
 
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 2b5fcde1f629..3b4db82b016f 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -72,7 +72,7 @@ vlan_dev_get_egress_qos_mask(struct net_device *dev, struct sk_buff *skb)
 {
 	struct vlan_priority_tci_mapping *mp;
 
-	mp = vlan_dev_info(dev)->egress_priority_map[(skb->priority & 0xF)];
+	mp = vlan_dev_priv(dev)->egress_priority_map[(skb->priority & 0xF)];
 	while (mp) {
 		if (mp->priority == skb->priority) {
 			return mp->vlan_qos; /* This should already be shifted
@@ -103,10 +103,10 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 	u16 vlan_tci = 0;
 	int rc;
 
-	if (!(vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR)) {
 		vhdr = (struct vlan_hdr *) skb_push(skb, VLAN_HLEN);
 
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
+		vlan_tci = vlan_dev_priv(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		vhdr->h_vlan_TCI = htons(vlan_tci);
 
@@ -129,7 +129,7 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 		saddr = dev->dev_addr;
 
 	/* Now make the underlying real hard header */
-	dev = vlan_dev_info(dev)->real_dev;
+	dev = vlan_dev_priv(dev)->real_dev;
 	rc = dev_hard_header(skb, dev, type, daddr, saddr, len + vhdrlen);
 	if (rc > 0)
 		rc += vhdrlen;
@@ -149,27 +149,27 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
 	 * OTHER THINGS LIKE FDDI/TokenRing/802.3 SNAPs...
 	 */
 	if (veth->h_vlan_proto != htons(ETH_P_8021Q) ||
-	    vlan_dev_info(dev)->flags & VLAN_FLAG_REORDER_HDR) {
+	    vlan_dev_priv(dev)->flags & VLAN_FLAG_REORDER_HDR) {
 		u16 vlan_tci;
-		vlan_tci = vlan_dev_info(dev)->vlan_id;
+		vlan_tci = vlan_dev_priv(dev)->vlan_id;
 		vlan_tci |= vlan_dev_get_egress_qos_mask(dev, skb);
 		skb = __vlan_hwaccel_put_tag(skb, vlan_tci);
 	}
 
-	skb_set_dev(skb, vlan_dev_info(dev)->real_dev);
+	skb_set_dev(skb, vlan_dev_priv(dev)->real_dev);
 	len = skb->len;
 	ret = dev_queue_xmit(skb);
 
 	if (likely(ret == NET_XMIT_SUCCESS || ret == NET_XMIT_CN)) {
 		struct vlan_pcpu_stats *stats;
 
-		stats = this_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats);
+		stats = this_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats);
 		u64_stats_update_begin(&stats->syncp);
 		stats->tx_packets++;
 		stats->tx_bytes += len;
 		u64_stats_update_end(&stats->syncp);
 	} else {
-		this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
+		this_cpu_inc(vlan_dev_priv(dev)->vlan_pcpu_stats->tx_dropped);
 	}
 
 	return ret;
@@ -180,7 +180,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 	/* TODO: gotta make sure the underlying layer can handle it,
 	 * maybe an IFF_VLAN_CAPABLE flag for devices?
 	 */
-	if (vlan_dev_info(dev)->real_dev->mtu < new_mtu)
+	if (vlan_dev_priv(dev)->real_dev->mtu < new_mtu)
 		return -ERANGE;
 
 	dev->mtu = new_mtu;
@@ -191,7 +191,7 @@ static int vlan_dev_change_mtu(struct net_device *dev, int new_mtu)
 void vlan_dev_set_ingress_priority(const struct net_device *dev,
 				   u32 skb_prio, u16 vlan_prio)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
 	if (vlan->ingress_priority_map[vlan_prio & 0x7] && !skb_prio)
 		vlan->nr_ingress_mappings--;
@@ -204,7 +204,7 @@ void vlan_dev_set_ingress_priority(const struct net_device *dev,
 int vlan_dev_set_egress_priority(const struct net_device *dev,
 				 u32 skb_prio, u16 vlan_prio)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_priority_tci_mapping *mp = NULL;
 	struct vlan_priority_tci_mapping *np;
 	u32 vlan_qos = (vlan_prio << VLAN_PRIO_SHIFT) & VLAN_PRIO_MASK;
@@ -241,7 +241,7 @@ int vlan_dev_set_egress_priority(const struct net_device *dev,
 /* Flags are defined in the vlan_flags enum in include/linux/if_vlan.h file. */
 int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	u32 old_flags = vlan->flags;
 
 	if (mask & ~(VLAN_FLAG_REORDER_HDR | VLAN_FLAG_GVRP |
@@ -261,12 +261,12 @@ int vlan_dev_change_flags(const struct net_device *dev, u32 flags, u32 mask)
 
 void vlan_dev_get_realdev_name(const struct net_device *dev, char *result)
 {
-	strncpy(result, vlan_dev_info(dev)->real_dev->name, 23);
+	strncpy(result, vlan_dev_priv(dev)->real_dev->name, 23);
 }
 
 static int vlan_dev_open(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	int err;
 
@@ -313,7 +313,7 @@ out:
 
 static int vlan_dev_stop(struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 
 	dev_mc_unsync(real_dev, dev);
@@ -332,7 +332,7 @@ static int vlan_dev_stop(struct net_device *dev)
 
 static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	struct sockaddr *addr = p;
 	int err;
 
@@ -358,7 +358,7 @@ out:
 
 static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct ifreq ifrr;
 	int err = -EOPNOTSUPP;
@@ -383,7 +383,7 @@ static int vlan_dev_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 
 static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int err = 0;
 
@@ -397,7 +397,7 @@ static int vlan_dev_neigh_setup(struct net_device *dev, struct neigh_parms *pa)
 static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
 				   struct scatterlist *sgl, unsigned int sgc)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = 0;
 
@@ -409,7 +409,7 @@ static int vlan_dev_fcoe_ddp_setup(struct net_device *dev, u16 xid,
 
 static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int len = 0;
 
@@ -421,7 +421,7 @@ static int vlan_dev_fcoe_ddp_done(struct net_device *dev, u16 xid)
 
 static int vlan_dev_fcoe_enable(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -432,7 +432,7 @@ static int vlan_dev_fcoe_enable(struct net_device *dev)
 
 static int vlan_dev_fcoe_disable(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -443,7 +443,7 @@ static int vlan_dev_fcoe_disable(struct net_device *dev)
 
 static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = -EINVAL;
 
@@ -455,7 +455,7 @@ static int vlan_dev_fcoe_get_wwn(struct net_device *dev, u64 *wwn, int type)
 static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
 				    struct scatterlist *sgl, unsigned int sgc)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	const struct net_device_ops *ops = real_dev->netdev_ops;
 	int rc = 0;
 
@@ -468,7 +468,7 @@ static int vlan_dev_fcoe_ddp_target(struct net_device *dev, u16 xid,
 
 static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 
 	if (dev->flags & IFF_UP) {
 		if (change & IFF_ALLMULTI)
@@ -480,8 +480,8 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
-	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
-	dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+	dev_mc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
+	dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
@@ -519,7 +519,7 @@ static const struct net_device_ops vlan_netdev_ops;
 
 static int vlan_dev_init(struct net_device *dev)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	int subclass = 0;
 
 	netif_carrier_off(dev);
@@ -568,8 +568,8 @@ static int vlan_dev_init(struct net_device *dev)
 
 	vlan_dev_set_lockdep_class(dev, subclass);
 
-	vlan_dev_info(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
-	if (!vlan_dev_info(dev)->vlan_pcpu_stats)
+	vlan_dev_priv(dev)->vlan_pcpu_stats = alloc_percpu(struct vlan_pcpu_stats);
+	if (!vlan_dev_priv(dev)->vlan_pcpu_stats)
 		return -ENOMEM;
 
 	return 0;
@@ -578,7 +578,7 @@ static int vlan_dev_init(struct net_device *dev)
 static void vlan_dev_uninit(struct net_device *dev)
 {
 	struct vlan_priority_tci_mapping *pm;
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	int i;
 
 	free_percpu(vlan->vlan_pcpu_stats);
@@ -594,7 +594,7 @@ static void vlan_dev_uninit(struct net_device *dev)
 static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 	netdev_features_t features)
 {
-	struct net_device *real_dev = vlan_dev_info(dev)->real_dev;
+	struct net_device *real_dev = vlan_dev_priv(dev)->real_dev;
 	u32 old_features = features;
 
 	features &= real_dev->vlan_features;
@@ -610,7 +610,7 @@ static netdev_features_t vlan_dev_fix_features(struct net_device *dev,
 static int vlan_ethtool_get_settings(struct net_device *dev,
 				     struct ethtool_cmd *cmd)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
 	return __ethtool_get_settings(vlan->real_dev, cmd);
 }
@@ -626,7 +626,7 @@ static void vlan_ethtool_get_drvinfo(struct net_device *dev,
 static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 
-	if (vlan_dev_info(dev)->vlan_pcpu_stats) {
+	if (vlan_dev_priv(dev)->vlan_pcpu_stats) {
 		struct vlan_pcpu_stats *p;
 		u32 rx_errors = 0, tx_dropped = 0;
 		int i;
@@ -635,7 +635,7 @@ static struct rtnl_link_stats64 *vlan_dev_get_stats64(struct net_device *dev, st
 			u64 rxpackets, rxbytes, rxmulticast, txpackets, txbytes;
 			unsigned int start;
 
-			p = per_cpu_ptr(vlan_dev_info(dev)->vlan_pcpu_stats, i);
+			p = per_cpu_ptr(vlan_dev_priv(dev)->vlan_pcpu_stats, i);
 			do {
 				start = u64_stats_fetch_begin_bh(&p->syncp);
 				rxpackets	= p->rx_packets;
diff --git a/net/8021q/vlan_gvrp.c b/net/8021q/vlan_gvrp.c
index 061ceceeef12..6f9755352760 100644
--- a/net/8021q/vlan_gvrp.c
+++ b/net/8021q/vlan_gvrp.c
@@ -29,7 +29,7 @@ static struct garp_application vlan_gvrp_app __read_mostly = {
 
 int vlan_gvrp_request_join(const struct net_device *dev)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
 	return garp_request_join(vlan->real_dev, &vlan_gvrp_app,
@@ -38,7 +38,7 @@ int vlan_gvrp_request_join(const struct net_device *dev)
 
 void vlan_gvrp_request_leave(const struct net_device *dev)
 {
-	const struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	__be16 vlan_id = htons(vlan->vlan_id);
 
 	garp_request_leave(vlan->real_dev, &vlan_gvrp_app,
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 235c2197dbb6..50711368ad6a 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -105,7 +105,7 @@ static int vlan_changelink(struct net_device *dev,
 static int vlan_newlink(struct net *src_net, struct net_device *dev,
 			struct nlattr *tb[], struct nlattr *data[])
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev;
 	int err;
 
@@ -149,7 +149,7 @@ static inline size_t vlan_qos_map_size(unsigned int n)
 
 static size_t vlan_get_size(const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 
 	return nla_total_size(2) +	/* IFLA_VLAN_ID */
 	       sizeof(struct ifla_vlan_flags) + /* IFLA_VLAN_FLAGS */
@@ -159,14 +159,14 @@ static size_t vlan_get_size(const struct net_device *dev)
 
 static int vlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
 {
-	struct vlan_dev_info *vlan = vlan_dev_info(dev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct vlan_priority_tci_mapping *pm;
 	struct ifla_vlan_flags f;
 	struct ifla_vlan_qos_mapping m;
 	struct nlattr *nest;
 	unsigned int i;
 
-	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_info(dev)->vlan_id);
+	NLA_PUT_U16(skb, IFLA_VLAN_ID, vlan_dev_priv(dev)->vlan_id);
 	if (vlan->flags) {
 		f.flags = vlan->flags;
 		f.mask  = ~0;
@@ -218,7 +218,7 @@ struct rtnl_link_ops vlan_link_ops __read_mostly = {
 	.kind		= "vlan",
 	.maxtype	= IFLA_VLAN_MAX,
 	.policy		= vlan_policy,
-	.priv_size	= sizeof(struct vlan_dev_info),
+	.priv_size	= sizeof(struct vlan_dev_priv),
 	.setup		= vlan_setup,
 	.validate	= vlan_validate,
 	.newlink	= vlan_newlink,
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index d34b6daf8930..c718fd3664b6 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -168,13 +168,13 @@ err:
 
 int vlan_proc_add_dev(struct net_device *vlandev)
 {
-	struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 	struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
 
-	dev_info->dent =
+	vlan->dent =
 		proc_create_data(vlandev->name, S_IFREG|S_IRUSR|S_IWUSR,
 				 vn->proc_vlan_dir, &vlandev_fops, vlandev);
-	if (!dev_info->dent)
+	if (!vlan->dent)
 		return -ENOBUFS;
 	return 0;
 }
@@ -187,10 +187,10 @@ int vlan_proc_rem_dev(struct net_device *vlandev)
 	struct vlan_net *vn = net_generic(dev_net(vlandev), vlan_net_id);
 
 	/** NOTE:  This will consume the memory pointed to by dent, it seems. */
-	if (vlan_dev_info(vlandev)->dent) {
-		remove_proc_entry(vlan_dev_info(vlandev)->dent->name,
+	if (vlan_dev_priv(vlandev)->dent) {
+		remove_proc_entry(vlan_dev_priv(vlandev)->dent->name,
 				  vn->proc_vlan_dir);
-		vlan_dev_info(vlandev)->dent = NULL;
+		vlan_dev_priv(vlandev)->dent = NULL;
 	}
 	return 0;
 }
@@ -268,10 +268,10 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 			   nmtype ? nmtype :  "UNKNOWN");
 	} else {
 		const struct net_device *vlandev = v;
-		const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+		const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 
 		seq_printf(seq, "%-15s| %d  | %s\n",  vlandev->name,
-			   dev_info->vlan_id,    dev_info->real_dev->name);
+			   vlan->vlan_id,    vlan->real_dev->name);
 	}
 	return 0;
 }
@@ -279,7 +279,7 @@ static int vlan_seq_show(struct seq_file *seq, void *v)
 static int vlandev_seq_show(struct seq_file *seq, void *offset)
 {
 	struct net_device *vlandev = (struct net_device *) seq->private;
-	const struct vlan_dev_info *dev_info = vlan_dev_info(vlandev);
+	const struct vlan_dev_priv *vlan = vlan_dev_priv(vlandev);
 	struct rtnl_link_stats64 temp;
 	const struct rtnl_link_stats64 *stats;
 	static const char fmt64[] = "%30s %12llu\n";
@@ -291,8 +291,8 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	stats = dev_get_stats(vlandev, &temp);
 	seq_printf(seq,
 		   "%s  VID: %d	 REORDER_HDR: %i  dev->priv_flags: %hx\n",
-		   vlandev->name, dev_info->vlan_id,
-		   (int)(dev_info->flags & 1), vlandev->priv_flags);
+		   vlandev->name, vlan->vlan_id,
+		   (int)(vlan->flags & 1), vlandev->priv_flags);
 
 	seq_printf(seq, fmt64, "total frames received", stats->rx_packets);
 	seq_printf(seq, fmt64, "total bytes received", stats->rx_bytes);
@@ -300,23 +300,23 @@ static int vlandev_seq_show(struct seq_file *seq, void *offset)
 	seq_puts(seq, "\n");
 	seq_printf(seq, fmt64, "total frames transmitted", stats->tx_packets);
 	seq_printf(seq, fmt64, "total bytes transmitted", stats->tx_bytes);
-	seq_printf(seq, "Device: %s", dev_info->real_dev->name);
+	seq_printf(seq, "Device: %s", vlan->real_dev->name);
 	/* now show all PRIORITY mappings relating to this VLAN */
 	seq_printf(seq, "\nINGRESS priority mappings: "
 			"0:%u  1:%u  2:%u  3:%u  4:%u  5:%u  6:%u 7:%u\n",
-		   dev_info->ingress_priority_map[0],
-		   dev_info->ingress_priority_map[1],
-		   dev_info->ingress_priority_map[2],
-		   dev_info->ingress_priority_map[3],
-		   dev_info->ingress_priority_map[4],
-		   dev_info->ingress_priority_map[5],
-		   dev_info->ingress_priority_map[6],
-		   dev_info->ingress_priority_map[7]);
+		   vlan->ingress_priority_map[0],
+		   vlan->ingress_priority_map[1],
+		   vlan->ingress_priority_map[2],
+		   vlan->ingress_priority_map[3],
+		   vlan->ingress_priority_map[4],
+		   vlan->ingress_priority_map[5],
+		   vlan->ingress_priority_map[6],
+		   vlan->ingress_priority_map[7]);
 
 	seq_printf(seq, " EGRESS priority mappings: ");
 	for (i = 0; i < 16; i++) {
 		const struct vlan_priority_tci_mapping *mp
-			= dev_info->egress_priority_map[i];
+			= vlan->egress_priority_map[i];
 		while (mp) {
 			seq_printf(seq, "%u:%hu ",
 				   mp->priority, ((mp->vlan_qos >> 13) & 0x7));
-- 
cgit v1.2.3


From 8e586137e6b63af1e881b328466ab5ffbe562510 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 19:52:37 -0500
Subject: net: make vlan ndo_vlan_rx_[add/kill]_vid return error value

Let caller know the result of adding/removing vlan id to/from vlan
filter.

In some drivers I make those functions to just return 0. But in those
where there is able to see if hw setup went correctly, return value is
set appropriately.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                   | 10 ++++--
 drivers/net/ethernet/adaptec/starfire.c           |  8 +++--
 drivers/net/ethernet/brocade/bna/bnad.c           | 12 ++++---
 drivers/net/ethernet/cisco/enic/enic_dev.c        | 14 ++++++---
 drivers/net/ethernet/cisco/enic/enic_dev.h        |  4 +--
 drivers/net/ethernet/emulex/benet/be_main.c       | 12 ++++---
 drivers/net/ethernet/ibm/ehea/ehea_main.c         | 21 ++++++++++---
 drivers/net/ethernet/intel/e1000/e1000_main.c     | 14 ++++++---
 drivers/net/ethernet/intel/e1000e/netdev.c        | 12 ++++---
 drivers/net/ethernet/intel/igb/igb_main.c         | 12 ++++---
 drivers/net/ethernet/intel/igbvf/netdev.c         | 20 +++++++-----
 drivers/net/ethernet/intel/ixgb/ixgb_main.c       | 12 ++++---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c     |  8 +++--
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c |  8 +++--
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c    |  7 +++--
 drivers/net/ethernet/neterion/vxge/vxge-main.c    |  6 ++--
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c  | 10 +++---
 drivers/net/ethernet/qlogic/qlge/qlge_main.c      | 38 ++++++++++++++---------
 drivers/net/ethernet/tehuti/tehuti.c              |  6 ++--
 drivers/net/ethernet/via/via-rhine.c              | 10 +++---
 drivers/net/ethernet/via/via-velocity.c           |  6 ++--
 drivers/net/macvlan.c                             | 10 +++---
 drivers/net/team/team.c                           |  8 +++--
 drivers/net/virtio_net.c                          |  6 ++--
 drivers/net/vmxnet3/vmxnet3_drv.c                 |  8 +++--
 drivers/s390/net/qeth_l2_main.c                   | 18 ++++++-----
 drivers/s390/net/qeth_l3_main.c                   |  9 +++---
 include/linux/netdevice.h                         |  8 ++---
 28 files changed, 210 insertions(+), 107 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 3216c514fdc8..d72c37f03e50 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -428,7 +428,7 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
  * @bond_dev: bonding net device that got called
  * @vid: vlan id being added
  */
-static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
+static int bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
@@ -448,7 +448,10 @@ static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 	if (res) {
 		pr_err("%s: Error: Failed to add vlan id %d\n",
 		       bond_dev->name, vid);
+		return res;
 	}
+
+	return 0;
 }
 
 /**
@@ -456,7 +459,7 @@ static void bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
  * @bond_dev: bonding net device that got called
  * @vid: vlan id being removed
  */
-static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
+static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
@@ -476,7 +479,10 @@ static void bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 	if (res) {
 		pr_err("%s: Error: Failed to remove vlan id %d\n",
 		       bond_dev->name, vid);
+		return res;
 	}
+
+	return 0;
 }
 
 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index a446e251908b..cb4f38a17f20 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -607,7 +607,7 @@ static const struct ethtool_ops ethtool_ops;
 
 
 #ifdef VLAN_SUPPORT
-static void netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct netdev_private *np = netdev_priv(dev);
 
@@ -617,9 +617,11 @@ static void netdev_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 	set_bit(vid, np->active_vlans);
 	set_rx_mode(dev);
 	spin_unlock(&np->lock);
+
+	return 0;
 }
 
-static void netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct netdev_private *np = netdev_priv(dev);
 
@@ -629,6 +631,8 @@ static void netdev_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	clear_bit(vid, np->active_vlans);
 	set_rx_mode(dev);
 	spin_unlock(&np->lock);
+
+	return 0;
 }
 #endif /* VLAN_SUPPORT */
 
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index 7f3091e7eb42..aac3a3b710a0 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -2968,7 +2968,7 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu)
 	return err;
 }
 
-static void
+static int
 bnad_vlan_rx_add_vid(struct net_device *netdev,
 				 unsigned short vid)
 {
@@ -2976,7 +2976,7 @@ bnad_vlan_rx_add_vid(struct net_device *netdev,
 	unsigned long flags;
 
 	if (!bnad->rx_info[0].rx)
-		return;
+		return 0;
 
 	mutex_lock(&bnad->conf_mutex);
 
@@ -2986,9 +2986,11 @@ bnad_vlan_rx_add_vid(struct net_device *netdev,
 	spin_unlock_irqrestore(&bnad->bna_lock, flags);
 
 	mutex_unlock(&bnad->conf_mutex);
+
+	return 0;
 }
 
-static void
+static int
 bnad_vlan_rx_kill_vid(struct net_device *netdev,
 				  unsigned short vid)
 {
@@ -2996,7 +2998,7 @@ bnad_vlan_rx_kill_vid(struct net_device *netdev,
 	unsigned long flags;
 
 	if (!bnad->rx_info[0].rx)
-		return;
+		return 0;
 
 	mutex_lock(&bnad->conf_mutex);
 
@@ -3006,6 +3008,8 @@ bnad_vlan_rx_kill_vid(struct net_device *netdev,
 	spin_unlock_irqrestore(&bnad->bna_lock, flags);
 
 	mutex_unlock(&bnad->conf_mutex);
+
+	return 0;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.c b/drivers/net/ethernet/cisco/enic/enic_dev.c
index fd6247b3c0ee..bf0fc56dba19 100644
--- a/drivers/net/ethernet/cisco/enic/enic_dev.c
+++ b/drivers/net/ethernet/cisco/enic/enic_dev.c
@@ -212,23 +212,29 @@ int enic_dev_deinit_done(struct enic *enic, int *status)
 }
 
 /* rtnl lock is held */
-void enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct enic *enic = netdev_priv(netdev);
+	int err;
 
 	spin_lock(&enic->devcmd_lock);
-	enic_add_vlan(enic, vid);
+	err = enic_add_vlan(enic, vid);
 	spin_unlock(&enic->devcmd_lock);
+
+	return err;
 }
 
 /* rtnl lock is held */
-void enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct enic *enic = netdev_priv(netdev);
+	int err;
 
 	spin_lock(&enic->devcmd_lock);
-	enic_del_vlan(enic, vid);
+	err = enic_del_vlan(enic, vid);
 	spin_unlock(&enic->devcmd_lock);
+
+	return err;
 }
 
 int enic_dev_enable2(struct enic *enic, int active)
diff --git a/drivers/net/ethernet/cisco/enic/enic_dev.h b/drivers/net/ethernet/cisco/enic/enic_dev.h
index 1f83a4747ba0..da1cba3c410e 100644
--- a/drivers/net/ethernet/cisco/enic/enic_dev.h
+++ b/drivers/net/ethernet/cisco/enic/enic_dev.h
@@ -46,8 +46,8 @@ int enic_dev_packet_filter(struct enic *enic, int directed, int multicast,
 	int broadcast, int promisc, int allmulti);
 int enic_dev_add_addr(struct enic *enic, u8 *addr);
 int enic_dev_del_addr(struct enic *enic, u8 *addr);
-void enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
-void enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
+int enic_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
+int enic_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 int enic_dev_notify_unset(struct enic *enic);
 int enic_dev_hang_notify(struct enic *enic);
 int enic_dev_set_ig_vlan_rewrite_mode(struct enic *enic);
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 3854fb0610ba..b8a526f9efc8 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -780,31 +780,35 @@ static int be_vid_config(struct be_adapter *adapter, bool vf, u32 vf_num)
 	return status;
 }
 
-static void be_vlan_add_vid(struct net_device *netdev, u16 vid)
+static int be_vlan_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
 	adapter->vlans_added++;
 	if (!be_physfn(adapter))
-		return;
+		return 0;
 
 	adapter->vlan_tag[vid] = 1;
 	if (adapter->vlans_added <= (adapter->max_vlans + 1))
 		be_vid_config(adapter, false, 0);
+
+	return 0;
 }
 
-static void be_vlan_rem_vid(struct net_device *netdev, u16 vid)
+static int be_vlan_rem_vid(struct net_device *netdev, u16 vid)
 {
 	struct be_adapter *adapter = netdev_priv(netdev);
 
 	adapter->vlans_added--;
 
 	if (!be_physfn(adapter))
-		return;
+		return 0;
 
 	adapter->vlan_tag[vid] = 0;
 	if (adapter->vlans_added <= adapter->max_vlans)
 		be_vid_config(adapter, false, 0);
+
+	return 0;
 }
 
 static void be_set_rx_mode(struct net_device *netdev)
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index bfeccbfde236..3554414eb5e2 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -2114,17 +2114,19 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	return NETDEV_TX_OK;
 }
 
-static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct ehea_port *port = netdev_priv(dev);
 	struct ehea_adapter *adapter = port->adapter;
 	struct hcp_ehea_port_cb1 *cb1;
 	int index;
 	u64 hret;
+	int err = 0;
 
 	cb1 = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!cb1) {
 		pr_err("no mem for cb1\n");
+		err = -ENOMEM;
 		goto out;
 	}
 
@@ -2132,6 +2134,7 @@ static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 				      H_PORT_CB1, H_PORT_CB1_ALL, cb1);
 	if (hret != H_SUCCESS) {
 		pr_err("query_ehea_port failed\n");
+		err = -EINVAL;
 		goto out;
 	}
 
@@ -2140,24 +2143,28 @@ static void ehea_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 
 	hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id,
 				       H_PORT_CB1, H_PORT_CB1_ALL, cb1);
-	if (hret != H_SUCCESS)
+	if (hret != H_SUCCESS) {
 		pr_err("modify_ehea_port failed\n");
+		err = -EINVAL;
+	}
 out:
 	free_page((unsigned long)cb1);
-	return;
+	return err;
 }
 
-static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct ehea_port *port = netdev_priv(dev);
 	struct ehea_adapter *adapter = port->adapter;
 	struct hcp_ehea_port_cb1 *cb1;
 	int index;
 	u64 hret;
+	int err = 0;
 
 	cb1 = (void *)get_zeroed_page(GFP_KERNEL);
 	if (!cb1) {
 		pr_err("no mem for cb1\n");
+		err = -ENOMEM;
 		goto out;
 	}
 
@@ -2165,6 +2172,7 @@ static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 				      H_PORT_CB1, H_PORT_CB1_ALL, cb1);
 	if (hret != H_SUCCESS) {
 		pr_err("query_ehea_port failed\n");
+		err = -EINVAL;
 		goto out;
 	}
 
@@ -2173,10 +2181,13 @@ static void ehea_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 
 	hret = ehea_h_modify_ehea_port(adapter->handle, port->logical_port_id,
 				       H_PORT_CB1, H_PORT_CB1_ALL, cb1);
-	if (hret != H_SUCCESS)
+	if (hret != H_SUCCESS) {
 		pr_err("modify_ehea_port failed\n");
+		err = -EINVAL;
+	}
 out:
 	free_page((unsigned long)cb1);
+	return err;
 }
 
 int ehea_activate_qp(struct ehea_adapter *adapter, struct ehea_qp *qp)
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 82f4ef142259..053f01289eff 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -169,8 +169,8 @@ static int e1000_82547_fifo_workaround(struct e1000_adapter *adapter,
 static bool e1000_vlan_used(struct e1000_adapter *adapter);
 static void e1000_vlan_mode(struct net_device *netdev,
 			    netdev_features_t features);
-static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
-static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
+static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 static void e1000_restore_vlan(struct e1000_adapter *adapter);
 
 #ifdef CONFIG_PM
@@ -4604,7 +4604,7 @@ static void e1000_vlan_mode(struct net_device *netdev,
 		e1000_irq_enable(adapter);
 }
 
-static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -4613,7 +4613,7 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	if ((hw->mng_cookie.status &
 	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN_SUPPORT) &&
 	    (vid == adapter->mng_vlan_id))
-		return;
+		return 0;
 
 	if (!e1000_vlan_used(adapter))
 		e1000_vlan_filter_on_off(adapter, true);
@@ -4625,9 +4625,11 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	e1000_write_vfta(hw, index, vfta);
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -4648,6 +4650,8 @@ static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 
 	if (!e1000_vlan_used(adapter))
 		e1000_vlan_filter_on_off(adapter, false);
+
+	return 0;
 }
 
 static void e1000_restore_vlan(struct e1000_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 93ae0c26d434..90953b4d6bfa 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -2522,7 +2522,7 @@ clean_rx:
 	return work_done;
 }
 
-static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -2532,7 +2532,7 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	if ((adapter->hw.mng_cookie.status &
 	     E1000_MNG_DHCP_COOKIE_STATUS_VLAN) &&
 	    (vid == adapter->mng_vlan_id))
-		return;
+		return 0;
 
 	/* add VID to filter table */
 	if (adapter->flags & FLAG_HAS_HW_VLAN_FILTER) {
@@ -2543,9 +2543,11 @@ static void e1000_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	}
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -2556,7 +2558,7 @@ static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	    (vid == adapter->mng_vlan_id)) {
 		/* release control to f/w */
 		e1000e_release_hw_control(adapter);
-		return;
+		return 0;
 	}
 
 	/* remove VID from filter table */
@@ -2568,6 +2570,8 @@ static void e1000_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	}
 
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 143cfebe3182..89d576ce5776 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -148,8 +148,8 @@ static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
 static void igb_tx_timeout(struct net_device *);
 static void igb_reset_task(struct work_struct *);
 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
-static void igb_vlan_rx_add_vid(struct net_device *, u16);
-static void igb_vlan_rx_kill_vid(struct net_device *, u16);
+static int igb_vlan_rx_add_vid(struct net_device *, u16);
+static int igb_vlan_rx_kill_vid(struct net_device *, u16);
 static void igb_restore_vlan(struct igb_adapter *);
 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
 static void igb_ping_all_vfs(struct igb_adapter *);
@@ -6491,7 +6491,7 @@ static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features)
 	igb_rlpml_set(adapter);
 }
 
-static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -6504,9 +6504,11 @@ static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	igb_vfta_set(hw, vid, true);
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -6521,6 +6523,8 @@ static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 		igb_vfta_set(hw, vid, false);
 
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 static void igb_restore_vlan(struct igb_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index c358973ce414..fd3da3076c2f 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1176,18 +1176,20 @@ static void igbvf_set_rlpml(struct igbvf_adapter *adapter)
 	e1000_rlpml_set_vf(hw, max_frame_size);
 }
 
-static void igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int igbvf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
 
-	if (hw->mac.ops.set_vfta(hw, vid, true))
+	if (hw->mac.ops.set_vfta(hw, vid, true)) {
 		dev_err(&adapter->pdev->dev, "Failed to add vlan id %d\n", vid);
-	else
-		set_bit(vid, adapter->active_vlans);
+		return -EINVAL;
+	}
+	set_bit(vid, adapter->active_vlans);
+	return 0;
 }
 
-static void igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
@@ -1197,11 +1199,13 @@ static void igbvf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	if (!test_bit(__IGBVF_DOWN, &adapter->state))
 		igbvf_irq_enable(adapter);
 
-	if (hw->mac.ops.set_vfta(hw, vid, false))
+	if (hw->mac.ops.set_vfta(hw, vid, false)) {
 		dev_err(&adapter->pdev->dev,
 		        "Failed to remove vlan id %d\n", vid);
-	else
-		clear_bit(vid, adapter->active_vlans);
+		return -EINVAL;
+	}
+	clear_bit(vid, adapter->active_vlans);
+	return 0;
 }
 
 static void igbvf_restore_vlan(struct igbvf_adapter *adapter)
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 247cf9219e03..c573655f3307 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -101,8 +101,8 @@ static void ixgb_tx_timeout_task(struct work_struct *work);
 
 static void ixgb_vlan_strip_enable(struct ixgb_adapter *adapter);
 static void ixgb_vlan_strip_disable(struct ixgb_adapter *adapter);
-static void ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
-static void ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
+static int ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid);
+static int ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid);
 static void ixgb_restore_vlan(struct ixgb_adapter *adapter);
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2217,7 +2217,7 @@ ixgb_vlan_strip_disable(struct ixgb_adapter *adapter)
 	IXGB_WRITE_REG(&adapter->hw, CTRL0, ctrl);
 }
 
-static void
+static int
 ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
@@ -2230,9 +2230,11 @@ ixgb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	vfta |= (1 << (vid & 0x1F));
 	ixgb_write_vfta(&adapter->hw, index, vfta);
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void
+static int
 ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
@@ -2245,6 +2247,8 @@ ixgb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	vfta &= ~(1 << (vid & 0x1F));
 	ixgb_write_vfta(&adapter->hw, index, vfta);
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 static void
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 1b28ed9d8cc1..5d94ce1c0fc3 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -3044,7 +3044,7 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter)
 	hw->mac.ops.enable_rx_dma(hw, rxctrl);
 }
 
-static void ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -3053,9 +3053,11 @@ static void ixgbe_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	/* add VID to filter table */
 	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, true);
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -3064,6 +3066,8 @@ static void ixgbe_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	/* remove VID from filter table */
 	hw->mac.ops.set_vfta(&adapter->hw, vid, pool_ndx, false);
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 5d1a64398169..891162d1610c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1403,7 +1403,7 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter)
 	}
 }
 
-static void ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
+static int ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -1412,9 +1412,11 @@ static void ixgbevf_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	if (hw->mac.ops.set_vfta)
 		hw->mac.ops.set_vfta(hw, vid, 0, true);
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
-static void ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
+static int ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
@@ -1423,6 +1425,8 @@ static void ixgbevf_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	if (hw->mac.ops.set_vfta)
 		hw->mac.ops.set_vfta(hw, vid, 0, false);
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 static void ixgbevf_restore_vlan(struct ixgbevf_adapter *adapter)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 4c5bbb3aad31..2083f3b5d689 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -45,7 +45,7 @@
 #include "mlx4_en.h"
 #include "en_port.h"
 
-static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -67,9 +67,10 @@ static void mlx4_en_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 		en_err(priv, "failed adding vlan %d\n", vid);
 	mutex_unlock(&mdev->state_lock);
 
+	return 0;
 }
 
-static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
@@ -93,6 +94,8 @@ static void mlx4_en_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 			en_err(priv, "Failed configuring VLAN filter\n");
 	}
 	mutex_unlock(&mdev->state_lock);
+
+	return 0;
 }
 
 u64 mlx4_en_mac_to_u64(u8 *addr)
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index 16d4d8e913c3..ef76725454d2 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3305,7 +3305,7 @@ static void vxge_tx_watchdog(struct net_device *dev)
  *
  * Add the vlan id to the devices vlan id table
  */
-static void
+static int
 vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct vxgedev *vdev = netdev_priv(dev);
@@ -3320,6 +3320,7 @@ vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 		vxge_hw_vpath_vid_add(vpath->handle, vid);
 	}
 	set_bit(vid, vdev->active_vlans);
+	return 0;
 }
 
 /**
@@ -3329,7 +3330,7 @@ vxge_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
  *
  * Remove the vlan id from the device's vlan id table
  */
-static void
+static int
 vxge_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct vxgedev *vdev = netdev_priv(dev);
@@ -3348,6 +3349,7 @@ vxge_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	vxge_debug_entryexit(VXGE_TRACE,
 		"%s:%d  Exiting...", __func__, __LINE__);
 	clear_bit(vid, vdev->active_vlans);
+	return 0;
 }
 
 static const struct net_device_ops vxge_netdev_ops = {
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 823f845ddc04..69b8e4ef14d9 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -97,8 +97,8 @@ static int qlcnicvf_config_bridged_mode(struct qlcnic_adapter *, u32);
 static int qlcnicvf_start_firmware(struct qlcnic_adapter *);
 static void qlcnic_set_netdev_features(struct qlcnic_adapter *,
 				struct qlcnic_esw_func_cfg *);
-static void qlcnic_vlan_rx_add(struct net_device *, u16);
-static void qlcnic_vlan_rx_del(struct net_device *, u16);
+static int qlcnic_vlan_rx_add(struct net_device *, u16);
+static int qlcnic_vlan_rx_del(struct net_device *, u16);
 
 /*  PCI Device ID Table  */
 #define ENTRY(device) \
@@ -735,20 +735,22 @@ qlcnic_set_vlan_config(struct qlcnic_adapter *adapter,
 		adapter->pvid = 0;
 }
 
-static void
+static int
 qlcnic_vlan_rx_add(struct net_device *netdev, u16 vid)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	set_bit(vid, adapter->vlans);
+	return 0;
 }
 
-static void
+static int
 qlcnic_vlan_rx_del(struct net_device *netdev, u16 vid)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 
 	qlcnic_restore_indev_addr(netdev, NETDEV_DOWN);
 	clear_bit(vid, adapter->vlans);
+	return 0;
 }
 
 static void
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 1ce4e08037b8..b54898737284 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2349,56 +2349,66 @@ static int qlge_set_features(struct net_device *ndev,
 	return 0;
 }
 
-static void __qlge_vlan_rx_add_vid(struct ql_adapter *qdev, u16 vid)
+static int __qlge_vlan_rx_add_vid(struct ql_adapter *qdev, u16 vid)
 {
 	u32 enable_bit = MAC_ADDR_E;
+	int err;
 
-	if (ql_set_mac_addr_reg
-	    (qdev, (u8 *) &enable_bit, MAC_ADDR_TYPE_VLAN, vid)) {
+	err = ql_set_mac_addr_reg(qdev, (u8 *) &enable_bit,
+				  MAC_ADDR_TYPE_VLAN, vid);
+	if (err)
 		netif_err(qdev, ifup, qdev->ndev,
 			  "Failed to init vlan address.\n");
-	}
+	return err;
 }
 
-static void qlge_vlan_rx_add_vid(struct net_device *ndev, u16 vid)
+static int qlge_vlan_rx_add_vid(struct net_device *ndev, u16 vid)
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
 	int status;
+	int err;
 
 	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
 	if (status)
-		return;
+		return status;
 
-	__qlge_vlan_rx_add_vid(qdev, vid);
+	err = __qlge_vlan_rx_add_vid(qdev, vid);
 	set_bit(vid, qdev->active_vlans);
 
 	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
+
+	return err;
 }
 
-static void __qlge_vlan_rx_kill_vid(struct ql_adapter *qdev, u16 vid)
+static int __qlge_vlan_rx_kill_vid(struct ql_adapter *qdev, u16 vid)
 {
 	u32 enable_bit = 0;
+	int err;
 
-	if (ql_set_mac_addr_reg
-	    (qdev, (u8 *) &enable_bit, MAC_ADDR_TYPE_VLAN, vid)) {
+	err = ql_set_mac_addr_reg(qdev, (u8 *) &enable_bit,
+				  MAC_ADDR_TYPE_VLAN, vid);
+	if (err)
 		netif_err(qdev, ifup, qdev->ndev,
 			  "Failed to clear vlan address.\n");
-	}
+	return err;
 }
 
-static void qlge_vlan_rx_kill_vid(struct net_device *ndev, u16 vid)
+static int qlge_vlan_rx_kill_vid(struct net_device *ndev, u16 vid)
 {
 	struct ql_adapter *qdev = netdev_priv(ndev);
 	int status;
+	int err;
 
 	status = ql_sem_spinlock(qdev, SEM_MAC_ADDR_MASK);
 	if (status)
-		return;
+		return status;
 
-	__qlge_vlan_rx_kill_vid(qdev, vid);
+	err = __qlge_vlan_rx_kill_vid(qdev, vid);
 	clear_bit(vid, qdev->active_vlans);
 
 	ql_sem_unlock(qdev, SEM_MAC_ADDR_MASK);
+
+	return err;
 }
 
 static void qlge_restore_vlan(struct ql_adapter *qdev)
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 3a90af6d111c..4b19e9b0606b 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -727,9 +727,10 @@ static void __bdx_vlan_rx_vid(struct net_device *ndev, uint16_t vid, int enable)
  * @ndev network device
  * @vid  VLAN vid to add
  */
-static void bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid)
+static int bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid)
 {
 	__bdx_vlan_rx_vid(ndev, vid, 1);
+	return 0;
 }
 
 /*
@@ -737,9 +738,10 @@ static void bdx_vlan_rx_add_vid(struct net_device *ndev, uint16_t vid)
  * @ndev network device
  * @vid  VLAN vid to kill
  */
-static void bdx_vlan_rx_kill_vid(struct net_device *ndev, unsigned short vid)
+static int bdx_vlan_rx_kill_vid(struct net_device *ndev, unsigned short vid)
 {
 	__bdx_vlan_rx_vid(ndev, vid, 0);
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 5587ecdf32e3..bcdbdc72b558 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -488,8 +488,8 @@ static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd);
 static const struct ethtool_ops netdev_ethtool_ops;
 static int  rhine_close(struct net_device *dev);
 static void rhine_shutdown (struct pci_dev *pdev);
-static void rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
-static void rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
+static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid);
+static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid);
 static void rhine_set_cam(void __iomem *ioaddr, int idx, u8 *addr);
 static void rhine_set_vlan_cam(void __iomem *ioaddr, int idx, u8 *addr);
 static void rhine_set_cam_mask(void __iomem *ioaddr, u32 mask);
@@ -1261,7 +1261,7 @@ static void rhine_update_vcam(struct net_device *dev)
 	rhine_set_vlan_cam_mask(ioaddr, vCAMmask);
 }
 
-static void rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct rhine_private *rp = netdev_priv(dev);
 
@@ -1269,9 +1269,10 @@ static void rhine_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 	set_bit(vid, rp->active_vlans);
 	rhine_update_vcam(dev);
 	spin_unlock_irq(&rp->lock);
+	return 0;
 }
 
-static void rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct rhine_private *rp = netdev_priv(dev);
 
@@ -1279,6 +1280,7 @@ static void rhine_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	clear_bit(vid, rp->active_vlans);
 	rhine_update_vcam(dev);
 	spin_unlock_irq(&rp->lock);
+	return 0;
 }
 
 static void init_registers(struct net_device *dev)
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 59bb5fd56afe..4128d6b8cc28 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -522,7 +522,7 @@ static void velocity_init_cam_filter(struct velocity_info *vptr)
 	mac_set_vlan_cam_mask(regs, vptr->vCAMmask);
 }
 
-static void velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct velocity_info *vptr = netdev_priv(dev);
 
@@ -530,9 +530,10 @@ static void velocity_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 	set_bit(vid, vptr->active_vlans);
 	velocity_init_cam_filter(vptr);
 	spin_unlock_irq(&vptr->lock);
+	return 0;
 }
 
-static void velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct velocity_info *vptr = netdev_priv(dev);
 
@@ -540,6 +541,7 @@ static void velocity_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid
 	clear_bit(vid, vptr->active_vlans);
 	velocity_init_cam_filter(vptr);
 	spin_unlock_irq(&vptr->lock);
+	return 0;
 }
 
 static void velocity_init_rx_ring_indexes(struct velocity_info *vptr)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 74134970b709..2511bc5c34f3 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -520,7 +520,7 @@ static struct rtnl_link_stats64 *macvlan_dev_get_stats64(struct net_device *dev,
 	return stats;
 }
 
-static void macvlan_vlan_rx_add_vid(struct net_device *dev,
+static int macvlan_vlan_rx_add_vid(struct net_device *dev,
 				    unsigned short vid)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -528,10 +528,11 @@ static void macvlan_vlan_rx_add_vid(struct net_device *dev,
 	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
 	if (ops->ndo_vlan_rx_add_vid)
-		ops->ndo_vlan_rx_add_vid(lowerdev, vid);
+		return ops->ndo_vlan_rx_add_vid(lowerdev, vid);
+	return 0;
 }
 
-static void macvlan_vlan_rx_kill_vid(struct net_device *dev,
+static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
 				     unsigned short vid)
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
@@ -539,7 +540,8 @@ static void macvlan_vlan_rx_kill_vid(struct net_device *dev,
 	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
 	if (ops->ndo_vlan_rx_kill_vid)
-		ops->ndo_vlan_rx_kill_vid(lowerdev, vid);
+		return ops->ndo_vlan_rx_kill_vid(lowerdev, vid);
+	return 0;
 }
 
 static void macvlan_ethtool_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 064155d56bce..8e8bf958539e 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -902,7 +902,7 @@ team_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	return stats;
 }
 
-static void team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
+static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
 {
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
@@ -915,9 +915,11 @@ static void team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
 			ops->ndo_vlan_rx_add_vid(port->dev, vid);
 	}
 	rcu_read_unlock();
+
+	return 0;
 }
 
-static void team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
+static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 {
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
@@ -930,6 +932,8 @@ static void team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 			ops->ndo_vlan_rx_kill_vid(port->dev, vid);
 	}
 	rcu_read_unlock();
+
+	return 0;
 }
 
 static int team_add_slave(struct net_device *dev, struct net_device *port_dev)
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 5a961720f64c..609c51f90e6c 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -855,7 +855,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	kfree(buf);
 }
 
-static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
+static int virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg;
@@ -865,9 +865,10 @@ static void virtnet_vlan_rx_add_vid(struct net_device *dev, u16 vid)
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
 				  VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
 		dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
+	return 0;
 }
 
-static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
+static int virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
 {
 	struct virtnet_info *vi = netdev_priv(dev);
 	struct scatterlist sg;
@@ -877,6 +878,7 @@ static void virtnet_vlan_rx_kill_vid(struct net_device *dev, u16 vid)
 	if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
 				  VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
 		dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
+	return 0;
 }
 
 static void virtnet_get_ringparam(struct net_device *dev,
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index d96bfb1ac20b..1c2ae11a9e35 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1926,7 +1926,7 @@ vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
 }
 
 
-static void
+static int
 vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
@@ -1943,10 +1943,12 @@ vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 	}
 
 	set_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 
-static void
+static int
 vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
@@ -1963,6 +1965,8 @@ vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 	}
 
 	clear_bit(vid, adapter->active_vlans);
+
+	return 0;
 }
 
 
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index a21ae3d549db..c4e2004bd0e8 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -301,21 +301,21 @@ static void qeth_l2_process_vlans(struct qeth_card *card)
 	spin_unlock_bh(&card->vlanlock);
 }
 
-static void qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_card *card = dev->ml_priv;
 	struct qeth_vlan_vid *id;
 
 	QETH_CARD_TEXT_(card, 4, "aid:%d", vid);
 	if (!vid)
-		return;
+		return 0;
 	if (card->info.type == QETH_CARD_TYPE_OSM) {
 		QETH_CARD_TEXT(card, 3, "aidOSM");
-		return;
+		return 0;
 	}
 	if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
 		QETH_CARD_TEXT(card, 3, "aidREC");
-		return;
+		return 0;
 	}
 	id = kmalloc(sizeof(struct qeth_vlan_vid), GFP_ATOMIC);
 	if (id) {
@@ -324,10 +324,13 @@ static void qeth_l2_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 		spin_lock_bh(&card->vlanlock);
 		list_add_tail(&id->list, &card->vid_list);
 		spin_unlock_bh(&card->vlanlock);
+	} else {
+		return -ENOMEM;
 	}
+	return 0;
 }
 
-static void qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_vlan_vid *id, *tmpid = NULL;
 	struct qeth_card *card = dev->ml_priv;
@@ -335,11 +338,11 @@ static void qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
 	if (card->info.type == QETH_CARD_TYPE_OSM) {
 		QETH_CARD_TEXT(card, 3, "kidOSM");
-		return;
+		return 0;
 	}
 	if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
 		QETH_CARD_TEXT(card, 3, "kidREC");
-		return;
+		return 0;
 	}
 	spin_lock_bh(&card->vlanlock);
 	list_for_each_entry(id, &card->vid_list, list) {
@@ -355,6 +358,7 @@ static void qeth_l2_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 		kfree(tmpid);
 	}
 	qeth_l2_set_multicast_list(card->dev);
+	return 0;
 }
 
 static int qeth_l2_stop_card(struct qeth_card *card, int recovery_mode)
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index b2a55e3fde0b..b3b045c21e2c 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1869,15 +1869,15 @@ static void qeth_l3_free_vlan_addresses(struct qeth_card *card,
 	qeth_l3_free_vlan_addresses6(card, vid);
 }
 
-static void qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l3_vlan_rx_add_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_card *card = dev->ml_priv;
 
 	set_bit(vid, card->active_vlans);
-	return;
+	return 0;
 }
 
-static void qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
+static int qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 {
 	struct qeth_card *card = dev->ml_priv;
 	unsigned long flags;
@@ -1885,7 +1885,7 @@ static void qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	QETH_CARD_TEXT_(card, 4, "kid:%d", vid);
 	if (qeth_wait_for_threads(card, QETH_RECOVER_THREAD)) {
 		QETH_CARD_TEXT(card, 3, "kidREC");
-		return;
+		return 0;
 	}
 	spin_lock_irqsave(&card->vlanlock, flags);
 	/* unregister IP addresses of vlan device */
@@ -1893,6 +1893,7 @@ static void qeth_l3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
 	clear_bit(vid, card->active_vlans);
 	spin_unlock_irqrestore(&card->vlanlock, flags);
 	qeth_l3_set_multicast_list(card->dev);
+	return 0;
 }
 
 static inline int qeth_l3_rebuild_skb(struct qeth_card *card,
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index eef257c76a40..f7bff9615728 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -792,11 +792,11 @@ struct netdev_tc_txq {
  *	3. Update dev->stats asynchronously and atomically, and define
  *	   neither operation.
  *
- * void (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid);
+ * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, unsigned short vid);
  *	If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
  *	this function is called when a VLAN id is registered.
  *
- * void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid);
+ * int (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid);
  *	If device support VLAN filtering (dev->features & NETIF_F_HW_VLAN_FILTER)
  *	this function is called when a VLAN id is unregistered.
  *
@@ -911,9 +911,9 @@ struct net_device_ops {
 						     struct rtnl_link_stats64 *storage);
 	struct net_device_stats* (*ndo_get_stats)(struct net_device *dev);
 
-	void			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
+	int			(*ndo_vlan_rx_add_vid)(struct net_device *dev,
 						       unsigned short vid);
-	void			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
+	int			(*ndo_vlan_rx_kill_vid)(struct net_device *dev,
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
-- 
cgit v1.2.3


From 87002b03baabd2b8f6281ab6411ed88d24958de1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:17 +0000
Subject: net: introduce vlan_vid_[add/del] and use them instead of direct
 [add/kill]_vid ndo calls

This patch adds wrapper for ndo_vlan_rx_add_vid/ndo_vlan_rx_kill_vid
functions. Check for NETIF_F_HW_VLAN_FILTER feature is done in this
wrapper.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 53 ++++++++++++++++++-----------------------
 drivers/net/macvlan.c           | 10 +++-----
 drivers/net/team/team.c         | 34 ++++++++++++++++----------
 include/linux/if_vlan.h         | 12 ++++++++++
 net/8021q/vlan.c                | 14 ++++-------
 net/8021q/vlan_core.c           | 23 ++++++++++++++++++
 6 files changed, 87 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d72c37f03e50..0c0dacba1f51 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -431,17 +431,13 @@ int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 static int bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct slave *slave;
+	struct slave *slave, *stop_at;
 	int i, res;
 
 	bond_for_each_slave(bond, slave, i) {
-		struct net_device *slave_dev = slave->dev;
-		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-
-		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
-		    slave_ops->ndo_vlan_rx_add_vid) {
-			slave_ops->ndo_vlan_rx_add_vid(slave_dev, vid);
-		}
+		res = vlan_vid_add(slave->dev, vid);
+		if (res)
+			goto unwind;
 	}
 
 	res = bond_add_vlan(bond, vid);
@@ -452,6 +448,14 @@ static int bond_vlan_rx_add_vid(struct net_device *bond_dev, uint16_t vid)
 	}
 
 	return 0;
+
+unwind:
+	/* unwind from head to the slave that failed */
+	stop_at = slave;
+	bond_for_each_slave_from_to(bond, slave, i, bond->first_slave, stop_at)
+		vlan_vid_del(slave->dev, vid);
+
+	return res;
 }
 
 /**
@@ -465,15 +469,8 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 	struct slave *slave;
 	int i, res;
 
-	bond_for_each_slave(bond, slave, i) {
-		struct net_device *slave_dev = slave->dev;
-		const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-
-		if ((slave_dev->features & NETIF_F_HW_VLAN_FILTER) &&
-		    slave_ops->ndo_vlan_rx_kill_vid) {
-			slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vid);
-		}
-	}
+	bond_for_each_slave(bond, slave, i)
+		vlan_vid_del(slave->dev, vid);
 
 	res = bond_del_vlan(bond, vid);
 	if (res) {
@@ -488,30 +485,26 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, uint16_t vid)
 static void bond_add_vlans_on_slave(struct bonding *bond, struct net_device *slave_dev)
 {
 	struct vlan_entry *vlan;
-	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
-
-	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
-	    !(slave_ops->ndo_vlan_rx_add_vid))
-		return;
+	int res;
 
-	list_for_each_entry(vlan, &bond->vlan_list, vlan_list)
-		slave_ops->ndo_vlan_rx_add_vid(slave_dev, vlan->vlan_id);
+	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
+		res = vlan_vid_add(slave_dev, vlan->vlan_id);
+		if (res)
+			pr_warning("%s: Failed to add vlan id %d to device %s\n",
+				   bond->dev->name, vlan->vlan_id,
+				   slave_dev->name);
+	}
 }
 
 static void bond_del_vlans_from_slave(struct bonding *bond,
 				      struct net_device *slave_dev)
 {
-	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 	struct vlan_entry *vlan;
 
-	if (!(slave_dev->features & NETIF_F_HW_VLAN_FILTER) ||
-	    !(slave_ops->ndo_vlan_rx_kill_vid))
-		return;
-
 	list_for_each_entry(vlan, &bond->vlan_list, vlan_list) {
 		if (!vlan->vlan_id)
 			continue;
-		slave_ops->ndo_vlan_rx_kill_vid(slave_dev, vlan->vlan_id);
+		vlan_vid_del(slave_dev, vlan->vlan_id);
 	}
 }
 
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 2511bc5c34f3..f2f820c4b40a 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -26,6 +26,7 @@
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
 #include <linux/if_arp.h>
+#include <linux/if_vlan.h>
 #include <linux/if_link.h>
 #include <linux/if_macvlan.h>
 #include <net/rtnetlink.h>
@@ -525,11 +526,8 @@ static int macvlan_vlan_rx_add_vid(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
-	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
-	if (ops->ndo_vlan_rx_add_vid)
-		return ops->ndo_vlan_rx_add_vid(lowerdev, vid);
-	return 0;
+	return vlan_vid_add(lowerdev, vid);
 }
 
 static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
@@ -537,10 +535,8 @@ static int macvlan_vlan_rx_kill_vid(struct net_device *dev,
 {
 	struct macvlan_dev *vlan = netdev_priv(dev);
 	struct net_device *lowerdev = vlan->lowerdev;
-	const struct net_device_ops *ops = lowerdev->netdev_ops;
 
-	if (ops->ndo_vlan_rx_kill_vid)
-		return ops->ndo_vlan_rx_kill_vid(lowerdev, vid);
+	vlan_vid_del(lowerdev, vid);
 	return 0;
 }
 
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 8e8bf958539e..79c2d1b52eb6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -18,6 +18,7 @@
 #include <linux/ctype.h>
 #include <linux/notifier.h>
 #include <linux/netdevice.h>
+#include <linux/if_vlan.h>
 #include <linux/if_arp.h>
 #include <linux/socket.h>
 #include <linux/etherdevice.h>
@@ -906,17 +907,28 @@ static int team_vlan_rx_add_vid(struct net_device *dev, uint16_t vid)
 {
 	struct team *team = netdev_priv(dev);
 	struct team_port *port;
+	int err;
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(port, &team->port_list, list) {
-		const struct net_device_ops *ops = port->dev->netdev_ops;
-
-		if (ops->ndo_vlan_rx_add_vid)
-			ops->ndo_vlan_rx_add_vid(port->dev, vid);
+	/*
+	 * Alhough this is reader, it's guarded by team lock. It's not possible
+	 * to traverse list in reverse under rcu_read_lock
+	 */
+	mutex_lock(&team->lock);
+	list_for_each_entry(port, &team->port_list, list) {
+		err = vlan_vid_add(port->dev, vid);
+		if (err)
+			goto unwind;
 	}
-	rcu_read_unlock();
+	mutex_unlock(&team->lock);
 
 	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(port, &team->port_list, list)
+		vlan_vid_del(port->dev, vid);
+	mutex_unlock(&team->lock);
+
+	return err;
 }
 
 static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
@@ -925,12 +937,8 @@ static int team_vlan_rx_kill_vid(struct net_device *dev, uint16_t vid)
 	struct team_port *port;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(port, &team->port_list, list) {
-		const struct net_device_ops *ops = port->dev->netdev_ops;
-
-		if (ops->ndo_vlan_rx_kill_vid)
-			ops->ndo_vlan_rx_kill_vid(port->dev, vid);
-	}
+	list_for_each_entry_rcu(port, &team->port_list, list)
+		vlan_vid_del(port->dev, vid);
 	rcu_read_unlock();
 
 	return 0;
diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 31d7c976f063..71168a6f3347 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -109,6 +109,9 @@ extern u16 vlan_dev_vlan_id(const struct net_device *dev);
 extern bool vlan_do_receive(struct sk_buff **skb, bool last_handler);
 extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 
+extern int vlan_vid_add(struct net_device *dev, unsigned short vid);
+extern void vlan_vid_del(struct net_device *dev, unsigned short vid);
+
 #else
 static inline struct net_device *
 __vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id)
@@ -139,6 +142,15 @@ static inline struct sk_buff *vlan_untag(struct sk_buff *skb)
 {
 	return skb;
 }
+
+static inline int vlan_vid_add(struct net_device *dev, unsigned short vid)
+{
+	return 0;
+}
+
+static inline void vlan_vid_del(struct net_device *dev, unsigned short vid)
+{
+}
 #endif
 
 /**
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index e075625efeeb..dd9aa400888b 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -101,7 +101,6 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
 	struct vlan_group *grp;
 	u16 vlan_id = vlan->vlan_id;
 
@@ -114,8 +113,8 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 * HW accelerating devices or SW vlan input packet processing if
 	 * VLAN is not 0 (leave it there for 802.1p).
 	 */
-	if (vlan_id && (real_dev->features & NETIF_F_HW_VLAN_FILTER))
-		ops->ndo_vlan_rx_kill_vid(real_dev, vlan_id);
+	if (vlan_id)
+		vlan_vid_del(real_dev, vlan_id);
 
 	grp->nr_vlans--;
 
@@ -169,7 +168,6 @@ int register_vlan_dev(struct net_device *dev)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
-	const struct net_device_ops *ops = real_dev->netdev_ops;
 	u16 vlan_id = vlan->vlan_id;
 	struct vlan_group *grp, *ngrp = NULL;
 	int err;
@@ -207,8 +205,7 @@ int register_vlan_dev(struct net_device *dev)
 	if (ngrp) {
 		rcu_assign_pointer(real_dev->vlgrp, ngrp);
 	}
-	if (real_dev->features & NETIF_F_HW_VLAN_FILTER)
-		ops->ndo_vlan_rx_add_vid(real_dev, vlan_id);
+	vlan_vid_add(real_dev, vlan_id);
 
 	return 0;
 
@@ -369,11 +366,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		__vlan_device_event(dev, event);
 
 	if ((event == NETDEV_UP) &&
-	    (dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	    dev->netdev_ops->ndo_vlan_rx_add_vid) {
+	    (dev->features & NETIF_F_HW_VLAN_FILTER)) {
 		pr_info("adding VLAN 0 to HW filter on device %s\n",
 			dev->name);
-		dev->netdev_ops->ndo_vlan_rx_add_vid(dev, 0);
+		vlan_vid_add(dev, 0);
 	}
 
 	grp = rtnl_dereference(dev->vlgrp);
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 85241f044294..544f9cb9678c 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -146,3 +146,26 @@ err_free:
 	kfree_skb(skb);
 	return NULL;
 }
+
+int vlan_vid_add(struct net_device *dev, unsigned short vid)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	     ops->ndo_vlan_rx_add_vid) {
+		return ops->ndo_vlan_rx_add_vid(dev, vid);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(vlan_vid_add);
+
+void vlan_vid_del(struct net_device *dev, unsigned short vid)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
+	     ops->ndo_vlan_rx_kill_vid) {
+		ops->ndo_vlan_rx_kill_vid(dev, vid);
+	}
+}
+EXPORT_SYMBOL(vlan_vid_del);
-- 
cgit v1.2.3


From 5b9ea6e022e9ba0fe39cb349ac40361f78d5da5b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:18 +0000
Subject: vlan: introduce vid list with reference counting

This allows to keep track of vids needed to be in rx vlan filters of
devices even if they are used in bond/team etc.

vlan_info as well as vlan_group previously was, is allocated when first
vid is added and dealocated whan last vid is deleted.

vlan_group definition is moved to private header.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h   |  17 +----
 include/linux/netdevice.h |   3 +-
 net/8021q/vlan.c          |  90 +++++++++----------------
 net/8021q/vlan.h          |  30 ++++++++-
 net/8021q/vlan_core.c     | 168 +++++++++++++++++++++++++++++++++++++++++++---
 5 files changed, 219 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 71168a6f3347..0c9691305298 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -74,22 +74,7 @@ static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb)
 /* found in socket.c */
 extern void vlan_ioctl_set(int (*hook)(struct net *, void __user *));
 
-/* if this changes, algorithm will have to be reworked because this
- * depends on completely exhausting the VLAN identifier space.  Thus
- * it gives constant time look-up, but in many cases it wastes memory.
- */
-#define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
-#define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
-
-struct vlan_group {
-	struct net_device	*real_dev; /* The ethernet(like) device
-					    * the vlan is attached to.
-					    */
-	unsigned int		nr_vlans;
-	struct hlist_node	hlist;	/* linked list */
-	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
-	struct rcu_head		rcu;
-};
+struct vlan_info;
 
 static inline int is_vlan_dev(struct net_device *dev)
 {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index f7bff9615728..603730804da5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -55,7 +55,6 @@
 
 #include <linux/netdev_features.h>
 
-struct vlan_group;
 struct netpoll_info;
 struct phy_device;
 /* 802.11 specific */
@@ -1096,7 +1095,7 @@ struct net_device {
 	/* Protocol specific pointers */
 
 #if IS_ENABLED(CONFIG_VLAN_8021Q)
-	struct vlan_group __rcu	*vlgrp;		/* VLAN group */
+	struct vlan_info __rcu	*vlan_info;	/* VLAN info */
 #endif
 #if IS_ENABLED(CONFIG_NET_DSA)
 	struct dsa_switch_tree	*dsa_ptr;	/* dsa specific data */
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index dd9aa400888b..efea35b02e7f 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -51,27 +51,6 @@ const char vlan_version[] = DRV_VERSION;
 
 /* End of global variables definitions. */
 
-static void vlan_group_free(struct vlan_group *grp)
-{
-	int i;
-
-	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
-		kfree(grp->vlan_devices_arrays[i]);
-	kfree(grp);
-}
-
-static struct vlan_group *vlan_group_alloc(struct net_device *real_dev)
-{
-	struct vlan_group *grp;
-
-	grp = kzalloc(sizeof(struct vlan_group), GFP_KERNEL);
-	if (!grp)
-		return NULL;
-
-	grp->real_dev = real_dev;
-	return grp;
-}
-
 static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 {
 	struct net_device **array;
@@ -92,22 +71,20 @@ static int vlan_group_prealloc_vid(struct vlan_group *vg, u16 vlan_id)
 	return 0;
 }
 
-static void vlan_rcu_free(struct rcu_head *rcu)
-{
-	vlan_group_free(container_of(rcu, struct vlan_group, rcu));
-}
-
 void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 {
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
+	struct vlan_info *vlan_info;
 	struct vlan_group *grp;
 	u16 vlan_id = vlan->vlan_id;
 
 	ASSERT_RTNL();
 
-	grp = rtnl_dereference(real_dev->vlgrp);
-	BUG_ON(!grp);
+	vlan_info = rtnl_dereference(real_dev->vlan_info);
+	BUG_ON(!vlan_info);
+
+	grp = &vlan_info->grp;
 
 	/* Take it out of our own structures, but be sure to interlock with
 	 * HW accelerating devices or SW vlan input packet processing if
@@ -116,7 +93,7 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	if (vlan_id)
 		vlan_vid_del(real_dev, vlan_id);
 
-	grp->nr_vlans--;
+	grp->nr_vlan_devs--;
 
 	if (vlan->flags & VLAN_FLAG_GVRP)
 		vlan_gvrp_request_leave(dev);
@@ -128,16 +105,9 @@ void unregister_vlan_dev(struct net_device *dev, struct list_head *head)
 	 */
 	unregister_netdevice_queue(dev, head);
 
-	/* If the group is now empty, kill off the group. */
-	if (grp->nr_vlans == 0) {
+	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
 
-		RCU_INIT_POINTER(real_dev->vlgrp, NULL);
-
-		/* Free the group, after all cpu's are done. */
-		call_rcu(&grp->rcu, vlan_rcu_free);
-	}
-
 	/* Get rid of the vlan's reference to real_dev */
 	dev_put(real_dev);
 }
@@ -169,17 +139,23 @@ int register_vlan_dev(struct net_device *dev)
 	struct vlan_dev_priv *vlan = vlan_dev_priv(dev);
 	struct net_device *real_dev = vlan->real_dev;
 	u16 vlan_id = vlan->vlan_id;
-	struct vlan_group *grp, *ngrp = NULL;
+	struct vlan_info *vlan_info;
+	struct vlan_group *grp;
 	int err;
 
-	grp = rtnl_dereference(real_dev->vlgrp);
-	if (!grp) {
-		ngrp = grp = vlan_group_alloc(real_dev);
-		if (!grp)
-			return -ENOBUFS;
+	err = vlan_vid_add(real_dev, vlan_id);
+	if (err)
+		return err;
+
+	vlan_info = rtnl_dereference(real_dev->vlan_info);
+	/* vlan_info should be there now. vlan_vid_add took care of it */
+	BUG_ON(!vlan_info);
+
+	grp = &vlan_info->grp;
+	if (grp->nr_vlan_devs == 0) {
 		err = vlan_gvrp_init_applicant(real_dev);
 		if (err < 0)
-			goto out_free_group;
+			goto out_vid_del;
 	}
 
 	err = vlan_group_prealloc_vid(grp, vlan_id);
@@ -200,23 +176,15 @@ int register_vlan_dev(struct net_device *dev)
 	 * it into our local structure.
 	 */
 	vlan_group_set_device(grp, vlan_id, dev);
-	grp->nr_vlans++;
-
-	if (ngrp) {
-		rcu_assign_pointer(real_dev->vlgrp, ngrp);
-	}
-	vlan_vid_add(real_dev, vlan_id);
+	grp->nr_vlan_devs++;
 
 	return 0;
 
 out_uninit_applicant:
-	if (ngrp)
+	if (grp->nr_vlan_devs == 0)
 		vlan_gvrp_uninit_applicant(real_dev);
-out_free_group:
-	if (ngrp) {
-		/* Free the group, after all cpu's are done. */
-		call_rcu(&ngrp->rcu, vlan_rcu_free);
-	}
+out_vid_del:
+	vlan_vid_del(real_dev, vlan_id);
 	return err;
 }
 
@@ -357,6 +325,7 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 {
 	struct net_device *dev = ptr;
 	struct vlan_group *grp;
+	struct vlan_info *vlan_info;
 	int i, flgs;
 	struct net_device *vlandev;
 	struct vlan_dev_priv *vlan;
@@ -372,9 +341,10 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 		vlan_vid_add(dev, 0);
 	}
 
-	grp = rtnl_dereference(dev->vlgrp);
-	if (!grp)
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info)
 		goto out;
+	grp = &vlan_info->grp;
 
 	/* It is OK that we do not hold the group lock right now,
 	 * as we run under the RTNL lock.
@@ -478,9 +448,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event,
 			if (!vlandev)
 				continue;
 
-			/* unregistration of last vlan destroys group, abort
+			/* removal of last vid destroys vlan_info, abort
 			 * afterwards */
-			if (grp->nr_vlans == 1)
+			if (vlan_info->nr_vids == 1)
 				i = VLAN_N_VID;
 
 			unregister_vlan_dev(vlandev, &list);
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index d3c4ea4a3836..28d8dc20cb6d 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -3,6 +3,7 @@
 
 #include <linux/if_vlan.h>
 #include <linux/u64_stats_sync.h>
+#include <linux/list.h>
 
 
 /**
@@ -74,6 +75,29 @@ static inline struct vlan_dev_priv *vlan_dev_priv(const struct net_device *dev)
 	return netdev_priv(dev);
 }
 
+/* if this changes, algorithm will have to be reworked because this
+ * depends on completely exhausting the VLAN identifier space.  Thus
+ * it gives constant time look-up, but in many cases it wastes memory.
+ */
+#define VLAN_GROUP_ARRAY_SPLIT_PARTS  8
+#define VLAN_GROUP_ARRAY_PART_LEN     (VLAN_N_VID/VLAN_GROUP_ARRAY_SPLIT_PARTS)
+
+struct vlan_group {
+	unsigned int		nr_vlan_devs;
+	struct hlist_node	hlist;	/* linked list */
+	struct net_device **vlan_devices_arrays[VLAN_GROUP_ARRAY_SPLIT_PARTS];
+};
+
+struct vlan_info {
+	struct net_device	*real_dev; /* The ethernet(like) device
+					    * the vlan is attached to.
+					    */
+	struct vlan_group	grp;
+	struct list_head	vid_list;
+	unsigned int		nr_vids;
+	struct rcu_head		rcu;
+};
+
 static inline struct net_device *vlan_group_get_device(struct vlan_group *vg,
 						       u16 vlan_id)
 {
@@ -97,10 +121,10 @@ static inline void vlan_group_set_device(struct vlan_group *vg,
 static inline struct net_device *vlan_find_dev(struct net_device *real_dev,
 					       u16 vlan_id)
 {
-	struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
+	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
 
-	if (grp)
-		return vlan_group_get_device(grp, vlan_id);
+	if (vlan_info)
+		return vlan_group_get_device(&vlan_info->grp, vlan_id);
 
 	return NULL;
 }
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 544f9cb9678c..329e0313e01f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -71,10 +71,10 @@ bool vlan_do_receive(struct sk_buff **skbp, bool last_handler)
 struct net_device *__vlan_find_dev_deep(struct net_device *real_dev,
 					u16 vlan_id)
 {
-	struct vlan_group *grp = rcu_dereference_rtnl(real_dev->vlgrp);
+	struct vlan_info *vlan_info = rcu_dereference_rtnl(real_dev->vlan_info);
 
-	if (grp) {
-		return vlan_group_get_device(grp, vlan_id);
+	if (vlan_info) {
+		return vlan_group_get_device(&vlan_info->grp, vlan_id);
 	} else {
 		/*
 		 * Bonding slaves do not have grp assigned to themselves.
@@ -147,25 +147,177 @@ err_free:
 	return NULL;
 }
 
-int vlan_vid_add(struct net_device *dev, unsigned short vid)
+
+/*
+ * vlan info and vid list
+ */
+
+static void vlan_group_free(struct vlan_group *grp)
+{
+	int i;
+
+	for (i = 0; i < VLAN_GROUP_ARRAY_SPLIT_PARTS; i++)
+		kfree(grp->vlan_devices_arrays[i]);
+}
+
+static void vlan_info_free(struct vlan_info *vlan_info)
+{
+	vlan_group_free(&vlan_info->grp);
+	kfree(vlan_info);
+}
+
+static void vlan_info_rcu_free(struct rcu_head *rcu)
+{
+	vlan_info_free(container_of(rcu, struct vlan_info, rcu));
+}
+
+static struct vlan_info *vlan_info_alloc(struct net_device *dev)
+{
+	struct vlan_info *vlan_info;
+
+	vlan_info = kzalloc(sizeof(struct vlan_info), GFP_KERNEL);
+	if (!vlan_info)
+		return NULL;
+
+	vlan_info->real_dev = dev;
+	INIT_LIST_HEAD(&vlan_info->vid_list);
+	return vlan_info;
+}
+
+struct vlan_vid_info {
+	struct list_head list;
+	unsigned short vid;
+	int refcount;
+};
+
+static struct vlan_vid_info *vlan_vid_info_get(struct vlan_info *vlan_info,
+					       unsigned short vid)
+{
+	struct vlan_vid_info *vid_info;
+
+	list_for_each_entry(vid_info, &vlan_info->vid_list, list) {
+		if (vid_info->vid == vid)
+			return vid_info;
+	}
+	return NULL;
+}
+
+static struct vlan_vid_info *vlan_vid_info_alloc(unsigned short vid)
+{
+	struct vlan_vid_info *vid_info;
+
+	vid_info = kzalloc(sizeof(struct vlan_vid_info), GFP_KERNEL);
+	if (!vid_info)
+		return NULL;
+	vid_info->vid = vid;
+
+	return vid_info;
+}
+
+static int __vlan_vid_add(struct vlan_info *vlan_info, unsigned short vid,
+			  struct vlan_vid_info **pvid_info)
 {
+	struct net_device *dev = vlan_info->real_dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	struct vlan_vid_info *vid_info;
+	int err;
+
+	vid_info = vlan_vid_info_alloc(vid);
+	if (!vid_info)
+		return -ENOMEM;
 
 	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
-	     ops->ndo_vlan_rx_add_vid) {
-		return ops->ndo_vlan_rx_add_vid(dev, vid);
+	    ops->ndo_vlan_rx_add_vid) {
+		err =  ops->ndo_vlan_rx_add_vid(dev, vid);
+		if (err) {
+			kfree(vid_info);
+			return err;
+		}
 	}
+	list_add(&vid_info->list, &vlan_info->vid_list);
+	vlan_info->nr_vids++;
+	*pvid_info = vid_info;
 	return 0;
 }
+
+int vlan_vid_add(struct net_device *dev, unsigned short vid)
+{
+	struct vlan_info *vlan_info;
+	struct vlan_vid_info *vid_info;
+	bool vlan_info_created = false;
+	int err;
+
+	ASSERT_RTNL();
+
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info) {
+		vlan_info = vlan_info_alloc(dev);
+		if (!vlan_info)
+			return -ENOMEM;
+		vlan_info_created = true;
+	}
+	vid_info = vlan_vid_info_get(vlan_info, vid);
+	if (!vid_info) {
+		err = __vlan_vid_add(vlan_info, vid, &vid_info);
+		if (err)
+			goto out_free_vlan_info;
+	}
+	vid_info->refcount++;
+
+	if (vlan_info_created)
+		rcu_assign_pointer(dev->vlan_info, vlan_info);
+
+	return 0;
+
+out_free_vlan_info:
+	if (vlan_info_created)
+		kfree(vlan_info);
+	return err;
+}
 EXPORT_SYMBOL(vlan_vid_add);
 
-void vlan_vid_del(struct net_device *dev, unsigned short vid)
+static void __vlan_vid_del(struct vlan_info *vlan_info,
+			   struct vlan_vid_info *vid_info)
 {
+	struct net_device *dev = vlan_info->real_dev;
 	const struct net_device_ops *ops = dev->netdev_ops;
+	unsigned short vid = vid_info->vid;
+	int err;
 
 	if ((dev->features & NETIF_F_HW_VLAN_FILTER) &&
 	     ops->ndo_vlan_rx_kill_vid) {
-		ops->ndo_vlan_rx_kill_vid(dev, vid);
+		err = ops->ndo_vlan_rx_kill_vid(dev, vid);
+		if (err) {
+			pr_warn("failed to kill vid %d for device %s\n",
+				vid, dev->name);
+		}
+	}
+	list_del(&vid_info->list);
+	kfree(vid_info);
+	vlan_info->nr_vids--;
+}
+
+void vlan_vid_del(struct net_device *dev, unsigned short vid)
+{
+	struct vlan_info *vlan_info;
+	struct vlan_vid_info *vid_info;
+
+	ASSERT_RTNL();
+
+	vlan_info = rtnl_dereference(dev->vlan_info);
+	if (!vlan_info)
+		return;
+
+	vid_info = vlan_vid_info_get(vlan_info, vid);
+	if (!vid_info)
+		return;
+	vid_info->refcount--;
+	if (vid_info->refcount == 0) {
+		__vlan_vid_del(vlan_info, vid_info);
+		if (vlan_info->nr_vids == 0) {
+			RCU_INIT_POINTER(dev->vlan_info, NULL);
+			call_rcu(&vlan_info->rcu, vlan_info_rcu_free);
+		}
 	}
 }
 EXPORT_SYMBOL(vlan_vid_del);
-- 
cgit v1.2.3


From 348a1443cc4303c72cf1ee3b26e476fec8e7b5fa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 8 Dec 2011 04:11:19 +0000
Subject: vlan: introduce functions to do mass addition/deletion of vids by
 another device

Introduce functions handy to copy vlan ids from one driver's list to
another.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_vlan.h | 15 +++++++++++++++
 net/8021q/vlan_core.c   | 44 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h
index 0c9691305298..13aff1e2183b 100644
--- a/include/linux/if_vlan.h
+++ b/include/linux/if_vlan.h
@@ -97,6 +97,10 @@ extern struct sk_buff *vlan_untag(struct sk_buff *skb);
 extern int vlan_vid_add(struct net_device *dev, unsigned short vid);
 extern void vlan_vid_del(struct net_device *dev, unsigned short vid);
 
+extern int vlan_vids_add_by_dev(struct net_device *dev,
+				const struct net_device *by_dev);
+extern void vlan_vids_del_by_dev(struct net_device *dev,
+				 const struct net_device *by_dev);
 #else
 static inline struct net_device *
 __vlan_find_dev_deep(struct net_device *real_dev, u16 vlan_id)
@@ -136,6 +140,17 @@ static inline int vlan_vid_add(struct net_device *dev, unsigned short vid)
 static inline void vlan_vid_del(struct net_device *dev, unsigned short vid)
 {
 }
+
+static inline int vlan_vids_add_by_dev(struct net_device *dev,
+				       const struct net_device *by_dev)
+{
+	return 0;
+}
+
+static inline void vlan_vids_del_by_dev(struct net_device *dev,
+					const struct net_device *by_dev)
+{
+}
 #endif
 
 /**
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 329e0313e01f..1414c931bd3f 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -321,3 +321,47 @@ void vlan_vid_del(struct net_device *dev, unsigned short vid)
 	}
 }
 EXPORT_SYMBOL(vlan_vid_del);
+
+int vlan_vids_add_by_dev(struct net_device *dev,
+			 const struct net_device *by_dev)
+{
+	struct vlan_vid_info *vid_info;
+	int err;
+
+	ASSERT_RTNL();
+
+	if (!by_dev->vlan_info)
+		return 0;
+
+	list_for_each_entry(vid_info, &by_dev->vlan_info->vid_list, list) {
+		err = vlan_vid_add(dev, vid_info->vid);
+		if (err)
+			goto unwind;
+	}
+	return 0;
+
+unwind:
+	list_for_each_entry_continue_reverse(vid_info,
+					     &by_dev->vlan_info->vid_list,
+					     list) {
+		vlan_vid_del(dev, vid_info->vid);
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(vlan_vids_add_by_dev);
+
+void vlan_vids_del_by_dev(struct net_device *dev,
+			  const struct net_device *by_dev)
+{
+	struct vlan_vid_info *vid_info;
+
+	ASSERT_RTNL();
+
+	if (!by_dev->vlan_info)
+		return;
+
+	list_for_each_entry(vid_info, &by_dev->vlan_info->vid_list, list)
+		vlan_vid_del(dev, vid_info->vid);
+}
+EXPORT_SYMBOL(vlan_vids_del_by_dev);
-- 
cgit v1.2.3


From 8af2a218de38f51ea4b4fa48cac1273319ae260c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 8 Dec 2011 06:06:03 +0000
Subject: sch_red: Adaptative RED AQM

Adaptative RED AQM for linux, based on paper from Sally FLoyd,
Ramakrishna Gummadi, and Scott Shenker, August 2001 :

http://icir.org/floyd/papers/adaptiveRed.pdf

Goal of Adaptative RED is to make max_p a dynamic value between 1% and
50% to reach the target average queue : (max_th - min_th) / 2

Every 500 ms:
 if (avg > target and max_p <= 0.5)
  increase max_p : max_p += alpha;
 else if (avg < target and max_p >= 0.01)
  decrease max_p : max_p *= beta;

target :[min_th + 0.4*(min_th - max_th),
          min_th + 0.6*(min_th - max_th)].
alpha : min(0.01, max_p / 4)
beta : 0.9
max_P is a Q0.32 fixed point number (unsigned, with 32 bits mantissa)

Changes against our RED implementation are :

max_p is no longer a negative power of two (1/(2^Plog)), but a Q0.32
fixed point number, to allow full range described in Adatative paper.

To deliver a random number, we now use a reciprocal divide (thats really
a multiply), but this operation is done once per marked/droped packet
when in RED_BETWEEN_TRESH window, so added cost (compared to previous
AND operation) is near zero.

dump operation gives current max_p value in a new TCA_RED_MAX_P
attribute.

Example on a 10Mbit link :

tc qdisc add dev $DEV parent 1:1 handle 10: est 1sec 8sec red \
   limit 400000 min 30000 max 90000 avpkt 1000 \
   burst 55 ecn adaptative bandwidth 10Mbit

# tc -s -d qdisc show dev eth3
...
qdisc red 10: parent 1:1 limit 400000b min 30000b max 90000b ecn
adaptative ewma 5 max_p=0.113335 Scell_log 15
 Sent 50414282 bytes 34504 pkt (dropped 35, overlimits 1392 requeues 0)
 rate 9749Kbit 831pps backlog 72056b 16p requeues 0
  marked 1357 early 35 pdrop 0 other 0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |   6 ++-
 include/net/red.h         | 101 ++++++++++++++++++++++++++++++++++++++--------
 lib/reciprocal_div.c      |   2 +
 net/sched/sch_red.c       |  21 ++++++++++
 4 files changed, 111 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index fb556dc594d3..e41e0d4de24b 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -181,6 +181,7 @@ enum {
 	TCA_RED_UNSPEC,
 	TCA_RED_PARMS,
 	TCA_RED_STAB,
+	TCA_RED_MAX_P,
 	__TCA_RED_MAX,
 };
 
@@ -194,8 +195,9 @@ struct tc_red_qopt {
 	unsigned char   Plog;		/* log(P_max/(qth_max-qth_min))	*/
 	unsigned char   Scell_log;	/* cell size for idle damping */
 	unsigned char	flags;
-#define TC_RED_ECN	1
-#define TC_RED_HARDDROP	2
+#define TC_RED_ECN		1
+#define TC_RED_HARDDROP		2
+#define TC_RED_ADAPTATIVE	4
 };
 
 struct tc_red_xstats {
diff --git a/include/net/red.h b/include/net/red.h
index b72a3b833936..24606b22d01e 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -5,6 +5,7 @@
 #include <net/pkt_sched.h>
 #include <net/inet_ecn.h>
 #include <net/dsfield.h>
+#include <linux/reciprocal_div.h>
 
 /*	Random Early Detection (RED) algorithm.
 	=======================================
@@ -87,6 +88,29 @@
 	etc.
  */
 
+/*
+ * Adaptative RED : An Algorithm for Increasing the Robustness of RED's AQM
+ * (Sally FLoyd, Ramakrishna Gummadi, and Scott Shenker) August 2001
+ *
+ * Every 500 ms:
+ *  if (avg > target and max_p <= 0.5)
+ *   increase max_p : max_p += alpha;
+ *  else if (avg < target and max_p >= 0.01)
+ *   decrease max_p : max_p *= beta;
+ *
+ * target :[qth_min + 0.4*(qth_min - qth_max),
+ *          qth_min + 0.6*(qth_min - qth_max)].
+ * alpha : min(0.01, max_p / 4)
+ * beta : 0.9
+ * max_P is a Q0.32 fixed point number (with 32 bits mantissa)
+ * max_P between 0.01 and 0.5 (1% - 50%) [ Its no longer a negative power of two ]
+ */
+#define RED_ONE_PERCENT ((u32)DIV_ROUND_CLOSEST(1ULL<<32, 100))
+
+#define MAX_P_MIN (1 * RED_ONE_PERCENT)
+#define MAX_P_MAX (50 * RED_ONE_PERCENT)
+#define MAX_P_ALPHA(val) min(MAX_P_MIN, val / 4)
+
 #define RED_STAB_SIZE	256
 #define RED_STAB_MASK	(RED_STAB_SIZE - 1)
 
@@ -101,10 +125,14 @@ struct red_stats {
 
 struct red_parms {
 	/* Parameters */
-	u32		qth_min;	/* Min avg length threshold: A scaled */
-	u32		qth_max;	/* Max avg length threshold: A scaled */
+	u32		qth_min;	/* Min avg length threshold: Wlog scaled */
+	u32		qth_max;	/* Max avg length threshold: Wlog scaled */
 	u32		Scell_max;
-	u32		Rmask;		/* Cached random mask, see red_rmask */
+	u32		max_P;		/* probability, [0 .. 1.0] 32 scaled */
+	u32		max_P_reciprocal; /* reciprocal_value(max_P / qth_delta) */
+	u32		qth_delta;	/* max_th - min_th */
+	u32		target_min;	/* min_th + 0.4*(max_th - min_th) */
+	u32		target_max;	/* min_th + 0.6*(max_th - min_th) */
 	u8		Scell_log;
 	u8		Wlog;		/* log(W)		*/
 	u8		Plog;		/* random number bits	*/
@@ -115,19 +143,22 @@ struct red_parms {
 					   number generation */
 	u32		qR;		/* Cached random number */
 
-	unsigned long	qavg;		/* Average queue length: A scaled */
+	unsigned long	qavg;		/* Average queue length: Wlog scaled */
 	ktime_t		qidlestart;	/* Start of current idle period */
 };
 
-static inline u32 red_rmask(u8 Plog)
+static inline u32 red_maxp(u8 Plog)
 {
-	return Plog < 32 ? ((1 << Plog) - 1) : ~0UL;
+	return Plog < 32 ? (~0U >> Plog) : ~0U;
 }
 
+
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
 				 u8 Scell_log, u8 *stab)
 {
+	int delta = qth_max - qth_min;
+
 	/* Reset average queue length, the value is strictly bound
 	 * to the parameters below, reseting hurts a bit but leaving
 	 * it might result in an unreasonable qavg for a while. --TGR
@@ -139,14 +170,29 @@ static inline void red_set_parms(struct red_parms *p,
 	p->qth_max	= qth_max << Wlog;
 	p->Wlog		= Wlog;
 	p->Plog		= Plog;
-	p->Rmask	= red_rmask(Plog);
+	if (delta < 0)
+		delta = 1;
+	p->qth_delta	= delta;
+	p->max_P	= red_maxp(Plog);
+	p->max_P	*= delta; /* max_P = (qth_max-qth_min)/2^Plog */
+
+	p->max_P_reciprocal  = reciprocal_value(p->max_P / delta);
+
+	/* RED Adaptative target :
+	 * [min_th + 0.4*(min_th - max_th),
+	 *  min_th + 0.6*(min_th - max_th)].
+	 */
+	delta /= 5;
+	p->target_min = qth_min + 2*delta;
+	p->target_max = qth_min + 3*delta;
+
 	p->Scell_log	= Scell_log;
 	p->Scell_max	= (255 << Scell_log);
 
 	memcpy(p->Stab, stab, sizeof(p->Stab));
 }
 
-static inline int red_is_idling(struct red_parms *p)
+static inline int red_is_idling(const struct red_parms *p)
 {
 	return p->qidlestart.tv64 != 0;
 }
@@ -168,7 +214,7 @@ static inline void red_restart(struct red_parms *p)
 	p->qcount = -1;
 }
 
-static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
+static inline unsigned long red_calc_qavg_from_idle_time(const struct red_parms *p)
 {
 	s64 delta = ktime_us_delta(ktime_get(), p->qidlestart);
 	long us_idle = min_t(s64, delta, p->Scell_max);
@@ -215,7 +261,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(struct red_parms *p)
 	}
 }
 
-static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
+static inline unsigned long red_calc_qavg_no_idle_time(const struct red_parms *p,
 						       unsigned int backlog)
 {
 	/*
@@ -230,7 +276,7 @@ static inline unsigned long red_calc_qavg_no_idle_time(struct red_parms *p,
 	return p->qavg + (backlog - (p->qavg >> p->Wlog));
 }
 
-static inline unsigned long red_calc_qavg(struct red_parms *p,
+static inline unsigned long red_calc_qavg(const struct red_parms *p,
 					  unsigned int backlog)
 {
 	if (!red_is_idling(p))
@@ -239,23 +285,24 @@ static inline unsigned long red_calc_qavg(struct red_parms *p,
 		return red_calc_qavg_from_idle_time(p);
 }
 
-static inline u32 red_random(struct red_parms *p)
+
+static inline u32 red_random(const struct red_parms *p)
 {
-	return net_random() & p->Rmask;
+	return reciprocal_divide(net_random(), p->max_P_reciprocal);
 }
 
-static inline int red_mark_probability(struct red_parms *p, unsigned long qavg)
+static inline int red_mark_probability(const struct red_parms *p, unsigned long qavg)
 {
 	/* The formula used below causes questions.
 
-	   OK. qR is random number in the interval 0..Rmask
+	   OK. qR is random number in the interval
+		(0..1/max_P)*(qth_max-qth_min)
 	   i.e. 0..(2^Plog). If we used floating point
 	   arithmetics, it would be: (2^Plog)*rnd_num,
 	   where rnd_num is less 1.
 
 	   Taking into account, that qavg have fixed
-	   point at Wlog, and Plog is related to max_P by
-	   max_P = (qth_max-qth_min)/2^Plog; two lines
+	   point at Wlog, two lines
 	   below have the following floating point equivalent:
 
 	   max_P*(qavg - qth_min)/(qth_max-qth_min) < rnd/qcount
@@ -315,4 +362,24 @@ static inline int red_action(struct red_parms *p, unsigned long qavg)
 	return RED_DONT_MARK;
 }
 
+static inline void red_adaptative_algo(struct red_parms *p)
+{
+	unsigned long qavg;
+	u32 max_p_delta;
+
+	qavg = p->qavg;
+	if (red_is_idling(p))
+		qavg = red_calc_qavg_from_idle_time(p);
+
+	/* p->qavg is fixed point number with point at Wlog */
+	qavg >>= p->Wlog;
+
+	if (qavg > p->target_max && p->max_P <= MAX_P_MAX)
+		p->max_P += MAX_P_ALPHA(p->max_P); /* maxp = maxp + alpha */
+	else if (qavg < p->target_min && p->max_P >= MAX_P_MIN)
+		p->max_P = (p->max_P/10)*9; /* maxp = maxp * Beta */
+
+	max_p_delta = DIV_ROUND_CLOSEST(p->max_P, p->qth_delta);
+	p->max_P_reciprocal = reciprocal_value(max_p_delta);
+}
 #endif
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
index 6a3bd48fa2a0..75510e94f7d0 100644
--- a/lib/reciprocal_div.c
+++ b/lib/reciprocal_div.c
@@ -1,5 +1,6 @@
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
+#include <linux/export.h>
 
 u32 reciprocal_value(u32 k)
 {
@@ -7,3 +8,4 @@ u32 reciprocal_value(u32 k)
 	do_div(val, k);
 	return (u32)val;
 }
+EXPORT_SYMBOL(reciprocal_value);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index d617161f8dd3..8f5a85bf9d10 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -39,6 +39,7 @@
 struct red_sched_data {
 	u32			limit;		/* HARD maximal queue length */
 	unsigned char		flags;
+	struct timer_list	adapt_timer;
 	struct red_parms	parms;
 	struct red_stats	stats;
 	struct Qdisc		*qdisc;
@@ -161,6 +162,8 @@ static void red_reset(struct Qdisc *sch)
 static void red_destroy(struct Qdisc *sch)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
+
+	del_timer_sync(&q->adapt_timer);
 	qdisc_destroy(q->qdisc);
 }
 
@@ -209,6 +212,10 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 				 ctl->Plog, ctl->Scell_log,
 				 nla_data(tb[TCA_RED_STAB]));
 
+	del_timer(&q->adapt_timer);
+	if (ctl->flags & TC_RED_ADAPTATIVE)
+		mod_timer(&q->adapt_timer, jiffies + HZ/2);
+
 	if (!q->qdisc->q.qlen)
 		red_start_of_idle_period(&q->parms);
 
@@ -216,11 +223,24 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	return 0;
 }
 
+static inline void red_adaptative_timer(unsigned long arg)
+{
+	struct Qdisc *sch = (struct Qdisc *)arg;
+	struct red_sched_data *q = qdisc_priv(sch);
+	spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch));
+
+	spin_lock(root_lock);
+	red_adaptative_algo(&q->parms);
+	mod_timer(&q->adapt_timer, jiffies + HZ/2);
+	spin_unlock(root_lock);
+}
+
 static int red_init(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct red_sched_data *q = qdisc_priv(sch);
 
 	q->qdisc = &noop_qdisc;
+	setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch);
 	return red_change(sch, opt);
 }
 
@@ -243,6 +263,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_RED_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-- 
cgit v1.2.3


From a73ed26bbae7327370c5bd298f07de78df9e3466 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 9 Dec 2011 02:46:45 +0000
Subject: sch_red: generalize accurate MAX_P support to RED/GRED/CHOKE

Now RED uses a Q0.32 number to store max_p (max probability), allow
RED/GRED/CHOKE to use/report full resolution at config/dump time.

Old tc binaries are non aware of new attributes, and still set/get Plog.

New tc binary set/get both Plog and max_p for backward compatibility,
they display "probability value" if they get max_p from new kernels.

# tc -d  qdisc show dev ...
...
qdisc red 10: parent 1:1 limit 360Kb min 30Kb max 90Kb ecn ewma 5
probability 0.09 Scell_log 15

Make sure we avoid potential divides by 0 in reciprocal_value(), if
(max_th - min_th) is big.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  2 ++
 include/net/red.h         | 16 +++++++++++-----
 net/sched/sch_choke.c     |  8 +++++++-
 net/sched/sch_gred.c      | 22 ++++++++++++++++++----
 net/sched/sch_red.c       |  9 +++++++--
 5 files changed, 45 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index e41e0d4de24b..8786ea741f52 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -216,6 +216,7 @@ enum {
        TCA_GRED_PARMS,
        TCA_GRED_STAB,
        TCA_GRED_DPS,
+       TCA_GRED_MAX_P,
 	   __TCA_GRED_MAX,
 };
 
@@ -255,6 +256,7 @@ enum {
 	TCA_CHOKE_UNSPEC,
 	TCA_CHOKE_PARMS,
 	TCA_CHOKE_STAB,
+	TCA_CHOKE_MAX_P,
 	__TCA_CHOKE_MAX,
 };
 
diff --git a/include/net/red.h b/include/net/red.h
index 24606b22d01e..ef715a16cce4 100644
--- a/include/net/red.h
+++ b/include/net/red.h
@@ -155,9 +155,10 @@ static inline u32 red_maxp(u8 Plog)
 
 static inline void red_set_parms(struct red_parms *p,
 				 u32 qth_min, u32 qth_max, u8 Wlog, u8 Plog,
-				 u8 Scell_log, u8 *stab)
+				 u8 Scell_log, u8 *stab, u32 max_P)
 {
 	int delta = qth_max - qth_min;
+	u32 max_p_delta;
 
 	/* Reset average queue length, the value is strictly bound
 	 * to the parameters below, reseting hurts a bit but leaving
@@ -173,10 +174,14 @@ static inline void red_set_parms(struct red_parms *p,
 	if (delta < 0)
 		delta = 1;
 	p->qth_delta	= delta;
-	p->max_P	= red_maxp(Plog);
-	p->max_P	*= delta; /* max_P = (qth_max-qth_min)/2^Plog */
-
-	p->max_P_reciprocal  = reciprocal_value(p->max_P / delta);
+	if (!max_P) {
+		max_P = red_maxp(Plog);
+		max_P *= delta; /* max_P = (qth_max - qth_min)/2^Plog */
+	}
+	p->max_P = max_P;
+	max_p_delta = max_P / delta;
+	max_p_delta = max(max_p_delta, 1U);
+	p->max_P_reciprocal  = reciprocal_value(max_p_delta);
 
 	/* RED Adaptative target :
 	 * [min_th + 0.4*(min_th - max_th),
@@ -380,6 +385,7 @@ static inline void red_adaptative_algo(struct red_parms *p)
 		p->max_P = (p->max_P/10)*9; /* maxp = maxp * Beta */
 
 	max_p_delta = DIV_ROUND_CLOSEST(p->max_P, p->qth_delta);
+	max_p_delta = max(max_p_delta, 1U);
 	p->max_P_reciprocal = reciprocal_value(max_p_delta);
 }
 #endif
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 205d369a217c..bef00acb8bd2 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -394,6 +394,7 @@ static void choke_reset(struct Qdisc *sch)
 static const struct nla_policy choke_policy[TCA_CHOKE_MAX + 1] = {
 	[TCA_CHOKE_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_CHOKE_STAB]	= { .len = RED_STAB_SIZE },
+	[TCA_CHOKE_MAX_P]	= { .type = NLA_U32 },
 };
 
 
@@ -415,6 +416,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 	int err;
 	struct sk_buff **old = NULL;
 	unsigned int mask;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -427,6 +429,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_CHOKE_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
+
 	ctl = nla_data(tb[TCA_CHOKE_PARMS]);
 
 	if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -476,7 +480,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt)
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
 		      ctl->Plog, ctl->Scell_log,
-		      nla_data(tb[TCA_CHOKE_STAB]));
+		      nla_data(tb[TCA_CHOKE_STAB]),
+		      max_P);
 
 	if (q->head == q->tail)
 		red_end_of_idle_period(&q->parms);
@@ -510,6 +515,7 @@ static int choke_dump(struct Qdisc *sch, struct sk_buff *skb)
 		goto nla_put_failure;
 
 	NLA_PUT(skb, TCA_CHOKE_PARMS, sizeof(opt), &opt);
+	NLA_PUT_U32(skb, TCA_CHOKE_MAX_P, q->parms.max_P);
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index b9493a09a870..a1b7407ac2a4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -34,7 +34,7 @@ struct gred_sched;
 
 struct gred_sched_data {
 	u32		limit;		/* HARD maximal queue length	*/
-	u32      	DP;		/* the drop pramaters */
+	u32		DP;		/* the drop parameters */
 	u32		bytesin;	/* bytes seen on virtualQ so far*/
 	u32		packetsin;	/* packets seen on virtualQ so far*/
 	u32		backlog;	/* bytes on the virtualQ */
@@ -379,7 +379,8 @@ static inline int gred_change_table_def(struct Qdisc *sch, struct nlattr *dps)
 }
 
 static inline int gred_change_vq(struct Qdisc *sch, int dp,
-				 struct tc_gred_qopt *ctl, int prio, u8 *stab)
+				 struct tc_gred_qopt *ctl, int prio,
+				 u8 *stab, u32 max_P)
 {
 	struct gred_sched *table = qdisc_priv(sch);
 	struct gred_sched_data *q;
@@ -400,7 +401,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 
 	red_set_parms(&q->parms,
 		      ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Plog,
-		      ctl->Scell_log, stab);
+		      ctl->Scell_log, stab, max_P);
 
 	return 0;
 }
@@ -409,6 +410,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
 	[TCA_GRED_PARMS]	= { .len = sizeof(struct tc_gred_qopt) },
 	[TCA_GRED_STAB]		= { .len = 256 },
 	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
+	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
 };
 
 static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -418,6 +420,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	struct nlattr *tb[TCA_GRED_MAX + 1];
 	int err, prio = GRED_DEF_PRIO;
 	u8 *stab;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -433,6 +436,8 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_GRED_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
+
 	err = -EINVAL;
 	ctl = nla_data(tb[TCA_GRED_PARMS]);
 	stab = nla_data(tb[TCA_GRED_STAB]);
@@ -457,7 +462,7 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 
 	sch_tree_lock(sch);
 
-	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab);
+	err = gred_change_vq(sch, ctl->DP, ctl, prio, stab, max_P);
 	if (err < 0)
 		goto errout_locked;
 
@@ -498,6 +503,7 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct gred_sched *table = qdisc_priv(sch);
 	struct nlattr *parms, *opts = NULL;
 	int i;
+	u32 max_p[MAX_DPs];
 	struct tc_gred_sopt sopt = {
 		.DPs	= table->DPs,
 		.def_DP	= table->def,
@@ -509,6 +515,14 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (opts == NULL)
 		goto nla_put_failure;
 	NLA_PUT(skb, TCA_GRED_DPS, sizeof(sopt), &sopt);
+
+	for (i = 0; i < MAX_DPs; i++) {
+		struct gred_sched_data *q = table->tab[i];
+
+		max_p[i] = q ? q->parms.max_P : 0;
+	}
+	NLA_PUT(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p);
+
 	parms = nla_nest_start(skb, TCA_GRED_PARMS);
 	if (parms == NULL)
 		goto nla_put_failure;
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 8f5a85bf9d10..ce2256a17d7e 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -170,6 +170,7 @@ static void red_destroy(struct Qdisc *sch)
 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
+	[TCA_RED_MAX_P] = { .type = NLA_U32 },
 };
 
 static int red_change(struct Qdisc *sch, struct nlattr *opt)
@@ -179,6 +180,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	struct tc_red_qopt *ctl;
 	struct Qdisc *child = NULL;
 	int err;
+	u32 max_P;
 
 	if (opt == NULL)
 		return -EINVAL;
@@ -191,6 +193,8 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	    tb[TCA_RED_STAB] == NULL)
 		return -EINVAL;
 
+	max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
+
 	ctl = nla_data(tb[TCA_RED_PARMS]);
 
 	if (ctl->limit > 0) {
@@ -209,8 +213,9 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
-				 ctl->Plog, ctl->Scell_log,
-				 nla_data(tb[TCA_RED_STAB]));
+		      ctl->Plog, ctl->Scell_log,
+		      nla_data(tb[TCA_RED_STAB]),
+		      max_P);
 
 	del_timer(&q->adapt_timer);
 	if (ctl->flags & TC_RED_ADAPTATIVE)
-- 
cgit v1.2.3


From 7b35eadd7eee2e0b42421ce3efbc30f1c3c745e5 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:21:16 +0000
Subject: inet_diag: Remove indirect sizeof from inet diag handlers

There's an info_size value stored on inet_diag_handler, but for existing
code this value is effectively constant, so just use sizeof(struct tcp_info)
where required.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 1 -
 net/dccp/diag.c           | 1 -
 net/ipv4/inet_diag.c      | 5 ++---
 net/ipv4/tcp_diag.c       | 1 -
 4 files changed, 2 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index defe8ff36df8..851feff0747f 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -141,7 +141,6 @@ struct inet_diag_handler {
 	void			(*idiag_get_info)(struct sock *sk,
 						  struct inet_diag_msg *r,
 						  void *info);
-	__u16                   idiag_info_size;
 	__u16                   idiag_type;
 };
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 424dcd8415d7..9343f52db284 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -52,7 +52,6 @@ static const struct inet_diag_handler dccp_diag_handler = {
 	.idiag_hashinfo	 = &dccp_hashinfo,
 	.idiag_get_info	 = dccp_diag_get_info,
 	.idiag_type	 = IPPROTO_DCCP,
-	.idiag_info_size = sizeof(struct tcp_info),
 };
 
 static int __init dccp_diag_init(void)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b56b7ba8beeb..a247f85571c4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -98,8 +98,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 		minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
 
 	if (ext & (1 << (INET_DIAG_INFO - 1)))
-		info = INET_DIAG_PUT(skb, INET_DIAG_INFO,
-				     handler->idiag_info_size);
+		info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
 
 	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
 		const size_t len = strlen(icsk->icsk_ca_ops->name);
@@ -299,7 +298,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	err = -ENOMEM;
 	rep = alloc_skb(NLMSG_SPACE((sizeof(struct inet_diag_msg) +
 				     sizeof(struct inet_diag_meminfo) +
-				     handler->idiag_info_size + 64)),
+				     sizeof(struct tcp_info) + 64)),
 			GFP_KERNEL);
 	if (!rep)
 		goto out;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 981497795d49..42e6bec7bd3e 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -38,7 +38,6 @@ static const struct inet_diag_handler tcp_diag_handler = {
 	.idiag_hashinfo	 = &tcp_hashinfo,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
-	.idiag_info_size = sizeof(struct tcp_info),
 };
 
 static int __init tcp_diag_init(void)
-- 
cgit v1.2.3


From b005ab4ef8805dc4604848c9d2ccca9d71f8fc46 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:21:53 +0000
Subject: inet_diag: Export inet diag cookie checking routine

The netlink diag susbsys stores sk address bits in the nl message
as a "cookie" and uses one when dumps details about particular
socket.

The same will be required for udp diag module, so introduce a heler
in inet_diag module

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  2 ++
 net/ipv4/inet_diag.c      | 19 ++++++++++++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 851feff0747f..503674738368 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -144,6 +144,8 @@ struct inet_diag_handler {
 	__u16                   idiag_type;
 };
 
+int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
+
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
 #endif /* __KERNEL__ */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index bd3f661803a7..ba3ae1f73abf 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -246,6 +246,18 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 	return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
 }
 
+int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req)
+{
+	if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
+	     req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
+	    ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
+	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
+		return -ESTALE;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(inet_diag_check_cookie);
+
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
 			       struct inet_diag_req *req)
@@ -288,11 +300,8 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	if (sk == NULL)
 		goto unlock;
 
-	err = -ESTALE;
-	if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
-	     req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
-	    ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
-	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
+	err = inet_diag_check_cookie(sk, req);
+	if (err)
 		goto out;
 
 	err = -ENOMEM;
-- 
cgit v1.2.3


From 8d07d1518a074a08b90be02eee5ee15e60ac9779 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:22:44 +0000
Subject: inet_diag: Introduce the byte-code run on an inet socket

The upcoming UDP module will require exactly this ability, so just
move the existing code to provide one.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  2 ++
 net/ipv4/inet_diag.c      | 55 ++++++++++++++++++++++++++---------------------
 2 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 503674738368..907c899bd41b 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -135,6 +135,7 @@ struct tcpvegas_info {
 #ifdef __KERNEL__
 struct sock;
 struct inet_hashinfo;
+struct nlattr;
 
 struct inet_diag_handler {
 	struct inet_hashinfo    *idiag_hashinfo;
@@ -144,6 +145,7 @@ struct inet_diag_handler {
 	__u16                   idiag_type;
 };
 
+int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f50df2ed9af5..08e54989b041 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -449,6 +449,35 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
 	return len == 0;
 }
 
+int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
+{
+	struct inet_diag_entry entry;
+	struct inet_sock *inet = inet_sk(sk);
+
+	if (bc == NULL)
+		return 1;
+
+	entry.family = sk->sk_family;
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+	if (entry.family == AF_INET6) {
+		struct ipv6_pinfo *np = inet6_sk(sk);
+
+		entry.saddr = np->rcv_saddr.s6_addr32;
+		entry.daddr = np->daddr.s6_addr32;
+	} else
+#endif
+	{
+		entry.saddr = &inet->inet_rcv_saddr;
+		entry.daddr = &inet->inet_daddr;
+	}
+	entry.sport = inet->inet_num;
+	entry.dport = ntohs(inet->inet_dport);
+	entry.userlocks = sk->sk_userlocks;
+
+	return inet_diag_bc_run(bc, &entry);
+}
+EXPORT_SYMBOL_GPL(inet_diag_bc_sk);
+
 static int valid_cc(const void *bc, int len, int cc)
 {
 	while (len >= 0) {
@@ -509,30 +538,8 @@ static int inet_csk_diag_dump(struct sock *sk,
 			      struct inet_diag_req *r,
 			      const struct nlattr *bc)
 {
-	if (bc != NULL) {
-		struct inet_diag_entry entry;
-		struct inet_sock *inet = inet_sk(sk);
-
-		entry.family = sk->sk_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
-		if (entry.family == AF_INET6) {
-			struct ipv6_pinfo *np = inet6_sk(sk);
-
-			entry.saddr = np->rcv_saddr.s6_addr32;
-			entry.daddr = np->daddr.s6_addr32;
-		} else
-#endif
-		{
-			entry.saddr = &inet->inet_rcv_saddr;
-			entry.daddr = &inet->inet_daddr;
-		}
-		entry.sport = inet->inet_num;
-		entry.dport = ntohs(inet->inet_dport);
-		entry.userlocks = sk->sk_userlocks;
-
-		if (!inet_diag_bc_run(bc, &entry))
-			return 0;
-	}
+	if (!inet_diag_bc_sk(bc, sk))
+		return 0;
 
 	return inet_csk_diag_fill(sk, skb, r,
 				  NETLINK_CB(cb->skb).pid,
-- 
cgit v1.2.3


From 3c4d05c8056724aff3abc20650807dd828fded54 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:23:00 +0000
Subject: inet_diag: Introduce the inet socket dumping routine

The existing inet_csk_diag_fill dumps the inet connection sock info
into the netlink inet_diag_message. Prepare this routine to be able
to dump only the inet_sock part of a socket if the icsk part is missing.

This will be used by UDP diag module when dumping UDP sockets.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  7 +++++++
 net/ipv4/inet_diag.c      | 53 ++++++++++++++++++++++++++++++-----------------
 2 files changed, 41 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 907c899bd41b..eaf5865c9e8a 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -136,6 +136,8 @@ struct tcpvegas_info {
 struct sock;
 struct inet_hashinfo;
 struct nlattr;
+struct nlmsghdr;
+struct sk_buff;
 
 struct inet_diag_handler {
 	struct inet_hashinfo    *idiag_hashinfo;
@@ -145,6 +147,11 @@ struct inet_diag_handler {
 	__u16                   idiag_type;
 };
 
+struct inet_connection_sock;
+int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
+			      struct sk_buff *skb, struct inet_diag_req *req,
+			      u32 pid, u32 seq, u16 nlmsg_flags,
+			      const struct nlmsghdr *unlh);
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 08e54989b041..dc8611e3e66f 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -70,13 +70,12 @@ static inline void inet_diag_unlock_handler(
 	mutex_unlock(&inet_diag_table_mutex);
 }
 
-static int inet_csk_diag_fill(struct sock *sk,
+int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			      struct sk_buff *skb, struct inet_diag_req *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh)
 {
 	const struct inet_sock *inet = inet_sk(sk);
-	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_diag_msg *r;
 	struct nlmsghdr  *nlh;
 	void *info = NULL;
@@ -97,16 +96,6 @@ static int inet_csk_diag_fill(struct sock *sk,
 	if (ext & (1 << (INET_DIAG_MEMINFO - 1)))
 		minfo = INET_DIAG_PUT(skb, INET_DIAG_MEMINFO, sizeof(*minfo));
 
-	if (ext & (1 << (INET_DIAG_INFO - 1)))
-		info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
-
-	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
-		const size_t len = strlen(icsk->icsk_ca_ops->name);
-
-		strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
-		       icsk->icsk_ca_ops->name);
-	}
-
 	r->idiag_family = sk->sk_family;
 	r->idiag_state = sk->sk_state;
 	r->idiag_timer = 0;
@@ -138,6 +127,21 @@ static int inet_csk_diag_fill(struct sock *sk,
 	}
 #endif
 
+	r->idiag_uid = sock_i_uid(sk);
+	r->idiag_inode = sock_i_ino(sk);
+
+	if (minfo) {
+		minfo->idiag_rmem = sk_rmem_alloc_get(sk);
+		minfo->idiag_wmem = sk->sk_wmem_queued;
+		minfo->idiag_fmem = sk->sk_forward_alloc;
+		minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+	}
+
+	if (icsk == NULL) {
+		r->idiag_rqueue = r->idiag_wqueue = 0;
+		goto out;
+	}
+
 #define EXPIRES_IN_MS(tmo)  DIV_ROUND_UP((tmo - jiffies) * 1000, HZ)
 
 	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
@@ -158,14 +162,14 @@ static int inet_csk_diag_fill(struct sock *sk,
 	}
 #undef EXPIRES_IN_MS
 
-	r->idiag_uid = sock_i_uid(sk);
-	r->idiag_inode = sock_i_ino(sk);
+	if (ext & (1 << (INET_DIAG_INFO - 1)))
+		info = INET_DIAG_PUT(skb, INET_DIAG_INFO, sizeof(struct tcp_info));
 
-	if (minfo) {
-		minfo->idiag_rmem = sk_rmem_alloc_get(sk);
-		minfo->idiag_wmem = sk->sk_wmem_queued;
-		minfo->idiag_fmem = sk->sk_forward_alloc;
-		minfo->idiag_tmem = sk_wmem_alloc_get(sk);
+	if ((ext & (1 << (INET_DIAG_CONG - 1))) && icsk->icsk_ca_ops) {
+		const size_t len = strlen(icsk->icsk_ca_ops->name);
+
+		strcpy(INET_DIAG_PUT(skb, INET_DIAG_CONG, len + 1),
+		       icsk->icsk_ca_ops->name);
 	}
 
 	handler->idiag_get_info(sk, r, info);
@@ -174,6 +178,7 @@ static int inet_csk_diag_fill(struct sock *sk,
 	    icsk->icsk_ca_ops && icsk->icsk_ca_ops->get_info)
 		icsk->icsk_ca_ops->get_info(sk, ext, skb);
 
+out:
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
@@ -182,6 +187,16 @@ nlmsg_failure:
 	nlmsg_trim(skb, b);
 	return -EMSGSIZE;
 }
+EXPORT_SYMBOL_GPL(inet_sk_diag_fill);
+
+static int inet_csk_diag_fill(struct sock *sk,
+			      struct sk_buff *skb, struct inet_diag_req *req,
+			      u32 pid, u32 seq, u16 nlmsg_flags,
+			      const struct nlmsghdr *unlh)
+{
+	return inet_sk_diag_fill(sk, inet_csk(sk),
+			skb, req, pid, seq, nlmsg_flags, unlh);
+}
 
 static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 			       struct sk_buff *skb, struct inet_diag_req *req,
-- 
cgit v1.2.3


From 1942c518ca017f376b267a7c5e78c15d37202442 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 9 Dec 2011 06:23:18 +0000
Subject: inet_diag: Generalize inet_diag dump and get_exact calls

Introduce two callbacks in inet_diag_handler -- one for dumping all
sockets (with filters) and the other one for dumping a single sk.

Replace direct calls to icsk handlers with indirect calls to callbacks
provided by handlers.

Make existing TCP and DCCP handlers use provided helpers for icsk-s.

The UDP diag module will provide its own.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 18 +++++++++++++++++-
 net/dccp/diag.c           | 15 ++++++++++++++-
 net/ipv4/inet_diag.c      | 11 ++++++-----
 net/ipv4/tcp_diag.c       | 15 ++++++++++++++-
 4 files changed, 51 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index eaf5865c9e8a..78972a149dff 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -138,9 +138,18 @@ struct inet_hashinfo;
 struct nlattr;
 struct nlmsghdr;
 struct sk_buff;
+struct netlink_callback;
 
 struct inet_diag_handler {
-	struct inet_hashinfo    *idiag_hashinfo;
+	void			(*dump)(struct sk_buff *skb,
+					struct netlink_callback *cb,
+					struct inet_diag_req *r,
+					struct nlattr *bc);
+
+	int			(*dump_one)(struct sk_buff *in_skb,
+					const struct nlmsghdr *nlh,
+					struct inet_diag_req *req);
+
 	void			(*idiag_get_info)(struct sock *sk,
 						  struct inet_diag_msg *r,
 						  void *info);
@@ -152,6 +161,13 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			      struct sk_buff *skb, struct inet_diag_req *req,
 			      u32 pid, u32 seq, u16 nlmsg_flags,
 			      const struct nlmsghdr *unlh);
+void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb,
+		struct netlink_callback *cb, struct inet_diag_req *r,
+		struct nlattr *bc);
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
+		struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+		struct inet_diag_req *req);
+
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
 int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index 9343f52db284..e29214d193d6 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -48,8 +48,21 @@ static void dccp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		dccp_get_info(sk, _info);
 }
 
+static void dccp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+		struct inet_diag_req *r, struct nlattr *bc)
+{
+	inet_diag_dump_icsk(&dccp_hashinfo, skb, cb, r, bc);
+}
+
+static int dccp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+		struct inet_diag_req *req)
+{
+	return inet_diag_dump_one_icsk(&dccp_hashinfo, in_skb, nlh, req);
+}
+
 static const struct inet_diag_handler dccp_diag_handler = {
-	.idiag_hashinfo	 = &dccp_hashinfo,
+	.dump		 = dccp_diag_dump,
+	.dump_one	 = dccp_diag_dump_one,
 	.idiag_get_info	 = dccp_diag_get_info,
 	.idiag_type	 = IPPROTO_DCCP,
 };
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index dc8611e3e66f..9b3e0b179cd2 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -273,7 +273,7 @@ int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req)
 }
 EXPORT_SYMBOL_GPL(inet_diag_check_cookie);
 
-static int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
+int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
 		const struct nlmsghdr *nlh, struct inet_diag_req *req)
 {
 	int err;
@@ -339,6 +339,7 @@ out:
 out_nosk:
 	return err;
 }
+EXPORT_SYMBOL_GPL(inet_diag_dump_one_icsk);
 
 static int inet_diag_get_exact(struct sk_buff *in_skb,
 			       const struct nlmsghdr *nlh,
@@ -351,8 +352,7 @@ static int inet_diag_get_exact(struct sk_buff *in_skb,
 	if (IS_ERR(handler))
 		err = PTR_ERR(handler);
 	else
-		err = inet_diag_dump_one_icsk(handler->idiag_hashinfo,
-				in_skb, nlh, req);
+		err = handler->dump_one(in_skb, nlh, req);
 	inet_diag_unlock_handler(handler);
 
 	return err;
@@ -731,7 +731,7 @@ out:
 	return err;
 }
 
-static void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
+void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb,
 		struct netlink_callback *cb, struct inet_diag_req *r, struct nlattr *bc)
 {
 	int i, num;
@@ -880,6 +880,7 @@ done:
 out:
 	;
 }
+EXPORT_SYMBOL_GPL(inet_diag_dump_icsk);
 
 static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		struct inet_diag_req *r, struct nlattr *bc)
@@ -888,7 +889,7 @@ static int __inet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
 
 	handler = inet_diag_lock_handler(r->sdiag_protocol);
 	if (!IS_ERR(handler))
-		inet_diag_dump_icsk(handler->idiag_hashinfo, skb, cb, r, bc);
+		handler->dump(skb, cb, r, bc);
 	inet_diag_unlock_handler(handler);
 
 	return skb->len;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 42e6bec7bd3e..6334b1f71f2d 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -34,8 +34,21 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 		tcp_get_info(sk, info);
 }
 
+static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb,
+		struct inet_diag_req *r, struct nlattr *bc)
+{
+	inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r, bc);
+}
+
+static int tcp_diag_dump_one(struct sk_buff *in_skb, const struct nlmsghdr *nlh,
+		struct inet_diag_req *req)
+{
+	return inet_diag_dump_one_icsk(&tcp_hashinfo, in_skb, nlh, req);
+}
+
 static const struct inet_diag_handler tcp_diag_handler = {
-	.idiag_hashinfo	 = &tcp_hashinfo,
+	.dump		 = tcp_diag_dump,
+	.dump_one	 = tcp_diag_dump_one,
 	.idiag_get_info	 = tcp_diag_get_info,
 	.idiag_type	 = IPPROTO_TCP,
 };
-- 
cgit v1.2.3


From dfd56b8b38fff3586f36232db58e1e9f7885a605 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 10 Dec 2011 09:48:31 +0000
Subject: net: use IS_ENABLED(CONFIG_IPV6)

Instead of testing defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/errqueue.h               |  4 ++--
 include/linux/ipv6.h                   |  4 ++--
 include/linux/lockd/lockd.h            |  6 +++---
 include/linux/sunrpc/clnt.h            |  6 +++---
 include/net/inet6_hashtables.h         |  4 ++--
 include/net/inet_sock.h                |  6 +++---
 include/net/ip.h                       |  6 +++---
 include/net/net_namespace.h            |  2 +-
 include/net/netfilter/nf_tproxy_core.h |  2 +-
 include/net/netns/mib.h                |  2 +-
 include/net/netns/xfrm.h               |  2 +-
 include/net/protocol.h                 |  8 ++++----
 include/net/sctp/sctp.h                |  4 ++--
 include/net/sctp/structs.h             |  2 +-
 include/net/tcp.h                      |  6 +++---
 include/net/udp.h                      |  4 ++--
 net/bridge/br_multicast.c              | 32 ++++++++++++++++----------------
 net/bridge/br_private.h                |  2 +-
 net/core/secure_seq.c                  |  4 ++--
 net/dccp/dccp.h                        |  2 +-
 net/dccp/minisocks.c                   |  2 +-
 net/ipv4/inet_connection_sock.c        |  2 +-
 net/ipv4/inet_diag.c                   | 16 ++++++++--------
 net/ipv4/ip_gre.c                      |  8 ++++----
 net/ipv4/ip_sockglue.c                 |  6 +++---
 net/ipv4/tcp_input.c                   |  2 +-
 net/ipv4/tcp_minisocks.c               |  2 +-
 net/ipv4/tcp_timer.c                   |  2 +-
 net/ipv4/tunnel4.c                     | 10 +++++-----
 net/ipv4/xfrm4_tunnel.c                |  6 +++---
 net/key/af_key.c                       | 18 +++++++++---------
 net/netfilter/xt_TEE.c                 |  9 +++------
 net/netlabel/netlabel_addrlist.c       |  8 ++++----
 net/netlabel/netlabel_addrlist.h       |  2 +-
 net/netlabel/netlabel_domainhash.c     | 20 ++++++++++----------
 net/netlabel/netlabel_domainhash.h     |  2 +-
 net/netlabel/netlabel_kapi.c           | 18 +++++++++---------
 net/netlabel/netlabel_mgmt.c           |  6 +++---
 net/netlabel/netlabel_unlabeled.c      | 26 +++++++++++++-------------
 net/sctp/input.c                       |  2 +-
 net/sctp/protocol.c                    |  2 +-
 net/sctp/socket.c                      |  4 ++--
 net/sunrpc/addr.c                      |  8 ++++----
 net/sunrpc/svc.c                       |  8 ++++----
 net/sunrpc/svc_xprt.c                  |  8 ++++----
 net/sunrpc/svcauth_unix.c              |  2 +-
 net/xfrm/xfrm_policy.c                 |  6 +++---
 net/xfrm/xfrm_user.c                   | 12 ++++++------
 48 files changed, 161 insertions(+), 164 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/errqueue.h b/include/linux/errqueue.h
index c9f522bd17e4..fd0628be45ce 100644
--- a/include/linux/errqueue.h
+++ b/include/linux/errqueue.h
@@ -25,7 +25,7 @@ struct sock_extended_err {
 #ifdef __KERNEL__
 
 #include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/ipv6.h>
 #endif
 
@@ -34,7 +34,7 @@ struct sock_extended_err {
 struct sock_exterr_skb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0c997767429a..6318268dcaf5 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -404,7 +404,7 @@ struct tcp6_sock {
 
 extern int inet6_sk_rebuild_header(struct sock *sk);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 {
 	return inet_sk(__sk)->pinet6;
@@ -515,7 +515,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 #define inet6_rcv_saddr(__sk)	NULL
 #define tcp_twsk_ipv6only(__sk)		0
 #define inet_v6_ipv6only(__sk)		0
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
 
 #define INET6_MATCH(__sk, __net, __hash, __saddr, __daddr, __ports, __dif)\
 	(((__sk)->sk_hash == (__hash)) && sock_net((__sk)) == (__net)	&& \
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index ff9abff55aa0..90b0656a869e 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -301,7 +301,7 @@ static inline int __nlm_privileged_request4(const struct sockaddr *sap)
 	return ipv4_is_loopback(sin->sin_addr.s_addr);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 {
 	const struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sap;
@@ -314,12 +314,12 @@ static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 
 	return ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LOOPBACK;
 }
-#else	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#else	/* IS_ENABLED(CONFIG_IPV6) */
 static inline int __nlm_privileged_request6(const struct sockaddr *sap)
 {
 	return 0;
 }
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif	/* IS_ENABLED(CONFIG_IPV6) */
 
 /*
  * Ensure incoming requests are from local privileged callers.
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index f15fd985b08a..2c5993a17c33 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -215,7 +215,7 @@ static inline bool __rpc_copy_addr4(struct sockaddr *dst,
 	return true;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
 				   const struct sockaddr *sap2)
 {
@@ -240,7 +240,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
 	dsin6->sin6_addr = ssin6->sin6_addr;
 	return true;
 }
-#else	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+#else	/* !(IS_ENABLED(CONFIG_IPV6) */
 static inline bool __rpc_cmp_addr6(const struct sockaddr *sap1,
 				   const struct sockaddr *sap2)
 {
@@ -252,7 +252,7 @@ static inline bool __rpc_copy_addr6(struct sockaddr *dst,
 {
 	return false;
 }
-#endif	/* !(CONFIG_IPV6 || CONFIG_IPV6_MODULE) */
+#endif	/* !(IS_ENABLED(CONFIG_IPV6) */
 
 /**
  * rpc_cmp_addr - compare the address portion of two sockaddrs.
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index e46674d5daea..00cbb4384c79 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -15,7 +15,7 @@
 #define _INET6_HASHTABLES_H
 
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/in6.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
@@ -110,5 +110,5 @@ extern struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo
 				 const struct in6_addr *saddr, const __be16 sport,
 				 const struct in6_addr *daddr, const __be16 dport,
 				 const int dif);
-#endif /* defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
 #endif /* _INET6_HASHTABLES_H */
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index f941964a9931..e3e405106afe 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -71,7 +71,7 @@ struct ip_options_data {
 
 struct inet_request_sock {
 	struct request_sock	req;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	u16			inet6_rsk_offset;
 #endif
 	__be16			loc_port;
@@ -139,7 +139,7 @@ struct rtable;
 struct inet_sock {
 	/* sk and pinet6 has to be the first two members of inet_sock */
 	struct sock		sk;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6_pinfo	*pinet6;
 #endif
 	/* Socket demultiplex comparisons on incoming packets. */
@@ -188,7 +188,7 @@ static inline void __inet_sk_copy_descendant(struct sock *sk_to,
 	memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
 	       sk_from->sk_prot->obj_size - ancestor_size);
 }
-#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE))
+#if !(IS_ENABLED(CONFIG_IPV6))
 static inline void inet_sk_copy_descendant(struct sock *sk_to,
 					   const struct sock *sk_from)
 {
diff --git a/include/net/ip.h b/include/net/ip.h
index fd1561e88a1a..775009f9eaba 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -353,14 +353,14 @@ static inline void ip_ipgre_mc_map(__be32 naddr, const unsigned char *broadcast,
 		memcpy(buf, &naddr, sizeof(naddr));
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/ipv6.h>
 #endif
 
 static __inline__ void inet_reset_saddr(struct sock *sk)
 {
 	inet_sk(sk)->inet_rcv_saddr = inet_sk(sk)->inet_saddr = 0;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (sk->sk_family == PF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -379,7 +379,7 @@ static inline int sk_mc_loop(struct sock *sk)
 	switch (sk->sk_family) {
 	case AF_INET:
 		return inet_sk(sk)->mc_loop;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		return inet6_sk(sk)->mc_loop;
 #endif
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 3bb6fa0eace0..ee547c149810 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -77,7 +77,7 @@ struct net {
 	struct netns_packet	packet;
 	struct netns_unix	unx;
 	struct netns_ipv4	ipv4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netns_ipv6	ipv6;
 #endif
 #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h
index e505358d8999..75ca9291cf2c 100644
--- a/include/net/netfilter/nf_tproxy_core.h
+++ b/include/net/netfilter/nf_tproxy_core.h
@@ -131,7 +131,7 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol,
 	return sk;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline struct sock *
 nf_tproxy_get_sock_v6(struct net *net, const u8 protocol,
 		      const struct in6_addr *saddr, const struct in6_addr *daddr,
diff --git a/include/net/netns/mib.h b/include/net/netns/mib.h
index 30f6728ee98c..d542a4b28cca 100644
--- a/include/net/netns/mib.h
+++ b/include/net/netns/mib.h
@@ -12,7 +12,7 @@ struct netns_mib {
 	DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 	DEFINE_SNMP_STAT_ATOMIC(struct icmpmsg_mib, icmpmsg_statistics);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct proc_dir_entry *proc_net_devsnmp6;
 	DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6);
 	DEFINE_SNMP_STAT(struct udp_mib, udplite_stats_in6);
diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h
index 748f91f87cd5..5299e69a32af 100644
--- a/include/net/netns/xfrm.h
+++ b/include/net/netns/xfrm.h
@@ -56,7 +56,7 @@ struct netns_xfrm {
 #endif
 
 	struct dst_ops		xfrm4_dst_ops;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct dst_ops		xfrm6_dst_ops;
 #endif
 };
diff --git a/include/net/protocol.h b/include/net/protocol.h
index e182e13d6391..875f4895b033 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -25,7 +25,7 @@
 #define _PROTOCOL_H
 
 #include <linux/in6.h>
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/ipv6.h>
 #endif
 
@@ -46,7 +46,7 @@ struct net_protocol {
 				netns_ok:1;
 };
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 struct inet6_protocol {
 	int	(*handler)(struct sk_buff *skb);
 
@@ -91,7 +91,7 @@ struct inet_protosw {
 
 extern const struct net_protocol __rcu *inet_protos[MAX_INET_PROTOS];
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 extern const struct inet6_protocol __rcu *inet6_protos[MAX_INET_PROTOS];
 #endif
 
@@ -100,7 +100,7 @@ extern int	inet_del_protocol(const struct net_protocol *prot, unsigned char num)
 extern void	inet_register_protosw(struct inet_protosw *p);
 extern void	inet_unregister_protosw(struct inet_protosw *p);
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 extern int	inet6_add_protocol(const struct inet6_protocol *prot, unsigned char num);
 extern int	inet6_del_protocol(const struct inet6_protocol *prot, unsigned char num);
 extern int	inet6_register_protosw(struct inet_protosw *p);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 6a72a58cde59..d3685615a8b0 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -71,7 +71,7 @@
 #include <linux/jiffies.h>
 #include <linux/idr.h>
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
 #endif
@@ -383,7 +383,7 @@ static inline void sctp_sysctl_unregister(void) { return; }
 /* Size of Supported Address Parameter for 'x' address types. */
 #define SCTP_SAT_LEN(x) (sizeof(struct sctp_paramhdr) + (x) * sizeof(__u16))
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 void sctp_v6_pf_init(void);
 void sctp_v6_pf_exit(void);
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 3382615bd710..ad0e31bf7450 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -365,7 +365,7 @@ static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp)
        return (struct sock *)sp;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 struct sctp6_sock {
        struct sctp_sock  sctp;
        struct ipv6_pinfo inet6;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 87e3c80bfa00..02f070d339ba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -628,7 +628,7 @@ extern u32 __tcp_select_window(struct sock *sk);
 struct tcp_skb_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;	/* For incoming frames		*/
@@ -1152,7 +1152,7 @@ struct tcp6_md5sig_key {
 /* - sock block */
 struct tcp_md5sig_info {
 	struct tcp4_md5sig_key	*keys4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct tcp6_md5sig_key	*keys6;
 	u32			entries6;
 	u32			alloced6;
@@ -1179,7 +1179,7 @@ struct tcp6_pseudohdr {
 
 union tcp_md5sum_block {
 	struct tcp4_pseudohdr ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct tcp6_pseudohdr ip6;
 #endif
 };
diff --git a/include/net/udp.h b/include/net/udp.h
index 1ffb39c9f324..e39592f682c3 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -41,7 +41,7 @@
 struct udp_skb_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
@@ -223,7 +223,7 @@ extern struct sock *__udp6_lib_lookup(struct net *net, const struct in6_addr *sa
 	else	    SNMP_INC_STATS_USER((net)->mib.udp_stats_in6, field);      \
 } while(0)
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #define UDPX_INC_STATS_BH(sk, field) \
 	do { \
 		if ((sk)->sk_family == AF_INET) \
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index 375417e633c9..568d5bf17534 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -24,7 +24,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <net/ip.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/mld.h>
 #include <net/addrconf.h>
@@ -36,7 +36,7 @@
 #define mlock_dereference(X, br) \
 	rcu_dereference_protected(X, lockdep_is_held(&br->multicast_lock))
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline int ipv6_is_transient_multicast(const struct in6_addr *addr)
 {
 	if (ipv6_addr_is_multicast(addr) && IPV6_ADDR_MC_FLAG_TRANSIENT(addr))
@@ -52,7 +52,7 @@ static inline int br_ip_equal(const struct br_ip *a, const struct br_ip *b)
 	switch (a->proto) {
 	case htons(ETH_P_IP):
 		return a->u.ip4 == b->u.ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return ipv6_addr_equal(&a->u.ip6, &b->u.ip6);
 #endif
@@ -65,7 +65,7 @@ static inline int __br_ip4_hash(struct net_bridge_mdb_htable *mdb, __be32 ip)
 	return jhash_1word(mdb->secret, (__force u32)ip) & (mdb->max - 1);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static inline int __br_ip6_hash(struct net_bridge_mdb_htable *mdb,
 				const struct in6_addr *ip)
 {
@@ -79,7 +79,7 @@ static inline int br_ip_hash(struct net_bridge_mdb_htable *mdb,
 	switch (ip->proto) {
 	case htons(ETH_P_IP):
 		return __br_ip4_hash(mdb, ip->u.ip4);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return __br_ip6_hash(mdb, &ip->u.ip6);
 #endif
@@ -121,7 +121,7 @@ static struct net_bridge_mdb_entry *br_mdb_ip4_get(
 	return br_mdb_ip_get(mdb, &br_dst);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static struct net_bridge_mdb_entry *br_mdb_ip6_get(
 	struct net_bridge_mdb_htable *mdb, const struct in6_addr *dst)
 {
@@ -152,7 +152,7 @@ struct net_bridge_mdb_entry *br_mdb_get(struct net_bridge *br,
 	case htons(ETH_P_IP):
 		ip.u.ip4 = ip_hdr(skb)->daddr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		ip.u.ip6 = ipv6_hdr(skb)->daddr;
 		break;
@@ -411,7 +411,7 @@ out:
 	return skb;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br,
 						    const struct in6_addr *group)
 {
@@ -496,7 +496,7 @@ static struct sk_buff *br_multicast_alloc_query(struct net_bridge *br,
 	switch (addr->proto) {
 	case htons(ETH_P_IP):
 		return br_ip4_multicast_alloc_query(br, addr->u.ip4);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return br_ip6_multicast_alloc_query(br, &addr->u.ip6);
 #endif
@@ -773,7 +773,7 @@ static int br_ip4_multicast_add_group(struct net_bridge *br,
 	return br_multicast_add_group(br, port, &br_group);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_add_group(struct net_bridge *br,
 				      struct net_bridge_port *port,
 				      const struct in6_addr *group)
@@ -845,7 +845,7 @@ static void br_multicast_send_query(struct net_bridge *br,
 	br_group.proto = htons(ETH_P_IP);
 	__br_multicast_send_query(br, port, &br_group);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	br_group.proto = htons(ETH_P_IPV6);
 	__br_multicast_send_query(br, port, &br_group);
 #endif
@@ -989,7 +989,7 @@ static int br_ip4_multicast_igmp3_report(struct net_bridge *br,
 	return err;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_mld2_report(struct net_bridge *br,
 					struct net_bridge_port *port,
 					struct sk_buff *skb)
@@ -1185,7 +1185,7 @@ out:
 	return err;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_ip6_multicast_query(struct net_bridge *br,
 				  struct net_bridge_port *port,
 				  struct sk_buff *skb)
@@ -1334,7 +1334,7 @@ static void br_ip4_multicast_leave_group(struct net_bridge *br,
 	br_multicast_leave_group(br, port, &br_group);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static void br_ip6_multicast_leave_group(struct net_bridge *br,
 					 struct net_bridge_port *port,
 					 const struct in6_addr *group)
@@ -1449,7 +1449,7 @@ err_out:
 	return err;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int br_multicast_ipv6_rcv(struct net_bridge *br,
 				 struct net_bridge_port *port,
 				 struct sk_buff *skb)
@@ -1596,7 +1596,7 @@ int br_multicast_rcv(struct net_bridge *br, struct net_bridge_port *port,
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
 		return br_multicast_ipv4_rcv(br, port, skb);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case htons(ETH_P_IPV6):
 		return br_multicast_ipv6_rcv(br, port, skb);
 #endif
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 89969080c384..57dcd1489f3f 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -56,7 +56,7 @@ struct br_ip
 {
 	union {
 		__be32	ip4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct in6_addr ip6;
 #endif
 	} u;
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 925991ae6f52..9fbca46f3e74 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -36,7 +36,7 @@ static u32 seq_scale(u32 seq)
 }
 #endif
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 __u32 secure_tcpv6_sequence_number(const __be32 *saddr, const __be32 *daddr,
 				   __be16 sport, __be16 dport)
 {
@@ -156,7 +156,7 @@ u64 secure_dccp_sequence_number(__be32 saddr, __be32 daddr,
 }
 EXPORT_SYMBOL(secure_dccp_sequence_number);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 u64 secure_dccpv6_sequence_number(__be32 *saddr, __be32 *daddr,
 				  __be16 sport, __be16 dport)
 {
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 583490aaf56f..5818032e35a9 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -357,7 +357,7 @@ static inline int dccp_bad_service_code(const struct sock *sk,
 struct dccp_skb_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
index b50d5fd3d696..5a7f90bbffac 100644
--- a/net/dccp/minisocks.c
+++ b/net/dccp/minisocks.c
@@ -53,7 +53,7 @@ void dccp_time_wait(struct sock *sk, int state, int timeo)
 	if (tw != NULL) {
 		const struct inet_connection_sock *icsk = inet_csk(sk);
 		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			const struct ipv6_pinfo *np = inet6_sk(sk);
 			struct inet6_timewait_sock *tw6;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index a598768c616c..2e4e24476c4c 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -418,7 +418,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport,
 	return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #define AF_INET_FAMILY(fam) ((fam) == AF_INET)
 #else
 #define AF_INET_FAMILY(fam) 1
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9b3e0b179cd2..575e28c57cc9 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -116,7 +116,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	if (ext & (1 << (INET_DIAG_TOS - 1)))
 		RTA_PUT_U8(skb, INET_DIAG_TOS, inet->tos);
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (r->idiag_family == AF_INET6) {
 		const struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -234,7 +234,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_wqueue	      = 0;
 	r->idiag_uid	      = 0;
 	r->idiag_inode	      = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (tw->tw_family == AF_INET6) {
 		const struct inet6_timewait_sock *tw6 =
 						inet6_twsk((struct sock *)tw);
@@ -286,7 +286,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
 				 req->id.idiag_dport, req->id.idiag_src[0],
 				 req->id.idiag_sport, req->id.idiag_if);
 	}
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	else if (req->sdiag_family == AF_INET6) {
 		sk = inet6_lookup(&init_net, hashinfo,
 				  (struct in6_addr *)req->id.idiag_dst,
@@ -473,7 +473,7 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 		return 1;
 
 	entry.family = sk->sk_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (entry.family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 
@@ -571,7 +571,7 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw,
 		struct inet_diag_entry entry;
 
 		entry.family = tw->tw_family;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == AF_INET6) {
 			struct inet6_timewait_sock *tw6 =
 						inet6_twsk((struct sock *)tw);
@@ -633,7 +633,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_wqueue = 0;
 	r->idiag_uid = sock_i_uid(sk);
 	r->idiag_inode = 0;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (r->idiag_family == AF_INET6) {
 		*(struct in6_addr *)r->id.idiag_src = inet6_rsk(req)->loc_addr;
 		*(struct in6_addr *)r->id.idiag_dst = inet6_rsk(req)->rmt_addr;
@@ -695,13 +695,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 
 			if (bc) {
 				entry.saddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 					(entry.family == AF_INET6) ?
 					inet6_rsk(req)->loc_addr.s6_addr32 :
 #endif
 					&ireq->loc_addr;
 				entry.daddr =
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 					(entry.family == AF_INET6) ?
 					inet6_rsk(req)->rmt_addr.s6_addr32 :
 #endif
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index fe070c1593ab..2b53a1f7abf6 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -46,7 +46,7 @@
 #include <net/rtnetlink.h>
 #include <net/gre.h>
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/ipv6.h>
 #include <net/ip6_fib.h>
 #include <net/ip6_route.h>
@@ -729,7 +729,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			if ((dst = rt->rt_gateway) == 0)
 				goto tx_error_icmp;
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		else if (skb->protocol == htons(ETH_P_IPV6)) {
 			struct neighbour *neigh = dst_get_neighbour_noref(skb_dst(skb));
 			const struct in6_addr *addr6;
@@ -799,7 +799,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 			goto tx_error;
 		}
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	else if (skb->protocol == htons(ETH_P_IPV6)) {
 		struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
 
@@ -875,7 +875,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
 	if ((iph->ttl = tiph->ttl) == 0) {
 		if (skb->protocol == htons(ETH_P_IP))
 			iph->ttl = old_iph->ttl;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		else if (skb->protocol == htons(ETH_P_IPV6))
 			iph->ttl = ((const struct ipv6hdr *)old_iph)->hop_limit;
 #endif
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 80d5fa450210..8aa87c19fa00 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -37,7 +37,7 @@
 #include <net/route.h>
 #include <net/xfrm.h>
 #include <net/compat.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <net/transp_v6.h>
 #endif
 
@@ -508,7 +508,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 						sock_owned_by_user(sk));
 		if (inet->is_icsk) {
 			struct inet_connection_sock *icsk = inet_csk(sk);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			if (sk->sk_family == PF_INET ||
 			    (!((1 << sk->sk_state) &
 			       (TCPF_LISTEN | TCPF_CLOSE)) &&
@@ -519,7 +519,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 				if (opt)
 					icsk->icsk_ext_hdr_len += opt->opt.optlen;
 				icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			}
 #endif
 		}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 0cbb44076cfa..b9cbc351c511 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2663,7 +2663,7 @@ static void DBGUNDO(struct sock *sk, const char *msg)
 		       tp->snd_ssthresh, tp->prior_ssthresh,
 		       tp->packets_out);
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	else if (sk->sk_family == AF_INET6) {
 		struct ipv6_pinfo *np = inet6_sk(sk);
 		printk(KERN_DEBUG "Undo %s %pI6/%u c%u l%u ss%u/%u p%u\n",
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 9dc146e5ed65..550e755747e0 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -336,7 +336,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 		tcptw->tw_ts_recent	= tp->rx_opt.ts_recent;
 		tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp;
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			struct inet6_timewait_sock *tw6;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 2e0f0af76c19..aa39a692f4c8 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -340,7 +340,7 @@ void tcp_retransmit_timer(struct sock *sk)
 			       &inet->inet_daddr, ntohs(inet->inet_dport),
 			       inet->inet_num, tp->snd_una, tp->snd_nxt);
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		else if (sk->sk_family == AF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
diff --git a/net/ipv4/tunnel4.c b/net/ipv4/tunnel4.c
index ac3b3ee4b07c..01775983b997 100644
--- a/net/ipv4/tunnel4.c
+++ b/net/ipv4/tunnel4.c
@@ -105,7 +105,7 @@ drop:
 	return 0;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int tunnel64_rcv(struct sk_buff *skb)
 {
 	struct xfrm_tunnel *handler;
@@ -134,7 +134,7 @@ static void tunnel4_err(struct sk_buff *skb, u32 info)
 			break;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static void tunnel64_err(struct sk_buff *skb, u32 info)
 {
 	struct xfrm_tunnel *handler;
@@ -152,7 +152,7 @@ static const struct net_protocol tunnel4_protocol = {
 	.netns_ok	=	1,
 };
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static const struct net_protocol tunnel64_protocol = {
 	.handler	=	tunnel64_rcv,
 	.err_handler	=	tunnel64_err,
@@ -167,7 +167,7 @@ static int __init tunnel4_init(void)
 		printk(KERN_ERR "tunnel4 init: can't add protocol\n");
 		return -EAGAIN;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (inet_add_protocol(&tunnel64_protocol, IPPROTO_IPV6)) {
 		printk(KERN_ERR "tunnel64 init: can't add protocol\n");
 		inet_del_protocol(&tunnel4_protocol, IPPROTO_IPIP);
@@ -179,7 +179,7 @@ static int __init tunnel4_init(void)
 
 static void __exit tunnel4_fini(void)
 {
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (inet_del_protocol(&tunnel64_protocol, IPPROTO_IPV6))
 		printk(KERN_ERR "tunnel64 close: can't remove protocol\n");
 #endif
diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
index 82806455e859..9247d9d70e9d 100644
--- a/net/ipv4/xfrm4_tunnel.c
+++ b/net/ipv4/xfrm4_tunnel.c
@@ -64,7 +64,7 @@ static struct xfrm_tunnel xfrm_tunnel_handler __read_mostly = {
 	.priority	=	2,
 };
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static struct xfrm_tunnel xfrm64_tunnel_handler __read_mostly = {
 	.handler	=	xfrm_tunnel_rcv,
 	.err_handler	=	xfrm_tunnel_err,
@@ -84,7 +84,7 @@ static int __init ipip_init(void)
 		xfrm_unregister_type(&ipip_type, AF_INET);
 		return -EAGAIN;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (xfrm4_tunnel_register(&xfrm64_tunnel_handler, AF_INET6)) {
 		printk(KERN_INFO "ipip init: can't add xfrm handler for AF_INET6\n");
 		xfrm4_tunnel_deregister(&xfrm_tunnel_handler, AF_INET);
@@ -97,7 +97,7 @@ static int __init ipip_init(void)
 
 static void __exit ipip_fini(void)
 {
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	if (xfrm4_tunnel_deregister(&xfrm64_tunnel_handler, AF_INET6))
 		printk(KERN_INFO "ipip close: can't remove xfrm handler for AF_INET6\n");
 #endif
diff --git a/net/key/af_key.c b/net/key/af_key.c
index bfc0bef170cb..11dbb2255ccb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -375,7 +375,7 @@ static int verify_address_len(const void *p)
 	const struct sadb_address *sp = p;
 	const struct sockaddr *addr = (const struct sockaddr *)(sp + 1);
 	const struct sockaddr_in *sin;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	const struct sockaddr_in6 *sin6;
 #endif
 	int len;
@@ -387,7 +387,7 @@ static int verify_address_len(const void *p)
 		    sp->sadb_address_prefixlen > 32)
 			return -EINVAL;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		len = DIV_ROUND_UP(sizeof(*sp) + sizeof(*sin6), sizeof(uint64_t));
 		if (sp->sadb_address_len != len ||
@@ -469,7 +469,7 @@ static int present_and_same_family(const struct sadb_address *src,
 	if (s_addr->sa_family != d_addr->sa_family)
 		return 0;
 	if (s_addr->sa_family != AF_INET
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	    && s_addr->sa_family != AF_INET6
 #endif
 		)
@@ -579,7 +579,7 @@ static inline int pfkey_sockaddr_len(sa_family_t family)
 	switch (family) {
 	case AF_INET:
 		return sizeof(struct sockaddr_in);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		return sizeof(struct sockaddr_in6);
 #endif
@@ -595,7 +595,7 @@ int pfkey_sockaddr_extract(const struct sockaddr *sa, xfrm_address_t *xaddr)
 		xaddr->a4 =
 			((struct sockaddr_in *)sa)->sin_addr.s_addr;
 		return AF_INET;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		memcpy(xaddr->a6,
 		       &((struct sockaddr_in6 *)sa)->sin6_addr,
@@ -639,7 +639,7 @@ static struct  xfrm_state *pfkey_xfrm_state_lookup(struct net *net, const struct
 	case AF_INET:
 		xaddr = (xfrm_address_t *)&((const struct sockaddr_in *)(addr + 1))->sin_addr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		xaddr = (xfrm_address_t *)&((const struct sockaddr_in6 *)(addr + 1))->sin6_addr;
 		break;
@@ -705,7 +705,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port
 		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 		return 32;
 	    }
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 	    {
 		struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sa;
@@ -1311,7 +1311,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
 		xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
 		xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
 		xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
@@ -3146,7 +3146,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
 			return NULL;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		if (opt != IPV6_IPSEC_POLICY) {
 			*dir = -EOPNOTSUPP;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 5f054a0dbbb1..68349c31083c 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -29,9 +29,6 @@
 #	define WITH_CONNTRACK 1
 #	include <net/netfilter/nf_conntrack.h>
 #endif
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-#	define WITH_IPV6 1
-#endif
 
 struct xt_tee_priv {
 	struct notifier_block	notifier;
@@ -136,7 +133,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-#ifdef WITH_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
 static bool
 tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 {
@@ -196,7 +193,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	}
 	return XT_CONTINUE;
 }
-#endif /* WITH_IPV6 */
+#endif
 
 static int tee_netdev_event(struct notifier_block *this, unsigned long event,
 			    void *ptr)
@@ -276,7 +273,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
-#ifdef WITH_IPV6
+#if IS_ENABLED(CONFIG_IPV6)
 	{
 		.name       = "TEE",
 		.revision   = 1,
diff --git a/net/netlabel/netlabel_addrlist.c b/net/netlabel/netlabel_addrlist.c
index 96b749dacc34..6f1701322fb6 100644
--- a/net/netlabel/netlabel_addrlist.c
+++ b/net/netlabel/netlabel_addrlist.c
@@ -96,7 +96,7 @@ struct netlbl_af4list *netlbl_af4list_search_exact(__be32 addr,
 }
 
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_search - Search for a matching IPv6 address entry
  * @addr: IPv6 address
@@ -185,7 +185,7 @@ int netlbl_af4list_add(struct netlbl_af4list *entry, struct list_head *head)
 	return 0;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_add - Add a new IPv6 address entry to a list
  * @entry: address entry
@@ -263,7 +263,7 @@ struct netlbl_af4list *netlbl_af4list_remove(__be32 addr, __be32 mask,
 	return entry;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_remove_entry - Remove an IPv6 address entry
  * @entry: address entry
@@ -342,7 +342,7 @@ void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
 	}
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_af6list_audit_addr - Audit an IPv6 address
  * @audit_buf: audit buffer
diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h
index fdbc1d2c7352..a1287ce18130 100644
--- a/net/netlabel/netlabel_addrlist.h
+++ b/net/netlabel/netlabel_addrlist.h
@@ -133,7 +133,7 @@ static inline void netlbl_af4list_audit_addr(struct audit_buffer *audit_buf,
 }
 #endif
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 #define __af6list_entry(ptr) container_of(ptr, struct netlbl_af6list, list)
 
diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c
index 3f905e5370c2..38204112b9f4 100644
--- a/net/netlabel/netlabel_domainhash.c
+++ b/net/netlabel/netlabel_domainhash.c
@@ -78,7 +78,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
 	struct netlbl_dom_map *ptr;
 	struct netlbl_af4list *iter4;
 	struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 	struct netlbl_af6list *tmp6;
 #endif /* IPv6 */
@@ -90,7 +90,7 @@ static void netlbl_domhsh_free_entry(struct rcu_head *entry)
 			netlbl_af4list_remove_entry(iter4);
 			kfree(netlbl_domhsh_addr4_entry(iter4));
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_safe(iter6, tmp6,
 					    &ptr->type_def.addrsel->list6) {
 			netlbl_af6list_remove_entry(iter6);
@@ -217,7 +217,7 @@ static void netlbl_domhsh_audit_add(struct netlbl_dom_map *entry,
 			cipsov4 = map4->type_def.cipsov4;
 			netlbl_af4list_audit_addr(audit_buf, 0, NULL,
 						  addr4->addr, addr4->mask);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		} else if (addr6 != NULL) {
 			struct netlbl_domaddr6_map *map6;
 			map6 = netlbl_domhsh_addr6_entry(addr6);
@@ -306,7 +306,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 	struct netlbl_dom_map *entry_old;
 	struct netlbl_af4list *iter4;
 	struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 	struct netlbl_af6list *tmp6;
 #endif /* IPv6 */
@@ -338,7 +338,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 					       &entry->type_def.addrsel->list4)
 				netlbl_domhsh_audit_add(entry, iter4, NULL,
 							ret_val, audit_info);
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			netlbl_af6list_foreach_rcu(iter6,
 					       &entry->type_def.addrsel->list6)
 				netlbl_domhsh_audit_add(entry, NULL, iter6,
@@ -365,7 +365,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 				ret_val = -EEXIST;
 				goto add_return;
 			}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_rcu(iter6,
 					   &entry->type_def.addrsel->list6)
 			if (netlbl_af6list_search_exact(&iter6->addr,
@@ -386,7 +386,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry,
 			if (ret_val != 0)
 				goto add_return;
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_safe(iter6, tmp6,
 					    &entry->type_def.addrsel->list6) {
 			netlbl_af6list_remove_entry(iter6);
@@ -510,7 +510,7 @@ int netlbl_domhsh_remove_af4(const char *domain,
 	struct netlbl_dom_map *entry_map;
 	struct netlbl_af4list *entry_addr;
 	struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 #endif /* IPv6 */
 	struct netlbl_domaddr4_map *entry;
@@ -533,7 +533,7 @@ int netlbl_domhsh_remove_af4(const char *domain,
 		goto remove_af4_failure;
 	netlbl_af4list_foreach_rcu(iter4, &entry_map->type_def.addrsel->list4)
 		goto remove_af4_single_addr;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_rcu(iter6, &entry_map->type_def.addrsel->list6)
 		goto remove_af4_single_addr;
 #endif /* IPv6 */
@@ -644,7 +644,7 @@ struct netlbl_domaddr4_map *netlbl_domhsh_getentry_af4(const char *domain,
 	return netlbl_domhsh_addr4_entry(addr_iter);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_domhsh_getentry_af6 - Get an entry from the domain hash table
  * @domain: the domain name to search for
diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h
index bfcc0f7024c5..90872c4ca30f 100644
--- a/net/netlabel/netlabel_domainhash.h
+++ b/net/netlabel/netlabel_domainhash.h
@@ -104,7 +104,7 @@ int netlbl_domhsh_walk(u32 *skip_bkt,
 		     int (*callback) (struct netlbl_dom_map *entry, void *arg),
 		     void *cb_arg);
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 struct netlbl_domaddr6_map *netlbl_domhsh_getentry_af6(const char *domain,
 						  const struct in6_addr *addr);
 #endif /* IPv6 */
diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c
index 5952237c0c86..2560e7b441c6 100644
--- a/net/netlabel/netlabel_kapi.c
+++ b/net/netlabel/netlabel_kapi.c
@@ -147,7 +147,7 @@ int netlbl_cfg_unlbl_map_add(const char *domain,
 				goto cfg_unlbl_map_add_failure;
 			break;
 			}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		case AF_INET6: {
 			const struct in6_addr *addr6 = addr;
 			const struct in6_addr *mask6 = mask;
@@ -227,7 +227,7 @@ int netlbl_cfg_unlbl_static_add(struct net *net,
 	case AF_INET:
 		addr_len = sizeof(struct in_addr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		addr_len = sizeof(struct in6_addr);
 		break;
@@ -270,7 +270,7 @@ int netlbl_cfg_unlbl_static_del(struct net *net,
 	case AF_INET:
 		addr_len = sizeof(struct in_addr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		addr_len = sizeof(struct in6_addr);
 		break;
@@ -673,7 +673,7 @@ int netlbl_sock_setattr(struct sock *sk,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -724,7 +724,7 @@ int netlbl_sock_getattr(struct sock *sk,
 	case AF_INET:
 		ret_val = cipso_v4_sock_getattr(sk, secattr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		ret_val = -ENOMSG;
 		break;
@@ -782,7 +782,7 @@ int netlbl_conn_setattr(struct sock *sk,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -853,7 +853,7 @@ int netlbl_req_setattr(struct request_sock *req,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -926,7 +926,7 @@ int netlbl_skbuff_setattr(struct sk_buff *skb,
 			ret_val = -ENOENT;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		/* since we don't support any IPv6 labeling protocols right
 		 * now we can optimize everything away until we do */
@@ -965,7 +965,7 @@ int netlbl_skbuff_getattr(const struct sk_buff *skb,
 		    cipso_v4_skbuff_getattr(skb, secattr) == 0)
 			return 0;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		break;
 #endif /* IPv6 */
diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c
index 9879300beefd..4809e2e48b02 100644
--- a/net/netlabel/netlabel_mgmt.c
+++ b/net/netlabel/netlabel_mgmt.c
@@ -184,7 +184,7 @@ static int netlbl_mgmt_add_common(struct genl_info *info,
 
 		entry->type = NETLBL_NLTYPE_ADDRSELECT;
 		entry->type_def.addrsel = addrmap;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	} else if (info->attrs[NLBL_MGMT_A_IPV6ADDR]) {
 		struct in6_addr *addr;
 		struct in6_addr *mask;
@@ -270,7 +270,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
 	struct nlattr *nla_a;
 	struct nlattr *nla_b;
 	struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 #endif
 
@@ -324,7 +324,7 @@ static int netlbl_mgmt_listentry(struct sk_buff *skb,
 
 			nla_nest_end(skb, nla_b);
 		}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		netlbl_af6list_foreach_rcu(iter6,
 					   &entry->type_def.addrsel->list6) {
 			struct netlbl_domaddr6_map *map6;
diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c
index 049ccd2447d7..4b5fa0fe78fd 100644
--- a/net/netlabel/netlabel_unlabeled.c
+++ b/net/netlabel/netlabel_unlabeled.c
@@ -170,7 +170,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
 	struct netlbl_unlhsh_iface *iface;
 	struct netlbl_af4list *iter4;
 	struct netlbl_af4list *tmp4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 	struct netlbl_af6list *tmp6;
 #endif /* IPv6 */
@@ -184,7 +184,7 @@ static void netlbl_unlhsh_free_iface(struct rcu_head *entry)
 		netlbl_af4list_remove_entry(iter4);
 		kfree(netlbl_unlhsh_addr4_entry(iter4));
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_safe(iter6, tmp6, &iface->addr6_list) {
 		netlbl_af6list_remove_entry(iter6);
 		kfree(netlbl_unlhsh_addr6_entry(iter6));
@@ -274,7 +274,7 @@ static int netlbl_unlhsh_add_addr4(struct netlbl_unlhsh_iface *iface,
 	return ret_val;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_unlhsh_add_addr6 - Add a new IPv6 address entry to the hash table
  * @iface: the associated interface entry
@@ -436,7 +436,7 @@ int netlbl_unlhsh_add(struct net *net,
 						  mask4->s_addr);
 		break;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case sizeof(struct in6_addr): {
 		const struct in6_addr *addr6 = addr;
 		const struct in6_addr *mask6 = mask;
@@ -531,7 +531,7 @@ static int netlbl_unlhsh_remove_addr4(struct net *net,
 	return 0;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /**
  * netlbl_unlhsh_remove_addr6 - Remove an IPv6 address entry
  * @net: network namespace
@@ -606,14 +606,14 @@ static int netlbl_unlhsh_remove_addr6(struct net *net,
 static void netlbl_unlhsh_condremove_iface(struct netlbl_unlhsh_iface *iface)
 {
 	struct netlbl_af4list *iter4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *iter6;
 #endif /* IPv6 */
 
 	spin_lock(&netlbl_unlhsh_lock);
 	netlbl_af4list_foreach_rcu(iter4, &iface->addr4_list)
 		goto unlhsh_condremove_failure;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_rcu(iter6, &iface->addr6_list)
 		goto unlhsh_condremove_failure;
 #endif /* IPv6 */
@@ -680,7 +680,7 @@ int netlbl_unlhsh_remove(struct net *net,
 						     iface, addr, mask,
 						     audit_info);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case sizeof(struct in6_addr):
 		ret_val = netlbl_unlhsh_remove_addr6(net,
 						     iface, addr, mask,
@@ -1196,7 +1196,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 	struct netlbl_unlhsh_iface *iface;
 	struct list_head *iter_list;
 	struct netlbl_af4list *addr4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct netlbl_af6list *addr6;
 #endif
 
@@ -1228,7 +1228,7 @@ static int netlbl_unlabel_staticlist(struct sk_buff *skb,
 					goto unlabel_staticlist_return;
 				}
 			}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 			netlbl_af6list_foreach_rcu(addr6,
 						   &iface->addr6_list) {
 				if (iter_addr6++ < skip_addr6)
@@ -1277,7 +1277,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 	u32 skip_addr6 = cb->args[1];
 	u32 iter_addr4 = 0;
 	struct netlbl_af4list *addr4;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	u32 iter_addr6 = 0;
 	struct netlbl_af6list *addr6;
 #endif
@@ -1303,7 +1303,7 @@ static int netlbl_unlabel_staticlistdef(struct sk_buff *skb,
 			goto unlabel_staticlistdef_return;
 		}
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	netlbl_af6list_foreach_rcu(addr6, &iface->addr6_list) {
 		if (iter_addr6++ < skip_addr6)
 			continue;
@@ -1494,7 +1494,7 @@ int netlbl_unlabel_getattr(const struct sk_buff *skb,
 		secattr->attr.secid = netlbl_unlhsh_addr4_entry(addr4)->secid;
 		break;
 	}
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6: {
 		struct ipv6hdr *hdr6;
 		struct netlbl_af6list *addr6;
diff --git a/net/sctp/input.c b/net/sctp/input.c
index b7692aab6e9c..80f71af71384 100644
--- a/net/sctp/input.c
+++ b/net/sctp/input.c
@@ -105,7 +105,7 @@ static inline int sctp_rcv_checksum(struct sk_buff *skb)
 struct sctp_input_cb {
 	union {
 		struct inet_skb_parm	h4;
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		struct inet6_skb_parm	h6;
 #endif
 	} header;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 61b9fca5a173..544a9b68eb53 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -637,7 +637,7 @@ void sctp_addr_wq_timeout_handler(unsigned long arg)
 		    " for cmd %d at entry %p\n", &sctp_addr_waitq, &addrw->a, addrw->state,
 		    addrw);
 
-#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		/* Now we send an ASCONF for each association */
 		/* Note. we currently don't handle link local IPv6 addressees */
 		if (addrw->a.sa.sa_family == AF_INET6) {
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index d56c07a3d435..db0308344d07 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6841,7 +6841,7 @@ struct proto sctp_prot = {
 	.sockets_allocated = &sctp_sockets_allocated,
 };
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 struct proto sctpv6_prot = {
 	.name		= "SCTPv6",
@@ -6872,4 +6872,4 @@ struct proto sctpv6_prot = {
 	.memory_allocated = &sctp_memory_allocated,
 	.sockets_allocated = &sctp_sockets_allocated,
 };
-#endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif /* IS_ENABLED(CONFIG_IPV6) */
diff --git a/net/sunrpc/addr.c b/net/sunrpc/addr.c
index 67a655ee82a9..ee77742e0ed6 100644
--- a/net/sunrpc/addr.c
+++ b/net/sunrpc/addr.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 #include <linux/export.h>
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 
 static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
 				  char *buf, const int buflen)
@@ -91,7 +91,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
 	return len;
 }
 
-#else	/* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
+#else	/* !IS_ENABLED(CONFIG_IPV6) */
 
 static size_t rpc_ntop6_noscopeid(const struct sockaddr *sap,
 				  char *buf, const int buflen)
@@ -105,7 +105,7 @@ static size_t rpc_ntop6(const struct sockaddr *sap,
 	return 0;
 }
 
-#endif	/* !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) */
+#endif	/* !IS_ENABLED(CONFIG_IPV6) */
 
 static int rpc_ntop4(const struct sockaddr *sap,
 		     char *buf, const size_t buflen)
@@ -155,7 +155,7 @@ static size_t rpc_pton4(const char *buf, const size_t buflen,
 	return sizeof(struct sockaddr_in);
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 static int rpc_parse_scope_id(const char *buf, const size_t buflen,
 			      const char *delim, struct sockaddr_in6 *sin6)
 {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 6e038884ae0c..9d01d46b05f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -826,7 +826,7 @@ static int __svc_rpcb_register4(const u32 program, const u32 version,
 	return error;
 }
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 /*
  * Register an "inet6" protocol family netid with the local
  * rpcbind daemon via an rpcbind v4 SET request.
@@ -872,7 +872,7 @@ static int __svc_rpcb_register6(const u32 program, const u32 version,
 
 	return error;
 }
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif	/* IS_ENABLED(CONFIG_IPV6) */
 
 /*
  * Register a kernel RPC service via rpcbind version 4.
@@ -893,11 +893,11 @@ static int __svc_register(const char *progname,
 		error = __svc_rpcb_register4(program, version,
 						protocol, port);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6:
 		error = __svc_rpcb_register6(program, version,
 						protocol, port);
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	}
 
 	if (error < 0)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 447cd0eb415c..38649cfa4e81 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -179,13 +179,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 		.sin_addr.s_addr	= htonl(INADDR_ANY),
 		.sin_port		= htons(port),
 	};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct sockaddr_in6 sin6 = {
 		.sin6_family		= AF_INET6,
 		.sin6_addr		= IN6ADDR_ANY_INIT,
 		.sin6_port		= htons(port),
 	};
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	struct sockaddr *sap;
 	size_t len;
 
@@ -194,12 +194,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 		sap = (struct sockaddr *)&sin;
 		len = sizeof(sin);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6:
 		sap = (struct sockaddr *)&sin6;
 		len = sizeof(sin6);
 		break;
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	default:
 		return ERR_PTR(-EAFNOSUPPORT);
 	}
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index fe258fc37f50..01153ead1dba 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -220,7 +220,7 @@ static int ip_map_parse(struct cache_detail *cd,
 		ipv6_addr_set_v4mapped(address.s4.sin_addr.s_addr,
 				&sin6.sin6_addr);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		memcpy(&sin6, &address.s6, sizeof(sin6));
 		break;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 82e803b56952..eb6b0b7781a5 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -1340,7 +1340,7 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
 	case AF_INET:
 		dst_ops = &net->xfrm.xfrm4_dst_ops;
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		dst_ops = &net->xfrm.xfrm6_dst_ops;
 		break;
@@ -2435,7 +2435,7 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 		case AF_INET:
 			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
 			break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		case AF_INET6:
 			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
 			break;
@@ -2485,7 +2485,7 @@ static void __net_init xfrm_dst_ops_init(struct net *net)
 	afinfo = xfrm_policy_afinfo[AF_INET];
 	if (afinfo)
 		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	afinfo = xfrm_policy_afinfo[AF_INET6];
 	if (afinfo)
 		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index d0a42df5160e..e0d747a2e803 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -28,7 +28,7 @@
 #include <net/netlink.h>
 #include <net/ah.h>
 #include <asm/uaccess.h>
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 #include <linux/in6.h>
 #endif
 
@@ -150,7 +150,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		break;
 
 	case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		break;
 #else
 		err = -EAFNOSUPPORT;
@@ -201,7 +201,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 			goto out;
 		break;
 
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case IPPROTO_DSTOPTS:
 	case IPPROTO_ROUTING:
 		if (attrs[XFRMA_ALG_COMP]	||
@@ -1160,7 +1160,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 		break;
 
 	case AF_INET6:
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		break;
 #else
 		return  -EAFNOSUPPORT;
@@ -1231,7 +1231,7 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family)
 		switch (ut[i].family) {
 		case AF_INET:
 			break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 		case AF_INET6:
 			break;
 #endif
@@ -2604,7 +2604,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt,
 			return NULL;
 		}
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case AF_INET6:
 		if (opt != IPV6_XFRM_POLICY) {
 			*dir = -EOPNOTSUPP;
-- 
cgit v1.2.3


From e1aab161e0135aafcd439be20b4f35e4b0922d95 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Sun, 11 Dec 2011 21:47:03 +0000
Subject: socket: initial cgroup code.

The goal of this work is to move the memory pressure tcp
controls to a cgroup, instead of just relying on global
conditions.

To avoid excessive overhead in the network fast paths,
the code that accounts allocated memory to a cgroup is
hidden inside a static_branch(). This branch is patched out
until the first non-root cgroup is created. So when nobody
is using cgroups, even if it is mounted, no significant performance
penalty should be seen.

This patch handles the generic part of the code, and has nothing
tcp-specific.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujtsu.com>
CC: Kirill A. Shutemov <kirill@shutemov.name>
CC: David S. Miller <davem@davemloft.net>
CC: Eric W. Biederman <ebiederm@xmission.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/cgroups/memory.txt |   4 +-
 include/linux/memcontrol.h       |  22 ++++++
 include/net/sock.h               | 156 +++++++++++++++++++++++++++++++++++++--
 mm/memcontrol.c                  |  46 +++++++++++-
 net/core/sock.c                  |  24 ++++--
 5 files changed, 235 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index f2453241142b..23a8dc5319a3 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -289,7 +289,9 @@ to trigger slab reclaim when those limits are reached.
 
 2.7.1 Current Kernel Memory resources accounted
 
-None
+* sockets memory pressure: some sockets protocols have memory pressure
+thresholds. The Memory Controller allows them to be controlled individually
+per cgroup, instead of globally.
 
 3. User Interface
 
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b87068a1a09e..f15021b9f734 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -85,6 +85,8 @@ extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page);
 extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
 
+extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+
 static inline
 int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
 {
@@ -381,5 +383,25 @@ mem_cgroup_print_bad_page(struct page *page)
 }
 #endif
 
+#ifdef CONFIG_INET
+enum {
+	UNDER_LIMIT,
+	SOFT_LIMIT,
+	OVER_LIMIT,
+};
+
+struct sock;
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+void sock_update_memcg(struct sock *sk);
+void sock_release_memcg(struct sock *sk);
+#else
+static inline void sock_update_memcg(struct sock *sk)
+{
+}
+static inline void sock_release_memcg(struct sock *sk)
+{
+}
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+#endif /* CONFIG_INET */
 #endif /* _LINUX_MEMCONTROL_H */
 
diff --git a/include/net/sock.h b/include/net/sock.h
index ed0dbf034539..d5eab256167c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -54,6 +54,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include <linux/memcontrol.h>
+#include <linux/res_counter.h>
 
 #include <linux/filter.h>
 #include <linux/rculist_nulls.h>
@@ -168,6 +169,7 @@ struct sock_common {
 	/* public: */
 };
 
+struct cg_proto;
 /**
   *	struct sock - network layer representation of sockets
   *	@__sk_common: shared layout with inet_timewait_sock
@@ -228,6 +230,7 @@ struct sock_common {
   *	@sk_security: used by security modules
   *	@sk_mark: generic packet mark
   *	@sk_classid: this socket's cgroup classid
+  *	@sk_cgrp: this socket's cgroup-specific proto data
   *	@sk_write_pending: a write to stream socket waits to start
   *	@sk_state_change: callback to indicate change in the state of the sock
   *	@sk_data_ready: callback to indicate there is data to be processed
@@ -342,6 +345,7 @@ struct sock {
 #endif
 	__u32			sk_mark;
 	u32			sk_classid;
+	struct cg_proto		*sk_cgrp;
 	void			(*sk_state_change)(struct sock *sk);
 	void			(*sk_data_ready)(struct sock *sk, int bytes);
 	void			(*sk_write_space)(struct sock *sk);
@@ -838,6 +842,37 @@ struct proto {
 #ifdef SOCK_REFCNT_DEBUG
 	atomic_t		socks;
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	/*
+	 * cgroup specific init/deinit functions. Called once for all
+	 * protocols that implement it, from cgroups populate function.
+	 * This function has to setup any files the protocol want to
+	 * appear in the kmem cgroup filesystem.
+	 */
+	int			(*init_cgroup)(struct cgroup *cgrp,
+					       struct cgroup_subsys *ss);
+	void			(*destroy_cgroup)(struct cgroup *cgrp,
+						  struct cgroup_subsys *ss);
+	struct cg_proto		*(*proto_cgroup)(struct mem_cgroup *memcg);
+#endif
+};
+
+struct cg_proto {
+	void			(*enter_memory_pressure)(struct sock *sk);
+	struct res_counter	*memory_allocated;	/* Current allocated memory. */
+	struct percpu_counter	*sockets_allocated;	/* Current number of sockets. */
+	int			*memory_pressure;
+	long			*sysctl_mem;
+	/*
+	 * memcg field is used to find which memcg we belong directly
+	 * Each memcg struct can hold more than one cg_proto, so container_of
+	 * won't really cut.
+	 *
+	 * The elegant solution would be having an inverse function to
+	 * proto_cgroup in struct proto, but that means polluting the structure
+	 * for everybody, instead of just for memcg users.
+	 */
+	struct mem_cgroup	*memcg;
 };
 
 extern int proto_register(struct proto *prot, int alloc_slab);
@@ -856,7 +891,7 @@ static inline void sk_refcnt_debug_dec(struct sock *sk)
 	       sk->sk_prot->name, sk, atomic_read(&sk->sk_prot->socks));
 }
 
-static inline void sk_refcnt_debug_release(const struct sock *sk)
+inline void sk_refcnt_debug_release(const struct sock *sk)
 {
 	if (atomic_read(&sk->sk_refcnt) != 1)
 		printk(KERN_DEBUG "Destruction of the %s socket %p delayed, refcnt=%d\n",
@@ -868,6 +903,24 @@ static inline void sk_refcnt_debug_release(const struct sock *sk)
 #define sk_refcnt_debug_release(sk) do { } while (0)
 #endif /* SOCK_REFCNT_DEBUG */
 
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+extern struct jump_label_key memcg_socket_limit_enabled;
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return proto->proto_cgroup(parent_mem_cgroup(cg_proto->memcg));
+}
+#define mem_cgroup_sockets_enabled static_branch(&memcg_socket_limit_enabled)
+#else
+#define mem_cgroup_sockets_enabled 0
+static inline struct cg_proto *parent_cg_proto(struct proto *proto,
+					       struct cg_proto *cg_proto)
+{
+	return NULL;
+}
+#endif
+
+
 static inline bool sk_has_memory_pressure(const struct sock *sk)
 {
 	return sk->sk_prot->memory_pressure != NULL;
@@ -877,6 +930,10 @@ static inline bool sk_under_memory_pressure(const struct sock *sk)
 {
 	if (!sk->sk_prot->memory_pressure)
 		return false;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return !!*sk->sk_cgrp->memory_pressure;
+
 	return !!*sk->sk_prot->memory_pressure;
 }
 
@@ -884,52 +941,136 @@ static inline void sk_leave_memory_pressure(struct sock *sk)
 {
 	int *memory_pressure = sk->sk_prot->memory_pressure;
 
-	if (memory_pressure && *memory_pressure)
+	if (!memory_pressure)
+		return;
+
+	if (*memory_pressure)
 		*memory_pressure = 0;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			if (*cg_proto->memory_pressure)
+				*cg_proto->memory_pressure = 0;
+	}
+
 }
 
 static inline void sk_enter_memory_pressure(struct sock *sk)
 {
-	if (sk->sk_prot->enter_memory_pressure)
-		sk->sk_prot->enter_memory_pressure(sk);
+	if (!sk->sk_prot->enter_memory_pressure)
+		return;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+		struct proto *prot = sk->sk_prot;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			cg_proto->enter_memory_pressure(sk);
+	}
+
+	sk->sk_prot->enter_memory_pressure(sk);
 }
 
 static inline long sk_prot_mem_limits(const struct sock *sk, int index)
 {
 	long *prot = sk->sk_prot->sysctl_mem;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		prot = sk->sk_cgrp->sysctl_mem;
 	return prot[index];
 }
 
+static inline void memcg_memory_allocated_add(struct cg_proto *prot,
+					      unsigned long amt,
+					      int *parent_status)
+{
+	struct res_counter *fail;
+	int ret;
+
+	ret = res_counter_charge(prot->memory_allocated,
+				 amt << PAGE_SHIFT, &fail);
+
+	if (ret < 0)
+		*parent_status = OVER_LIMIT;
+}
+
+static inline void memcg_memory_allocated_sub(struct cg_proto *prot,
+					      unsigned long amt)
+{
+	res_counter_uncharge(prot->memory_allocated, amt << PAGE_SHIFT);
+}
+
+static inline u64 memcg_memory_allocated_read(struct cg_proto *prot)
+{
+	u64 ret;
+	ret = res_counter_read_u64(prot->memory_allocated, RES_USAGE);
+	return ret >> PAGE_SHIFT;
+}
+
 static inline long
 sk_memory_allocated(const struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+
 	return atomic_long_read(prot->memory_allocated);
 }
 
 static inline long
-sk_memory_allocated_add(struct sock *sk, int amt)
+sk_memory_allocated_add(struct sock *sk, int amt, int *parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		memcg_memory_allocated_add(sk->sk_cgrp, amt, parent_status);
+		/* update the root cgroup regardless */
+		atomic_long_add_return(amt, prot->memory_allocated);
+		return memcg_memory_allocated_read(sk->sk_cgrp);
+	}
+
 	return atomic_long_add_return(amt, prot->memory_allocated);
 }
 
 static inline void
-sk_memory_allocated_sub(struct sock *sk, int amt)
+sk_memory_allocated_sub(struct sock *sk, int amt, int parent_status)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp &&
+	    parent_status != OVER_LIMIT) /* Otherwise was uncharged already */
+		memcg_memory_allocated_sub(sk->sk_cgrp, amt);
+
 	atomic_long_sub(amt, prot->memory_allocated);
 }
 
 static inline void sk_sockets_allocated_dec(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_dec(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_dec(prot->sockets_allocated);
 }
 
 static inline void sk_sockets_allocated_inc(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
+
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp) {
+		struct cg_proto *cg_proto = sk->sk_cgrp;
+
+		for (; cg_proto; cg_proto = parent_cg_proto(prot, cg_proto))
+			percpu_counter_inc(cg_proto->sockets_allocated);
+	}
+
 	percpu_counter_inc(prot->sockets_allocated);
 }
 
@@ -938,6 +1079,9 @@ sk_sockets_allocated_read_positive(struct sock *sk)
 {
 	struct proto *prot = sk->sk_prot;
 
+	if (mem_cgroup_sockets_enabled && sk->sk_cgrp)
+		return percpu_counter_sum_positive(sk->sk_cgrp->sockets_allocated);
+
 	return percpu_counter_sum_positive(prot->sockets_allocated);
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9fbcff71245e..3de3901ae0a7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -379,7 +379,48 @@ enum mem_type {
 
 static void mem_cgroup_get(struct mem_cgroup *memcg);
 static void mem_cgroup_put(struct mem_cgroup *memcg);
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+
+/* Writing them here to avoid exposing memcg's inner layout */
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+#ifdef CONFIG_INET
+#include <net/sock.h>
+
+static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
+void sock_update_memcg(struct sock *sk)
+{
+	/* A socket spends its whole life in the same cgroup */
+	if (sk->sk_cgrp) {
+		WARN_ON(1);
+		return;
+	}
+	if (static_branch(&memcg_socket_limit_enabled)) {
+		struct mem_cgroup *memcg;
+
+		BUG_ON(!sk->sk_prot->proto_cgroup);
+
+		rcu_read_lock();
+		memcg = mem_cgroup_from_task(current);
+		if (!mem_cgroup_is_root(memcg)) {
+			mem_cgroup_get(memcg);
+			sk->sk_cgrp = sk->sk_prot->proto_cgroup(memcg);
+		}
+		rcu_read_unlock();
+	}
+}
+EXPORT_SYMBOL(sock_update_memcg);
+
+void sock_release_memcg(struct sock *sk)
+{
+	if (static_branch(&memcg_socket_limit_enabled) && sk->sk_cgrp) {
+		struct mem_cgroup *memcg;
+		WARN_ON(!sk->sk_cgrp->memcg);
+		memcg = sk->sk_cgrp->memcg;
+		mem_cgroup_put(memcg);
+	}
+}
+#endif /* CONFIG_INET */
+#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
+
 static void drain_all_stock_async(struct mem_cgroup *memcg);
 
 static struct mem_cgroup_per_zone *
@@ -4932,12 +4973,13 @@ static void mem_cgroup_put(struct mem_cgroup *memcg)
 /*
  * Returns the parent mem_cgroup in memcgroup hierarchy with hierarchy enabled.
  */
-static struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
+struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 {
 	if (!memcg->res.parent)
 		return NULL;
 	return mem_cgroup_from_res_counter(memcg->res.parent, res);
 }
+EXPORT_SYMBOL(parent_mem_cgroup);
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
 static void __init enable_swap_cgroup(void)
diff --git a/net/core/sock.c b/net/core/sock.c
index a3d4205e7238..6a871b8fdd20 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -111,6 +111,7 @@
 #include <linux/init.h>
 #include <linux/highmem.h>
 #include <linux/user_namespace.h>
+#include <linux/jump_label.h>
 
 #include <asm/uaccess.h>
 #include <asm/system.h>
@@ -142,6 +143,9 @@
 static struct lock_class_key af_family_keys[AF_MAX];
 static struct lock_class_key af_family_slock_keys[AF_MAX];
 
+struct jump_label_key memcg_socket_limit_enabled;
+EXPORT_SYMBOL(memcg_socket_limit_enabled);
+
 /*
  * Make lock validator output more readable. (we pre-construct these
  * strings build-time, so that runtime initialization of socket
@@ -1711,23 +1715,27 @@ int __sk_mem_schedule(struct sock *sk, int size, int kind)
 	struct proto *prot = sk->sk_prot;
 	int amt = sk_mem_pages(size);
 	long allocated;
+	int parent_status = UNDER_LIMIT;
 
 	sk->sk_forward_alloc += amt * SK_MEM_QUANTUM;
 
-	allocated = sk_memory_allocated_add(sk, amt);
+	allocated = sk_memory_allocated_add(sk, amt, &parent_status);
 
 	/* Under limit. */
-	if (allocated <= sk_prot_mem_limits(sk, 0)) {
+	if (parent_status == UNDER_LIMIT &&
+			allocated <= sk_prot_mem_limits(sk, 0)) {
 		sk_leave_memory_pressure(sk);
 		return 1;
 	}
 
-	/* Under pressure. */
-	if (allocated > sk_prot_mem_limits(sk, 1))
+	/* Under pressure. (we or our parents) */
+	if ((parent_status > SOFT_LIMIT) ||
+			allocated > sk_prot_mem_limits(sk, 1))
 		sk_enter_memory_pressure(sk);
 
-	/* Over hard limit. */
-	if (allocated > sk_prot_mem_limits(sk, 2))
+	/* Over hard limit (we or our parents) */
+	if ((parent_status == OVER_LIMIT) ||
+			(allocated > sk_prot_mem_limits(sk, 2)))
 		goto suppress_allocation;
 
 	/* guarantee minimum buffer size under pressure */
@@ -1774,7 +1782,7 @@ suppress_allocation:
 	/* Alas. Undo changes. */
 	sk->sk_forward_alloc -= amt * SK_MEM_QUANTUM;
 
-	sk_memory_allocated_sub(sk, amt);
+	sk_memory_allocated_sub(sk, amt, parent_status);
 
 	return 0;
 }
@@ -1787,7 +1795,7 @@ EXPORT_SYMBOL(__sk_mem_schedule);
 void __sk_mem_reclaim(struct sock *sk)
 {
 	sk_memory_allocated_sub(sk,
-				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT);
+				sk->sk_forward_alloc >> SK_MEM_QUANTUM_SHIFT, 0);
 	sk->sk_forward_alloc &= SK_MEM_QUANTUM - 1;
 
 	if (sk_under_memory_pressure(sk) &&
-- 
cgit v1.2.3


From d1a4c0b37c296e600ffe08edb0db2dc1b8f550d7 Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Sun, 11 Dec 2011 21:47:04 +0000
Subject: tcp memory pressure controls

This patch introduces memory pressure controls for the tcp
protocol. It uses the generic socket memory pressure code
introduced in earlier patches, and fills in the
necessary data in cg_proto struct.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujtisu.com>
CC: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/cgroups/memory.txt |  2 ++
 include/linux/memcontrol.h       |  1 +
 include/net/sock.h               |  2 ++
 include/net/tcp_memcontrol.h     | 17 +++++++++
 mm/memcontrol.c                  | 40 +++++++++++++++++++++-
 net/core/sock.c                  | 43 +++++++++++++++++++++--
 net/ipv4/Makefile                |  1 +
 net/ipv4/tcp_ipv4.c              |  9 ++++-
 net/ipv4/tcp_memcontrol.c        | 74 ++++++++++++++++++++++++++++++++++++++++
 net/ipv6/tcp_ipv6.c              |  5 +++
 10 files changed, 189 insertions(+), 5 deletions(-)
 create mode 100644 include/net/tcp_memcontrol.h
 create mode 100644 net/ipv4/tcp_memcontrol.c

(limited to 'include/linux')

diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 23a8dc5319a3..687dea5bf1fd 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -293,6 +293,8 @@ to trigger slab reclaim when those limits are reached.
 thresholds. The Memory Controller allows them to be controlled individually
 per cgroup, instead of globally.
 
+* tcp memory pressure: sockets memory pressure for the tcp protocol.
+
 3. User Interface
 
 0. Configuration
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index f15021b9f734..1513994ce207 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -86,6 +86,7 @@ extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm);
 
 extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg);
+extern struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont);
 
 static inline
 int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)
diff --git a/include/net/sock.h b/include/net/sock.h
index d5eab256167c..18ecc9919d29 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -64,6 +64,8 @@
 #include <net/dst.h>
 #include <net/checksum.h>
 
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss);
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss);
 /*
  * This structure really needs to be cleaned up.
  * Most of it is for TCP, and not used by any of
diff --git a/include/net/tcp_memcontrol.h b/include/net/tcp_memcontrol.h
new file mode 100644
index 000000000000..5f5e1582d764
--- /dev/null
+++ b/include/net/tcp_memcontrol.h
@@ -0,0 +1,17 @@
+#ifndef _TCP_MEMCG_H
+#define _TCP_MEMCG_H
+
+struct tcp_memcontrol {
+	struct cg_proto cg_proto;
+	/* per-cgroup tcp memory pressure knobs */
+	struct res_counter tcp_memory_allocated;
+	struct percpu_counter tcp_sockets_allocated;
+	/* those two are read-mostly, leave them at the end */
+	long tcp_prot_mem[3];
+	int tcp_memory_pressure;
+};
+
+struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg);
+int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
+void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss);
+#endif /* _TCP_MEMCG_H */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3de3901ae0a7..7266202fa7cf 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -50,6 +50,8 @@
 #include <linux/cpu.h>
 #include <linux/oom.h>
 #include "internal.h"
+#include <net/sock.h>
+#include <net/tcp_memcontrol.h>
 
 #include <asm/uaccess.h>
 
@@ -295,6 +297,10 @@ struct mem_cgroup {
 	 */
 	struct mem_cgroup_stat_cpu nocpu_base;
 	spinlock_t pcp_counter_lock;
+
+#ifdef CONFIG_INET
+	struct tcp_memcontrol tcp_mem;
+#endif
 };
 
 /* Stuffs for move charges at task migration. */
@@ -384,6 +390,7 @@ static void mem_cgroup_put(struct mem_cgroup *memcg);
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
 #ifdef CONFIG_INET
 #include <net/sock.h>
+#include <net/ip.h>
 
 static bool mem_cgroup_is_root(struct mem_cgroup *memcg);
 void sock_update_memcg(struct sock *sk)
@@ -418,6 +425,15 @@ void sock_release_memcg(struct sock *sk)
 		mem_cgroup_put(memcg);
 	}
 }
+
+struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg)
+{
+	if (!memcg || mem_cgroup_is_root(memcg))
+		return NULL;
+
+	return &memcg->tcp_mem.cg_proto;
+}
+EXPORT_SYMBOL(tcp_proto_cgroup);
 #endif /* CONFIG_INET */
 #endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */
 
@@ -800,7 +816,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 	preempt_enable();
 }
 
-static struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
+struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
 {
 	return container_of(cgroup_subsys_state(cont,
 				mem_cgroup_subsys_id), struct mem_cgroup,
@@ -4732,14 +4748,34 @@ static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
 
 	ret = cgroup_add_files(cont, ss, kmem_cgroup_files,
 			       ARRAY_SIZE(kmem_cgroup_files));
+
+	/*
+	 * Part of this would be better living in a separate allocation
+	 * function, leaving us with just the cgroup tree population work.
+	 * We, however, depend on state such as network's proto_list that
+	 * is only initialized after cgroup creation. I found the less
+	 * cumbersome way to deal with it to defer it all to populate time
+	 */
+	if (!ret)
+		ret = mem_cgroup_sockets_init(cont, ss);
 	return ret;
 };
 
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+				struct cgroup *cont)
+{
+	mem_cgroup_sockets_destroy(cont, ss);
+}
 #else
 static int register_kmem_files(struct cgroup *cont, struct cgroup_subsys *ss)
 {
 	return 0;
 }
+
+static void kmem_cgroup_destroy(struct cgroup_subsys *ss,
+				struct cgroup *cont)
+{
+}
 #endif
 
 static struct cftype mem_cgroup_files[] = {
@@ -5098,6 +5134,8 @@ static void mem_cgroup_destroy(struct cgroup_subsys *ss,
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
+	kmem_cgroup_destroy(ss, cont);
+
 	mem_cgroup_put(memcg);
 }
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 6a871b8fdd20..5a6a90620656 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -136,6 +136,46 @@
 #include <net/tcp.h>
 #endif
 
+static DEFINE_RWLOCK(proto_list_lock);
+static LIST_HEAD(proto_list);
+
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+int mem_cgroup_sockets_init(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct proto *proto;
+	int ret = 0;
+
+	read_lock(&proto_list_lock);
+	list_for_each_entry(proto, &proto_list, node) {
+		if (proto->init_cgroup) {
+			ret = proto->init_cgroup(cgrp, ss);
+			if (ret)
+				goto out;
+		}
+	}
+
+	read_unlock(&proto_list_lock);
+	return ret;
+out:
+	list_for_each_entry_continue_reverse(proto, &proto_list, node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp, ss);
+	read_unlock(&proto_list_lock);
+	return ret;
+}
+
+void mem_cgroup_sockets_destroy(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct proto *proto;
+
+	read_lock(&proto_list_lock);
+	list_for_each_entry_reverse(proto, &proto_list, node)
+		if (proto->destroy_cgroup)
+			proto->destroy_cgroup(cgrp, ss);
+	read_unlock(&proto_list_lock);
+}
+#endif
+
 /*
  * Each address family might have different locking rules, so we have
  * one slock key per address family:
@@ -2291,9 +2331,6 @@ void sk_common_release(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_common_release);
 
-static DEFINE_RWLOCK(proto_list_lock);
-static LIST_HEAD(proto_list);
-
 #ifdef CONFIG_PROC_FS
 #define PROTO_INUSE_NR	64	/* should be enough for the first time */
 struct prot_inuse {
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index e9d98e621112..ff75d3bbcd6a 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
 obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
 obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
 obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
+obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o
 obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
 
 obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f48bf312cfe8..42714cb1fef3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -73,6 +73,7 @@
 #include <net/xfrm.h>
 #include <net/netdma.h>
 #include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
 
 #include <linux/inet.h>
 #include <linux/ipv6.h>
@@ -1917,6 +1918,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
+	sock_update_memcg(sk);
 	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 
@@ -1974,6 +1976,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 	}
 
 	sk_sockets_allocated_dec(sk);
+	sock_release_memcg(sk);
 }
 EXPORT_SYMBOL(tcp_v4_destroy_sock);
 
@@ -2634,10 +2637,14 @@ struct proto tcp_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	.init_cgroup		= tcp_init_cgroup,
+	.destroy_cgroup		= tcp_destroy_cgroup,
+	.proto_cgroup		= tcp_proto_cgroup,
+#endif
 };
 EXPORT_SYMBOL(tcp_prot);
 
-
 static int __net_init tcp_sk_init(struct net *net)
 {
 	return inet_ctl_sock_create(&net->ipv4.tcp_sock,
diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c
new file mode 100644
index 000000000000..4a68d2c24556
--- /dev/null
+++ b/net/ipv4/tcp_memcontrol.c
@@ -0,0 +1,74 @@
+#include <net/tcp.h>
+#include <net/tcp_memcontrol.h>
+#include <net/sock.h>
+#include <linux/memcontrol.h>
+#include <linux/module.h>
+
+static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
+{
+	return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
+}
+
+static void memcg_tcp_enter_memory_pressure(struct sock *sk)
+{
+	if (!sk->sk_cgrp->memory_pressure)
+		*sk->sk_cgrp->memory_pressure = 1;
+}
+EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);
+
+int tcp_init_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	/*
+	 * The root cgroup does not use res_counters, but rather,
+	 * rely on the data already collected by the network
+	 * subsystem
+	 */
+	struct res_counter *res_parent = NULL;
+	struct cg_proto *cg_proto, *parent_cg;
+	struct tcp_memcontrol *tcp;
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+
+	cg_proto = tcp_prot.proto_cgroup(memcg);
+	if (!cg_proto)
+		return 0;
+
+	tcp = tcp_from_cgproto(cg_proto);
+
+	tcp->tcp_prot_mem[0] = sysctl_tcp_mem[0];
+	tcp->tcp_prot_mem[1] = sysctl_tcp_mem[1];
+	tcp->tcp_prot_mem[2] = sysctl_tcp_mem[2];
+	tcp->tcp_memory_pressure = 0;
+
+	parent_cg = tcp_prot.proto_cgroup(parent);
+	if (parent_cg)
+		res_parent = parent_cg->memory_allocated;
+
+	res_counter_init(&tcp->tcp_memory_allocated, res_parent);
+	percpu_counter_init(&tcp->tcp_sockets_allocated, 0);
+
+	cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
+	cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
+	cg_proto->sysctl_mem = tcp->tcp_prot_mem;
+	cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
+	cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
+	cg_proto->memcg = memcg;
+
+	return 0;
+}
+EXPORT_SYMBOL(tcp_init_cgroup);
+
+void tcp_destroy_cgroup(struct cgroup *cgrp, struct cgroup_subsys *ss)
+{
+	struct mem_cgroup *memcg = mem_cgroup_from_cont(cgrp);
+	struct cg_proto *cg_proto;
+	struct tcp_memcontrol *tcp;
+
+	cg_proto = tcp_prot.proto_cgroup(memcg);
+	if (!cg_proto)
+		return;
+
+	tcp = tcp_from_cgproto(cg_proto);
+	percpu_counter_destroy(&tcp->tcp_sockets_allocated);
+}
+EXPORT_SYMBOL(tcp_destroy_cgroup);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b69c7030aba9..95d3cfb65d39 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -62,6 +62,7 @@
 #include <net/netdma.h>
 #include <net/inet_common.h>
 #include <net/secure_seq.h>
+#include <net/tcp_memcontrol.h>
 
 #include <asm/uaccess.h>
 
@@ -1994,6 +1995,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
 
 	local_bh_disable();
+	sock_update_memcg(sk);
 	sk_sockets_allocated_inc(sk);
 	local_bh_enable();
 
@@ -2227,6 +2229,9 @@ struct proto tcpv6_prot = {
 	.compat_setsockopt	= compat_tcp_setsockopt,
 	.compat_getsockopt	= compat_tcp_getsockopt,
 #endif
+#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
+	.proto_cgroup		= tcp_proto_cgroup,
+#endif
 };
 
 static const struct inet6_protocol tcpv6_protocol = {
-- 
cgit v1.2.3


From 90b41a1cd44cc4e507b554ae5a36562a1ba9a4e8 Mon Sep 17 00:00:00 2001
From: Hagen Paul Pfeifer <hagen@jauu.net>
Date: Mon, 12 Dec 2011 14:30:00 +0000
Subject: netem: add cell concept to simulate special MAC behavior

This extension can be used to simulate special link layer
characteristics. Simulate because packet data is not modified, only the
calculation base is changed to delay a packet based on the original
packet size and artificial cell information.

packet_overhead can be used to simulate a link layer header compression
scheme (e.g. set packet_overhead to -20) or with a positive
packet_overhead value an additional MAC header can be simulated. It is
also possible to "replace" the 14 byte Ethernet header with something
else.

cell_size and cell_overhead can be used to simulate link layer schemes,
based on cells, like some TDMA schemes. Another application area are MAC
schemes using a link layer fragmentation with a (small) header each.
Cell size is the maximum amount of data bytes within one cell. Cell
overhead is an additional variable to change the per-cell-overhead
(e.g.  5 byte header per fragment).

Example (5 kbit/s, 20 byte per packet overhead, cell-size 100 byte, per
cell overhead 5 byte):

  tc qdisc add dev eth0 root netem rate 5kbit 20 100 5

Signed-off-by: Hagen Paul Pfeifer <hagen@jauu.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  3 +++
 net/sched/sch_netem.c     | 33 +++++++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8786ea741f52..8daced32a014 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -502,6 +502,9 @@ struct tc_netem_corrupt {
 
 struct tc_netem_rate {
 	__u32	rate;	/* byte/s */
+	__s32	packet_overhead;
+	__u32	cell_size;
+	__s32	cell_overhead;
 };
 
 enum {
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 3bfd73344f76..1fa2f903d221 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -22,6 +22,7 @@
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 #include <linux/rtnetlink.h>
+#include <linux/reciprocal_div.h>
 
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
@@ -80,6 +81,10 @@ struct netem_sched_data {
 	u32 reorder;
 	u32 corrupt;
 	u32 rate;
+	s32 packet_overhead;
+	u32 cell_size;
+	u32 cell_size_reciprocal;
+	s32 cell_overhead;
 
 	struct crndstate {
 		u32 last;
@@ -299,11 +304,23 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma,
 	return  x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
 }
 
-static psched_time_t packet_len_2_sched_time(unsigned int len, u32 rate)
+static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q)
 {
-	u64 ticks = (u64)len * NSEC_PER_SEC;
+	u64 ticks;
 
-	do_div(ticks, rate);
+	len += q->packet_overhead;
+
+	if (q->cell_size) {
+		u32 cells = reciprocal_divide(len, q->cell_size_reciprocal);
+
+		if (len > cells * q->cell_size)	/* extra cell needed for remainder */
+			cells++;
+		len = cells * (q->cell_size + q->cell_overhead);
+	}
+
+	ticks = (u64)len * NSEC_PER_SEC;
+
+	do_div(ticks, q->rate);
 	return PSCHED_NS2TICKS(ticks);
 }
 
@@ -384,7 +401,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		if (q->rate) {
 			struct sk_buff_head *list = &q->qdisc->q;
 
-			delay += packet_len_2_sched_time(skb->len, q->rate);
+			delay += packet_len_2_sched_time(skb->len, q);
 
 			if (!skb_queue_empty(list)) {
 				/*
@@ -568,6 +585,11 @@ static void get_rate(struct Qdisc *sch, const struct nlattr *attr)
 	const struct tc_netem_rate *r = nla_data(attr);
 
 	q->rate = r->rate;
+	q->packet_overhead = r->packet_overhead;
+	q->cell_size = r->cell_size;
+	if (q->cell_size)
+		q->cell_size_reciprocal = reciprocal_value(q->cell_size);
+	q->cell_overhead = r->cell_overhead;
 }
 
 static int get_loss_clg(struct Qdisc *sch, const struct nlattr *attr)
@@ -909,6 +931,9 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	NLA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
 
 	rate.rate = q->rate;
+	rate.packet_overhead = q->packet_overhead;
+	rate.cell_size = q->cell_size;
+	rate.cell_overhead = q->cell_overhead;
 	NLA_PUT(skb, TCA_NETEM_RATE, sizeof(rate), &rate);
 
 	if (dump_loss_model(q, skb) != 0)
-- 
cgit v1.2.3


From 623ed84b1f9553bc962c2aca92f488aa6f27ecd1 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:10:33 +0000
Subject: mlx4_core: initial header-file changes for SRIOV support

These changes will not affect module operation as yet. They
are only to get some structs and enums in place for use by
subsequent patches (making those smaller).

Added here:
* sriov state structs and inlines (mlx4_is_master/slave/mfunc)
* comm-channel and vhcr support structures
* enum values for new FW and comm-channel virtual commands
  (i.e., commands, passed via the comm channel to the PF-driver).
* prototypes for many command wrapper functions (used by the
  PF context for processing FW commands passed to it by the VFs).
* struct mlx4_eqe is moved from eq.c to mlx4.h (it will be used
  by other mlx4_core source files).

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_port.h |   6 -
 drivers/net/ethernet/mellanox/mlx4/eq.c      |  39 ---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h    | 456 ++++++++++++++++++++++++++-
 include/linux/mlx4/cmd.h                     |  29 +-
 include/linux/mlx4/device.h                  |  42 ++-
 5 files changed, 523 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h
index 19eb244f5165..c1bb834414b5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.h
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h
@@ -39,12 +39,6 @@
 #define SET_PORT_PROMISC_SHIFT	31
 #define SET_PORT_MC_PROMISC_SHIFT	30
 
-enum {
-	MLX4_CMD_SET_VLAN_FLTR  = 0x47,
-	MLX4_CMD_SET_MCAST_FLTR = 0x48,
-	MLX4_CMD_DUMP_ETH_STATS = 0x49,
-};
-
 enum {
 	MCAST_DIRECT_ONLY       = 0,
 	MCAST_DIRECT            = 1,
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index 24ee96775996..ad9e3770b050 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -102,45 +102,6 @@ struct mlx4_eq_context {
 			       (1ull << MLX4_EVENT_TYPE_SRQ_LIMIT)	    | \
 			       (1ull << MLX4_EVENT_TYPE_CMD))
 
-struct mlx4_eqe {
-	u8			reserved1;
-	u8			type;
-	u8			reserved2;
-	u8			subtype;
-	union {
-		u32		raw[6];
-		struct {
-			__be32	cqn;
-		} __packed comp;
-		struct {
-			u16	reserved1;
-			__be16	token;
-			u32	reserved2;
-			u8	reserved3[3];
-			u8	status;
-			__be64	out_param;
-		} __packed cmd;
-		struct {
-			__be32	qpn;
-		} __packed qp;
-		struct {
-			__be32	srqn;
-		} __packed srq;
-		struct {
-			__be32	cqn;
-			u32	reserved1;
-			u8	reserved2[3];
-			u8	syndrome;
-		} __packed cq_err;
-		struct {
-			u32	reserved1[2];
-			__be32	port;
-		} __packed port_change;
-	}			event;
-	u8			reserved3[3];
-	u8			owner;
-} __packed;
-
 static void eq_set_ci(struct mlx4_eq *eq, int req_not)
 {
 	__raw_writel((__force u32) cpu_to_be32((eq->cons_index & 0xffffff) |
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 5dfa68ffc11c..69177614666f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -46,6 +46,7 @@
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/driver.h>
 #include <linux/mlx4/doorbell.h>
+#include <linux/mlx4/cmd.h>
 
 #define DRV_NAME	"mlx4_core"
 #define DRV_VERSION	"1.0"
@@ -54,7 +55,9 @@
 enum {
 	MLX4_HCR_BASE		= 0x80680,
 	MLX4_HCR_SIZE		= 0x0001c,
-	MLX4_CLR_INT_SIZE	= 0x00008
+	MLX4_CLR_INT_SIZE	= 0x00008,
+	MLX4_SLAVE_COMM_BASE	= 0x0,
+	MLX4_COMM_PAGESIZE	= 0x1000
 };
 
 enum {
@@ -80,6 +83,94 @@ enum {
 	MLX4_NUM_CMPTS		= MLX4_CMPT_NUM_TYPE << MLX4_CMPT_SHIFT
 };
 
+enum mlx4_mr_state {
+	MLX4_MR_DISABLED = 0,
+	MLX4_MR_EN_HW,
+	MLX4_MR_EN_SW
+};
+
+#define MLX4_COMM_TIME		10000
+enum {
+	MLX4_COMM_CMD_RESET,
+	MLX4_COMM_CMD_VHCR0,
+	MLX4_COMM_CMD_VHCR1,
+	MLX4_COMM_CMD_VHCR2,
+	MLX4_COMM_CMD_VHCR_EN,
+	MLX4_COMM_CMD_VHCR_POST,
+	MLX4_COMM_CMD_FLR = 254
+};
+
+/*The flag indicates that the slave should delay the RESET cmd*/
+#define MLX4_DELAY_RESET_SLAVE 0xbbbbbbb
+/*indicates how many retries will be done if we are in the middle of FLR*/
+#define NUM_OF_RESET_RETRIES	10
+#define SLEEP_TIME_IN_RESET	(2 * 1000)
+enum mlx4_resource {
+	RES_QP,
+	RES_CQ,
+	RES_SRQ,
+	RES_XRCD,
+	RES_MPT,
+	RES_MTT,
+	RES_MAC,
+	RES_VLAN,
+	RES_EQ,
+	RES_COUNTER,
+	MLX4_NUM_OF_RESOURCE_TYPE
+};
+
+enum mlx4_alloc_mode {
+	RES_OP_RESERVE,
+	RES_OP_RESERVE_AND_MAP,
+	RES_OP_MAP_ICM,
+};
+
+
+/*
+ *Virtual HCR structures.
+ * mlx4_vhcr is the sw representation, in machine endianess
+ *
+ * mlx4_vhcr_cmd is the formalized structure, the one that is passed
+ * to FW to go through communication channel.
+ * It is big endian, and has the same structure as the physical HCR
+ * used by command interface
+ */
+struct mlx4_vhcr {
+	u64	in_param;
+	u64	out_param;
+	u32	in_modifier;
+	u32	errno;
+	u16	op;
+	u16	token;
+	u8	op_modifier;
+	u8	e_bit;
+};
+
+struct mlx4_vhcr_cmd {
+	__be64 in_param;
+	__be32 in_modifier;
+	__be64 out_param;
+	__be16 token;
+	u16 reserved;
+	u8 status;
+	u8 flags;
+	__be16 opcode;
+};
+
+struct mlx4_cmd_info {
+	u16 opcode;
+	bool has_inbox;
+	bool has_outbox;
+	bool out_is_imm;
+	bool encode_slave_id;
+	int (*verify)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+		      struct mlx4_cmd_mailbox *inbox);
+	int (*wrapper)(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr,
+		       struct mlx4_cmd_mailbox *inbox,
+		       struct mlx4_cmd_mailbox *outbox,
+		       struct mlx4_cmd_info *cmd);
+};
+
 #ifdef CONFIG_MLX4_DEBUG
 extern int mlx4_debug_level;
 #else /* CONFIG_MLX4_DEBUG */
@@ -99,6 +190,9 @@ do {									\
 #define mlx4_warn(mdev, format, arg...) \
 	dev_warn(&mdev->pdev->dev, format, ##arg)
 
+#define MLX4_MAX_NUM_SLAVES	(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
+#define ALL_SLAVES 0xff
+
 struct mlx4_bitmap {
 	u32			last;
 	u32			top;
@@ -130,6 +224,62 @@ struct mlx4_icm_table {
 	struct mlx4_icm	      **icm;
 };
 
+struct mlx4_eqe {
+	u8			reserved1;
+	u8			type;
+	u8			reserved2;
+	u8			subtype;
+	union {
+		u32		raw[6];
+		struct {
+			__be32	cqn;
+		} __packed comp;
+		struct {
+			u16	reserved1;
+			__be16	token;
+			u32	reserved2;
+			u8	reserved3[3];
+			u8	status;
+			__be64	out_param;
+		} __packed cmd;
+		struct {
+			__be32	qpn;
+		} __packed qp;
+		struct {
+			__be32	srqn;
+		} __packed srq;
+		struct {
+			__be32	cqn;
+			u32	reserved1;
+			u8	reserved2[3];
+			u8	syndrome;
+		} __packed cq_err;
+		struct {
+			u32	reserved1[2];
+			__be32	port;
+		} __packed port_change;
+		struct {
+			#define COMM_CHANNEL_BIT_ARRAY_SIZE	4
+			u32 reserved;
+			u32 bit_vec[COMM_CHANNEL_BIT_ARRAY_SIZE];
+		} __packed comm_channel_arm;
+		struct {
+			u8	port;
+			u8	reserved[3];
+			__be64	mac;
+		} __packed mac_update;
+		struct {
+			u8	port;
+		} __packed sw_event;
+		struct {
+			__be32	slave_id;
+		} __packed flr_event;
+	}			event;
+	u8			slave_id;
+	u8			reserved3[2];
+	u8			owner;
+} __packed;
+
 struct mlx4_eq {
 	struct mlx4_dev	       *dev;
 	void __iomem	       *doorbell;
@@ -142,6 +292,18 @@ struct mlx4_eq {
 	struct mlx4_mtt		mtt;
 };
 
+struct mlx4_slave_eqe {
+	u8 type;
+	u8 port;
+	u32 param;
+};
+
+struct mlx4_slave_event_eq_info {
+	u32 eqn;
+	u16 token;
+	u64 event_type;
+};
+
 struct mlx4_profile {
 	int			num_qp;
 	int			rdmarc_per_qp;
@@ -155,17 +317,30 @@ struct mlx4_profile {
 struct mlx4_fw {
 	u64			clr_int_base;
 	u64			catas_offset;
+	u64			comm_base;
 	struct mlx4_icm	       *fw_icm;
 	struct mlx4_icm	       *aux_icm;
 	u32			catas_size;
 	u16			fw_pages;
 	u8			clr_int_bar;
 	u8			catas_bar;
+	u8			comm_bar;
+};
+
+struct mlx4_comm {
+	u32			slave_write;
+	u32			slave_read;
 };
 
 #define MGM_QPN_MASK       0x00FFFFFF
 #define MGM_BLCK_LB_BIT    30
 
+#define VLAN_FLTR_SIZE	128
+
+struct mlx4_vlan_fltr {
+	__be32 entry[VLAN_FLTR_SIZE];
+};
+
 struct mlx4_promisc_qp {
 	struct list_head list;
 	u32 qpn;
@@ -184,12 +359,88 @@ struct mlx4_mgm {
 	u8			gid[16];
 	__be32			qp[MLX4_QP_PER_MGM];
 };
+
+struct mlx4_slave_state {
+	u8 comm_toggle;
+	u8 last_cmd;
+	u8 init_port_mask;
+	bool active;
+	u8 function;
+	dma_addr_t vhcr_dma;
+	u16 mtu[MLX4_MAX_PORTS + 1];
+	__be32 ib_cap_mask[MLX4_MAX_PORTS + 1];
+	struct mlx4_slave_eqe eq[MLX4_MFUNC_MAX_EQES];
+	struct list_head mcast_filters[MLX4_MAX_PORTS + 1];
+	struct mlx4_vlan_fltr *vlan_filter[MLX4_MAX_PORTS + 1];
+	struct mlx4_slave_event_eq_info event_eq;
+	u16 eq_pi;
+	u16 eq_ci;
+	spinlock_t lock;
+	/*initialized via the kzalloc*/
+	u8 is_slave_going_down;
+	u32 cookie;
+};
+
+struct slave_list {
+	struct mutex mutex;
+	struct list_head res_list[MLX4_NUM_OF_RESOURCE_TYPE];
+};
+
+struct mlx4_resource_tracker {
+	spinlock_t lock;
+	/* tree for each resources */
+	struct radix_tree_root res_tree[MLX4_NUM_OF_RESOURCE_TYPE];
+	/* num_of_slave's lists, one per slave */
+	struct slave_list *slave_list;
+};
+
+#define SLAVE_EVENT_EQ_SIZE	128
+struct mlx4_slave_event_eq {
+	u32 eqn;
+	u32 cons;
+	u32 prod;
+	struct mlx4_eqe event_eqe[SLAVE_EVENT_EQ_SIZE];
+};
+
+struct mlx4_master_qp0_state {
+	int proxy_qp0_active;
+	int qp0_active;
+	int port_active;
+};
+
+struct mlx4_mfunc_master_ctx {
+	struct mlx4_slave_state *slave_state;
+	struct mlx4_master_qp0_state qp0_state[MLX4_MAX_PORTS + 1];
+	int			init_port_ref[MLX4_MAX_PORTS + 1];
+	u16			max_mtu[MLX4_MAX_PORTS + 1];
+	int			disable_mcast_ref[MLX4_MAX_PORTS + 1];
+	struct mlx4_resource_tracker res_tracker;
+	struct workqueue_struct *comm_wq;
+	struct work_struct	comm_work;
+	struct work_struct	slave_event_work;
+	struct work_struct	slave_flr_event_work;
+	spinlock_t		slave_state_lock;
+	u32			comm_arm_bit_vector[4];
+	struct mlx4_eqe		cmd_eqe;
+	struct mlx4_slave_event_eq slave_eq;
+	struct mutex		gen_eqe_mutex[MLX4_MFUNC_MAX];
+};
+
+struct mlx4_mfunc {
+	struct mlx4_comm __iomem       *comm;
+	struct mlx4_vhcr_cmd	       *vhcr;
+	dma_addr_t			vhcr_dma;
+
+	struct mlx4_mfunc_master_ctx	master;
+};
+
 struct mlx4_cmd {
 	struct pci_pool	       *pool;
 	void __iomem	       *hcr;
 	struct mutex		hcr_mutex;
 	struct semaphore	poll_sem;
 	struct semaphore	event_sem;
+	struct semaphore	slave_sem;
 	int			max_cmds;
 	spinlock_t		context_lock;
 	int			free_head;
@@ -197,6 +448,7 @@ struct mlx4_cmd {
 	u16			token_mask;
 	u8			use_events;
 	u8			toggle;
+	u8			comm_toggle;
 };
 
 struct mlx4_uar_table {
@@ -333,6 +585,7 @@ struct mlx4_priv {
 
 	struct mlx4_fw		fw;
 	struct mlx4_cmd		cmd;
+	struct mlx4_mfunc	mfunc;
 
 	struct mlx4_bitmap	pd_bitmap;
 	struct mlx4_bitmap	xrcd_bitmap;
@@ -404,6 +657,42 @@ void mlx4_cleanup_qp_table(struct mlx4_dev *dev);
 void mlx4_cleanup_srq_table(struct mlx4_dev *dev);
 void mlx4_cleanup_mcg_table(struct mlx4_dev *dev);
 
+int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SYNC_TPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_MPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_MPT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_DMA_wrapper(struct mlx4_dev *dev, int slave,
+		     struct mlx4_vhcr *vhcr,
+		     struct mlx4_cmd_mailbox *inbox,
+		     struct mlx4_cmd_mailbox *outbox,
+		     struct mlx4_cmd_info *cmd);
+
 void mlx4_start_catas_poll(struct mlx4_dev *dev);
 void mlx4_stop_catas_poll(struct mlx4_dev *dev);
 void mlx4_catas_init(void);
@@ -419,6 +708,101 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 		      struct mlx4_profile *request,
 		      struct mlx4_dev_cap *dev_cap,
 		      struct mlx4_init_hca_param *init_hca);
+void mlx4_master_comm_channel(struct work_struct *work);
+void mlx4_gen_slave_eqe(struct work_struct *work);
+void mlx4_master_handle_slave_flr(struct work_struct *work);
+
+int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_MAP_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox,
+			struct mlx4_cmd_mailbox *outbox,
+			struct mlx4_cmd_info *cmd);
+int mlx4_COMM_INT_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_EQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_MODIFY_CQ_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_HW2SW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_ARM_SRQ_wrapper(struct mlx4_dev *dev, int slave,
+			 struct mlx4_vhcr *vhcr,
+			 struct mlx4_cmd_mailbox *inbox,
+			 struct mlx4_cmd_mailbox *outbox,
+			 struct mlx4_cmd_info *cmd);
+int mlx4_GEN_QP_wrapper(struct mlx4_dev *dev, int slave,
+			struct mlx4_vhcr *vhcr,
+			struct mlx4_cmd_mailbox *inbox,
+			struct mlx4_cmd_mailbox *outbox,
+			struct mlx4_cmd_info *cmd);
+int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
+			     struct mlx4_vhcr *vhcr,
+			     struct mlx4_cmd_mailbox *inbox,
+			     struct mlx4_cmd_mailbox *outbox,
+			     struct mlx4_cmd_info *cmd);
+int mlx4_INIT2RTR_QP_wrapper(struct mlx4_dev *dev, int slave,
+			     struct mlx4_vhcr *vhcr,
+			     struct mlx4_cmd_mailbox *inbox,
+			     struct mlx4_cmd_mailbox *outbox,
+			     struct mlx4_cmd_info *cmd);
+int mlx4_2RST_QP_wrapper(struct mlx4_dev *dev, int slave,
+			 struct mlx4_vhcr *vhcr,
+			 struct mlx4_cmd_mailbox *inbox,
+			 struct mlx4_cmd_mailbox *outbox,
+			 struct mlx4_cmd_info *cmd);
+
+int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
 
 int mlx4_cmd_init(struct mlx4_dev *dev);
 void mlx4_cmd_cleanup(struct mlx4_dev *dev);
@@ -452,12 +836,82 @@ void mlx4_init_mac_table(struct mlx4_dev *dev, struct mlx4_mac_table *table);
 void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table);
 
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port);
+/* resource tracker functions*/
+int mlx4_get_slave_from_resource_id(struct mlx4_dev *dev,
+				    enum mlx4_resource resource_type,
+				    int resource_id, int *slave);
+void mlx4_delete_all_resources_for_slave(struct mlx4_dev *dev, int slave_id);
+int mlx4_init_resource_tracker(struct mlx4_dev *dev);
+
+void mlx4_free_resource_tracker(struct mlx4_dev *dev);
+
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd);
+int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			    struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			    struct mlx4_cmd_info *cmd);
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps);
 int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port);
 
+
+int mlx4_QP_ATTACH_wrapper(struct mlx4_dev *dev, int slave,
+			   struct mlx4_vhcr *vhcr,
+			   struct mlx4_cmd_mailbox *inbox,
+			   struct mlx4_cmd_mailbox *outbox,
+			   struct mlx4_cmd_info *cmd);
+
+int mlx4_PROMISC_wrapper(struct mlx4_dev *dev, int slave,
+			 struct mlx4_vhcr *vhcr,
+			 struct mlx4_cmd_mailbox *inbox,
+			 struct mlx4_cmd_mailbox *outbox,
+			 struct mlx4_cmd_info *cmd);
 int mlx4_qp_detach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  enum mlx4_protocol prot, enum mlx4_steer_type steer);
 int mlx4_qp_attach_common(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  int block_mcast_loopback, enum mlx4_protocol prot,
 			  enum mlx4_steer_type steer);
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+				struct mlx4_vhcr *vhcr,
+				struct mlx4_cmd_mailbox *inbox,
+				struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd);
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+			       struct mlx4_vhcr *vhcr,
+			       struct mlx4_cmd_mailbox *inbox,
+			       struct mlx4_cmd_mailbox *outbox,
+			       struct mlx4_cmd_info *cmd);
+int mlx4_common_set_vlan_fltr(struct mlx4_dev *dev, int function,
+				     int port, void *buf);
+int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave, u32 in_mod,
+				struct mlx4_cmd_mailbox *outbox);
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+				   struct mlx4_vhcr *vhcr,
+				   struct mlx4_cmd_mailbox *inbox,
+				   struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd);
+int mlx4_PKEY_TABLE_wrapper(struct mlx4_dev *dev, int slave,
+			    struct mlx4_vhcr *vhcr,
+			    struct mlx4_cmd_mailbox *inbox,
+			    struct mlx4_cmd_mailbox *outbox,
+			    struct mlx4_cmd_info *cmd);
+int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
+			       struct mlx4_vhcr *vhcr,
+			       struct mlx4_cmd_mailbox *inbox,
+			       struct mlx4_cmd_mailbox *outbox,
+			       struct mlx4_cmd_info *cmd);
 #endif /* MLX4_H */
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index b56e4587208d..e8e92814c8a0 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -59,12 +59,15 @@ enum {
 	MLX4_CMD_HW_HEALTH_CHECK = 0x50,
 	MLX4_CMD_SET_PORT	 = 0xc,
 	MLX4_CMD_SET_NODE	 = 0x5a,
+	MLX4_CMD_QUERY_FUNC	 = 0x56,
 	MLX4_CMD_ACCESS_DDR	 = 0x2e,
 	MLX4_CMD_MAP_ICM	 = 0xffa,
 	MLX4_CMD_UNMAP_ICM	 = 0xff9,
 	MLX4_CMD_MAP_ICM_AUX	 = 0xffc,
 	MLX4_CMD_UNMAP_ICM_AUX	 = 0xffb,
 	MLX4_CMD_SET_ICM_SIZE	 = 0xffd,
+	/*master notify fw on finish for slave's flr*/
+	MLX4_CMD_INFORM_FLR_DONE = 0x5b,
 
 	/* TPT commands */
 	MLX4_CMD_SW2HW_MPT	 = 0xd,
@@ -119,6 +122,26 @@ enum {
 	/* miscellaneous commands */
 	MLX4_CMD_DIAG_RPRT	 = 0x30,
 	MLX4_CMD_NOP		 = 0x31,
+	MLX4_CMD_ACCESS_MEM	 = 0x2e,
+	MLX4_CMD_SET_VEP	 = 0x52,
+
+	/* Ethernet specific commands */
+	MLX4_CMD_SET_VLAN_FLTR	 = 0x47,
+	MLX4_CMD_SET_MCAST_FLTR	 = 0x48,
+	MLX4_CMD_DUMP_ETH_STATS	 = 0x49,
+
+	/* Communication channel commands */
+	MLX4_CMD_ARM_COMM_CHANNEL = 0x57,
+	MLX4_CMD_GEN_EQE	 = 0x58,
+
+	/* virtual commands */
+	MLX4_CMD_ALLOC_RES	 = 0xf00,
+	MLX4_CMD_FREE_RES	 = 0xf01,
+	MLX4_CMD_MCAST_ATTACH	 = 0xf05,
+	MLX4_CMD_UCAST_ATTACH	 = 0xf06,
+	MLX4_CMD_PROMISC         = 0xf08,
+	MLX4_CMD_QUERY_FUNC_CAP  = 0xf0a,
+	MLX4_CMD_QP_ATTACH	 = 0xf0b,
 
 	/* debug commands */
 	MLX4_CMD_QUERY_DEBUG_MSG = 0x2a,
@@ -126,6 +149,7 @@ enum {
 
 	/* statistics commands */
 	MLX4_CMD_QUERY_IF_STAT	 = 0X54,
+	MLX4_CMD_SET_IF_STAT	 = 0X55,
 };
 
 enum {
@@ -135,7 +159,8 @@ enum {
 };
 
 enum {
-	MLX4_MAILBOX_SIZE	=  4096
+	MLX4_MAILBOX_SIZE	= 4096,
+	MLX4_ACCESS_MEM_ALIGN	= 256,
 };
 
 enum {
@@ -192,4 +217,6 @@ static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_para
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
 void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox);
 
+u32 mlx4_comm_get_version(void);
+
 #endif /* MLX4_CMD_H */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index ca2c39771c38..b9466af2348f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -47,6 +47,9 @@
 enum {
 	MLX4_FLAG_MSI_X		= 1 << 0,
 	MLX4_FLAG_OLD_PORT_CMDS	= 1 << 1,
+	MLX4_FLAG_MASTER	= 1 << 2,
+	MLX4_FLAG_SLAVE		= 1 << 3,
+	MLX4_FLAG_SRIOV		= 1 << 4,
 };
 
 enum {
@@ -57,6 +60,15 @@ enum {
 	MLX4_BOARD_ID_LEN = 64
 };
 
+enum {
+	MLX4_MAX_NUM_PF		= 16,
+	MLX4_MAX_NUM_VF		= 64,
+	MLX4_MFUNC_MAX		= 80,
+	MLX4_MFUNC_EQ_NUM	= 4,
+	MLX4_MFUNC_MAX_EQES     = 8,
+	MLX4_MFUNC_EQE_MASK     = (MLX4_MFUNC_MAX_EQES - 1)
+};
+
 enum {
 	MLX4_DEV_CAP_FLAG_RC		= 1LL <<  0,
 	MLX4_DEV_CAP_FLAG_UC		= 1LL <<  1,
@@ -117,7 +129,11 @@ enum mlx4_event {
 	MLX4_EVENT_TYPE_PORT_CHANGE	   = 0x09,
 	MLX4_EVENT_TYPE_EQ_OVERFLOW	   = 0x0f,
 	MLX4_EVENT_TYPE_ECC_DETECT	   = 0x0e,
-	MLX4_EVENT_TYPE_CMD		   = 0x0a
+	MLX4_EVENT_TYPE_CMD		   = 0x0a,
+	MLX4_EVENT_TYPE_VEP_UPDATE	   = 0x19,
+	MLX4_EVENT_TYPE_COMM_CHANNEL	   = 0x18,
+	MLX4_EVENT_TYPE_FLR_EVENT	   = 0x1c,
+	MLX4_EVENT_TYPE_NONE		   = 0xff,
 };
 
 enum {
@@ -184,6 +200,7 @@ enum mlx4_qp_region {
 };
 
 enum mlx4_port_type {
+	MLX4_PORT_TYPE_NONE	= 0,
 	MLX4_PORT_TYPE_IB	= 1,
 	MLX4_PORT_TYPE_ETH	= 2,
 	MLX4_PORT_TYPE_AUTO	= 3
@@ -216,6 +233,7 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
 
 struct mlx4_caps {
 	u64			fw_ver;
+	u32			function;
 	int			num_ports;
 	int			vl_cap[MLX4_MAX_PORTS + 1];
 	int			ib_mtu_cap[MLX4_MAX_PORTS + 1];
@@ -466,6 +484,7 @@ struct mlx4_counter {
 struct mlx4_dev {
 	struct pci_dev	       *pdev;
 	unsigned long		flags;
+	unsigned long		num_slaves;
 	struct mlx4_caps	caps;
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
@@ -494,8 +513,27 @@ struct mlx4_init_port_param {
 #define mlx4_foreach_ib_transport_port(port, dev)			\
 	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
 		if (((dev)->caps.port_mask & 1 << ((port) - 1)) ||	\
-		    ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+		     ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+
+static inline int mlx4_is_master(struct mlx4_dev *dev)
+{
+	return dev->flags & MLX4_FLAG_MASTER;
+}
+
+static inline int mlx4_is_qp_reserved(struct mlx4_dev *dev, u32 qpn)
+{
+	return (qpn < dev->caps.sqp_start + 8);
+}
 
+static inline int mlx4_is_mfunc(struct mlx4_dev *dev)
+{
+	return dev->flags & (MLX4_FLAG_SLAVE | MLX4_FLAG_MASTER);
+}
+
+static inline int mlx4_is_slave(struct mlx4_dev *dev)
+{
+	return dev->flags & MLX4_FLAG_SLAVE;
+}
 
 int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct,
 		   struct mlx4_buf *buf);
-- 
cgit v1.2.3


From 65dab25deb8da7dba4b6dd0145a9143be7f8369f Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:10:41 +0000
Subject: mlx4: Extanding port_mask functionality

Port mask now has additional state.
Port can be set as "none". In this case neither the mlx4_en or mlx4_ib
drivers take ownership of the port.
In multifunction mode there is an option to set the vfs as single ported devices.
(in single function mode, both physical ports belong to same function)

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/main.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx4/main.c |  4 +---
 include/linux/mlx4/device.h               | 13 ++++++-------
 3 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 77f3dbc0aaa1..6128b2940c49 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -177,7 +177,7 @@ mlx4_ib_port_link_layer(struct ib_device *device, u8 port_num)
 {
 	struct mlx4_dev *dev = to_mdev(device)->dev;
 
-	return dev->caps.port_mask & (1 << (port_num - 1)) ?
+	return dev->caps.port_mask[port_num] == MLX4_PORT_TYPE_IB ?
 		IB_LINK_LAYER_INFINIBAND : IB_LINK_LAYER_ETHERNET;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 94bbc85a532d..64d03f8b23ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -140,10 +140,8 @@ static void mlx4_set_port_mask(struct mlx4_dev *dev)
 {
 	int i;
 
-	dev->caps.port_mask = 0;
 	for (i = 1; i <= dev->caps.num_ports; ++i)
-		if (dev->caps.port_type[i] == MLX4_PORT_TYPE_IB)
-			dev->caps.port_mask |= 1 << (i - 1);
+		dev->caps.port_mask[i] = dev->caps.port_type[i];
 }
 
 static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b9466af2348f..3333018d2913 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -302,7 +302,7 @@ struct mlx4_caps {
 	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
-	u32			port_mask;
+	u32			port_mask[MLX4_MAX_PORTS + 1];
 	enum mlx4_port_type	possible_type[MLX4_MAX_PORTS + 1];
 	u32			max_counters;
 	u8			ext_port_cap[MLX4_MAX_PORTS + 1];
@@ -507,13 +507,12 @@ struct mlx4_init_port_param {
 
 #define mlx4_foreach_port(port, dev, type)				\
 	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
-		if (((type) == MLX4_PORT_TYPE_IB ? (dev)->caps.port_mask : \
-		     ~(dev)->caps.port_mask) & 1 << ((port) - 1))
+		if ((type) == (dev)->caps.port_mask[(port)])
 
-#define mlx4_foreach_ib_transport_port(port, dev)			\
-	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	\
-		if (((dev)->caps.port_mask & 1 << ((port) - 1)) ||	\
-		     ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
+#define mlx4_foreach_ib_transport_port(port, dev)                         \
+	for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++)	  \
+		if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \
+			((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE))
 
 static inline int mlx4_is_master(struct mlx4_dev *dev)
 {
-- 
cgit v1.2.3


From f9baff509f8a05a79626defdbdf4f4aa4efd373b Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:10:51 +0000
Subject: mlx4_core: Add "native" argument to mlx4_cmd and its callers (where
 needed)

For SRIOV, some Hypervisor commands can be executed directly (native = 1).
Others should go through the command wrapper flow (for tracking resource
usage, for example, or for changing some HCA configurations that slaves
need to be notified of).

This patch sets the groundwork for this capability -- adding the correct
value of "native" in each case.

Note that if SRIOV is not activated, this parameter has no effect.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/mlx4/mad.c                 |  6 ++--
 drivers/infiniband/hw/mlx4/main.c                |  7 ++--
 drivers/net/ethernet/mellanox/mlx4/cmd.c         |  2 +-
 drivers/net/ethernet/mellanox/mlx4/cq.c          |  6 ++--
 drivers/net/ethernet/mellanox/mlx4/en_port.c     | 15 ++++----
 drivers/net/ethernet/mellanox/mlx4/en_selftest.c |  2 +-
 drivers/net/ethernet/mellanox/mlx4/eq.c          |  9 +++--
 drivers/net/ethernet/mellanox/mlx4/fw.c          | 45 ++++++++++++++----------
 drivers/net/ethernet/mellanox/mlx4/icm.c         |  5 +--
 drivers/net/ethernet/mellanox/mlx4/mcg.c         | 10 +++---
 drivers/net/ethernet/mellanox/mlx4/mr.c          |  8 +++--
 drivers/net/ethernet/mellanox/mlx4/port.c        | 12 ++++---
 drivers/net/ethernet/mellanox/mlx4/qp.c          | 11 +++---
 drivers/net/ethernet/mellanox/mlx4/sense.c       |  3 +-
 drivers/net/ethernet/mellanox/mlx4/srq.c         |  8 ++---
 include/linux/mlx4/cmd.h                         | 20 +++++++----
 16 files changed, 102 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index f36da994a85a..95c94d8f0254 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -109,7 +109,8 @@ int mlx4_MAD_IFC(struct mlx4_ib_dev *dev, int ignore_mkey, int ignore_bkey,
 
 	err = mlx4_cmd_box(dev->dev, inmailbox->dma, outmailbox->dma,
 			   in_modifier, op_modifier,
-			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
 
 	if (!err)
 		memcpy(response_mad, outmailbox->buf, 256);
@@ -330,7 +331,8 @@ static int iboe_process_mad(struct ib_device *ibdev, int mad_flags, u8 port_num,
 		return IB_MAD_RESULT_FAILURE;
 
 	err = mlx4_cmd_box(dev->dev, 0, mailbox->dma, inmod, 0,
-			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_QUERY_IF_STAT, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_WRAPPED);
 	if (err)
 		err = IB_MAD_RESULT_FAILURE;
 	else {
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 6128b2940c49..34f8a5d9da75 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -434,7 +434,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask,
 	memset(mailbox->buf, 0, 256);
 	memcpy(mailbox->buf, props->node_desc, 64);
 	mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0,
-		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A);
+		 MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(to_mdev(ibdev)->dev, mailbox);
 
@@ -463,7 +463,7 @@ static int mlx4_SET_PORT(struct mlx4_ib_dev *dev, u8 port, int reset_qkey_viols,
 	}
 
 	err = mlx4_cmd(dev->dev, mailbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev->dev, mailbox);
 	return err;
@@ -899,7 +899,8 @@ static void update_gids_task(struct work_struct *work)
 	memcpy(gids, gw->gids, sizeof gw->gids);
 
 	err = mlx4_cmd(dev, mailbox->dma, MLX4_SET_PORT_GID_TABLE << 8 | gw->port,
-		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B);
+		       1, MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+		       MLX4_CMD_NATIVE);
 	if (err)
 		printk(KERN_WARNING "set port command failed\n");
 	else {
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 78f5a1a0b8c8..b27654e5d544 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -311,7 +311,7 @@ out:
 
 int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 	       int out_is_imm, u32 in_modifier, u8 op_modifier,
-	       u16 op, unsigned long timeout)
+	       u16 op, unsigned long timeout, int native)
 {
 	if (mlx4_priv(dev)->cmd.use_events)
 		return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm,
diff --git a/drivers/net/ethernet/mellanox/mlx4/cq.c b/drivers/net/ethernet/mellanox/mlx4/cq.c
index 499a5168892a..ebd0eb234f14 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cq.c
@@ -118,14 +118,14 @@ static int mlx4_SW2HW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int cq_num)
 {
 	return mlx4_cmd(dev, mailbox->dma, cq_num, 0, MLX4_CMD_SW2HW_CQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_MODIFY_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int cq_num, u32 opmod)
 {
 	return mlx4_cmd(dev, mailbox->dma, cq_num, opmod, MLX4_CMD_MODIFY_CQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -133,7 +133,7 @@ static int mlx4_HW2SW_CQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 {
 	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, cq_num,
 			    mailbox ? 0 : 1, MLX4_CMD_HW2SW_CQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 int mlx4_cq_modify(struct mlx4_dev *dev, struct mlx4_cq *cq,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 03c84cd78cde..ae120effb8a5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -45,7 +45,8 @@ int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
 			u64 mac, u64 clear, u8 mode)
 {
 	return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
-			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
+			MLX4_CMD_WRAPPED);
 }
 
 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
@@ -72,7 +73,7 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
 		filter->entry[i] = cpu_to_be32(entry);
 	}
 	err = mlx4_cmd(dev, mailbox->dma, priv->port, 0, MLX4_CMD_SET_VLAN_FLTR,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
 }
@@ -101,7 +102,7 @@ int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
 
 	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -140,7 +141,7 @@ int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
 
 	in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -159,7 +160,8 @@ int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
 		return PTR_ERR(mailbox);
 	memset(mailbox->buf, 0, sizeof(*qport_context));
 	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, port, 0,
-			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B);
+			   MLX4_CMD_QUERY_PORT, MLX4_CMD_TIME_CLASS_B,
+			   MLX4_CMD_WRAPPED);
 	if (err)
 		goto out;
 	qport_context = mailbox->buf;
@@ -204,7 +206,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 		return PTR_ERR(mailbox);
 	memset(mailbox->buf, 0, sizeof(*mlx4_en_stats));
 	err = mlx4_cmd_box(mdev->dev, 0, mailbox->dma, in_mod, 0,
-			   MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B);
+			   MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
+			   MLX4_CMD_WRAPPED);
 	if (err)
 		goto out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
index 9fdbcecd499d..bf2e5d3f177c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
@@ -43,7 +43,7 @@
 static int mlx4_en_test_registers(struct mlx4_en_priv *priv)
 {
 	return mlx4_cmd(priv->mdev->dev, 0, 0, 0, MLX4_CMD_HW_HEALTH_CHECK,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c
index ad9e3770b050..9e5863dfa60a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/eq.c
@@ -255,21 +255,24 @@ static int mlx4_MAP_EQ(struct mlx4_dev *dev, u64 event_mask, int unmap,
 			int eq_num)
 {
 	return mlx4_cmd(dev, event_mask, (unmap << 31) | eq_num,
-			0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B);
+			0, MLX4_CMD_MAP_EQ, MLX4_CMD_TIME_CLASS_B,
+			MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_SW2HW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int eq_num)
 {
 	return mlx4_cmd(dev, mailbox->dma, eq_num, 0, MLX4_CMD_SW2HW_EQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A,
+			MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_EQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			 int eq_num)
 {
 	return mlx4_cmd_box(dev, 0, mailbox->dma, eq_num, 0, MLX4_CMD_HW2SW_EQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A,
+			    MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_num_eq_uar(struct mlx4_dev *dev)
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 435ca6e49734..9659fb085e5e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -139,7 +139,7 @@ int mlx4_MOD_STAT_CFG(struct mlx4_dev *dev, struct mlx4_mod_stat_cfg *cfg)
 	MLX4_PUT(inbox, cfg->log_pg_sz_m, MOD_STAT_CFG_PG_SZ_M_OFFSET);
 
 	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_MOD_STAT_CFG,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -229,7 +229,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	outbox = mailbox->buf;
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_DEV_CAP,
-			   MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_TIME_CLASS_A, !mlx4_is_slave(dev));
 	if (err)
 		goto out;
 
@@ -396,7 +396,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 		for (i = 1; i <= dev_cap->num_ports; ++i) {
 			err = mlx4_cmd_box(dev, 0, mailbox->dma, i, 0, MLX4_CMD_QUERY_PORT,
-					   MLX4_CMD_TIME_CLASS_B);
+					   MLX4_CMD_TIME_CLASS_B,
+					   !mlx4_is_slave(dev));
 			if (err)
 				goto out;
 
@@ -519,7 +520,8 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
 
 			if (++nent == MLX4_MAILBOX_SIZE / 16) {
 				err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
-						MLX4_CMD_TIME_CLASS_B);
+						MLX4_CMD_TIME_CLASS_B,
+						MLX4_CMD_NATIVE);
 				if (err)
 					goto out;
 				nent = 0;
@@ -528,7 +530,8 @@ int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt)
 	}
 
 	if (nent)
-		err = mlx4_cmd(dev, mailbox->dma, nent, 0, op, MLX4_CMD_TIME_CLASS_B);
+		err = mlx4_cmd(dev, mailbox->dma, nent, 0, op,
+			       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 	if (err)
 		goto out;
 
@@ -557,13 +560,15 @@ int mlx4_MAP_FA(struct mlx4_dev *dev, struct mlx4_icm *icm)
 
 int mlx4_UNMAP_FA(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA, MLX4_CMD_TIME_CLASS_B);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_FA,
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 
 int mlx4_RUN_FW(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW, MLX4_CMD_TIME_CLASS_A);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_RUN_FW,
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 
 int mlx4_QUERY_FW(struct mlx4_dev *dev)
@@ -595,7 +600,7 @@ int mlx4_QUERY_FW(struct mlx4_dev *dev)
 	outbox = mailbox->buf;
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_FW,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 	if (err)
 		goto out;
 
@@ -711,7 +716,7 @@ int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter)
 	outbox = mailbox->buf;
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0, MLX4_CMD_QUERY_ADAPTER,
-			   MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 	if (err)
 		goto out;
 
@@ -834,7 +839,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
 	MLX4_PUT(inbox, param->log_uar_sz,      INIT_HCA_LOG_UAR_SZ_OFFSET);
 
-	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000);
+	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
+		       MLX4_CMD_NATIVE);
 
 	if (err)
 		mlx4_err(dev, "INIT_HCA returns %d\n", err);
@@ -886,12 +892,12 @@ int mlx4_INIT_PORT(struct mlx4_dev *dev, int port)
 		MLX4_PUT(inbox, field, INIT_PORT_MAX_PKEY_OFFSET);
 
 		err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_INIT_PORT,
-			       MLX4_CMD_TIME_CLASS_A);
+			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 
 		mlx4_free_cmd_mailbox(dev, mailbox);
 	} else
 		err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_INIT_PORT,
-			       MLX4_CMD_TIME_CLASS_A);
+			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 
 	return err;
 }
@@ -899,20 +905,22 @@ EXPORT_SYMBOL_GPL(mlx4_INIT_PORT);
 
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port)
 {
-	return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000);
+	return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000,
+			MLX4_CMD_WRAPPED);
 }
 EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT);
 
 int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic)
 {
-	return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000);
+	return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000,
+			MLX4_CMD_NATIVE);
 }
 
 int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
 {
 	int ret = mlx4_cmd_imm(dev, icm_size, aux_pages, 0, 0,
 			       MLX4_CMD_SET_ICM_SIZE,
-			       MLX4_CMD_TIME_CLASS_A);
+			       MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 	if (ret)
 		return ret;
 
@@ -929,7 +937,7 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages)
 int mlx4_NOP(struct mlx4_dev *dev)
 {
 	/* Input modifier of 0x1f means "finish as soon as possible." */
-	return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100);
+	return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100, MLX4_CMD_NATIVE);
 }
 
 #define MLX4_WOL_SETUP_MODE (5 << 28)
@@ -938,7 +946,8 @@ int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port)
 	u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
 
 	return mlx4_cmd_imm(dev, 0, config, in_mod, 0x3,
-			    MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_MOD_STAT_CFG, MLX4_CMD_TIME_CLASS_A,
+			    MLX4_CMD_NATIVE);
 }
 EXPORT_SYMBOL_GPL(mlx4_wol_read);
 
@@ -947,6 +956,6 @@ int mlx4_wol_write(struct mlx4_dev *dev, u64 config, int port)
 	u32 in_mod = MLX4_WOL_SETUP_MODE | port << 8;
 
 	return mlx4_cmd(dev, config, in_mod, 0x1, MLX4_CMD_MOD_STAT_CFG,
-					MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 EXPORT_SYMBOL_GPL(mlx4_wol_write);
diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c
index 02393fdf44c1..a9ade1c3cad5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/icm.c
+++ b/drivers/net/ethernet/mellanox/mlx4/icm.c
@@ -213,7 +213,7 @@ static int mlx4_MAP_ICM(struct mlx4_dev *dev, struct mlx4_icm *icm, u64 virt)
 static int mlx4_UNMAP_ICM(struct mlx4_dev *dev, u64 virt, u32 page_count)
 {
 	return mlx4_cmd(dev, virt, page_count, 0, MLX4_CMD_UNMAP_ICM,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
@@ -223,7 +223,8 @@ int mlx4_MAP_ICM_AUX(struct mlx4_dev *dev, struct mlx4_icm *icm)
 
 int mlx4_UNMAP_ICM_AUX(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX, MLX4_CMD_TIME_CLASS_B);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_UNMAP_ICM_AUX,
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 int mlx4_table_get(struct mlx4_dev *dev, struct mlx4_icm_table *table, int obj)
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 978688c31046..4187f7bbd793 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -48,14 +48,14 @@ static int mlx4_READ_ENTRY(struct mlx4_dev *dev, int index,
 			   struct mlx4_cmd_mailbox *mailbox)
 {
 	return mlx4_cmd_box(dev, 0, mailbox->dma, index, 0, MLX4_CMD_READ_MCG,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 
 static int mlx4_WRITE_ENTRY(struct mlx4_dev *dev, int index,
 			    struct mlx4_cmd_mailbox *mailbox)
 {
 	return mlx4_cmd(dev, mailbox->dma, index, 0, MLX4_CMD_WRITE_MCG,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE);
 }
 
 static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 vep_num, u8 port, u8 steer,
@@ -65,7 +65,8 @@ static int mlx4_WRITE_PROMISC(struct mlx4_dev *dev, u8 vep_num, u8 port, u8 stee
 
 	in_mod = (u32) vep_num << 24 | (u32) port << 16 | steer << 1;
 	return mlx4_cmd(dev, mailbox->dma, in_mod, 0x1,
-			MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_WRITE_MCG, MLX4_CMD_TIME_CLASS_A,
+			MLX4_CMD_NATIVE);
 }
 
 static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -75,7 +76,8 @@ static int mlx4_GID_HASH(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 	int err;
 
 	err = mlx4_cmd_imm(dev, mailbox->dma, &imm, 0, op_mod,
-			   MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_MGID_HASH, MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_NATIVE);
 
 	if (!err)
 		*hash = imm;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index efa3e77355e4..057b22d64a05 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -254,14 +254,15 @@ static int mlx4_SW2HW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 			  int mpt_index)
 {
 	return mlx4_cmd(dev, mailbox->dma, mpt_index, 0, MLX4_CMD_SW2HW_MPT,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_MPT(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			  int mpt_index)
 {
 	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, mpt_index,
-			    !mailbox, MLX4_CMD_HW2SW_MPT, MLX4_CMD_TIME_CLASS_B);
+			    !mailbox, MLX4_CMD_HW2SW_MPT,
+			    MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
 int mlx4_mr_alloc(struct mlx4_dev *dev, u32 pd, u64 iova, u64 size, u32 access,
@@ -663,6 +664,7 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_free);
 
 int mlx4_SYNC_TPT(struct mlx4_dev *dev)
 {
-	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000);
+	return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000,
+			MLX4_CMD_WRAPPED);
 }
 EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT);
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index d942aea4927b..da9f85c6da7e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -85,7 +85,7 @@ static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
 
 	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
@@ -326,7 +326,7 @@ static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 	memcpy(mailbox->buf, entries, MLX4_VLAN_TABLE_SIZE);
 	in_mod = MLX4_SET_PORT_VLAN_TABLE << 8 | port;
 	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 
@@ -462,7 +462,8 @@ int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
 	*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
 
 	err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
-			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
 	if (!err)
 		*caps = *(__be32 *) (outbuf + 84);
 	mlx4_free_cmd_mailbox(dev, inmailbox);
@@ -499,7 +500,8 @@ int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port)
 	*(__be32 *) (&inbuf[20]) = cpu_to_be32(port);
 
 	err = mlx4_cmd_box(dev, inmailbox->dma, outmailbox->dma, port, 3,
-			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C);
+			   MLX4_CMD_MAD_IFC, MLX4_CMD_TIME_CLASS_C,
+			   MLX4_CMD_NATIVE);
 
 	packet_error = be16_to_cpu(*(__be16 *) (outbuf + 4));
 
@@ -528,7 +530,7 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 
 	((__be32 *) mailbox->buf)[1] = dev->caps.ib_port_def_cap[port];
 	err = mlx4_cmd(dev, mailbox->dma, port, 0, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B);
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c
index 15f870cb2590..e721f4cd34f8 100644
--- a/drivers/net/ethernet/mellanox/mlx4/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx4/qp.c
@@ -119,7 +119,8 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 
 	if (op[cur_state][new_state] == MLX4_CMD_2RST_QP)
 		return mlx4_cmd(dev, 0, qp->qpn, 2,
-				MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A);
+				MLX4_CMD_2RST_QP, MLX4_CMD_TIME_CLASS_A,
+				MLX4_CMD_WRAPPED);
 
 	mailbox = mlx4_alloc_cmd_mailbox(dev);
 	if (IS_ERR(mailbox))
@@ -140,7 +141,8 @@ int mlx4_qp_modify(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 
 	ret = mlx4_cmd(dev, mailbox->dma, qp->qpn | (!!sqd_event << 31),
 		       new_state == MLX4_QP_STATE_RST ? 2 : 0,
-		       op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C);
+		       op[cur_state][new_state], MLX4_CMD_TIME_CLASS_C,
+		       MLX4_CMD_WRAPPED);
 
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return ret;
@@ -265,7 +267,7 @@ EXPORT_SYMBOL_GPL(mlx4_qp_free);
 static int mlx4_CONF_SPECIAL_QP(struct mlx4_dev *dev, u32 base_qpn)
 {
 	return mlx4_cmd(dev, 0, base_qpn, 0, MLX4_CMD_CONF_SPECIAL_QP,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
 }
 
 int mlx4_init_qp_table(struct mlx4_dev *dev)
@@ -342,7 +344,8 @@ int mlx4_qp_query(struct mlx4_dev *dev, struct mlx4_qp *qp,
 		return PTR_ERR(mailbox);
 
 	err = mlx4_cmd_box(dev, 0, mailbox->dma, qp->qpn, 0,
-			   MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A);
+			   MLX4_CMD_QUERY_QP, MLX4_CMD_TIME_CLASS_A,
+			   MLX4_CMD_WRAPPED);
 	if (!err)
 		memcpy(context, mailbox->buf + 8, sizeof *context);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/sense.c b/drivers/net/ethernet/mellanox/mlx4/sense.c
index e2337a7411d9..802498293528 100644
--- a/drivers/net/ethernet/mellanox/mlx4/sense.c
+++ b/drivers/net/ethernet/mellanox/mlx4/sense.c
@@ -45,7 +45,8 @@ int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port,
 	int err = 0;
 
 	err = mlx4_cmd_imm(dev, 0, &out_param, port, 0,
-			   MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B);
+			   MLX4_CMD_SENSE_PORT, MLX4_CMD_TIME_CLASS_B,
+			   MLX4_CMD_WRAPPED);
 	if (err) {
 		mlx4_err(dev, "Sense command failed for port: %d\n", port);
 		return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c
index 9cbf3fce0145..f4ca096db62a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/srq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/srq.c
@@ -86,7 +86,7 @@ static int mlx4_SW2HW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 			  int srq_num)
 {
 	return mlx4_cmd(dev, mailbox->dma, srq_num, 0, MLX4_CMD_SW2HW_SRQ,
-			MLX4_CMD_TIME_CLASS_A);
+			MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
@@ -94,20 +94,20 @@ static int mlx4_HW2SW_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox
 {
 	return mlx4_cmd_box(dev, 0, mailbox ? mailbox->dma : 0, srq_num,
 			    mailbox ? 0 : 1, MLX4_CMD_HW2SW_SRQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_ARM_SRQ(struct mlx4_dev *dev, int srq_num, int limit_watermark)
 {
 	return mlx4_cmd(dev, limit_watermark, srq_num, 0, MLX4_CMD_ARM_SRQ,
-			MLX4_CMD_TIME_CLASS_B);
+			MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
 }
 
 static int mlx4_QUERY_SRQ(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbox,
 			  int srq_num)
 {
 	return mlx4_cmd_box(dev, 0, mailbox->dma, srq_num, 0, MLX4_CMD_QUERY_SRQ,
-			    MLX4_CMD_TIME_CLASS_A);
+			    MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
 }
 
 int mlx4_srq_alloc(struct mlx4_dev *dev, u32 pdn, u32 cqn, u16 xrcd,
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index e8e92814c8a0..ae62630a665e 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -173,6 +173,11 @@ enum {
 	MLX4_SET_PORT_GID_TABLE = 0x5,
 };
 
+enum {
+	MLX4_CMD_WRAPPED,
+	MLX4_CMD_NATIVE
+};
+
 struct mlx4_dev;
 
 struct mlx4_cmd_mailbox {
@@ -182,23 +187,24 @@ struct mlx4_cmd_mailbox {
 
 int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 	       int out_is_imm, u32 in_modifier, u8 op_modifier,
-	       u16 op, unsigned long timeout);
+	       u16 op, unsigned long timeout, int native);
 
 /* Invoke a command with no output parameter */
 static inline int mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u32 in_modifier,
-			   u8 op_modifier, u16 op, unsigned long timeout)
+			   u8 op_modifier, u16 op, unsigned long timeout,
+			   int native)
 {
 	return __mlx4_cmd(dev, in_param, NULL, 0, in_modifier,
-			  op_modifier, op, timeout);
+			  op_modifier, op, timeout, native);
 }
 
 /* Invoke a command with an output mailbox */
 static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param,
 			       u32 in_modifier, u8 op_modifier, u16 op,
-			       unsigned long timeout)
+			       unsigned long timeout, int native)
 {
 	return __mlx4_cmd(dev, in_param, &out_param, 0, in_modifier,
-			  op_modifier, op, timeout);
+			  op_modifier, op, timeout, native);
 }
 
 /*
@@ -208,10 +214,10 @@ static inline int mlx4_cmd_box(struct mlx4_dev *dev, u64 in_param, u64 out_param
  */
 static inline int mlx4_cmd_imm(struct mlx4_dev *dev, u64 in_param, u64 *out_param,
 			       u32 in_modifier, u8 op_modifier, u16 op,
-			       unsigned long timeout)
+			       unsigned long timeout, int native)
 {
 	return __mlx4_cmd(dev, in_param, out_param, 1, in_modifier,
-			  op_modifier, op, timeout);
+			  op_modifier, op, timeout, native);
 }
 
 struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev);
-- 
cgit v1.2.3


From f5311ac109b21c9b47118655a5b6d887bcc686f8 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:12:13 +0000
Subject: mlx4_core: Reduce number of PD bits to 17

When SRIOV is enabled on the chip (at FW burning time),
the HCA uses only 17 bits for the PD. The remaining 7 high-order bits
are ignored.

Change the allocator to return only 17 bits for the PD.  The MSB 7
bits will be used to encode the slave number for consistency
checking later on in the resource tracker.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |  5 ++++-
 drivers/net/ethernet/mellanox/mlx4/pd.c   | 19 ++++++++++++++-----
 include/linux/mlx4/device.h               |  1 +
 3 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 69177614666f..51cba262bafc 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -420,7 +420,7 @@ struct mlx4_mfunc_master_ctx {
 	struct work_struct	slave_event_work;
 	struct work_struct	slave_flr_event_work;
 	spinlock_t		slave_state_lock;
-	u32			comm_arm_bit_vector[4];
+	__be32			comm_arm_bit_vector[4];
 	struct mlx4_eqe		cmd_eqe;
 	struct mlx4_slave_event_eq slave_eq;
 	struct mutex		gen_eqe_mutex[MLX4_MFUNC_MAX];
@@ -914,4 +914,7 @@ int mlx4_QUERY_IF_STAT_wrapper(struct mlx4_dev *dev, int slave,
 			       struct mlx4_cmd_mailbox *inbox,
 			       struct mlx4_cmd_mailbox *outbox,
 			       struct mlx4_cmd_info *cmd);
+
+#define NOT_MASKED_PD_BITS 17
+
 #endif /* MLX4_H */
diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c
index 260ed259ce9b..5c9a54df17ab 100644
--- a/drivers/net/ethernet/mellanox/mlx4/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/pd.c
@@ -31,6 +31,7 @@
  * SOFTWARE.
  */
 
+#include <linux/init.h>
 #include <linux/errno.h>
 #include <linux/export.h>
 #include <linux/io-mapping.h>
@@ -51,7 +52,8 @@ int mlx4_pd_alloc(struct mlx4_dev *dev, u32 *pdn)
 	*pdn = mlx4_bitmap_alloc(&priv->pd_bitmap);
 	if (*pdn == -1)
 		return -ENOMEM;
-
+	if (mlx4_is_mfunc(dev))
+		*pdn |= (dev->caps.function + 1) << NOT_MASKED_PD_BITS;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_pd_alloc);
@@ -85,7 +87,8 @@ int mlx4_init_pd_table(struct mlx4_dev *dev)
 	struct mlx4_priv *priv = mlx4_priv(dev);
 
 	return mlx4_bitmap_init(&priv->pd_bitmap, dev->caps.num_pds,
-				(1 << 24) - 1, dev->caps.reserved_pds, 0);
+				(1 << NOT_MASKED_PD_BITS) - 1,
+				 dev->caps.reserved_pds, 0);
 }
 
 void mlx4_cleanup_pd_table(struct mlx4_dev *dev)
@@ -108,13 +111,19 @@ void mlx4_cleanup_xrcd_table(struct mlx4_dev *dev)
 
 int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar)
 {
+	int offset;
+
 	uar->index = mlx4_bitmap_alloc(&mlx4_priv(dev)->uar_table.bitmap);
 	if (uar->index == -1)
 		return -ENOMEM;
 
-	uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + uar->index;
+	if (mlx4_is_slave(dev))
+		offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) /
+				       dev->caps.uar_page_size);
+	else
+		offset = uar->index;
+	uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset;
 	uar->map = NULL;
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_uar_alloc);
@@ -232,7 +241,7 @@ int mlx4_init_uar_table(struct mlx4_dev *dev)
 
 	return mlx4_bitmap_init(&mlx4_priv(dev)->uar_table.bitmap,
 				dev->caps.num_uars, dev->caps.num_uars - 1,
-				max(128, dev->caps.reserved_uars), 0);
+				dev->caps.reserved_uars, 0);
 }
 
 void mlx4_cleanup_uar_table(struct mlx4_dev *dev)
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3333018d2913..e4be34a908a7 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -248,6 +248,7 @@ struct mlx4_caps {
 	u64			trans_code[MLX4_MAX_PORTS + 1];
 	int			local_ca_ack_delay;
 	int			num_uars;
+	u32			uar_page_size;
 	int			bf_reg_size;
 	int			bf_regs_per_page;
 	int			max_sq_sg;
-- 
cgit v1.2.3


From ffe455ad04681f3fc48eef595fe526a795f809a3 Mon Sep 17 00:00:00 2001
From: Eugenia Emantayev <eugenia@mellanox.co.il>
Date: Tue, 13 Dec 2011 04:16:21 +0000
Subject: mlx4: Ethernet port management modifications

The physical port is now common to the PF and VFs.
The port resources and configuration is managed by the PF, VFs can
only influence the MTU of the port, it is set as max among all functions,
Each function allocates RX buffers of required size to meet it's MTU enforcement.
Port management code was moved to mlx4_core, as the mlx4_en module is
virtualization unaware

Move handling qp functionality to mlx4_get_eth_qp/mlx4_put_eth_qp
including reserve/release range and add/release unicast steering.
Let mlx4_register/unregister_mac deal only with MAC (un)registration.

Signed-off-by: Eugenia Emantayev <eugenia@mellanox.co.il>
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c           |  37 ++
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c     |  32 +-
 drivers/net/ethernet/mellanox/mlx4/en_port.c       |  77 ---
 drivers/net/ethernet/mellanox/mlx4/en_port.h       |  37 --
 drivers/net/ethernet/mellanox/mlx4/mcg.c           |   4 +-
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |  53 ++
 drivers/net/ethernet/mellanox/mlx4/port.c          | 606 +++++++++++++++++----
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  33 +-
 include/linux/mlx4/device.h                        |  12 +-
 9 files changed, 630 insertions(+), 261 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 0f2069d98274..8e6e4b20b0e2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -653,6 +653,15 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_QUERY_PORT_wrapper
 	},
+	{
+		.opcode = MLX4_CMD_SET_PORT,
+		.has_inbox = true,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_SET_PORT_wrapper
+	},
 	{
 		.opcode = MLX4_CMD_MAP_EQ,
 		.has_inbox = false,
@@ -1005,6 +1014,34 @@ static struct mlx4_cmd_info cmd_info[] = {
 		.verify = NULL,
 		.wrapper = mlx4_PROMISC_wrapper
 	},
+	/* Ethernet specific commands */
+	{
+		.opcode = MLX4_CMD_SET_VLAN_FLTR,
+		.has_inbox = true,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_SET_VLAN_FLTR_wrapper
+	},
+	{
+		.opcode = MLX4_CMD_SET_MCAST_FLTR,
+		.has_inbox = false,
+		.has_outbox = false,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_SET_MCAST_FLTR_wrapper
+	},
+	{
+		.opcode = MLX4_CMD_DUMP_ETH_STATS,
+		.has_inbox = false,
+		.has_outbox = true,
+		.out_is_imm = false,
+		.encode_slave_id = false,
+		.verify = NULL,
+		.wrapper = mlx4_DUMP_ETH_STATS_wrapper
+	},
 	{
 		.opcode = MLX4_CMD_INFORM_FLR_DONE,
 		.has_inbox = false,
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 2083f3b5d689..1db6fea495bf 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -136,7 +136,7 @@ static void mlx4_en_do_set_mac(struct work_struct *work)
 	if (priv->port_up) {
 		/* Remove old MAC and insert the new one */
 		err = mlx4_replace_mac(mdev->dev, priv->port,
-				       priv->base_qpn, priv->mac, 0);
+				       priv->base_qpn, priv->mac);
 		if (err)
 			en_err(priv, "Failed changing HW MAC address\n");
 	} else
@@ -207,6 +207,16 @@ static void mlx4_en_do_set_multicast(struct work_struct *work)
 		goto out;
 	}
 
+	if (!netif_carrier_ok(dev)) {
+		if (!mlx4_en_QUERY_PORT(mdev, priv->port)) {
+			if (priv->port_state.link_state) {
+				priv->last_link_state = MLX4_DEV_EVENT_PORT_UP;
+				netif_carrier_on(dev);
+				en_dbg(LINK, priv, "Link Up\n");
+			}
+		}
+	}
+
 	/*
 	 * Promsicuous mode: disable all filters
 	 */
@@ -602,12 +612,12 @@ int mlx4_en_start_port(struct net_device *dev)
 		++rx_index;
 	}
 
-	/* Set port mac number */
-	en_dbg(DRV, priv, "Setting mac for port %d\n", priv->port);
-	err = mlx4_register_mac(mdev->dev, priv->port,
-				priv->mac, &priv->base_qpn, 0);
+	/* Set qp number */
+	en_dbg(DRV, priv, "Getting qp number for port %d\n", priv->port);
+	err = mlx4_get_eth_qp(mdev->dev, priv->port,
+				priv->mac, &priv->base_qpn);
 	if (err) {
-		en_err(priv, "Failed setting port mac\n");
+		en_err(priv, "Failed getting eth qp\n");
 		goto cq_err;
 	}
 	mdev->mac_removed[priv->port] = 0;
@@ -702,7 +712,7 @@ tx_err:
 
 	mlx4_en_release_rss_steer(priv);
 mac_err:
-	mlx4_unregister_mac(mdev->dev, priv->port, priv->base_qpn);
+	mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn);
 cq_err:
 	while (rx_index--)
 		mlx4_en_deactivate_cq(priv, &priv->rx_cq[rx_index]);
@@ -748,10 +758,6 @@ void mlx4_en_stop_port(struct net_device *dev)
 	/* Flush multicast filter */
 	mlx4_SET_MCAST_FLTR(mdev->dev, priv->port, 0, 1, MLX4_MCAST_CONFIG);
 
-	/* Unregister Mac address for the port */
-	mlx4_unregister_mac(mdev->dev, priv->port, priv->base_qpn);
-	mdev->mac_removed[priv->port] = 1;
-
 	/* Free TX Rings */
 	for (i = 0; i < priv->tx_ring_num; i++) {
 		mlx4_en_deactivate_tx_ring(priv, &priv->tx_ring[i]);
@@ -765,6 +771,10 @@ void mlx4_en_stop_port(struct net_device *dev)
 	/* Free RSS qps */
 	mlx4_en_release_rss_steer(priv);
 
+	/* Unregister Mac address for the port */
+	mlx4_put_eth_qp(mdev->dev, priv->port, priv->mac, priv->base_qpn);
+	mdev->mac_removed[priv->port] = 1;
+
 	/* Free RX Rings */
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		mlx4_en_deactivate_rx_ring(priv, &priv->rx_ring[i]);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index ae120effb8a5..331791467a22 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -41,14 +41,6 @@
 #include "mlx4_en.h"
 
 
-int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
-			u64 mac, u64 clear, u8 mode)
-{
-	return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
-			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
-			MLX4_CMD_WRAPPED);
-}
-
 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -78,75 +70,6 @@ int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv)
 	return err;
 }
 
-
-int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
-			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	struct mlx4_set_port_general_context *context;
-	int err;
-	u32 in_mod;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-	context = mailbox->buf;
-	memset(context, 0, sizeof *context);
-
-	context->flags = SET_PORT_GEN_ALL_VALID;
-	context->mtu = cpu_to_be16(mtu);
-	context->pptx = (pptx * (!pfctx)) << 7;
-	context->pfctx = pfctx;
-	context->pprx = (pprx * (!pfcrx)) << 7;
-	context->pfcrx = pfcrx;
-
-	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-
-int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
-			   u8 promisc)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	struct mlx4_set_port_rqp_calc_context *context;
-	int err;
-	u32 in_mod;
-	u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
-						MCAST_DIRECT : MCAST_DEFAULT;
-
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER  &&
-			dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)
-		return 0;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-	context = mailbox->buf;
-	memset(context, 0, sizeof *context);
-
-	context->base_qpn = cpu_to_be32(base_qpn);
-	context->n_mac = dev->caps.log_num_macs;
-	context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
-				       base_qpn);
-	context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
-				     base_qpn);
-	context->intra_no_vlan = 0;
-	context->no_vlan = MLX4_NO_VLAN_IDX;
-	context->intra_vlan_miss = 0;
-	context->vlan_miss = MLX4_VLAN_MISS_IDX;
-
-	in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_WRAPPED);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-
 int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port)
 {
 	struct mlx4_en_query_port_context *qport_context;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.h b/drivers/net/ethernet/mellanox/mlx4/en_port.h
index c1bb834414b5..6934fd7e66ed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.h
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.h
@@ -39,43 +39,6 @@
 #define SET_PORT_PROMISC_SHIFT	31
 #define SET_PORT_MC_PROMISC_SHIFT	30
 
-enum {
-	MCAST_DIRECT_ONLY       = 0,
-	MCAST_DIRECT            = 1,
-	MCAST_DEFAULT           = 2
-};
-
-struct mlx4_set_port_general_context {
-	u8 reserved[3];
-	u8 flags;
-	u16 reserved2;
-	__be16 mtu;
-	u8 pptx;
-	u8 pfctx;
-	u16 reserved3;
-	u8 pprx;
-	u8 pfcrx;
-	u16 reserved4;
-};
-
-struct mlx4_set_port_rqp_calc_context {
-	__be32 base_qpn;
-	u8 rererved;
-	u8 n_mac;
-	u8 n_vlan;
-	u8 n_prio;
-	u8 reserved2[3];
-	u8 mac_miss;
-	u8 intra_no_vlan;
-	u8 no_vlan;
-	u8 intra_vlan_miss;
-	u8 vlan_miss;
-	u8 reserved3[3];
-	u8 no_vlan_prio;
-	__be32 promisc;
-	__be32 mcast;
-};
-
 #define VLAN_FLTR_SIZE	128
 struct mlx4_set_vlan_fltr_mbox {
 	__be32 entry[VLAN_FLTR_SIZE];
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index b36c279bcca0..0785d9b2a265 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -913,7 +913,7 @@ int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 }
 EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
 
-static int mlx4_unicast_attach(struct mlx4_dev *dev,
+int mlx4_unicast_attach(struct mlx4_dev *dev,
 			struct mlx4_qp *qp, u8 gid[16],
 			int block_mcast_loopback, enum mlx4_protocol prot)
 {
@@ -933,7 +933,7 @@ static int mlx4_unicast_attach(struct mlx4_dev *dev,
 }
 EXPORT_SYMBOL_GPL(mlx4_unicast_attach);
 
-static int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp,
 			       u8 gid[16], enum mlx4_protocol prot)
 {
 	if (prot == MLX4_PROT_ETH &&
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index a38ffc997367..abf65d8af48d 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -419,12 +419,23 @@ struct mlx4_comm {
 	u32			slave_read;
 };
 
+enum {
+	MLX4_MCAST_CONFIG       = 0,
+	MLX4_MCAST_DISABLE      = 1,
+	MLX4_MCAST_ENABLE       = 2,
+};
+
 #define VLAN_FLTR_SIZE	128
 
 struct mlx4_vlan_fltr {
 	__be32 entry[VLAN_FLTR_SIZE];
 };
 
+struct mlx4_mcast_entry {
+	struct list_head list;
+	u64 addr;
+};
+
 struct mlx4_promisc_qp {
 	struct list_head list;
 	u32 qpn;
@@ -615,6 +626,48 @@ struct mlx4_vlan_table {
 	int			max;
 };
 
+#define SET_PORT_GEN_ALL_VALID		0x7
+#define SET_PORT_PROMISC_SHIFT		31
+#define SET_PORT_MC_PROMISC_SHIFT	30
+
+enum {
+	MCAST_DIRECT_ONLY	= 0,
+	MCAST_DIRECT		= 1,
+	MCAST_DEFAULT		= 2
+};
+
+
+struct mlx4_set_port_general_context {
+	u8 reserved[3];
+	u8 flags;
+	u16 reserved2;
+	__be16 mtu;
+	u8 pptx;
+	u8 pfctx;
+	u16 reserved3;
+	u8 pprx;
+	u8 pfcrx;
+	u16 reserved4;
+};
+
+struct mlx4_set_port_rqp_calc_context {
+	__be32 base_qpn;
+	u8 rererved;
+	u8 n_mac;
+	u8 n_vlan;
+	u8 n_prio;
+	u8 reserved2[3];
+	u8 mac_miss;
+	u8 intra_no_vlan;
+	u8 no_vlan;
+	u8 intra_vlan_miss;
+	u8 vlan_miss;
+	u8 reserved3[3];
+	u8 no_vlan_prio;
+	__be32 promisc;
+	__be32 mcast;
+};
+
 struct mlx4_mac_entry {
 	u64 mac;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c
index da9f85c6da7e..00a9547773c1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/port.c
@@ -70,41 +70,12 @@ void mlx4_init_vlan_table(struct mlx4_dev *dev, struct mlx4_vlan_table *table)
 	table->total = 0;
 }
 
-static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
-				   __be64 *entries)
-{
-	struct mlx4_cmd_mailbox *mailbox;
-	u32 in_mod;
-	int err;
-
-	mailbox = mlx4_alloc_cmd_mailbox(dev);
-	if (IS_ERR(mailbox))
-		return PTR_ERR(mailbox);
-
-	memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
-
-	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
-	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
-		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
-
-	mlx4_free_cmd_mailbox(dev, mailbox);
-	return err;
-}
-
-static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
-			     u64 mac, int *qpn, u8 reserve)
+static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
 {
 	struct mlx4_qp qp;
 	u8 gid[16] = {0};
 	int err;
 
-	if (reserve) {
-		err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
-		if (err) {
-			mlx4_err(dev, "Failed to reserve qp for mac registration\n");
-			return err;
-		}
-	}
 	qp.qpn = *qpn;
 
 	mac &= 0xffffffffffffULL;
@@ -113,16 +84,15 @@ static int mlx4_uc_steer_add(struct mlx4_dev *dev, u8 port,
 	gid[5] = port;
 	gid[7] = MLX4_UC_STEER << 1;
 
-	err = mlx4_qp_attach_common(dev, &qp, gid, 0,
-				    MLX4_PROT_ETH, MLX4_UC_STEER);
-	if (err && reserve)
-		mlx4_qp_release_range(dev, *qpn, 1);
+	err = mlx4_unicast_attach(dev, &qp, gid, 0, MLX4_PROT_ETH);
+	if (err)
+		mlx4_warn(dev, "Failed Attaching Unicast\n");
 
 	return err;
 }
 
 static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
-				  u64 mac, int qpn, u8 free)
+				  u64 mac, int qpn)
 {
 	struct mlx4_qp qp;
 	u8 gid[16] = {0};
@@ -134,60 +104,164 @@ static void mlx4_uc_steer_release(struct mlx4_dev *dev, u8 port,
 	gid[5] = port;
 	gid[7] = MLX4_UC_STEER << 1;
 
-	mlx4_qp_detach_common(dev, &qp, gid, MLX4_PROT_ETH, MLX4_UC_STEER);
-	if (free)
-		mlx4_qp_release_range(dev, qpn, 1);
+	mlx4_unicast_detach(dev, &qp, gid, MLX4_PROT_ETH);
+}
+
+static int validate_index(struct mlx4_dev *dev,
+			  struct mlx4_mac_table *table, int index)
+{
+	int err = 0;
+
+	if (index < 0 || index >= table->max || !table->entries[index]) {
+		mlx4_warn(dev, "No valid Mac entry for the given index\n");
+		err = -EINVAL;
+	}
+	return err;
+}
+
+static int find_index(struct mlx4_dev *dev,
+		      struct mlx4_mac_table *table, u64 mac)
+{
+	int i;
+
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if ((mac & MLX4_MAC_MASK) ==
+		    (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
+			return i;
+	}
+	/* Mac not found */
+	return -EINVAL;
 }
 
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap)
+int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
-	struct mlx4_mac_table *table = &info->mac_table;
 	struct mlx4_mac_entry *entry;
-	int i, err = 0;
-	int free = -1;
+	int index = 0;
+	int err = 0;
 
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
-		err = mlx4_uc_steer_add(dev, port, mac, qpn, 1);
-		if (err)
-			return err;
+	mlx4_dbg(dev, "Registering MAC: 0x%llx for adding\n",
+			(unsigned long long) mac);
+	index = mlx4_register_mac(dev, port, mac);
+	if (index < 0) {
+		err = index;
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+			 (unsigned long long) mac);
+		return err;
+	}
 
-		entry = kmalloc(sizeof *entry, GFP_KERNEL);
-		if (!entry) {
-			mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-			return -ENOMEM;
-		}
+	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)) {
+		*qpn = info->base_qpn + index;
+		return 0;
+	}
+
+	err = mlx4_qp_reserve_range(dev, 1, 1, qpn);
+	mlx4_dbg(dev, "Reserved qp %d\n", *qpn);
+	if (err) {
+		mlx4_err(dev, "Failed to reserve qp for mac registration\n");
+		goto qp_err;
+	}
+
+	err = mlx4_uc_steer_add(dev, port, mac, qpn);
+	if (err)
+		goto steer_err;
+
+	entry = kmalloc(sizeof *entry, GFP_KERNEL);
+	if (!entry) {
+		err = -ENOMEM;
+		goto alloc_err;
+	}
+	entry->mac = mac;
+	err = radix_tree_insert(&info->mac_tree, *qpn, entry);
+	if (err)
+		goto insert_err;
+	return 0;
+
+insert_err:
+	kfree(entry);
+
+alloc_err:
+	mlx4_uc_steer_release(dev, port, mac, *qpn);
+
+steer_err:
+	mlx4_qp_release_range(dev, *qpn, 1);
 
-		entry->mac = mac;
-		err = radix_tree_insert(&info->mac_tree, *qpn, entry);
-		if (err) {
+qp_err:
+	mlx4_unregister_mac(dev, port, mac);
+	return err;
+}
+EXPORT_SYMBOL_GPL(mlx4_get_eth_qp);
+
+void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_entry *entry;
+
+	mlx4_dbg(dev, "Registering MAC: 0x%llx for deleting\n",
+		 (unsigned long long) mac);
+	mlx4_unregister_mac(dev, port, mac);
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
+		entry = radix_tree_lookup(&info->mac_tree, qpn);
+		if (entry) {
+			mlx4_dbg(dev, "Releasing qp: port %d, mac 0x%llx,"
+				 " qpn %d\n", port,
+				 (unsigned long long) mac, qpn);
+			mlx4_uc_steer_release(dev, port, entry->mac, qpn);
+			mlx4_qp_release_range(dev, qpn, 1);
+			radix_tree_delete(&info->mac_tree, qpn);
 			kfree(entry);
-			mlx4_uc_steer_release(dev, port, mac, *qpn, 1);
-			return err;
 		}
 	}
+}
+EXPORT_SYMBOL_GPL(mlx4_put_eth_qp);
+
+static int mlx4_set_port_mac_table(struct mlx4_dev *dev, u8 port,
+				   __be64 *entries)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	u32 in_mod;
+	int err;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+
+	memcpy(mailbox->buf, entries, MLX4_MAC_TABLE_SIZE);
+
+	in_mod = MLX4_SET_PORT_MAC_TABLE << 8 | port;
 
-	mlx4_dbg(dev, "Registering MAC: 0x%llx\n", (unsigned long long) mac);
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+
+int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
+	struct mlx4_mac_table *table = &info->mac_table;
+	int i, err = 0;
+	int free = -1;
+
+	mlx4_dbg(dev, "Registering MAC: 0x%llx for port %d\n",
+		 (unsigned long long) mac, port);
 
 	mutex_lock(&table->mutex);
-	for (i = 0; i < MLX4_MAX_MAC_NUM - 1; i++) {
-		if (free < 0 && !table->refs[i]) {
+	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
+		if (free < 0 && !table->entries[i]) {
 			free = i;
 			continue;
 		}
 
 		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i]))) {
-			/* MAC already registered, increase references count */
-			++table->refs[i];
+			/* MAC already registered, Must not have duplicates */
+			err = -EEXIST;
 			goto out;
 		}
 	}
 
-	if (free < 0) {
-		err = -ENOMEM;
-		goto out;
-	}
-
 	mlx4_dbg(dev, "Free MAC index is %d\n", free);
 
 	if (table->total == table->max) {
@@ -197,103 +271,103 @@ int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap)
 	}
 
 	/* Register new MAC */
-	table->refs[free] = 1;
 	table->entries[free] = cpu_to_be64(mac | MLX4_MAC_VALID);
 
 	err = mlx4_set_port_mac_table(dev, port, table->entries);
 	if (unlikely(err)) {
-		mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) mac);
-		table->refs[free] = 0;
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+			 (unsigned long long) mac);
 		table->entries[free] = 0;
 		goto out;
 	}
 
-	if (!(dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER))
-		*qpn = info->base_qpn + free;
+	err = free;
 	++table->total;
 out:
 	mutex_unlock(&table->mutex);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx4_register_mac);
+EXPORT_SYMBOL_GPL(__mlx4_register_mac);
 
-static int validate_index(struct mlx4_dev *dev,
-			  struct mlx4_mac_table *table, int index)
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
-	int err = 0;
+	u64 out_param;
+	int err;
 
-	if (index < 0 || index >= table->max || !table->entries[index]) {
-		mlx4_warn(dev, "No valid Mac entry for the given index\n");
-		err = -EINVAL;
-	}
-	return err;
-}
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&out_param, port);
+		err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (err)
+			return err;
 
-static int find_index(struct mlx4_dev *dev,
-		      struct mlx4_mac_table *table, u64 mac)
-{
-	int i;
-	for (i = 0; i < MLX4_MAX_MAC_NUM; i++) {
-		if (mac == (MLX4_MAC_MASK & be64_to_cpu(table->entries[i])))
-			return i;
+		return get_param_l(&out_param);
 	}
-	/* Mac not found */
-	return -EINVAL;
+	return __mlx4_register_mac(dev, port, mac);
 }
+EXPORT_SYMBOL_GPL(mlx4_register_mac);
+
 
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn)
+void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
 	struct mlx4_mac_table *table = &info->mac_table;
-	int index = qpn - info->base_qpn;
-	struct mlx4_mac_entry *entry;
+	int index;
 
-	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
-		entry = radix_tree_lookup(&info->mac_tree, qpn);
-		if (entry) {
-			mlx4_uc_steer_release(dev, port, entry->mac, qpn, 1);
-			radix_tree_delete(&info->mac_tree, qpn);
-			index = find_index(dev, table, entry->mac);
-			kfree(entry);
-		}
-	}
+	index = find_index(dev, table, mac);
 
 	mutex_lock(&table->mutex);
 
 	if (validate_index(dev, table, index))
 		goto out;
 
-	/* Check whether this address has reference count */
-	if (!(--table->refs[index])) {
-		table->entries[index] = 0;
-		mlx4_set_port_mac_table(dev, port, table->entries);
-		--table->total;
-	}
+	table->entries[index] = 0;
+	mlx4_set_port_mac_table(dev, port, table->entries);
+	--table->total;
 out:
 	mutex_unlock(&table->mutex);
 }
+EXPORT_SYMBOL_GPL(__mlx4_unregister_mac);
+
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
+{
+	u64 out_param;
+	int err;
+
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&out_param, port);
+		err = mlx4_cmd_imm(dev, mac, &out_param, RES_MAC,
+				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_FREE_RES,
+				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		return;
+	}
+	__mlx4_unregister_mac(dev, port, mac);
+	return;
+}
 EXPORT_SYMBOL_GPL(mlx4_unregister_mac);
 
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wrap)
+int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac)
 {
 	struct mlx4_port_info *info = &mlx4_priv(dev)->port[port];
 	struct mlx4_mac_table *table = &info->mac_table;
-	int index = qpn - info->base_qpn;
 	struct mlx4_mac_entry *entry;
-	int err;
+	int index = qpn - info->base_qpn;
+	int err = 0;
 
 	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER) {
 		entry = radix_tree_lookup(&info->mac_tree, qpn);
 		if (!entry)
 			return -EINVAL;
-		index = find_index(dev, table, entry->mac);
-		mlx4_uc_steer_release(dev, port, entry->mac, qpn, 0);
+		mlx4_uc_steer_release(dev, port, entry->mac, qpn);
+		mlx4_unregister_mac(dev, port, entry->mac);
 		entry->mac = new_mac;
-		err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn, 0);
-		if (err || index < 0)
-			return err;
+		mlx4_register_mac(dev, port, new_mac);
+		err = mlx4_uc_steer_add(dev, port, entry->mac, &qpn);
+		return err;
 	}
 
+	/* CX1 doesn't support multi-functions */
 	mutex_lock(&table->mutex);
 
 	err = validate_index(dev, table, index);
@@ -304,7 +378,8 @@ int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wra
 
 	err = mlx4_set_port_mac_table(dev, port, table->entries);
 	if (unlikely(err)) {
-		mlx4_err(dev, "Failed adding MAC: 0x%llx\n", (unsigned long long) new_mac);
+		mlx4_err(dev, "Failed adding MAC: 0x%llx\n",
+			 (unsigned long long) new_mac);
 		table->entries[index] = 0;
 	}
 out:
@@ -312,6 +387,7 @@ out:
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx4_replace_mac);
+
 static int mlx4_set_port_vlan_table(struct mlx4_dev *dev, u8 port,
 				    __be32 *entries)
 {
@@ -352,7 +428,8 @@ int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx)
 }
 EXPORT_SYMBOL_GPL(mlx4_find_cached_vlan);
 
-int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+static int __mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan,
+				int *index)
 {
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
 	int i, err = 0;
@@ -387,7 +464,7 @@ int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
 		goto out;
 	}
 
-	/* Register new MAC */
+	/* Register new VLAN */
 	table->refs[free] = 1;
 	table->entries[free] = cpu_to_be32(vlan | MLX4_VLAN_VALID);
 
@@ -405,9 +482,27 @@ out:
 	mutex_unlock(&table->mutex);
 	return err;
 }
+
+int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index)
+{
+	u64 out_param;
+	int err;
+
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&out_param, port);
+		err = mlx4_cmd_imm(dev, vlan, &out_param, RES_VLAN,
+				   RES_OP_RESERVE_AND_MAP, MLX4_CMD_ALLOC_RES,
+				   MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED);
+		if (!err)
+			*index = get_param_l(&out_param);
+
+		return err;
+	}
+	return __mlx4_register_vlan(dev, port, vlan, index);
+}
 EXPORT_SYMBOL_GPL(mlx4_register_vlan);
 
-void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+static void __mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
 {
 	struct mlx4_vlan_table *table = &mlx4_priv(dev)->port[port].vlan_table;
 
@@ -432,6 +527,25 @@ void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
 out:
 	mutex_unlock(&table->mutex);
 }
+
+void mlx4_unregister_vlan(struct mlx4_dev *dev, u8 port, int index)
+{
+	u64 in_param;
+	int err;
+
+	if (mlx4_is_mfunc(dev)) {
+		set_param_l(&in_param, port);
+		err = mlx4_cmd(dev, in_param, RES_VLAN, RES_OP_RESERVE_AND_MAP,
+			       MLX4_CMD_FREE_RES, MLX4_CMD_TIME_CLASS_A,
+			       MLX4_CMD_WRAPPED);
+		if (!err)
+			mlx4_warn(dev, "Failed freeing vlan at index:%d\n",
+					index);
+
+		return;
+	}
+	__mlx4_unregister_vlan(dev, port, index);
+}
 EXPORT_SYMBOL_GPL(mlx4_unregister_vlan);
 
 int mlx4_get_port_ib_caps(struct mlx4_dev *dev, u8 port, __be32 *caps)
@@ -514,6 +628,139 @@ int mlx4_check_ext_port_caps(struct mlx4_dev *dev, u8 port)
 	return err;
 }
 
+static int mlx4_common_set_port(struct mlx4_dev *dev, int slave, u32 in_mod,
+				u8 op_mod, struct mlx4_cmd_mailbox *inbox)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_port_info *port_info;
+	struct mlx4_mfunc_master_ctx *master = &priv->mfunc.master;
+	struct mlx4_slave_state *slave_st = &master->slave_state[slave];
+	struct mlx4_set_port_rqp_calc_context *qpn_context;
+	struct mlx4_set_port_general_context *gen_context;
+	int reset_qkey_viols;
+	int port;
+	int is_eth;
+	u32 in_modifier;
+	u32 promisc;
+	u16 mtu, prev_mtu;
+	int err;
+	int i;
+	__be32 agg_cap_mask;
+	__be32 slave_cap_mask;
+	__be32 new_cap_mask;
+
+	port = in_mod & 0xff;
+	in_modifier = in_mod >> 8;
+	is_eth = op_mod;
+	port_info = &priv->port[port];
+
+	/* Slaves cannot perform SET_PORT operations except changing MTU */
+	if (is_eth) {
+		if (slave != dev->caps.function &&
+		    in_modifier != MLX4_SET_PORT_GENERAL) {
+			mlx4_warn(dev, "denying SET_PORT for slave:%d\n",
+					slave);
+			return -EINVAL;
+		}
+		switch (in_modifier) {
+		case MLX4_SET_PORT_RQP_CALC:
+			qpn_context = inbox->buf;
+			qpn_context->base_qpn =
+				cpu_to_be32(port_info->base_qpn);
+			qpn_context->n_mac = 0x7;
+			promisc = be32_to_cpu(qpn_context->promisc) >>
+				SET_PORT_PROMISC_SHIFT;
+			qpn_context->promisc = cpu_to_be32(
+				promisc << SET_PORT_PROMISC_SHIFT |
+				port_info->base_qpn);
+			promisc = be32_to_cpu(qpn_context->mcast) >>
+				SET_PORT_MC_PROMISC_SHIFT;
+			qpn_context->mcast = cpu_to_be32(
+				promisc << SET_PORT_MC_PROMISC_SHIFT |
+				port_info->base_qpn);
+			break;
+		case MLX4_SET_PORT_GENERAL:
+			gen_context = inbox->buf;
+			/* Mtu is configured as the max MTU among all the
+			 * the functions on the port. */
+			mtu = be16_to_cpu(gen_context->mtu);
+			mtu = min_t(int, mtu, dev->caps.eth_mtu_cap[port]);
+			prev_mtu = slave_st->mtu[port];
+			slave_st->mtu[port] = mtu;
+			if (mtu > master->max_mtu[port])
+				master->max_mtu[port] = mtu;
+			if (mtu < prev_mtu && prev_mtu ==
+						master->max_mtu[port]) {
+				slave_st->mtu[port] = mtu;
+				master->max_mtu[port] = mtu;
+				for (i = 0; i < dev->num_slaves; i++) {
+					master->max_mtu[port] =
+					max(master->max_mtu[port],
+					    master->slave_state[i].mtu[port]);
+				}
+			}
+
+			gen_context->mtu = cpu_to_be16(master->max_mtu[port]);
+			break;
+		}
+		return mlx4_cmd(dev, inbox->dma, in_mod, op_mod,
+				MLX4_CMD_SET_PORT, MLX4_CMD_TIME_CLASS_B,
+				MLX4_CMD_NATIVE);
+	}
+
+	/* For IB, we only consider:
+	 * - The capability mask, which is set to the aggregate of all
+	 *   slave function capabilities
+	 * - The QKey violatin counter - reset according to each request.
+	 */
+
+	if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+		reset_qkey_viols = (*(u8 *) inbox->buf) & 0x40;
+		new_cap_mask = ((__be32 *) inbox->buf)[2];
+	} else {
+		reset_qkey_viols = ((u8 *) inbox->buf)[3] & 0x1;
+		new_cap_mask = ((__be32 *) inbox->buf)[1];
+	}
+
+	agg_cap_mask = 0;
+	slave_cap_mask =
+		priv->mfunc.master.slave_state[slave].ib_cap_mask[port];
+	priv->mfunc.master.slave_state[slave].ib_cap_mask[port] = new_cap_mask;
+	for (i = 0; i < dev->num_slaves; i++)
+		agg_cap_mask |=
+			priv->mfunc.master.slave_state[i].ib_cap_mask[port];
+
+	/* only clear mailbox for guests.  Master may be setting
+	* MTU or PKEY table size
+	*/
+	if (slave != dev->caps.function)
+		memset(inbox->buf, 0, 256);
+	if (dev->flags & MLX4_FLAG_OLD_PORT_CMDS) {
+		*(u8 *) inbox->buf	   = !!reset_qkey_viols << 6;
+		((__be32 *) inbox->buf)[2] = agg_cap_mask;
+	} else {
+		((u8 *) inbox->buf)[3]     = !!reset_qkey_viols;
+		((__be32 *) inbox->buf)[1] = agg_cap_mask;
+	}
+
+	err = mlx4_cmd(dev, inbox->dma, port, is_eth, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE);
+	if (err)
+		priv->mfunc.master.slave_state[slave].ib_cap_mask[port] =
+			slave_cap_mask;
+	return err;
+}
+
+int mlx4_SET_PORT_wrapper(struct mlx4_dev *dev, int slave,
+			  struct mlx4_vhcr *vhcr,
+			  struct mlx4_cmd_mailbox *inbox,
+			  struct mlx4_cmd_mailbox *outbox,
+			  struct mlx4_cmd_info *cmd)
+{
+	return mlx4_common_set_port(dev, slave, vhcr->in_modifier,
+				    vhcr->op_modifier, inbox);
+}
+
 int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 {
 	struct mlx4_cmd_mailbox *mailbox;
@@ -535,3 +782,122 @@ int mlx4_SET_PORT(struct mlx4_dev *dev, u8 port)
 	mlx4_free_cmd_mailbox(dev, mailbox);
 	return err;
 }
+
+static int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
+			  u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_general_context *context;
+	int err;
+	u32 in_mod;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	memset(context, 0, sizeof *context);
+
+	context->flags = SET_PORT_GEN_ALL_VALID;
+	context->mtu = cpu_to_be16(mtu);
+	context->pptx = (pptx * (!pfctx)) << 7;
+	context->pfctx = pfctx;
+	context->pprx = (pprx * (!pfcrx)) << 7;
+	context->pfcrx = pfcrx;
+
+	in_mod = MLX4_SET_PORT_GENERAL << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B,  MLX4_CMD_WRAPPED);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_general);
+
+static int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
+			   u8 promisc)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	struct mlx4_set_port_rqp_calc_context *context;
+	int err;
+	u32 in_mod;
+	u32 m_promisc = (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER) ?
+		MCAST_DIRECT : MCAST_DEFAULT;
+
+	if (dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_MC_STEER  &&
+	    dev->caps.flags & MLX4_DEV_CAP_FLAG_VEP_UC_STEER)
+		return 0;
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	context = mailbox->buf;
+	memset(context, 0, sizeof *context);
+
+	context->base_qpn = cpu_to_be32(base_qpn);
+	context->n_mac = dev->caps.log_num_macs;
+	context->promisc = cpu_to_be32(promisc << SET_PORT_PROMISC_SHIFT |
+				       base_qpn);
+	context->mcast = cpu_to_be32(m_promisc << SET_PORT_MC_PROMISC_SHIFT |
+				     base_qpn);
+	context->intra_no_vlan = 0;
+	context->no_vlan = MLX4_NO_VLAN_IDX;
+	context->intra_vlan_miss = 0;
+	context->vlan_miss = MLX4_VLAN_MISS_IDX;
+
+	in_mod = MLX4_SET_PORT_RQP_CALC << 8 | port;
+	err = mlx4_cmd(dev, mailbox->dma, in_mod, 1, MLX4_CMD_SET_PORT,
+		       MLX4_CMD_TIME_CLASS_B,  MLX4_CMD_WRAPPED);
+
+	mlx4_free_cmd_mailbox(dev, mailbox);
+	return err;
+}
+EXPORT_SYMBOL(mlx4_SET_PORT_qpn_calc);
+
+int mlx4_SET_MCAST_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+				struct mlx4_vhcr *vhcr,
+				struct mlx4_cmd_mailbox *inbox,
+				struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd)
+{
+	int err = 0;
+
+	return err;
+}
+
+int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port,
+			u64 mac, u64 clear, u8 mode)
+{
+	return mlx4_cmd(dev, (mac | (clear << 63)), port, mode,
+			MLX4_CMD_SET_MCAST_FLTR, MLX4_CMD_TIME_CLASS_B,
+			MLX4_CMD_WRAPPED);
+}
+EXPORT_SYMBOL(mlx4_SET_MCAST_FLTR);
+
+int mlx4_SET_VLAN_FLTR_wrapper(struct mlx4_dev *dev, int slave,
+			       struct mlx4_vhcr *vhcr,
+			       struct mlx4_cmd_mailbox *inbox,
+			       struct mlx4_cmd_mailbox *outbox,
+			       struct mlx4_cmd_info *cmd)
+{
+	int err = 0;
+
+	return err;
+}
+
+int mlx4_common_dump_eth_stats(struct mlx4_dev *dev, int slave,
+			       u32 in_mod, struct mlx4_cmd_mailbox *outbox)
+{
+	return mlx4_cmd_box(dev, 0, outbox->dma, in_mod, 0,
+			    MLX4_CMD_DUMP_ETH_STATS, MLX4_CMD_TIME_CLASS_B,
+			    MLX4_CMD_NATIVE);
+}
+
+int mlx4_DUMP_ETH_STATS_wrapper(struct mlx4_dev *dev, int slave,
+				struct mlx4_vhcr *vhcr,
+				struct mlx4_cmd_mailbox *inbox,
+				struct mlx4_cmd_mailbox *outbox,
+				struct mlx4_cmd_info *cmd)
+{
+	return mlx4_common_dump_eth_stats(dev, slave,
+					  vhcr->in_modifier, outbox);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 59fc35ee66ad..0d99f57f9c8c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -223,17 +223,6 @@ static const char *ResourceType(enum mlx4_resource rt)
 	};
 }
 
-/* dummy procedures */
-int __mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac)
-{
-	return 0;
-}
-
-void __mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac)
-{
-}
-/* end dummies */
-
 int mlx4_init_resource_tracker(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1271,6 +1260,12 @@ static int mac_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 	return err;
 }
 
+static int vlan_alloc_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+			 u64 in_param, u64 *out_param)
+{
+	return 0;
+}
+
 int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 			   struct mlx4_vhcr *vhcr,
 			   struct mlx4_cmd_mailbox *inbox,
@@ -1311,6 +1306,11 @@ int mlx4_ALLOC_RES_wrapper(struct mlx4_dev *dev, int slave,
 				    vhcr->in_param, &vhcr->out_param);
 		break;
 
+	case RES_VLAN:
+		err = vlan_alloc_res(dev, slave, vhcr->op_modifier, alop,
+				    vhcr->in_param, &vhcr->out_param);
+		break;
+
 	default:
 		err = -EINVAL;
 		break;
@@ -1487,6 +1487,12 @@ static int mac_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
 
 }
 
+static int vlan_free_res(struct mlx4_dev *dev, int slave, int op, int cmd,
+			    u64 in_param, u64 *out_param)
+{
+	return 0;
+}
+
 int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 			  struct mlx4_vhcr *vhcr,
 			  struct mlx4_cmd_mailbox *inbox,
@@ -1527,6 +1533,11 @@ int mlx4_FREE_RES_wrapper(struct mlx4_dev *dev, int slave,
 				   vhcr->in_param, &vhcr->out_param);
 		break;
 
+	case RES_VLAN:
+		err = vlan_free_res(dev, slave, vhcr->op_modifier, alop,
+				   vhcr->in_param, &vhcr->out_param);
+		break;
+
 	default:
 		break;
 	}
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e4be34a908a7..3ef73b05e24e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -599,6 +599,10 @@ int mlx4_srq_query(struct mlx4_dev *dev, struct mlx4_srq *srq, int *limit_waterm
 int mlx4_INIT_PORT(struct mlx4_dev *dev, int port);
 int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port);
 
+int mlx4_unicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			int block_mcast_loopback, enum mlx4_protocol prot);
+int mlx4_unicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
+			enum mlx4_protocol prot);
 int mlx4_multicast_attach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
 			  int block_mcast_loopback, enum mlx4_protocol protocol);
 int mlx4_multicast_detach(struct mlx4_dev *dev, struct mlx4_qp *qp, u8 gid[16],
@@ -609,9 +613,11 @@ int mlx4_unicast_promisc_add(struct mlx4_dev *dev, u32 qpn, u8 port);
 int mlx4_unicast_promisc_remove(struct mlx4_dev *dev, u32 qpn, u8 port);
 int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
 
-int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn, u8 wrap);
-void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, int qpn);
-int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac, u8 wrap);
+int mlx4_register_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+void mlx4_unregister_mac(struct mlx4_dev *dev, u8 port, u64 mac);
+int mlx4_replace_mac(struct mlx4_dev *dev, u8 port, int qpn, u64 new_mac);
+int mlx4_get_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int *qpn);
+void mlx4_put_eth_qp(struct mlx4_dev *dev, u8 port, u64 mac, int qpn);
 
 int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx);
 int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index);
-- 
cgit v1.2.3


From 2b8fb2867ca2736a715a88067fd0ec2904777cbe Mon Sep 17 00:00:00 2001
From: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:16:56 +0000
Subject: mlx4_core: mtts resources units changed to offset

In the previous implementation mtts are managed by:
1. order     - log(mtt segments), 'mtt segment' groups several mtts together.
2. first_seg - segment location relative to mtt table.
In the current implementation:
1. order     - log(mtts) rather than segments
2. offset    - mtt index in mtt table

Note: The actual mtt allocation is made in segments but it is
      transparent to callers.

Rational: The mtt resource holders are not interested on how the allocation
          of mtt is done, but rather on how they will use it.

Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Reviewed-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c            |   2 +-
 drivers/net/ethernet/mellanox/mlx4/main.c          |  11 +--
 drivers/net/ethernet/mellanox/mlx4/mlx4.h          |   3 +-
 drivers/net/ethernet/mellanox/mlx4/mr.c            | 107 +++++++++++----------
 drivers/net/ethernet/mellanox/mlx4/profile.c       |   4 +-
 .../net/ethernet/mellanox/mlx4/resource_tracker.c  |  59 +++++-------
 include/linux/mlx4/device.h                        |   5 +-
 7 files changed, 92 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 49bb2ead805a..99415fec9fdb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -209,7 +209,7 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave,
 		size = dev->caps.num_mpts;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MPT_QUOTA_OFFSET);
 
-		size = dev->caps.num_mtt_segs * dev->caps.mtts_per_seg;
+		size = dev->caps.num_mtts;
 		MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_MTT_QUOTA_OFFSET);
 
 		size = dev->caps.num_mgms + dev->caps.num_amgms;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 8be56326b04a..19363b618295 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -112,7 +112,7 @@ module_param_named(use_prio, use_prio, bool, 0444);
 MODULE_PARM_DESC(use_prio, "Enable steering by VLAN priority on ETH ports "
 		  "(0/1, default 0)");
 
-static int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
+int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
 
@@ -222,9 +222,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.max_cqes	     = dev_cap->max_cq_sz - 1;
 	dev->caps.reserved_cqs	     = dev_cap->reserved_cqs;
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
-	dev->caps.mtts_per_seg	     = 1 << log_mtts_per_seg;
-	dev->caps.reserved_mtts	     = DIV_ROUND_UP(dev_cap->reserved_mtts,
-						    dev->caps.mtts_per_seg);
+	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
 	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
@@ -232,7 +230,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 					dev_cap->reserved_xrcds : 0;
 	dev->caps.max_xrcds          = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 					dev_cap->max_xrcds : 0;
-	dev->caps.mtt_entry_sz	     = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
+	dev->caps.mtt_entry_sz       = dev_cap->mtt_entry_sz;
+
 	dev->caps.max_msg_sz         = dev_cap->max_msg_sz;
 	dev->caps.page_size_cap	     = ~(u32) (dev_cap->min_page_sz - 1);
 	dev->caps.flags		     = dev_cap->flags;
@@ -569,7 +568,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 	err = mlx4_init_icm_table(dev, &priv->mr_table.mtt_table,
 				  init_hca->mtt_base,
 				  dev->caps.mtt_entry_sz,
-				  dev->caps.num_mtt_segs,
+				  dev->caps.num_mtts,
 				  dev->caps.reserved_mtts, 1, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map MTT context memory, aborting.\n");
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index abf65d8af48d..879f825c6f6a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -191,6 +191,7 @@ do {									\
 	dev_warn(&mdev->pdev->dev, format, ##arg)
 
 extern int mlx4_log_num_mgm_entry_size;
+extern int log_mtts_per_seg;
 
 #define MLX4_MAX_NUM_SLAVES	(MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF)
 #define ALL_SLAVES 0xff
@@ -240,7 +241,7 @@ struct mlx4_mpt_entry {
 	__be32 win_cnt;
 	u8	reserved1[3];
 	u8	mtt_rep;
-	__be64 mtt_seg;
+	__be64 mtt_addr;
 	__be32 mtt_sz;
 	__be32 entity_size;
 	__be32 first_byte_offset;
diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c
index f8fd0a1d73af..f7243b26bdf5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mr.c
@@ -166,18 +166,24 @@ u32 __mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
 {
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 	u32 seg;
+	int seg_order;
+	u32 offset;
 
-	seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, order);
+	seg_order = max_t(int, order - log_mtts_per_seg, 0);
+
+	seg = mlx4_buddy_alloc(&mr_table->mtt_buddy, seg_order);
 	if (seg == -1)
 		return -1;
 
-	if (mlx4_table_get_range(dev, &mr_table->mtt_table, seg,
-				 seg + (1 << order) - 1)) {
-		mlx4_buddy_free(&mr_table->mtt_buddy, seg, order);
+	offset = seg * (1 << log_mtts_per_seg);
+
+	if (mlx4_table_get_range(dev, &mr_table->mtt_table, offset,
+				 offset + (1 << order) - 1)) {
+		mlx4_buddy_free(&mr_table->mtt_buddy, seg, seg_order);
 		return -1;
 	}
 
-	return seg;
+	return offset;
 }
 
 static u32 mlx4_alloc_mtt_range(struct mlx4_dev *dev, int order)
@@ -212,45 +218,49 @@ int mlx4_mtt_init(struct mlx4_dev *dev, int npages, int page_shift,
 	} else
 		mtt->page_shift = page_shift;
 
-	for (mtt->order = 0, i = dev->caps.mtts_per_seg; i < npages; i <<= 1)
+	for (mtt->order = 0, i = 1; i < npages; i <<= 1)
 		++mtt->order;
 
-	mtt->first_seg = mlx4_alloc_mtt_range(dev, mtt->order);
-	if (mtt->first_seg == -1)
+	mtt->offset = mlx4_alloc_mtt_range(dev, mtt->order);
+	if (mtt->offset == -1)
 		return -ENOMEM;
 
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mlx4_mtt_init);
 
-void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg,
-				  int order)
+void __mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
 {
+	u32 first_seg;
+	int seg_order;
 	struct mlx4_mr_table *mr_table = &mlx4_priv(dev)->mr_table;
 
-	mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, order);
+	seg_order = max_t(int, order - log_mtts_per_seg, 0);
+	first_seg = offset / (1 << log_mtts_per_seg);
+
+	mlx4_buddy_free(&mr_table->mtt_buddy, first_seg, seg_order);
 	mlx4_table_put_range(dev, &mr_table->mtt_table, first_seg,
-				     first_seg + (1 << order) - 1);
+			     first_seg + (1 << seg_order) - 1);
 }
 
-static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 first_seg, int order)
+static void mlx4_free_mtt_range(struct mlx4_dev *dev, u32 offset, int order)
 {
 	u64 in_param;
 	int err;
 
 	if (mlx4_is_mfunc(dev)) {
-		set_param_l(&in_param, first_seg);
+		set_param_l(&in_param, offset);
 		set_param_h(&in_param, order);
 		err = mlx4_cmd(dev, in_param, RES_MTT, RES_OP_RESERVE_AND_MAP,
 						       MLX4_CMD_FREE_RES,
 						       MLX4_CMD_TIME_CLASS_A,
 						       MLX4_CMD_WRAPPED);
 		if (err)
-			mlx4_warn(dev, "Failed to free mtt range at:%d"
-				  " order:%d\n", first_seg, order);
+			mlx4_warn(dev, "Failed to free mtt range at:"
+				  "%d order:%d\n", offset, order);
 		return;
 	}
-	__mlx4_free_mtt_range(dev, first_seg, order);
+	 __mlx4_free_mtt_range(dev, offset, order);
 }
 
 void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
@@ -258,13 +268,13 @@ void mlx4_mtt_cleanup(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
 	if (mtt->order < 0)
 		return;
 
-	mlx4_free_mtt_range(dev, mtt->first_seg, mtt->order);
+	mlx4_free_mtt_range(dev, mtt->offset, mtt->order);
 }
 EXPORT_SYMBOL_GPL(mlx4_mtt_cleanup);
 
 u64 mlx4_mtt_addr(struct mlx4_dev *dev, struct mlx4_mtt *mtt)
 {
-	return (u64) mtt->first_seg * dev->caps.mtt_entry_sz;
+	return (u64) mtt->offset * dev->caps.mtt_entry_sz;
 }
 EXPORT_SYMBOL_GPL(mlx4_mtt_addr);
 
@@ -504,9 +514,10 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 
 	if (mr->mtt.order < 0) {
 		mpt_entry->flags |= cpu_to_be32(MLX4_MPT_FLAG_PHYSICAL);
-		mpt_entry->mtt_seg = 0;
+		mpt_entry->mtt_addr = 0;
 	} else {
-		mpt_entry->mtt_seg = cpu_to_be64(mlx4_mtt_addr(dev, &mr->mtt));
+		mpt_entry->mtt_addr = cpu_to_be64(mlx4_mtt_addr(dev,
+						  &mr->mtt));
 	}
 
 	if (mr->mtt.order >= 0 && mr->mtt.page_shift == 0) {
@@ -514,8 +525,7 @@ int mlx4_mr_enable(struct mlx4_dev *dev, struct mlx4_mr *mr)
 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_FREE);
 		mpt_entry->pd_flags |= cpu_to_be32(MLX4_MPT_PD_FLAG_FAST_REG |
 						   MLX4_MPT_PD_FLAG_RAE);
-		mpt_entry->mtt_sz    = cpu_to_be32((1 << mr->mtt.order) *
-						   dev->caps.mtts_per_seg);
+		mpt_entry->mtt_sz    = cpu_to_be32(1 << mr->mtt.order);
 	} else {
 		mpt_entry->flags    |= cpu_to_be32(MLX4_MPT_FLAG_SW_OWNS);
 	}
@@ -548,18 +558,10 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 	__be64 *mtts;
 	dma_addr_t dma_handle;
 	int i;
-	int s = start_index * sizeof (u64);
-
-	/* All MTTs must fit in the same page */
-	if (start_index / (PAGE_SIZE / sizeof (u64)) !=
-	    (start_index + npages - 1) / (PAGE_SIZE / sizeof (u64)))
-		return -EINVAL;
 
-	if (start_index & (dev->caps.mtts_per_seg - 1))
-		return -EINVAL;
+	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->offset +
+			       start_index, &dma_handle);
 
-	mtts = mlx4_table_find(&priv->mr_table.mtt_table, mtt->first_seg +
-				s / dev->caps.mtt_entry_sz, &dma_handle);
 	if (!mtts)
 		return -ENOMEM;
 
@@ -580,15 +582,25 @@ int __mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 {
 	int err = 0;
 	int chunk;
+	int mtts_per_page;
+	int max_mtts_first_page;
+
+	/* compute how may mtts fit in the first page */
+	mtts_per_page = PAGE_SIZE / sizeof(u64);
+	max_mtts_first_page = mtts_per_page - (mtt->offset + start_index)
+			      % mtts_per_page;
+
+	chunk = min_t(int, max_mtts_first_page, npages);
 
 	while (npages > 0) {
-		chunk = min_t(int, PAGE_SIZE / sizeof(u64), npages);
 		err = mlx4_write_mtt_chunk(dev, mtt, start_index, chunk, page_list);
 		if (err)
 			return err;
 		npages      -= chunk;
 		start_index += chunk;
 		page_list   += chunk;
+
+		chunk = min_t(int, mtts_per_page, npages);
 	}
 	return err;
 }
@@ -612,18 +624,9 @@ int mlx4_write_mtt(struct mlx4_dev *dev, struct mlx4_mtt *mtt,
 		inbox = mailbox->buf;
 
 		while (npages > 0) {
-			int s = mtt->first_seg * dev->caps.mtts_per_seg +
-				start_index;
-			chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) -
-				      dev->caps.mtts_per_seg, npages);
-			if (s / (PAGE_SIZE / sizeof(u64)) !=
-			    (s + chunk - 1) / (PAGE_SIZE / sizeof(u64)))
-				chunk = PAGE_SIZE / sizeof(u64) -
-					(s % (PAGE_SIZE / sizeof(u64)));
-
-			inbox[0] = cpu_to_be64(mtt->first_seg *
-					       dev->caps.mtts_per_seg +
-					       start_index);
+			chunk = min_t(int, MLX4_MAILBOX_SIZE / sizeof(u64) - 2,
+				      npages);
+			inbox[0] = cpu_to_be64(mtt->offset + start_index);
 			inbox[1] = 0;
 			for (i = 0; i < chunk; ++i)
 				inbox[i + 2] = cpu_to_be64(page_list[i] |
@@ -690,7 +693,8 @@ int mlx4_init_mr_table(struct mlx4_dev *dev)
 		return err;
 
 	err = mlx4_buddy_init(&mr_table->mtt_buddy,
-			      ilog2(dev->caps.num_mtt_segs));
+			      ilog2(dev->caps.num_mtts /
+			      (1 << log_mtts_per_seg)));
 	if (err)
 		goto err_buddy;
 
@@ -809,7 +813,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 		   int max_maps, u8 page_shift, struct mlx4_fmr *fmr)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
-	u64 mtt_seg;
+	u64 mtt_offset;
 	int err = -ENOMEM;
 
 	if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
@@ -829,11 +833,12 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
 	if (err)
 		return err;
 
-	mtt_seg = fmr->mr.mtt.first_seg * dev->caps.mtt_entry_sz;
+	mtt_offset = fmr->mr.mtt.offset * dev->caps.mtt_entry_sz;
 
 	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
-				    fmr->mr.mtt.first_seg,
+				    fmr->mr.mtt.offset,
 				    &fmr->dma_handle);
+
 	if (!fmr->mtts) {
 		err = -ENOMEM;
 		goto err_free;
@@ -872,7 +877,7 @@ static int mlx4_fmr_alloc_reserved(struct mlx4_dev *dev, u32 mridx,
 		return err;
 
 	fmr->mtts = mlx4_table_find(&priv->mr_table.mtt_table,
-				    fmr->mr.mtt.first_seg,
+				    fmr->mr.mtt.offset,
 				    &fmr->dma_handle);
 	if (!fmr->mtts) {
 		err = -ENOMEM;
diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
index 771c4605ef86..66f91ca7a7c6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/profile.c
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c
@@ -98,7 +98,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_EQ].size     = dev_cap->eqc_entry_sz;
 	profile[MLX4_RES_DMPT].size   = dev_cap->dmpt_entry_sz;
 	profile[MLX4_RES_CMPT].size   = dev_cap->cmpt_entry_sz;
-	profile[MLX4_RES_MTT].size    = dev->caps.mtts_per_seg * dev_cap->mtt_entry_sz;
+	profile[MLX4_RES_MTT].size    = dev_cap->mtt_entry_sz;
 	profile[MLX4_RES_MCG].size    = mlx4_get_mgm_entry_size(dev);
 
 	profile[MLX4_RES_QP].num      = request->num_qp;
@@ -210,7 +210,7 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 			init_hca->cmpt_base	 = profile[i].start;
 			break;
 		case MLX4_RES_MTT:
-			dev->caps.num_mtt_segs	 = profile[i].num;
+			dev->caps.num_mtts	 = profile[i].num;
 			priv->mr_table.mtt_base	 = profile[i].start;
 			init_hca->mtt_base	 = profile[i].start;
 			break;
diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
index 0d99f57f9c8c..bdd61c35d044 100644
--- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
+++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
@@ -1550,9 +1550,9 @@ static int mr_phys_mpt(struct mlx4_mpt_entry *mpt)
 	return (be32_to_cpu(mpt->flags) >> 9) & 1;
 }
 
-static int mr_get_mtt_seg(struct mlx4_mpt_entry *mpt)
+static int mr_get_mtt_addr(struct mlx4_mpt_entry *mpt)
 {
-	return (int)be64_to_cpu(mpt->mtt_seg) & 0xfffffff8;
+	return (int)be64_to_cpu(mpt->mtt_addr) & 0xfffffff8;
 }
 
 static int mr_get_mtt_size(struct mlx4_mpt_entry *mpt)
@@ -1565,12 +1565,12 @@ static int mr_get_pdn(struct mlx4_mpt_entry *mpt)
 	return be32_to_cpu(mpt->pd_flags) & 0xffffff;
 }
 
-static int qp_get_mtt_seg(struct mlx4_qp_context *qpc)
+static int qp_get_mtt_addr(struct mlx4_qp_context *qpc)
 {
 	return be32_to_cpu(qpc->mtt_base_addr_l) & 0xfffffff8;
 }
 
-static int srq_get_mtt_seg(struct mlx4_srq_context *srqc)
+static int srq_get_mtt_addr(struct mlx4_srq_context *srqc)
 {
 	return be32_to_cpu(srqc->mtt_base_addr_l) & 0xfffffff8;
 }
@@ -1614,8 +1614,8 @@ static int pdn2slave(int pdn)
 static int check_mtt_range(struct mlx4_dev *dev, int slave, int start,
 			   int size, struct res_mtt *mtt)
 {
-	int res_start = mtt->com.res_id * dev->caps.mtts_per_seg;
-	int res_size = (1 << mtt->order) * dev->caps.mtts_per_seg;
+	int res_start = mtt->com.res_id;
+	int res_size = (1 << mtt->order);
 
 	if (start < res_start || start + size > res_start + res_size)
 		return -EPERM;
@@ -1632,8 +1632,7 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
 	int index = vhcr->in_modifier;
 	struct res_mtt *mtt;
 	struct res_mpt *mpt;
-	int mtt_base = (mr_get_mtt_seg(inbox->buf) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = mr_get_mtt_addr(inbox->buf) / dev->caps.mtt_entry_sz;
 	int phys;
 	int id;
 
@@ -1644,8 +1643,7 @@ int mlx4_SW2HW_MPT_wrapper(struct mlx4_dev *dev, int slave,
 
 	phys = mr_phys_mpt(inbox->buf);
 	if (!phys) {
-		err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg,
-			      RES_MTT, &mtt);
+		err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 		if (err)
 			goto ex_abort;
 
@@ -1769,8 +1767,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 	struct res_mtt *mtt;
 	struct res_qp *qp;
 	struct mlx4_qp_context *qpc = inbox->buf + 8;
-	int mtt_base = (qp_get_mtt_seg(qpc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = qp_get_mtt_addr(qpc) / dev->caps.mtt_entry_sz;
 	int mtt_size = qp_get_mtt_size(qpc);
 	struct res_cq *rcq;
 	struct res_cq *scq;
@@ -1786,8 +1783,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 		return err;
 	qp->local_qpn = local_qpn;
 
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto ex_abort;
 
@@ -1836,7 +1832,7 @@ int mlx4_RST2INIT_QP_wrapper(struct mlx4_dev *dev, int slave,
 		qp->srq = srq;
 	}
 	put_res(dev, slave, rcqn, RES_CQ);
-	put_res(dev, slave, mtt_base  / dev->caps.mtts_per_seg, RES_MTT);
+	put_res(dev, slave, mtt_base, RES_MTT);
 	res_end_move(dev, slave, RES_QP, qpn);
 
 	return 0;
@@ -1850,14 +1846,14 @@ ex_put_scq:
 ex_put_rcq:
 	put_res(dev, slave, rcqn, RES_CQ);
 ex_put_mtt:
-	put_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT);
+	put_res(dev, slave, mtt_base, RES_MTT);
 ex_abort:
 	res_abort_move(dev, slave, RES_QP, qpn);
 
 	return err;
 }
 
-static int eq_get_mtt_seg(struct mlx4_eq_context *eqc)
+static int eq_get_mtt_addr(struct mlx4_eq_context *eqc)
 {
 	return be32_to_cpu(eqc->mtt_base_addr_l) & 0xfffffff8;
 }
@@ -1873,7 +1869,7 @@ static int eq_get_mtt_size(struct mlx4_eq_context *eqc)
 	return 1 << (log_eq_size + 5 - page_shift);
 }
 
-static int cq_get_mtt_seg(struct mlx4_cq_context *cqc)
+static int cq_get_mtt_addr(struct mlx4_cq_context *cqc)
 {
 	return be32_to_cpu(cqc->mtt_base_addr_l) & 0xfffffff8;
 }
@@ -1899,8 +1895,7 @@ int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
 	int eqn = vhcr->in_modifier;
 	int res_id = (slave << 8) | eqn;
 	struct mlx4_eq_context *eqc = inbox->buf;
-	int mtt_base = (eq_get_mtt_seg(eqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = eq_get_mtt_addr(eqc) / dev->caps.mtt_entry_sz;
 	int mtt_size = eq_get_mtt_size(eqc);
 	struct res_eq *eq;
 	struct res_mtt *mtt;
@@ -1912,8 +1907,7 @@ int mlx4_SW2HW_EQ_wrapper(struct mlx4_dev *dev, int slave,
 	if (err)
 		goto out_add;
 
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto out_move;
 
@@ -1986,7 +1980,8 @@ int mlx4_WRITE_MTT_wrapper(struct mlx4_dev *dev, int slave,
 	/* Call the SW implementation of write_mtt:
 	 * - Prepare a dummy mtt struct
 	 * - Translate inbox contents to simple addresses in host endianess */
-	mtt.first_seg = 0;
+	mtt.offset = 0;  /* TBD this is broken but I don't handle it since
+			    we don't really use it */
 	mtt.order = 0;
 	mtt.page_shift = 0;
 	for (i = 0; i < npages; ++i)
@@ -2137,16 +2132,14 @@ int mlx4_SW2HW_CQ_wrapper(struct mlx4_dev *dev, int slave,
 	int err;
 	int cqn = vhcr->in_modifier;
 	struct mlx4_cq_context *cqc = inbox->buf;
-	int mtt_base = (cq_get_mtt_seg(cqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
 	struct res_cq *cq;
 	struct res_mtt *mtt;
 
 	err = cq_res_start_move_to(dev, slave, cqn, RES_CQ_HW, &cq);
 	if (err)
 		return err;
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto out_move;
 	err = check_mtt_range(dev, slave, mtt_base, cq_get_mtt_size(cqc), mtt);
@@ -2228,8 +2221,7 @@ static int handle_resize(struct mlx4_dev *dev, int slave,
 	struct res_mtt *orig_mtt;
 	struct res_mtt *mtt;
 	struct mlx4_cq_context *cqc = inbox->buf;
-	int mtt_base = (cq_get_mtt_seg(cqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = cq_get_mtt_addr(cqc) / dev->caps.mtt_entry_sz;
 
 	err = get_res(dev, slave, cq->mtt->com.res_id, RES_MTT, &orig_mtt);
 	if (err)
@@ -2240,8 +2232,7 @@ static int handle_resize(struct mlx4_dev *dev, int slave,
 		goto ex_put;
 	}
 
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg, RES_MTT,
-		      &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto ex_put;
 
@@ -2325,8 +2316,7 @@ int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
 	struct res_mtt *mtt;
 	struct res_srq *srq;
 	struct mlx4_srq_context *srqc = inbox->buf;
-	int mtt_base = (srq_get_mtt_seg(srqc) / dev->caps.mtt_entry_sz) *
-		dev->caps.mtts_per_seg;
+	int mtt_base = srq_get_mtt_addr(srqc) / dev->caps.mtt_entry_sz;
 
 	if (srqn != (be32_to_cpu(srqc->state_logsize_srqn) & 0xffffff))
 		return -EINVAL;
@@ -2334,8 +2324,7 @@ int mlx4_SW2HW_SRQ_wrapper(struct mlx4_dev *dev, int slave,
 	err = srq_res_start_move_to(dev, slave, srqn, RES_SRQ_HW, &srq);
 	if (err)
 		return err;
-	err = get_res(dev, slave, mtt_base / dev->caps.mtts_per_seg,
-		      RES_MTT, &mtt);
+	err = get_res(dev, slave, mtt_base, RES_MTT, &mtt);
 	if (err)
 		goto ex_abort;
 	err = check_mtt_range(dev, slave, mtt_base, srq_get_mtt_size(srqc),
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 3ef73b05e24e..65bb466c575f 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -272,8 +272,7 @@ struct mlx4_caps {
 	int			num_comp_vectors;
 	int			comp_pool;
 	int			num_mpts;
-	int			num_mtt_segs;
-	int			mtts_per_seg;
+	int			num_mtts;
 	int			fmr_reserved_mtts;
 	int			reserved_mtts;
 	int			reserved_mrws;
@@ -323,7 +322,7 @@ struct mlx4_buf {
 };
 
 struct mlx4_mtt {
-	u32			first_seg;
+	u32			offset;
 	int			order;
 	int			page_shift;
 };
-- 
cgit v1.2.3


From ab9c17a009ee8eb8c667f22dc0be0709effceab9 Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Tue, 13 Dec 2011 04:18:30 +0000
Subject: mlx4_core: Modify driver initialization flow to accommodate SRIOV for
 Ethernet

1. Added module parameters sr_iov and probe_vf for controlling enablement of
   SRIOV mode.
2. Increased default max num-qps, num-mpts and log_num_macs to accomodate
   SRIOV mode
3. Added port_type_array as a module parameter to allow driver startup with
   ports configured as desired.
   In SRIOV mode, only ETH is supported, and this array is ignored; otherwise,
   for the case where the FW supports both port types (ETH and IB), the
   port_type_array parameter is used.
   By default, the port_type_array is set to configure both ports as IB.
4. When running in sriov mode, the master needs to initialize the ICM eq table
   to hold the eq's for itself and also for all the slaves.
5. mlx4_set_port_mask() now invoked from mlx4_init_hca, instead of in mlx4_dev_cap.
6. Introduced sriov VF (slave) device startup/teardown logic (mainly procedures
   mlx4_init_slave, mlx4_slave_exit, mlx4_slave_cap, mlx4_slave_exit and flow
   modifications in __mlx4_init_one, mlx4_init_hca, and mlx4_setup_hca).
   VFs obtain their startup information from the PF (master) device via the
   comm channel.
7. In SRIOV mode (both PF and VF), MSI_X must be enabled, or the driver
   aborts loading the device.
8. Do not allow setting port type via sysfs when running in SRIOV mode.
9. mlx4_get_ownership:  Currently, only one PF is supported by the driver.
   If the HCA is burned with FW which enables more than one PF, only one
   of the PFs is allowed to run.  The first one up grabs a FW ownership
   semaphone -- all other PFs will find that semaphore taken, and the
   driver will not allow them to run.

Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: Liran Liss <liranl@mellanox.co.il>
Signed-off-by: Marcel Apfelbaum <marcela@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/cmd.c  | 170 +++++-
 drivers/net/ethernet/mellanox/mlx4/fw.c   |  68 ++-
 drivers/net/ethernet/mellanox/mlx4/fw.h   |   2 +
 drivers/net/ethernet/mellanox/mlx4/main.c | 842 ++++++++++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/mlx4.h |   6 +
 include/linux/mlx4/cmd.h                  |   2 +
 include/linux/mlx4/device.h               |   1 +
 7 files changed, 934 insertions(+), 157 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index 8e6e4b20b0e2..c4fef839168c 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -257,7 +257,7 @@ out:
 	return err;
 }
 
-static int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
 		  unsigned long timeout)
 {
 	if (mlx4_priv(dev)->cmd.use_events)
@@ -1390,6 +1390,153 @@ void mlx4_master_comm_channel(struct work_struct *work)
 		mlx4_warn(dev, "Failed to arm comm channel events\n");
 }
 
+static int sync_toggles(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int wr_toggle;
+	int rd_toggle;
+	unsigned long end;
+
+	wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31;
+	end = jiffies + msecs_to_jiffies(5000);
+
+	while (time_before(jiffies, end)) {
+		rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31;
+		if (rd_toggle == wr_toggle) {
+			priv->cmd.comm_toggle = rd_toggle;
+			return 0;
+		}
+
+		cond_resched();
+	}
+
+	/*
+	 * we could reach here if for example the previous VM using this
+	 * function misbehaved and left the channel with unsynced state. We
+	 * should fix this here and give this VM a chance to use a properly
+	 * synced channel
+	 */
+	mlx4_warn(dev, "recovering from previously mis-behaved VM\n");
+	__raw_writel((__force u32) 0, &priv->mfunc.comm->slave_read);
+	__raw_writel((__force u32) 0, &priv->mfunc.comm->slave_write);
+	priv->cmd.comm_toggle = 0;
+
+	return 0;
+}
+
+int mlx4_multi_func_init(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_slave_state *s_state;
+	int i, err, port;
+
+	priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE,
+					    &priv->mfunc.vhcr_dma,
+					    GFP_KERNEL);
+	if (!priv->mfunc.vhcr) {
+		mlx4_err(dev, "Couldn't allocate vhcr.\n");
+		return -ENOMEM;
+	}
+
+	if (mlx4_is_master(dev))
+		priv->mfunc.comm =
+		ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) +
+			priv->fw.comm_base, MLX4_COMM_PAGESIZE);
+	else
+		priv->mfunc.comm =
+		ioremap(pci_resource_start(dev->pdev, 2) +
+			MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE);
+	if (!priv->mfunc.comm) {
+		mlx4_err(dev, "Couldn't map communication vector.\n");
+		goto err_vhcr;
+	}
+
+	if (mlx4_is_master(dev)) {
+		priv->mfunc.master.slave_state =
+			kzalloc(dev->num_slaves *
+				sizeof(struct mlx4_slave_state), GFP_KERNEL);
+		if (!priv->mfunc.master.slave_state)
+			goto err_comm;
+
+		for (i = 0; i < dev->num_slaves; ++i) {
+			s_state = &priv->mfunc.master.slave_state[i];
+			s_state->last_cmd = MLX4_COMM_CMD_RESET;
+			__raw_writel((__force u32) 0,
+				     &priv->mfunc.comm[i].slave_write);
+			__raw_writel((__force u32) 0,
+				     &priv->mfunc.comm[i].slave_read);
+			mmiowb();
+			for (port = 1; port <= MLX4_MAX_PORTS; port++) {
+				s_state->vlan_filter[port] =
+					kzalloc(sizeof(struct mlx4_vlan_fltr),
+						GFP_KERNEL);
+				if (!s_state->vlan_filter[port]) {
+					if (--port)
+						kfree(s_state->vlan_filter[port]);
+					goto err_slaves;
+				}
+				INIT_LIST_HEAD(&s_state->mcast_filters[port]);
+			}
+			spin_lock_init(&s_state->lock);
+		}
+
+		memset(&priv->mfunc.master.cmd_eqe, 0, sizeof(struct mlx4_eqe));
+		priv->mfunc.master.cmd_eqe.type = MLX4_EVENT_TYPE_CMD;
+		INIT_WORK(&priv->mfunc.master.comm_work,
+			  mlx4_master_comm_channel);
+		INIT_WORK(&priv->mfunc.master.slave_event_work,
+			  mlx4_gen_slave_eqe);
+		INIT_WORK(&priv->mfunc.master.slave_flr_event_work,
+			  mlx4_master_handle_slave_flr);
+		spin_lock_init(&priv->mfunc.master.slave_state_lock);
+		priv->mfunc.master.comm_wq =
+			create_singlethread_workqueue("mlx4_comm");
+		if (!priv->mfunc.master.comm_wq)
+			goto err_slaves;
+
+		if (mlx4_init_resource_tracker(dev))
+			goto err_thread;
+
+		sema_init(&priv->cmd.slave_sem, 1);
+		err = mlx4_ARM_COMM_CHANNEL(dev);
+		if (err) {
+			mlx4_err(dev, " Failed to arm comm channel eq: %x\n",
+				 err);
+			goto err_resource;
+		}
+
+	} else {
+		err = sync_toggles(dev);
+		if (err) {
+			mlx4_err(dev, "Couldn't sync toggles\n");
+			goto err_comm;
+		}
+
+		sema_init(&priv->cmd.slave_sem, 1);
+	}
+	return 0;
+
+err_resource:
+	mlx4_free_resource_tracker(dev);
+err_thread:
+	flush_workqueue(priv->mfunc.master.comm_wq);
+	destroy_workqueue(priv->mfunc.master.comm_wq);
+err_slaves:
+	while (--i) {
+		for (port = 1; port <= MLX4_MAX_PORTS; port++)
+			kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+	}
+	kfree(priv->mfunc.master.slave_state);
+err_comm:
+	iounmap(priv->mfunc.comm);
+err_vhcr:
+	dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+					     priv->mfunc.vhcr,
+					     priv->mfunc.vhcr_dma);
+	priv->mfunc.vhcr = NULL;
+	return -ENOMEM;
+}
+
 int mlx4_cmd_init(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -1425,6 +1572,27 @@ err_hcr:
 	return -ENOMEM;
 }
 
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	int i, port;
+
+	if (mlx4_is_master(dev)) {
+		flush_workqueue(priv->mfunc.master.comm_wq);
+		destroy_workqueue(priv->mfunc.master.comm_wq);
+		for (i = 0; i < dev->num_slaves; i++) {
+			for (port = 1; port <= MLX4_MAX_PORTS; port++)
+				kfree(priv->mfunc.master.slave_state[i].vlan_filter[port]);
+		}
+		kfree(priv->mfunc.master.slave_state);
+		iounmap(priv->mfunc.comm);
+		dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE,
+						     priv->mfunc.vhcr,
+						     priv->mfunc.vhcr_dma);
+		priv->mfunc.vhcr = NULL;
+	}
+}
+
 void mlx4_cmd_cleanup(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index 99415fec9fdb..f03b54e0aa53 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -1071,7 +1071,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 
 	/* UAR attributes */
 
-	MLX4_PUT(inbox, (u8) (PAGE_SHIFT - 12), INIT_HCA_UAR_PAGE_SZ_OFFSET);
+	MLX4_PUT(inbox, param->uar_page_sz,	INIT_HCA_UAR_PAGE_SZ_OFFSET);
 	MLX4_PUT(inbox, param->log_uar_sz,      INIT_HCA_LOG_UAR_SZ_OFFSET);
 
 	err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000,
@@ -1084,6 +1084,72 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param)
 	return err;
 }
 
+int mlx4_QUERY_HCA(struct mlx4_dev *dev,
+		   struct mlx4_init_hca_param *param)
+{
+	struct mlx4_cmd_mailbox *mailbox;
+	__be32 *outbox;
+	int err;
+
+#define QUERY_HCA_GLOBAL_CAPS_OFFSET	0x04
+
+	mailbox = mlx4_alloc_cmd_mailbox(dev);
+	if (IS_ERR(mailbox))
+		return PTR_ERR(mailbox);
+	outbox = mailbox->buf;
+
+	err = mlx4_cmd_box(dev, 0, mailbox->dma, 0, 0,
+			   MLX4_CMD_QUERY_HCA,
+			   MLX4_CMD_TIME_CLASS_B,
+			   !mlx4_is_slave(dev));
+	if (err)
+		goto out;
+
+	MLX4_GET(param->global_caps, outbox, QUERY_HCA_GLOBAL_CAPS_OFFSET);
+
+	/* QPC/EEC/CQC/EQC/RDMARC attributes */
+
+	MLX4_GET(param->qpc_base,      outbox, INIT_HCA_QPC_BASE_OFFSET);
+	MLX4_GET(param->log_num_qps,   outbox, INIT_HCA_LOG_QP_OFFSET);
+	MLX4_GET(param->srqc_base,     outbox, INIT_HCA_SRQC_BASE_OFFSET);
+	MLX4_GET(param->log_num_srqs,  outbox, INIT_HCA_LOG_SRQ_OFFSET);
+	MLX4_GET(param->cqc_base,      outbox, INIT_HCA_CQC_BASE_OFFSET);
+	MLX4_GET(param->log_num_cqs,   outbox, INIT_HCA_LOG_CQ_OFFSET);
+	MLX4_GET(param->altc_base,     outbox, INIT_HCA_ALTC_BASE_OFFSET);
+	MLX4_GET(param->auxc_base,     outbox, INIT_HCA_AUXC_BASE_OFFSET);
+	MLX4_GET(param->eqc_base,      outbox, INIT_HCA_EQC_BASE_OFFSET);
+	MLX4_GET(param->log_num_eqs,   outbox, INIT_HCA_LOG_EQ_OFFSET);
+	MLX4_GET(param->rdmarc_base,   outbox, INIT_HCA_RDMARC_BASE_OFFSET);
+	MLX4_GET(param->log_rd_per_qp, outbox, INIT_HCA_LOG_RD_OFFSET);
+
+	/* multicast attributes */
+
+	MLX4_GET(param->mc_base,         outbox, INIT_HCA_MC_BASE_OFFSET);
+	MLX4_GET(param->log_mc_entry_sz, outbox,
+		 INIT_HCA_LOG_MC_ENTRY_SZ_OFFSET);
+	MLX4_GET(param->log_mc_hash_sz,  outbox,
+		 INIT_HCA_LOG_MC_HASH_SZ_OFFSET);
+	MLX4_GET(param->log_mc_table_sz, outbox,
+		 INIT_HCA_LOG_MC_TABLE_SZ_OFFSET);
+
+	/* TPT attributes */
+
+	MLX4_GET(param->dmpt_base,  outbox, INIT_HCA_DMPT_BASE_OFFSET);
+	MLX4_GET(param->log_mpt_sz, outbox, INIT_HCA_LOG_MPT_SZ_OFFSET);
+	MLX4_GET(param->mtt_base,   outbox, INIT_HCA_MTT_BASE_OFFSET);
+	MLX4_GET(param->cmpt_base,  outbox, INIT_HCA_CMPT_BASE_OFFSET);
+
+	/* UAR attributes */
+
+	MLX4_GET(param->uar_page_sz, outbox, INIT_HCA_UAR_PAGE_SZ_OFFSET);
+	MLX4_GET(param->log_uar_sz, outbox, INIT_HCA_LOG_UAR_SZ_OFFSET);
+
+out:
+	mlx4_free_cmd_mailbox(dev, mailbox);
+
+	return err;
+}
+
 int mlx4_INIT_PORT_wrapper(struct mlx4_dev *dev, int slave,
 			   struct mlx4_vhcr *vhcr,
 			   struct mlx4_cmd_mailbox *inbox,
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 8f0f4cf7d2c0..3368363a8ec5 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -161,6 +161,7 @@ struct mlx4_init_hca_param {
 	u8  log_mc_table_sz;
 	u8  log_mpt_sz;
 	u8  log_uar_sz;
+	u8  uar_page_sz; /* log pg sz in 4k chunks */
 };
 
 struct mlx4_init_ib_param {
@@ -197,6 +198,7 @@ int mlx4_RUN_FW(struct mlx4_dev *dev);
 int mlx4_QUERY_FW(struct mlx4_dev *dev);
 int mlx4_QUERY_ADAPTER(struct mlx4_dev *dev, struct mlx4_adapter *adapter);
 int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
+int mlx4_QUERY_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param);
 int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic);
 int mlx4_map_cmd(struct mlx4_dev *dev, u16 op, struct mlx4_icm *icm, u64 virt);
 int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages);
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 19363b618295..b969bfb569e3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -40,6 +40,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/io-mapping.h>
+#include <linux/delay.h>
 
 #include <linux/mlx4/device.h>
 #include <linux/mlx4/doorbell.h>
@@ -75,6 +76,14 @@ MODULE_PARM_DESC(msi_x, "attempt to use MSI-X if nonzero");
 
 #endif /* CONFIG_PCI_MSI */
 
+static int num_vfs;
+module_param(num_vfs, int, 0444);
+MODULE_PARM_DESC(num_vfs, "enable #num_vfs functions if num_vfs > 0");
+
+static int probe_vf;
+module_param(probe_vf, int, 0644);
+MODULE_PARM_DESC(probe_vf, "number of vfs to probe by pf driver (num_vfs > 0)");
+
 int mlx4_log_num_mgm_entry_size = 10;
 module_param_named(log_num_mgm_entry_size,
 			mlx4_log_num_mgm_entry_size, int, 0444);
@@ -83,21 +92,26 @@ MODULE_PARM_DESC(log_num_mgm_entry_size, "log mgm size, that defines the num"
 					 " 10 gives 248.range: 9<="
 					 " log_num_mgm_entry_size <= 12");
 
+#define MLX4_VF                                        (1 << 0)
+
+#define HCA_GLOBAL_CAP_MASK            0
+#define PF_CONTEXT_BEHAVIOUR_MASK      0
+
 static char mlx4_version[] __devinitdata =
 	DRV_NAME ": Mellanox ConnectX core driver v"
 	DRV_VERSION " (" DRV_RELDATE ")\n";
 
 static struct mlx4_profile default_profile = {
-	.num_qp		= 1 << 17,
+	.num_qp		= 1 << 18,
 	.num_srq	= 1 << 16,
 	.rdmarc_per_qp	= 1 << 4,
 	.num_cq		= 1 << 16,
 	.num_mcg	= 1 << 13,
-	.num_mpt	= 1 << 17,
+	.num_mpt	= 1 << 19,
 	.num_mtt	= 1 << 20,
 };
 
-static int log_num_mac = 2;
+static int log_num_mac = 7;
 module_param_named(log_num_mac, log_num_mac, int, 0444);
 MODULE_PARM_DESC(log_num_mac, "Log2 max number of MACs per ETH port (1-7)");
 
@@ -116,6 +130,23 @@ int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
 
+static int port_type_array[2] = {1, 1};
+static int arr_argc = 2;
+module_param_array(port_type_array, int, &arr_argc, 0444);
+MODULE_PARM_DESC(port_type_array, "Array of port types: IB by default");
+
+struct mlx4_port_config {
+	struct list_head list;
+	enum mlx4_port_type port_type[MLX4_MAX_PORTS + 1];
+	struct pci_dev *pdev;
+};
+
+static inline int mlx4_master_get_num_eqs(struct mlx4_dev *dev)
+{
+	return dev->caps.reserved_eqs +
+		MLX4_MFUNC_EQ_NUM * (dev->num_slaves + 1);
+}
+
 int mlx4_check_port_params(struct mlx4_dev *dev,
 			   enum mlx4_port_type *port_type)
 {
@@ -200,6 +231,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.trans_code[i]     = dev_cap->trans_code[i];
 	}
 
+	dev->caps.uar_page_size	     = PAGE_SIZE;
 	dev->caps.num_uars	     = dev_cap->uar_size / PAGE_SIZE;
 	dev->caps.local_ca_ack_delay = dev_cap->local_ca_ack_delay;
 	dev->caps.bf_reg_size	     = dev_cap->bf_reg_size;
@@ -224,7 +256,9 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.reserved_eqs	     = dev_cap->reserved_eqs;
 	dev->caps.reserved_mtts      = dev_cap->reserved_mtts;
 	dev->caps.reserved_mrws	     = dev_cap->reserved_mrws;
-	dev->caps.reserved_uars	     = dev_cap->reserved_uars;
+
+	/* The first 128 UARs are used for EQ doorbells */
+	dev->caps.reserved_uars	     = max_t(int, 128, dev_cap->reserved_uars);
 	dev->caps.reserved_pds	     = dev_cap->reserved_pds;
 	dev->caps.reserved_xrcds     = (dev->caps.flags & MLX4_DEV_CAP_FLAG_XRC) ?
 					dev_cap->reserved_xrcds : 0;
@@ -245,10 +279,36 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.log_num_prios = use_prio ? 3 : 0;
 
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
-		if (dev->caps.supported_type[i] != MLX4_PORT_TYPE_ETH)
-			dev->caps.port_type[i] = MLX4_PORT_TYPE_IB;
-		else
-			dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+		dev->caps.port_type[i] = MLX4_PORT_TYPE_NONE;
+		if (dev->caps.supported_type[i]) {
+			/* if only ETH is supported - assign ETH */
+			if (dev->caps.supported_type[i] == MLX4_PORT_TYPE_ETH)
+				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+			/* if only IB is supported,
+			 * assign IB only if SRIOV is off*/
+			else if (dev->caps.supported_type[i] ==
+				 MLX4_PORT_TYPE_IB) {
+				if (dev->flags & MLX4_FLAG_SRIOV)
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_NONE;
+				else
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_IB;
+			/* if IB and ETH are supported,
+			 * first of all check if SRIOV is on */
+			} else if (dev->flags & MLX4_FLAG_SRIOV)
+				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
+			/* if IB and ETH are supported and SRIOV is off
+			 * use module parameters */
+			else {
+				if (port_type_array[i-1])
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_IB;
+				else
+					dev->caps.port_type[i] =
+						MLX4_PORT_TYPE_ETH;
+			}
+		}
 		dev->caps.possible_type[i] = dev->caps.port_type[i];
 		mlx4_priv(dev)->sense.sense_allowed[i] =
 			dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
@@ -267,8 +327,6 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		}
 	}
 
-	mlx4_set_port_mask(dev);
-
 	dev->caps.max_counters = 1 << ilog2(dev_cap->max_counters);
 
 	dev->caps.reserved_qps_cnt[MLX4_QP_REGION_FW] = dev_cap->reserved_qps;
@@ -287,6 +345,149 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 	return 0;
 }
+/*The function checks if there are live vf, return the num of them*/
+static int mlx4_how_many_lives_vf(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_slave_state *s_state;
+	int i;
+	int ret = 0;
+
+	for (i = 1/*the ppf is 0*/; i < dev->num_slaves; ++i) {
+		s_state = &priv->mfunc.master.slave_state[i];
+		if (s_state->active && s_state->last_cmd !=
+		    MLX4_COMM_CMD_RESET) {
+			mlx4_warn(dev, "%s: slave: %d is still active\n",
+				  __func__, i);
+			ret++;
+		}
+	}
+	return ret;
+}
+
+static int mlx4_is_slave_active(struct mlx4_dev *dev, int slave)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	struct mlx4_slave_state *s_slave;
+
+	if (!mlx4_is_master(dev))
+		return 0;
+
+	s_slave = &priv->mfunc.master.slave_state[slave];
+	return !!s_slave->active;
+}
+EXPORT_SYMBOL(mlx4_is_slave_active);
+
+static int mlx4_slave_cap(struct mlx4_dev *dev)
+{
+	int			   err;
+	u32			   page_size;
+	struct mlx4_dev_cap	   dev_cap;
+	struct mlx4_func_cap	   func_cap;
+	struct mlx4_init_hca_param hca_param;
+	int			   i;
+
+	memset(&hca_param, 0, sizeof(hca_param));
+	err = mlx4_QUERY_HCA(dev, &hca_param);
+	if (err) {
+		mlx4_err(dev, "QUERY_HCA command failed, aborting.\n");
+		return err;
+	}
+
+	/*fail if the hca has an unknown capability */
+	if ((hca_param.global_caps | HCA_GLOBAL_CAP_MASK) !=
+	    HCA_GLOBAL_CAP_MASK) {
+		mlx4_err(dev, "Unknown hca global capabilities\n");
+		return -ENOSYS;
+	}
+
+	mlx4_log_num_mgm_entry_size = hca_param.log_mc_entry_sz;
+
+	memset(&dev_cap, 0, sizeof(dev_cap));
+	err = mlx4_dev_cap(dev, &dev_cap);
+	if (err) {
+		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+		return err;
+	}
+
+	page_size = ~dev->caps.page_size_cap + 1;
+	mlx4_warn(dev, "HCA minimum page size:%d\n", page_size);
+	if (page_size > PAGE_SIZE) {
+		mlx4_err(dev, "HCA minimum page size of %d bigger than "
+			 "kernel PAGE_SIZE of %ld, aborting.\n",
+			 page_size, PAGE_SIZE);
+		return -ENODEV;
+	}
+
+	/* slave gets uar page size from QUERY_HCA fw command */
+	dev->caps.uar_page_size = 1 << (hca_param.uar_page_sz + 12);
+
+	/* TODO: relax this assumption */
+	if (dev->caps.uar_page_size != PAGE_SIZE) {
+		mlx4_err(dev, "UAR size:%d != kernel PAGE_SIZE of %ld\n",
+			 dev->caps.uar_page_size, PAGE_SIZE);
+		return -ENODEV;
+	}
+
+	memset(&func_cap, 0, sizeof(func_cap));
+	err = mlx4_QUERY_FUNC_CAP(dev, &func_cap);
+	if (err) {
+		mlx4_err(dev, "QUERY_FUNC_CAP command failed, aborting.\n");
+		return err;
+	}
+
+	if ((func_cap.pf_context_behaviour | PF_CONTEXT_BEHAVIOUR_MASK) !=
+	    PF_CONTEXT_BEHAVIOUR_MASK) {
+		mlx4_err(dev, "Unknown pf context behaviour\n");
+		return -ENOSYS;
+	}
+
+	dev->caps.function		= func_cap.function;
+	dev->caps.num_ports		= func_cap.num_ports;
+	dev->caps.num_qps		= func_cap.qp_quota;
+	dev->caps.num_srqs		= func_cap.srq_quota;
+	dev->caps.num_cqs		= func_cap.cq_quota;
+	dev->caps.num_eqs               = func_cap.max_eq;
+	dev->caps.reserved_eqs          = func_cap.reserved_eq;
+	dev->caps.num_mpts		= func_cap.mpt_quota;
+	dev->caps.num_mtts		= func_cap.mtt_quota;
+	dev->caps.num_pds               = MLX4_NUM_PDS;
+	dev->caps.num_mgms              = 0;
+	dev->caps.num_amgms             = 0;
+
+	for (i = 1; i <= dev->caps.num_ports; ++i)
+		dev->caps.port_mask[i] = dev->caps.port_type[i];
+
+	if (dev->caps.num_ports > MLX4_MAX_PORTS) {
+		mlx4_err(dev, "HCA has %d ports, but we only support %d, "
+			 "aborting.\n", dev->caps.num_ports, MLX4_MAX_PORTS);
+		return -ENODEV;
+	}
+
+	if (dev->caps.uar_page_size * (dev->caps.num_uars -
+				       dev->caps.reserved_uars) >
+				       pci_resource_len(dev->pdev, 2)) {
+		mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than "
+			 "PCI resource 2 size of 0x%llx, aborting.\n",
+			 dev->caps.uar_page_size * dev->caps.num_uars,
+			 (unsigned long long) pci_resource_len(dev->pdev, 2));
+		return -ENODEV;
+	}
+
+#if 0
+	mlx4_warn(dev, "sqp_demux:%d\n", dev->caps.sqp_demux);
+	mlx4_warn(dev, "num_uars:%d reserved_uars:%d uar region:0x%x bar2:0x%llx\n",
+		  dev->caps.num_uars, dev->caps.reserved_uars,
+		  dev->caps.uar_page_size * dev->caps.num_uars,
+		  pci_resource_len(dev->pdev, 2));
+	mlx4_warn(dev, "num_eqs:%d reserved_eqs:%d\n", dev->caps.num_eqs,
+		  dev->caps.reserved_eqs);
+	mlx4_warn(dev, "num_pds:%d reserved_pds:%d slave_pd_shift:%d pd_base:%d\n",
+		  dev->caps.num_pds, dev->caps.reserved_pds,
+		  dev->caps.slave_pd_shift, dev->caps.pd_base);
+#endif
+	return 0;
+}
 
 /*
  * Change the port configuration of the device.
@@ -456,6 +657,7 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	int err;
+	int num_eqs;
 
 	err = mlx4_init_icm_table(dev, &priv->qp_table.cmpt_table,
 				  cmpt_base +
@@ -485,12 +687,14 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 	if (err)
 		goto err_srq;
 
+	num_eqs = (mlx4_is_master(dev)) ?
+		roundup_pow_of_two(mlx4_master_get_num_eqs(dev)) :
+		dev->caps.num_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
 				  cmpt_base +
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
 					  cmpt_entry_sz) << MLX4_CMPT_SHIFT),
-				  cmpt_entry_sz,
-				  dev->caps.num_eqs, dev->caps.num_eqs, 0, 0);
+				  cmpt_entry_sz, num_eqs, num_eqs, 0, 0);
 	if (err)
 		goto err_cq;
 
@@ -514,6 +718,7 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
 	u64 aux_pages;
+	int num_eqs;
 	int err;
 
 	err = mlx4_SET_ICM_SIZE(dev, icm_size, &aux_pages);
@@ -545,10 +750,13 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 		goto err_unmap_aux;
 	}
 
+
+	num_eqs = (mlx4_is_master(dev)) ?
+		roundup_pow_of_two(mlx4_master_get_num_eqs(dev)) :
+		dev->caps.num_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
-				  dev->caps.num_eqs, dev->caps.num_eqs,
-				  0, 0);
+				  num_eqs, num_eqs, 0, 0);
 	if (err) {
 		mlx4_err(dev, "Failed to map EQ context memory, aborting.\n");
 		goto err_unmap_cmpt;
@@ -732,6 +940,16 @@ static void mlx4_free_icms(struct mlx4_dev *dev)
 	mlx4_free_icm(dev, priv->fw.aux_icm, 0);
 }
 
+static void mlx4_slave_exit(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+
+	down(&priv->cmd.slave_sem);
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME))
+		mlx4_warn(dev, "Failed to close slave function.\n");
+	up(&priv->cmd.slave_sem);
+}
+
 static int map_bf_area(struct mlx4_dev *dev)
 {
 	struct mlx4_priv *priv = mlx4_priv(dev);
@@ -739,8 +957,10 @@ static int map_bf_area(struct mlx4_dev *dev)
 	resource_size_t bf_len;
 	int err = 0;
 
-	bf_start = pci_resource_start(dev->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT);
-	bf_len = pci_resource_len(dev->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT);
+	bf_start = pci_resource_start(dev->pdev, 2) +
+			(dev->caps.num_uars << PAGE_SHIFT);
+	bf_len = pci_resource_len(dev->pdev, 2) -
+			(dev->caps.num_uars << PAGE_SHIFT);
 	priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len);
 	if (!priv->bf_mapping)
 		err = -ENOMEM;
@@ -757,10 +977,81 @@ static void unmap_bf_area(struct mlx4_dev *dev)
 static void mlx4_close_hca(struct mlx4_dev *dev)
 {
 	unmap_bf_area(dev);
-	mlx4_CLOSE_HCA(dev, 0);
-	mlx4_free_icms(dev);
-	mlx4_UNMAP_FA(dev);
-	mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+	if (mlx4_is_slave(dev))
+		mlx4_slave_exit(dev);
+	else {
+		mlx4_CLOSE_HCA(dev, 0);
+		mlx4_free_icms(dev);
+		mlx4_UNMAP_FA(dev);
+		mlx4_free_icm(dev, mlx4_priv(dev)->fw.fw_icm, 0);
+	}
+}
+
+static int mlx4_init_slave(struct mlx4_dev *dev)
+{
+	struct mlx4_priv *priv = mlx4_priv(dev);
+	u64 dma = (u64) priv->mfunc.vhcr_dma;
+	int num_of_reset_retries = NUM_OF_RESET_RETRIES;
+	int ret_from_reset = 0;
+	u32 slave_read;
+	u32 cmd_channel_ver;
+
+	down(&priv->cmd.slave_sem);
+	priv->cmd.max_cmds = 1;
+	mlx4_warn(dev, "Sending reset\n");
+	ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0,
+				       MLX4_COMM_TIME);
+	/* if we are in the middle of flr the slave will try
+	 * NUM_OF_RESET_RETRIES times before leaving.*/
+	if (ret_from_reset) {
+		if (MLX4_DELAY_RESET_SLAVE == ret_from_reset) {
+			msleep(SLEEP_TIME_IN_RESET);
+			while (ret_from_reset && num_of_reset_retries) {
+				mlx4_warn(dev, "slave is currently in the"
+					  "middle of FLR. retrying..."
+					  "(try num:%d)\n",
+					  (NUM_OF_RESET_RETRIES -
+					   num_of_reset_retries  + 1));
+				ret_from_reset =
+					mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET,
+						      0, MLX4_COMM_TIME);
+				num_of_reset_retries = num_of_reset_retries - 1;
+			}
+		} else
+			goto err;
+	}
+
+	/* check the driver version - the slave I/F revision
+	 * must match the master's */
+	slave_read = swab32(readl(&priv->mfunc.comm->slave_read));
+	cmd_channel_ver = mlx4_comm_get_version();
+
+	if (MLX4_COMM_GET_IF_REV(cmd_channel_ver) !=
+		MLX4_COMM_GET_IF_REV(slave_read)) {
+		mlx4_err(dev, "slave driver version is not supported"
+			 " by the master\n");
+		goto err;
+	}
+
+	mlx4_warn(dev, "Sending vhcr0\n");
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR0, dma >> 48,
+						    MLX4_COMM_TIME))
+		goto err;
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR1, dma >> 32,
+						    MLX4_COMM_TIME))
+		goto err;
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR2, dma >> 16,
+						    MLX4_COMM_TIME))
+		goto err;
+	if (mlx4_comm_cmd(dev, MLX4_COMM_CMD_VHCR_EN, dma, MLX4_COMM_TIME))
+		goto err;
+	up(&priv->cmd.slave_sem);
+	return 0;
+
+err:
+	mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0);
+	up(&priv->cmd.slave_sem);
+	return -EIO;
 }
 
 static int mlx4_init_hca(struct mlx4_dev *dev)
@@ -774,56 +1065,76 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 	u64 icm_size;
 	int err;
 
-	err = mlx4_QUERY_FW(dev);
-	if (err) {
-		if (err == -EACCES)
-			mlx4_info(dev, "non-primary physical function, skipping.\n");
-		else
-			mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
-		return err;
-	}
+	if (!mlx4_is_slave(dev)) {
+		err = mlx4_QUERY_FW(dev);
+		if (err) {
+			if (err == -EACCES)
+				mlx4_info(dev, "non-primary physical function, skipping.\n");
+			else
+				mlx4_err(dev, "QUERY_FW command failed, aborting.\n");
+			goto unmap_bf;
+		}
 
-	err = mlx4_load_fw(dev);
-	if (err) {
-		mlx4_err(dev, "Failed to start FW, aborting.\n");
-		return err;
-	}
+		err = mlx4_load_fw(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to start FW, aborting.\n");
+			goto unmap_bf;
+		}
 
-	mlx4_cfg.log_pg_sz_m = 1;
-	mlx4_cfg.log_pg_sz = 0;
-	err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
-	if (err)
-		mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
+		mlx4_cfg.log_pg_sz_m = 1;
+		mlx4_cfg.log_pg_sz = 0;
+		err = mlx4_MOD_STAT_CFG(dev, &mlx4_cfg);
+		if (err)
+			mlx4_warn(dev, "Failed to override log_pg_sz parameter\n");
 
-	err = mlx4_dev_cap(dev, &dev_cap);
-	if (err) {
-		mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
-		goto err_stop_fw;
-	}
+		err = mlx4_dev_cap(dev, &dev_cap);
+		if (err) {
+			mlx4_err(dev, "QUERY_DEV_CAP command failed, aborting.\n");
+			goto err_stop_fw;
+		}
 
-	profile = default_profile;
+		profile = default_profile;
 
-	icm_size = mlx4_make_profile(dev, &profile, &dev_cap, &init_hca);
-	if ((long long) icm_size < 0) {
-		err = icm_size;
-		goto err_stop_fw;
-	}
+		icm_size = mlx4_make_profile(dev, &profile, &dev_cap,
+					     &init_hca);
+		if ((long long) icm_size < 0) {
+			err = icm_size;
+			goto err_stop_fw;
+		}
 
-	if (map_bf_area(dev))
-		mlx4_dbg(dev, "Failed to map blue flame area\n");
+		init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+		init_hca.uar_page_sz = PAGE_SHIFT - 12;
 
-	init_hca.log_uar_sz = ilog2(dev->caps.num_uars);
+		err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
+		if (err)
+			goto err_stop_fw;
 
-	err = mlx4_init_icm(dev, &dev_cap, &init_hca, icm_size);
-	if (err)
-		goto err_stop_fw;
+		err = mlx4_INIT_HCA(dev, &init_hca);
+		if (err) {
+			mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
+			goto err_free_icm;
+		}
+	} else {
+		err = mlx4_init_slave(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to initialize slave\n");
+			goto unmap_bf;
+		}
 
-	err = mlx4_INIT_HCA(dev, &init_hca);
-	if (err) {
-		mlx4_err(dev, "INIT_HCA command failed, aborting.\n");
-		goto err_free_icm;
+		err = mlx4_slave_cap(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to obtain slave caps\n");
+			goto err_close;
+		}
 	}
 
+	if (map_bf_area(dev))
+		mlx4_dbg(dev, "Failed to map blue flame area\n");
+
+	/*Only the master set the ports, all the rest got it from it.*/
+	if (!mlx4_is_slave(dev))
+		mlx4_set_port_mask(dev);
+
 	err = mlx4_QUERY_ADAPTER(dev, &adapter);
 	if (err) {
 		mlx4_err(dev, "QUERY_ADAPTER command failed, aborting.\n");
@@ -836,16 +1147,19 @@ static int mlx4_init_hca(struct mlx4_dev *dev)
 	return 0;
 
 err_close:
-	mlx4_CLOSE_HCA(dev, 0);
+	mlx4_close_hca(dev);
 
 err_free_icm:
-	mlx4_free_icms(dev);
+	if (!mlx4_is_slave(dev))
+		mlx4_free_icms(dev);
 
 err_stop_fw:
+	if (!mlx4_is_slave(dev)) {
+		mlx4_UNMAP_FA(dev);
+		mlx4_free_icm(dev, priv->fw.fw_icm, 0);
+	}
+unmap_bf:
 	unmap_bf_area(dev);
-	mlx4_UNMAP_FA(dev);
-	mlx4_free_icm(dev, priv->fw.fw_icm, 0);
-
 	return err;
 }
 
@@ -992,55 +1306,62 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 		goto err_srq_table_free;
 	}
 
-	err = mlx4_init_mcg_table(dev);
-	if (err) {
-		mlx4_err(dev, "Failed to initialize "
-			 "multicast group table, aborting.\n");
-		goto err_qp_table_free;
+	if (!mlx4_is_slave(dev)) {
+		err = mlx4_init_mcg_table(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to initialize "
+				 "multicast group table, aborting.\n");
+			goto err_qp_table_free;
+		}
 	}
 
 	err = mlx4_init_counters_table(dev);
 	if (err && err != -ENOENT) {
 		mlx4_err(dev, "Failed to initialize counters table, aborting.\n");
-		goto err_counters_table_free;
+		goto err_mcg_table_free;
 	}
 
-	for (port = 1; port <= dev->caps.num_ports; port++) {
-		enum mlx4_port_type port_type = 0;
-		mlx4_SENSE_PORT(dev, port, &port_type);
-		if (port_type)
-			dev->caps.port_type[port] = port_type;
-		ib_port_default_caps = 0;
-		err = mlx4_get_port_ib_caps(dev, port, &ib_port_default_caps);
-		if (err)
-			mlx4_warn(dev, "failed to get port %d default "
-				  "ib capabilities (%d). Continuing with "
-				  "caps = 0\n", port, err);
-		dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
-
-		err = mlx4_check_ext_port_caps(dev, port);
-		if (err)
-			mlx4_warn(dev, "failed to get port %d extended "
-				  "port capabilities support info (%d)."
-				  " Assuming not supported\n", port, err);
+	if (!mlx4_is_slave(dev)) {
+		for (port = 1; port <= dev->caps.num_ports; port++) {
+			if (!mlx4_is_mfunc(dev)) {
+				enum mlx4_port_type port_type = 0;
+				mlx4_SENSE_PORT(dev, port, &port_type);
+				if (port_type)
+					dev->caps.port_type[port] = port_type;
+			}
+			ib_port_default_caps = 0;
+			err = mlx4_get_port_ib_caps(dev, port,
+						    &ib_port_default_caps);
+			if (err)
+				mlx4_warn(dev, "failed to get port %d default "
+					  "ib capabilities (%d). Continuing "
+					  "with caps = 0\n", port, err);
+			dev->caps.ib_port_def_cap[port] = ib_port_default_caps;
+
+			err = mlx4_check_ext_port_caps(dev, port);
+			if (err)
+				mlx4_warn(dev, "failed to get port %d extended "
+					  "port capabilities support info (%d)."
+					  " Assuming not supported\n",
+					  port, err);
 
-		err = mlx4_SET_PORT(dev, port);
-		if (err) {
-			mlx4_err(dev, "Failed to set port %d, aborting\n",
-				port);
-			goto err_mcg_table_free;
+			err = mlx4_SET_PORT(dev, port);
+			if (err) {
+				mlx4_err(dev, "Failed to set port %d, aborting\n",
+					port);
+				goto err_counters_table_free;
+			}
 		}
 	}
-	mlx4_set_port_mask(dev);
 
 	return 0;
 
-err_mcg_table_free:
-	mlx4_cleanup_mcg_table(dev);
-
 err_counters_table_free:
 	mlx4_cleanup_counters_table(dev);
 
+err_mcg_table_free:
+	mlx4_cleanup_mcg_table(dev);
+
 err_qp_table_free:
 	mlx4_cleanup_qp_table(dev);
 
@@ -1087,8 +1408,16 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev)
 	int i;
 
 	if (msi_x) {
-		nreq = min_t(int, dev->caps.num_eqs - dev->caps.reserved_eqs,
-			     nreq);
+		/* In multifunction mode each function gets 2 msi-X vectors
+		 * one for data path completions anf the other for asynch events
+		 * or command completions */
+		if (mlx4_is_mfunc(dev)) {
+			nreq = 2;
+		} else {
+			nreq = min_t(int, dev->caps.num_eqs -
+				     dev->caps.reserved_eqs, nreq);
+		}
+
 		entries = kcalloc(nreq, sizeof *entries, GFP_KERNEL);
 		if (!entries)
 			goto no_msi;
@@ -1144,16 +1473,24 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port)
 
 	info->dev = dev;
 	info->port = port;
-	mlx4_init_mac_table(dev, &info->mac_table);
-	mlx4_init_vlan_table(dev, &info->vlan_table);
-	info->base_qpn = dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
+	if (!mlx4_is_slave(dev)) {
+		INIT_RADIX_TREE(&info->mac_tree, GFP_KERNEL);
+		mlx4_init_mac_table(dev, &info->mac_table);
+		mlx4_init_vlan_table(dev, &info->vlan_table);
+		info->base_qpn =
+			dev->caps.reserved_qps_base[MLX4_QP_REGION_ETH_ADDR] +
 			(port - 1) * (1 << log_num_mac);
+	}
 
 	sprintf(info->dev_name, "mlx4_port%d", port);
 	info->port_attr.attr.name = info->dev_name;
-	info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+	if (mlx4_is_mfunc(dev))
+		info->port_attr.attr.mode = S_IRUGO;
+	else {
+		info->port_attr.attr.mode = S_IRUGO | S_IWUSR;
+		info->port_attr.store     = set_port_type;
+	}
 	info->port_attr.show      = show_port_type;
-	info->port_attr.store     = set_port_type;
 	sysfs_attr_init(&info->port_attr.attr);
 
 	err = device_create_file(&dev->pdev->dev, &info->port_attr);
@@ -1226,6 +1563,46 @@ static void mlx4_clear_steering(struct mlx4_dev *dev)
 	kfree(priv->steer);
 }
 
+static int extended_func_num(struct pci_dev *pdev)
+{
+	return PCI_SLOT(pdev->devfn) * 8 + PCI_FUNC(pdev->devfn);
+}
+
+#define MLX4_OWNER_BASE	0x8069c
+#define MLX4_OWNER_SIZE	4
+
+static int mlx4_get_ownership(struct mlx4_dev *dev)
+{
+	void __iomem *owner;
+	u32 ret;
+
+	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+			MLX4_OWNER_SIZE);
+	if (!owner) {
+		mlx4_err(dev, "Failed to obtain ownership bit\n");
+		return -ENOMEM;
+	}
+
+	ret = readl(owner);
+	iounmap(owner);
+	return (int) !!ret;
+}
+
+static void mlx4_free_ownership(struct mlx4_dev *dev)
+{
+	void __iomem *owner;
+
+	owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE,
+			MLX4_OWNER_SIZE);
+	if (!owner) {
+		mlx4_err(dev, "Failed to obtain ownership bit\n");
+		return;
+	}
+	writel(0, owner);
+	msleep(1000);
+	iounmap(owner);
+}
+
 static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct mlx4_priv *priv;
@@ -1241,13 +1618,20 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 			"aborting.\n");
 		return err;
 	}
-
+	if (num_vfs > MLX4_MAX_NUM_VF) {
+		printk(KERN_ERR "There are more VF's (%d) than allowed(%d)\n",
+		       num_vfs, MLX4_MAX_NUM_VF);
+		return -EINVAL;
+	}
 	/*
-	 * Check for BARs.  We expect 0: 1MB
+	 * Check for BARs.
 	 */
-	if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
-	    pci_resource_len(pdev, 0) != 1 << 20) {
-		dev_err(&pdev->dev, "Missing DCS, aborting.\n");
+	if (((id == NULL) || !(id->driver_data & MLX4_VF)) &&
+	    !(pci_resource_flags(pdev, 0) & IORESOURCE_MEM)) {
+		dev_err(&pdev->dev, "Missing DCS, aborting."
+			"(id == 0X%p, id->driver_data: 0x%lx,"
+			" pci_resource_flags(pdev, 0):0x%lx)\n", id,
+			id ? id->driver_data : 0, pci_resource_flags(pdev, 0));
 		err = -ENODEV;
 		goto err_disable_pdev;
 	}
@@ -1311,42 +1695,132 @@ static int __mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id)
 	mutex_init(&priv->bf_mutex);
 
 	dev->rev_id = pdev->revision;
+	/* Detect if this device is a virtual function */
+	if (id && id->driver_data & MLX4_VF) {
+		/* When acting as pf, we normally skip vfs unless explicitly
+		 * requested to probe them. */
+		if (num_vfs && extended_func_num(pdev) > probe_vf) {
+			mlx4_warn(dev, "Skipping virtual function:%d\n",
+						extended_func_num(pdev));
+			err = -ENODEV;
+			goto err_free_dev;
+		}
+		mlx4_warn(dev, "Detected virtual function - running in slave mode\n");
+		dev->flags |= MLX4_FLAG_SLAVE;
+	} else {
+		/* We reset the device and enable SRIOV only for physical
+		 * devices.  Try to claim ownership on the device;
+		 * if already taken, skip -- do not allow multiple PFs */
+		err = mlx4_get_ownership(dev);
+		if (err) {
+			if (err < 0)
+				goto err_free_dev;
+			else {
+				mlx4_warn(dev, "Multiple PFs not yet supported."
+					  " Skipping PF.\n");
+				err = -EINVAL;
+				goto err_free_dev;
+			}
+		}
 
-	/*
-	 * Now reset the HCA before we touch the PCI capabilities or
-	 * attempt a firmware command, since a boot ROM may have left
-	 * the HCA in an undefined state.
-	 */
-	err = mlx4_reset(dev);
-	if (err) {
-		mlx4_err(dev, "Failed to reset HCA, aborting.\n");
-		goto err_free_dev;
+		if (num_vfs) {
+			mlx4_warn(dev, "Enabling sriov with:%d vfs\n", num_vfs);
+			err = pci_enable_sriov(pdev, num_vfs);
+			if (err) {
+				mlx4_err(dev, "Failed to enable sriov,"
+					 "continuing without sriov enabled"
+					 " (err = %d).\n", err);
+				num_vfs = 0;
+				err = 0;
+			} else {
+				mlx4_warn(dev, "Running in master mode\n");
+				dev->flags |= MLX4_FLAG_SRIOV |
+					      MLX4_FLAG_MASTER;
+				dev->num_vfs = num_vfs;
+			}
+		}
+
+		/*
+		 * Now reset the HCA before we touch the PCI capabilities or
+		 * attempt a firmware command, since a boot ROM may have left
+		 * the HCA in an undefined state.
+		 */
+		err = mlx4_reset(dev);
+		if (err) {
+			mlx4_err(dev, "Failed to reset HCA, aborting.\n");
+			goto err_rel_own;
+		}
 	}
 
+slave_start:
 	if (mlx4_cmd_init(dev)) {
 		mlx4_err(dev, "Failed to init command interface, aborting.\n");
-		goto err_free_dev;
+		goto err_sriov;
+	}
+
+	/* In slave functions, the communication channel must be initialized
+	 * before posting commands. Also, init num_slaves before calling
+	 * mlx4_init_hca */
+	if (mlx4_is_mfunc(dev)) {
+		if (mlx4_is_master(dev))
+			dev->num_slaves = MLX4_MAX_NUM_SLAVES;
+		else {
+			dev->num_slaves = 0;
+			if (mlx4_multi_func_init(dev)) {
+				mlx4_err(dev, "Failed to init slave mfunc"
+					 " interface, aborting.\n");
+				goto err_cmd;
+			}
+		}
 	}
 
 	err = mlx4_init_hca(dev);
-	if (err)
-		goto err_cmd;
+	if (err) {
+		if (err == -EACCES) {
+			/* Not primary Physical function
+			 * Running in slave mode */
+			mlx4_cmd_cleanup(dev);
+			dev->flags |= MLX4_FLAG_SLAVE;
+			dev->flags &= ~MLX4_FLAG_MASTER;
+			goto slave_start;
+		} else
+			goto err_mfunc;
+	}
+
+	/* In master functions, the communication channel must be initialized
+	 * after obtaining its address from fw */
+	if (mlx4_is_master(dev)) {
+		if (mlx4_multi_func_init(dev)) {
+			mlx4_err(dev, "Failed to init master mfunc"
+				 "interface, aborting.\n");
+			goto err_close;
+		}
+	}
 
 	err = mlx4_alloc_eq_table(dev);
 	if (err)
-		goto err_close;
+		goto err_master_mfunc;
 
 	priv->msix_ctl.pool_bm = 0;
 	spin_lock_init(&priv->msix_ctl.pool_lock);
 
 	mlx4_enable_msi_x(dev);
-
-	err = mlx4_init_steering(dev);
-	if (err)
+	if ((mlx4_is_mfunc(dev)) &&
+	    !(dev->flags & MLX4_FLAG_MSI_X)) {
+		mlx4_err(dev, "INTx is not supported in multi-function mode."
+			 " aborting.\n");
 		goto err_free_eq;
+	}
+
+	if (!mlx4_is_slave(dev)) {
+		err = mlx4_init_steering(dev);
+		if (err)
+			goto err_free_eq;
+	}
 
 	err = mlx4_setup_hca(dev);
-	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X)) {
+	if (err == -EBUSY && (dev->flags & MLX4_FLAG_MSI_X) &&
+	    !mlx4_is_mfunc(dev)) {
 		dev->flags &= ~MLX4_FLAG_MSI_X;
 		pci_disable_msix(pdev);
 		err = mlx4_setup_hca(dev);
@@ -1389,20 +1863,37 @@ err_port:
 	mlx4_cleanup_uar_table(dev);
 
 err_steer:
-	mlx4_clear_steering(dev);
+	if (!mlx4_is_slave(dev))
+		mlx4_clear_steering(dev);
 
 err_free_eq:
 	mlx4_free_eq_table(dev);
 
+err_master_mfunc:
+	if (mlx4_is_master(dev))
+		mlx4_multi_func_cleanup(dev);
+
 err_close:
 	if (dev->flags & MLX4_FLAG_MSI_X)
 		pci_disable_msix(pdev);
 
 	mlx4_close_hca(dev);
 
+err_mfunc:
+	if (mlx4_is_slave(dev))
+		mlx4_multi_func_cleanup(dev);
+
 err_cmd:
 	mlx4_cmd_cleanup(dev);
 
+err_sriov:
+	if (num_vfs && (dev->flags & MLX4_FLAG_SRIOV))
+		pci_disable_sriov(pdev);
+
+err_rel_own:
+	if (!mlx4_is_slave(dev))
+		mlx4_free_ownership(dev);
+
 err_free_dev:
 	kfree(priv);
 
@@ -1430,6 +1921,12 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 	int p;
 
 	if (dev) {
+		/* in SRIOV it is not allowed to unload the pf's
+		 * driver while there are alive vf's */
+		if (mlx4_is_master(dev)) {
+			if (mlx4_how_many_lives_vf(dev))
+				printk(KERN_ERR "Removing PF when there are assigned VF's !!!\n");
+		}
 		mlx4_stop_sense(dev);
 		mlx4_unregister_device(dev);
 
@@ -1449,17 +1946,31 @@ static void mlx4_remove_one(struct pci_dev *pdev)
 		mlx4_cleanup_xrcd_table(dev);
 		mlx4_cleanup_pd_table(dev);
 
+		if (mlx4_is_master(dev))
+			mlx4_free_resource_tracker(dev);
+
 		iounmap(priv->kar);
 		mlx4_uar_free(dev, &priv->driver_uar);
 		mlx4_cleanup_uar_table(dev);
-		mlx4_clear_steering(dev);
+		if (!mlx4_is_slave(dev))
+			mlx4_clear_steering(dev);
 		mlx4_free_eq_table(dev);
+		if (mlx4_is_master(dev))
+			mlx4_multi_func_cleanup(dev);
 		mlx4_close_hca(dev);
+		if (mlx4_is_slave(dev))
+			mlx4_multi_func_cleanup(dev);
 		mlx4_cmd_cleanup(dev);
 
 		if (dev->flags & MLX4_FLAG_MSI_X)
 			pci_disable_msix(pdev);
+		if (num_vfs && (dev->flags & MLX4_FLAG_SRIOV)) {
+			mlx4_warn(dev, "Disabling sriov\n");
+			pci_disable_sriov(pdev);
+		}
 
+		if (!mlx4_is_slave(dev))
+			mlx4_free_ownership(dev);
 		kfree(priv);
 		pci_release_regions(pdev);
 		pci_disable_device(pdev);
@@ -1474,33 +1985,48 @@ int mlx4_restart_one(struct pci_dev *pdev)
 }
 
 static DEFINE_PCI_DEVICE_TABLE(mlx4_pci_table) = {
-	{ PCI_VDEVICE(MELLANOX, 0x6340) }, /* MT25408 "Hermon" SDR */
-	{ PCI_VDEVICE(MELLANOX, 0x634a) }, /* MT25408 "Hermon" DDR */
-	{ PCI_VDEVICE(MELLANOX, 0x6354) }, /* MT25408 "Hermon" QDR */
-	{ PCI_VDEVICE(MELLANOX, 0x6732) }, /* MT25408 "Hermon" DDR PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x673c) }, /* MT25408 "Hermon" QDR PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x6368) }, /* MT25408 "Hermon" EN 10GigE */
-	{ PCI_VDEVICE(MELLANOX, 0x6750) }, /* MT25408 "Hermon" EN 10GigE PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x6372) }, /* MT25458 ConnectX EN 10GBASE-T 10GigE */
-	{ PCI_VDEVICE(MELLANOX, 0x675a) }, /* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
-	{ PCI_VDEVICE(MELLANOX, 0x6764) }, /* MT26468 ConnectX EN 10GigE PCIe gen2*/
-	{ PCI_VDEVICE(MELLANOX, 0x6746) }, /* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
-	{ PCI_VDEVICE(MELLANOX, 0x676e) }, /* MT26478 ConnectX2 40GigE PCIe gen2 */
-	{ PCI_VDEVICE(MELLANOX, 0x1002) }, /* MT25400 Family [ConnectX-2 Virtual Function] */
-	{ PCI_VDEVICE(MELLANOX, 0x1003) }, /* MT27500 Family [ConnectX-3] */
-	{ PCI_VDEVICE(MELLANOX, 0x1004) }, /* MT27500 Family [ConnectX-3 Virtual Function] */
-	{ PCI_VDEVICE(MELLANOX, 0x1005) }, /* MT27510 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1006) }, /* MT27511 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1007) }, /* MT27520 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1008) }, /* MT27521 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1009) }, /* MT27530 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100a) }, /* MT27531 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100b) }, /* MT27540 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100c) }, /* MT27541 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100d) }, /* MT27550 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100e) }, /* MT27551 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x100f) }, /* MT27560 Family */
-	{ PCI_VDEVICE(MELLANOX, 0x1010) }, /* MT27561 Family */
+	/* MT25408 "Hermon" SDR */
+	{ PCI_VDEVICE(MELLANOX, 0x6340), 0 },
+	/* MT25408 "Hermon" DDR */
+	{ PCI_VDEVICE(MELLANOX, 0x634a), 0 },
+	/* MT25408 "Hermon" QDR */
+	{ PCI_VDEVICE(MELLANOX, 0x6354), 0 },
+	/* MT25408 "Hermon" DDR PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x6732), 0 },
+	/* MT25408 "Hermon" QDR PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x673c), 0 },
+	/* MT25408 "Hermon" EN 10GigE */
+	{ PCI_VDEVICE(MELLANOX, 0x6368), 0 },
+	/* MT25408 "Hermon" EN 10GigE PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x6750), 0 },
+	/* MT25458 ConnectX EN 10GBASE-T 10GigE */
+	{ PCI_VDEVICE(MELLANOX, 0x6372), 0 },
+	/* MT25458 ConnectX EN 10GBASE-T+Gen2 10GigE */
+	{ PCI_VDEVICE(MELLANOX, 0x675a), 0 },
+	/* MT26468 ConnectX EN 10GigE PCIe gen2*/
+	{ PCI_VDEVICE(MELLANOX, 0x6764), 0 },
+	/* MT26438 ConnectX EN 40GigE PCIe gen2 5GT/s */
+	{ PCI_VDEVICE(MELLANOX, 0x6746), 0 },
+	/* MT26478 ConnectX2 40GigE PCIe gen2 */
+	{ PCI_VDEVICE(MELLANOX, 0x676e), 0 },
+	/* MT25400 Family [ConnectX-2 Virtual Function] */
+	{ PCI_VDEVICE(MELLANOX, 0x1002), MLX4_VF },
+	/* MT27500 Family [ConnectX-3] */
+	{ PCI_VDEVICE(MELLANOX, 0x1003), 0 },
+	/* MT27500 Family [ConnectX-3 Virtual Function] */
+	{ PCI_VDEVICE(MELLANOX, 0x1004), MLX4_VF },
+	{ PCI_VDEVICE(MELLANOX, 0x1005), 0 }, /* MT27510 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1006), 0 }, /* MT27511 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1007), 0 }, /* MT27520 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1008), 0 }, /* MT27521 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1009), 0 }, /* MT27530 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100a), 0 }, /* MT27531 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100b), 0 }, /* MT27540 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100c), 0 }, /* MT27541 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100d), 0 }, /* MT27550 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100e), 0 }, /* MT27551 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x100f), 0 }, /* MT27560 Family */
+	{ PCI_VDEVICE(MELLANOX, 0x1010), 0 }, /* MT27561 Family */
 	{ 0, }
 };
 
@@ -1529,6 +2055,12 @@ static int __init mlx4_verify_params(void)
 		return -1;
 	}
 
+	/* Check if module param for ports type has legal combination */
+	if (port_type_array[0] == false && port_type_array[1] == true) {
+		printk(KERN_WARNING "Module parameter configuration ETH/IB is not supported. Switching to default configuration IB/IB\n");
+		port_type_array[0] = true;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
index 879f825c6f6a..3921dbf01da1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h
@@ -49,6 +49,7 @@
 #include <linux/mlx4/cmd.h>
 
 #define DRV_NAME	"mlx4_core"
+#define PFX		DRV_NAME ": "
 #define DRV_VERSION	"1.0"
 #define DRV_RELDATE	"July 14, 2011"
 
@@ -957,10 +958,15 @@ int mlx4_GEN_EQE(struct mlx4_dev *dev, int slave, struct mlx4_eqe *eqe);
 
 int mlx4_cmd_init(struct mlx4_dev *dev);
 void mlx4_cmd_cleanup(struct mlx4_dev *dev);
+int mlx4_multi_func_init(struct mlx4_dev *dev);
+void mlx4_multi_func_cleanup(struct mlx4_dev *dev);
 void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param);
 int mlx4_cmd_use_events(struct mlx4_dev *dev);
 void mlx4_cmd_use_polling(struct mlx4_dev *dev);
 
+int mlx4_comm_cmd(struct mlx4_dev *dev, u8 cmd, u16 param,
+		  unsigned long timeout);
+
 void mlx4_cq_completion(struct mlx4_dev *dev, u32 cqn);
 void mlx4_cq_event(struct mlx4_dev *dev, u32 cqn, int event_type);
 
diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h
index ae62630a665e..9958ff2cad3c 100644
--- a/include/linux/mlx4/cmd.h
+++ b/include/linux/mlx4/cmd.h
@@ -225,4 +225,6 @@ void mlx4_free_cmd_mailbox(struct mlx4_dev *dev, struct mlx4_cmd_mailbox *mailbo
 
 u32 mlx4_comm_get_version(void);
 
+#define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8)
+
 #endif /* MLX4_CMD_H */
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 65bb466c575f..5f784ff6a36e 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -489,6 +489,7 @@ struct mlx4_dev {
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
+	int			num_vfs;
 };
 
 struct mlx4_init_port_param {
-- 
cgit v1.2.3


From 8cb25e14fe80d0fac42412364df573eb3e8e83cc Mon Sep 17 00:00:00 2001
From: Helmut Schaa <helmut.schaa@googlemail.com>
Date: Thu, 8 Dec 2011 13:11:54 +0100
Subject: ieee80211: Introduce ieee80211_is_first_frag

Signed-off-by: Helmut Schaa <helmut.schaa@googlemail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 17f2a768e2ad..210e2c325534 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -544,6 +544,15 @@ static inline int ieee80211_is_qos_nullfunc(__le16 fc)
 	       cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC);
 }
 
+/**
+ * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set
+ * @seq_ctrl: frame sequence control bytes in little-endian byteorder
+ */
+static inline int ieee80211_is_first_frag(__le16 seq_ctrl)
+{
+	return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0;
+}
+
 struct ieee80211s_hdr {
 	u8 flags;
 	u8 ttl;
-- 
cgit v1.2.3


From 8a5ac6ecd56756ee72588627aa23ab6cf9b790db Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Thu, 8 Dec 2011 18:02:21 +0100
Subject: ssb: extract FEM info from SPROM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/pci.c            | 23 +++++++++++++++++++++++
 include/linux/ssb/ssb.h      |  9 +++++++++
 include/linux/ssb/ssb_regs.h | 17 +++++++++++++++++
 3 files changed, 49 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index 34c3bab90b9a..973223f5de8e 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -607,6 +607,29 @@ static void sprom_extract_r8(struct ssb_sprom *out, const u16 *in)
 	memcpy(&out->antenna_gain.ghz5, &out->antenna_gain.ghz24,
 	       sizeof(out->antenna_gain.ghz5));
 
+	/* Extract FEM info */
+	SPEX(fem.ghz2.tssipos, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_TSSIPOS, SSB_SROM8_FEM_TSSIPOS_SHIFT);
+	SPEX(fem.ghz2.extpa_gain, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_EXTPA_GAIN, SSB_SROM8_FEM_EXTPA_GAIN_SHIFT);
+	SPEX(fem.ghz2.pdet_range, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_PDET_RANGE, SSB_SROM8_FEM_PDET_RANGE_SHIFT);
+	SPEX(fem.ghz2.tr_iso, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_TR_ISO, SSB_SROM8_FEM_TR_ISO_SHIFT);
+	SPEX(fem.ghz2.antswlut, SSB_SPROM8_FEM2G,
+		SSB_SROM8_FEM_ANTSWLUT, SSB_SROM8_FEM_ANTSWLUT_SHIFT);
+
+	SPEX(fem.ghz5.tssipos, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_TSSIPOS, SSB_SROM8_FEM_TSSIPOS_SHIFT);
+	SPEX(fem.ghz5.extpa_gain, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_EXTPA_GAIN, SSB_SROM8_FEM_EXTPA_GAIN_SHIFT);
+	SPEX(fem.ghz5.pdet_range, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_PDET_RANGE, SSB_SROM8_FEM_PDET_RANGE_SHIFT);
+	SPEX(fem.ghz5.tr_iso, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_TR_ISO, SSB_SROM8_FEM_TR_ISO_SHIFT);
+	SPEX(fem.ghz5.antswlut, SSB_SPROM8_FEM5G,
+		SSB_SROM8_FEM_ANTSWLUT, SSB_SROM8_FEM_ANTSWLUT_SHIFT);
+
 	sprom_extract_r458(out, in);
 
 	/* TODO - get remaining rev 8 stuff needed */
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 061e560251b4..dcf35b0f303a 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -94,6 +94,15 @@ struct ssb_sprom {
 		} ghz5;		/* 5GHz band */
 	} antenna_gain;
 
+	struct {
+		struct {
+			u8 tssipos, extpa_gain, pdet_range, tr_iso, antswlut;
+		} ghz2;
+		struct {
+			u8 tssipos, extpa_gain, pdet_range, tr_iso, antswlut;
+		} ghz5;
+	} fem;
+
 	/* TODO - add any parameters needed from rev 2, 3, 4, 5 or 8 SPROMs */
 };
 
diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index 98941203a27f..c814ae6eeb22 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -432,6 +432,23 @@
 #define  SSB_SPROM8_RXPO2G		0x00FF	/* 2GHz RX power offset */
 #define  SSB_SPROM8_RXPO5G		0xFF00	/* 5GHz RX power offset */
 #define  SSB_SPROM8_RXPO5G_SHIFT	8
+#define SSB_SPROM8_FEM2G		0x00AE
+#define SSB_SPROM8_FEM5G		0x00B0
+#define  SSB_SROM8_FEM_TSSIPOS		0x0001
+#define  SSB_SROM8_FEM_TSSIPOS_SHIFT	0
+#define  SSB_SROM8_FEM_EXTPA_GAIN	0x0006
+#define  SSB_SROM8_FEM_EXTPA_GAIN_SHIFT	1
+#define  SSB_SROM8_FEM_PDET_RANGE	0x00F8
+#define  SSB_SROM8_FEM_PDET_RANGE_SHIFT	3
+#define  SSB_SROM8_FEM_TR_ISO		0x0700
+#define  SSB_SROM8_FEM_TR_ISO_SHIFT	8
+#define  SSB_SROM8_FEM_ANTSWLUT		0xF800
+#define  SSB_SROM8_FEM_ANTSWLUT_SHIFT	11
+#define SSB_SPROM8_THERMAL		0x00B2
+#define SSB_SPROM8_MPWR_RAWTS		0x00B4
+#define SSB_SPROM8_TS_SLP_OPT_CORRX	0x00B6
+#define SSB_SPROM8_FOC_HWIQ_IQSWP	0x00B8
+#define SSB_SPROM8_PHYCAL_TEMPDELTA	0x00BA
 #define SSB_SPROM8_MAXP_BG		0x00C0  /* Max Power 2GHz in path 1 */
 #define  SSB_SPROM8_MAXP_BG_MASK	0x00FF  /* Mask for Max Power 2GHz */
 #define  SSB_SPROM8_ITSSI_BG		0xFF00	/* Mask for path 1 itssi_bg */
-- 
cgit v1.2.3


From aee5ed563d56c713d2a51d6f16e08b83fd9665d5 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Thu, 8 Dec 2011 18:02:22 +0100
Subject: bcma: extract FEM info from SPROM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/bcma/sprom.c                        | 22 ++++++++++++++++++++++
 include/linux/bcma/bcma_driver_chipcommon.h |  1 +
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/bcma/sprom.c b/drivers/bcma/sprom.c
index d7292390d236..b6c474bbd572 100644
--- a/drivers/bcma/sprom.c
+++ b/drivers/bcma/sprom.c
@@ -142,6 +142,28 @@ static void bcma_sprom_extract_r8(struct bcma_bus *bus, const u16 *sprom)
 	bus->sprom.boardflags2_hi = sprom[SPOFF(SSB_SPROM8_BFL2HI)];
 
 	bus->sprom.country_code = sprom[SPOFF(SSB_SPROM8_CCODE)];
+
+	bus->sprom.fem.ghz2.tssipos = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_TSSIPOS) >> SSB_SROM8_FEM_TSSIPOS_SHIFT;
+	bus->sprom.fem.ghz2.extpa_gain = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_EXTPA_GAIN) >> SSB_SROM8_FEM_EXTPA_GAIN_SHIFT;
+	bus->sprom.fem.ghz2.pdet_range = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_PDET_RANGE) >> SSB_SROM8_FEM_PDET_RANGE_SHIFT;
+	bus->sprom.fem.ghz2.tr_iso = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_TR_ISO) >> SSB_SROM8_FEM_TR_ISO_SHIFT;
+	bus->sprom.fem.ghz2.antswlut = (sprom[SPOFF(SSB_SPROM8_FEM2G)] &
+		SSB_SROM8_FEM_ANTSWLUT) >> SSB_SROM8_FEM_ANTSWLUT_SHIFT;
+
+	bus->sprom.fem.ghz5.tssipos = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_TSSIPOS) >> SSB_SROM8_FEM_TSSIPOS_SHIFT;
+	bus->sprom.fem.ghz5.extpa_gain = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_EXTPA_GAIN) >> SSB_SROM8_FEM_EXTPA_GAIN_SHIFT;
+	bus->sprom.fem.ghz5.pdet_range = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_PDET_RANGE) >> SSB_SROM8_FEM_PDET_RANGE_SHIFT;
+	bus->sprom.fem.ghz5.tr_iso = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_TR_ISO) >> SSB_SROM8_FEM_TR_ISO_SHIFT;
+	bus->sprom.fem.ghz5.antswlut = (sprom[SPOFF(SSB_SPROM8_FEM5G)] &
+		SSB_SROM8_FEM_ANTSWLUT) >> SSB_SROM8_FEM_ANTSWLUT_SHIFT;
 }
 
 int bcma_sprom_get(struct bcma_bus *bus)
diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h
index 1526d965ed06..a33086a7530b 100644
--- a/include/linux/bcma/bcma_driver_chipcommon.h
+++ b/include/linux/bcma/bcma_driver_chipcommon.h
@@ -203,6 +203,7 @@
 #define BCMA_CC_PMU_CTL			0x0600 /* PMU control */
 #define  BCMA_CC_PMU_CTL_ILP_DIV	0xFFFF0000 /* ILP div mask */
 #define  BCMA_CC_PMU_CTL_ILP_DIV_SHIFT	16
+#define  BCMA_CC_PMU_CTL_PLL_UPD	0x00000400
 #define  BCMA_CC_PMU_CTL_NOILPONW	0x00000200 /* No ILP on wait */
 #define  BCMA_CC_PMU_CTL_HTREQEN	0x00000100 /* HT req enable */
 #define  BCMA_CC_PMU_CTL_ALPREQEN	0x00000080 /* ALP req enable */
-- 
cgit v1.2.3


From 9d08f10d355afd500310738ff09b4d921a447102 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend@broadcom.com>
Date: Thu, 8 Dec 2011 15:06:41 -0800
Subject: bcma: add set/mask macros for 16-bit register access

The BCMA header only had definitions for 32-bit register access. Used
those as a template for the 16-bit flavour. Also changed them to inline
functions to be on the safe side. As offset parameter is used twice there
would be a problem when used like this: bcma_set32(core, offset++, val);

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Alwin Beukers <alwin@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Franky Lin <frankyl@broadcom.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 32 ++++++++++++++++++++++++++------
 1 file changed, 26 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index 4d4b59de9467..de6057f16987 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -254,12 +254,32 @@ void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value)
 	core->bus->ops->awrite32(core, offset, value);
 }
 
-#define bcma_mask32(cc, offset, mask) \
-	bcma_write32(cc, offset, bcma_read32(cc, offset) & (mask))
-#define bcma_set32(cc, offset, set) \
-	bcma_write32(cc, offset, bcma_read32(cc, offset) | (set))
-#define bcma_maskset32(cc, offset, mask, set) \
-	bcma_write32(cc, offset, (bcma_read32(cc, offset) & (mask)) | (set))
+static inline void bcma_mask32(struct bcma_device *cc, u16 offset, u32 mask)
+{
+	bcma_write32(cc, offset, bcma_read32(cc, offset) & mask);
+}
+static inline void bcma_set32(struct bcma_device *cc, u16 offset, u32 set)
+{
+	bcma_write32(cc, offset, bcma_read32(cc, offset) | set);
+}
+static inline void bcma_maskset32(struct bcma_device *cc,
+				  u16 offset, u32 mask, u32 set)
+{
+	bcma_write32(cc, offset, (bcma_read32(cc, offset) & mask) | set);
+}
+static inline void bcma_mask16(struct bcma_device *cc, u16 offset, u16 mask)
+{
+	bcma_write16(cc, offset, bcma_read16(cc, offset) & mask);
+}
+static inline void bcma_set16(struct bcma_device *cc, u16 offset, u16 set)
+{
+	bcma_write16(cc, offset, bcma_read16(cc, offset) | set);
+}
+static inline void bcma_maskset16(struct bcma_device *cc,
+				  u16 offset, u16 mask, u16 set)
+{
+	bcma_write16(cc, offset, (bcma_read16(cc, offset) & mask) | set);
+}
 
 extern bool bcma_core_is_enabled(struct bcma_device *core);
 extern void bcma_core_disable(struct bcma_device *core, u32 flags);
-- 
cgit v1.2.3


From 084455524f0d46dd210b4397898aff73579b97e8 Mon Sep 17 00:00:00 2001
From: Arend van Spriel <arend@broadcom.com>
Date: Thu, 8 Dec 2011 15:06:42 -0800
Subject: bcma: use static keyword for inline function declaration in bcma.h

Just scratching an itch here, but it makes more sense to use the
static keyword if you think about how the compiler treats inline
functions.

Reviewed-by: Pieter-Paul Giesberts <pieterpg@broadcom.com>
Reviewed-by: Alwin Beukers <alwin@broadcom.com>
Signed-off-by: Arend van Spriel <arend@broadcom.com>
Signed-off-by: Franky Lin <frankyl@broadcom.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/bcma/bcma.h | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h
index de6057f16987..f4b8346b1a33 100644
--- a/include/linux/bcma/bcma.h
+++ b/include/linux/bcma/bcma.h
@@ -205,50 +205,51 @@ struct bcma_bus {
 	struct ssb_sprom sprom;
 };
 
-extern inline u32 bcma_read8(struct bcma_device *core, u16 offset)
+static inline u32 bcma_read8(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->read8(core, offset);
 }
-extern inline u32 bcma_read16(struct bcma_device *core, u16 offset)
+static inline u32 bcma_read16(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->read16(core, offset);
 }
-extern inline u32 bcma_read32(struct bcma_device *core, u16 offset)
+static inline u32 bcma_read32(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->read32(core, offset);
 }
-extern inline
+static inline
 void bcma_write8(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->write8(core, offset, value);
 }
-extern inline
+static inline
 void bcma_write16(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->write16(core, offset, value);
 }
-extern inline
+static inline
 void bcma_write32(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->write32(core, offset, value);
 }
 #ifdef CONFIG_BCMA_BLOCKIO
-extern inline void bcma_block_read(struct bcma_device *core, void *buffer,
+static inline void bcma_block_read(struct bcma_device *core, void *buffer,
 				   size_t count, u16 offset, u8 reg_width)
 {
 	core->bus->ops->block_read(core, buffer, count, offset, reg_width);
 }
-extern inline void bcma_block_write(struct bcma_device *core, const void *buffer,
-				    size_t count, u16 offset, u8 reg_width)
+static inline void bcma_block_write(struct bcma_device *core,
+				    const void *buffer, size_t count,
+				    u16 offset, u8 reg_width)
 {
 	core->bus->ops->block_write(core, buffer, count, offset, reg_width);
 }
 #endif
-extern inline u32 bcma_aread32(struct bcma_device *core, u16 offset)
+static inline u32 bcma_aread32(struct bcma_device *core, u16 offset)
 {
 	return core->bus->ops->aread32(core, offset);
 }
-extern inline
+static inline
 void bcma_awrite32(struct bcma_device *core, u16 offset, u32 value)
 {
 	core->bus->ops->awrite32(core, offset, value);
-- 
cgit v1.2.3


From 5c3ddec73d01a1fae9409c197078cb02c42238c3 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 13 Dec 2011 16:44:22 -0500
Subject: net: Remove unused neighbour layer ops.

It's simpler to just keep these things out until there is a real user
of them, so we can see what the needs actually are, rather than keep
these things around as useless overhead.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 -
 include/net/neighbour.h   |  1 -
 net/core/neighbour.c      | 10 ----------
 3 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 603730804da5..6b9d4edb7c26 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -974,7 +974,6 @@ struct net_device_ops {
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
 	int			(*ndo_neigh_construct)(struct neighbour *n);
-	void			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e31f0a86f9b7..6814c4d61c1c 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -43,7 +43,6 @@ struct neigh_parms {
 #endif
 	struct net_device *dev;
 	struct neigh_parms *next;
-	int	(*neigh_setup)(struct neighbour *);
 	void	(*neigh_cleanup)(struct neighbour *);
 	struct neigh_table *tbl;
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 4af151e1bf5d..d57a40a2598c 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -497,13 +497,6 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 		}
 	}
 
-	/* Device specific setup. */
-	if (n->parms->neigh_setup &&
-	    (error = n->parms->neigh_setup(n)) < 0) {
-		rc = ERR_PTR(error);
-		goto out_neigh_release;
-	}
-
 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 
 	write_lock_bh(&tbl->lock);
@@ -717,9 +710,6 @@ void neigh_destroy(struct neighbour *neigh)
 	skb_queue_purge(&neigh->arp_queue);
 	neigh->arp_queue_len_bytes = 0;
 
-	if (dev->netdev_ops->ndo_neigh_destroy)
-		dev->netdev_ops->ndo_neigh_destroy(neigh);
-
 	dev_put(dev);
 	neigh_parms_put(neigh->parms);
 
-- 
cgit v1.2.3


From 1ed28f610653e9b18433c6d87e9d333b7e3e886e Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Wed, 14 Dec 2011 16:43:09 +0100
Subject: NFC: Add a DEP link control netlink command

NFC-DEP (Data Exchange Protocol) is an NFC MAC layer.
This command allows to enable and disable the DEP link on to which e.g.
LLCP can run.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nfc.h   |  14 +++++
 include/net/nfc/nfc.h |  11 ++++
 net/nfc/core.c        |  77 +++++++++++++++++++++++++++
 net/nfc/netlink.c     | 144 ++++++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/nfc.h         |   9 ++++
 5 files changed, 255 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfc.h b/include/linux/nfc.h
index 36cb955b05cc..34d8303111f0 100644
--- a/include/linux/nfc.h
+++ b/include/linux/nfc.h
@@ -62,6 +62,8 @@ enum nfc_commands {
 	NFC_CMD_GET_DEVICE,
 	NFC_CMD_DEV_UP,
 	NFC_CMD_DEV_DOWN,
+	NFC_CMD_DEP_LINK_UP,
+	NFC_CMD_DEP_LINK_DOWN,
 	NFC_CMD_START_POLL,
 	NFC_CMD_STOP_POLL,
 	NFC_CMD_GET_TARGET,
@@ -86,6 +88,8 @@ enum nfc_commands {
  * @NFC_ATTR_TARGET_SENS_RES: NFC-A targets extra information such as NFCID
  * @NFC_ATTR_TARGET_SEL_RES: NFC-A targets extra information (useful if the
  *	target is not NFC-Forum compliant)
+ * @NFC_ATTR_COMM_MODE: Passive or active mode
+ * @NFC_ATTR_RF_MODE: Initiator or target
  */
 enum nfc_attrs {
 	NFC_ATTR_UNSPEC,
@@ -95,6 +99,8 @@ enum nfc_attrs {
 	NFC_ATTR_TARGET_INDEX,
 	NFC_ATTR_TARGET_SENS_RES,
 	NFC_ATTR_TARGET_SEL_RES,
+	NFC_ATTR_COMM_MODE,
+	NFC_ATTR_RF_MODE,
 /* private: internal use only */
 	__NFC_ATTR_AFTER_LAST
 };
@@ -111,6 +117,14 @@ enum nfc_attrs {
 
 #define NFC_PROTO_MAX		6
 
+/* NFC communication modes */
+#define NFC_COMM_ACTIVE  0
+#define NFC_COMM_PASSIVE 1
+
+/* NFC RF modes */
+#define NFC_RF_INITIATOR 0
+#define NFC_RF_TARGET    1
+
 /* NFC protocols masks used in bitsets */
 #define NFC_PROTO_JEWEL_MASK	(1 << NFC_PROTO_JEWEL)
 #define NFC_PROTO_MIFARE_MASK	(1 << NFC_PROTO_MIFARE)
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index 3a3304c094d7..bf82d292d68c 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -52,6 +52,9 @@ struct nfc_ops {
 	int (*dev_down)(struct nfc_dev *dev);
 	int (*start_poll)(struct nfc_dev *dev, u32 protocols);
 	void (*stop_poll)(struct nfc_dev *dev);
+	int (*dep_link_up)(struct nfc_dev *dev, int target_idx,
+				u8 comm_mode, u8 rf_mode);
+	int (*dep_link_down)(struct nfc_dev *dev);
 	int (*activate_target)(struct nfc_dev *dev, u32 target_idx,
 							u32 protocol);
 	void (*deactivate_target)(struct nfc_dev *dev, u32 target_idx);
@@ -60,6 +63,9 @@ struct nfc_ops {
 							void *cb_context);
 };
 
+#define NFC_TARGET_IDX_ANY -1
+#define NFC_MAX_GT_LEN 48
+
 struct nfc_target {
 	u32 idx;
 	u32 supported_protocols;
@@ -83,6 +89,8 @@ struct nfc_dev {
 	bool dev_up;
 	bool polling;
 	bool remote_activated;
+	bool dep_link_up;
+	u32 dep_rf_mode;
 	struct nfc_genl_data genl_data;
 	u32 supported_protocols;
 
@@ -165,4 +173,7 @@ struct sk_buff *nfc_alloc_recv_skb(unsigned int size, gfp_t gfp);
 int nfc_targets_found(struct nfc_dev *dev, struct nfc_target *targets,
 							int ntargets);
 
+int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
+		       u8 comm_mode, u8 rf_mode);
+
 #endif /* __NET_NFC_H */
diff --git a/net/nfc/core.c b/net/nfc/core.c
index f53f88ada687..785f1f20c7ba 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -181,6 +181,83 @@ error:
 	return rc;
 }
 
+int nfc_dep_link_up(struct nfc_dev *dev, int target_index,
+					u8 comm_mode, u8 rf_mode)
+{
+	int rc = 0;
+
+	pr_debug("dev_name=%s comm:%d rf:%d\n",
+			dev_name(&dev->dev), comm_mode, rf_mode);
+
+	if (!dev->ops->dep_link_up)
+		return -EOPNOTSUPP;
+
+	device_lock(&dev->dev);
+
+	if (!device_is_registered(&dev->dev)) {
+		rc = -ENODEV;
+		goto error;
+	}
+
+	if (dev->dep_link_up == true) {
+		rc = -EALREADY;
+		goto error;
+	}
+
+	rc = dev->ops->dep_link_up(dev, target_index, comm_mode, rf_mode);
+
+error:
+	device_unlock(&dev->dev);
+	return rc;
+}
+
+int nfc_dep_link_down(struct nfc_dev *dev)
+{
+	int rc = 0;
+
+	pr_debug("dev_name=%s\n", dev_name(&dev->dev));
+
+	if (!dev->ops->dep_link_down)
+		return -EOPNOTSUPP;
+
+	device_lock(&dev->dev);
+
+	if (!device_is_registered(&dev->dev)) {
+		rc = -ENODEV;
+		goto error;
+	}
+
+	if (dev->dep_link_up == false) {
+		rc = -EALREADY;
+		goto error;
+	}
+
+	if (dev->dep_rf_mode == NFC_RF_TARGET) {
+		rc = -EOPNOTSUPP;
+		goto error;
+	}
+
+	rc = dev->ops->dep_link_down(dev);
+	if (!rc) {
+		dev->dep_link_up = false;
+		nfc_genl_dep_link_down_event(dev);
+	}
+
+error:
+	device_unlock(&dev->dev);
+	return rc;
+}
+
+int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
+					u8 comm_mode, u8 rf_mode)
+{
+	dev->dep_link_up = true;
+	dev->dep_rf_mode = rf_mode;
+
+	return nfc_genl_dep_link_up_event(dev, target_idx, comm_mode, rf_mode);
+}
+EXPORT_SYMBOL(nfc_dep_link_is_up);
+
 /**
  * nfc_activate_target - prepare the target for data exchange
  *
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 1d76d38c4a24..43a1c47756a7 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -46,6 +46,8 @@ static const struct nla_policy nfc_genl_policy[NFC_ATTR_MAX + 1] = {
 	[NFC_ATTR_DEVICE_NAME] = { .type = NLA_STRING,
 				.len = NFC_DEVICE_NAME_MAXSIZE },
 	[NFC_ATTR_PROTOCOLS] = { .type = NLA_U32 },
+	[NFC_ATTR_COMM_MODE] = { .type = NLA_U8 },
+	[NFC_ATTR_RF_MODE] = { .type = NLA_U8 },
 };
 
 static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
@@ -311,6 +313,75 @@ static int nfc_genl_dump_devices_done(struct netlink_callback *cb)
 	return 0;
 }
 
+int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
+						u8 comm_mode, u8 rf_mode)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	pr_debug("DEP link is up\n");
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+				NFC_CMD_DEP_LINK_UP);
+	if (!hdr)
+		goto free_msg;
+
+	NLA_PUT_U32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx);
+	if (rf_mode == NFC_RF_INITIATOR)
+		NLA_PUT_U32(msg, NFC_ATTR_TARGET_INDEX, target_idx);
+	NLA_PUT_U8(msg, NFC_ATTR_COMM_MODE, comm_mode);
+	NLA_PUT_U8(msg, NFC_ATTR_RF_MODE, rf_mode);
+
+	genlmsg_end(msg, hdr);
+
+	dev->dep_link_up = true;
+
+	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
+int nfc_genl_dep_link_down_event(struct nfc_dev *dev)
+{
+	struct sk_buff *msg;
+	void *hdr;
+
+	pr_debug("DEP link is down\n");
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!msg)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0,
+				NFC_CMD_DEP_LINK_DOWN);
+	if (!hdr)
+		goto free_msg;
+
+	NLA_PUT_U32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx);
+
+	genlmsg_end(msg, hdr);
+
+	genlmsg_multicast(msg, 0, nfc_genl_event_mcgrp.id, GFP_ATOMIC);
+
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+free_msg:
+	nlmsg_free(msg);
+	return -EMSGSIZE;
+}
+
 static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info)
 {
 	struct sk_buff *msg;
@@ -398,6 +469,8 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info)
 	u32 idx;
 	u32 protocols;
 
+	pr_debug("Poll start\n");
+
 	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
 		!info->attrs[NFC_ATTR_PROTOCOLS])
 		return -EINVAL;
@@ -452,6 +525,67 @@ out:
 	return rc;
 }
 
+static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	int rc, tgt_idx;
+	u32 idx;
+	u8 comm, rf;
+
+	pr_debug("DEP link up\n");
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+			!info->attrs[NFC_ATTR_COMM_MODE] ||
+			!info->attrs[NFC_ATTR_RF_MODE])
+		return -EINVAL;
+
+	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+	if (!info->attrs[NFC_ATTR_TARGET_INDEX])
+		tgt_idx = NFC_TARGET_IDX_ANY;
+	else
+		tgt_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]);
+
+	comm = nla_get_u8(info->attrs[NFC_ATTR_COMM_MODE]);
+	rf = nla_get_u8(info->attrs[NFC_ATTR_RF_MODE]);
+
+	if (comm != NFC_COMM_ACTIVE && comm != NFC_COMM_PASSIVE)
+		return -EINVAL;
+
+	if (rf != NFC_RF_INITIATOR && comm != NFC_RF_TARGET)
+		return -EINVAL;
+
+	dev = nfc_get_device(idx);
+	if (!dev)
+		return -ENODEV;
+
+	rc = nfc_dep_link_up(dev, tgt_idx, comm, rf);
+
+	nfc_put_device(dev);
+
+	return rc;
+}
+
+static int nfc_genl_dep_link_down(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	int rc;
+	u32 idx;
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX])
+		return -EINVAL;
+
+	idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+
+	dev = nfc_get_device(idx);
+	if (!dev)
+		return -ENODEV;
+
+	rc = nfc_dep_link_down(dev);
+
+	nfc_put_device(dev);
+	return rc;
+}
+
 static struct genl_ops nfc_genl_ops[] = {
 	{
 		.cmd = NFC_CMD_GET_DEVICE,
@@ -480,6 +614,16 @@ static struct genl_ops nfc_genl_ops[] = {
 		.doit = nfc_genl_stop_poll,
 		.policy = nfc_genl_policy,
 	},
+	{
+		.cmd = NFC_CMD_DEP_LINK_UP,
+		.doit = nfc_genl_dep_link_up,
+		.policy = nfc_genl_policy,
+	},
+	{
+		.cmd = NFC_CMD_DEP_LINK_DOWN,
+		.doit = nfc_genl_dep_link_down,
+		.policy = nfc_genl_policy,
+	},
 	{
 		.cmd = NFC_CMD_GET_TARGET,
 		.dumpit = nfc_genl_dump_targets,
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 67d605015304..4d0fb125d033 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -68,6 +68,10 @@ int nfc_genl_targets_found(struct nfc_dev *dev);
 int nfc_genl_device_added(struct nfc_dev *dev);
 int nfc_genl_device_removed(struct nfc_dev *dev);
 
+int nfc_genl_dep_link_up_event(struct nfc_dev *dev, u32 target_idx,
+			       u8 comm_mode, u8 rf_mode);
+int nfc_genl_dep_link_down_event(struct nfc_dev *dev);
+
 struct nfc_dev *nfc_get_device(unsigned idx);
 
 static inline void nfc_put_device(struct nfc_dev *dev)
@@ -102,6 +106,11 @@ int nfc_start_poll(struct nfc_dev *dev, u32 protocols);
 
 int nfc_stop_poll(struct nfc_dev *dev);
 
+int nfc_dep_link_up(struct nfc_dev *dev, int target_idx,
+				u8 comm_mode, u8 rf_mode);
+
+int nfc_dep_link_down(struct nfc_dev *dev);
+
 int nfc_activate_target(struct nfc_dev *dev, u32 target_idx, u32 protocol);
 
 int nfc_deactivate_target(struct nfc_dev *dev, u32 target_idx);
-- 
cgit v1.2.3


From d646960f7986fefb460a2b062d5ccc8ccfeacc3a Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Wed, 14 Dec 2011 16:43:12 +0100
Subject: NFC: Initial LLCP support

This patch is an initial implementation for the NFC Logical Link Control
protocol. It's also known as NFC peer to peer mode.
This is a basic implementation as it lacks SDP (services Discovery
Protocol), frames aggregation support, and frame rejecion parsing.
Follow up patches will implement those missing features.
This code has been tested against a Nexus S phone implementing LLCP 1.0.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nfc.h     |  15 +-
 net/nfc/Kconfig         |   1 +
 net/nfc/Makefile        |   1 +
 net/nfc/core.c          |  20 +-
 net/nfc/llcp/Kconfig    |   7 +
 net/nfc/llcp/commands.c | 399 ++++++++++++++++++++
 net/nfc/llcp/llcp.c     | 973 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/nfc/llcp/llcp.h     | 193 ++++++++++
 net/nfc/llcp/sock.c     | 675 +++++++++++++++++++++++++++++++++
 net/nfc/nfc.h           |  54 +++
 10 files changed, 2335 insertions(+), 3 deletions(-)
 create mode 100644 net/nfc/llcp/Kconfig
 create mode 100644 net/nfc/llcp/commands.c
 create mode 100644 net/nfc/llcp/llcp.c
 create mode 100644 net/nfc/llcp/llcp.h
 create mode 100644 net/nfc/llcp/sock.c

(limited to 'include/linux')

diff --git a/include/linux/nfc.h b/include/linux/nfc.h
index 34d8303111f0..89fee4ab1904 100644
--- a/include/linux/nfc.h
+++ b/include/linux/nfc.h
@@ -139,9 +139,22 @@ struct sockaddr_nfc {
 	__u32 nfc_protocol;
 };
 
+#define NFC_LLCP_MAX_SERVICE_NAME 63
+struct sockaddr_nfc_llcp {
+	sa_family_t sa_family;
+	__u32 dev_idx;
+	__u32 target_idx;
+	__u32 nfc_protocol;
+	__u8 dsap; /* Destination SAP, if known */
+	__u8 ssap; /* Source SAP to be bound to */
+	char service_name[NFC_LLCP_MAX_SERVICE_NAME]; /* Service name URI */;
+	size_t service_name_len;
+};
+
 /* NFC socket protocols */
 #define NFC_SOCKPROTO_RAW	0
-#define NFC_SOCKPROTO_MAX	1
+#define NFC_SOCKPROTO_LLCP	1
+#define NFC_SOCKPROTO_MAX	2
 
 #define NFC_HEADER_SIZE 1
 
diff --git a/net/nfc/Kconfig b/net/nfc/Kconfig
index 58cddadf8e8e..44c865b86d6f 100644
--- a/net/nfc/Kconfig
+++ b/net/nfc/Kconfig
@@ -14,5 +14,6 @@ menuconfig NFC
 	  be called nfc.
 
 source "net/nfc/nci/Kconfig"
+source "net/nfc/llcp/Kconfig"
 
 source "drivers/nfc/Kconfig"
diff --git a/net/nfc/Makefile b/net/nfc/Makefile
index fbb550f2377b..7b4a6dcfa566 100644
--- a/net/nfc/Makefile
+++ b/net/nfc/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_NFC) += nfc.o
 obj-$(CONFIG_NFC_NCI) += nci/
 
 nfc-objs := core.o netlink.o af_nfc.o rawsock.o
+nfc-$(CONFIG_NFC_LLCP)	+= llcp/llcp.o llcp/commands.o llcp/sock.o
diff --git a/net/nfc/core.c b/net/nfc/core.c
index 3a45f21b3b97..3ddf6e698df0 100644
--- a/net/nfc/core.c
+++ b/net/nfc/core.c
@@ -240,6 +240,7 @@ int nfc_dep_link_down(struct nfc_dev *dev)
 	rc = dev->ops->dep_link_down(dev);
 	if (!rc) {
 		dev->dep_link_up = false;
+		nfc_llcp_mac_is_down(dev);
 		nfc_genl_dep_link_down_event(dev);
 	}
 
@@ -254,6 +255,8 @@ int nfc_dep_link_is_up(struct nfc_dev *dev, u32 target_idx,
 	dev->dep_link_up = true;
 	dev->dep_rf_mode = rf_mode;
 
+	nfc_llcp_mac_is_up(dev, target_idx, comm_mode, rf_mode);
+
 	return nfc_genl_dep_link_up_event(dev, target_idx, comm_mode, rf_mode);
 }
 EXPORT_SYMBOL(nfc_dep_link_is_up);
@@ -360,13 +363,13 @@ int nfc_set_remote_general_bytes(struct nfc_dev *dev, u8 *gb, u8 gb_len)
 	if (gb_len > NFC_MAX_GT_LEN)
 		return -EINVAL;
 
-	return 0;
+	return nfc_llcp_set_remote_gb(dev, gb, gb_len);
 }
 EXPORT_SYMBOL(nfc_set_remote_general_bytes);
 
 u8 *nfc_get_local_general_bytes(struct nfc_dev *dev, u8 *gt_len)
 {
-	return NULL;
+	return nfc_llcp_general_bytes(dev, gt_len);
 }
 EXPORT_SYMBOL(nfc_get_local_general_bytes);
 
@@ -560,6 +563,10 @@ int nfc_register_device(struct nfc_dev *dev)
 	if (rc < 0)
 		return rc;
 
+	rc = nfc_llcp_register_device(dev);
+	if (rc)
+		pr_err("Could not register llcp device\n");
+
 	rc = nfc_genl_device_added(dev);
 	if (rc)
 		pr_debug("The userspace won't be notified that the device %s was added\n",
@@ -591,6 +598,8 @@ void nfc_unregister_device(struct nfc_dev *dev)
 
 	mutex_unlock(&nfc_devlist_mutex);
 
+	nfc_llcp_unregister_device(dev);
+
 	rc = nfc_genl_device_removed(dev);
 	if (rc)
 		pr_debug("The userspace won't be notified that the device %s was removed\n",
@@ -620,6 +629,10 @@ static int __init nfc_init(void)
 	if (rc)
 		goto err_rawsock;
 
+	rc = nfc_llcp_init();
+	if (rc)
+		goto err_llcp_sock;
+
 	rc = af_nfc_init();
 	if (rc)
 		goto err_af_nfc;
@@ -627,6 +640,8 @@ static int __init nfc_init(void)
 	return 0;
 
 err_af_nfc:
+	nfc_llcp_exit();
+err_llcp_sock:
 	rawsock_exit();
 err_rawsock:
 	nfc_genl_exit();
@@ -638,6 +653,7 @@ err_genl:
 static void __exit nfc_exit(void)
 {
 	af_nfc_exit();
+	nfc_llcp_exit();
 	rawsock_exit();
 	nfc_genl_exit();
 	class_unregister(&nfc_class);
diff --git a/net/nfc/llcp/Kconfig b/net/nfc/llcp/Kconfig
new file mode 100644
index 000000000000..fbf5e8150908
--- /dev/null
+++ b/net/nfc/llcp/Kconfig
@@ -0,0 +1,7 @@
+config NFC_LLCP
+       depends on NFC && EXPERIMENTAL
+       bool "NFC LLCP support (EXPERIMENTAL)"
+       default n
+       help
+	 Say Y here if you want to build support for a kernel NFC LLCP
+	 implementation.
\ No newline at end of file
diff --git a/net/nfc/llcp/commands.c b/net/nfc/llcp/commands.c
new file mode 100644
index 000000000000..151f2ef429c4
--- /dev/null
+++ b/net/nfc/llcp/commands.c
@@ -0,0 +1,399 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define pr_fmt(fmt) "llcp: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nfc.h>
+
+#include <net/nfc/nfc.h>
+
+#include "../nfc.h"
+#include "llcp.h"
+
+static u8 llcp_tlv_length[LLCP_TLV_MAX] = {
+	0,
+	1, /* VERSION */
+	2, /* MIUX */
+	2, /* WKS */
+	1, /* LTO */
+	1, /* RW */
+	0, /* SN */
+	1, /* OPT */
+	0, /* SDREQ */
+	2, /* SDRES */
+
+};
+
+static u8 llcp_tlv8(u8 *tlv, u8 type)
+{
+	if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
+		return 0;
+
+	return tlv[2];
+}
+
+static u8 llcp_tlv16(u8 *tlv, u8 type)
+{
+	if (tlv[0] != type || tlv[1] != llcp_tlv_length[tlv[0]])
+		return 0;
+
+	return be16_to_cpu(*((__be16 *)(tlv + 2)));
+}
+
+
+static u8 llcp_tlv_version(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_VERSION);
+}
+
+static u16 llcp_tlv_miux(u8 *tlv)
+{
+	return llcp_tlv16(tlv, LLCP_TLV_MIUX) & 0x7f;
+}
+
+static u16 llcp_tlv_wks(u8 *tlv)
+{
+	return llcp_tlv16(tlv, LLCP_TLV_WKS);
+}
+
+static u16 llcp_tlv_lto(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_LTO);
+}
+
+static u8 llcp_tlv_opt(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_OPT);
+}
+
+static u8 llcp_tlv_rw(u8 *tlv)
+{
+	return llcp_tlv8(tlv, LLCP_TLV_RW) & 0xf;
+}
+
+u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length)
+{
+	u8 *tlv, length;
+
+	pr_debug("type %d\n", type);
+
+	if (type >= LLCP_TLV_MAX)
+		return NULL;
+
+	length = llcp_tlv_length[type];
+	if (length == 0 && value_length == 0)
+		return NULL;
+	else
+		length = value_length;
+
+	*tlv_length = 2 + length;
+	tlv = kzalloc(2 + length, GFP_KERNEL);
+	if (tlv == NULL)
+		return tlv;
+
+	tlv[0] = type;
+	tlv[1] = length;
+	memcpy(tlv + 2, value, length);
+
+	return tlv;
+}
+
+int nfc_llcp_parse_tlv(struct nfc_llcp_local *local,
+			u8 *tlv_array, u16 tlv_array_len)
+{
+	u8 *tlv = tlv_array, type, length, offset = 0;
+
+	pr_debug("TLV array length %d\n", tlv_array_len);
+
+	if (local == NULL)
+		return -ENODEV;
+
+	while (offset < tlv_array_len) {
+		type = tlv[0];
+		length = tlv[1];
+
+		pr_debug("type 0x%x length %d\n", type, length);
+
+		switch (type) {
+		case LLCP_TLV_VERSION:
+			local->remote_version = llcp_tlv_version(tlv);
+			break;
+		case LLCP_TLV_MIUX:
+			local->remote_miu = llcp_tlv_miux(tlv) + 128;
+			break;
+		case LLCP_TLV_WKS:
+			local->remote_wks = llcp_tlv_wks(tlv);
+			break;
+		case LLCP_TLV_LTO:
+			local->remote_lto = llcp_tlv_lto(tlv) * 10;
+			break;
+		case LLCP_TLV_OPT:
+			local->remote_opt = llcp_tlv_opt(tlv);
+			break;
+		case LLCP_TLV_RW:
+			local->remote_rw = llcp_tlv_rw(tlv);
+			break;
+		default:
+			pr_err("Invalid gt tlv value 0x%x\n", type);
+			break;
+		}
+
+		offset += length + 2;
+		tlv += length + 2;
+	}
+
+	pr_debug("version 0x%x miu %d lto %d opt 0x%x wks 0x%x rw %d\n",
+		local->remote_version, local->remote_miu,
+		local->remote_lto, local->remote_opt,
+		local->remote_wks, local->remote_rw);
+
+	return 0;
+}
+
+static struct sk_buff *llcp_add_header(struct sk_buff *pdu,
+					u8 dsap, u8 ssap, u8 ptype)
+{
+	u8 header[2];
+
+	pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap);
+
+	header[0] = (u8)((dsap << 2) | (ptype >> 2));
+	header[1] = (u8)((ptype << 6) | ssap);
+
+	pr_debug("header 0x%x 0x%x\n", header[0], header[1]);
+
+	memcpy(skb_put(pdu, LLCP_HEADER_SIZE), header, LLCP_HEADER_SIZE);
+
+	return pdu;
+}
+
+static struct sk_buff *llcp_add_tlv(struct sk_buff *pdu, u8 *tlv, u8 tlv_length)
+{
+	/* XXX Add an skb length check */
+
+	if (tlv == NULL)
+		return NULL;
+
+	memcpy(skb_put(pdu, tlv_length), tlv, tlv_length);
+
+	return pdu;
+}
+
+static struct sk_buff *llcp_allocate_pdu(struct nfc_llcp_sock *sock,
+							u8 cmd, u16 size)
+{
+	struct sk_buff *skb;
+	int err;
+
+	if (sock->ssap == 0)
+		return NULL;
+
+	skb = nfc_alloc_send_skb(sock->dev, &sock->sk, MSG_DONTWAIT,
+					size + LLCP_HEADER_SIZE, &err);
+	if (skb == NULL) {
+		pr_err("Could not allocate PDU\n");
+		return NULL;
+	}
+
+	skb = llcp_add_header(skb, sock->dsap, sock->ssap, cmd);
+
+	return skb;
+}
+
+int nfc_llcp_disconnect(struct nfc_llcp_sock *sock)
+{
+	struct sk_buff *skb;
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	u16 size = 0;
+
+	pr_debug("Sending DISC\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	dev = sock->dev;
+	if (dev == NULL)
+		return -ENODEV;
+
+	size += LLCP_HEADER_SIZE;
+	size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
+
+	skb = alloc_skb(size, GFP_ATOMIC);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
+
+	skb = llcp_add_header(skb, sock->ssap, sock->dsap, LLCP_PDU_DISC);
+
+	skb_queue_tail(&local->tx_queue, skb);
+
+	return 0;
+}
+
+int nfc_llcp_send_symm(struct nfc_dev *dev)
+{
+	struct sk_buff *skb;
+	struct nfc_llcp_local *local;
+	u16 size = 0;
+
+	pr_debug("Sending SYMM\n");
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL)
+		return -ENODEV;
+
+	size += LLCP_HEADER_SIZE;
+	size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
+
+	skb = llcp_add_header(skb, 0, 0, LLCP_PDU_SYMM);
+
+	return nfc_data_exchange(dev, local->target_idx, skb,
+					nfc_llcp_recv, local);
+}
+
+int nfc_llcp_send_connect(struct nfc_llcp_sock *sock)
+{
+	struct nfc_llcp_local *local;
+	struct sk_buff *skb;
+	u8 *service_name_tlv = NULL, service_name_tlv_length;
+	int err;
+	u16 size = 0;
+
+	pr_debug("Sending CONNECT\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	if (sock->service_name != NULL) {
+		service_name_tlv = nfc_llcp_build_tlv(LLCP_TLV_SN,
+					sock->service_name,
+					sock->service_name_len,
+					&service_name_tlv_length);
+		size += service_name_tlv_length;
+	}
+
+	pr_debug("SKB size %d SN length %zu\n", size, sock->service_name_len);
+
+	skb = llcp_allocate_pdu(sock, LLCP_PDU_CONNECT, size);
+	if (skb == NULL) {
+		err = -ENOMEM;
+		goto error_tlv;
+	}
+
+	if (service_name_tlv != NULL)
+		skb = llcp_add_tlv(skb, service_name_tlv,
+					service_name_tlv_length);
+
+	skb_queue_tail(&local->tx_queue, skb);
+
+	return 0;
+
+error_tlv:
+	pr_err("error %d\n", err);
+
+	kfree(service_name_tlv);
+
+	return err;
+}
+
+int nfc_llcp_send_cc(struct nfc_llcp_sock *sock)
+{
+	struct nfc_llcp_local *local;
+	struct sk_buff *skb;
+
+	pr_debug("Sending CC\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	skb = llcp_allocate_pdu(sock, LLCP_PDU_CC, 0);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_queue_tail(&local->tx_queue, skb);
+
+	return 0;
+}
+
+int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason)
+{
+	struct sk_buff *skb;
+	struct nfc_dev *dev;
+	u16 size = 1; /* Reason code */
+
+	pr_debug("Sending DM reason 0x%x\n", reason);
+
+	if (local == NULL)
+		return -ENODEV;
+
+	dev = local->dev;
+	if (dev == NULL)
+		return -ENODEV;
+
+	size += LLCP_HEADER_SIZE;
+	size += dev->tx_headroom + dev->tx_tailroom + NFC_HEADER_SIZE;
+
+	skb = alloc_skb(size, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_reserve(skb, dev->tx_headroom + NFC_HEADER_SIZE);
+
+	skb = llcp_add_header(skb, ssap, dsap, LLCP_PDU_DM);
+
+	memcpy(skb_put(skb, 1), &reason, 1);
+
+	skb_queue_head(&local->tx_queue, skb);
+
+	return 0;
+}
+
+int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock)
+{
+	struct sk_buff *skb;
+	struct nfc_llcp_local *local;
+
+	pr_debug("Send DISC\n");
+
+	local = sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	skb = llcp_allocate_pdu(sock, LLCP_PDU_DISC, 0);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	skb_queue_head(&local->tx_queue, skb);
+
+	return 0;
+}
diff --git a/net/nfc/llcp/llcp.c b/net/nfc/llcp/llcp.c
new file mode 100644
index 000000000000..67756b23eac5
--- /dev/null
+++ b/net/nfc/llcp/llcp.c
@@ -0,0 +1,973 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define pr_fmt(fmt) "llcp: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/nfc.h>
+
+#include "../nfc.h"
+#include "llcp.h"
+
+static u8 llcp_magic[3] = {0x46, 0x66, 0x6d};
+
+static struct list_head llcp_devices;
+
+static void nfc_llcp_socket_release(struct nfc_llcp_local *local)
+{
+	struct nfc_llcp_sock *parent, *s, *n;
+	struct sock *sk, *parent_sk;
+	int i;
+
+
+	mutex_lock(&local->socket_lock);
+
+	for (i = 0; i < LLCP_MAX_SAP; i++) {
+		parent = local->sockets[i];
+		if (parent == NULL)
+			continue;
+
+		/* Release all child sockets */
+		list_for_each_entry_safe(s, n, &parent->list, list) {
+			list_del(&s->list);
+			sk = &s->sk;
+
+			lock_sock(sk);
+
+			if (sk->sk_state == LLCP_CONNECTED)
+				nfc_put_device(s->dev);
+
+			sk->sk_state = LLCP_CLOSED;
+			sock_set_flag(sk, SOCK_DEAD);
+
+			release_sock(sk);
+		}
+
+		parent_sk = &parent->sk;
+
+		lock_sock(parent_sk);
+
+		if (parent_sk->sk_state == LLCP_LISTEN) {
+			struct nfc_llcp_sock *lsk, *n;
+			struct sock *accept_sk;
+
+			list_for_each_entry_safe(lsk, n, &parent->accept_queue,
+								accept_queue) {
+				accept_sk = &lsk->sk;
+				lock_sock(accept_sk);
+
+				nfc_llcp_accept_unlink(accept_sk);
+
+				accept_sk->sk_state = LLCP_CLOSED;
+				sock_set_flag(accept_sk, SOCK_DEAD);
+
+				release_sock(accept_sk);
+
+				sock_orphan(accept_sk);
+			}
+		}
+
+		if (parent_sk->sk_state == LLCP_CONNECTED)
+			nfc_put_device(parent->dev);
+
+		parent_sk->sk_state = LLCP_CLOSED;
+		sock_set_flag(parent_sk, SOCK_DEAD);
+
+		release_sock(parent_sk);
+	}
+
+	mutex_unlock(&local->socket_lock);
+}
+
+static void nfc_llcp_timeout_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+							timeout_work);
+
+	nfc_dep_link_down(local->dev);
+}
+
+static void nfc_llcp_symm_timer(unsigned long data)
+{
+	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+
+	pr_err("SYMM timeout\n");
+
+	queue_work(local->timeout_wq, &local->timeout_work);
+}
+
+struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev)
+{
+	struct nfc_llcp_local *local, *n;
+
+	list_for_each_entry_safe(local, n, &llcp_devices, list)
+		if (local->dev == dev)
+			return local;
+
+	pr_debug("No device found\n");
+
+	return NULL;
+}
+
+static char *wks[] = {
+	NULL,
+	NULL, /* SDP */
+	"urn:nfc:sn:ip",
+	"urn:nfc:sn:obex",
+	"urn:nfc:sn:snep",
+};
+
+static int nfc_llcp_wks_sap(char *service_name, size_t service_name_len)
+{
+	int sap, num_wks;
+
+	pr_debug("%s\n", service_name);
+
+	if (service_name == NULL)
+		return -EINVAL;
+
+	num_wks = ARRAY_SIZE(wks);
+
+	for (sap = 0 ; sap < num_wks; sap++) {
+		if (wks[sap] == NULL)
+			continue;
+
+		if (strncmp(wks[sap], service_name, service_name_len) == 0)
+			return sap;
+	}
+
+	return -EINVAL;
+}
+
+u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
+				struct nfc_llcp_sock *sock)
+{
+	mutex_lock(&local->sdp_lock);
+
+	if (sock->service_name != NULL && sock->service_name_len > 0) {
+		int ssap = nfc_llcp_wks_sap(sock->service_name,
+						sock->service_name_len);
+
+		if (ssap > 0) {
+			pr_debug("WKS %d\n", ssap);
+
+			/* This is a WKS, let's check if it's free */
+			if (local->local_wks & BIT(ssap)) {
+				mutex_unlock(&local->sdp_lock);
+
+				return LLCP_SAP_MAX;
+			}
+
+			set_bit(BIT(ssap), &local->local_wks);
+			mutex_unlock(&local->sdp_lock);
+
+			return ssap;
+		}
+
+		/*
+		 * This is not a well known service,
+		 * we should try to find a local SDP free spot
+		 */
+		ssap = find_first_zero_bit(&local->local_sdp, LLCP_SDP_NUM_SAP);
+		if (ssap == LLCP_SDP_NUM_SAP) {
+			mutex_unlock(&local->sdp_lock);
+
+			return LLCP_SAP_MAX;
+		}
+
+		pr_debug("SDP ssap %d\n", LLCP_WKS_NUM_SAP + ssap);
+
+		set_bit(BIT(ssap), &local->local_sdp);
+		mutex_unlock(&local->sdp_lock);
+
+		return LLCP_WKS_NUM_SAP + ssap;
+
+	} else if (sock->ssap != 0) {
+		if (sock->ssap < LLCP_WKS_NUM_SAP) {
+			if (!(local->local_wks & BIT(sock->ssap))) {
+				set_bit(BIT(sock->ssap), &local->local_wks);
+				mutex_unlock(&local->sdp_lock);
+
+				return sock->ssap;
+			}
+
+		} else if (sock->ssap < LLCP_SDP_NUM_SAP) {
+			if (!(local->local_sdp &
+				BIT(sock->ssap - LLCP_WKS_NUM_SAP))) {
+				set_bit(BIT(sock->ssap - LLCP_WKS_NUM_SAP),
+							&local->local_sdp);
+				mutex_unlock(&local->sdp_lock);
+
+				return sock->ssap;
+			}
+		}
+	}
+
+	mutex_unlock(&local->sdp_lock);
+
+	return LLCP_SAP_MAX;
+}
+
+u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local)
+{
+	u8 local_ssap;
+
+	mutex_lock(&local->sdp_lock);
+
+	local_ssap = find_first_zero_bit(&local->local_sap, LLCP_LOCAL_NUM_SAP);
+	if (local_ssap == LLCP_LOCAL_NUM_SAP) {
+		mutex_unlock(&local->sdp_lock);
+		return LLCP_SAP_MAX;
+	}
+
+	set_bit(BIT(local_ssap), &local->local_sap);
+
+	mutex_unlock(&local->sdp_lock);
+
+	return local_ssap + LLCP_LOCAL_SAP_OFFSET;
+}
+
+void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap)
+{
+	u8 local_ssap;
+	unsigned long *sdp;
+
+	if (ssap < LLCP_WKS_NUM_SAP) {
+		local_ssap = ssap;
+		sdp = &local->local_wks;
+	} else if (ssap < LLCP_LOCAL_NUM_SAP) {
+		local_ssap = ssap - LLCP_WKS_NUM_SAP;
+		sdp = &local->local_sdp;
+	} else if (ssap < LLCP_MAX_SAP) {
+		local_ssap = ssap - LLCP_LOCAL_NUM_SAP;
+		sdp = &local->local_sap;
+	} else {
+		return;
+	}
+
+	mutex_lock(&local->sdp_lock);
+
+	clear_bit(1 << local_ssap, sdp);
+
+	mutex_unlock(&local->sdp_lock);
+}
+
+u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *general_bytes_len)
+{
+	struct nfc_llcp_local *local;
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL) {
+		*general_bytes_len = 0;
+		return NULL;
+	}
+
+	*general_bytes_len = local->gb_len;
+
+	return local->gb;
+}
+
+static int nfc_llcp_build_gb(struct nfc_llcp_local *local)
+{
+	u8 *gb_cur, *version_tlv, version, version_length;
+	u8 *lto_tlv, lto, lto_length;
+	u8 *wks_tlv, wks_length;
+	u8 gb_len = 0;
+
+	version = LLCP_VERSION_11;
+	version_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &version,
+							1, &version_length);
+	gb_len += version_length;
+
+	/* 1500 ms */
+	lto = 150;
+	lto_tlv = nfc_llcp_build_tlv(LLCP_TLV_VERSION, &lto, 1, &lto_length);
+	gb_len += lto_length;
+
+	pr_debug("Local wks 0x%lx\n", local->local_wks);
+	wks_tlv = nfc_llcp_build_tlv(LLCP_TLV_WKS, (u8 *)&local->local_wks, 2,
+								&wks_length);
+	gb_len += wks_length;
+
+	gb_len += ARRAY_SIZE(llcp_magic);
+
+	if (gb_len > NFC_MAX_GT_LEN) {
+		kfree(version_tlv);
+		return -EINVAL;
+	}
+
+	gb_cur = local->gb;
+
+	memcpy(gb_cur, llcp_magic, ARRAY_SIZE(llcp_magic));
+	gb_cur += ARRAY_SIZE(llcp_magic);
+
+	memcpy(gb_cur, version_tlv, version_length);
+	gb_cur += version_length;
+
+	memcpy(gb_cur, lto_tlv, lto_length);
+	gb_cur += lto_length;
+
+	memcpy(gb_cur, wks_tlv, wks_length);
+	gb_cur += wks_length;
+
+	kfree(version_tlv);
+	kfree(lto_tlv);
+
+	local->gb_len = gb_len;
+
+	return 0;
+}
+
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+{
+	struct nfc_llcp_local *local = nfc_llcp_find_local(dev);
+
+	if (local == NULL) {
+		pr_err("No LLCP device\n");
+		return -ENODEV;
+	}
+
+	memset(local->remote_gb, 0, NFC_MAX_GT_LEN);
+	memcpy(local->remote_gb, gb, gb_len);
+	local->remote_gb_len = gb_len;
+
+	if (local->remote_gb == NULL ||
+			local->remote_gb_len == 0)
+		return -ENODEV;
+
+	if (memcmp(local->remote_gb, llcp_magic, 3)) {
+		pr_err("MAC does not support LLCP\n");
+		return -EINVAL;
+	}
+
+	return nfc_llcp_parse_tlv(local,
+			&local->remote_gb[3], local->remote_gb_len - 3);
+}
+
+static void nfc_llcp_tx_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+							tx_work);
+	struct sk_buff *skb;
+
+	skb = skb_dequeue(&local->tx_queue);
+	if (skb != NULL) {
+		pr_debug("Sending pending skb\n");
+		nfc_data_exchange(local->dev, local->target_idx,
+					skb, nfc_llcp_recv, local);
+	} else {
+		nfc_llcp_send_symm(local->dev);
+	}
+
+	mod_timer(&local->link_timer,
+			jiffies + msecs_to_jiffies(local->remote_lto));
+}
+
+static u8 nfc_llcp_dsap(struct sk_buff *pdu)
+{
+	return (pdu->data[0] & 0xfc) >> 2;
+}
+
+static u8 nfc_llcp_ptype(struct sk_buff *pdu)
+{
+	return ((pdu->data[0] & 0x03) << 2) | ((pdu->data[1] & 0xc0) >> 6);
+}
+
+static u8 nfc_llcp_ssap(struct sk_buff *pdu)
+{
+	return pdu->data[1] & 0x3f;
+}
+
+static u8 nfc_llcp_ns(struct sk_buff *pdu)
+{
+	return pdu->data[2] >> 4;
+}
+
+static u8 nfc_llcp_nr(struct sk_buff *pdu)
+{
+	return pdu->data[2] & 0xf;
+}
+
+static void nfc_llcp_set_nrns(struct nfc_llcp_sock *sock, struct sk_buff *pdu)
+{
+	pdu->data[2] = (sock->send_n << 4) | ((sock->recv_n - 1) % 16);
+	sock->send_n = (sock->send_n + 1) % 16;
+	sock->recv_ack_n = (sock->recv_n - 1) % 16;
+}
+
+static struct nfc_llcp_sock *nfc_llcp_sock_get(struct nfc_llcp_local *local,
+						u8 ssap, u8 dsap)
+{
+	struct nfc_llcp_sock *sock, *llcp_sock, *n;
+
+	if (ssap == 0 && dsap == 0)
+		return NULL;
+
+	mutex_lock(&local->socket_lock);
+	sock = local->sockets[ssap];
+	if (sock == NULL) {
+		mutex_unlock(&local->socket_lock);
+		return NULL;
+	}
+
+	pr_debug("root dsap %d (%d)\n", sock->dsap, dsap);
+
+	if (sock->dsap == dsap) {
+		sock_hold(&sock->sk);
+		mutex_unlock(&local->socket_lock);
+		return sock;
+	}
+
+	list_for_each_entry_safe(llcp_sock, n, &sock->list, list) {
+		pr_debug("llcp_sock %p sk %p dsap %d\n", llcp_sock,
+				&llcp_sock->sk, llcp_sock->dsap);
+		if (llcp_sock->dsap == dsap) {
+			sock_hold(&llcp_sock->sk);
+			mutex_unlock(&local->socket_lock);
+			return llcp_sock;
+		}
+	}
+
+	pr_err("Could not find socket for %d %d\n", ssap, dsap);
+
+	mutex_unlock(&local->socket_lock);
+
+	return NULL;
+}
+
+static void nfc_llcp_sock_put(struct nfc_llcp_sock *sock)
+{
+	sock_put(&sock->sk);
+}
+
+static u8 *nfc_llcp_connect_sn(struct sk_buff *skb, size_t *sn_len)
+{
+	u8 *tlv = &skb->data[2], type, length;
+	size_t tlv_array_len = skb->len - LLCP_HEADER_SIZE, offset = 0;
+
+	while (offset < tlv_array_len) {
+		type = tlv[0];
+		length = tlv[1];
+
+		pr_debug("type 0x%x length %d\n", type, length);
+
+		if (type == LLCP_TLV_SN) {
+			*sn_len = length;
+			return &tlv[2];
+		}
+
+		offset += length + 2;
+		tlv += length + 2;
+	}
+
+	return NULL;
+}
+
+static void nfc_llcp_recv_connect(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct sock *new_sk, *parent;
+	struct nfc_llcp_sock *sock, *new_sock;
+	u8 dsap, ssap, bound_sap, reason;
+
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	pr_debug("%d %d\n", dsap, ssap);
+
+	nfc_llcp_parse_tlv(local, &skb->data[LLCP_HEADER_SIZE],
+				skb->len - LLCP_HEADER_SIZE);
+
+	if (dsap != LLCP_SAP_SDP) {
+		bound_sap = dsap;
+
+		mutex_lock(&local->socket_lock);
+		sock = local->sockets[dsap];
+		if (sock == NULL) {
+			mutex_unlock(&local->socket_lock);
+			reason = LLCP_DM_NOBOUND;
+			goto fail;
+		}
+
+		sock_hold(&sock->sk);
+		mutex_unlock(&local->socket_lock);
+
+		lock_sock(&sock->sk);
+
+		if (sock->dsap == LLCP_SAP_SDP &&
+				sock->sk.sk_state == LLCP_LISTEN)
+			goto enqueue;
+	} else {
+		u8 *sn;
+		size_t sn_len;
+
+		sn = nfc_llcp_connect_sn(skb, &sn_len);
+		if (sn == NULL) {
+			reason = LLCP_DM_NOBOUND;
+			goto fail;
+		}
+
+		pr_debug("Service name length %zu\n", sn_len);
+
+		mutex_lock(&local->socket_lock);
+		for (bound_sap = 0; bound_sap < LLCP_LOCAL_SAP_OFFSET;
+								bound_sap++) {
+			sock = local->sockets[bound_sap];
+			if (sock == NULL)
+				continue;
+
+			if (sock->service_name == NULL ||
+				sock->service_name_len == 0)
+					continue;
+
+			if (sock->service_name_len != sn_len)
+				continue;
+
+			if (sock->dsap == LLCP_SAP_SDP &&
+					sock->sk.sk_state == LLCP_LISTEN &&
+					!memcmp(sn, sock->service_name, sn_len)) {
+				pr_debug("Found service name at SAP %d\n",
+								bound_sap);
+				sock_hold(&sock->sk);
+				mutex_unlock(&local->socket_lock);
+
+				lock_sock(&sock->sk);
+
+				goto enqueue;
+			}
+		}
+
+	}
+
+	mutex_unlock(&local->socket_lock);
+
+	reason = LLCP_DM_NOBOUND;
+	goto fail;
+
+enqueue:
+	parent = &sock->sk;
+
+	if (sk_acceptq_is_full(parent)) {
+		reason = LLCP_DM_REJ;
+		release_sock(&sock->sk);
+		sock_put(&sock->sk);
+		goto fail;
+	}
+
+	new_sk = nfc_llcp_sock_alloc(NULL, parent->sk_type,
+				     GFP_ATOMIC);
+	if (new_sk == NULL) {
+		reason = LLCP_DM_REJ;
+		release_sock(&sock->sk);
+		sock_put(&sock->sk);
+		goto fail;
+	}
+
+	new_sock = nfc_llcp_sock(new_sk);
+	new_sock->dev = local->dev;
+	new_sock->local = local;
+	new_sock->nfc_protocol = sock->nfc_protocol;
+	new_sock->ssap = bound_sap;
+	new_sock->dsap = ssap;
+	new_sock->parent = parent;
+
+	pr_debug("new sock %p sk %p\n", new_sock, &new_sock->sk);
+
+	list_add_tail(&new_sock->list, &sock->list);
+
+	nfc_llcp_accept_enqueue(&sock->sk, new_sk);
+
+	nfc_get_device(local->dev->idx);
+
+	new_sk->sk_state = LLCP_CONNECTED;
+
+	/* Wake the listening processes */
+	parent->sk_data_ready(parent, 0);
+
+	/* Send CC */
+	nfc_llcp_send_cc(new_sock);
+
+	release_sock(&sock->sk);
+	sock_put(&sock->sk);
+
+	return;
+
+fail:
+	/* Send DM */
+	nfc_llcp_send_dm(local, dsap, ssap, reason);
+
+	return;
+
+}
+
+static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct nfc_llcp_sock *llcp_sock;
+	struct sock *sk;
+	u8 dsap, ssap, ptype, ns, nr;
+
+	ptype = nfc_llcp_ptype(skb);
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+	ns = nfc_llcp_ns(skb);
+	nr = nfc_llcp_nr(skb);
+
+	pr_debug("%d %d R %d S %d\n", dsap, ssap, nr, ns);
+
+	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
+	if (llcp_sock == NULL) {
+		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
+		return;
+	}
+
+	sk = &llcp_sock->sk;
+	lock_sock(sk);
+	if (sk->sk_state == LLCP_CLOSED) {
+		release_sock(sk);
+		nfc_llcp_sock_put(llcp_sock);
+	}
+
+	if (ns == llcp_sock->recv_n)
+		llcp_sock->recv_n = (llcp_sock->recv_n + 1) % 16;
+	else
+		pr_err("Received out of sequence I PDU\n");
+
+	/* Pass the payload upstream */
+	if (ptype == LLCP_PDU_I) {
+		pr_debug("I frame, queueing on %p\n", &llcp_sock->sk);
+
+		skb_pull(skb, LLCP_HEADER_SIZE + LLCP_SEQUENCE_SIZE);
+		if (sock_queue_rcv_skb(&llcp_sock->sk, skb)) {
+			pr_err("receive queue is full\n");
+			skb_queue_head(&llcp_sock->tx_backlog_queue, skb);
+		}
+	}
+
+	/* Remove skbs from the pending queue */
+	if (llcp_sock->send_ack_n != nr) {
+		struct sk_buff *s, *tmp;
+
+		llcp_sock->send_ack_n = nr;
+
+		skb_queue_walk_safe(&llcp_sock->tx_pending_queue, s, tmp)
+			if (nfc_llcp_ns(s) <= nr) {
+				skb_unlink(s, &llcp_sock->tx_pending_queue);
+				kfree_skb(s);
+			}
+	}
+
+	/* Queue some I frames for transmission */
+	while (llcp_sock->remote_ready &&
+		skb_queue_len(&llcp_sock->tx_pending_queue) <= local->remote_rw) {
+		struct sk_buff *pdu, *pending_pdu;
+
+		pdu = skb_dequeue(&llcp_sock->tx_queue);
+		if (pdu == NULL)
+			break;
+
+		/* Update N(S)/N(R) */
+		nfc_llcp_set_nrns(llcp_sock, pdu);
+
+		pending_pdu = skb_clone(pdu, GFP_KERNEL);
+
+		skb_queue_tail(&local->tx_queue, pdu);
+		skb_queue_tail(&llcp_sock->tx_pending_queue, pending_pdu);
+	}
+
+	release_sock(sk);
+	nfc_llcp_sock_put(llcp_sock);
+}
+
+static void nfc_llcp_recv_disc(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct nfc_llcp_sock *llcp_sock;
+	struct sock *sk;
+	u8 dsap, ssap;
+
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
+	if (llcp_sock == NULL) {
+		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
+		return;
+	}
+
+	sk = &llcp_sock->sk;
+	lock_sock(sk);
+	if (sk->sk_state == LLCP_CLOSED) {
+		release_sock(sk);
+		nfc_llcp_sock_put(llcp_sock);
+	}
+
+
+	if (sk->sk_state == LLCP_CONNECTED) {
+		nfc_put_device(local->dev);
+		sk->sk_state = LLCP_CLOSED;
+		sk->sk_state_change(sk);
+	}
+
+	nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_DISC);
+
+	release_sock(sk);
+	nfc_llcp_sock_put(llcp_sock);
+}
+
+static void nfc_llcp_recv_cc(struct nfc_llcp_local *local,
+				struct sk_buff *skb)
+{
+	struct nfc_llcp_sock *llcp_sock;
+	u8 dsap, ssap;
+
+
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	llcp_sock = nfc_llcp_sock_get(local, dsap, ssap);
+
+	if (llcp_sock == NULL)
+		llcp_sock = nfc_llcp_sock_get(local, dsap, LLCP_SAP_SDP);
+
+	if (llcp_sock == NULL) {
+		pr_err("Invalid CC\n");
+		nfc_llcp_send_dm(local, dsap, ssap, LLCP_DM_NOCONN);
+
+		return;
+	}
+
+	llcp_sock->dsap = ssap;
+
+	nfc_llcp_parse_tlv(local, &skb->data[LLCP_HEADER_SIZE],
+				skb->len - LLCP_HEADER_SIZE);
+
+	nfc_llcp_sock_put(llcp_sock);
+}
+
+static void nfc_llcp_rx_work(struct work_struct *work)
+{
+	struct nfc_llcp_local *local = container_of(work, struct nfc_llcp_local,
+								rx_work);
+	u8 dsap, ssap, ptype;
+	struct sk_buff *skb;
+
+	skb = local->rx_pending;
+	if (skb == NULL) {
+		pr_debug("No pending SKB\n");
+		return;
+	}
+
+	ptype = nfc_llcp_ptype(skb);
+	dsap = nfc_llcp_dsap(skb);
+	ssap = nfc_llcp_ssap(skb);
+
+	pr_debug("ptype 0x%x dsap 0x%x ssap 0x%x\n", ptype, dsap, ssap);
+
+	switch (ptype) {
+	case LLCP_PDU_SYMM:
+		pr_debug("SYMM\n");
+		break;
+
+	case LLCP_PDU_CONNECT:
+		pr_debug("CONNECT\n");
+		nfc_llcp_recv_connect(local, skb);
+		break;
+
+	case LLCP_PDU_DISC:
+		pr_debug("DISC\n");
+		nfc_llcp_recv_disc(local, skb);
+		break;
+
+	case LLCP_PDU_CC:
+		pr_debug("CC\n");
+		nfc_llcp_recv_cc(local, skb);
+		break;
+
+	case LLCP_PDU_I:
+	case LLCP_PDU_RR:
+		pr_debug("I frame\n");
+		nfc_llcp_recv_hdlc(local, skb);
+		break;
+
+	}
+
+	queue_work(local->tx_wq, &local->tx_work);
+	kfree_skb(local->rx_pending);
+	local->rx_pending = NULL;
+
+	return;
+}
+
+void nfc_llcp_recv(void *data, struct sk_buff *skb, int err)
+{
+	struct nfc_llcp_local *local = (struct nfc_llcp_local *) data;
+
+	pr_debug("Received an LLCP PDU\n");
+	if (err < 0) {
+		pr_err("err %d", err);
+		return;
+	}
+
+	local->rx_pending = skb_get(skb);
+	del_timer(&local->link_timer);
+	queue_work(local->rx_wq, &local->rx_work);
+
+	return;
+}
+
+void nfc_llcp_mac_is_down(struct nfc_dev *dev)
+{
+	struct nfc_llcp_local *local;
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL)
+		return;
+
+	/* Close and purge all existing sockets */
+	nfc_llcp_socket_release(local);
+}
+
+void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+			u8 comm_mode, u8 rf_mode)
+{
+	struct nfc_llcp_local *local;
+
+	pr_debug("rf mode %d\n", rf_mode);
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL)
+		return;
+
+	local->target_idx = target_idx;
+	local->comm_mode = comm_mode;
+	local->rf_mode = rf_mode;
+
+	if (rf_mode == NFC_RF_INITIATOR) {
+		pr_debug("Queueing Tx work\n");
+
+		queue_work(local->tx_wq, &local->tx_work);
+	} else {
+		mod_timer(&local->link_timer,
+			jiffies + msecs_to_jiffies(local->remote_lto));
+	}
+}
+
+int nfc_llcp_register_device(struct nfc_dev *ndev)
+{
+	struct device *dev = &ndev->dev;
+	struct nfc_llcp_local *local;
+	char name[32];
+	int err;
+
+	local = kzalloc(sizeof(struct nfc_llcp_local), GFP_KERNEL);
+	if (local == NULL)
+		return -ENOMEM;
+
+	local->dev = ndev;
+	INIT_LIST_HEAD(&local->list);
+	mutex_init(&local->sdp_lock);
+	mutex_init(&local->socket_lock);
+	init_timer(&local->link_timer);
+	local->link_timer.data = (unsigned long) local;
+	local->link_timer.function = nfc_llcp_symm_timer;
+
+	skb_queue_head_init(&local->tx_queue);
+	INIT_WORK(&local->tx_work, nfc_llcp_tx_work);
+	snprintf(name, sizeof(name), "%s_llcp_tx_wq", dev_name(dev));
+	local->tx_wq = alloc_workqueue(name,
+			WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	if (local->tx_wq == NULL) {
+		err = -ENOMEM;
+		goto err_local;
+	}
+
+	local->rx_pending = NULL;
+	INIT_WORK(&local->rx_work, nfc_llcp_rx_work);
+	snprintf(name, sizeof(name), "%s_llcp_rx_wq", dev_name(dev));
+	local->rx_wq = alloc_workqueue(name,
+			WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	if (local->rx_wq == NULL) {
+		err = -ENOMEM;
+		goto err_tx_wq;
+	}
+
+	INIT_WORK(&local->timeout_work, nfc_llcp_timeout_work);
+	snprintf(name, sizeof(name), "%s_llcp_timeout_wq", dev_name(dev));
+	local->timeout_wq = alloc_workqueue(name,
+			WQ_NON_REENTRANT | WQ_UNBOUND | WQ_MEM_RECLAIM, 1);
+	if (local->timeout_wq == NULL) {
+		err = -ENOMEM;
+		goto err_rx_wq;
+	}
+
+	nfc_llcp_build_gb(local);
+
+	local->remote_miu = LLCP_DEFAULT_MIU;
+	local->remote_lto = LLCP_DEFAULT_LTO;
+	local->remote_rw = LLCP_DEFAULT_RW;
+
+	list_add(&llcp_devices, &local->list);
+
+	return 0;
+
+err_rx_wq:
+	destroy_workqueue(local->rx_wq);
+
+err_tx_wq:
+	destroy_workqueue(local->tx_wq);
+
+err_local:
+	kfree(local);
+
+	return 0;
+}
+
+void nfc_llcp_unregister_device(struct nfc_dev *dev)
+{
+	struct nfc_llcp_local *local = nfc_llcp_find_local(dev);
+
+	if (local == NULL) {
+		pr_debug("No such device\n");
+		return;
+	}
+
+	list_del(&local->list);
+	nfc_llcp_socket_release(local);
+	del_timer_sync(&local->link_timer);
+	skb_queue_purge(&local->tx_queue);
+	destroy_workqueue(local->tx_wq);
+	destroy_workqueue(local->rx_wq);
+	kfree(local->rx_pending);
+	kfree(local);
+}
+
+int __init nfc_llcp_init(void)
+{
+	INIT_LIST_HEAD(&llcp_devices);
+
+	return nfc_llcp_sock_init();
+}
+
+void nfc_llcp_exit(void)
+{
+	nfc_llcp_sock_exit();
+}
diff --git a/net/nfc/llcp/llcp.h b/net/nfc/llcp/llcp.h
new file mode 100644
index 000000000000..0ad2e3361584
--- /dev/null
+++ b/net/nfc/llcp/llcp.h
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+enum llcp_state {
+	LLCP_CONNECTED = 1, /* wait_for_packet() wants that */
+	LLCP_CLOSED,
+	LLCP_BOUND,
+	LLCP_LISTEN,
+};
+
+#define LLCP_DEFAULT_LTO 100
+#define LLCP_DEFAULT_RW  1
+#define LLCP_DEFAULT_MIU 128
+
+#define LLCP_WKS_NUM_SAP   16
+#define LLCP_SDP_NUM_SAP   16
+#define LLCP_LOCAL_NUM_SAP 32
+#define LLCP_LOCAL_SAP_OFFSET (LLCP_WKS_NUM_SAP + LLCP_SDP_NUM_SAP)
+#define LLCP_MAX_SAP (LLCP_WKS_NUM_SAP + LLCP_SDP_NUM_SAP + LLCP_LOCAL_NUM_SAP)
+
+struct nfc_llcp_sock;
+
+struct nfc_llcp_local {
+	struct list_head list;
+	struct nfc_dev *dev;
+
+	struct mutex sdp_lock;
+	struct mutex socket_lock;
+
+	struct timer_list link_timer;
+	struct sk_buff_head tx_queue;
+	struct workqueue_struct	*tx_wq;
+	struct work_struct	 tx_work;
+	struct workqueue_struct	*rx_wq;
+	struct work_struct	 rx_work;
+	struct sk_buff *rx_pending;
+	struct workqueue_struct	*timeout_wq;
+	struct work_struct	 timeout_work;
+
+	u32 target_idx;
+	u8 rf_mode;
+	u8 comm_mode;
+	unsigned long local_wks;      /* Well known services */
+	unsigned long local_sdp;      /* Local services  */
+	unsigned long local_sap; /* Local SAPs, not available for discovery */
+
+	/* local */
+	u8 gb[NFC_MAX_GT_LEN];
+	u8 gb_len;
+
+	/* remote */
+	u8 remote_gb[NFC_MAX_GT_LEN];
+	u8 remote_gb_len;
+
+	u8  remote_version;
+	u16 remote_miu;
+	u16 remote_lto;
+	u8  remote_opt;
+	u16 remote_wks;
+	u8  remote_rw;
+
+	/* sockets array */
+	struct nfc_llcp_sock *sockets[LLCP_MAX_SAP];
+};
+
+struct nfc_llcp_sock {
+	struct sock sk;
+	struct list_head list;
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	u32 target_idx;
+	u32 nfc_protocol;
+
+	u8 ssap;
+	u8 dsap;
+	char *service_name;
+	size_t service_name_len;
+
+	/* Link variables */
+	u8 send_n;
+	u8 send_ack_n;
+	u8 recv_n;
+	u8 recv_ack_n;
+
+	/* Is the remote peer ready to receive */
+	u8 remote_ready;
+
+	struct sk_buff_head tx_queue;
+	struct sk_buff_head tx_pending_queue;
+	struct sk_buff_head tx_backlog_queue;
+
+	struct list_head accept_queue;
+	struct sock *parent;
+};
+
+#define nfc_llcp_sock(sk) ((struct nfc_llcp_sock *) (sk))
+#define nfc_llcp_dev(sk)  (nfc_llcp_sock((sk))->dev)
+
+#define LLCP_HEADER_SIZE   2
+#define LLCP_SEQUENCE_SIZE 1
+
+/* LLCP versions: 1.1 is 1.0 plus SDP */
+#define LLCP_VERSION_10 0x10
+#define LLCP_VERSION_11 0x11
+
+/* LLCP PDU types */
+#define LLCP_PDU_SYMM     0x0
+#define LLCP_PDU_PAX      0x1
+#define LLCP_PDU_AGF      0x2
+#define LLCP_PDU_UI       0x3
+#define LLCP_PDU_CONNECT  0x4
+#define LLCP_PDU_DISC     0x5
+#define LLCP_PDU_CC       0x6
+#define LLCP_PDU_DM       0x7
+#define LLCP_PDU_FRMR     0x8
+#define LLCP_PDU_SNL      0x9
+#define LLCP_PDU_I        0xc
+#define LLCP_PDU_RR       0xd
+#define LLCP_PDU_RNR      0xe
+
+/* Parameters TLV types */
+#define LLCP_TLV_VERSION 0x1
+#define LLCP_TLV_MIUX    0x2
+#define LLCP_TLV_WKS     0x3
+#define LLCP_TLV_LTO     0x4
+#define LLCP_TLV_RW      0x5
+#define LLCP_TLV_SN      0x6
+#define LLCP_TLV_OPT     0x7
+#define LLCP_TLV_SDREQ   0x8
+#define LLCP_TLV_SDRES   0x9
+#define LLCP_TLV_MAX     0xa
+
+/* Well known LLCP SAP */
+#define LLCP_SAP_SDP   0x1
+#define LLCP_SAP_IP    0x2
+#define LLCP_SAP_OBEX  0x3
+#define LLCP_SAP_SNEP  0x4
+#define LLCP_SAP_MAX   0xff
+
+/* Disconnection reason code */
+#define LLCP_DM_DISC    0x00
+#define LLCP_DM_NOCONN  0x01
+#define LLCP_DM_NOBOUND 0x02
+#define LLCP_DM_REJ     0x03
+
+
+struct nfc_llcp_local *nfc_llcp_find_local(struct nfc_dev *dev);
+u8 nfc_llcp_get_sdp_ssap(struct nfc_llcp_local *local,
+				struct nfc_llcp_sock *sock);
+u8 nfc_llcp_get_local_ssap(struct nfc_llcp_local *local);
+void nfc_llcp_put_ssap(struct nfc_llcp_local *local, u8 ssap);
+
+/* Sock API */
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp);
+void nfc_llcp_sock_free(struct nfc_llcp_sock *sock);
+void nfc_llcp_accept_unlink(struct sock *sk);
+void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk);
+struct sock *nfc_llcp_accept_dequeue(struct sock *sk, struct socket *newsock);
+
+/* TLV API */
+int nfc_llcp_parse_tlv(struct nfc_llcp_local *local,
+			u8 *tlv_array, u16 tlv_array_len);
+
+/* Commands API */
+void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
+u8 *nfc_llcp_build_tlv(u8 type, u8 *value, u8 value_length, u8 *tlv_length);
+void nfc_llcp_recv(void *data, struct sk_buff *skb, int err);
+int nfc_llcp_disconnect(struct nfc_llcp_sock *sock);
+int nfc_llcp_send_symm(struct nfc_dev *dev);
+int nfc_llcp_send_connect(struct nfc_llcp_sock *sock);
+int nfc_llcp_send_cc(struct nfc_llcp_sock *sock);
+int nfc_llcp_send_dm(struct nfc_llcp_local *local, u8 ssap, u8 dsap, u8 reason);
+int nfc_llcp_send_disconnect(struct nfc_llcp_sock *sock);
+
+/* Socket API */
+int __init nfc_llcp_sock_init(void);
+void nfc_llcp_sock_exit(void);
diff --git a/net/nfc/llcp/sock.c b/net/nfc/llcp/sock.c
new file mode 100644
index 000000000000..f738ccd535f1
--- /dev/null
+++ b/net/nfc/llcp/sock.c
@@ -0,0 +1,675 @@
+/*
+ * Copyright (C) 2011  Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the
+ * Free Software Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#define pr_fmt(fmt) "llcp: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/nfc.h>
+
+#include "../nfc.h"
+#include "llcp.h"
+
+static struct proto llcp_sock_proto = {
+	.name     = "NFC_LLCP",
+	.owner    = THIS_MODULE,
+	.obj_size = sizeof(struct nfc_llcp_sock),
+};
+
+static int llcp_sock_bind(struct socket *sock, struct sockaddr *addr, int alen)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct nfc_llcp_local *local;
+	struct nfc_dev *dev;
+	struct sockaddr_nfc_llcp llcp_addr;
+	int len, ret = 0;
+
+	pr_debug("sk %p addr %p family %d\n", sk, addr, addr->sa_family);
+
+	if (!addr || addr->sa_family != AF_NFC)
+		return -EINVAL;
+
+	memset(&llcp_addr, 0, sizeof(llcp_addr));
+	len = min_t(unsigned int, sizeof(llcp_addr), alen);
+	memcpy(&llcp_addr, addr, len);
+
+	/* This is going to be a listening socket, dsap must be 0 */
+	if (llcp_addr.dsap != 0)
+		return -EINVAL;
+
+	lock_sock(sk);
+
+	if (sk->sk_state != LLCP_CLOSED) {
+		ret = -EBADFD;
+		goto error;
+	}
+
+	dev = nfc_get_device(llcp_addr.dev_idx);
+	if (dev == NULL) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL) {
+		ret = -ENODEV;
+		goto put_dev;
+	}
+
+	llcp_sock->dev = dev;
+	llcp_sock->local = local;
+	llcp_sock->nfc_protocol = llcp_addr.nfc_protocol;
+	llcp_sock->service_name_len = min_t(unsigned int,
+			llcp_addr.service_name_len, NFC_LLCP_MAX_SERVICE_NAME);
+	llcp_sock->service_name = kmemdup(llcp_addr.service_name,
+				llcp_sock->service_name_len, GFP_KERNEL);
+
+	llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
+	if (llcp_sock->ssap == LLCP_MAX_SAP)
+		goto put_dev;
+
+	local->sockets[llcp_sock->ssap] = llcp_sock;
+
+	pr_debug("Socket bound to SAP %d\n", llcp_sock->ssap);
+
+	sk->sk_state = LLCP_BOUND;
+
+put_dev:
+	nfc_put_device(dev);
+
+error:
+	release_sock(sk);
+	return ret;
+}
+
+static int llcp_sock_listen(struct socket *sock, int backlog)
+{
+	struct sock *sk = sock->sk;
+	int ret = 0;
+
+	pr_debug("sk %p backlog %d\n", sk, backlog);
+
+	lock_sock(sk);
+
+	if ((sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
+			|| sk->sk_state != LLCP_BOUND) {
+		ret = -EBADFD;
+		goto error;
+	}
+
+	sk->sk_max_ack_backlog = backlog;
+	sk->sk_ack_backlog = 0;
+
+	pr_debug("Socket listening\n");
+	sk->sk_state = LLCP_LISTEN;
+
+error:
+	release_sock(sk);
+
+	return ret;
+}
+
+void nfc_llcp_accept_unlink(struct sock *sk)
+{
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	pr_debug("state %d\n", sk->sk_state);
+
+	list_del_init(&llcp_sock->accept_queue);
+	sk_acceptq_removed(llcp_sock->parent);
+	llcp_sock->parent = NULL;
+
+	sock_put(sk);
+}
+
+void nfc_llcp_accept_enqueue(struct sock *parent, struct sock *sk)
+{
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct nfc_llcp_sock *llcp_sock_parent = nfc_llcp_sock(parent);
+
+	/* Lock will be free from unlink */
+	sock_hold(sk);
+
+	list_add_tail(&llcp_sock->accept_queue,
+			&llcp_sock_parent->accept_queue);
+	llcp_sock->parent = parent;
+	sk_acceptq_added(parent);
+}
+
+struct sock *nfc_llcp_accept_dequeue(struct sock *parent,
+					struct socket *newsock)
+{
+	struct nfc_llcp_sock *lsk, *n, *llcp_parent;
+	struct sock *sk;
+
+	llcp_parent = nfc_llcp_sock(parent);
+
+	list_for_each_entry_safe(lsk, n, &llcp_parent->accept_queue,
+							accept_queue) {
+		sk = &lsk->sk;
+		lock_sock(sk);
+
+		if (sk->sk_state == LLCP_CLOSED) {
+			release_sock(sk);
+			nfc_llcp_accept_unlink(sk);
+			continue;
+		}
+
+		if (sk->sk_state == LLCP_CONNECTED || !newsock) {
+			nfc_llcp_accept_unlink(sk);
+			if (newsock)
+				sock_graft(sk, newsock);
+
+			release_sock(sk);
+
+			pr_debug("Returning sk state %d\n", sk->sk_state);
+
+			return sk;
+		}
+
+		release_sock(sk);
+	}
+
+	return NULL;
+}
+
+static int llcp_sock_accept(struct socket *sock, struct socket *newsock,
+								int flags)
+{
+	DECLARE_WAITQUEUE(wait, current);
+	struct sock *sk = sock->sk, *new_sk;
+	long timeo;
+	int ret = 0;
+
+	pr_debug("parent %p\n", sk);
+
+	lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+
+	if (sk->sk_state != LLCP_LISTEN) {
+		ret = -EBADFD;
+		goto error;
+	}
+
+	timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK);
+
+	/* Wait for an incoming connection. */
+	add_wait_queue_exclusive(sk_sleep(sk), &wait);
+	while (!(new_sk = nfc_llcp_accept_dequeue(sk, newsock))) {
+		set_current_state(TASK_INTERRUPTIBLE);
+
+		if (!timeo) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		if (signal_pending(current)) {
+			ret = sock_intr_errno(timeo);
+			break;
+		}
+
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock_nested(sk, SINGLE_DEPTH_NESTING);
+	}
+	__set_current_state(TASK_RUNNING);
+	remove_wait_queue(sk_sleep(sk), &wait);
+
+	if (ret)
+		goto error;
+
+	newsock->state = SS_CONNECTED;
+
+	pr_debug("new socket %p\n", new_sk);
+
+error:
+	release_sock(sk);
+
+	return ret;
+}
+
+static int llcp_sock_getname(struct socket *sock, struct sockaddr *addr,
+			     int *len, int peer)
+{
+	struct sockaddr_nfc_llcp *llcp_addr = (struct sockaddr_nfc_llcp *) addr;
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	pr_debug("%p\n", sk);
+
+	addr->sa_family = AF_NFC;
+	*len = sizeof(struct sockaddr_nfc_llcp);
+
+	llcp_addr->dev_idx = llcp_sock->dev->idx;
+	llcp_addr->dsap = llcp_sock->dsap;
+	llcp_addr->ssap = llcp_sock->ssap;
+	llcp_addr->service_name_len = llcp_sock->service_name_len;
+	memcpy(llcp_addr->service_name, llcp_sock->service_name,
+					llcp_addr->service_name_len);
+
+	return 0;
+}
+
+static inline unsigned int llcp_accept_poll(struct sock *parent)
+{
+	struct nfc_llcp_sock *llcp_sock, *n, *parent_sock;
+	struct sock *sk;
+
+	parent_sock = nfc_llcp_sock(parent);
+
+	list_for_each_entry_safe(llcp_sock, n, &parent_sock->accept_queue,
+								accept_queue) {
+		sk = &llcp_sock->sk;
+
+		if (sk->sk_state == LLCP_CONNECTED)
+			return POLLIN | POLLRDNORM;
+	}
+
+	return 0;
+}
+
+static unsigned int llcp_sock_poll(struct file *file, struct socket *sock,
+							poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask = 0;
+
+	pr_debug("%p\n", sk);
+
+	sock_poll_wait(file, sk_sleep(sk), wait);
+
+	if (sk->sk_state == LLCP_LISTEN)
+		return llcp_accept_poll(sk);
+
+	if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
+		mask |= POLLERR;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		mask |= POLLIN;
+
+	if (sk->sk_state == LLCP_CLOSED)
+		mask |= POLLHUP;
+
+	return mask;
+}
+
+static int llcp_sock_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_local *local;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	if (!sk)
+		return 0;
+
+	pr_debug("%p\n", sk);
+
+	local = llcp_sock->local;
+	if (local == NULL)
+		return -ENODEV;
+
+	mutex_lock(&local->socket_lock);
+
+	if (llcp_sock == local->sockets[llcp_sock->ssap]) {
+		local->sockets[llcp_sock->ssap] = NULL;
+	} else {
+		struct nfc_llcp_sock *parent, *s, *n;
+
+		parent = local->sockets[llcp_sock->ssap];
+
+		list_for_each_entry_safe(s, n, &parent->list, list)
+			if (llcp_sock == s) {
+				list_del(&s->list);
+				break;
+			}
+
+	}
+
+	mutex_unlock(&local->socket_lock);
+
+	lock_sock(sk);
+
+	/* Send a DISC */
+	if (sk->sk_state == LLCP_CONNECTED)
+		nfc_llcp_disconnect(llcp_sock);
+
+	if (sk->sk_state == LLCP_LISTEN) {
+		struct nfc_llcp_sock *lsk, *n;
+		struct sock *accept_sk;
+
+		list_for_each_entry_safe(lsk, n, &llcp_sock->accept_queue,
+								accept_queue) {
+			accept_sk = &lsk->sk;
+			lock_sock(accept_sk);
+
+			nfc_llcp_disconnect(lsk);
+			nfc_llcp_accept_unlink(accept_sk);
+
+			release_sock(accept_sk);
+
+			sock_set_flag(sk, SOCK_DEAD);
+			sock_orphan(accept_sk);
+			sock_put(accept_sk);
+		}
+	}
+
+	/* Freeing the SAP */
+	if ((sk->sk_state == LLCP_CONNECTED
+			&& llcp_sock->ssap > LLCP_LOCAL_SAP_OFFSET) ||
+	    sk->sk_state == LLCP_BOUND ||
+	    sk->sk_state == LLCP_LISTEN)
+		nfc_llcp_put_ssap(llcp_sock->local, llcp_sock->ssap);
+
+	sock_set_flag(sk, SOCK_DEAD);
+
+	release_sock(sk);
+
+	sock_orphan(sk);
+	sock_put(sk);
+
+	return 0;
+}
+
+static int llcp_sock_connect(struct socket *sock, struct sockaddr *_addr,
+							int len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+	struct sockaddr_nfc_llcp *addr = (struct sockaddr_nfc_llcp *)_addr;
+	struct nfc_dev *dev;
+	struct nfc_llcp_local *local;
+	int ret = 0;
+
+	pr_debug("sock %p sk %p flags 0x%x\n", sock, sk, flags);
+
+	if (!addr || len < sizeof(struct sockaddr_nfc) ||
+			addr->sa_family != AF_NFC) {
+		pr_err("Invalid socket\n");
+		return -EINVAL;
+	}
+
+	if (addr->service_name_len == 0 && addr->dsap == 0) {
+		pr_err("Missing service name or dsap\n");
+		return -EINVAL;
+	}
+
+	pr_debug("addr dev_idx=%u target_idx=%u protocol=%u\n", addr->dev_idx,
+					addr->target_idx, addr->nfc_protocol);
+
+	lock_sock(sk);
+
+	if (sk->sk_state == LLCP_CONNECTED) {
+		ret = -EISCONN;
+		goto error;
+	}
+
+	dev = nfc_get_device(addr->dev_idx);
+	if (dev == NULL) {
+		ret = -ENODEV;
+		goto error;
+	}
+
+	local = nfc_llcp_find_local(dev);
+	if (local == NULL) {
+		ret = -ENODEV;
+		goto put_dev;
+	}
+
+	device_lock(&dev->dev);
+	if (dev->dep_link_up == false) {
+		ret = -ENOLINK;
+		device_unlock(&dev->dev);
+		goto put_dev;
+	}
+	device_unlock(&dev->dev);
+
+	if (local->rf_mode == NFC_RF_INITIATOR &&
+			addr->target_idx != local->target_idx) {
+		ret = -ENOLINK;
+		goto put_dev;
+	}
+
+	llcp_sock->dev = dev;
+	llcp_sock->local = local;
+	llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
+	if (llcp_sock->ssap == LLCP_SAP_MAX) {
+		ret = -ENOMEM;
+		goto put_dev;
+	}
+	if (addr->service_name_len == 0)
+		llcp_sock->dsap = addr->dsap;
+	else
+		llcp_sock->dsap = LLCP_SAP_SDP;
+	llcp_sock->nfc_protocol = addr->nfc_protocol;
+	llcp_sock->service_name_len = min_t(unsigned int,
+			addr->service_name_len, NFC_LLCP_MAX_SERVICE_NAME);
+	llcp_sock->service_name = kmemdup(addr->service_name,
+				 llcp_sock->service_name_len, GFP_KERNEL);
+
+	local->sockets[llcp_sock->ssap] = llcp_sock;
+
+	ret = nfc_llcp_send_connect(llcp_sock);
+	if (ret)
+		goto put_dev;
+
+	sk->sk_state = LLCP_CONNECTED;
+
+	release_sock(sk);
+	return 0;
+
+put_dev:
+	nfc_put_device(dev);
+
+error:
+	release_sock(sk);
+	return ret;
+}
+
+static int llcp_sock_recvmsg(struct kiocb *iocb, struct socket *sock,
+			     struct msghdr *msg, size_t len, int flags)
+{
+	int noblock = flags & MSG_DONTWAIT;
+	struct sock *sk = sock->sk;
+	unsigned int copied, rlen;
+	struct sk_buff *skb, *cskb;
+	int err = 0;
+
+	pr_debug("%p %zu\n", sk, len);
+
+	lock_sock(sk);
+
+	if (sk->sk_state == LLCP_CLOSED &&
+			skb_queue_empty(&sk->sk_receive_queue)) {
+		release_sock(sk);
+		return 0;
+	}
+
+	release_sock(sk);
+
+	if (flags & (MSG_OOB))
+		return -EOPNOTSUPP;
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb) {
+		pr_err("Recv datagram failed state %d %d %d",
+				sk->sk_state, err, sock_error(sk));
+
+		if (sk->sk_shutdown & RCV_SHUTDOWN)
+			return 0;
+
+		return err;
+	}
+
+	rlen   = skb->len;		/* real length of skb */
+	copied = min_t(unsigned int, rlen, len);
+
+	cskb = skb;
+	if (memcpy_toiovec(msg->msg_iov, cskb->data, copied)) {
+		if (!(flags & MSG_PEEK))
+			skb_queue_head(&sk->sk_receive_queue, skb);
+		return -EFAULT;
+	}
+
+	/* Mark read part of skb as used */
+	if (!(flags & MSG_PEEK)) {
+
+		/* SOCK_STREAM: re-queue skb if it contains unreceived data */
+		if (sk->sk_type == SOCK_STREAM) {
+			skb_pull(skb, copied);
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				goto done;
+			}
+		}
+
+		kfree_skb(skb);
+	}
+
+	/* XXX Queue backlogged skbs */
+
+done:
+	/* SOCK_SEQPACKET: return real length if MSG_TRUNC is set */
+	if (sk->sk_type == SOCK_SEQPACKET && (flags & MSG_TRUNC))
+		copied = rlen;
+
+	return copied;
+}
+
+static const struct proto_ops llcp_sock_ops = {
+	.family         = PF_NFC,
+	.owner          = THIS_MODULE,
+	.bind           = llcp_sock_bind,
+	.connect        = llcp_sock_connect,
+	.release        = llcp_sock_release,
+	.socketpair     = sock_no_socketpair,
+	.accept         = llcp_sock_accept,
+	.getname        = llcp_sock_getname,
+	.poll           = llcp_sock_poll,
+	.ioctl          = sock_no_ioctl,
+	.listen         = llcp_sock_listen,
+	.shutdown       = sock_no_shutdown,
+	.setsockopt     = sock_no_setsockopt,
+	.getsockopt     = sock_no_getsockopt,
+	.sendmsg        = sock_no_sendmsg,
+	.recvmsg        = llcp_sock_recvmsg,
+	.mmap           = sock_no_mmap,
+};
+
+static void llcp_sock_destruct(struct sock *sk)
+{
+	struct nfc_llcp_sock *llcp_sock = nfc_llcp_sock(sk);
+
+	pr_debug("%p\n", sk);
+
+	if (sk->sk_state == LLCP_CONNECTED)
+		nfc_put_device(llcp_sock->dev);
+
+	skb_queue_purge(&sk->sk_receive_queue);
+
+	nfc_llcp_sock_free(llcp_sock);
+
+	if (!sock_flag(sk, SOCK_DEAD)) {
+		pr_err("Freeing alive NFC LLCP socket %p\n", sk);
+		return;
+	}
+}
+
+struct sock *nfc_llcp_sock_alloc(struct socket *sock, int type, gfp_t gfp)
+{
+	struct sock *sk;
+	struct nfc_llcp_sock *llcp_sock;
+
+	sk = sk_alloc(&init_net, PF_NFC, gfp, &llcp_sock_proto);
+	if (!sk)
+		return NULL;
+
+	llcp_sock = nfc_llcp_sock(sk);
+
+	sock_init_data(sock, sk);
+	sk->sk_state = LLCP_CLOSED;
+	sk->sk_protocol = NFC_SOCKPROTO_LLCP;
+	sk->sk_type = type;
+	sk->sk_destruct = llcp_sock_destruct;
+
+	llcp_sock->ssap = 0;
+	llcp_sock->dsap = LLCP_SAP_SDP;
+	llcp_sock->send_n = llcp_sock->send_ack_n = 0;
+	llcp_sock->recv_n = llcp_sock->recv_ack_n = 0;
+	llcp_sock->remote_ready = 1;
+	skb_queue_head_init(&llcp_sock->tx_queue);
+	skb_queue_head_init(&llcp_sock->tx_pending_queue);
+	skb_queue_head_init(&llcp_sock->tx_backlog_queue);
+	INIT_LIST_HEAD(&llcp_sock->list);
+	INIT_LIST_HEAD(&llcp_sock->accept_queue);
+
+	if (sock != NULL)
+		sock->state = SS_UNCONNECTED;
+
+	return sk;
+}
+
+void nfc_llcp_sock_free(struct nfc_llcp_sock *sock)
+{
+	kfree(sock->service_name);
+
+	skb_queue_purge(&sock->tx_queue);
+	skb_queue_purge(&sock->tx_pending_queue);
+	skb_queue_purge(&sock->tx_backlog_queue);
+
+	list_del_init(&sock->accept_queue);
+
+	sock->parent = NULL;
+}
+
+static int llcp_sock_create(struct net *net, struct socket *sock,
+				const struct nfc_protocol *nfc_proto)
+{
+	struct sock *sk;
+
+	pr_debug("%p\n", sock);
+
+	if (sock->type != SOCK_STREAM && sock->type != SOCK_DGRAM)
+		return -ESOCKTNOSUPPORT;
+
+	sock->ops = &llcp_sock_ops;
+
+	sk = nfc_llcp_sock_alloc(sock, sock->type, GFP_ATOMIC);
+	if (sk == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static const struct nfc_protocol llcp_nfc_proto = {
+	.id	  = NFC_SOCKPROTO_LLCP,
+	.proto    = &llcp_sock_proto,
+	.owner    = THIS_MODULE,
+	.create   = llcp_sock_create
+};
+
+int __init nfc_llcp_sock_init(void)
+{
+	return nfc_proto_register(&llcp_nfc_proto);
+}
+
+void nfc_llcp_sock_exit(void)
+{
+	nfc_proto_unregister(&llcp_nfc_proto);
+}
diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h
index 4d0fb125d033..2c2c4015c68b 100644
--- a/net/nfc/nfc.h
+++ b/net/nfc/nfc.h
@@ -46,6 +46,60 @@ struct nfc_rawsock {
 #define to_rawsock_sk(_tx_work) \
 	((struct sock *) container_of(_tx_work, struct nfc_rawsock, tx_work))
 
+#ifdef CONFIG_NFC_LLCP
+
+void nfc_llcp_mac_is_down(struct nfc_dev *dev);
+void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+			u8 comm_mode, u8 rf_mode);
+int nfc_llcp_register_device(struct nfc_dev *dev);
+void nfc_llcp_unregister_device(struct nfc_dev *dev);
+int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len);
+u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *general_bytes_len);
+int __init nfc_llcp_init(void);
+void nfc_llcp_exit(void);
+
+#else
+
+void nfc_llcp_mac_is_down(struct nfc_dev *dev)
+{
+}
+
+void nfc_llcp_mac_is_up(struct nfc_dev *dev, u32 target_idx,
+			u8 comm_mode, u8 rf_mode)
+{
+}
+
+static inline int nfc_llcp_register_device(struct nfc_dev *dev)
+{
+	return 0;
+}
+
+static inline void nfc_llcp_unregister_device(struct nfc_dev *dev)
+{
+}
+
+static inline int nfc_llcp_set_remote_gb(struct nfc_dev *dev, u8 *gb, u8 gb_len)
+{
+	return 0;
+}
+
+static inline u8 *nfc_llcp_general_bytes(struct nfc_dev *dev, u8 *gb_len)
+{
+	*gb_len = 0;
+	return NULL;
+}
+
+static inline int nfc_llcp_init(void)
+{
+	return 0;
+}
+
+static inline void nfc_llcp_exit(void)
+{
+}
+
+#endif
+
 int __init rawsock_init(void);
 void rawsock_exit(void);
 
-- 
cgit v1.2.3


From 888bdaa9b2c426dcca214e6efd388080938082cb Mon Sep 17 00:00:00 2001
From: Glauber Costa <glommer@parallels.com>
Date: Wed, 14 Dec 2011 23:34:31 +0000
Subject: Move limit definitions outside CONFIG_INET

They need to be available for other protocols as well, since
they are used in sock.c openly

Signed-off-by: Glauber Costa <glommer@parallels.com>
CC: Hiroyouki Kamezawa <kamezawa.hiroyu@jp.fujitsu.com>
CC: David S. Miller <davem@davemloft.net>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/memcontrol.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1513994ce207..9b296ea41bb8 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -384,13 +384,13 @@ mem_cgroup_print_bad_page(struct page *page)
 }
 #endif
 
-#ifdef CONFIG_INET
 enum {
 	UNDER_LIMIT,
 	SOFT_LIMIT,
 	OVER_LIMIT,
 };
 
+#ifdef CONFIG_INET
 struct sock;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM
 void sock_update_memcg(struct sock *sk);
-- 
cgit v1.2.3


From bdd90d5e36a55271beb957b3d7ca3e29b2a90207 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Wed, 14 Dec 2011 12:20:27 +0100
Subject: cfg80211: validate nl80211 station handling better

The nl80211 station handling code is a bit messy
and doesn't do a lot of validation. It seems like
this could be an issue for drivers that don't use
mac80211 to validate everything.

As cfg80211 doesn't keep station state, move the
validation of allowing supported_rates to change
for TDLS only in station mode to mac80211.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |   6 +-
 include/net/cfg80211.h  |   7 +-
 net/mac80211/cfg.c      |   8 ++
 net/wireless/nl80211.c  | 199 +++++++++++++++++++++++++++---------------------
 4 files changed, 131 insertions(+), 89 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index a18760684fc9..f795cb7dccdd 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1536,7 +1536,11 @@ enum nl80211_iftype {
  * @NL80211_STA_FLAG_WME: station is WME/QoS capable
  * @NL80211_STA_FLAG_MFP: station uses management frame protection
  * @NL80211_STA_FLAG_AUTHENTICATED: station is authenticated
- * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer
+ * @NL80211_STA_FLAG_TDLS_PEER: station is a TDLS peer -- this flag should
+ *	only be used in managed mode (even in the flags mask). Note that the
+ *	flag can't be changed, it is only valid while adding a station, and
+ *	attempts to change it will silently be ignored (rather than rejected
+ *	as errors.)
  * @NL80211_STA_FLAG_MAX: highest station flag number currently defined
  * @__NL80211_STA_FLAG_AFTER_LAST: internal use
  */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 150c0ee714c2..5eda5933ae01 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1346,7 +1346,12 @@ struct cfg80211_gtk_rekey_data {
  *
  * @add_station: Add a new station.
  * @del_station: Remove a station; @mac may be NULL to remove all stations.
- * @change_station: Modify a given station.
+ * @change_station: Modify a given station. Note that flags changes are not much
+ *	validated in cfg80211, in particular the auth/assoc/authorized flags
+ *	might come to the driver in invalid combinations -- make sure to check
+ *	them, also against the existing state! Also, supported_rates changes are
+ *	not checked in station mode -- drivers need to reject (or ignore) them
+ *	for anything but TDLS peers.
  * @get_station: get station information for the station identified by @mac
  * @dump_station: dump station callback -- resume dump at index @idx
  *
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 393b2a4445b8..944051b43bad 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -976,6 +976,14 @@ static int ieee80211_change_station(struct wiphy *wiphy,
 		return -EINVAL;
 	}
 
+	/* in station mode, supported rates are only valid with TDLS */
+	if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+	    params->supported_rates &&
+	    !test_sta_flag(sta, WLAN_STA_TDLS_PEER)) {
+		rcu_read_unlock();
+		return -EINVAL;
+	}
+
 	if (params->vlan && params->vlan != sta->sdata->dev) {
 		vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan);
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d86428145c32..b07c4fc4ae22 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2579,6 +2579,9 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.ht_capa =
 			nla_data(info->attrs[NL80211_ATTR_HT_CAPABILITY]);
 
+	if (!rdev->ops->change_station)
+		return -EOPNOTSUPP;
+
 	if (parse_station_flags(info, &params))
 		return -EINVAL;
 
@@ -2590,73 +2593,84 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 		params.plink_state =
 		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_STATE]);
 
-	params.vlan = get_vlan(info, rdev);
-	if (IS_ERR(params.vlan))
-		return PTR_ERR(params.vlan);
-
-	/* validate settings */
-	err = 0;
-
 	switch (dev->ieee80211_ptr->iftype) {
 	case NL80211_IFTYPE_AP:
 	case NL80211_IFTYPE_AP_VLAN:
 	case NL80211_IFTYPE_P2P_GO:
 		/* disallow mesh-specific things */
 		if (params.plink_action)
-			err = -EINVAL;
+			return -EINVAL;
+
+		/* TDLS can't be set, ... */
+		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+			return -EINVAL;
+		/*
+		 * ... but don't bother the driver with it. This works around
+		 * a hostapd/wpa_supplicant issue -- it always includes the
+		 * TLDS_PEER flag in the mask even for AP mode.
+		 */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+		/* accept only the listed bits */
+		if (params.sta_flags_mask &
+				~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
+				  BIT(NL80211_STA_FLAG_SHORT_PREAMBLE) |
+				  BIT(NL80211_STA_FLAG_WME) |
+				  BIT(NL80211_STA_FLAG_MFP)))
+			return -EINVAL;
+
+		/* must be last in here for error handling */
+		params.vlan = get_vlan(info, rdev);
+		if (IS_ERR(params.vlan))
+			return PTR_ERR(params.vlan);
 		break;
 	case NL80211_IFTYPE_P2P_CLIENT:
 	case NL80211_IFTYPE_STATION:
 		/* disallow things sta doesn't support */
 		if (params.plink_action)
-			err = -EINVAL;
-		if (params.vlan)
-			err = -EINVAL;
-		if (params.supported_rates &&
-		    !(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.ht_capa)
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.listen_interval >= 0)
-			err = -EINVAL;
-		if (params.sta_flags_mask &
-				~(BIT(NL80211_STA_FLAG_AUTHORIZED) |
-				  BIT(NL80211_STA_FLAG_TDLS_PEER)))
-			err = -EINVAL;
-		/* can't change the TDLS bit */
-		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
-		    (params.sta_flags_mask & BIT(NL80211_STA_FLAG_TDLS_PEER)))
-			err = -EINVAL;
+			return -EINVAL;
+		/*
+		 * Don't allow userspace to change the TDLS_PEER flag,
+		 * but silently ignore attempts to change it since we
+		 * don't have state here to verify that it doesn't try
+		 * to change the flag.
+		 */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
+
+		/* reject any changes other than AUTHORIZED */
+		if (params.sta_flags_mask & ~BIT(NL80211_STA_FLAG_AUTHORIZED))
+			return -EINVAL;
 		break;
 	case NL80211_IFTYPE_MESH_POINT:
 		/* disallow things mesh doesn't support */
 		if (params.vlan)
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.ht_capa)
-			err = -EINVAL;
+			return -EINVAL;
 		if (params.listen_interval >= 0)
-			err = -EINVAL;
+			return -EINVAL;
+		/*
+		 * No special handling for TDLS here -- the userspace
+		 * mesh code doesn't have this bug.
+		 */
 		if (params.sta_flags_mask &
 				~(BIT(NL80211_STA_FLAG_AUTHENTICATED) |
 				  BIT(NL80211_STA_FLAG_MFP) |
 				  BIT(NL80211_STA_FLAG_AUTHORIZED)))
-			err = -EINVAL;
+			return -EINVAL;
 		break;
 	default:
-		err = -EINVAL;
+		return -EOPNOTSUPP;
 	}
 
-	if (err)
-		goto out;
-
-	if (!rdev->ops->change_station) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	/* be aware of params.vlan when changing code here */
 
 	err = rdev->ops->change_station(&rdev->wiphy, dev, mac_addr, &params);
 
- out:
 	if (params.vlan)
 		dev_put(params.vlan);
 
@@ -2711,70 +2725,81 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.plink_action =
 		    nla_get_u8(info->attrs[NL80211_ATTR_STA_PLINK_ACTION]);
 
+	if (!rdev->ops->add_station)
+		return -EOPNOTSUPP;
+
 	if (parse_station_flags(info, &params))
 		return -EINVAL;
 
-	/* parse WME attributes if sta is WME capable */
-	if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
-	    (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) &&
-	    info->attrs[NL80211_ATTR_STA_WME]) {
-		struct nlattr *tb[NL80211_STA_WME_MAX + 1];
-		struct nlattr *nla;
+	switch (dev->ieee80211_ptr->iftype) {
+	case NL80211_IFTYPE_AP:
+	case NL80211_IFTYPE_AP_VLAN:
+	case NL80211_IFTYPE_P2P_GO:
+		/* parse WME attributes if sta is WME capable */
+		if ((rdev->wiphy.flags & WIPHY_FLAG_AP_UAPSD) &&
+		    (params.sta_flags_set & BIT(NL80211_STA_FLAG_WME)) &&
+		    info->attrs[NL80211_ATTR_STA_WME]) {
+			struct nlattr *tb[NL80211_STA_WME_MAX + 1];
+			struct nlattr *nla;
+
+			nla = info->attrs[NL80211_ATTR_STA_WME];
+			err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
+					       nl80211_sta_wme_policy);
+			if (err)
+				return err;
 
-		nla = info->attrs[NL80211_ATTR_STA_WME];
-		err = nla_parse_nested(tb, NL80211_STA_WME_MAX, nla,
-				       nl80211_sta_wme_policy);
-		if (err)
-			return err;
+			if (tb[NL80211_STA_WME_UAPSD_QUEUES])
+				params.uapsd_queues =
+				     nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
+			if (params.uapsd_queues &
+					~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
+				return -EINVAL;
 
-		if (tb[NL80211_STA_WME_UAPSD_QUEUES])
-			params.uapsd_queues =
-			     nla_get_u8(tb[NL80211_STA_WME_UAPSD_QUEUES]);
-		if (params.uapsd_queues & ~IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK)
-			return -EINVAL;
+			if (tb[NL80211_STA_WME_MAX_SP])
+				params.max_sp =
+				     nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);
 
-		if (tb[NL80211_STA_WME_MAX_SP])
-			params.max_sp =
-			     nla_get_u8(tb[NL80211_STA_WME_MAX_SP]);
+			if (params.max_sp &
+					~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
+				return -EINVAL;
 
-		if (params.max_sp & ~IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK)
+			params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;
+		}
+		/* TDLS peers cannot be added */
+		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
 			return -EINVAL;
+		/* but don't bother the driver with it */
+		params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_TDLS_PEER);
 
-		params.sta_modify_mask |= STATION_PARAM_APPLY_UAPSD;
+		/* must be last in here for error handling */
+		params.vlan = get_vlan(info, rdev);
+		if (IS_ERR(params.vlan))
+			return PTR_ERR(params.vlan);
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+		/* TDLS peers cannot be added */
+		if (params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER))
+			return -EINVAL;
+		break;
+	case NL80211_IFTYPE_STATION:
+		/* Only TDLS peers can be added */
+		if (!(params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)))
+			return -EINVAL;
+		/* Can only add if TDLS ... */
+		if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS))
+			return -EOPNOTSUPP;
+		/* ... with external setup is supported */
+		if (!(rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP))
+			return -EOPNOTSUPP;
+		break;
+	default:
+		return -EOPNOTSUPP;
 	}
 
-	if (dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_AP_VLAN &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_MESH_POINT &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_P2P_GO &&
-	    dev->ieee80211_ptr->iftype != NL80211_IFTYPE_STATION)
-		return -EINVAL;
-
-	/*
-	 * Only managed stations can add TDLS peers, and only when the
-	 * wiphy supports external TDLS setup.
-	 */
-	if (dev->ieee80211_ptr->iftype == NL80211_IFTYPE_STATION &&
-	    !((params.sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) &&
-	      (rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_TDLS) &&
-	      (rdev->wiphy.flags & WIPHY_FLAG_TDLS_EXTERNAL_SETUP)))
-		return -EINVAL;
-
-	params.vlan = get_vlan(info, rdev);
-	if (IS_ERR(params.vlan))
-		return PTR_ERR(params.vlan);
-
-	/* validate settings */
-	err = 0;
-
-	if (!rdev->ops->add_station) {
-		err = -EOPNOTSUPP;
-		goto out;
-	}
+	/* be aware of params.vlan when changing code here */
 
 	err = rdev->ops->add_station(&rdev->wiphy, dev, mac_addr, &params);
 
- out:
 	if (params.vlan)
 		dev_put(params.vlan);
 	return err;
-- 
cgit v1.2.3


From e7c466e58eb1ff9bf49c2f3902622dc11a8c7022 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:42:42 +0000
Subject: sock_diag: Move the SOCK_DIAG_BY_FAMILY cmd declaration

It should belong to sock_diag, not inet_diag.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 1 -
 include/linux/sock_diag.h | 3 +++
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index 78972a149dff..a27e62178d43 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -6,7 +6,6 @@
 /* Just some random number */
 #define TCPDIAG_GETSOCK 18
 #define DCCPDIAG_GETSOCK 19
-#define SOCK_DIAG_BY_FAMILY 20
 
 #define INET_DIAG_GETSOCK_MAX 24
 
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index ba4933b1213b..7999778ad08d 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -1,5 +1,8 @@
 #ifndef __SOCK_DIAG_H__
 #define __SOCK_DIAG_H__
+
+#define SOCK_DIAG_BY_FAMILY 20
+
 struct sk_buff;
 struct nlmsghdr;
 
-- 
cgit v1.2.3


From f65c1b534b99aef1809b893387b295963821549f Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:43:44 +0000
Subject: sock_diag: Generalize requests cookies managements

The sk address is used as a cookie between dump/get_exact calls.
It will be required for unix socket sdumping, so move it from
inet_diag to sock_diag.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h |  1 -
 include/linux/sock_diag.h |  3 +++
 net/core/sock_diag.c      | 19 +++++++++++++++++++
 net/ipv4/inet_diag.c      | 23 ++++-------------------
 net/ipv4/udp_diag.c       |  2 +-
 5 files changed, 27 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index a27e62178d43..afa5d5c74169 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -168,7 +168,6 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo,
 		struct inet_diag_req *req);
 
 int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk);
-int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req);
 
 extern int  inet_diag_register(const struct inet_diag_handler *handler);
 extern void inet_diag_unregister(const struct inet_diag_handler *handler);
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 7999778ad08d..379d5dccf8e1 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -22,5 +22,8 @@ void sock_diag_unregister(struct sock_diag_handler *h);
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh));
 
+int sock_diag_check_cookie(void *sk, __u32 *cookie);
+void sock_diag_save_cookie(void *sk, __u32 *cookie);
+
 extern struct sock *sock_diag_nlsk;
 #endif
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index cee96f368108..711bdefe7753 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -12,6 +12,25 @@ static struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
 
+int sock_diag_check_cookie(void *sk, __u32 *cookie)
+{
+	if ((cookie[0] != INET_DIAG_NOCOOKIE ||
+	     cookie[1] != INET_DIAG_NOCOOKIE) &&
+	    ((u32)(unsigned long)sk != cookie[0] ||
+	     (u32)((((unsigned long)sk) >> 31) >> 1) != cookie[1]))
+		return -ESTALE;
+	else
+		return 0;
+}
+EXPORT_SYMBOL_GPL(sock_diag_check_cookie);
+
+void sock_diag_save_cookie(void *sk, __u32 *cookie)
+{
+	cookie[0] = (u32)(unsigned long)sk;
+	cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+}
+EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
+
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
 {
 	mutex_lock(&sock_diag_table_mutex);
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fa27313765f3..fb2e47ff59f7 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -102,8 +102,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	r->idiag_retrans = 0;
 
 	r->id.idiag_if = sk->sk_bound_dev_if;
-	r->id.idiag_cookie[0] = (u32)(unsigned long)sk;
-	r->id.idiag_cookie[1] = (u32)(((unsigned long)sk >> 31) >> 1);
+	sock_diag_save_cookie(sk, r->id.idiag_cookie);
 
 	r->id.idiag_sport = inet->inet_sport;
 	r->id.idiag_dport = inet->inet_dport;
@@ -221,8 +220,7 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw,
 	r->idiag_family	      = tw->tw_family;
 	r->idiag_retrans      = 0;
 	r->id.idiag_if	      = tw->tw_bound_dev_if;
-	r->id.idiag_cookie[0] = (u32)(unsigned long)tw;
-	r->id.idiag_cookie[1] = (u32)(((unsigned long)tw >> 31) >> 1);
+	sock_diag_save_cookie(tw, r->id.idiag_cookie);
 	r->id.idiag_sport     = tw->tw_sport;
 	r->id.idiag_dport     = tw->tw_dport;
 	r->id.idiag_src[0]    = tw->tw_rcv_saddr;
@@ -261,18 +259,6 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb,
 	return inet_csk_diag_fill(sk, skb, r, pid, seq, nlmsg_flags, unlh);
 }
 
-int inet_diag_check_cookie(struct sock *sk, struct inet_diag_req *req)
-{
-	if ((req->id.idiag_cookie[0] != INET_DIAG_NOCOOKIE ||
-	     req->id.idiag_cookie[1] != INET_DIAG_NOCOOKIE) &&
-	    ((u32)(unsigned long)sk != req->id.idiag_cookie[0] ||
-	     (u32)((((unsigned long)sk) >> 31) >> 1) != req->id.idiag_cookie[1]))
-		return -ESTALE;
-	else
-		return 0;
-}
-EXPORT_SYMBOL_GPL(inet_diag_check_cookie);
-
 int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb,
 		const struct nlmsghdr *nlh, struct inet_diag_req *req)
 {
@@ -304,7 +290,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
 	if (sk == NULL)
 		goto out_nosk;
 
-	err = inet_diag_check_cookie(sk, req);
+	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
 	if (err)
 		goto out;
 
@@ -617,8 +603,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 	r->idiag_retrans = req->retrans;
 
 	r->id.idiag_if = sk->sk_bound_dev_if;
-	r->id.idiag_cookie[0] = (u32)(unsigned long)req;
-	r->id.idiag_cookie[1] = (u32)(((unsigned long)req >> 31) >> 1);
+	sock_diag_save_cookie(req, r->id.idiag_cookie);
 
 	tmo = req->expires - jiffies;
 	if (tmo < 0)
diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c
index fe9db8675acb..69f8a7ca63dd 100644
--- a/net/ipv4/udp_diag.c
+++ b/net/ipv4/udp_diag.c
@@ -57,7 +57,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb,
 	if (sk == NULL)
 		goto out_nosk;
 
-	err = inet_diag_check_cookie(sk, req);
+	err = sock_diag_check_cookie(sk, req->id.idiag_cookie);
 	if (err)
 		goto out;
 
-- 
cgit v1.2.3


From 22931d3b906cd0a1726a49a09713f9220a5fab8a Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:44:35 +0000
Subject: unix_diag: Basic module skeleton

Includes basic module_init/_exit functionality, dump/get_exact stubs
and declares the basic API structures for request and response.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h | 24 ++++++++++++++++++++
 net/unix/diag.c           | 57 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)
 create mode 100644 include/linux/unix_diag.h
 create mode 100644 net/unix/diag.c

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
new file mode 100644
index 000000000000..445184a85763
--- /dev/null
+++ b/include/linux/unix_diag.h
@@ -0,0 +1,24 @@
+#ifndef __UNIX_DIAG_H__
+#define __UNIX_DIAG_H__
+
+struct unix_diag_req {
+	__u8	sdiag_family;
+	__u8	sdiag_protocol;
+	__u16	pad;
+	__u32	udiag_states;
+	__u32	udiag_ino;
+	__u32	udiag_show;
+	__u32	udiag_cookie[2];
+};
+
+struct unix_diag_msg {
+	__u8	udiag_family;
+	__u8	udiag_type;
+	__u8	udiag_state;
+	__u8	pad;
+
+	__u32	udiag_ino;
+	__u32	udiag_cookie[2];
+};
+
+#endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
new file mode 100644
index 000000000000..6be16c0ad38f
--- /dev/null
+++ b/net/unix/diag.c
@@ -0,0 +1,57 @@
+#include <linux/types.h>
+#include <linux/spinlock.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+#include <linux/skbuff.h>
+#include <net/netlink.h>
+#include <net/af_unix.h>
+#include <net/tcp_states.h>
+
+#define UNIX_DIAG_PUT(skb, attrtype, attrlen) \
+	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
+
+static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	return 0;
+}
+
+static int unix_diag_get_exact(struct sk_buff *in_skb,
+			       const struct nlmsghdr *nlh,
+			       struct unix_diag_req *req)
+{
+	return -EAFNOSUPPORT;
+}
+
+static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
+{
+	int hdrlen = sizeof(struct unix_diag_req);
+
+	if (nlmsg_len(h) < hdrlen)
+		return -EINVAL;
+
+	if (h->nlmsg_flags & NLM_F_DUMP)
+		return netlink_dump_start(sock_diag_nlsk, skb, h,
+					  unix_diag_dump, NULL, 0);
+	else
+		return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h));
+}
+
+static struct sock_diag_handler unix_diag_handler = {
+	.family = AF_UNIX,
+	.dump = unix_diag_handler_dump,
+};
+
+static int __init unix_diag_init(void)
+{
+	return sock_diag_register(&unix_diag_handler);
+}
+
+static void __exit unix_diag_exit(void)
+{
+	sock_diag_unregister(&unix_diag_handler);
+}
+
+module_init(unix_diag_init);
+module_exit(unix_diag_exit);
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 1 /* AF_LOCAL */);
-- 
cgit v1.2.3


From f5248b48a64c221dd6157ab9cbee5a36ee45e6ed Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:45:24 +0000
Subject: unix_diag: Unix socket name NLA

Report the sun_path when requested as NLA. With leading '\0' if
present but without the leading AF_UNIX bits.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  8 ++++++++
 net/unix/diag.c           | 20 ++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 445184a85763..cc4df34d4c14 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -11,6 +11,8 @@ struct unix_diag_req {
 	__u32	udiag_cookie[2];
 };
 
+#define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
+
 struct unix_diag_msg {
 	__u8	udiag_family;
 	__u8	udiag_type;
@@ -21,4 +23,10 @@ struct unix_diag_msg {
 	__u32	udiag_cookie[2];
 };
 
+enum {
+	UNIX_DIAG_NAME,
+
+	UNIX_DIAG_MAX,
+};
+
 #endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
index d7bd48c49ee5..161ce6c05e31 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -10,6 +10,22 @@
 #define UNIX_DIAG_PUT(skb, attrtype, attrlen) \
 	RTA_DATA(__RTA_PUT(skb, attrtype, attrlen))
 
+static int sk_diag_dump_name(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct unix_address *addr = unix_sk(sk)->addr;
+	char *s;
+
+	if (addr) {
+		s = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_NAME, addr->len - sizeof(short));
+		memcpy(s, addr->name->sun_path, addr->len - sizeof(short));
+	}
+
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -28,6 +44,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	rep->udiag_ino = sk_ino;
 	sock_diag_save_cookie(sk, rep->udiag_cookie);
 
+	if ((req->udiag_show & UDIAG_SHOW_NAME) &&
+			sk_diag_dump_name(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From 5f7b0569460b7d8d01ca776430a00505a68b7584 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:45:43 +0000
Subject: unix_diag: Unix inode info NLA

Actually, the socket path if it's not anonymous doesn't give
a clue to which file the socket is bound to. Even if the path
is absolute, it can be unlinked and then new socket can be
bound to it.

With this NLA it's possible to check which file a particular
socket is really bound to.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  7 +++++++
 net/unix/diag.c           | 21 +++++++++++++++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index cc4df34d4c14..3e53adbe9c7f 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -12,6 +12,7 @@ struct unix_diag_req {
 };
 
 #define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
+#define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -25,8 +26,14 @@ struct unix_diag_msg {
 
 enum {
 	UNIX_DIAG_NAME,
+	UNIX_DIAG_VFS,
 
 	UNIX_DIAG_MAX,
 };
 
+struct unix_diag_vfs {
+	__u32	udiag_vfs_ino;
+	__u32	udiag_vfs_dev;
+};
+
 #endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 161ce6c05e31..83799ef19b49 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -26,6 +26,23 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_dump_vfs(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct dentry *dentry = unix_sk(sk)->dentry;
+	struct unix_diag_vfs *uv;
+
+	if (dentry) {
+		uv = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_VFS, sizeof(*uv));
+		uv->udiag_vfs_ino = dentry->d_inode->i_ino;
+		uv->udiag_vfs_dev = dentry->d_sb->s_dev;
+	}
+
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -48,6 +65,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_name(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_VFS) &&
+			sk_diag_dump_vfs(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From ac02be8d96af9f66a4de86781ee9facc2dff99d4 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:45:58 +0000
Subject: unix_diag: Unix peer inode NLA

Report the peer socket inode ID as NLA. With this it's finally
possible to find out the other end of an interesting unix connection.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 3e53adbe9c7f..2d74a86024ac 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -13,6 +13,7 @@ struct unix_diag_req {
 
 #define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
 #define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
+#define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -27,6 +28,7 @@ struct unix_diag_msg {
 enum {
 	UNIX_DIAG_NAME,
 	UNIX_DIAG_VFS,
+	UNIX_DIAG_PEER,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 83799ef19b49..0e0fda786afe 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -43,6 +43,26 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_dump_peer(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct sock *peer;
+	int ino;
+
+	peer = unix_peer_get(sk);
+	if (peer) {
+		unix_state_lock(peer);
+		ino = sock_i_ino(peer);
+		unix_state_unlock(peer);
+		sock_put(peer);
+
+		RTA_PUT_U32(nlskb, UNIX_DIAG_PEER, ino);
+	}
+
+	return 0;
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -69,6 +89,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_vfs(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_PEER) &&
+			sk_diag_dump_peer(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From 2aac7a2cb0d9d8c65fc7dde3e19e46b3e878d23d Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:46:14 +0000
Subject: unix_diag: Pending connections IDs NLA

When establishing a unix connection on stream sockets the
server end receives an skb with socket in its receive queue.

Report who is waiting for these ends to be accepted for
listening sockets via NLA.

There's a lokcing issue with this -- the unix sk state lock is
required to access the peer, and it is taken under the listening
sk's queue lock. Strictly speaking the queue lock should be taken
inside the state lock, but since in this case these two sockets
are different it shouldn't lead to deadlock.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 39 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 41 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 2d74a86024ac..03ffb7de15b6 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -14,6 +14,7 @@ struct unix_diag_req {
 #define UDIAG_SHOW_NAME		0x00000001	/* show name (not path) */
 #define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
 #define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
+#define UDIAG_SHOW_ICONS	0x00000008	/* show pending connections */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -29,6 +30,7 @@ enum {
 	UNIX_DIAG_NAME,
 	UNIX_DIAG_VFS,
 	UNIX_DIAG_PEER,
+	UNIX_DIAG_ICONS,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 0e0fda786afe..24c7a65d9cb1 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -63,6 +63,41 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
+{
+	struct sk_buff *skb;
+	u32 *buf;
+	int i;
+
+	if (sk->sk_state == TCP_LISTEN) {
+		spin_lock(&sk->sk_receive_queue.lock);
+		buf = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_ICONS, sk->sk_receive_queue.qlen);
+		i = 0;
+		skb_queue_walk(&sk->sk_receive_queue, skb) {
+			struct sock *req, *peer;
+
+			req = skb->sk;
+			/*
+			 * The state lock is outer for the same sk's
+			 * queue lock. With the other's queue locked it's
+			 * OK to lock the state.
+			 */
+			unix_state_lock_nested(req);
+			peer = unix_sk(req)->peer;
+			if (peer)
+				buf[i++] = sock_i_ino(peer);
+			unix_state_unlock(req);
+		}
+		spin_unlock(&sk->sk_receive_queue.lock);
+	}
+
+	return 0;
+
+rtattr_failure:
+	spin_unlock(&sk->sk_receive_queue.lock);
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -93,6 +128,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_peer(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_ICONS) &&
+			sk_diag_dump_icons(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From cbf391958afb9b82c72324a15891eb3102200085 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Thu, 15 Dec 2011 02:46:31 +0000
Subject: unix_diag: Receive queue lenght NLA

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 13 +++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 03ffb7de15b6..3f7afb007d70 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -15,6 +15,7 @@ struct unix_diag_req {
 #define UDIAG_SHOW_VFS		0x00000002	/* show VFS inode info */
 #define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
 #define UDIAG_SHOW_ICONS	0x00000008	/* show pending connections */
+#define UDIAG_SHOW_RQLEN	0x00000010	/* show skb receive queue len */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -31,6 +32,7 @@ enum {
 	UNIX_DIAG_VFS,
 	UNIX_DIAG_PEER,
 	UNIX_DIAG_ICONS,
+	UNIX_DIAG_RQLEN,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 24c7a65d9cb1..a5c4aab0380d 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -98,6 +98,15 @@ rtattr_failure:
 	return -EMSGSIZE;
 }
 
+static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
+{
+	RTA_PUT_U32(nlskb, UNIX_DIAG_RQLEN, sk->sk_receive_queue.qlen);
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+
 static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req,
 		u32 pid, u32 seq, u32 flags, int sk_ino)
 {
@@ -132,6 +141,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 			sk_diag_dump_icons(sk, skb))
 		goto nlmsg_failure;
 
+	if ((req->udiag_show & UDIAG_SHOW_RQLEN) &&
+			sk_diag_show_rqlen(sk, skb))
+		goto nlmsg_failure;
+
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
 	return skb->len;
 
-- 
cgit v1.2.3


From 14596f7006297b67516e2b6a2b26bcb11fe08fb3 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 15 Dec 2011 13:51:16 +0000
Subject: ethtool: Clarify use of size field for ETHTOOL_GRXFHINDIR

In order to find out the device's RX flow hash table size, ethtool
initially uses ETHTOOL_GRXFHINDIR with a buffer size of zero.  This
must be supported, but it is not necessary to support any other user
buffer size less than the device table size.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 20db5b275c3f..0ec2fd412d03 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -543,8 +543,9 @@ struct compat_ethtool_rxnfc {
 /**
  * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection
  * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR
- * @size: On entry, the array size of the user buffer.  On return from
- *	%ETHTOOL_GRXFHINDIR, the array size of the hardware indirection table.
+ * @size: On entry, the array size of the user buffer, which may be zero
+ *	for %ETHTOOL_GRXFHINDIR.  On return from %ETHTOOL_GRXFHINDIR, the
+ *	array size of the hardware indirection table.
  * @ring_index: RX ring/queue index for each hash value
  */
 struct ethtool_rxfh_indir {
-- 
cgit v1.2.3


From 7850f63f1620512631445b901ae11cd149e7375c Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 15 Dec 2011 13:55:01 +0000
Subject: ethtool: Centralise validation of ETHTOOL_{G, S}RXFHINDIR parameters

Add a new ethtool operation (get_rxfh_indir_size) to get the
indirectional table size.  Use this to validate the user buffer size
before calling get_rxfh_indir or set_rxfh_indir.  Use get_rxnfc to get
the number of RX rings, and validate the contents of the new
indirection table before calling set_rxfh_indir.  Remove this
validation from drivers.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Dimitris Michailidis <dm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c    | 39 ++++-------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 27 ++++----
 drivers/net/ethernet/sfc/ethtool.c                 | 35 ++++------
 drivers/net/vmxnet3/vmxnet3_ethtool.c              | 35 ++++------
 include/linux/ethtool.h                            | 11 +--
 net/core/ethtool.c                                 | 81 +++++++++++++++-------
 6 files changed, 117 insertions(+), 111 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 90d44af85600..a688b9d975a2 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -2302,18 +2302,20 @@ static int bnx2x_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
 	}
 }
 
-static int bnx2x_get_rxfh_indir(struct net_device *dev,
-				struct ethtool_rxfh_indir *indir)
+static u32 bnx2x_get_rxfh_indir_size(struct net_device *dev)
+{
+	struct bnx2x *bp = netdev_priv(dev);
+
+	return (bp->multi_mode == ETH_RSS_MODE_DISABLED ?
+		0 : T_ETH_INDIRECTION_TABLE_SIZE);
+}
+
+static int bnx2x_get_rxfh_indir(struct net_device *dev, u32 *indir)
 {
 	struct bnx2x *bp = netdev_priv(dev);
-	size_t copy_size =
-		min_t(size_t, indir->size, T_ETH_INDIRECTION_TABLE_SIZE);
 	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
 	size_t i;
 
-	if (bp->multi_mode == ETH_RSS_MODE_DISABLED)
-		return -EOPNOTSUPP;
-
 	/* Get the current configuration of the RSS indirection table */
 	bnx2x_get_rss_ind_table(&bp->rss_conf_obj, ind_table);
 
@@ -2326,33 +2328,19 @@ static int bnx2x_get_rxfh_indir(struct net_device *dev,
 	 * align the returned table to the Client ID of the leading RSS
 	 * queue.
 	 */
-	for (i = 0; i < copy_size; i++)
-		indir->ring_index[i] = ind_table[i] - bp->fp->cl_id;
-
-	indir->size = T_ETH_INDIRECTION_TABLE_SIZE;
+	for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++)
+		indir[i] = ind_table[i] - bp->fp->cl_id;
 
 	return 0;
 }
 
-static int bnx2x_set_rxfh_indir(struct net_device *dev,
-				const struct ethtool_rxfh_indir *indir)
+static int bnx2x_set_rxfh_indir(struct net_device *dev, const u32 *indir)
 {
 	struct bnx2x *bp = netdev_priv(dev);
 	size_t i;
 	u8 ind_table[T_ETH_INDIRECTION_TABLE_SIZE] = {0};
-	u32 num_eth_queues = BNX2X_NUM_ETH_QUEUES(bp);
-
-	if (bp->multi_mode == ETH_RSS_MODE_DISABLED)
-		return -EOPNOTSUPP;
-
-	/* validate the size */
-	if (indir->size != T_ETH_INDIRECTION_TABLE_SIZE)
-		return -EINVAL;
 
 	for (i = 0; i < T_ETH_INDIRECTION_TABLE_SIZE; i++) {
-		/* validate the indices */
-		if (indir->ring_index[i] >= num_eth_queues)
-			return -EINVAL;
 		/*
 		 * The same as in bnx2x_get_rxfh_indir: we can't use a memcpy()
 		 * as an internal storage of an indirection table is a u8 array
@@ -2362,7 +2350,7 @@ static int bnx2x_set_rxfh_indir(struct net_device *dev,
 		 * align the received table to the Client ID of the leading RSS
 		 * queue
 		 */
-		ind_table[i] = indir->ring_index[i] + bp->fp->cl_id;
+		ind_table[i] = indir[i] + bp->fp->cl_id;
 	}
 
 	return bnx2x_config_rss_pf(bp, ind_table, false);
@@ -2395,6 +2383,7 @@ static const struct ethtool_ops bnx2x_ethtool_ops = {
 	.set_phys_id		= bnx2x_set_phys_id,
 	.get_ethtool_stats	= bnx2x_get_ethtool_stats,
 	.get_rxnfc		= bnx2x_get_rxnfc,
+	.get_rxfh_indir_size	= bnx2x_get_rxfh_indir_size,
 	.get_rxfh_indir		= bnx2x_get_rxfh_indir,
 	.set_rxfh_indir		= bnx2x_set_rxfh_indir,
 };
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index a34e7ce7e214..8ffd55bdef3d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1871,30 +1871,30 @@ static int cxgb_set_features(struct net_device *dev, netdev_features_t features)
 	return err;
 }
 
-static int get_rss_table(struct net_device *dev, struct ethtool_rxfh_indir *p)
+static u32 get_rss_table_size(struct net_device *dev)
 {
 	const struct port_info *pi = netdev_priv(dev);
-	unsigned int n = min_t(unsigned int, p->size, pi->rss_size);
 
-	p->size = pi->rss_size;
+	return pi->rss_size;
+}
+
+static int get_rss_table(struct net_device *dev, u32 *p)
+{
+	const struct port_info *pi = netdev_priv(dev);
+	unsigned int n = pi->rss_size;
+
 	while (n--)
-		p->ring_index[n] = pi->rss[n];
+		p[n] = pi->rss[n];
 	return 0;
 }
 
-static int set_rss_table(struct net_device *dev,
-			 const struct ethtool_rxfh_indir *p)
+static int set_rss_table(struct net_device *dev, const u32 *p)
 {
 	unsigned int i;
 	struct port_info *pi = netdev_priv(dev);
 
-	if (p->size != pi->rss_size)
-		return -EINVAL;
-	for (i = 0; i < p->size; i++)
-		if (p->ring_index[i] >= pi->nqsets)
-			return -EINVAL;
-	for (i = 0; i < p->size; i++)
-		pi->rss[i] = p->ring_index[i];
+	for (i = 0; i < pi->rss_size; i++)
+		pi->rss[i] = p[i];
 	if (pi->adapter->flags & FULL_INIT_DONE)
 		return write_rss(pi, pi->rss);
 	return 0;
@@ -1989,6 +1989,7 @@ static struct ethtool_ops cxgb_ethtool_ops = {
 	.get_wol           = get_wol,
 	.set_wol           = set_wol,
 	.get_rxnfc         = get_rxnfc,
+	.get_rxfh_indir_size = get_rss_table_size,
 	.get_rxfh_indir    = get_rss_table,
 	.set_rxfh_indir    = set_rss_table,
 	.flash_device      = set_flash,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index f3cd96dfa398..1be51b2bfa42 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -956,40 +956,28 @@ static int efx_ethtool_set_rx_ntuple(struct net_device *net_dev,
 	return rc < 0 ? rc : 0;
 }
 
-static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev,
-				      struct ethtool_rxfh_indir *indir)
+static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	size_t copy_size =
-		min_t(size_t, indir->size, ARRAY_SIZE(efx->rx_indir_table));
 
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0)
-		return -EOPNOTSUPP;
+	return (efx_nic_rev(efx) < EFX_REV_FALCON_B0 ?
+		0 : ARRAY_SIZE(efx->rx_indir_table));
+}
+
+static int efx_ethtool_get_rxfh_indir(struct net_device *net_dev, u32 *indir)
+{
+	struct efx_nic *efx = netdev_priv(net_dev);
 
-	indir->size = ARRAY_SIZE(efx->rx_indir_table);
-	memcpy(indir->ring_index, efx->rx_indir_table,
-	       copy_size * sizeof(indir->ring_index[0]));
+	memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table));
 	return 0;
 }
 
 static int efx_ethtool_set_rxfh_indir(struct net_device *net_dev,
-				      const struct ethtool_rxfh_indir *indir)
+				      const u32 *indir)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	size_t i;
-
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0)
-		return -EOPNOTSUPP;
-
-	/* Validate size and indices */
-	if (indir->size != ARRAY_SIZE(efx->rx_indir_table))
-		return -EINVAL;
-	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-		if (indir->ring_index[i] >= efx->n_rx_channels)
-			return -EINVAL;
 
-	memcpy(efx->rx_indir_table, indir->ring_index,
-	       sizeof(efx->rx_indir_table));
+	memcpy(efx->rx_indir_table, indir, sizeof(efx->rx_indir_table));
 	efx_nic_push_rx_indir_table(efx);
 	return 0;
 }
@@ -1020,6 +1008,7 @@ const struct ethtool_ops efx_ethtool_ops = {
 	.reset			= efx_ethtool_reset,
 	.get_rxnfc		= efx_ethtool_get_rxnfc,
 	.set_rx_ntuple		= efx_ethtool_set_rx_ntuple,
+	.get_rxfh_indir_size	= efx_ethtool_get_rxfh_indir_size,
 	.get_rxfh_indir		= efx_ethtool_get_rxfh_indir,
 	.set_rxfh_indir		= efx_ethtool_set_rxfh_indir,
 };
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index b492ee1e5f17..a3eb75a62ea9 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -565,44 +565,38 @@ vmxnet3_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
 }
 
 #ifdef VMXNET3_RSS
+static u32
+vmxnet3_get_rss_indir_size(struct net_device *netdev)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
+
+	return rssConf->indTableSize;
+}
+
 static int
-vmxnet3_get_rss_indir(struct net_device *netdev,
-		      struct ethtool_rxfh_indir *p)
+vmxnet3_get_rss_indir(struct net_device *netdev, u32 *p)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
-	unsigned int n = min_t(unsigned int, p->size, rssConf->indTableSize);
+	unsigned int n = rssConf->indTableSize;
 
-	p->size = rssConf->indTableSize;
 	while (n--)
-		p->ring_index[n] = rssConf->indTable[n];
+		p[n] = rssConf->indTable[n];
 	return 0;
 
 }
 
 static int
-vmxnet3_set_rss_indir(struct net_device *netdev,
-		      const struct ethtool_rxfh_indir *p)
+vmxnet3_set_rss_indir(struct net_device *netdev, const u32 *p)
 {
 	unsigned int i;
 	unsigned long flags;
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	struct UPT1_RSSConf *rssConf = adapter->rss_conf;
 
-	if (p->size != rssConf->indTableSize)
-		return -EINVAL;
-	for (i = 0; i < rssConf->indTableSize; i++) {
-		/*
-		 * Return with error code if any of the queue indices
-		 * is out of range
-		 */
-		if (p->ring_index[i] < 0 ||
-		    p->ring_index[i] >= adapter->num_rx_queues)
-			return -EINVAL;
-	}
-
 	for (i = 0; i < rssConf->indTableSize; i++)
-		rssConf->indTable[i] = p->ring_index[i];
+		rssConf->indTable[i] = p[i];
 
 	spin_lock_irqsave(&adapter->cmd_lock, flags);
 	VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
@@ -629,6 +623,7 @@ static struct ethtool_ops vmxnet3_ethtool_ops = {
 	.set_ringparam     = vmxnet3_set_ringparam,
 	.get_rxnfc         = vmxnet3_get_rxnfc,
 #ifdef VMXNET3_RSS
+	.get_rxfh_indir_size = vmxnet3_get_rss_indir_size,
 	.get_rxfh_indir    = vmxnet3_get_rss_indir,
 	.set_rxfh_indir    = vmxnet3_set_rss_indir,
 #endif
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 0ec2fd412d03..3b9f09d55b5c 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -828,9 +828,13 @@ u32 ethtool_op_get_link(struct net_device *dev);
  *	error code or zero.
  * @set_rx_ntuple: Set an RX n-tuple rule.  Returns a negative error code
  *	or zero.
+ * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
+ *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
+ *	Will not be called if @get_rxfh_indir_size returns zero.
  *	Returns a negative error code or zero.
  * @set_rxfh_indir: Set the contents of the RX flow hash indirection table.
+ *	Will not be called if @get_rxfh_indir_size returns zero.
  *	Returns a negative error code or zero.
  * @get_channels: Get number of channels.
  * @set_channels: Set number of channels.  Returns a negative error code or
@@ -894,10 +898,9 @@ struct ethtool_ops {
 	int	(*reset)(struct net_device *, u32 *);
 	int	(*set_rx_ntuple)(struct net_device *,
 				 struct ethtool_rx_ntuple *);
-	int	(*get_rxfh_indir)(struct net_device *,
-				  struct ethtool_rxfh_indir *);
-	int	(*set_rxfh_indir)(struct net_device *,
-				  const struct ethtool_rxfh_indir *);
+	u32	(*get_rxfh_indir_size)(struct net_device *);
+	int	(*get_rxfh_indir)(struct net_device *, u32 *);
+	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
 	void	(*get_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*set_channels)(struct net_device *, struct ethtool_channels *);
 	int	(*get_dump_flag)(struct net_device *, struct ethtool_dump *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 31b0b7f5383e..69f71b86b035 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -515,34 +515,44 @@ err_out:
 static noinline_for_stack int ethtool_get_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
-	struct ethtool_rxfh_indir *indir;
-	u32 table_size;
-	size_t full_size;
+	u32 user_size, dev_size;
+	u32 *indir;
 	int ret;
 
-	if (!dev->ethtool_ops->get_rxfh_indir)
+	if (!dev->ethtool_ops->get_rxfh_indir_size ||
+	    !dev->ethtool_ops->get_rxfh_indir)
+		return -EOPNOTSUPP;
+	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+	if (dev_size == 0)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&table_size,
+	if (copy_from_user(&user_size,
 			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
-			   sizeof(table_size)))
+			   sizeof(user_size)))
 		return -EFAULT;
 
-	if (table_size >
-	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
-		return -ENOMEM;
-	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
-	indir = kzalloc(full_size, GFP_USER);
+	if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh_indir, size),
+			 &dev_size, sizeof(dev_size)))
+		return -EFAULT;
+
+	/* If the user buffer size is 0, this is just a query for the
+	 * device table size.  Otherwise, if it's smaller than the
+	 * device table size it's an error.
+	 */
+	if (user_size < dev_size)
+		return user_size == 0 ? 0 : -EINVAL;
+
+	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
 	if (!indir)
 		return -ENOMEM;
 
-	indir->cmd = ETHTOOL_GRXFHINDIR;
-	indir->size = table_size;
 	ret = dev->ethtool_ops->get_rxfh_indir(dev, indir);
 	if (ret)
 		goto out;
 
-	if (copy_to_user(useraddr, indir, full_size))
+	if (copy_to_user(useraddr +
+			 offsetof(struct ethtool_rxfh_indir, ring_index[0]),
+			 indir, dev_size * sizeof(indir[0])))
 		ret = -EFAULT;
 
 out:
@@ -553,32 +563,51 @@ out:
 static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 						     void __user *useraddr)
 {
-	struct ethtool_rxfh_indir *indir;
-	u32 table_size;
-	size_t full_size;
+	struct ethtool_rxnfc rx_rings;
+	u32 user_size, dev_size, i;
+	u32 *indir;
 	int ret;
 
-	if (!dev->ethtool_ops->set_rxfh_indir)
+	if (!dev->ethtool_ops->get_rxfh_indir_size ||
+	    !dev->ethtool_ops->set_rxfh_indir ||
+	    !dev->ethtool_ops->get_rxnfc)
+		return -EOPNOTSUPP;
+	dev_size = dev->ethtool_ops->get_rxfh_indir_size(dev);
+	if (dev_size == 0)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&table_size,
+	if (copy_from_user(&user_size,
 			   useraddr + offsetof(struct ethtool_rxfh_indir, size),
-			   sizeof(table_size)))
+			   sizeof(user_size)))
 		return -EFAULT;
 
-	if (table_size >
-	    (KMALLOC_MAX_SIZE - sizeof(*indir)) / sizeof(*indir->ring_index))
-		return -ENOMEM;
-	full_size = sizeof(*indir) + sizeof(*indir->ring_index) * table_size;
-	indir = kmalloc(full_size, GFP_USER);
+	if (user_size != dev_size)
+		return -EINVAL;
+
+	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
 	if (!indir)
 		return -ENOMEM;
 
-	if (copy_from_user(indir, useraddr, full_size)) {
+	if (copy_from_user(indir,
+			   useraddr +
+			   offsetof(struct ethtool_rxfh_indir, ring_index[0]),
+			   dev_size * sizeof(indir[0]))) {
 		ret = -EFAULT;
 		goto out;
 	}
 
+	/* Validate ring indices */
+	rx_rings.cmd = ETHTOOL_GRXRINGS;
+	ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
+	if (ret)
+		goto out;
+	for (i = 0; i < dev_size; i++) {
+		if (indir[i] >= rx_rings.data) {
+			ret = -EINVAL;
+			goto out;
+		}
+	}
+
 	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
 
 out:
-- 
cgit v1.2.3


From 278bc4296bd64ffd1d3913b487dc8a520e423a7a Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Thu, 15 Dec 2011 13:56:49 +0000
Subject: ethtool: Define and apply a default policy for RX flow hash
 indirection

All drivers that support modification of the RX flow hash indirection
table initialise it in the same way: RX rings are assigned to table
entries in rotation.  Make that default policy explicit by having them
call a ethtool_rxfh_indir_default() function.

In the ethtool core, add support for a zero size value for
ETHTOOL_SRXFHINDIR, which resets the table to this default.

Partly-suggested-by: Matt Carlson <mcarlson@broadcom.com>
Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Acked-by: Shreyas N Bhatewara <sbhatewara@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |  2 +-
 drivers/net/ethernet/sfc/efx.c                  |  3 ++-
 drivers/net/vmxnet3/vmxnet3_drv.c               |  3 ++-
 include/linux/ethtool.h                         | 23 ++++++++++++++---
 net/core/ethtool.c                              | 33 +++++++++++++++----------
 6 files changed, 47 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 64f5cf5c68d1..2b731b253598 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -1545,7 +1545,8 @@ static inline int bnx2x_init_rss_pf(struct bnx2x *bp)
 	if (bp->multi_mode != ETH_RSS_MODE_DISABLED) {
 		for (i = 0; i < sizeof(ind_table); i++)
 			ind_table[i] =
-				bp->fp->cl_id +	(i % num_eth_queues);
+				bp->fp->cl_id +
+				ethtool_rxfh_indir_default(i, num_eth_queues);
 	}
 
 	/*
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 8ffd55bdef3d..fccbe490c7f0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3449,7 +3449,7 @@ static int __devinit init_rss(struct adapter *adap)
 		if (!pi->rss)
 			return -ENOMEM;
 		for (j = 0; j < pi->rss_size; j++)
-			pi->rss[j] = j % pi->nqsets;
+			pi->rss[j] = ethtool_rxfh_indir_default(j, pi->nqsets);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 14e134d3b4d7..44a82c6c60a7 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -1336,7 +1336,8 @@ static int efx_probe_nic(struct efx_nic *efx)
 	if (efx->n_channels > 1)
 		get_random_bytes(&efx->rx_hash_key, sizeof(efx->rx_hash_key));
 	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
-		efx->rx_indir_table[i] = i % efx->n_rx_channels;
+		efx->rx_indir_table[i] =
+			ethtool_rxfh_indir_default(i, efx->n_rx_channels);
 
 	efx_set_channels(efx);
 	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 1c2ae11a9e35..de7fc345148a 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2167,7 +2167,8 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 		rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
 		get_random_bytes(&rssConf->hashKey[0], rssConf->hashKeySize);
 		for (i = 0; i < rssConf->indTableSize; i++)
-			rssConf->indTable[i] = i % adapter->num_rx_queues;
+			rssConf->indTable[i] = ethtool_rxfh_indir_default(
+				i, adapter->num_rx_queues);
 
 		devRead->rssConfDesc.confVer = 1;
 		devRead->rssConfDesc.confLen = sizeof(*rssConf);
diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index 3b9f09d55b5c..b38bf69310ee 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -543,10 +543,15 @@ struct compat_ethtool_rxnfc {
 /**
  * struct ethtool_rxfh_indir - command to get or set RX flow hash indirection
  * @cmd: Specific command number - %ETHTOOL_GRXFHINDIR or %ETHTOOL_SRXFHINDIR
- * @size: On entry, the array size of the user buffer, which may be zero
- *	for %ETHTOOL_GRXFHINDIR.  On return from %ETHTOOL_GRXFHINDIR, the
- *	array size of the hardware indirection table.
+ * @size: On entry, the array size of the user buffer, which may be zero.
+ *	On return from %ETHTOOL_GRXFHINDIR, the array size of the hardware
+ *	indirection table.
  * @ring_index: RX ring/queue index for each hash value
+ *
+ * For %ETHTOOL_GRXFHINDIR, a @size of zero means that only the size
+ * should be returned.  For %ETHTOOL_SRXFHINDIR, a @size of zero means
+ * the table should be reset to default values.  This last feature
+ * is not supported by the original implementations.
  */
 struct ethtool_rxfh_indir {
 	__u32	cmd;
@@ -749,6 +754,18 @@ struct net_device;
 /* Some generic methods drivers may use in their ethtool_ops */
 u32 ethtool_op_get_link(struct net_device *dev);
 
+/**
+ * ethtool_rxfh_indir_default - get default value for RX flow hash indirection
+ * @index: Index in RX flow hash indirection table
+ * @n_rx_rings: Number of RX rings to use
+ *
+ * This function provides the default policy for RX flow hash indirection.
+ */
+static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
+{
+	return index % n_rx_rings;
+}
+
 /**
  * struct ethtool_ops - optional netdev operations
  * @get_settings: Get various device settings including Ethernet link
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 69f71b86b035..597732c989ca 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -581,31 +581,38 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev,
 			   sizeof(user_size)))
 		return -EFAULT;
 
-	if (user_size != dev_size)
+	if (user_size != 0 && user_size != dev_size)
 		return -EINVAL;
 
 	indir = kcalloc(dev_size, sizeof(indir[0]), GFP_USER);
 	if (!indir)
 		return -ENOMEM;
 
-	if (copy_from_user(indir,
-			   useraddr +
-			   offsetof(struct ethtool_rxfh_indir, ring_index[0]),
-			   dev_size * sizeof(indir[0]))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	/* Validate ring indices */
 	rx_rings.cmd = ETHTOOL_GRXRINGS;
 	ret = dev->ethtool_ops->get_rxnfc(dev, &rx_rings, NULL);
 	if (ret)
 		goto out;
-	for (i = 0; i < dev_size; i++) {
-		if (indir[i] >= rx_rings.data) {
-			ret = -EINVAL;
+
+	if (user_size == 0) {
+		for (i = 0; i < dev_size; i++)
+			indir[i] = ethtool_rxfh_indir_default(i, rx_rings.data);
+	} else {
+		if (copy_from_user(indir,
+				  useraddr +
+				  offsetof(struct ethtool_rxfh_indir,
+					   ring_index[0]),
+				  dev_size * sizeof(indir[0]))) {
+			ret = -EFAULT;
 			goto out;
 		}
+
+		/* Validate ring indices */
+		for (i = 0; i < dev_size; i++) {
+			if (indir[i] >= rx_rings.data) {
+				ret = -EINVAL;
+				goto out;
+			}
+		}
 	}
 
 	ret = dev->ethtool_ops->set_rxfh_indir(dev, indir);
-- 
cgit v1.2.3


From a85e1d55974646a442d95911e3f7d7a891ea9ac5 Mon Sep 17 00:00:00 2001
From: Paul Stewart <pstew@chromium.org>
Date: Fri, 9 Dec 2011 11:01:49 -0800
Subject: cfg80211: Return beacon loss count in station

If station info contains a beacon loss count, return
it to userspace.

Signed-off-by: Paul Stewart <pstew@chromium.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 2 ++
 include/net/cfg80211.h  | 7 ++++++-
 net/mac80211/cfg.c      | 4 +++-
 net/mac80211/mlme.c     | 8 ++++++++
 net/mac80211/sta_info.h | 2 ++
 net/wireless/nl80211.c  | 3 +++
 6 files changed, 24 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f795cb7dccdd..0f5ff3739820 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1655,6 +1655,7 @@ enum nl80211_sta_bss_param {
  *     containing info as possible, see &enum nl80211_sta_bss_param
  * @NL80211_STA_INFO_CONNECTED_TIME: time since the station is last connected
  * @NL80211_STA_INFO_STA_FLAGS: Contains a struct nl80211_sta_flag_update.
+ * @NL80211_STA_INFO_BEACON_LOSS: count of times beacon loss was detected (u32)
  * @__NL80211_STA_INFO_AFTER_LAST: internal
  * @NL80211_STA_INFO_MAX: highest possible station info attribute
  */
@@ -1677,6 +1678,7 @@ enum nl80211_sta_info {
 	NL80211_STA_INFO_BSS_PARAM,
 	NL80211_STA_INFO_CONNECTED_TIME,
 	NL80211_STA_INFO_STA_FLAGS,
+	NL80211_STA_INFO_BEACON_LOSS,
 
 	/* keep last */
 	__NL80211_STA_INFO_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9f85fca0b676..15f4be7d768e 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -505,6 +505,7 @@ struct station_parameters {
  * @STATION_INFO_CONNECTED_TIME: @connected_time filled
  * @STATION_INFO_ASSOC_REQ_IES: @assoc_req_ies filled
  * @STATION_INFO_STA_FLAGS: @sta_flags filled
+ * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled
  */
 enum station_info_flags {
 	STATION_INFO_INACTIVE_TIME	= 1<<0,
@@ -525,7 +526,8 @@ enum station_info_flags {
 	STATION_INFO_BSS_PARAM          = 1<<15,
 	STATION_INFO_CONNECTED_TIME	= 1<<16,
 	STATION_INFO_ASSOC_REQ_IES	= 1<<17,
-	STATION_INFO_STA_FLAGS		= 1<<18
+	STATION_INFO_STA_FLAGS		= 1<<18,
+	STATION_INFO_BEACON_LOSS_COUNT	= 1<<19
 };
 
 /**
@@ -623,6 +625,7 @@ struct sta_bss_parameters {
  *	the cfg80211_new_sta() calls to notify user space of the IEs.
  * @assoc_req_ies_len: Length of assoc_req_ies buffer in octets.
  * @sta_flags: station flags mask & values
+ * @beacon_loss_count: Number of times beacon loss event has triggered.
  */
 struct station_info {
 	u32 filled;
@@ -650,6 +653,8 @@ struct station_info {
 	const u8 *assoc_req_ies;
 	size_t assoc_req_ies_len;
 
+	u32 beacon_loss_count;
+
 	/*
 	 * Note: Add a new enum station_info_flags value for each new field and
 	 * use it to check which fields are initialized.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 66ad9d9af87f..850bb96bd680 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -355,7 +355,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 			STATION_INFO_RX_DROP_MISC |
 			STATION_INFO_BSS_PARAM |
 			STATION_INFO_CONNECTED_TIME |
-			STATION_INFO_STA_FLAGS;
+			STATION_INFO_STA_FLAGS |
+			STATION_INFO_BEACON_LOSS_COUNT;
 
 	do_posix_clock_monotonic_gettime(&uptime);
 	sinfo->connected_time = uptime.tv_sec - sta->last_connected;
@@ -368,6 +369,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo)
 	sinfo->tx_retries = sta->tx_retry_count;
 	sinfo->tx_failed = sta->tx_retry_failed;
 	sinfo->rx_dropped_misc = sta->rx_dropped;
+	sinfo->beacon_loss_count = sta->beacon_loss_count;
 
 	if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) ||
 	    (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) {
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a984f1f60ddb..57989a046fca 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1381,6 +1381,14 @@ void ieee80211_beacon_connection_loss_work(struct work_struct *work)
 	struct ieee80211_sub_if_data *sdata =
 		container_of(work, struct ieee80211_sub_if_data,
 			     u.mgd.beacon_connection_loss_work);
+	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
+	struct sta_info *sta;
+
+	if (ifmgd->associated) {
+		sta = sta_info_get(sdata, ifmgd->bssid);
+		if (sta)
+			sta->beacon_loss_count++;
+	}
 
 	if (sdata->local->hw.flags & IEEE80211_HW_CONNECTION_MONITOR)
 		__ieee80211_connection_loss(sdata);
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index dee284290464..6f77f12dc3fc 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -275,6 +275,7 @@ struct sta_ampdu_mlme {
  *	EAP frames before association
  * @sta: station information we share with the driver
  * @sta_state: duplicates information about station state (for debug)
+ * @beacon_loss_count: number of times beacon loss has triggered
  */
 struct sta_info {
 	/* General information, mostly static */
@@ -367,6 +368,7 @@ struct sta_info {
 #endif
 
 	unsigned int lost_packets;
+	unsigned int beacon_loss_count;
 
 	/* should be right in front of sta to be in the same cache line */
 	bool dummy;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index b07c4fc4ae22..b3d3cf8931cb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -2390,6 +2390,9 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	if (sinfo->filled & STATION_INFO_TX_FAILED)
 		NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED,
 			    sinfo->tx_failed);
+	if (sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT)
+		NLA_PUT_U32(msg, NL80211_STA_INFO_BEACON_LOSS,
+			    sinfo->beacon_loss_count);
 	if (sinfo->filled & STATION_INFO_BSS_PARAM) {
 		bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM);
 		if (!bss_param)
-- 
cgit v1.2.3


From 58a60168d12c4e5be21c29420a3de4a41ef3470f Mon Sep 17 00:00:00 2001
From: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Date: Mon, 19 Dec 2011 04:00:26 +0000
Subject: mlx4: capability for link sensing

For ConnectX3 devices, we allow link sensing only if FW explicitly
reported it supports the feature.
For older versions (ConnectX1 and 2), if the card supports both link layer types
(Ethenet and Infiniband), link sensing is supported.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c | 10 ++++++++--
 include/linux/mlx4/device.h               |  3 ++-
 2 files changed, 10 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index b969bfb569e3..8f7314394cc2 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -274,6 +274,10 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	dev->caps.stat_rate_support  = dev_cap->stat_rate_support;
 	dev->caps.max_gso_sz	     = dev_cap->max_gso_sz;
 
+	/* Sense port always allowed on supported devices for ConnectX1 and 2 */
+	if (dev->pdev->device != 0x1003)
+		dev->caps.flags |= MLX4_DEV_CAP_FLAG_SENSE_SUPPORT;
+
 	dev->caps.log_num_macs  = log_num_mac;
 	dev->caps.log_num_vlans = MLX4_LOG_NUM_VLANS;
 	dev->caps.log_num_prios = use_prio ? 3 : 0;
@@ -311,7 +315,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		}
 		dev->caps.possible_type[i] = dev->caps.port_type[i];
 		mlx4_priv(dev)->sense.sense_allowed[i] =
-			dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO;
+			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
 
 		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
 			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
@@ -583,7 +588,8 @@ static ssize_t set_port_type(struct device *dev,
 			types[i] = mdev->caps.port_type[i+1];
 	}
 
-	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP)) {
+	if (!(mdev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
+	    !(mdev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT)) {
 		for (i = 1; i <= mdev->caps.num_ports; i++) {
 			if (mdev->caps.possible_type[i] == MLX4_PORT_TYPE_AUTO) {
 				mdev->caps.possible_type[i] = mdev->caps.port_type[i];
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 5f784ff6a36e..b06a44ba1565 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -94,7 +94,8 @@ enum {
 	MLX4_DEV_CAP_FLAG_UDP_RSS	= 1LL << 40,
 	MLX4_DEV_CAP_FLAG_VEP_UC_STEER	= 1LL << 41,
 	MLX4_DEV_CAP_FLAG_VEP_MC_STEER	= 1LL << 42,
-	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48
+	MLX4_DEV_CAP_FLAG_COUNTERS	= 1LL << 48,
+	MLX4_DEV_CAP_FLAG_SENSE_SUPPORT	= 1LL << 55
 };
 
 #define MLX4_ATTR_EXTENDED_PORT_INFO	cpu_to_be16(0xff90)
-- 
cgit v1.2.3


From 8d0fc7b61191c9433a4f738987b89e1d962eb637 Mon Sep 17 00:00:00 2001
From: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Date: Mon, 19 Dec 2011 04:00:34 +0000
Subject: mlx4_core: Changing link sensing logic

New FW can give clues to driver regarding default port type
and whether or not we should default to link sensing on the port.

2 bits are added to QUERY_PORT command:
1. suggested_type: This bit gives a hint whether the default port type should be
   IB or Ethernet.
   The driver will use this hint in case the user didn't specify explicitly the link layer
   type he wants to set.
2. default_sense: If this bit is set, we would sense the port type on start-up
   and default the port to link sensing

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/fw.c   |  2 ++
 drivers/net/ethernet/mellanox/mlx4/fw.h   |  2 ++
 drivers/net/ethernet/mellanox/mlx4/main.c | 50 +++++++++++++++++++++----------
 include/linux/mlx4/device.h               |  2 ++
 4 files changed, 40 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c
index f03b54e0aa53..abefcc86e2d1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.c
@@ -577,6 +577,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 
 			MLX4_GET(field, outbox, QUERY_PORT_SUPPORTED_TYPE_OFFSET);
 			dev_cap->supported_port_types[i] = field & 3;
+			dev_cap->suggested_type[i] = (field >> 3) & 1;
+			dev_cap->default_sense[i] = (field >> 4) & 1;
 			MLX4_GET(field, outbox, QUERY_PORT_MTU_OFFSET);
 			dev_cap->ib_mtu[i]	   = field & 0xf;
 			MLX4_GET(field, outbox, QUERY_PORT_WIDTH_OFFSET);
diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h
index 3368363a8ec5..119e0cc9fab3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/fw.h
+++ b/drivers/net/ethernet/mellanox/mlx4/fw.h
@@ -111,6 +111,8 @@ struct mlx4_dev_cap {
 	u64 max_icm_sz;
 	int max_gso_sz;
 	u8  supported_port_types[MLX4_MAX_PORTS + 1];
+	u8  suggested_type[MLX4_MAX_PORTS + 1];
+	u8  default_sense[MLX4_MAX_PORTS + 1];
 	u8  log_max_macs[MLX4_MAX_PORTS + 1];
 	u8  log_max_vlans[MLX4_MAX_PORTS + 1];
 	u32 max_counters;
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 8f7314394cc2..e984ded2249f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -130,10 +130,11 @@ int log_mtts_per_seg = ilog2(MLX4_MTT_ENTRY_PER_SEG);
 module_param_named(log_mtts_per_seg, log_mtts_per_seg, int, 0444);
 MODULE_PARM_DESC(log_mtts_per_seg, "Log2 number of MTT entries per segment (1-7)");
 
-static int port_type_array[2] = {1, 1};
+static int port_type_array[2] = {MLX4_PORT_TYPE_NONE, MLX4_PORT_TYPE_NONE};
 static int arr_argc = 2;
 module_param_array(port_type_array, int, &arr_argc, 0444);
-MODULE_PARM_DESC(port_type_array, "Array of port types: IB by default");
+MODULE_PARM_DESC(port_type_array, "Array of port types: HW_DEFAULT (0) is default "
+				"1 for IB, 2 for Ethernet");
 
 struct mlx4_port_config {
 	struct list_head list;
@@ -225,6 +226,8 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 		dev->caps.eth_mtu_cap[i]    = dev_cap->eth_mtu[i];
 		dev->caps.def_mac[i]        = dev_cap->def_mac[i];
 		dev->caps.supported_type[i] = dev_cap->supported_port_types[i];
+		dev->caps.suggested_type[i] = dev_cap->suggested_type[i];
+		dev->caps.default_sense[i] = dev_cap->default_sense[i];
 		dev->caps.trans_type[i]	    = dev_cap->trans_type[i];
 		dev->caps.vendor_oui[i]     = dev_cap->vendor_oui[i];
 		dev->caps.wavelength[i]     = dev_cap->wavelength[i];
@@ -302,22 +305,43 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 			 * first of all check if SRIOV is on */
 			} else if (dev->flags & MLX4_FLAG_SRIOV)
 				dev->caps.port_type[i] = MLX4_PORT_TYPE_ETH;
-			/* if IB and ETH are supported and SRIOV is off
-			 * use module parameters */
 			else {
-				if (port_type_array[i-1])
-					dev->caps.port_type[i] =
-						MLX4_PORT_TYPE_IB;
+				/* In non-SRIOV mode, we set the port type
+				 * according to user selection of port type,
+				 * if usere selected none, take the FW hint */
+				if (port_type_array[i-1] == MLX4_PORT_TYPE_NONE)
+					dev->caps.port_type[i] = dev->caps.suggested_type[i] ?
+						MLX4_PORT_TYPE_ETH : MLX4_PORT_TYPE_IB;
 				else
-					dev->caps.port_type[i] =
-						MLX4_PORT_TYPE_ETH;
+					dev->caps.port_type[i] = port_type_array[i-1];
 			}
 		}
-		dev->caps.possible_type[i] = dev->caps.port_type[i];
+		/*
+		 * Link sensing is allowed on the port if 3 conditions are true:
+		 * 1. Both protocols are supported on the port.
+		 * 2. Different types are supported on the port
+		 * 3. FW declared that it supports link sensing
+		 */
 		mlx4_priv(dev)->sense.sense_allowed[i] =
 			((dev->caps.supported_type[i] == MLX4_PORT_TYPE_AUTO) &&
+			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_DPDP) &&
 			 (dev->caps.flags & MLX4_DEV_CAP_FLAG_SENSE_SUPPORT));
 
+		/*
+		 * If "default_sense" bit is set, we move the port to "AUTO" mode
+		 * and perform sense_port FW command to try and set the correct
+		 * port type from beginning
+		 */
+		if (mlx4_priv(dev)->sense.sense_allowed && dev->caps.default_sense[i]) {
+			enum mlx4_port_type sensed_port = MLX4_PORT_TYPE_NONE;
+			dev->caps.possible_type[i] = MLX4_PORT_TYPE_AUTO;
+			mlx4_SENSE_PORT(dev, i, &sensed_port);
+			if (sensed_port != MLX4_PORT_TYPE_NONE)
+				dev->caps.port_type[i] = sensed_port;
+		} else {
+			dev->caps.possible_type[i] = dev->caps.port_type[i];
+		}
+
 		if (dev->caps.log_num_macs > dev_cap->log_max_macs[i]) {
 			dev->caps.log_num_macs = dev_cap->log_max_macs[i];
 			mlx4_warn(dev, "Requested number of MACs is too much "
@@ -1329,12 +1353,6 @@ static int mlx4_setup_hca(struct mlx4_dev *dev)
 
 	if (!mlx4_is_slave(dev)) {
 		for (port = 1; port <= dev->caps.num_ports; port++) {
-			if (!mlx4_is_mfunc(dev)) {
-				enum mlx4_port_type port_type = 0;
-				mlx4_SENSE_PORT(dev, port, &port_type);
-				if (port_type)
-					dev->caps.port_type[port] = port_type;
-			}
 			ib_port_default_caps = 0;
 			err = mlx4_get_port_ib_caps(dev, port,
 						    &ib_port_default_caps);
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index b06a44ba1565..5c4fe8e5bfe5 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -303,6 +303,8 @@ struct mlx4_caps {
 	int                     log_num_prios;
 	enum mlx4_port_type	port_type[MLX4_MAX_PORTS + 1];
 	u8			supported_type[MLX4_MAX_PORTS + 1];
+	u8                      suggested_type[MLX4_MAX_PORTS + 1];
+	u8                      default_sense[MLX4_MAX_PORTS + 1];
 	u32			port_mask[MLX4_MAX_PORTS + 1];
 	enum mlx4_port_type	possible_type[MLX4_MAX_PORTS + 1];
 	u32			max_counters;
-- 
cgit v1.2.3


From 447f219190bf0368b8b36cf60155744cb43510df Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 19 Dec 2011 15:04:41 -0500
Subject: Revert "net: Remove unused neighbour layer ops."

This reverts commit 5c3ddec73d01a1fae9409c197078cb02c42238c3.

S390 qeth driver actually still uses the setup ops.

Reported-by: Frank Blaschka <blaschka@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 include/net/neighbour.h   |  1 +
 net/core/neighbour.c      | 10 ++++++++++
 3 files changed, 12 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6b9d4edb7c26..603730804da5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -974,6 +974,7 @@ struct net_device_ops {
 	int			(*ndo_set_features)(struct net_device *dev,
 						    netdev_features_t features);
 	int			(*ndo_neigh_construct)(struct neighbour *n);
+	void			(*ndo_neigh_destroy)(struct neighbour *n);
 };
 
 /*
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 6814c4d61c1c..e31f0a86f9b7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -43,6 +43,7 @@ struct neigh_parms {
 #endif
 	struct net_device *dev;
 	struct neigh_parms *next;
+	int	(*neigh_setup)(struct neighbour *);
 	void	(*neigh_cleanup)(struct neighbour *);
 	struct neigh_table *tbl;
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index d57a40a2598c..4af151e1bf5d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -497,6 +497,13 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
 		}
 	}
 
+	/* Device specific setup. */
+	if (n->parms->neigh_setup &&
+	    (error = n->parms->neigh_setup(n)) < 0) {
+		rc = ERR_PTR(error);
+		goto out_neigh_release;
+	}
+
 	n->confirmed = jiffies - (n->parms->base_reachable_time << 1);
 
 	write_lock_bh(&tbl->lock);
@@ -710,6 +717,9 @@ void neigh_destroy(struct neighbour *neigh)
 	skb_queue_purge(&neigh->arp_queue);
 	neigh->arp_queue_len_bytes = 0;
 
+	if (dev->netdev_ops->ndo_neigh_destroy)
+		dev->netdev_ops->ndo_neigh_destroy(neigh);
+
 	dev_put(dev);
 	neigh_parms_put(neigh->parms);
 
-- 
cgit v1.2.3


From ab56222a32b9dbaae19c1d37f07b0ac4fc3c27ec Mon Sep 17 00:00:00 2001
From: Vijay Subramanian <subramanian.vijay@gmail.com>
Date: Tue, 20 Dec 2011 13:23:24 +0000
Subject: tcp: Replace constants with #define macros

to record the state of SACK/FACK and DSACK for better readability and maintenance.

Signed-off-by: Vijay Subramanian <subramanian.vijay@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   | 5 +++++
 include/net/tcp.h     | 4 ++--
 net/ipv4/syncookies.c | 2 +-
 net/ipv4/tcp_input.c  | 6 +++---
 4 files changed, 11 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7f59ee946983..46a85c9e1f25 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -238,6 +238,11 @@ struct tcp_sack_block {
 	u32	end_seq;
 };
 
+/*These are used to set the sack_ok field in struct tcp_options_received */
+#define TCP_SACK_SEEN     (1 << 0)   /*1 = peer is SACK capable, */
+#define TCP_FACK_ENABLED  (1 << 1)   /*1 = FACK is enabled locally*/
+#define TCP_DSACK_SEEN    (1 << 2)   /*1 = DSACK was received from peer*/
+
 struct tcp_options_received {
 /*	PAWS/RTTM data	*/
 	long	ts_recent_stamp;/* Time we stored ts_recent (for aging) */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a4f52e154843..0118ea999f67 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -773,12 +773,12 @@ static inline int tcp_is_reno(const struct tcp_sock *tp)
 
 static inline int tcp_is_fack(const struct tcp_sock *tp)
 {
-	return tp->rx_opt.sack_ok & 2;
+	return tp->rx_opt.sack_ok & TCP_FACK_ENABLED;
 }
 
 static inline void tcp_enable_fack(struct tcp_sock *tp)
 {
-	tp->rx_opt.sack_ok |= 2;
+	tp->rx_opt.sack_ok |= TCP_FACK_ENABLED;
 }
 
 static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 90f6544c13e2..51fdbb490437 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -245,7 +245,7 @@ bool cookie_check_timestamp(struct tcp_options_received *tcp_opt, bool *ecn_ok)
 	if (!sysctl_tcp_timestamps)
 		return false;
 
-	tcp_opt->sack_ok = (options >> 4) & 0x1;
+	tcp_opt->sack_ok = (options & (1 << 4)) ? TCP_SACK_SEEN : 0;
 	*ecn_ok = (options >> 5) & 1;
 	if (*ecn_ok && !sysctl_tcp_ecn)
 		return false;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index f131d92d25ee..2877c3e09587 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -865,13 +865,13 @@ static void tcp_disable_fack(struct tcp_sock *tp)
 	/* RFC3517 uses different metric in lost marker => reset on change */
 	if (tcp_is_fack(tp))
 		tp->lost_skb_hint = NULL;
-	tp->rx_opt.sack_ok &= ~2;
+	tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED;
 }
 
 /* Take a notice that peer is sending D-SACKs */
 static void tcp_dsack_seen(struct tcp_sock *tp)
 {
-	tp->rx_opt.sack_ok |= 4;
+	tp->rx_opt.sack_ok |= TCP_DSACK_SEEN;
 }
 
 /* Initialize metrics on socket. */
@@ -3878,7 +3878,7 @@ void tcp_parse_options(const struct sk_buff *skb, struct tcp_options_received *o
 			case TCPOPT_SACK_PERM:
 				if (opsize == TCPOLEN_SACK_PERM && th->syn &&
 				    !estab && sysctl_tcp_sack) {
-					opt_rx->sack_ok = 1;
+					opt_rx->sack_ok = TCP_SACK_SEEN;
 					tcp_sack_reset(opt_rx);
 				}
 				break;
-- 
cgit v1.2.3


From 3d058d7bc2c5671ae630e0b463be8a69b5783fb9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sun, 18 Dec 2011 01:55:54 +0100
Subject: netfilter: rework user-space expectation helper support

This partially reworks bc01befdcf3e40979eb518085a075cbf0aacede0
which added userspace expectation support.

This patch removes the nf_ct_userspace_expect_list since now we
force to use the new iptables CT target feature to add the helper
extension for conntracks that have attached expectations from
userspace.

A new version of the proof-of-concept code to implement userspace
helpers from userspace is available at:

http://people.netfilter.org/pablo/userspace-conntrack-helpers/nf-ftp-helper-POC.tar.bz2

This patch also modifies the CT target to allow to set the
conntrack's userspace helper status flags. This flag is used
to tell the conntrack system to explicitly allocate the helper
extension.

This helper extension is useful to link the userspace expectations
with the master conntrack that is being tracked from one userspace
helper.

This feature fixes a problem in the current approach of the
userspace helper support. Basically, if the master conntrack that
has got a userspace expectation vanishes, the expectations point to
one invalid memory address. Thus, triggering an oops in the
expectation deletion event path.

I decided not to add a new revision of the CT target because
I only needed to add a new flag for it. I'll document in this
issue in the iptables manpage. I have also changed the return
value from EINVAL to EOPNOTSUPP if one flag not supported is
specified. Thus, in the future adding new features that only
require a new flag can be added without a new revision.

There is no official code using this in userspace (apart from
the proof-of-concept) that uses this infrastructure but there
will be some by beginning 2012.

Reported-by: Sam Roberts <vieuxtech@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/nf_conntrack_common.h |  4 ++
 include/linux/netfilter/xt_CT.h               |  3 +-
 include/net/netfilter/nf_conntrack_expect.h   |  1 -
 net/netfilter/nf_conntrack_expect.c           | 63 +++++++++------------------
 net/netfilter/nf_conntrack_helper.c           | 12 +++++
 net/netfilter/nf_conntrack_netlink.c          |  5 ++-
 net/netfilter/xt_CT.c                         |  8 ++--
 7 files changed, 48 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index 0d3dd66322ec..9e3a2838291b 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -83,6 +83,10 @@ enum ip_conntrack_status {
 	/* Conntrack is a fake untracked entry */
 	IPS_UNTRACKED_BIT = 12,
 	IPS_UNTRACKED = (1 << IPS_UNTRACKED_BIT),
+
+	/* Conntrack has a userspace helper. */
+	IPS_USERSPACE_HELPER_BIT = 13,
+	IPS_USERSPACE_HELPER = (1 << IPS_USERSPACE_HELPER_BIT),
 };
 
 /* Connection tracking event types */
diff --git a/include/linux/netfilter/xt_CT.h b/include/linux/netfilter/xt_CT.h
index b56e76811c04..6390f0992f36 100644
--- a/include/linux/netfilter/xt_CT.h
+++ b/include/linux/netfilter/xt_CT.h
@@ -3,7 +3,8 @@
 
 #include <linux/types.h>
 
-#define XT_CT_NOTRACK	0x1
+#define XT_CT_NOTRACK		0x1
+#define XT_CT_USERSPACE_HELPER	0x2
 
 struct xt_ct_target_info {
 	__u16 flags;
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index 0f8a8c587532..4619caadd9d1 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -91,7 +91,6 @@ static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 
 void nf_ct_remove_expectations(struct nf_conn *ct);
 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp);
-void nf_ct_remove_userspace_expectations(void);
 
 /* Allocate space for an expectation: this is mandatory before calling
    nf_ct_expect_related.  You will have to call put afterwards. */
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 340c80d968d4..bebb1675e6ff 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly;
 
 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 
-static HLIST_HEAD(nf_ct_userspace_expect_list);
-
 /* nf_conntrack_expect helper functions */
 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 				u32 pid, int report)
@@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
 	struct nf_conn_help *master_help = nfct_help(exp->master);
 	struct net *net = nf_ct_exp_net(exp);
 
+	NF_CT_ASSERT(master_help);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
 
 	hlist_del_rcu(&exp->hnode);
 	net->ct.expect_count--;
 
 	hlist_del(&exp->lnode);
-	if (!(exp->flags & NF_CT_EXPECT_USERSPACE))
-		master_help->expecting[exp->class]--;
+	master_help->expecting[exp->class]--;
 
 	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
 	nf_ct_expect_put(exp);
@@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp)
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 
-static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
+static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 {
 	struct nf_conn_help *master_help = nfct_help(exp->master);
+	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(exp);
-	const struct nf_conntrack_expect_policy *p;
 	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
 	/* two references : one for hash insert, one for the timer */
 	atomic_add(2, &exp->use);
 
-	if (master_help) {
-		hlist_add_head(&exp->lnode, &master_help->expectations);
-		master_help->expecting[exp->class]++;
-	} else if (exp->flags & NF_CT_EXPECT_USERSPACE)
-		hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list);
+	hlist_add_head(&exp->lnode, &master_help->expectations);
+	master_help->expecting[exp->class]++;
 
 	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
 	net->ct.expect_count++;
 
 	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 		    (unsigned long)exp);
-	if (master_help) {
-		p = &rcu_dereference_protected(
-				master_help->helper,
-				lockdep_is_held(&nf_conntrack_lock)
-				)->expect_policy[exp->class];
-		exp->timeout.expires = jiffies + p->timeout * HZ;
+	helper = rcu_dereference_protected(master_help->helper,
+					   lockdep_is_held(&nf_conntrack_lock));
+	if (helper) {
+		exp->timeout.expires = jiffies +
+			helper->expect_policy[exp->class].timeout * HZ;
 	}
 	add_timer(&exp->timeout);
 
 	NF_CT_STAT_INC(net, expect_create);
+	return 0;
 }
 
 /* Race with expectations being used means we could have none to find; OK. */
@@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 	struct nf_conntrack_expect *i;
 	struct nf_conn *master = expect->master;
 	struct nf_conn_help *master_help = nfct_help(master);
+	struct nf_conntrack_helper *helper;
 	struct net *net = nf_ct_exp_net(expect);
 	struct hlist_node *n;
 	unsigned int h;
 	int ret = 1;
 
-	/* Don't allow expectations created from kernel-space with no helper */
-	if (!(expect->flags & NF_CT_EXPECT_USERSPACE) &&
-	    (!master_help || (master_help && !master_help->helper))) {
+	if (!master_help) {
 		ret = -ESHUTDOWN;
 		goto out;
 	}
@@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 		}
 	}
 	/* Will be over limit? */
-	if (master_help) {
-		p = &rcu_dereference_protected(
-			master_help->helper,
-			lockdep_is_held(&nf_conntrack_lock)
-			)->expect_policy[expect->class];
+	helper = rcu_dereference_protected(master_help->helper,
+					   lockdep_is_held(&nf_conntrack_lock));
+	if (helper) {
+		p = &helper->expect_policy[expect->class];
 		if (p->max_expected &&
 		    master_help->expecting[expect->class] >= p->max_expected) {
 			evict_oldest_expect(master, expect);
@@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
 	if (ret <= 0)
 		goto out;
 
-	ret = 0;
-	nf_ct_expect_insert(expect);
+	ret = nf_ct_expect_insert(expect);
+	if (ret < 0)
+		goto out;
 	spin_unlock_bh(&nf_conntrack_lock);
 	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
 	return ret;
@@ -461,21 +455,6 @@ out:
 }
 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
 
-void nf_ct_remove_userspace_expectations(void)
-{
-	struct nf_conntrack_expect *exp;
-	struct hlist_node *n, *next;
-
-	hlist_for_each_entry_safe(exp, n, next,
-				  &nf_ct_userspace_expect_list, lnode) {
-		if (del_timer(&exp->timeout)) {
-			nf_ct_unlink_expect(exp);
-			nf_ct_expect_put(exp);
-		}
-	}
-}
-EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations);
-
 #ifdef CONFIG_PROC_FS
 struct ct_expect_iter_state {
 	struct seq_net_private p;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 93c4bdbfc1ae..c9e0de08aa87 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
 	int ret = 0;
 
 	if (tmpl != NULL) {
+		/* we've got a userspace helper. */
+		if (tmpl->status & IPS_USERSPACE_HELPER) {
+			help = nf_ct_helper_ext_add(ct, flags);
+			if (help == NULL) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			rcu_assign_pointer(help->helper, NULL);
+			__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+			ret = 0;
+			goto out;
+		}
 		help = nfct_help(tmpl);
 		if (help != NULL)
 			helper = help->helper;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 636617ccfe25..739548029dc2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2040,6 +2040,10 @@ ctnetlink_create_expect(struct net *net, u16 zone,
 	}
 	help = nfct_help(ct);
 	if (!help) {
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) {
 		if (!cda[CTA_EXPECT_TIMEOUT]) {
 			err = -EINVAL;
 			goto out;
@@ -2264,7 +2268,6 @@ static void __exit ctnetlink_exit(void)
 {
 	pr_info("ctnetlink: unregistering from nfnetlink.\n");
 
-	nf_ct_remove_userspace_expectations();
 	unregister_pernet_subsys(&ctnetlink_net_ops);
 	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
 	nfnetlink_subsys_unregister(&ctnl_subsys);
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 0221d10de75a..8e87123f1373 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 	int ret = 0;
 	u8 proto;
 
-	if (info->flags & ~XT_CT_NOTRACK)
-		return -EINVAL;
+	if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER))
+		return -EOPNOTSUPP;
 
 	if (info->flags & XT_CT_NOTRACK) {
 		ct = nf_ct_untracked_get();
@@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 				  GFP_KERNEL))
 		goto err3;
 
-	if (info->helper[0]) {
+	if (info->flags & XT_CT_USERSPACE_HELPER) {
+		__set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status);
+	} else if (info->helper[0]) {
 		ret = -ENOENT;
 		proto = xt_ct_find_proto(par);
 		if (!proto) {
-- 
cgit v1.2.3


From cbc9f2f4fcd70d5a627558ca9a881fa9391abf69 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 23 Dec 2011 13:59:49 +0100
Subject: netfilter: nf_nat: export NAT definitions to userspace

Export the NAT definitions to userspace. So far userspace (specifically,
iptables) has been copying the headers files from include/net. Also
rename some structures and definitions in preparation for IPv6 NAT.
Since these have never been officially exported, this doesn't affect
existing userspace code.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild                     |  1 +
 .../linux/netfilter/nf_conntrack_tuple_common.h    | 27 ++++++++++
 include/linux/netfilter/nf_nat.h                   | 25 ++++++++++
 include/linux/netfilter_ipv4/Kbuild                |  1 -
 include/linux/netfilter_ipv4/nf_nat.h              | 58 ----------------------
 include/net/netfilter/nf_conntrack_tuple.h         |  1 -
 include/net/netfilter/nf_nat.h                     | 10 ++--
 include/net/netfilter/nf_nat_core.h                |  2 +-
 include/net/netfilter/nf_nat_protocol.h            | 14 +++---
 net/ipv4/netfilter/ipt_MASQUERADE.c                | 16 +++---
 net/ipv4/netfilter/ipt_NETMAP.c                    | 14 +++---
 net/ipv4/netfilter/ipt_REDIRECT.c                  | 16 +++---
 net/ipv4/netfilter/nf_nat_core.c                   | 54 ++++++++++----------
 net/ipv4/netfilter/nf_nat_h323.c                   | 20 ++++----
 net/ipv4/netfilter/nf_nat_helper.c                 | 10 ++--
 net/ipv4/netfilter/nf_nat_pptp.c                   | 14 +++---
 net/ipv4/netfilter/nf_nat_proto_common.c           | 24 ++++-----
 net/ipv4/netfilter/nf_nat_proto_dccp.c             |  4 +-
 net/ipv4/netfilter/nf_nat_proto_gre.c              |  8 +--
 net/ipv4/netfilter/nf_nat_proto_icmp.c             |  4 +-
 net/ipv4/netfilter/nf_nat_proto_sctp.c             |  4 +-
 net/ipv4/netfilter/nf_nat_proto_tcp.c              |  4 +-
 net/ipv4/netfilter/nf_nat_proto_udp.c              |  4 +-
 net/ipv4/netfilter/nf_nat_proto_udplite.c          |  4 +-
 net/ipv4/netfilter/nf_nat_proto_unknown.c          |  2 +-
 net/ipv4/netfilter/nf_nat_rule.c                   | 22 ++++----
 net/ipv4/netfilter/nf_nat_sip.c                    | 10 ++--
 net/ipv4/netfilter/nf_nat_standalone.c             |  2 +-
 net/netfilter/nf_conntrack_netlink.c               |  4 +-
 29 files changed, 185 insertions(+), 194 deletions(-)
 create mode 100644 include/linux/netfilter/nf_nat.h
 delete mode 100644 include/linux/netfilter_ipv4/nf_nat.h

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index a1b410c76fc3..d81f7719b01c 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -5,6 +5,7 @@ header-y += nf_conntrack_ftp.h
 header-y += nf_conntrack_sctp.h
 header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
+header-y += nf_nat.h
 header-y += nfnetlink.h
 header-y += nfnetlink_compat.h
 header-y += nfnetlink_conntrack.h
diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/linux/netfilter/nf_conntrack_tuple_common.h
index 2ea22b018a87..2f6bbc5b8125 100644
--- a/include/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/linux/netfilter/nf_conntrack_tuple_common.h
@@ -7,6 +7,33 @@ enum ip_conntrack_dir {
 	IP_CT_DIR_MAX
 };
 
+/* The protocol-specific manipulable parts of the tuple: always in
+ * network order
+ */
+union nf_conntrack_man_proto {
+	/* Add other protocols here. */
+	__be16 all;
+
+	struct {
+		__be16 port;
+	} tcp;
+	struct {
+		__be16 port;
+	} udp;
+	struct {
+		__be16 id;
+	} icmp;
+	struct {
+		__be16 port;
+	} dccp;
+	struct {
+		__be16 port;
+	} sctp;
+	struct {
+		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
+	} gre;
+};
+
 #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
 
 #endif /* _NF_CONNTRACK_TUPLE_COMMON_H */
diff --git a/include/linux/netfilter/nf_nat.h b/include/linux/netfilter/nf_nat.h
new file mode 100644
index 000000000000..8df2d13730b2
--- /dev/null
+++ b/include/linux/netfilter/nf_nat.h
@@ -0,0 +1,25 @@
+#ifndef _NETFILTER_NF_NAT_H
+#define _NETFILTER_NF_NAT_H
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_conntrack_tuple_common.h>
+
+#define NF_NAT_RANGE_MAP_IPS		1
+#define NF_NAT_RANGE_PROTO_SPECIFIED	2
+#define NF_NAT_RANGE_PROTO_RANDOM	4
+#define NF_NAT_RANGE_PERSISTENT		8
+
+struct nf_nat_ipv4_range {
+	unsigned int			flags;
+	__be32				min_ip;
+	__be32				max_ip;
+	union nf_conntrack_man_proto	min;
+	union nf_conntrack_man_proto	max;
+};
+
+struct nf_nat_ipv4_multi_range_compat {
+	unsigned int			rangesize;
+	struct nf_nat_ipv4_range	range[1];
+};
+
+#endif /* _NETFILTER_NF_NAT_H */
diff --git a/include/linux/netfilter_ipv4/Kbuild b/include/linux/netfilter_ipv4/Kbuild
index c3b45480ecf7..f9930c87fff3 100644
--- a/include/linux/netfilter_ipv4/Kbuild
+++ b/include/linux/netfilter_ipv4/Kbuild
@@ -12,4 +12,3 @@ header-y += ipt_ah.h
 header-y += ipt_ecn.h
 header-y += ipt_realm.h
 header-y += ipt_ttl.h
-header-y += nf_nat.h
diff --git a/include/linux/netfilter_ipv4/nf_nat.h b/include/linux/netfilter_ipv4/nf_nat.h
deleted file mode 100644
index 7a861d09fc86..000000000000
--- a/include/linux/netfilter_ipv4/nf_nat.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef _LINUX_NF_NAT_H
-#define _LINUX_NF_NAT_H
-
-#include <linux/types.h>
-
-#define IP_NAT_RANGE_MAP_IPS 1
-#define IP_NAT_RANGE_PROTO_SPECIFIED 2
-#define IP_NAT_RANGE_PROTO_RANDOM 4
-#define IP_NAT_RANGE_PERSISTENT 8
-
-/* The protocol-specific manipulable parts of the tuple. */
-union nf_conntrack_man_proto {
-	/* Add other protocols here. */
-	__be16 all;
-
-	struct {
-		__be16 port;
-	} tcp;
-	struct {
-		__be16 port;
-	} udp;
-	struct {
-		__be16 id;
-	} icmp;
-	struct {
-		__be16 port;
-	} dccp;
-	struct {
-		__be16 port;
-	} sctp;
-	struct {
-		__be16 key;	/* GRE key is 32bit, PPtP only uses 16bit */
-	} gre;
-};
-
-/* Single range specification. */
-struct nf_nat_range {
-	/* Set to OR of flags above. */
-	unsigned int flags;
-
-	/* Inclusive: network order. */
-	__be32 min_ip, max_ip;
-
-	/* Inclusive: network order */
-	union nf_conntrack_man_proto min, max;
-};
-
-/* For backwards compat: don't use in modern code. */
-struct nf_nat_multi_range_compat {
-	unsigned int rangesize; /* Must be 1. */
-
-	/* hangs off end. */
-	struct nf_nat_range range[1];
-};
-
-#define nf_nat_multi_range nf_nat_multi_range_compat
-
-#endif
diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h
index 2f8fb77bfdd1..aea3f8221be0 100644
--- a/include/net/netfilter/nf_conntrack_tuple.h
+++ b/include/net/netfilter/nf_conntrack_tuple.h
@@ -12,7 +12,6 @@
 
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/nf_conntrack_tuple_common.h>
-#include <linux/netfilter_ipv4/nf_nat.h>
 #include <linux/list_nulls.h>
 
 /* A `tuple' is a structure containing the information to uniquely
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h
index b8872df7285f..b4de990b55f1 100644
--- a/include/net/netfilter/nf_nat.h
+++ b/include/net/netfilter/nf_nat.h
@@ -1,14 +1,12 @@
 #ifndef _NF_NAT_H
 #define _NF_NAT_H
 #include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/nf_nat.h>
+#include <linux/netfilter/nf_nat.h>
 #include <net/netfilter/nf_conntrack_tuple.h>
 
-#define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16
-
 enum nf_nat_manip_type {
-	IP_NAT_MANIP_SRC,
-	IP_NAT_MANIP_DST
+	NF_NAT_MANIP_SRC,
+	NF_NAT_MANIP_DST
 };
 
 /* SRC manip occurs POST_ROUTING or LOCAL_IN */
@@ -52,7 +50,7 @@ struct nf_conn_nat {
 
 /* Set up the info structure to map into this range. */
 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
-				      const struct nf_nat_range *range,
+				      const struct nf_nat_ipv4_range *range,
 				      enum nf_nat_manip_type maniptype);
 
 /* Is this tuple already taken? (not by us)*/
diff --git a/include/net/netfilter/nf_nat_core.h b/include/net/netfilter/nf_nat_core.h
index 3dc7b98effeb..b13d8d18d595 100644
--- a/include/net/netfilter/nf_nat_core.h
+++ b/include/net/netfilter/nf_nat_core.h
@@ -20,7 +20,7 @@ extern int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 static inline int nf_nat_initialized(struct nf_conn *ct,
 				     enum nf_nat_manip_type manip)
 {
-	if (manip == IP_NAT_MANIP_SRC)
+	if (manip == NF_NAT_MANIP_SRC)
 		return ct->status & IPS_SRC_NAT_DONE;
 	else
 		return ct->status & IPS_DST_NAT_DONE;
diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h
index 93cc90d28e66..7156c002b59c 100644
--- a/include/net/netfilter/nf_nat_protocol.h
+++ b/include/net/netfilter/nf_nat_protocol.h
@@ -4,7 +4,7 @@
 #include <net/netfilter/nf_nat.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
-struct nf_nat_range;
+struct nf_nat_ipv4_range;
 
 struct nf_nat_protocol {
 	/* Protocol number. */
@@ -30,15 +30,15 @@ struct nf_nat_protocol {
 	   possible.  Per-protocol part of tuple is initialized to the
 	   incoming packet. */
 	void (*unique_tuple)(struct nf_conntrack_tuple *tuple,
-			     const struct nf_nat_range *range,
+			     const struct nf_nat_ipv4_range *range,
 			     enum nf_nat_manip_type maniptype,
 			     const struct nf_conn *ct);
 
 	int (*range_to_nlattr)(struct sk_buff *skb,
-			       const struct nf_nat_range *range);
+			       const struct nf_nat_ipv4_range *range);
 
 	int (*nlattr_to_range)(struct nlattr *tb[],
-			       struct nf_nat_range *range);
+			       struct nf_nat_ipv4_range *range);
 };
 
 /* Protocol registration. */
@@ -61,14 +61,14 @@ extern bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 				  const union nf_conntrack_man_proto *max);
 
 extern void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-				      const struct nf_nat_range *range,
+				      const struct nf_nat_ipv4_range *range,
 				      enum nf_nat_manip_type maniptype,
 				      const struct nf_conn *ct,
 				      u_int16_t *rover);
 
 extern int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
-					const struct nf_nat_range *range);
+					const struct nf_nat_ipv4_range *range);
 extern int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-					struct nf_nat_range *range);
+					struct nf_nat_ipv4_range *range);
 
 #endif /*_NF_NAT_PROTO_H*/
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 9931152a78b5..2f210c79dc87 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 /* FIXME: Multiple targets. --RR */
 static int masquerade_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
 	enum ip_conntrack_info ctinfo;
-	struct nf_nat_range newrange;
-	const struct nf_nat_multi_range_compat *mr;
+	struct nf_nat_ipv4_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr;
 	const struct rtable *rt;
 	__be32 newsrc;
 
@@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	nat->masq_index = par->out->ifindex;
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  newsrc, newsrc,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
 }
 
 static int
@@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
 	.name		= "MASQUERADE",
 	.family		= NFPROTO_IPV4,
 	.target		= masquerade_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= 1 << NF_INET_POST_ROUTING,
 	.checkentry	= masquerade_tg_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 6cdb298f1035..b5bfbbabf70d 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
 
 static int netmap_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
+	if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 new_ip, netmask;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_ipv4_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_POST_ROUTING ||
@@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 		new_ip = ip_hdr(skb)->saddr & ~netmask;
 	new_ip |= mr->range[0].min_ip & netmask;
 
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  new_ip, new_ip,
 		  mr->range[0].min, mr->range[0].max });
 
@@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = {
 	.name 		= "NETMAP",
 	.family		= NFPROTO_IPV4,
 	.target 	= netmap_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) |
 			  (1 << NF_INET_POST_ROUTING) |
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 18a0656505a0..7c0103a5203e 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
 /* FIXME: Take multiple ranges --RR */
 static int redirect_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
-	if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
+	if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
 		pr_debug("bad MAP_IPS.\n");
 		return -EINVAL;
 	}
@@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
 	__be32 newdst;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
-	struct nf_nat_range newrange;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
+	struct nf_nat_ipv4_range newrange;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_OUT);
@@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 	}
 
 	/* Transfer from original range. */
-	newrange = ((struct nf_nat_range)
-		{ mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
+	newrange = ((struct nf_nat_ipv4_range)
+		{ mr->range[0].flags | NF_NAT_RANGE_MAP_IPS,
 		  newdst, newdst,
 		  mr->range[0].min, mr->range[0].max });
 
 	/* Hand modified range to generic setup. */
-	return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
+	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST);
 }
 
 static struct xt_target redirect_tg_reg __read_mostly = {
 	.name		= "REDIRECT",
 	.family		= NFPROTO_IPV4,
 	.target		= redirect_tg,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= redirect_tg_check,
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 447bc5cfdc6c..58ab7a4611dd 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -82,14 +82,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple);
  * that meet the constraints of range. */
 static int
 in_range(const struct nf_conntrack_tuple *tuple,
-	 const struct nf_nat_range *range)
+	 const struct nf_nat_ipv4_range *range)
 {
 	const struct nf_nat_protocol *proto;
 	int ret = 0;
 
 	/* If we are supposed to map IPs, then we must be in the
 	   range specified, otherwise let this drag us onto a new src IP. */
-	if (range->flags & IP_NAT_RANGE_MAP_IPS) {
+	if (range->flags & NF_NAT_RANGE_MAP_IPS) {
 		if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
 		    ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
 			return 0;
@@ -97,8 +97,8 @@ in_range(const struct nf_conntrack_tuple *tuple,
 
 	rcu_read_lock();
 	proto = __nf_nat_proto_find(tuple->dst.protonum);
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
-	    proto->in_range(tuple, IP_NAT_MANIP_SRC,
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) ||
+	    proto->in_range(tuple, NF_NAT_MANIP_SRC,
 			    &range->min, &range->max))
 		ret = 1;
 	rcu_read_unlock();
@@ -123,7 +123,7 @@ static int
 find_appropriate_src(struct net *net, u16 zone,
 		     const struct nf_conntrack_tuple *tuple,
 		     struct nf_conntrack_tuple *result,
-		     const struct nf_nat_range *range)
+		     const struct nf_nat_ipv4_range *range)
 {
 	unsigned int h = hash_by_src(net, zone, tuple);
 	const struct nf_conn_nat *nat;
@@ -157,7 +157,7 @@ find_appropriate_src(struct net *net, u16 zone,
 */
 static void
 find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
-		    const struct nf_nat_range *range,
+		    const struct nf_nat_ipv4_range *range,
 		    const struct nf_conn *ct,
 		    enum nf_nat_manip_type maniptype)
 {
@@ -166,10 +166,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 	u_int32_t minip, maxip, j;
 
 	/* No IP mapping?  Do nothing. */
-	if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
+	if (!(range->flags & NF_NAT_RANGE_MAP_IPS))
 		return;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		var_ipp = &tuple->src.u3.ip;
 	else
 		var_ipp = &tuple->dst.u3.ip;
@@ -189,7 +189,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 	minip = ntohl(range->min_ip);
 	maxip = ntohl(range->max_ip);
 	j = jhash_2words((__force u32)tuple->src.u3.ip,
-			 range->flags & IP_NAT_RANGE_PERSISTENT ?
+			 range->flags & NF_NAT_RANGE_PERSISTENT ?
 				0 : (__force u32)tuple->dst.u3.ip ^ zone, 0);
 	j = ((u64)j * (maxip - minip + 1)) >> 32;
 	*var_ipp = htonl(minip + j);
@@ -204,7 +204,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple,
 static void
 get_unique_tuple(struct nf_conntrack_tuple *tuple,
 		 const struct nf_conntrack_tuple *orig_tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 struct nf_conn *ct,
 		 enum nf_nat_manip_type maniptype)
 {
@@ -219,8 +219,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	   This is only required for source (ie. NAT/masq) mappings.
 	   So far, we don't do local source mappings, so multiple
 	   manips not an issue.  */
-	if (maniptype == IP_NAT_MANIP_SRC &&
-	    !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
+	if (maniptype == NF_NAT_MANIP_SRC &&
+	    !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
 		/* try the original tuple first */
 		if (in_range(orig_tuple, range)) {
 			if (!nf_nat_used_tuple(orig_tuple, ct)) {
@@ -247,8 +247,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple,
 	proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
 
 	/* Only bother mapping if it's not already in range and unique */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
-		if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) {
+		if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) {
 			if (proto->in_range(tuple, maniptype, &range->min,
 					    &range->max) &&
 			    (range->min.all == range->max.all ||
@@ -267,7 +267,7 @@ out:
 
 unsigned int
 nf_nat_setup_info(struct nf_conn *ct,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype)
 {
 	struct net *net = nf_ct_net(ct);
@@ -284,8 +284,8 @@ nf_nat_setup_info(struct nf_conn *ct,
 		}
 	}
 
-	NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
-		     maniptype == IP_NAT_MANIP_DST);
+	NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC ||
+		     maniptype == NF_NAT_MANIP_DST);
 	BUG_ON(nf_nat_initialized(ct, maniptype));
 
 	/* What we've got will look like inverse of reply. Normally
@@ -306,13 +306,13 @@ nf_nat_setup_info(struct nf_conn *ct,
 		nf_conntrack_alter_reply(ct, &reply);
 
 		/* Non-atomic: we own this at the moment. */
-		if (maniptype == IP_NAT_MANIP_SRC)
+		if (maniptype == NF_NAT_MANIP_SRC)
 			ct->status |= IPS_SRC_NAT;
 		else
 			ct->status |= IPS_DST_NAT;
 	}
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		unsigned int srchash;
 
 		srchash = hash_by_src(net, nf_ct_zone(ct),
@@ -327,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct,
 	}
 
 	/* It's done. */
-	if (maniptype == IP_NAT_MANIP_DST)
+	if (maniptype == NF_NAT_MANIP_DST)
 		ct->status |= IPS_DST_NAT_DONE;
 	else
 		ct->status |= IPS_SRC_NAT_DONE;
@@ -361,7 +361,7 @@ manip_pkt(u_int16_t proto,
 
 	iph = (void *)skb->data + iphdroff;
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
 		iph->saddr = target->src.u3.ip;
 	} else {
@@ -381,7 +381,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct,
 	unsigned long statusbit;
 	enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
 
-	if (mtype == IP_NAT_MANIP_SRC)
+	if (mtype == NF_NAT_MANIP_SRC)
 		statusbit = IPS_SRC_NAT;
 	else
 		statusbit = IPS_DST_NAT;
@@ -447,7 +447,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct,
 			return 0;
 	}
 
-	if (manip == IP_NAT_MANIP_SRC)
+	if (manip == NF_NAT_MANIP_SRC)
 		statusbit = IPS_SRC_NAT;
 	else
 		statusbit = IPS_DST_NAT;
@@ -602,7 +602,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
 
 static int nfnetlink_parse_nat_proto(struct nlattr *attr,
 				     const struct nf_conn *ct,
-				     struct nf_nat_range *range)
+				     struct nf_nat_ipv4_range *range)
 {
 	struct nlattr *tb[CTA_PROTONAT_MAX+1];
 	const struct nf_nat_protocol *npt;
@@ -626,7 +626,7 @@ static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
 
 static int
 nfnetlink_parse_nat(const struct nlattr *nat,
-		    const struct nf_conn *ct, struct nf_nat_range *range)
+		    const struct nf_conn *ct, struct nf_nat_ipv4_range *range)
 {
 	struct nlattr *tb[CTA_NAT_MAX+1];
 	int err;
@@ -646,7 +646,7 @@ nfnetlink_parse_nat(const struct nlattr *nat,
 		range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
 
 	if (range->min_ip)
-		range->flags |= IP_NAT_RANGE_MAP_IPS;
+		range->flags |= NF_NAT_RANGE_MAP_IPS;
 
 	if (!tb[CTA_NAT_PROTO])
 		return 0;
@@ -663,7 +663,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct,
 			  enum nf_nat_manip_type manip,
 			  const struct nlattr *attr)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	if (nfnetlink_parse_nat(attr, ct, &range) < 0)
 		return -EINVAL;
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index b9a1136addbd..dc1dd912baf4 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_q931_expect(struct nf_conn *new,
 			       struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	if (this->tuple.src.u3.ip != 0) {	/* Only accept calls from GK */
 		nf_nat_follow_master(new, this);
@@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new,
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip =
 	    new->master->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
@@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
 static void ip_nat_callforwarding_expect(struct nf_conn *new,
 					 struct nf_conntrack_expect *this)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(new->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = this->saved_proto;
 	range.min_ip = range.max_ip = this->saved_ip;
-	nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
 }
 
 /****************************************************************************/
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index ebc5f8894f99..049e8b7c3188 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -430,22 +430,22 @@ nf_nat_seq_adjust(struct sk_buff *skb,
 void nf_nat_follow_master(struct nf_conn *ct,
 			  struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 }
 EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 3e8284ba46b8..c273d58980ae 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	struct nf_conntrack_tuple t;
 	const struct nf_ct_pptp_master *ct_pptp_info;
 	const struct nf_nat_pptp *nat_pptp_info;
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
 	nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
@@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct,
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* Change src to where master sends to */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
 	if (exp->dir == IP_CT_DIR_ORIGINAL) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = IP_NAT_RANGE_MAP_IPS;
+	range.flags = NF_NAT_RANGE_MAP_IPS;
 	range.min_ip = range.max_ip
 		= ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
 	if (exp->dir == IP_CT_DIR_REPLY) {
-		range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 		range.min = range.max = exp->saved_proto;
 	}
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 }
 
 /* outbound packets == from PNS to PAC */
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
index a3d997618602..47fff91c9ae6 100644
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ b/net/ipv4/netfilter/nf_nat_proto_common.c
@@ -26,7 +26,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 {
 	__be16 port;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		port = tuple->src.u.all;
 	else
 		port = tuple->dst.u.all;
@@ -37,7 +37,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
 EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
 
 void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
-			       const struct nf_nat_range *range,
+			       const struct nf_nat_ipv4_range *range,
 			       enum nf_nat_manip_type maniptype,
 			       const struct nf_conn *ct,
 			       u_int16_t *rover)
@@ -46,15 +46,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 	__be16 *portptr;
 	u_int16_t off;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		portptr = &tuple->src.u.all;
 	else
 		portptr = &tuple->dst.u.all;
 
 	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		/* If it's dst rewrite, can't change port */
-		if (maniptype == IP_NAT_MANIP_DST)
+		if (maniptype == NF_NAT_MANIP_DST)
 			return;
 
 		if (ntohs(*portptr) < 1024) {
@@ -75,9 +75,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		range_size = ntohs(range->max.all) - min + 1;
 	}
 
-	if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
+	if (range->flags & NF_NAT_RANGE_PROTO_RANDOM)
 		off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
-						 maniptype == IP_NAT_MANIP_SRC
+						 maniptype == NF_NAT_MANIP_SRC
 						 ? tuple->dst.u.all
 						 : tuple->src.u.all);
 	else
@@ -87,7 +87,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
 		*portptr = htons(min + off % range_size);
 		if (++i != range_size && nf_nat_used_tuple(tuple, ct))
 			continue;
-		if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
+		if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM))
 			*rover = off;
 		return;
 	}
@@ -97,7 +97,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
 
 #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
 int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
-				 const struct nf_nat_range *range)
+				 const struct nf_nat_ipv4_range *range)
 {
 	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
 	NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
@@ -109,16 +109,16 @@ nla_put_failure:
 EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
 
 int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
-				 struct nf_nat_range *range)
+				 struct nf_nat_ipv4_range *range)
 {
 	if (tb[CTA_PROTONAT_PORT_MIN]) {
 		range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
 		range->max.all = range->min.tcp.port;
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 	if (tb[CTA_PROTONAT_PORT_MAX]) {
 		range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
-		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+		range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 	return 0;
 }
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
index 570faf2667b2..c43d5b366d0d 100644
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c
@@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover;
 
 static void
 dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct dccp_hdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
 		newport = tuple->src.u.dccp.port;
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index bc8d83a31c73..9b1c629d7a00 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
 /* generate unique tuple ... */
 static void
 gre_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
 	if (!ct->master)
 		return;
 
-	if (maniptype == IP_NAT_MANIP_SRC)
+	if (maniptype == NF_NAT_MANIP_SRC)
 		keyptr = &tuple->src.u.gre.key;
 	else
 		keyptr = &tuple->dst.u.gre.key;
 
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
 		pr_debug("%p: NATing GRE PPTP\n", ct);
 		min = 1;
 		range_size = 0xffff;
@@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
 
 	/* we only have destination manip of a packet, since 'source key'
 	 * is not present in the packet itself */
-	if (maniptype != IP_NAT_MANIP_DST)
+	if (maniptype != NF_NAT_MANIP_DST)
 		return true;
 	switch (greh->version) {
 	case GRE_VERSION_1701:
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 9f4dc1235dc7..8f87b4bebf2b 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
 
 static void
 icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -40,7 +40,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
 
 	range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
 	/* If no range specified... */
-	if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+	if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
 		range_size = 0xFFFF;
 
 	for (i = 0; ; ++id) {
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
index bd5a80a62a5b..4e70dc6fad21 100644
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c
@@ -19,7 +19,7 @@ static u_int16_t nf_sctp_port_rover;
 
 static void
 sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		  const struct nf_nat_range *range,
+		  const struct nf_nat_ipv4_range *range,
 		  enum nf_nat_manip_type maniptype,
 		  const struct nf_conn *ct)
 {
@@ -46,7 +46,7 @@ sctp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct sctphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
index 0d67bb80130f..6fcc865dc2ee 100644
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c
@@ -23,7 +23,7 @@ static u_int16_t tcp_port_rover;
 
 static void
 tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -55,7 +55,7 @@ tcp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct tcphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
index 0b1b8601cba7..18ea44ebfff7 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udp.c
@@ -22,7 +22,7 @@ static u_int16_t udp_port_rover;
 
 static void
 udp_unique_tuple(struct nf_conntrack_tuple *tuple,
-		 const struct nf_nat_range *range,
+		 const struct nf_nat_ipv4_range *range,
 		 enum nf_nat_manip_type maniptype,
 		 const struct nf_conn *ct)
 {
@@ -47,7 +47,7 @@ udp_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct udphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
index f83ef23e2ab7..a17b75b9e2a7 100644
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c
@@ -21,7 +21,7 @@ static u_int16_t udplite_port_rover;
 
 static void
 udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
-		     const struct nf_nat_range *range,
+		     const struct nf_nat_ipv4_range *range,
 		     enum nf_nat_manip_type maniptype,
 		     const struct nf_conn *ct)
 {
@@ -47,7 +47,7 @@ udplite_manip_pkt(struct sk_buff *skb,
 	iph = (struct iphdr *)(skb->data + iphdroff);
 	hdr = (struct udphdr *)(skb->data + hdroff);
 
-	if (maniptype == IP_NAT_MANIP_SRC) {
+	if (maniptype == NF_NAT_MANIP_SRC) {
 		/* Get rid of src ip and src pt */
 		oldip = iph->saddr;
 		newip = tuple->src.u3.ip;
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
index a50f2bc1c732..ab8e8c132168 100644
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c
@@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
 }
 
 static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
-				 const struct nf_nat_range *range,
+				 const struct nf_nat_ipv4_range *range,
 				 enum nf_nat_manip_type maniptype,
 				 const struct nf_conn *ct)
 {
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 733c9abc1cbd..d2a9dc314e0e 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_IN);
@@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 			    ctinfo == IP_CT_RELATED_REPLY));
 	NF_CT_ASSERT(par->out != NULL);
 
-	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
+	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC);
 }
 
 static unsigned int
@@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
 		     par->hooknum == NF_INET_LOCAL_OUT);
@@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 	/* Connection must be valid and new. */
 	NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
 
-	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
+	return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST);
 }
 
 static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
@@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 
 static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 {
-	const struct nf_nat_multi_range_compat *mr = par->targinfo;
+	const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
 
 	/* Must be a valid range */
 	if (mr->rangesize != 1) {
@@ -105,13 +105,13 @@ static unsigned int
 alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
 {
 	/* Force range to this IP; let proto decide mapping for
-	   per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+	   per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED).
 	*/
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	range.flags = 0;
 	pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
-		 HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ?
+		 HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
 		 &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
 
@@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb,
 static struct xt_target ipt_snat_reg __read_mostly = {
 	.name		= "SNAT",
 	.target		= ipt_snat_target,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN),
 	.checkentry	= ipt_snat_checkentry,
@@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = {
 static struct xt_target ipt_dnat_reg __read_mostly = {
 	.name		= "DNAT",
 	.target		= ipt_dnat_target,
-	.targetsize	= sizeof(struct nf_nat_multi_range_compat),
+	.targetsize	= sizeof(struct nf_nat_ipv4_multi_range_compat),
 	.table		= "nat",
 	.hooks		= (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
 	.checkentry	= ipt_dnat_checkentry,
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
index 78844d9208f1..d0319f96269f 100644
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ b/net/ipv4/netfilter/nf_nat_sip.c
@@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off)
 static void ip_nat_sip_expected(struct nf_conn *ct,
 				struct nf_conntrack_expect *exp)
 {
-	struct nf_nat_range range;
+	struct nf_nat_ipv4_range range;
 
 	/* This must be a fresh one. */
 	BUG_ON(ct->status & IPS_NAT_DONE_MASK);
 
 	/* For DST manip, map port here to where it's expected. */
-	range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+	range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
 	range.min = range.max = exp->saved_proto;
 	range.min_ip = range.max_ip = exp->saved_ip;
-	nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+	nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
 
 	/* Change src to where master sends to, but only if the connection
 	 * actually came from the same source. */
 	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
 	    ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
-		range.flags = IP_NAT_RANGE_MAP_IPS;
+		range.flags = NF_NAT_RANGE_MAP_IPS;
 		range.min_ip = range.max_ip
 			= ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
-		nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+		nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
 	}
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
index 92900482edea..3828a4229822 100644
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ b/net/ipv4/netfilter/nf_nat_standalone.c
@@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum,
 				return ret;
 		} else
 			pr_debug("Already setup manip %s for ct %p\n",
-				 maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
+				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
 				 ct);
 		break;
 
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 739548029dc2..4f9c941335c9 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1102,14 +1102,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])
 
 	if (cda[CTA_NAT_DST]) {
 		ret = ctnetlink_parse_nat_setup(ct,
-						IP_NAT_MANIP_DST,
+						NF_NAT_MANIP_DST,
 						cda[CTA_NAT_DST]);
 		if (ret < 0)
 			return ret;
 	}
 	if (cda[CTA_NAT_SRC]) {
 		ret = ctnetlink_parse_nat_setup(ct,
-						IP_NAT_MANIP_SRC,
+						NF_NAT_MANIP_SRC,
 						cda[CTA_NAT_SRC]);
 		if (ret < 0)
 			return ret;
-- 
cgit v1.2.3


From 9d4dde5215779f4099730194ad30624fdba3d8b2 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Thu, 22 Dec 2011 23:39:14 +0000
Subject: net: only use a single page of slop in MAX_SKB_FRAGS

In order to accommodate a 64K buffer we need 64K/PAGE_SIZE plus one more page
in order to allow for a buffer which does not start on a page boundary.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 12e6fed73f8e..f47f0c3939f2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -128,13 +128,17 @@ struct sk_buff_head {
 
 struct sk_buff;
 
-/* To allow 64K frame to be packed as single skb without frag_list. Since
- * GRO uses frags we allocate at least 16 regardless of page size.
+/* To allow 64K frame to be packed as single skb without frag_list we
+ * require 64K/PAGE_SIZE pages plus 1 additional page to allow for
+ * buffers which do not start on a page boundary.
+ *
+ * Since GRO uses frags we allocate at least 16 regardless of page
+ * size.
  */
-#if (65536/PAGE_SIZE + 2) < 16
+#if (65536/PAGE_SIZE + 1) < 16
 #define MAX_SKB_FRAGS 16UL
 #else
-#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2)
+#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 1)
 #endif
 
 typedef struct skb_frag_struct skb_frag_t;
-- 
cgit v1.2.3


From 60b778ce519625102d3f72a2071ea72a05e990ce Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 24 Dec 2011 06:56:49 +0000
Subject: rfs: better sizing of dev_flow_table

Aim of this patch is to provide full range of rps_flow_cnt on 64bit arches.

Theorical limit on number of flows is 2^32

Fix some buggy RPS/RFS macros as well.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Tom Herbert <therbert@google.com>
CC: Xi Wang <xi.wang@gmail.com>
CC: Laurent Chavey <chavey@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  8 ++++----
 net/core/net-sysfs.c      | 44 +++++++++++++++++++++++++++-----------------
 2 files changed, 31 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 603730804da5..a776a675c0e5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -597,7 +597,7 @@ struct rps_map {
 	struct rcu_head rcu;
 	u16 cpus[0];
 };
-#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + ((_num) * sizeof(u16)))
 
 /*
  * The rps_dev_flow structure contains the mapping of a flow to a CPU, the
@@ -621,7 +621,7 @@ struct rps_dev_flow_table {
 	struct rps_dev_flow flows[0];
 };
 #define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
-    (_num * sizeof(struct rps_dev_flow)))
+    ((_num) * sizeof(struct rps_dev_flow)))
 
 /*
  * The rps_sock_flow_table contains mappings of flows to the last CPU
@@ -632,7 +632,7 @@ struct rps_sock_flow_table {
 	u16 ents[0];
 };
 #define	RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \
-    (_num * sizeof(u16)))
+    ((_num) * sizeof(u16)))
 
 #define RPS_NO_CPU 0xffff
 
@@ -684,7 +684,7 @@ struct xps_map {
 	struct rcu_head rcu;
 	u16 queues[0];
 };
-#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + (_num * sizeof(u16)))
+#define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16)))
 #define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
     / sizeof(u16))
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4b4d0b0a3543..abf4393a77b3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -622,15 +622,15 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 					   char *buf)
 {
 	struct rps_dev_flow_table *flow_table;
-	unsigned int val = 0;
+	unsigned long val = 0;
 
 	rcu_read_lock();
 	flow_table = rcu_dereference(queue->rps_flow_table);
 	if (flow_table)
-		val = flow_table->mask + 1;
+		val = (unsigned long)flow_table->mask + 1;
 	rcu_read_unlock();
 
-	return sprintf(buf, "%u\n", val);
+	return sprintf(buf, "%lu\n", val);
 }
 
 static void rps_dev_flow_table_release_work(struct work_struct *work)
@@ -654,36 +654,46 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
 				     struct rx_queue_attribute *attr,
 				     const char *buf, size_t len)
 {
-	unsigned int count;
-	char *endp;
+	unsigned long mask, count;
 	struct rps_dev_flow_table *table, *old_table;
 	static DEFINE_SPINLOCK(rps_dev_flow_lock);
+	int rc;
 
 	if (!capable(CAP_NET_ADMIN))
 		return -EPERM;
 
-	count = simple_strtoul(buf, &endp, 0);
-	if (endp == buf)
-		return -EINVAL;
+	rc = kstrtoul(buf, 0, &count);
+	if (rc < 0)
+		return rc;
 
 	if (count) {
-		int i;
-
-		if (count > INT_MAX)
+		mask = count - 1;
+		/* mask = roundup_pow_of_two(count) - 1;
+		 * without overflows...
+		 */
+		while ((mask | (mask >> 1)) != mask)
+			mask |= (mask >> 1);
+		/* On 64 bit arches, must check mask fits in table->mask (u32),
+		 * and on 32bit arches, must check RPS_DEV_FLOW_TABLE_SIZE(mask + 1)
+		 * doesnt overflow.
+		 */
+#if BITS_PER_LONG > 32
+		if (mask > (unsigned long)(u32)mask)
 			return -EINVAL;
-		count = roundup_pow_of_two(count);
-		if (count > (ULONG_MAX - sizeof(struct rps_dev_flow_table))
+#else
+		if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1))
 				/ sizeof(struct rps_dev_flow)) {
 			/* Enforce a limit to prevent overflow */
 			return -EINVAL;
 		}
-		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
+#endif
+		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1));
 		if (!table)
 			return -ENOMEM;
 
-		table->mask = count - 1;
-		for (i = 0; i < count; i++)
-			table->flows[i].cpu = RPS_NO_CPU;
+		table->mask = mask;
+		for (count = 0; count <= mask; count++)
+			table->flows[count].cpu = RPS_NO_CPU;
 	} else
 		table = NULL;
 
-- 
cgit v1.2.3


From 9413902796f56f6209e19dd54e840ed46950612c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Dec 2011 14:19:50 +0100
Subject: netfilter: add extended accounting infrastructure over nfnetlink

We currently have two ways to account traffic in netfilter:

- iptables chain and rule counters:

 # iptables -L -n -v
Chain INPUT (policy DROP 3 packets, 867 bytes)
 pkts bytes target     prot opt in     out     source               destination
    8  1104 ACCEPT     all  --  lo     *       0.0.0.0/0            0.0.0.0/0

- use flow-based accounting provided by ctnetlink:

 # conntrack -L
tcp      6 431999 ESTABLISHED src=192.168.1.130 dst=212.106.219.168 sport=58152 dport=80 packets=47 bytes=7654 src=212.106.219.168 dst=192.168.1.130 sport=80 dport=58152 packets=49 bytes=66340 [ASSURED] mark=0 use=1

While trying to display real-time accounting statistics, we require
to pool the kernel periodically to obtain this information. This is
OK if the number of flows is relatively low. However, in case that
the number of flows is huge, we can spend a considerable amount of
cycles to iterate over the list of flows that have been obtained.

Moreover, if we want to obtain the sum of the flow accounting results
that match some criteria, we have to iterate over the whole list of
existing flows, look for matchings and update the counters.

This patch adds the extended accounting infrastructure for
nfnetlink which aims to allow displaying real-time traffic accounting
without the need of complicated and resource-consuming implementation
in user-space. Basically, this new infrastructure allows you to create
accounting objects. One accounting object is composed of packet and
byte counters.

In order to manipulate create accounting objects, you require the
new libnetfilter_acct library. It contains several examples of use:

libnetfilter_acct/examples# ./nfacct-add http-traffic
libnetfilter_acct/examples# ./nfacct-get
http-traffic = { pkts = 000000000000,   bytes = 000000000000 };

Then, you can use one of this accounting objects in several iptables
rules using the new nfacct match (which comes in a follow-up patch):

 # iptables -I INPUT -p tcp --sport 80 -m nfacct --nfacct-name http-traffic
 # iptables -I OUTPUT -p tcp --dport 80 -m nfacct --nfacct-name http-traffic

The idea is simple: if one packet matches the rule, the nfacct match
updates the counters.

Thanks to Patrick McHardy, Eric Dumazet, Changli Gao for reviewing and
providing feedback for this contribution.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild           |   1 +
 include/linux/netfilter/nfnetlink.h      |   3 +-
 include/linux/netfilter/nfnetlink_acct.h |  36 ++++
 net/netfilter/Kconfig                    |   8 +
 net/netfilter/Makefile                   |   1 +
 net/netfilter/nfnetlink_acct.c           | 352 +++++++++++++++++++++++++++++++
 6 files changed, 400 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/netfilter/nfnetlink_acct.h
 create mode 100644 net/netfilter/nfnetlink_acct.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index d81f7719b01c..6785246e6e62 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -7,6 +7,7 @@ header-y += nf_conntrack_tcp.h
 header-y += nf_conntrack_tuple_common.h
 header-y += nf_nat.h
 header-y += nfnetlink.h
+header-y += nfnetlink_acct.h
 header-y += nfnetlink_compat.h
 header-y += nfnetlink_conntrack.h
 header-y += nfnetlink_log.h
diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h
index 74d33861473c..b64454c2f79f 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -48,7 +48,8 @@ struct nfgenmsg {
 #define NFNL_SUBSYS_ULOG		4
 #define NFNL_SUBSYS_OSF			5
 #define NFNL_SUBSYS_IPSET		6
-#define NFNL_SUBSYS_COUNT		7
+#define NFNL_SUBSYS_ACCT		7
+#define NFNL_SUBSYS_COUNT		8
 
 #ifdef __KERNEL__
 
diff --git a/include/linux/netfilter/nfnetlink_acct.h b/include/linux/netfilter/nfnetlink_acct.h
new file mode 100644
index 000000000000..7c4279b4ae7a
--- /dev/null
+++ b/include/linux/netfilter/nfnetlink_acct.h
@@ -0,0 +1,36 @@
+#ifndef _NFNL_ACCT_H_
+#define _NFNL_ACCT_H_
+
+#ifndef NFACCT_NAME_MAX
+#define NFACCT_NAME_MAX		32
+#endif
+
+enum nfnl_acct_msg_types {
+	NFNL_MSG_ACCT_NEW,
+	NFNL_MSG_ACCT_GET,
+	NFNL_MSG_ACCT_GET_CTRZERO,
+	NFNL_MSG_ACCT_DEL,
+	NFNL_MSG_ACCT_MAX
+};
+
+enum nfnl_acct_type {
+	NFACCT_UNSPEC,
+	NFACCT_NAME,
+	NFACCT_PKTS,
+	NFACCT_BYTES,
+	NFACCT_USE,
+	__NFACCT_MAX
+};
+#define NFACCT_MAX (__NFACCT_MAX - 1)
+
+#ifdef __KERNEL__
+
+struct nf_acct;
+
+extern struct nf_acct *nfnl_acct_find_get(const char *filter_name);
+extern void nfnl_acct_put(struct nf_acct *acct);
+extern void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct);
+
+#endif /* __KERNEL__ */
+
+#endif /* _NFNL_ACCT_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index d5597b759ba3..77326acd1f57 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -4,6 +4,14 @@ menu "Core Netfilter Configuration"
 config NETFILTER_NETLINK
 	tristate
 
+config NETFILTER_NETLINK_ACCT
+tristate "Netfilter NFACCT over NFNETLINK interface"
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_NETLINK
+	help
+	  If this option is enabled, the kernel will include support
+	  for extended accounting via NFNETLINK.
+
 config NETFILTER_NETLINK_QUEUE
 	tristate "Netfilter NFQUEUE over NFNETLINK interface"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 1a02853df863..4da1c879644f 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
 obj-$(CONFIG_NETFILTER) = netfilter.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
+obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
 obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o
 obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o
 
diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c
new file mode 100644
index 000000000000..362ab6ca3dc1
--- /dev/null
+++ b/net/netfilter/nfnetlink_acct.c
@@ -0,0 +1,352 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation (or any later at your option).
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/rculist.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <net/netlink.h>
+#include <net/sock.h>
+#include <asm/atomic.h>
+
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure");
+
+static LIST_HEAD(nfnl_acct_list);
+
+struct nf_acct {
+	atomic64_t		pkts;
+	atomic64_t		bytes;
+	struct list_head	head;
+	atomic_t		refcnt;
+	char			name[NFACCT_NAME_MAX];
+	struct rcu_head		rcu_head;
+};
+
+static int
+nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+	struct nf_acct *nfacct, *matching = NULL;
+	char *acct_name;
+
+	if (!tb[NFACCT_NAME])
+		return -EINVAL;
+
+	acct_name = nla_data(tb[NFACCT_NAME]);
+
+	list_for_each_entry(nfacct, &nfnl_acct_list, head) {
+		if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0)
+			continue;
+
+                if (nlh->nlmsg_flags & NLM_F_EXCL)
+			return -EEXIST;
+
+		matching = nfacct;
+		break;
+        }
+
+	if (matching) {
+		if (nlh->nlmsg_flags & NLM_F_REPLACE) {
+			/* reset counters if you request a replacement. */
+			atomic64_set(&matching->pkts, 0);
+			atomic64_set(&matching->bytes, 0);
+			return 0;
+		}
+		return -EBUSY;
+	}
+
+	nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL);
+	if (nfacct == NULL)
+		return -ENOMEM;
+
+	strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX);
+
+	if (tb[NFACCT_BYTES]) {
+		atomic64_set(&nfacct->bytes,
+			     be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES])));
+	}
+	if (tb[NFACCT_PKTS]) {
+		atomic64_set(&nfacct->pkts,
+			     be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS])));
+	}
+	atomic_set(&nfacct->refcnt, 1);
+	list_add_tail_rcu(&nfacct->head, &nfnl_acct_list);
+	return 0;
+}
+
+static int
+nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type,
+		   int event, struct nf_acct *acct)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	unsigned int flags = pid ? NLM_F_MULTI : 0;
+	u64 pkts, bytes;
+
+	event |= NFNL_SUBSYS_ACCT << 8;
+	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);
+	if (nlh == NULL)
+		goto nlmsg_failure;
+
+	nfmsg = nlmsg_data(nlh);
+	nfmsg->nfgen_family = AF_UNSPEC;
+	nfmsg->version = NFNETLINK_V0;
+	nfmsg->res_id = 0;
+
+	NLA_PUT_STRING(skb, NFACCT_NAME, acct->name);
+
+	if (type == NFNL_MSG_ACCT_GET_CTRZERO) {
+		pkts = atomic64_xchg(&acct->pkts, 0);
+		bytes = atomic64_xchg(&acct->bytes, 0);
+	} else {
+		pkts = atomic64_read(&acct->pkts);
+		bytes = atomic64_read(&acct->bytes);
+	}
+	NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts));
+	NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes));
+	NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt)));
+
+	nlmsg_end(skb, nlh);
+	return skb->len;
+
+nlmsg_failure:
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -1;
+}
+
+static int
+nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_acct *cur, *last;
+
+	if (cb->args[2])
+		return 0;
+
+	last = (struct nf_acct *)cb->args[1];
+	if (cb->args[1])
+		cb->args[1] = 0;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+		if (last && cur != last)
+			continue;
+
+		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid,
+				       cb->nlh->nlmsg_seq,
+				       NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
+				       NFNL_MSG_ACCT_NEW, cur) < 0) {
+			cb->args[1] = (unsigned long)cur;
+			break;
+		}
+	}
+	if (!cb->args[1])
+		cb->args[2] = 1;
+	rcu_read_unlock();
+	return skb->len;
+}
+
+static int
+nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+	int ret = 0;
+	struct nf_acct *cur;
+	char *acct_name;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump,
+					  NULL, 0);
+	}
+
+	if (!tb[NFACCT_NAME])
+		return -EINVAL;
+	acct_name = nla_data(tb[NFACCT_NAME]);
+
+	list_for_each_entry(cur, &nfnl_acct_list, head) {
+		struct sk_buff *skb2;
+
+		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+			continue;
+
+		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+		if (skb2 == NULL)
+			break;
+
+		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid,
+					 nlh->nlmsg_seq,
+					 NFNL_MSG_TYPE(nlh->nlmsg_type),
+					 NFNL_MSG_ACCT_NEW, cur);
+		if (ret <= 0)
+			kfree_skb(skb2);
+
+		break;
+	}
+	return ret;
+}
+
+/* try to delete object, fail if it is still in use. */
+static int nfnl_acct_try_del(struct nf_acct *cur)
+{
+	int ret = 0;
+
+	/* we want to avoid races with nfnl_acct_find_get. */
+	if (atomic_dec_and_test(&cur->refcnt)) {
+		/* We are protected by nfnl mutex. */
+		list_del_rcu(&cur->head);
+		kfree_rcu(cur, rcu_head);
+	} else {
+		/* still in use, restore reference counter. */
+		atomic_inc(&cur->refcnt);
+		ret = -EBUSY;
+	}
+	return ret;
+}
+
+static int
+nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb,
+	     const struct nlmsghdr *nlh, const struct nlattr * const tb[])
+{
+	char *acct_name;
+	struct nf_acct *cur;
+	int ret = -ENOENT;
+
+	if (!tb[NFACCT_NAME]) {
+		list_for_each_entry(cur, &nfnl_acct_list, head)
+			nfnl_acct_try_del(cur);
+
+		return 0;
+	}
+	acct_name = nla_data(tb[NFACCT_NAME]);
+
+	list_for_each_entry(cur, &nfnl_acct_list, head) {
+		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0)
+			continue;
+
+		ret = nfnl_acct_try_del(cur);
+		if (ret < 0)
+			return ret;
+
+		break;
+	}
+	return ret;
+}
+
+static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = {
+	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 },
+	[NFACCT_BYTES] = { .type = NLA_U64 },
+	[NFACCT_PKTS] = { .type = NLA_U64 },
+};
+
+static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = {
+	[NFNL_MSG_ACCT_NEW]		= { .call = nfnl_acct_new,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_GET] 		= { .call = nfnl_acct_get,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_GET_CTRZERO] 	= { .call = nfnl_acct_get,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del,
+					    .attr_count = NFACCT_MAX,
+					    .policy = nfnl_acct_policy },
+};
+
+static const struct nfnetlink_subsystem nfnl_acct_subsys = {
+	.name				= "acct",
+	.subsys_id			= NFNL_SUBSYS_ACCT,
+	.cb_count			= NFNL_MSG_ACCT_MAX,
+	.cb				= nfnl_acct_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT);
+
+struct nf_acct *nfnl_acct_find_get(const char *acct_name)
+{
+	struct nf_acct *cur, *acct = NULL;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(cur, &nfnl_acct_list, head) {
+		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0)
+			continue;
+
+		if (!try_module_get(THIS_MODULE))
+			goto err;
+
+		if (!atomic_inc_not_zero(&cur->refcnt)) {
+			module_put(THIS_MODULE);
+			goto err;
+		}
+
+		acct = cur;
+		break;
+	}
+err:
+	rcu_read_unlock();
+	return acct;
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_find_get);
+
+void nfnl_acct_put(struct nf_acct *acct)
+{
+	atomic_dec(&acct->refcnt);
+	module_put(THIS_MODULE);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_put);
+
+void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct)
+{
+	atomic64_inc(&nfacct->pkts);
+	atomic64_add(skb->len, &nfacct->bytes);
+}
+EXPORT_SYMBOL_GPL(nfnl_acct_update);
+
+static int __init nfnl_acct_init(void)
+{
+	int ret;
+
+	pr_info("nfnl_acct: registering with nfnetlink.\n");
+	ret = nfnetlink_subsys_register(&nfnl_acct_subsys);
+	if (ret < 0) {
+		pr_err("nfnl_acct_init: cannot register with nfnetlink.\n");
+		goto err_out;
+	}
+	return 0;
+err_out:
+	return ret;
+}
+
+static void __exit nfnl_acct_exit(void)
+{
+	struct nf_acct *cur, *tmp;
+
+	pr_info("nfnl_acct: unregistering from nfnetlink.\n");
+	nfnetlink_subsys_unregister(&nfnl_acct_subsys);
+
+	list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) {
+		list_del_rcu(&cur->head);
+		/* We are sure that our objects have no clients at this point,
+		 * it's safe to release them all without checking refcnt. */
+		kfree_rcu(cur, rcu_head);
+	}
+}
+
+module_init(nfnl_acct_init);
+module_exit(nfnl_acct_exit);
-- 
cgit v1.2.3


From ceb98d03eac5704820f2ac1f370c9ff385e3a9f5 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 23 Dec 2011 14:28:59 +0100
Subject: netfilter: xtables: add nfacct match to support extended accounting

This patch adds the match that allows to perform extended
accounting. It requires the new nfnetlink_acct infrastructure.

 # iptables -I INPUT -p tcp --sport 80 -m nfacct --nfacct-name http-traffic
 # iptables -I OUTPUT -p tcp --dport 80 -m nfacct --nfacct-name http-traffic

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild      |  1 +
 include/linux/netfilter/xt_nfacct.h | 13 +++++++
 net/netfilter/Kconfig               | 10 +++++
 net/netfilter/Makefile              |  1 +
 net/netfilter/xt_nfacct.c           | 76 +++++++++++++++++++++++++++++++++++++
 5 files changed, 101 insertions(+)
 create mode 100644 include/linux/netfilter/xt_nfacct.h
 create mode 100644 net/netfilter/xt_nfacct.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index 6785246e6e62..e630a2ed4f18 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -23,6 +23,7 @@ header-y += xt_DSCP.h
 header-y += xt_IDLETIMER.h
 header-y += xt_LED.h
 header-y += xt_MARK.h
+header-y += xt_nfacct.h
 header-y += xt_NFLOG.h
 header-y += xt_NFQUEUE.h
 header-y += xt_RATEEST.h
diff --git a/include/linux/netfilter/xt_nfacct.h b/include/linux/netfilter/xt_nfacct.h
new file mode 100644
index 000000000000..3e19c8a86576
--- /dev/null
+++ b/include/linux/netfilter/xt_nfacct.h
@@ -0,0 +1,13 @@
+#ifndef _XT_NFACCT_MATCH_H
+#define _XT_NFACCT_MATCH_H
+
+#include <linux/netfilter/nfnetlink_acct.h>
+
+struct nf_acct;
+
+struct xt_nfacct_match_info {
+	char		name[NFACCT_NAME_MAX];
+	struct nf_acct	*nfacct;
+};
+
+#endif /* _XT_NFACCT_MATCH_H */
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 77326acd1f57..bac93ba60778 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -887,6 +887,16 @@ config NETFILTER_XT_MATCH_MULTIPORT
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_NFACCT
+	tristate '"nfacct" match support'
+	default m if NETFILTER_ADVANCED=n
+	select NETFILTER_NETLINK_ACCT
+	help
+	  This option allows you to use the extended accounting through
+	  nfnetlink_acct.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_MATCH_OSF
 	tristate '"osf" Passive OS fingerprint match'
 	depends on NETFILTER_ADVANCED && NETFILTER_NETLINK
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 4da1c879644f..b2eee4df8168 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -91,6 +91,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o
diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c
new file mode 100644
index 000000000000..b3be0ef21f19
--- /dev/null
+++ b/net/netfilter/xt_nfacct.c
@@ -0,0 +1,76 @@
+/*
+ * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org>
+ * (C) 2011 Intra2net AG <http://www.intra2net.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 (or any
+ * later at your option) as published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/nfnetlink_acct.h>
+#include <linux/netfilter/xt_nfacct.h>
+
+MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_nfacct");
+MODULE_ALIAS("ip6t_nfacct");
+
+static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct xt_nfacct_match_info *info = par->targinfo;
+
+	nfnl_acct_update(skb, info->nfacct);
+
+	return true;
+}
+
+static int
+nfacct_mt_checkentry(const struct xt_mtchk_param *par)
+{
+	struct xt_nfacct_match_info *info = par->matchinfo;
+	struct nf_acct *nfacct;
+
+	nfacct = nfnl_acct_find_get(info->name);
+	if (nfacct == NULL) {
+		pr_info("xt_nfacct: accounting object with name `%s' "
+			"does not exists\n", info->name);
+		return -ENOENT;
+	}
+	info->nfacct = nfacct;
+	return 0;
+}
+
+static void
+nfacct_mt_destroy(const struct xt_mtdtor_param *par)
+{
+	const struct xt_nfacct_match_info *info = par->matchinfo;
+
+	nfnl_acct_put(info->nfacct);
+}
+
+static struct xt_match nfacct_mt_reg __read_mostly = {
+	.name       = "nfacct",
+	.family     = NFPROTO_UNSPEC,
+	.checkentry = nfacct_mt_checkentry,
+	.match      = nfacct_mt,
+	.destroy    = nfacct_mt_destroy,
+	.matchsize  = sizeof(struct xt_nfacct_match_info),
+	.me         = THIS_MODULE,
+};
+
+static int __init nfacct_mt_init(void)
+{
+	return xt_register_match(&nfacct_mt_reg);
+}
+
+static void __exit nfacct_mt_exit(void)
+{
+	xt_unregister_match(&nfacct_mt_reg);
+}
+
+module_init(nfacct_mt_init);
+module_exit(nfacct_mt_exit);
-- 
cgit v1.2.3


From d446a8202c81d95f91b1682fc67e7fadd9a31389 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 9 Jun 2011 21:03:07 +0200
Subject: netfilter: xtables: move ipt_ecn to xt_ecn

Prepare the ECN match for augmentation by an IPv6 counterpart. Since
no symbol dependencies to ipv6.ko are added, having a single ecn match
module is the more so welcome.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/Kbuild         |   1 +
 include/linux/netfilter/xt_ecn.h       |  35 +++++++++
 include/linux/netfilter_ipv4/ipt_ecn.h |  31 +-------
 net/ipv4/netfilter/Kconfig             |  10 +--
 net/ipv4/netfilter/Makefile            |   1 -
 net/ipv4/netfilter/ipt_ecn.c           | 127 --------------------------------
 net/netfilter/Kconfig                  |   9 +++
 net/netfilter/Makefile                 |   1 +
 net/netfilter/xt_ecn.c                 | 128 +++++++++++++++++++++++++++++++++
 9 files changed, 180 insertions(+), 163 deletions(-)
 create mode 100644 include/linux/netfilter/xt_ecn.h
 delete mode 100644 net/ipv4/netfilter/ipt_ecn.c
 create mode 100644 net/netfilter/xt_ecn.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index e630a2ed4f18..e144f54185c0 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -43,6 +43,7 @@ header-y += xt_cpu.h
 header-y += xt_dccp.h
 header-y += xt_devgroup.h
 header-y += xt_dscp.h
+header-y += xt_ecn.h
 header-y += xt_esp.h
 header-y += xt_hashlimit.h
 header-y += xt_helper.h
diff --git a/include/linux/netfilter/xt_ecn.h b/include/linux/netfilter/xt_ecn.h
new file mode 100644
index 000000000000..065c1a537e5d
--- /dev/null
+++ b/include/linux/netfilter/xt_ecn.h
@@ -0,0 +1,35 @@
+/* iptables module for matching the ECN header in IPv4 and TCP header
+ *
+ * (C) 2002 Harald Welte <laforge@gnumonks.org>
+ *
+ * This software is distributed under GNU GPL v2, 1991
+ * 
+ * ipt_ecn.h,v 1.4 2002/08/05 19:39:00 laforge Exp
+*/
+#ifndef _XT_ECN_H
+#define _XT_ECN_H
+
+#include <linux/types.h>
+#include <linux/netfilter/xt_dscp.h>
+
+#define IPT_ECN_IP_MASK	(~XT_DSCP_MASK)
+
+#define IPT_ECN_OP_MATCH_IP	0x01
+#define IPT_ECN_OP_MATCH_ECE	0x10
+#define IPT_ECN_OP_MATCH_CWR	0x20
+
+#define IPT_ECN_OP_MATCH_MASK	0xce
+
+/* match info */
+struct ipt_ecn_info {
+	__u8 operation;
+	__u8 invert;
+	__u8 ip_ect;
+	union {
+		struct {
+			__u8 ect;
+		} tcp;
+	} proto;
+};
+
+#endif /* _XT_ECN_H */
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index eabf95fb7d3e..b1124ec76190 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -1,35 +1,6 @@
-/* iptables module for matching the ECN header in IPv4 and TCP header
- *
- * (C) 2002 Harald Welte <laforge@gnumonks.org>
- *
- * This software is distributed under GNU GPL v2, 1991
- * 
- * ipt_ecn.h,v 1.4 2002/08/05 19:39:00 laforge Exp
-*/
 #ifndef _IPT_ECN_H
 #define _IPT_ECN_H
 
-#include <linux/types.h>
-#include <linux/netfilter/xt_dscp.h>
-
-#define IPT_ECN_IP_MASK	(~XT_DSCP_MASK)
-
-#define IPT_ECN_OP_MATCH_IP	0x01
-#define IPT_ECN_OP_MATCH_ECE	0x10
-#define IPT_ECN_OP_MATCH_CWR	0x20
-
-#define IPT_ECN_OP_MATCH_MASK	0xce
-
-/* match info */
-struct ipt_ecn_info {
-	__u8 operation;
-	__u8 invert;
-	__u8 ip_ect;
-	union {
-		struct {
-			__u8 ect;
-		} tcp;
-	} proto;
-};
+#include <linux/netfilter/xt_ecn.h>
 
 #endif /* _IPT_ECN_H */
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 7e1f5cdaf11e..53b9c79c8025 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -76,11 +76,11 @@ config IP_NF_MATCH_AH
 config IP_NF_MATCH_ECN
 	tristate '"ecn" match support'
 	depends on NETFILTER_ADVANCED
-	help
-	  This option adds a `ECN' match, which allows you to match against
-	  the IPv4 and TCP header ECN fields.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	select NETFILTER_XT_MATCH_ECN
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_MATCH_ECN.
 
 config IP_NF_MATCH_RPFILTER
 	tristate '"rpfilter" reverse path filter match support'
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 123dd88cea53..213a462b739b 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -49,7 +49,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 
 # matches
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
-obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
 obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
 
 # targets
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
deleted file mode 100644
index 2b57e52c746c..000000000000
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ /dev/null
@@ -1,127 +0,0 @@
-/* IP tables module for matching the value of the IPv4 and TCP ECN bits
- *
- * (C) 2002 by Harald Welte <laforge@gnumonks.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_ecn.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
-MODULE_LICENSE("GPL");
-
-static inline bool match_ip(const struct sk_buff *skb,
-			    const struct ipt_ecn_info *einfo)
-{
-	return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
-	       !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
-}
-
-static inline bool match_tcp(const struct sk_buff *skb,
-			     const struct ipt_ecn_info *einfo,
-			     bool *hotdrop)
-{
-	struct tcphdr _tcph;
-	const struct tcphdr *th;
-
-	/* In practice, TCP match does this, so can't fail.  But let's
-	 * be good citizens.
-	 */
-	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
-	if (th == NULL) {
-		*hotdrop = false;
-		return false;
-	}
-
-	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
-			if (th->ece == 1)
-				return false;
-		} else {
-			if (th->ece == 0)
-				return false;
-		}
-	}
-
-	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
-			if (th->cwr == 1)
-				return false;
-		} else {
-			if (th->cwr == 0)
-				return false;
-		}
-	}
-
-	return true;
-}
-
-static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
-{
-	const struct ipt_ecn_info *info = par->matchinfo;
-
-	if (info->operation & IPT_ECN_OP_MATCH_IP)
-		if (!match_ip(skb, info))
-			return false;
-
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
-		if (!match_tcp(skb, info, &par->hotdrop))
-			return false;
-	}
-
-	return true;
-}
-
-static int ecn_mt_check(const struct xt_mtchk_param *par)
-{
-	const struct ipt_ecn_info *info = par->matchinfo;
-	const struct ipt_ip *ip = par->entryinfo;
-
-	if (info->operation & IPT_ECN_OP_MATCH_MASK)
-		return -EINVAL;
-
-	if (info->invert & IPT_ECN_OP_MATCH_MASK)
-		return -EINVAL;
-
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
-	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
-		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static struct xt_match ecn_mt_reg __read_mostly = {
-	.name		= "ecn",
-	.family		= NFPROTO_IPV4,
-	.match		= ecn_mt,
-	.matchsize	= sizeof(struct ipt_ecn_info),
-	.checkentry	= ecn_mt_check,
-	.me		= THIS_MODULE,
-};
-
-static int __init ecn_mt_init(void)
-{
-	return xt_register_match(&ecn_mt_reg);
-}
-
-static void __exit ecn_mt_exit(void)
-{
-	xt_unregister_match(&ecn_mt_reg);
-}
-
-module_init(ecn_mt_init);
-module_exit(ecn_mt_exit);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index bac93ba60778..20388a97df66 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -778,6 +778,15 @@ config NETFILTER_XT_MATCH_DSCP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_MATCH_ECN
+	tristate '"ecn" match support'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds an "ECN" match, which allows you to match against
+	the IPv4 and TCP header ECN fields.
+
+	To compile it as a module, choose M here. If unsure, say N.
+
 config NETFILTER_XT_MATCH_ESP
 	tristate '"esp" match support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index b2eee4df8168..40f4c3d636c5 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -81,6 +81,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
+obj-$(CONFIG_NETFILTER_XT_MATCH_ECN) += xt_ecn.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
new file mode 100644
index 000000000000..2c198f5e3efb
--- /dev/null
+++ b/net/netfilter/xt_ecn.c
@@ -0,0 +1,128 @@
+/* IP tables module for matching the value of the IPv4 and TCP ECN bits
+ *
+ * (C) 2002 by Harald Welte <laforge@gnumonks.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/tcp.h>
+
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter_ipv4/ipt_ecn.h>
+
+MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_ecn");
+
+static inline bool match_ip(const struct sk_buff *skb,
+			    const struct ipt_ecn_info *einfo)
+{
+	return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
+	       !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
+}
+
+static inline bool match_tcp(const struct sk_buff *skb,
+			     const struct ipt_ecn_info *einfo,
+			     bool *hotdrop)
+{
+	struct tcphdr _tcph;
+	const struct tcphdr *th;
+
+	/* In practice, TCP match does this, so can't fail.  But let's
+	 * be good citizens.
+	 */
+	th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
+	if (th == NULL) {
+		*hotdrop = false;
+		return false;
+	}
+
+	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
+		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
+			if (th->ece == 1)
+				return false;
+		} else {
+			if (th->ece == 0)
+				return false;
+		}
+	}
+
+	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
+		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
+			if (th->cwr == 1)
+				return false;
+		} else {
+			if (th->cwr == 0)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+	const struct ipt_ecn_info *info = par->matchinfo;
+
+	if (info->operation & IPT_ECN_OP_MATCH_IP)
+		if (!match_ip(skb, info))
+			return false;
+
+	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
+		if (!match_tcp(skb, info, &par->hotdrop))
+			return false;
+	}
+
+	return true;
+}
+
+static int ecn_mt_check(const struct xt_mtchk_param *par)
+{
+	const struct ipt_ecn_info *info = par->matchinfo;
+	const struct ipt_ip *ip = par->entryinfo;
+
+	if (info->operation & IPT_ECN_OP_MATCH_MASK)
+		return -EINVAL;
+
+	if (info->invert & IPT_ECN_OP_MATCH_MASK)
+		return -EINVAL;
+
+	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
+	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
+		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct xt_match ecn_mt_reg __read_mostly = {
+	.name		= "ecn",
+	.family		= NFPROTO_IPV4,
+	.match		= ecn_mt,
+	.matchsize	= sizeof(struct ipt_ecn_info),
+	.checkentry	= ecn_mt_check,
+	.me		= THIS_MODULE,
+};
+
+static int __init ecn_mt_init(void)
+{
+	return xt_register_match(&ecn_mt_reg);
+}
+
+static void __exit ecn_mt_exit(void)
+{
+	xt_unregister_match(&ecn_mt_reg);
+}
+
+module_init(ecn_mt_init);
+module_exit(ecn_mt_exit);
-- 
cgit v1.2.3


From a4c6f9d3636db538025f9622c008192a0835cc23 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 9 Jun 2011 21:15:37 +0200
Subject: netfilter: xtables: give xt_ecn its own name

Use the new macro and struct names in xt_ecn.h, and put the old
definitions into a definition-forwarding ipt_ecn.h.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_ecn.h       | 12 ++++++------
 include/linux/netfilter_ipv4/ipt_ecn.h | 11 ++++++++++-
 net/netfilter/xt_ecn.c                 | 34 +++++++++++++++++-----------------
 3 files changed, 33 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_ecn.h b/include/linux/netfilter/xt_ecn.h
index 065c1a537e5d..7158fca364f2 100644
--- a/include/linux/netfilter/xt_ecn.h
+++ b/include/linux/netfilter/xt_ecn.h
@@ -12,16 +12,16 @@
 #include <linux/types.h>
 #include <linux/netfilter/xt_dscp.h>
 
-#define IPT_ECN_IP_MASK	(~XT_DSCP_MASK)
+#define XT_ECN_IP_MASK	(~XT_DSCP_MASK)
 
-#define IPT_ECN_OP_MATCH_IP	0x01
-#define IPT_ECN_OP_MATCH_ECE	0x10
-#define IPT_ECN_OP_MATCH_CWR	0x20
+#define XT_ECN_OP_MATCH_IP	0x01
+#define XT_ECN_OP_MATCH_ECE	0x10
+#define XT_ECN_OP_MATCH_CWR	0x20
 
-#define IPT_ECN_OP_MATCH_MASK	0xce
+#define XT_ECN_OP_MATCH_MASK	0xce
 
 /* match info */
-struct ipt_ecn_info {
+struct xt_ecn_info {
 	__u8 operation;
 	__u8 invert;
 	__u8 ip_ect;
diff --git a/include/linux/netfilter_ipv4/ipt_ecn.h b/include/linux/netfilter_ipv4/ipt_ecn.h
index b1124ec76190..0e0c063dbf60 100644
--- a/include/linux/netfilter_ipv4/ipt_ecn.h
+++ b/include/linux/netfilter_ipv4/ipt_ecn.h
@@ -2,5 +2,14 @@
 #define _IPT_ECN_H
 
 #include <linux/netfilter/xt_ecn.h>
+#define ipt_ecn_info xt_ecn_info
 
-#endif /* _IPT_ECN_H */
+enum {
+	IPT_ECN_IP_MASK       = XT_ECN_IP_MASK,
+	IPT_ECN_OP_MATCH_IP   = XT_ECN_OP_MATCH_IP,
+	IPT_ECN_OP_MATCH_ECE  = XT_ECN_OP_MATCH_ECE,
+	IPT_ECN_OP_MATCH_CWR  = XT_ECN_OP_MATCH_CWR,
+	IPT_ECN_OP_MATCH_MASK = XT_ECN_OP_MATCH_MASK,
+};
+
+#endif /* IPT_ECN_H */
diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c
index 2c198f5e3efb..3ebb3dcace65 100644
--- a/net/netfilter/xt_ecn.c
+++ b/net/netfilter/xt_ecn.c
@@ -15,8 +15,8 @@
 #include <linux/tcp.h>
 
 #include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_ecn.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_ecn.h>
 
 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
@@ -24,14 +24,14 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_ecn");
 
 static inline bool match_ip(const struct sk_buff *skb,
-			    const struct ipt_ecn_info *einfo)
+			    const struct xt_ecn_info *einfo)
 {
-	return ((ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect) ^
-	       !!(einfo->invert & IPT_ECN_OP_MATCH_IP);
+	return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^
+	       !!(einfo->invert & XT_ECN_OP_MATCH_IP);
 }
 
 static inline bool match_tcp(const struct sk_buff *skb,
-			     const struct ipt_ecn_info *einfo,
+			     const struct xt_ecn_info *einfo,
 			     bool *hotdrop)
 {
 	struct tcphdr _tcph;
@@ -46,8 +46,8 @@ static inline bool match_tcp(const struct sk_buff *skb,
 		return false;
 	}
 
-	if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
+	if (einfo->operation & XT_ECN_OP_MATCH_ECE) {
+		if (einfo->invert & XT_ECN_OP_MATCH_ECE) {
 			if (th->ece == 1)
 				return false;
 		} else {
@@ -56,8 +56,8 @@ static inline bool match_tcp(const struct sk_buff *skb,
 		}
 	}
 
-	if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
-		if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
+	if (einfo->operation & XT_ECN_OP_MATCH_CWR) {
+		if (einfo->invert & XT_ECN_OP_MATCH_CWR) {
 			if (th->cwr == 1)
 				return false;
 		} else {
@@ -71,13 +71,13 @@ static inline bool match_tcp(const struct sk_buff *skb,
 
 static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
-	const struct ipt_ecn_info *info = par->matchinfo;
+	const struct xt_ecn_info *info = par->matchinfo;
 
-	if (info->operation & IPT_ECN_OP_MATCH_IP)
+	if (info->operation & XT_ECN_OP_MATCH_IP)
 		if (!match_ip(skb, info))
 			return false;
 
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
+	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR)) {
 		if (!match_tcp(skb, info, &par->hotdrop))
 			return false;
 	}
@@ -87,16 +87,16 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 static int ecn_mt_check(const struct xt_mtchk_param *par)
 {
-	const struct ipt_ecn_info *info = par->matchinfo;
+	const struct xt_ecn_info *info = par->matchinfo;
 	const struct ipt_ip *ip = par->entryinfo;
 
-	if (info->operation & IPT_ECN_OP_MATCH_MASK)
+	if (info->operation & XT_ECN_OP_MATCH_MASK)
 		return -EINVAL;
 
-	if (info->invert & IPT_ECN_OP_MATCH_MASK)
+	if (info->invert & XT_ECN_OP_MATCH_MASK)
 		return -EINVAL;
 
-	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) &&
+	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) &&
 	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) {
 		pr_info("cannot match TCP bits in rule for non-tcp packets\n");
 		return -EINVAL;
@@ -109,7 +109,7 @@ static struct xt_match ecn_mt_reg __read_mostly = {
 	.name		= "ecn",
 	.family		= NFPROTO_IPV4,
 	.match		= ecn_mt,
-	.matchsize	= sizeof(struct ipt_ecn_info),
+	.matchsize	= sizeof(struct xt_ecn_info),
 	.checkentry	= ecn_mt_check,
 	.me		= THIS_MODULE,
 };
-- 
cgit v1.2.3


From e6fe2371bdd3713d0b227e9cd7f905e127ff81a0 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:52:21 +0000
Subject: sock_diag: Arrange sock_diag.h such that it is exportable to
 userspace

Properly toss existing components around the ifdef __KERNEL__
and include the header into the header-y target.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild      |  1 +
 include/linux/sock_diag.h | 10 +++++++---
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 0b091b32267d..8e484d660bc3 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -195,6 +195,7 @@ header-y += igmp.h
 header-y += in.h
 header-y += in6.h
 header-y += in_route.h
+header-y += sock_diag.h
 header-y += inet_diag.h
 header-y += inotify.h
 header-y += input.h
diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 379d5dccf8e1..66bc18ef4fa4 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -1,16 +1,19 @@
 #ifndef __SOCK_DIAG_H__
 #define __SOCK_DIAG_H__
 
-#define SOCK_DIAG_BY_FAMILY 20
+#include <linux/types.h>
 
-struct sk_buff;
-struct nlmsghdr;
+#define SOCK_DIAG_BY_FAMILY 20
 
 struct sock_diag_req {
 	__u8	sdiag_family;
 	__u8	sdiag_protocol;
 };
 
+#ifdef __KERNEL__
+struct sk_buff;
+struct nlmsghdr;
+
 struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
@@ -26,4 +29,5 @@ int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
 extern struct sock *sock_diag_nlsk;
+#endif /* KERNEL */
 #endif
-- 
cgit v1.2.3


From 288461e1546fa4162fa237eeed8ea09a16521dcd Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:52:51 +0000
Subject: unix_diag: Include unix_diag.h into header-y target

The headers check complains it should include the linux/types.h
withing, thus add this one.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild      | 1 +
 include/linux/unix_diag.h | 2 ++
 2 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 8e484d660bc3..c94e71781b79 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -197,6 +197,7 @@ header-y += in6.h
 header-y += in_route.h
 header-y += sock_diag.h
 header-y += inet_diag.h
+header-y += unix_diag.h
 header-y += inotify.h
 header-y += input.h
 header-y += ioctl.h
diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 3f7afb007d70..a5ce0f325745 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -1,6 +1,8 @@
 #ifndef __UNIX_DIAG_H__
 #define __UNIX_DIAG_H__
 
+#include <linux/types.h>
+
 struct unix_diag_req {
 	__u8	sdiag_family;
 	__u8	sdiag_protocol;
-- 
cgit v1.2.3


From 5d2e5f274f9e9a06fb934dd45260e2616a9992e6 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:53:13 +0000
Subject: sock_diag: Introduce the meminfo nla core (v2)

Add a routine that dumps memory-related values of a socket.
It's made as an array to make it possible to add more stuff
here later without breaking compatibility.

Since v1: The SK_MEMINFO_ constants are in userspace
visible part of sock_diag.h, the rest is under __KERNEL__.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h | 15 +++++++++++++++
 net/core/sock_diag.c      | 23 +++++++++++++++++++++++
 2 files changed, 38 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 66bc18ef4fa4..251729a47880 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -10,9 +10,22 @@ struct sock_diag_req {
 	__u8	sdiag_protocol;
 };
 
+enum {
+	SK_MEMINFO_RMEM_ALLOC,
+	SK_MEMINFO_RCVBUF,
+	SK_MEMINFO_WMEM_ALLOC,
+	SK_MEMINFO_SNDBUF,
+	SK_MEMINFO_FWD_ALLOC,
+	SK_MEMINFO_WMEM_QUEUED,
+	SK_MEMINFO_OPTMEM,
+
+	SK_MEMINFO_VARS,
+};
+
 #ifdef __KERNEL__
 struct sk_buff;
 struct nlmsghdr;
+struct sock;
 
 struct sock_diag_handler {
 	__u8 family;
@@ -28,6 +41,8 @@ void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlms
 int sock_diag_check_cookie(void *sk, __u32 *cookie);
 void sock_diag_save_cookie(void *sk, __u32 *cookie);
 
+int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
+
 extern struct sock *sock_diag_nlsk;
 #endif /* KERNEL */
 #endif
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 711bdefe7753..b9868e1fd62c 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -4,6 +4,8 @@
 #include <net/netlink.h>
 #include <net/net_namespace.h>
 #include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <net/sock.h>
 
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>
@@ -31,6 +33,27 @@ void sock_diag_save_cookie(void *sk, __u32 *cookie)
 }
 EXPORT_SYMBOL_GPL(sock_diag_save_cookie);
 
+int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attrtype)
+{
+	__u32 *mem;
+
+	mem = RTA_DATA(__RTA_PUT(skb, attrtype, SK_MEMINFO_VARS * sizeof(__u32)));
+
+	mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
+	mem[SK_MEMINFO_RCVBUF] = sk->sk_rcvbuf;
+	mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
+	mem[SK_MEMINFO_SNDBUF] = sk->sk_sndbuf;
+	mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
+	mem[SK_MEMINFO_WMEM_QUEUED] = sk->sk_wmem_queued;
+	mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
+
+	return 0;
+
+rtattr_failure:
+	return -EMSGSIZE;
+}
+EXPORT_SYMBOL_GPL(sock_diag_put_meminfo);
+
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
 {
 	mutex_lock(&sock_diag_table_mutex);
-- 
cgit v1.2.3


From c0636faa539ec4205ec50e80844a5b0454b4bbaa Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:53:32 +0000
Subject: inet_diag: Add the SKMEMINFO extension

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h | 3 ++-
 net/ipv4/inet_diag.c      | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index afa5d5c74169..34e8d52c1925 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -108,9 +108,10 @@ enum {
 	INET_DIAG_CONG,
 	INET_DIAG_TOS,
 	INET_DIAG_TCLASS,
+	INET_DIAG_SKMEMINFO,
 };
 
-#define INET_DIAG_MAX INET_DIAG_TCLASS
+#define INET_DIAG_MAX INET_DIAG_SKMEMINFO
 
 
 /* INET_DIAG_MEM */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index fb2e47ff59f7..2240a8e8c44d 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -136,6 +136,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 		minfo->idiag_tmem = sk_wmem_alloc_get(sk);
 	}
 
+	if (ext & (1 << (INET_DIAG_SKMEMINFO - 1)))
+		if (sock_diag_put_meminfo(sk, skb, INET_DIAG_SKMEMINFO))
+			goto rtattr_failure;
+
 	if (icsk == NULL) {
 		r->idiag_rqueue = r->idiag_wqueue = 0;
 		goto out;
-- 
cgit v1.2.3


From 257b529876cb45ec791eaa89e3d2ee0d16b49383 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 09:27:43 +0000
Subject: unix_diag: Add the MEMINFO extension

[ Fix indentation of sock_diag*() calls. -DaveM ]

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  2 ++
 net/unix/diag.c           | 20 ++++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index a5ce0f325745..93fdb782468a 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -18,6 +18,7 @@ struct unix_diag_req {
 #define UDIAG_SHOW_PEER		0x00000004	/* show peer socket info */
 #define UDIAG_SHOW_ICONS	0x00000008	/* show pending connections */
 #define UDIAG_SHOW_RQLEN	0x00000010	/* show skb receive queue len */
+#define UDIAG_SHOW_MEMINFO	0x00000020	/* show memory info of a socket */
 
 struct unix_diag_msg {
 	__u8	udiag_family;
@@ -35,6 +36,7 @@ enum {
 	UNIX_DIAG_PEER,
 	UNIX_DIAG_ICONS,
 	UNIX_DIAG_RQLEN,
+	UNIX_DIAG_MEMINFO,
 
 	UNIX_DIAG_MAX,
 };
diff --git a/net/unix/diag.c b/net/unix/diag.c
index c5bdbcb1c30b..98945f29da4f 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -127,23 +127,27 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r
 	sock_diag_save_cookie(sk, rep->udiag_cookie);
 
 	if ((req->udiag_show & UDIAG_SHOW_NAME) &&
-			sk_diag_dump_name(sk, skb))
+	    sk_diag_dump_name(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_VFS) &&
-			sk_diag_dump_vfs(sk, skb))
+	    sk_diag_dump_vfs(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_PEER) &&
-			sk_diag_dump_peer(sk, skb))
+	    sk_diag_dump_peer(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_ICONS) &&
-			sk_diag_dump_icons(sk, skb))
+	    sk_diag_dump_icons(sk, skb))
 		goto nlmsg_failure;
 
 	if ((req->udiag_show & UDIAG_SHOW_RQLEN) &&
-			sk_diag_show_rqlen(sk, skb))
+	    sk_diag_show_rqlen(sk, skb))
+		goto nlmsg_failure;
+
+	if ((req->udiag_show & UDIAG_SHOW_MEMINFO) &&
+	    sock_diag_put_meminfo(sk, skb, UNIX_DIAG_MEMINFO))
 		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
@@ -191,9 +195,9 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb)
 			if (!(req->udiag_states & (1 << sk->sk_state)))
 				goto next;
 			if (sk_diag_dump(sk, skb, req,
-						NETLINK_CB(cb->skb).pid,
-						cb->nlh->nlmsg_seq,
-						NLM_F_MULTI) < 0)
+					 NETLINK_CB(cb->skb).pid,
+					 cb->nlh->nlmsg_seq,
+					 NLM_F_MULTI) < 0)
 				goto done;
 next:
 			num++;
-- 
cgit v1.2.3


From c9da99e6475f92653139e43f3c30c0cd011a0fd8 Mon Sep 17 00:00:00 2001
From: Pavel Emelyanov <xemul@parallels.com>
Date: Fri, 30 Dec 2011 00:54:39 +0000
Subject: unix_diag: Fixup RQLEN extension report

While it's not too late fix the recently added RQLEN diag extension
to report rqlen and wqlen in the same way as TCP does.

I.e. for listening sockets the ack backlog length (which is the input
queue length for socket) in rqlen and the max ack backlog length in
wqlen, and what the CINQ/OUTQ ioctls do for established.

Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/unix_diag.h |  5 +++++
 net/unix/diag.c           | 13 ++++++++++++-
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/unix_diag.h b/include/linux/unix_diag.h
index 93fdb782468a..b1d2bf16b33c 100644
--- a/include/linux/unix_diag.h
+++ b/include/linux/unix_diag.h
@@ -46,4 +46,9 @@ struct unix_diag_vfs {
 	__u32	udiag_vfs_dev;
 };
 
+struct unix_diag_rqlen {
+	__u32	udiag_rqueue;
+	__u32	udiag_wqueue;
+};
+
 #endif
diff --git a/net/unix/diag.c b/net/unix/diag.c
index 98945f29da4f..6b7697fd911b 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -101,7 +101,18 @@ rtattr_failure:
 
 static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb)
 {
-	RTA_PUT_U32(nlskb, UNIX_DIAG_RQLEN, sk->sk_receive_queue.qlen);
+	struct unix_diag_rqlen *rql;
+
+	rql = UNIX_DIAG_PUT(nlskb, UNIX_DIAG_RQLEN, sizeof(*rql));
+
+	if (sk->sk_state == TCP_LISTEN) {
+		rql->udiag_rqueue = sk->sk_receive_queue.qlen;
+		rql->udiag_wqueue = sk->sk_max_ack_backlog;
+	} else {
+		rql->udiag_rqueue = (__u32)unix_inq_len(sk);
+		rql->udiag_wqueue = (__u32)unix_outq_len(sk);
+	}
+
 	return 0;
 
 rtattr_failure:
-- 
cgit v1.2.3


From 43c6759e73907e4c8e6624f70f5c4a860518b203 Mon Sep 17 00:00:00 2001
From: Javier Martinez Canillas <javier@dowhile0.org>
Date: Tue, 3 Jan 2012 20:23:18 -0500
Subject: net: phy: smsc: Move SMSC PHY constants to <linux/smscphy.h>

SMSC generation 4 LAN chips integrate an IEEE 802.3 ethernet physical layer.
The ethernet driver for this family of devices needs to access the SMSC PHY
registers and bit-fields.

So, this patch moves these constants to a place where it can be used for both
the PHY and LAN drivers.

Signed-off-by: Javier Martinez Canillas <javier@dowhile0.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/smsc/smsc911x.h |  4 ++++
 drivers/net/phy/smsc.c               | 21 +--------------------
 include/linux/smscphy.h              | 25 +++++++++++++++++++++++++
 3 files changed, 30 insertions(+), 20 deletions(-)
 create mode 100644 include/linux/smscphy.h

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h
index 8d67aacf8867..938ecf290813 100644
--- a/drivers/net/ethernet/smsc/smsc911x.h
+++ b/drivers/net/ethernet/smsc/smsc911x.h
@@ -401,4 +401,8 @@
 #include <asm/smsc911x.h>
 #endif
 
+#ifdef CONFIG_SMSC_PHY
+#include <linux/smscphy.h>
+#endif
+
 #endif				/* __SMSC911X_H__ */
diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c
index 342505c976d6..fc3e7e96c88c 100644
--- a/drivers/net/phy/smsc.c
+++ b/drivers/net/phy/smsc.c
@@ -22,26 +22,7 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <linux/netdevice.h>
-
-#define MII_LAN83C185_ISF 29 /* Interrupt Source Flags */
-#define MII_LAN83C185_IM  30 /* Interrupt Mask */
-#define MII_LAN83C185_CTRL_STATUS 17 /* Mode/Status Register */
-
-#define MII_LAN83C185_ISF_INT1 (1<<1) /* Auto-Negotiation Page Received */
-#define MII_LAN83C185_ISF_INT2 (1<<2) /* Parallel Detection Fault */
-#define MII_LAN83C185_ISF_INT3 (1<<3) /* Auto-Negotiation LP Ack */
-#define MII_LAN83C185_ISF_INT4 (1<<4) /* Link Down */
-#define MII_LAN83C185_ISF_INT5 (1<<5) /* Remote Fault Detected */
-#define MII_LAN83C185_ISF_INT6 (1<<6) /* Auto-Negotiation complete */
-#define MII_LAN83C185_ISF_INT7 (1<<7) /* ENERGYON */
-
-#define MII_LAN83C185_ISF_INT_ALL (0x0e)
-
-#define MII_LAN83C185_ISF_INT_PHYLIB_EVENTS \
-	(MII_LAN83C185_ISF_INT6 | MII_LAN83C185_ISF_INT4 | \
-	 MII_LAN83C185_ISF_INT7)
-
-#define MII_LAN83C185_EDPWRDOWN	(1 << 13) /* EDPWRDOWN */
+#include <linux/smscphy.h>
 
 static int smsc_phy_config_intr(struct phy_device *phydev)
 {
diff --git a/include/linux/smscphy.h b/include/linux/smscphy.h
new file mode 100644
index 000000000000..ce718cbce435
--- /dev/null
+++ b/include/linux/smscphy.h
@@ -0,0 +1,25 @@
+#ifndef __LINUX_SMSCPHY_H__
+#define __LINUX_SMSCPHY_H__
+
+#define MII_LAN83C185_ISF 29 /* Interrupt Source Flags */
+#define MII_LAN83C185_IM  30 /* Interrupt Mask */
+#define MII_LAN83C185_CTRL_STATUS 17 /* Mode/Status Register */
+
+#define MII_LAN83C185_ISF_INT1 (1<<1) /* Auto-Negotiation Page Received */
+#define MII_LAN83C185_ISF_INT2 (1<<2) /* Parallel Detection Fault */
+#define MII_LAN83C185_ISF_INT3 (1<<3) /* Auto-Negotiation LP Ack */
+#define MII_LAN83C185_ISF_INT4 (1<<4) /* Link Down */
+#define MII_LAN83C185_ISF_INT5 (1<<5) /* Remote Fault Detected */
+#define MII_LAN83C185_ISF_INT6 (1<<6) /* Auto-Negotiation complete */
+#define MII_LAN83C185_ISF_INT7 (1<<7) /* ENERGYON */
+
+#define MII_LAN83C185_ISF_INT_ALL (0x0e)
+
+#define MII_LAN83C185_ISF_INT_PHYLIB_EVENTS \
+	(MII_LAN83C185_ISF_INT6 | MII_LAN83C185_ISF_INT4 | \
+	 MII_LAN83C185_ISF_INT7)
+
+#define MII_LAN83C185_EDPWRDOWN (1 << 13) /* EDPWRDOWN */
+#define MII_LAN83C185_ENERGYON  (1 << 1)  /* ENERGYON */
+
+#endif /* __LINUX_SMSCPHY_H__ */
-- 
cgit v1.2.3


From 55664f324c2a1a6386dc88492c5c94aa3d336b93 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 3 Jan 2012 12:04:51 +0000
Subject: ethtool: Allow drivers to select RX NFC rule locations

Define special location values for RX NFC that request the driver to
select the actual rule location.  This allows for implementation on
devices that use hash-based filter lookup, whereas currently the API is
more suited to devices with TCAM lookup or linear search.

In ethtool_set_rxnfc() and the compat wrapper ethtool_ioctl(), copy
the structure back to user-space after insertion so that the actual
location is returned.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 26 ++++++++++++++++++++++++--
 net/core/ethtool.c      | 11 ++++++++++-
 net/socket.c            |  2 +-
 3 files changed, 35 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b38bf69310ee..d901714120a3 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -489,7 +489,10 @@ struct ethtool_rx_flow_spec {
  * on return.
  *
  * For %ETHTOOL_GRXCLSRLCNT, @rule_cnt is set to the number of defined
- * rules on return.
+ * rules on return.  If @data is non-zero on return then it is the
+ * size of the rule table, plus the flag %RX_CLS_LOC_SPECIAL if the
+ * driver supports any special location values.  If that flag is not
+ * set in @data then special location values should not be used.
  *
  * For %ETHTOOL_GRXCLSRULE, @fs.@location specifies the location of an
  * existing rule on entry and @fs contains the rule on return.
@@ -501,10 +504,23 @@ struct ethtool_rx_flow_spec {
  * must use the second parameter to get_rxnfc() instead of @rule_locs.
  *
  * For %ETHTOOL_SRXCLSRLINS, @fs specifies the rule to add or update.
- * @fs.@location specifies the location to use and must not be ignored.
+ * @fs.@location either specifies the location to use or is a special
+ * location value with %RX_CLS_LOC_SPECIAL flag set.  On return,
+ * @fs.@location is the actual rule location.
  *
  * For %ETHTOOL_SRXCLSRLDEL, @fs.@location specifies the location of an
  * existing rule on entry.
+ *
+ * A driver supporting the special location values for
+ * %ETHTOOL_SRXCLSRLINS may add the rule at any suitable unused
+ * location, and may remove a rule at a later location (lower
+ * priority) that matches exactly the same set of flows.  The special
+ * values are: %RX_CLS_LOC_ANY, selecting any location;
+ * %RX_CLS_LOC_FIRST, selecting the first suitable location (maximum
+ * priority); and %RX_CLS_LOC_LAST, selecting the last suitable
+ * location (minimum priority).  Additional special values may be
+ * defined in future and drivers must return -%EINVAL for any
+ * unrecognised value.
  */
 struct ethtool_rxnfc {
 	__u32				cmd;
@@ -1141,6 +1157,12 @@ struct ethtool_ops {
 
 #define	RX_CLS_FLOW_DISC	0xffffffffffffffffULL
 
+/* Special RX classification rule insert location values */
+#define RX_CLS_LOC_SPECIAL	0x80000000	/* flag */
+#define RX_CLS_LOC_ANY		0xffffffff
+#define RX_CLS_LOC_FIRST	0xfffffffe
+#define RX_CLS_LOC_LAST		0xfffffffd
+
 /* Reset flags */
 /* The reset() operation must clear the flags for the components which
  * were actually reset.  On successful return, the flags indicate the
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 597732c989ca..e88b80d41f73 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -439,6 +439,7 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
 {
 	struct ethtool_rxnfc info;
 	size_t info_size = sizeof(info);
+	int rc;
 
 	if (!dev->ethtool_ops->set_rxnfc)
 		return -EOPNOTSUPP;
@@ -454,7 +455,15 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
 	if (copy_from_user(&info, useraddr, info_size))
 		return -EFAULT;
 
-	return dev->ethtool_ops->set_rxnfc(dev, &info);
+	rc = dev->ethtool_ops->set_rxnfc(dev, &info);
+	if (rc)
+		return rc;
+
+	if (cmd == ETHTOOL_SRXCLSRLINS &&
+	    copy_to_user(useraddr, &info, info_size))
+		return -EFAULT;
+
+	return 0;
 }
 
 static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
diff --git a/net/socket.c b/net/socket.c
index e62b4f055071..2cad581318fe 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2758,10 +2758,10 @@ static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32)
 	case ETHTOOL_GRXRINGS:
 	case ETHTOOL_GRXCLSRLCNT:
 	case ETHTOOL_GRXCLSRULE:
+	case ETHTOOL_SRXCLSRLINS:
 		convert_out = true;
 		/* fall through */
 	case ETHTOOL_SRXCLSRLDEL:
-	case ETHTOOL_SRXCLSRLINS:
 		buf_size += sizeof(struct ethtool_rxnfc);
 		convert_in = true;
 		break;
-- 
cgit v1.2.3


From 6cfb5e759d47f037cbd0953ec2c3ceb220ed9e96 Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Tue, 3 Jan 2012 12:07:59 +0000
Subject: ethtool: Remove ethtool_ops::set_rx_ntuple operation

All implementations have been converted to implement set_rxnfc
instead.

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h |  4 ----
 net/core/ethtool.c      | 55 -------------------------------------------------
 2 files changed, 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index d901714120a3..da5b2de99ae4 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -859,8 +859,6 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings)
  * @reset: Reset (part of) the device, as specified by a bitmask of
  *	flags from &enum ethtool_reset_flags.  Returns a negative
  *	error code or zero.
- * @set_rx_ntuple: Set an RX n-tuple rule.  Returns a negative error code
- *	or zero.
  * @get_rxfh_indir_size: Get the size of the RX flow hash indirection table.
  *	Returns zero if not supported for this specific device.
  * @get_rxfh_indir: Get the contents of the RX flow hash indirection table.
@@ -929,8 +927,6 @@ struct ethtool_ops {
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
 	int	(*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
-	int	(*set_rx_ntuple)(struct net_device *,
-				 struct ethtool_rx_ntuple *);
 	u32	(*get_rxfh_indir_size)(struct net_device *);
 	int	(*get_rxfh_indir)(struct net_device *, u32 *);
 	int	(*set_rxfh_indir)(struct net_device *, const u32 *);
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index e88b80d41f73..921aa2b4b415 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -631,58 +631,6 @@ out:
 	return ret;
 }
 
-/*
- * ethtool does not (or did not) set masks for flow parameters that are
- * not specified, so if both value and mask are 0 then this must be
- * treated as equivalent to a mask with all bits set.  Implement that
- * here rather than in drivers.
- */
-static void rx_ntuple_fix_masks(struct ethtool_rx_ntuple_flow_spec *fs)
-{
-	struct ethtool_tcpip4_spec *entry = &fs->h_u.tcp_ip4_spec;
-	struct ethtool_tcpip4_spec *mask = &fs->m_u.tcp_ip4_spec;
-
-	if (fs->flow_type != TCP_V4_FLOW &&
-	    fs->flow_type != UDP_V4_FLOW &&
-	    fs->flow_type != SCTP_V4_FLOW)
-		return;
-
-	if (!(entry->ip4src | mask->ip4src))
-		mask->ip4src = htonl(0xffffffff);
-	if (!(entry->ip4dst | mask->ip4dst))
-		mask->ip4dst = htonl(0xffffffff);
-	if (!(entry->psrc | mask->psrc))
-		mask->psrc = htons(0xffff);
-	if (!(entry->pdst | mask->pdst))
-		mask->pdst = htons(0xffff);
-	if (!(entry->tos | mask->tos))
-		mask->tos = 0xff;
-	if (!(fs->vlan_tag | fs->vlan_tag_mask))
-		fs->vlan_tag_mask = 0xffff;
-	if (!(fs->data | fs->data_mask))
-		fs->data_mask = 0xffffffffffffffffULL;
-}
-
-static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
-						    void __user *useraddr)
-{
-	struct ethtool_rx_ntuple cmd;
-	const struct ethtool_ops *ops = dev->ethtool_ops;
-
-	if (!ops->set_rx_ntuple)
-		return -EOPNOTSUPP;
-
-	if (!(dev->features & NETIF_F_NTUPLE))
-		return -EINVAL;
-
-	if (copy_from_user(&cmd, useraddr, sizeof(cmd)))
-		return -EFAULT;
-
-	rx_ntuple_fix_masks(&cmd.fs);
-
-	return ops->set_rx_ntuple(dev, &cmd);
-}
-
 static int ethtool_get_regs(struct net_device *dev, char __user *useraddr)
 {
 	struct ethtool_regs regs;
@@ -1495,9 +1443,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	case ETHTOOL_RESET:
 		rc = ethtool_reset(dev, useraddr);
 		break;
-	case ETHTOOL_SRXNTUPLE:
-		rc = ethtool_set_rx_ntuple(dev, useraddr);
-		break;
 	case ETHTOOL_GSSET_INFO:
 		rc = ethtool_get_sset_info(dev, useraddr);
 		break;
-- 
cgit v1.2.3


From 288e0713f469c03dbc412153b5341d6dfc2c9907 Mon Sep 17 00:00:00 2001
From: Ilan Elias <ilane@ti.com>
Date: Thu, 22 Dec 2011 11:51:54 +0200
Subject: NFC: Export a new attribute nfcid1 in target info

The nfcid1 is the NFC-A identifier.
It is exported as an attribute of the target info
(returned as a response to NFC_CMD_GET_TARGET).

Signed-off-by: Ilan Elias <ilane@ti.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nfc.h   | 2 ++
 include/net/nfc/nfc.h | 3 +++
 net/nfc/nci/ntf.c     | 6 ++++++
 net/nfc/netlink.c     | 3 +++
 4 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nfc.h b/include/linux/nfc.h
index 89fee4ab1904..01d4e5d60325 100644
--- a/include/linux/nfc.h
+++ b/include/linux/nfc.h
@@ -88,6 +88,7 @@ enum nfc_commands {
  * @NFC_ATTR_TARGET_SENS_RES: NFC-A targets extra information such as NFCID
  * @NFC_ATTR_TARGET_SEL_RES: NFC-A targets extra information (useful if the
  *	target is not NFC-Forum compliant)
+ * @NFC_ATTR_TARGET_NFCID1: NFC-A targets identifier, max 10 bytes
  * @NFC_ATTR_COMM_MODE: Passive or active mode
  * @NFC_ATTR_RF_MODE: Initiator or target
  */
@@ -99,6 +100,7 @@ enum nfc_attrs {
 	NFC_ATTR_TARGET_INDEX,
 	NFC_ATTR_TARGET_SENS_RES,
 	NFC_ATTR_TARGET_SEL_RES,
+	NFC_ATTR_TARGET_NFCID1,
 	NFC_ATTR_COMM_MODE,
 	NFC_ATTR_RF_MODE,
 /* private: internal use only */
diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h
index ccfe757a94ec..8696b773a695 100644
--- a/include/net/nfc/nfc.h
+++ b/include/net/nfc/nfc.h
@@ -65,12 +65,15 @@ struct nfc_ops {
 
 #define NFC_TARGET_IDX_ANY -1
 #define NFC_MAX_GT_LEN 48
+#define NFC_MAX_NFCID1_LEN 10
 
 struct nfc_target {
 	u32 idx;
 	u32 supported_protocols;
 	u16 sens_res;
 	u8 sel_res;
+	u8 nfcid1_len;
+	u8 nfcid1[NFC_MAX_NFCID1_LEN];
 };
 
 struct nfc_genl_data {
diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c
index 352f7a2321d9..b16a8dc2afbe 100644
--- a/net/nfc/nci/ntf.c
+++ b/net/nfc/nci/ntf.c
@@ -154,6 +154,12 @@ static void nci_target_found(struct nci_dev *ndev,
 
 	nfc_tgt.sens_res = ntf->rf_tech_specific_params.nfca_poll.sens_res;
 	nfc_tgt.sel_res = ntf->rf_tech_specific_params.nfca_poll.sel_res;
+	nfc_tgt.nfcid1_len = ntf->rf_tech_specific_params.nfca_poll.nfcid1_len;
+	if (nfc_tgt.nfcid1_len > 0) {
+		memcpy(nfc_tgt.nfcid1,
+			ntf->rf_tech_specific_params.nfca_poll.nfcid1,
+			nfc_tgt.nfcid1_len);
+	}
 
 	if (!(nfc_tgt.supported_protocols & ndev->poll_prots)) {
 		pr_debug("the target found does not have the desired protocol\n");
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 43a1c47756a7..6989dfa28ee2 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -67,6 +67,9 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target,
 				target->supported_protocols);
 	NLA_PUT_U16(msg, NFC_ATTR_TARGET_SENS_RES, target->sens_res);
 	NLA_PUT_U8(msg, NFC_ATTR_TARGET_SEL_RES, target->sel_res);
+	if (target->nfcid1_len > 0)
+		NLA_PUT(msg, NFC_ATTR_TARGET_NFCID1, target->nfcid1_len,
+				target->nfcid1);
 
 	return genlmsg_end(msg, hdr);
 
-- 
cgit v1.2.3


From 68bad94ed801d955535cb50dde3412944a24530c Mon Sep 17 00:00:00 2001
From: Neerav Parikh <Neerav.Parikh@intel.com>
Date: Wed, 4 Jan 2012 20:23:39 +0000
Subject: netdev: FCoE: Add new ndo_get_fcoe_hbainfo() call

This adds a new ndo_get_fcoe_hbainfo() call in
net_device_ops for FCoE protocol stack.

If supported by the underlying device, the FCoE protocol
stack will call this to get device specific information
from the underlying device.
This information will then be utilized by the FCoE protocol
stack to register Fiber Channel HBA attributes with the
Fiber Channel Management Service via Fabric Device
Management Interface (FDMI) as per the T11 FC-GS
specification.

Changes in v2:
- As per comments from David Miller aligning the parameters
of the ndo_get_fcoe_hbainfo()

Signed-off-by: Neerav Parikh <Neerav.Parikh@intel.com>
Tested-by: Ross Brattain <ross.b.brattain@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a776a675c0e5..a1d109590da4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -707,6 +707,23 @@ struct netdev_tc_txq {
 	u16 offset;
 };
 
+#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
+/*
+ * This structure is to hold information about the device
+ * configured to run FCoE protocol stack.
+ */
+struct netdev_fcoe_hbainfo {
+	char	manufacturer[64];
+	char	serial_number[64];
+	char	hardware_version[64];
+	char	driver_version[64];
+	char	optionrom_version[64];
+	char	firmware_version[64];
+	char	model[256];
+	char	model_description[256];
+};
+#endif
+
 /*
  * This structure defines the management hooks for network devices.
  * The following hooks can be defined; unless noted otherwise, they are
@@ -847,6 +864,13 @@ struct netdev_tc_txq {
  *	perform necessary setup and returns 1 to indicate the device is set up
  *	successfully to perform DDP on this I/O, otherwise this returns 0.
  *
+ * int (*ndo_fcoe_get_hbainfo)(struct net_device *dev,
+ *			       struct netdev_fcoe_hbainfo *hbainfo);
+ *	Called when the FCoE Protocol stack wants information on the underlying
+ *	device. This information is utilized by the FCoE protocol stack to
+ *	register attributes with Fiber Channel management service as per the
+ *	FC-GS Fabric Device Management Information(FDMI) specification.
+ *
  * int (*ndo_fcoe_get_wwn)(struct net_device *dev, u64 *wwn, int type);
  *	Called when the underlying device wants to override default World Wide
  *	Name (WWN) generation mechanism in FCoE protocol stack to pass its own
@@ -950,6 +974,8 @@ struct net_device_ops {
 						       u16 xid,
 						       struct scatterlist *sgl,
 						       unsigned int sgc);
+	int			(*ndo_fcoe_get_hbainfo)(struct net_device *dev,
+							struct netdev_fcoe_hbainfo *hbainfo);
 #endif
 
 #if IS_ENABLED(CONFIG_LIBFCOE)
-- 
cgit v1.2.3


From 18cb809850fb499ad9bf288696a95f4071f73931 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 4 Jan 2012 14:18:38 +0000
Subject: net_sched: sfq: extend limits

SFQ as implemented in Linux is very limited, with at most 127 flows
and limit of 127 packets. [ So if 127 flows are active, we have one
packet per flow ]

This patch brings to SFQ following features to cope with modern needs.

- Ability to specify a smaller per flow limit of inflight packets.
    (default value being at 127 packets)

- Ability to have up to 65408 active flows (instead of 127)

- Ability to have head drops instead of tail drops
  (to drop old packets from a flow)

Example of use : No more than 20 packets per flow, max 8000 flows, max
20000 packets in SFQ qdisc, hash table of 65536 slots.

tc qdisc add ... sfq \
        flows 8000 \
        depth 20 \
        headdrop \
        limit 20000 \
	divisor 65536

Ram usage :

2 bytes per hash table entry (instead of previous 1 byte/entry)
32 bytes per flow on 64bit arches, instead of 384 for QFQ, so much
better cache hit ratio.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Dave Taht <dave.taht@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  16 ++---
 net/sched/sch_sfq.c       | 175 +++++++++++++++++++++++++++++++---------------
 2 files changed, 124 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index 8daced32a014..8f1b928f777c 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -162,19 +162,17 @@ struct tc_sfq_qopt {
 	unsigned	flows;		/* Maximal number of flows  */
 };
 
+struct tc_sfq_qopt_v1 {
+	struct tc_sfq_qopt v0;
+	unsigned int	depth;		/* max number of packets per flow */
+	unsigned int	headdrop;
+};
+
+
 struct tc_sfq_xstats {
 	__s32		allot;
 };
 
-/*
- *  NOTE: limit, divisor and flows are hardwired to code at the moment.
- *
- *	limit=flows=128, divisor=1024;
- *
- *	The only reason for this is efficiency, it is possible
- *	to change these parameters in compile time.
- */
-
 /* RED section */
 
 enum {
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index 843018154a5c..0a7964009e8c 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -66,16 +66,18 @@
 	SFQ is superior for this purpose.
 
 	IMPLEMENTATION:
-	This implementation limits maximal queue length to 128;
-	max mtu to 2^18-1; max 128 flows, number of hash buckets to 1024.
-	The only goal of this restrictions was that all data
-	fit into one 4K page on 32bit arches.
+	This implementation limits :
+	- maximal queue length per flow to 127 packets.
+	- max mtu to 2^18-1;
+	- max 65408 flows,
+	- number of hash buckets to 65536.
 
 	It is easy to increase these values, but not in flight.  */
 
-#define SFQ_DEPTH		128 /* max number of packets per flow */
-#define SFQ_SLOTS		128 /* max number of flows */
-#define SFQ_EMPTY_SLOT		255
+#define SFQ_MAX_DEPTH		127 /* max number of packets per flow */
+#define SFQ_DEFAULT_FLOWS	128
+#define SFQ_MAX_FLOWS		(0x10000 - SFQ_MAX_DEPTH - 1) /* max number of flows */
+#define SFQ_EMPTY_SLOT		0xffff
 #define SFQ_DEFAULT_HASH_DIVISOR 1024
 
 /* We use 16 bits to store allot, and want to handle packets up to 64K
@@ -84,13 +86,13 @@
 #define SFQ_ALLOT_SHIFT		3
 #define SFQ_ALLOT_SIZE(X)	DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT)
 
-/* This type should contain at least SFQ_DEPTH + SFQ_SLOTS values */
-typedef unsigned char sfq_index;
+/* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */
+typedef u16 sfq_index;
 
 /*
  * We dont use pointers to save space.
- * Small indexes [0 ... SFQ_SLOTS - 1] are 'pointers' to slots[] array
- * while following values [SFQ_SLOTS ... SFQ_SLOTS + SFQ_DEPTH - 1]
+ * Small indexes [0 ... SFQ_MAX_FLOWS - 1] are 'pointers' to slots[] array
+ * while following values [SFQ_MAX_FLOWS ... SFQ_MAX_FLOWS + SFQ_MAX_DEPTH]
  * are 'pointers' to dep[] array
  */
 struct sfq_head {
@@ -102,28 +104,38 @@ struct sfq_slot {
 	struct sk_buff	*skblist_next;
 	struct sk_buff	*skblist_prev;
 	sfq_index	qlen; /* number of skbs in skblist */
-	sfq_index	next; /* next slot in sfq chain */
+	sfq_index	next; /* next slot in sfq RR chain */
 	struct sfq_head dep; /* anchor in dep[] chains */
 	unsigned short	hash; /* hash value (index in ht[]) */
 	short		allot; /* credit for this slot */
 };
 
 struct sfq_sched_data {
-/* Parameters */
-	int		perturb_period;
-	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
-	int		limit;
+/* frequently used fields */
+	int		limit;		/* limit of total number of packets in this qdisc */
 	unsigned int	divisor;	/* number of slots in hash table */
-/* Variables */
-	struct tcf_proto *filter_list;
-	struct timer_list perturb_timer;
+	unsigned int	maxflows;	/* number of flows in flows array */
+	int		headdrop;
+	int		maxdepth;	/* limit of packets per flow */
+
 	u32		perturbation;
+	struct tcf_proto *filter_list;
 	sfq_index	cur_depth;	/* depth of longest slot */
 	unsigned short  scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */
 	struct sfq_slot *tail;		/* current slot in round */
-	sfq_index	*ht;		/* Hash table (divisor slots) */
-	struct sfq_slot	slots[SFQ_SLOTS];
-	struct sfq_head	dep[SFQ_DEPTH];	/* Linked list of slots, indexed by depth */
+	sfq_index	*ht;		/* Hash table ('divisor' slots) */
+	struct sfq_slot	*slots;		/* Flows table ('maxflows' entries) */
+
+	struct sfq_head	dep[SFQ_MAX_DEPTH + 1];
+					/* Linked lists of slots, indexed by depth
+					 * dep[0] : list of unused flows
+					 * dep[1] : list of flows with 1 packet
+					 * dep[X] : list of flows with X packets
+					 */
+
+	int		perturb_period;
+	unsigned int	quantum;	/* Allotment per round: MUST BE >= MTU */
+	struct timer_list perturb_timer;
 };
 
 /*
@@ -131,9 +143,9 @@ struct sfq_sched_data {
  */
 static inline struct sfq_head *sfq_dep_head(struct sfq_sched_data *q, sfq_index val)
 {
-	if (val < SFQ_SLOTS)
+	if (val < SFQ_MAX_FLOWS)
 		return &q->slots[val].dep;
-	return &q->dep[val - SFQ_SLOTS];
+	return &q->dep[val - SFQ_MAX_FLOWS];
 }
 
 /*
@@ -199,18 +211,19 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch,
 }
 
 /*
- * x : slot number [0 .. SFQ_SLOTS - 1]
+ * x : slot number [0 .. SFQ_MAX_FLOWS - 1]
  */
 static inline void sfq_link(struct sfq_sched_data *q, sfq_index x)
 {
 	sfq_index p, n;
-	int qlen = q->slots[x].qlen;
+	struct sfq_slot *slot = &q->slots[x];
+	int qlen = slot->qlen;
 
-	p = qlen + SFQ_SLOTS;
+	p = qlen + SFQ_MAX_FLOWS;
 	n = q->dep[qlen].next;
 
-	q->slots[x].dep.next = n;
-	q->slots[x].dep.prev = p;
+	slot->dep.next = n;
+	slot->dep.prev = p;
 
 	q->dep[qlen].next = x;		/* sfq_dep_head(q, p)->next = x */
 	sfq_dep_head(q, n)->prev = x;
@@ -275,6 +288,7 @@ static inline struct sk_buff *slot_dequeue_head(struct sfq_slot *slot)
 
 static inline void slot_queue_init(struct sfq_slot *slot)
 {
+	memset(slot, 0, sizeof(*slot));
 	slot->skblist_prev = slot->skblist_next = (struct sk_buff *)slot;
 }
 
@@ -305,7 +319,7 @@ static unsigned int sfq_drop(struct Qdisc *sch)
 		x = q->dep[d].next;
 		slot = &q->slots[x];
 drop:
-		skb = slot_dequeue_tail(slot);
+		skb = q->headdrop ? slot_dequeue_head(slot) : slot_dequeue_tail(slot);
 		len = qdisc_pkt_len(skb);
 		sfq_dec(q, x);
 		kfree_skb(skb);
@@ -349,16 +363,27 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 	slot = &q->slots[x];
 	if (x == SFQ_EMPTY_SLOT) {
 		x = q->dep[0].next; /* get a free slot */
+		if (x >= SFQ_MAX_FLOWS)
+			return qdisc_drop(skb, sch);
 		q->ht[hash] = x;
 		slot = &q->slots[x];
 		slot->hash = hash;
 	}
 
-	/* If selected queue has length q->limit, do simple tail drop,
-	 * i.e. drop _this_ packet.
-	 */
-	if (slot->qlen >= q->limit)
-		return qdisc_drop(skb, sch);
+	if (slot->qlen >= q->maxdepth) {
+		struct sk_buff *head;
+
+		if (!q->headdrop)
+			return qdisc_drop(skb, sch);
+
+		head = slot_dequeue_head(slot);
+		sch->qstats.backlog -= qdisc_pkt_len(head);
+		qdisc_drop(head, sch);
+
+		sch->qstats.backlog += qdisc_pkt_len(skb);
+		slot_queue_add(slot, skb);
+		return NET_XMIT_CN;
+	}
 
 	sch->qstats.backlog += qdisc_pkt_len(skb);
 	slot_queue_add(slot, skb);
@@ -445,16 +470,18 @@ sfq_reset(struct Qdisc *sch)
  * We dont use sfq_dequeue()/sfq_enqueue() because we dont want to change
  * counters.
  */
-static void sfq_rehash(struct sfq_sched_data *q)
+static void sfq_rehash(struct Qdisc *sch)
 {
+	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct sk_buff *skb;
 	int i;
 	struct sfq_slot *slot;
 	struct sk_buff_head list;
+	int dropped = 0;
 
 	__skb_queue_head_init(&list);
 
-	for (i = 0; i < SFQ_SLOTS; i++) {
+	for (i = 0; i < q->maxflows; i++) {
 		slot = &q->slots[i];
 		if (!slot->qlen)
 			continue;
@@ -474,10 +501,18 @@ static void sfq_rehash(struct sfq_sched_data *q)
 		slot = &q->slots[x];
 		if (x == SFQ_EMPTY_SLOT) {
 			x = q->dep[0].next; /* get a free slot */
+			if (x >= SFQ_MAX_FLOWS) {
+drop:				sch->qstats.backlog -= qdisc_pkt_len(skb);
+				kfree_skb(skb);
+				dropped++;
+				continue;
+			}
 			q->ht[hash] = x;
 			slot = &q->slots[x];
 			slot->hash = hash;
 		}
+		if (slot->qlen >= q->maxdepth)
+			goto drop;
 		slot_queue_add(slot, skb);
 		sfq_inc(q, x);
 		if (slot->qlen == 1) {		/* The flow is new */
@@ -491,6 +526,8 @@ static void sfq_rehash(struct sfq_sched_data *q)
 			slot->allot = q->scaled_quantum;
 		}
 	}
+	sch->q.qlen -= dropped;
+	qdisc_tree_decrease_qlen(sch, dropped);
 }
 
 static void sfq_perturbation(unsigned long arg)
@@ -502,7 +539,7 @@ static void sfq_perturbation(unsigned long arg)
 	spin_lock(root_lock);
 	q->perturbation = net_random();
 	if (!q->filter_list && q->tail)
-		sfq_rehash(q);
+		sfq_rehash(sch);
 	spin_unlock(root_lock);
 
 	if (q->perturb_period)
@@ -513,23 +550,39 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	struct tc_sfq_qopt *ctl = nla_data(opt);
+	struct tc_sfq_qopt_v1 *ctl_v1 = NULL;
 	unsigned int qlen;
 
 	if (opt->nla_len < nla_attr_size(sizeof(*ctl)))
 		return -EINVAL;
-
+	if (opt->nla_len >= nla_attr_size(sizeof(*ctl_v1)))
+		ctl_v1 = nla_data(opt);
 	if (ctl->divisor &&
 	    (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536))
 		return -EINVAL;
 
 	sch_tree_lock(sch);
-	q->quantum = ctl->quantum ? : psched_mtu(qdisc_dev(sch));
-	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+	if (ctl->quantum) {
+		q->quantum = ctl->quantum;
+		q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
+	}
 	q->perturb_period = ctl->perturb_period * HZ;
-	if (ctl->limit)
-		q->limit = min_t(u32, ctl->limit, SFQ_DEPTH - 1);
-	if (ctl->divisor)
+	if (ctl->flows)
+		q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS);
+	if (ctl->divisor) {
 		q->divisor = ctl->divisor;
+		q->maxflows = min_t(u32, q->maxflows, q->divisor);
+	}
+	if (ctl_v1) {
+		if (ctl_v1->depth)
+			q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH);
+		q->headdrop = ctl_v1->headdrop;
+	}
+	if (ctl->limit) {
+		q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows);
+		q->maxflows = min_t(u32, q->maxflows, q->limit);
+	}
+
 	qlen = sch->q.qlen;
 	while (sch->q.qlen > q->limit)
 		sfq_drop(sch);
@@ -571,6 +624,7 @@ static void sfq_destroy(struct Qdisc *sch)
 	q->perturb_period = 0;
 	del_timer_sync(&q->perturb_timer);
 	sfq_free(q->ht);
+	sfq_free(q->slots);
 }
 
 static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
@@ -582,15 +636,17 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	q->perturb_timer.data = (unsigned long)sch;
 	init_timer_deferrable(&q->perturb_timer);
 
-	for (i = 0; i < SFQ_DEPTH; i++) {
-		q->dep[i].next = i + SFQ_SLOTS;
-		q->dep[i].prev = i + SFQ_SLOTS;
+	for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
+		q->dep[i].next = i + SFQ_MAX_FLOWS;
+		q->dep[i].prev = i + SFQ_MAX_FLOWS;
 	}
 
-	q->limit = SFQ_DEPTH - 1;
+	q->limit = SFQ_MAX_DEPTH;
+	q->maxdepth = SFQ_MAX_DEPTH;
 	q->cur_depth = 0;
 	q->tail = NULL;
 	q->divisor = SFQ_DEFAULT_HASH_DIVISOR;
+	q->maxflows = SFQ_DEFAULT_FLOWS;
 	q->quantum = psched_mtu(qdisc_dev(sch));
 	q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum);
 	q->perturb_period = 0;
@@ -603,14 +659,15 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
 	}
 
 	q->ht = sfq_alloc(sizeof(q->ht[0]) * q->divisor);
-	if (!q->ht) {
+	q->slots = sfq_alloc(sizeof(q->slots[0]) * q->maxflows);
+	if (!q->ht || !q->slots) {
 		sfq_destroy(sch);
 		return -ENOMEM;
 	}
 	for (i = 0; i < q->divisor; i++)
 		q->ht[i] = SFQ_EMPTY_SLOT;
 
-	for (i = 0; i < SFQ_SLOTS; i++) {
+	for (i = 0; i < q->maxflows; i++) {
 		slot_queue_init(&q->slots[i]);
 		sfq_link(q, i);
 	}
@@ -625,14 +682,16 @@ static int sfq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct sfq_sched_data *q = qdisc_priv(sch);
 	unsigned char *b = skb_tail_pointer(skb);
-	struct tc_sfq_qopt opt;
-
-	opt.quantum = q->quantum;
-	opt.perturb_period = q->perturb_period / HZ;
-
-	opt.limit = q->limit;
-	opt.divisor = q->divisor;
-	opt.flows = q->limit;
+	struct tc_sfq_qopt_v1 opt;
+
+	memset(&opt, 0, sizeof(opt));
+	opt.v0.quantum	= q->quantum;
+	opt.v0.perturb_period = q->perturb_period / HZ;
+	opt.v0.limit	= q->limit;
+	opt.v0.divisor	= q->divisor;
+	opt.v0.flows	= q->maxflows;
+	opt.depth	= q->maxdepth;
+	opt.headdrop	= q->headdrop;
 
 	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
 
-- 
cgit v1.2.3


From 9f42f126154786e6e76df513004800c8c633f020 Mon Sep 17 00:00:00 2001
From: Ian Campbell <Ian.Campbell@citrix.com>
Date: Thu, 5 Jan 2012 07:13:39 +0000
Subject: net: pack skb_shared_info more efficiently

nr_frags can be 8 bits since 256 is plenty of fragments. This allows it to be
packed with tx_flags.

Also by moving ip6_frag_id and dataref (both 4 bytes) next to each other we can
avoid a hole between ip6_frag_id and frag_list on 64 bit systems.

Signed-off-by: Ian Campbell <ian.campbell@citrix.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f47f0c3939f2..50db9b04a552 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -242,15 +242,15 @@ struct ubuf_info {
  * the end of the header data, ie. at skb->end.
  */
 struct skb_shared_info {
-	unsigned short	nr_frags;
+	unsigned char	nr_frags;
+	__u8		tx_flags;
 	unsigned short	gso_size;
 	/* Warning: this field is not always filled in (UFO)! */
 	unsigned short	gso_segs;
 	unsigned short  gso_type;
-	__be32          ip6_frag_id;
-	__u8		tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+	__be32          ip6_frag_id;
 
 	/*
 	 * Warning : all fields before dataref are cleared in __alloc_skb()
-- 
cgit v1.2.3