From 2b4aa3cec4873005a0d5155395b34641584b3a4e Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 15 Sep 2015 20:04:07 -0500
Subject: net: Remove dev_queue_xmit_sk

A function with weird arguments that it will never use to accomdate a
netfilter callback prototype is absolutely in the core of the
networking stack.  Frankly it does not make sense and it causes a lot
of confusion as to why arguments that are never used are being passed
to the function.

As I am preparing to make a second change to arguments to the okfn even
the names stops making sense.

As I have removed the two callers of this function remove this confusion
from the networking stack.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 88a00694eda5..e664f87c8e4c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2213,11 +2213,7 @@ int dev_close(struct net_device *dev);
 int dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
 int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb);
-int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb);
-static inline int dev_queue_xmit(struct sk_buff *skb)
-{
-	return dev_queue_xmit_sk(skb->sk, skb);
-}
+int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
 int register_netdevice(struct net_device *dev);
 void unregister_netdevice_queue(struct net_device *dev, struct list_head *head);
-- 
cgit v1.2.3


From 04eb44890e5bb3cc855e5c0f18a05eb7311364b7 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 15 Sep 2015 20:04:15 -0500
Subject: bridge: Add br_netif_receive_skb remove netif_receive_skb_sk

netif_receive_skb_sk is only called once in the bridge code, replace
it with a bridge specific function that calls netif_receive_skb.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 6 +-----
 net/bridge/br_input.c     | 7 ++++++-
 net/core/dev.c            | 4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e664f87c8e4c..97ab5c9a7069 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2985,11 +2985,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb)
 
 int netif_rx(struct sk_buff *skb);
 int netif_rx_ni(struct sk_buff *skb);
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb);
-static inline int netif_receive_skb(struct sk_buff *skb)
-{
-	return netif_receive_skb_sk(skb->sk, skb);
-}
+int netif_receive_skb(struct sk_buff *skb);
 gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
 void napi_gro_flush(struct napi_struct *napi, bool flush_old);
 struct sk_buff *napi_get_frags(struct napi_struct *napi);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index f921a5dce22d..2359c041e27c 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -26,6 +26,11 @@
 br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
 EXPORT_SYMBOL(br_should_route_hook);
 
+static int br_netif_receive_skb(struct sock *sk, struct sk_buff *skb)
+{
+	return netif_receive_skb(skb);
+}
+
 static int br_pass_frame_up(struct sk_buff *skb)
 {
 	struct net_device *indev, *brdev = BR_INPUT_SKB_CB(skb)->brdev;
@@ -57,7 +62,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
 
 	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_LOCAL_IN, NULL, skb,
 		       indev, NULL,
-		       netif_receive_skb_sk);
+		       br_netif_receive_skb);
 }
 
 static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
diff --git a/net/core/dev.c b/net/core/dev.c
index dcf9ff913925..7db9b012dfb7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3982,13 +3982,13 @@ static int netif_receive_skb_internal(struct sk_buff *skb)
  *	NET_RX_SUCCESS: no congestion
  *	NET_RX_DROP: packet was dropped
  */
-int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb)
+int netif_receive_skb(struct sk_buff *skb)
 {
 	trace_netif_receive_skb_entry(skb);
 
 	return netif_receive_skb_internal(skb);
 }
-EXPORT_SYMBOL(netif_receive_skb_sk);
+EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending
  * Called with irqs disabled.
-- 
cgit v1.2.3


From 0c4b51f0054ce85c0ec578ab818f0631834573eb Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 15 Sep 2015 20:04:18 -0500
Subject: netfilter: Pass net into okfn

This is immediately motivated by the bridge code that chains functions that
call into netfilter.  Without passing net into the okfns the bridge code would
need to guess about the best expression for the network namespace to process
packets in.

As net is frequently one of the first things computed in continuation functions
after netfilter has done it's job passing in the desired network namespace is in
many cases a code simplification.

To support this change the function dst_output_okfn is introduced to
simplify passing dst_output as an okfn.  For the moment dst_output_okfn
just silently drops the struct net.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c                    |  2 +-
 include/linux/netdevice.h            |  2 +-
 include/linux/netfilter.h            | 26 ++++++++++++++------------
 include/linux/netfilter_bridge.h     |  2 +-
 include/net/dn_neigh.h               |  6 +++---
 include/net/dst.h                    |  4 ++++
 include/net/ipv6.h                   |  2 +-
 include/net/netfilter/br_netfilter.h |  2 +-
 net/bridge/br_forward.c              |  5 ++---
 net/bridge/br_input.c                |  7 ++++---
 net/bridge/br_netfilter_hooks.c      | 21 +++++++++------------
 net/bridge/br_netfilter_ipv6.c       |  3 +--
 net/bridge/br_private.h              |  6 +++---
 net/bridge/br_stp_bpdu.c             |  3 ++-
 net/core/dev.c                       |  4 +++-
 net/decnet/dn_neigh.c                |  8 ++++----
 net/decnet/dn_nsp_in.c               |  3 ++-
 net/decnet/dn_route.c                |  6 +++---
 net/ipv4/arp.c                       |  7 +++----
 net/ipv4/ip_forward.c                |  3 +--
 net/ipv4/ip_input.c                  |  7 ++-----
 net/ipv4/ip_output.c                 |  4 ++--
 net/ipv4/ipmr.c                      |  4 ++--
 net/ipv4/raw.c                       |  2 +-
 net/ipv4/xfrm4_input.c               |  3 ++-
 net/ipv4/xfrm4_output.c              |  2 +-
 net/ipv6/ip6_input.c                 |  5 ++---
 net/ipv6/ip6_output.c                |  7 ++++---
 net/ipv6/ip6mr.c                     |  3 +--
 net/ipv6/mcast.c                     |  4 ++--
 net/ipv6/ndisc.c                     |  2 +-
 net/ipv6/output_core.c               |  2 +-
 net/ipv6/raw.c                       |  2 +-
 net/ipv6/xfrm6_output.c              |  2 +-
 net/netfilter/ipvs/ip_vs_xmit.c      |  4 ++--
 net/netfilter/nf_queue.c             |  2 +-
 net/xfrm/xfrm_output.c               | 12 ++++++------
 37 files changed, 95 insertions(+), 94 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 979a4db9c6bc..637e9fd1e14c 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -253,7 +253,7 @@ static netdev_tx_t vrf_xmit(struct sk_buff *skb, struct net_device *dev)
 }
 
 /* modelled after ip_finish_output2 */
-static int vrf_finish_output(struct sock *sk, struct sk_buff *skb)
+static int vrf_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct rtable *rt = (struct rtable *)dst;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 97ab5c9a7069..b791405958b4 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2212,7 +2212,7 @@ int dev_open(struct net_device *dev);
 int dev_close(struct net_device *dev);
 int dev_close_many(struct list_head *head, bool unlink);
 void dev_disable_lro(struct net_device *dev);
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb);
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb);
 int dev_queue_xmit(struct sk_buff *skb);
 int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv);
 int register_netdevice(struct net_device *dev);
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 295f2650b5dc..0b4d4560f33d 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -56,7 +56,7 @@ struct nf_hook_state {
 	struct sock *sk;
 	struct net *net;
 	struct list_head *hook_list;
-	int (*okfn)(struct sock *, struct sk_buff *);
+	int (*okfn)(struct net *, struct sock *, struct sk_buff *);
 };
 
 static inline void nf_hook_state_init(struct nf_hook_state *p,
@@ -67,7 +67,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p,
 				      struct net_device *outdev,
 				      struct sock *sk,
 				      struct net *net,
-				      int (*okfn)(struct sock *, struct sk_buff *))
+				      int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	p->hook = hook;
 	p->thresh = thresh;
@@ -175,7 +175,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 				 struct sk_buff *skb,
 				 struct net_device *indev,
 				 struct net_device *outdev,
-				 int (*okfn)(struct sock *, struct sk_buff *),
+				 int (*okfn)(struct net *, struct sock *, struct sk_buff *),
 				 int thresh)
 {
 	struct list_head *hook_list = &net->nf.hooks[pf][hook];
@@ -193,7 +193,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook,
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 			  struct sock *sk, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
-			  int (*okfn)(struct sock *, struct sk_buff *))
+			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN);
 }
@@ -219,31 +219,33 @@ static inline int
 NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	       struct sk_buff *skb, struct net_device *in,
 	       struct net_device *out,
-	       int (*okfn)(struct sock *, struct sk_buff *), int thresh)
+	       int (*okfn)(struct net *, struct sock *, struct sk_buff *),
+	       int thresh)
 {
 	int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh);
 	if (ret == 1)
-		ret = okfn(sk, skb);
+		ret = okfn(net, sk, skb);
 	return ret;
 }
 
 static inline int
 NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 	     struct sk_buff *skb, struct net_device *in, struct net_device *out,
-	     int (*okfn)(struct sock *, struct sk_buff *), bool cond)
+	     int (*okfn)(struct net *, struct sock *, struct sk_buff *),
+	     bool cond)
 {
 	int ret;
 
 	if (!cond ||
 	    ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1))
-		ret = okfn(sk, skb);
+		ret = okfn(net, sk, skb);
 	return ret;
 }
 
 static inline int
 NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb,
 	struct net_device *in, struct net_device *out,
-	int (*okfn)(struct sock *, struct sk_buff *))
+	int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN);
 }
@@ -345,12 +347,12 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family)
 }
 
 #else /* !CONFIG_NETFILTER */
-#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(sk, skb)
-#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb)
+#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(net, sk, skb)
+#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(net, sk, skb)
 static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 			  struct sock *sk, struct sk_buff *skb,
 			  struct net_device *indev, struct net_device *outdev,
-			  int (*okfn)(struct sock *, struct sk_buff *))
+			  int (*okfn)(struct net *, struct sock *, struct sk_buff *))
 {
 	return 1;
 }
diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 2437b8a5d7a9..2ed40c402b5e 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -17,7 +17,7 @@ enum nf_br_hook_priorities {
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
-int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
+int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 
 static inline void br_drop_fake_rtable(struct sk_buff *skb)
 {
diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h
index d0424269313f..5e902fc3f4eb 100644
--- a/include/net/dn_neigh.h
+++ b/include/net/dn_neigh.h
@@ -18,11 +18,11 @@ struct dn_neigh {
 
 void dn_neigh_init(void);
 void dn_neigh_cleanup(void);
-int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb);
-int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb);
+int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
+int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb);
 void dn_neigh_pointopoint_hello(struct sk_buff *skb);
 int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n);
-int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb);
+int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 
 extern struct neigh_table dn_neigh_table;
 
diff --git a/include/net/dst.h b/include/net/dst.h
index c72e58474e52..df0481a07029 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -458,6 +458,10 @@ static inline int dst_output(struct sock *sk, struct sk_buff *skb)
 {
 	return skb_dst(skb)->output(sk, skb);
 }
+static inline int dst_output_okfn(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+	return dst_output(sk, skb);
+}
 
 /* Input packet from network to transport.  */
 static inline int dst_input(struct sk_buff *skb)
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 711cca428cc8..384a93cf07d6 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -807,7 +807,7 @@ static inline u8 ip6_tclass(__be32 flowinfo)
 int ipv6_rcv(struct sk_buff *skb, struct net_device *dev,
 	     struct packet_type *pt, struct net_device *orig_dev);
 
-int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb);
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 
 /*
  *	upper-layer output functions
diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h
index d4c6b5f30acd..8fe266504900 100644
--- a/include/net/netfilter/br_netfilter.h
+++ b/include/net/netfilter/br_netfilter.h
@@ -31,7 +31,7 @@ static inline void nf_bridge_push_encap_header(struct sk_buff *skb)
 	skb->network_header -= len;
 }
 
-int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb);
+int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb);
 
 static inline struct rtable *bridge_parent_rtable(const struct net_device *dev)
 {
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index 2dd2a23ce707..48afca729ed7 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -35,7 +35,7 @@ static inline int should_deliver(const struct net_bridge_port *p,
 		p->state == BR_STATE_FORWARDING;
 }
 
-int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb)
+int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	if (!is_skb_forwardable(skb->dev, skb))
 		goto drop;
@@ -65,9 +65,8 @@ drop:
 }
 EXPORT_SYMBOL_GPL(br_dev_queue_push_xmit);
 
-int br_forward_finish(struct sock *sk, struct sk_buff *skb)
+int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb->dev);
 	return NF_HOOK(NFPROTO_BRIDGE, NF_BR_POST_ROUTING,
 		       net, sk, skb, NULL, skb->dev,
 		       br_dev_queue_push_xmit);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index 78fa7acd836e..223f4040d9df 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -26,7 +26,8 @@
 br_should_route_hook_t __rcu *br_should_route_hook __read_mostly;
 EXPORT_SYMBOL(br_should_route_hook);
 
-static int br_netif_receive_skb(struct sock *sk, struct sk_buff *skb)
+static int
+br_netif_receive_skb(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	return netif_receive_skb(skb);
 }
@@ -125,7 +126,7 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 }
 
 /* note: already called with rcu_read_lock */
-int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb)
+int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	const unsigned char *dest = eth_hdr(skb)->h_dest;
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
@@ -213,7 +214,7 @@ drop:
 EXPORT_SYMBOL_GPL(br_handle_frame_finish);
 
 /* note: already called with rcu_read_lock */
-static int br_handle_local_finish(struct sock *sk, struct sk_buff *skb)
+static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_bridge_port *p = br_port_get_rcu(skb->dev);
 	u16 vid = 0;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 7886c9d7e23d..e6e76bbdc82f 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -256,7 +256,7 @@ void nf_bridge_update_protocol(struct sk_buff *skb)
  * don't, we use the neighbour framework to find out. In both cases, we make
  * sure that br_handle_frame_finish() is called afterwards.
  */
-int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
+int br_nf_pre_routing_finish_bridge(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct neighbour *neigh;
 	struct dst_entry *dst;
@@ -273,7 +273,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
 		if (neigh->hh.hh_len) {
 			neigh_hh_bridge(&neigh->hh, skb);
 			skb->dev = nf_bridge->physindev;
-			ret = br_handle_frame_finish(sk, skb);
+			ret = br_handle_frame_finish(net, sk, skb);
 		} else {
 			/* the neighbour function below overwrites the complete
 			 * MAC header, so we save the Ethernet source address and
@@ -342,11 +342,10 @@ br_nf_ipv4_daddr_was_changed(const struct sk_buff *skb,
  * device, we proceed as if ip_route_input() succeeded. If it differs from the
  * logical bridge port or if ip_route_output_key() fails we drop the packet.
  */
-static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
+static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	struct iphdr *iph = ip_hdr(skb);
-	struct net *net = dev_net(dev);
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 	struct rtable *rt;
 	int err;
@@ -536,10 +535,9 @@ static unsigned int br_nf_local_in(const struct nf_hook_ops *ops,
 }
 
 /* PF_BRIDGE/FORWARD *************************************************/
-static int br_nf_forward_finish(struct sock *sk, struct sk_buff *skb)
+static int br_nf_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
-	struct net *net = dev_net(skb->dev);
 	struct net_device *in;
 
 	if (!IS_ARP(skb) && !IS_VLAN_ARP(skb)) {
@@ -692,7 +690,7 @@ static int br_nf_push_frag_xmit(struct net *net, struct sock *sk, struct sk_buff
 	__skb_push(skb, data->encap_size);
 
 	nf_bridge_info_free(skb);
-	return br_dev_queue_push_xmit(sk, skb);
+	return br_dev_queue_push_xmit(net, sk, skb);
 }
 static int br_nf_push_frag_xmit_sk(struct sock *sk, struct sk_buff *skb)
 {
@@ -728,17 +726,16 @@ static unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
 	return 0;
 }
 
-static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
+static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge;
 	unsigned int mtu_reserved;
-	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	mtu_reserved = nf_bridge_mtu_reduction(skb);
 
 	if (skb_is_gso(skb) || skb->len + mtu_reserved <= skb->dev->mtu) {
 		nf_bridge_info_free(skb);
-		return br_dev_queue_push_xmit(sk, skb);
+		return br_dev_queue_push_xmit(net, sk, skb);
 	}
 
 	nf_bridge = nf_bridge_info_get(skb);
@@ -797,7 +794,7 @@ static int br_nf_dev_queue_xmit(struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 	nf_bridge_info_free(skb);
-	return br_dev_queue_push_xmit(sk, skb);
+	return br_dev_queue_push_xmit(net, sk, skb);
  drop:
 	kfree_skb(skb);
 	return 0;
@@ -887,7 +884,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 	skb->dev = nf_bridge->physindev;
 
 	nf_bridge->physoutdev = NULL;
-	br_handle_frame_finish(NULL, skb);
+	br_handle_frame_finish(dev_net(skb->dev), NULL, skb);
 }
 
 static int br_nf_dev_xmit(struct sk_buff *skb)
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 5d19361ad5d3..e4dbbe44c724 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -161,12 +161,11 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb,
  * for br_nf_pre_routing_finish(), same logic is used here but
  * equivalent IPv6 function ip6_route_input() called indirectly.
  */
-static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
+static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 	struct rtable *rt;
 	struct net_device *dev = skb->dev;
-	struct net *net = dev_net(dev);
 	const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops();
 
 	nf_bridge->frag_max_size = IP6CB(skb)->frag_max_size;
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 213baf7aaa93..74e99c75c8e4 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -413,10 +413,10 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p,
 
 /* br_forward.c */
 void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb);
-int br_dev_queue_push_xmit(struct sock *sk, struct sk_buff *skb);
+int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb);
 void br_forward(const struct net_bridge_port *to,
 		struct sk_buff *skb, struct sk_buff *skb0);
-int br_forward_finish(struct sock *sk, struct sk_buff *skb);
+int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 void br_flood_deliver(struct net_bridge *br, struct sk_buff *skb, bool unicast);
 void br_flood_forward(struct net_bridge *br, struct sk_buff *skb,
 		      struct sk_buff *skb2, bool unicast);
@@ -434,7 +434,7 @@ void br_port_flags_change(struct net_bridge_port *port, unsigned long mask);
 void br_manage_promisc(struct net_bridge *br);
 
 /* br_input.c */
-int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
+int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
 rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
 
 static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
diff --git a/net/bridge/br_stp_bpdu.c b/net/bridge/br_stp_bpdu.c
index 8e2e8c352198..5881fbc114a9 100644
--- a/net/bridge/br_stp_bpdu.c
+++ b/net/bridge/br_stp_bpdu.c
@@ -30,7 +30,8 @@
 
 #define LLC_RESERVE sizeof(struct llc_pdu_un)
 
-static int br_send_bpdu_finish(struct sock *sk, struct sk_buff *skb)
+static int br_send_bpdu_finish(struct net *net, struct sock *sk,
+			       struct sk_buff *skb)
 {
 	return dev_queue_xmit(skb);
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 7db9b012dfb7..00dccfac8939 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2915,9 +2915,11 @@ EXPORT_SYMBOL(xmit_recursion);
 
 /**
  *	dev_loopback_xmit - loop back @skb
+ *	@net: network namespace this loopback is happening in
+ *	@sk:  sk needed to be a netfilter okfn
  *	@skb: buffer to transmit
  */
-int dev_loopback_xmit(struct sock *sk, struct sk_buff *skb)
+int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	skb_reset_mac_header(skb);
 	__skb_pull(skb, skb_network_offset(skb));
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index 305ab2fe25cd..482730cd8a56 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -194,7 +194,7 @@ static int dn_neigh_output(struct neighbour *neigh, struct sk_buff *skb)
 	return err;
 }
 
-static int dn_neigh_output_packet(struct sock *sk, struct sk_buff *skb)
+static int dn_neigh_output_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *)dst;
@@ -334,7 +334,7 @@ static int dn_phase3_output(struct neighbour *neigh, struct sock *sk,
 		       dn_neigh_output_packet);
 }
 
-int dn_to_neigh_output(struct sock *sk, struct sk_buff *skb)
+int dn_to_neigh_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct dn_route *rt = (struct dn_route *) dst;
@@ -378,7 +378,7 @@ void dn_neigh_pointopoint_hello(struct sk_buff *skb)
 /*
  * Ethernet router hello message received
  */
-int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb)
+int dn_neigh_router_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct rtnode_hello_message *msg = (struct rtnode_hello_message *)skb->data;
 
@@ -440,7 +440,7 @@ int dn_neigh_router_hello(struct sock *sk, struct sk_buff *skb)
 /*
  * Endnode hello message received
  */
-int dn_neigh_endnode_hello(struct sock *sk, struct sk_buff *skb)
+int dn_neigh_endnode_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct endnode_hello_message *msg = (struct endnode_hello_message *)skb->data;
 	struct neighbour *neigh;
diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c
index e7b0605ca34a..7ac086d5c0c0 100644
--- a/net/decnet/dn_nsp_in.c
+++ b/net/decnet/dn_nsp_in.c
@@ -714,7 +714,8 @@ out:
 	return ret;
 }
 
-static int dn_nsp_rx_packet(struct sock *sk2, struct sk_buff *skb)
+static int dn_nsp_rx_packet(struct net *net, struct sock *sk2,
+			    struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb = DN_SKB_CB(skb);
 	struct sock *sk = NULL;
diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
index fefcd2e85ef9..e930321e2c1d 100644
--- a/net/decnet/dn_route.c
+++ b/net/decnet/dn_route.c
@@ -512,7 +512,7 @@ static int dn_return_long(struct sk_buff *skb)
  *
  * Returns: result of input function if route is found, error code otherwise
  */
-static int dn_route_rx_packet(struct sock *sk, struct sk_buff *skb)
+static int dn_route_rx_packet(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dn_skb_cb *cb;
 	int err;
@@ -610,7 +610,7 @@ drop_it:
 	return NET_RX_DROP;
 }
 
-static int dn_route_discard(struct sock *sk, struct sk_buff *skb)
+static int dn_route_discard(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	/*
 	 * I know we drop the packet here, but thats considered success in
@@ -620,7 +620,7 @@ static int dn_route_discard(struct sock *sk, struct sk_buff *skb)
 	return NET_RX_SUCCESS;
 }
 
-static int dn_route_ptp_hello(struct sock *sk, struct sk_buff *skb)
+static int dn_route_ptp_hello(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	dn_dev_hello(skb);
 	dn_neigh_pointopoint_hello(skb);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index ae71e9ade5f9..61ff5ea31283 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -621,7 +621,7 @@ out:
 }
 EXPORT_SYMBOL(arp_create);
 
-static int arp_xmit_finish(struct sock *sk, struct sk_buff *skb)
+static int arp_xmit_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	return dev_queue_xmit(skb);
 }
@@ -642,7 +642,7 @@ EXPORT_SYMBOL(arp_xmit);
  *	Process an arp request.
  */
 
-static int arp_process(struct sock *sk, struct sk_buff *skb)
+static int arp_process(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
 	struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -654,7 +654,6 @@ static int arp_process(struct sock *sk, struct sk_buff *skb)
 	u16 dev_type = dev->type;
 	int addr_type;
 	struct neighbour *n;
-	struct net *net = dev_net(dev);
 	bool is_garp = false;
 
 	/* arp_rcv below verifies the ARP header and verifies the device
@@ -865,7 +864,7 @@ out:
 
 static void parp_redo(struct sk_buff *skb)
 {
-	arp_process(NULL, skb);
+	arp_process(dev_net(skb->dev), NULL, skb);
 }
 
 
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 0a3c45a2e757..d66cfb35ba74 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -61,9 +61,8 @@ static bool ip_exceeds_mtu(const struct sk_buff *skb, unsigned int mtu)
 }
 
 
-static int ip_forward_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb_dst(skb)->dev);
 	struct ip_options *opt	= &(IPCB(skb)->opt);
 
 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 991d082c7312..7cc9f7bb7fb7 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -188,10 +188,8 @@ bool ip_call_ra_chain(struct sk_buff *skb)
 	return false;
 }
 
-static int ip_local_deliver_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_local_deliver_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb->dev);
-
 	__skb_pull(skb, skb_network_header_len(skb));
 
 	rcu_read_lock();
@@ -311,10 +309,9 @@ drop:
 int sysctl_ip_early_demux __read_mostly = 1;
 EXPORT_SYMBOL(sysctl_ip_early_demux);
 
-static int ip_rcv_finish(struct sock *sk, struct sk_buff *skb)
+static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	const struct iphdr *iph = ip_hdr(skb);
-	struct net *net = dev_net(skb->dev);
 	struct rtable *rt;
 
 	if (sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk) {
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 4c9532259a7f..09a6b7bb7ea3 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -104,7 +104,7 @@ static int __ip_local_out_sk(struct sock *sk, struct sk_buff *skb)
 	ip_send_check(iph);
 	return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
 		       net, sk, skb, NULL, skb_dst(skb)->dev,
-		       dst_output);
+		       dst_output_okfn);
 }
 
 int __ip_local_out(struct sk_buff *skb)
@@ -266,7 +266,7 @@ static int ip_finish_output_gso(struct sock *sk, struct sk_buff *skb,
 	return ret;
 }
 
-static int ip_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	unsigned int mtu;
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a88c0c5374ff..cfcb996ec51b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1678,10 +1678,10 @@ static void ip_encap(struct net *net, struct sk_buff *skb,
 	nf_reset(skb);
 }
 
-static inline int ipmr_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ipmr_forward_finish(struct net *net, struct sock *sk,
+				      struct sk_buff *skb)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
-	struct net *net = dev_net(skb_dst(skb)->dev);
 
 	IP_INC_STATS_BH(net, IPSTATS_MIB_OUTFORWDATAGRAMS);
 	IP_ADD_STATS_BH(net, IPSTATS_MIB_OUTOCTETS, skb->len);
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 2045b1aaa6ef..28ef8a913130 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -413,7 +413,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 
 	err = NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_OUT,
 		      net, sk, skb, NULL, rt->dst.dev,
-		      dst_output);
+		      dst_output_okfn);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index 5093000d3d5e..62e1e72db461 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -22,7 +22,8 @@ int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
 	return xfrm4_extract_header(skb);
 }
 
-static inline int xfrm4_rcv_encap_finish(struct sock *sk, struct sk_buff *skb)
+static inline int xfrm4_rcv_encap_finish(struct net *net, struct sock *sk,
+					 struct sk_buff *skb)
 {
 	if (!skb_dst(skb)) {
 		const struct iphdr *iph = ip_hdr(skb);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index e4a85199e015..28ae2048b93a 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -80,7 +80,7 @@ int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
 	return xfrm_output(sk, skb);
 }
 
-static int __xfrm4_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index 583cf959c23d..9075acf081dd 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -47,7 +47,7 @@
 #include <net/inet_ecn.h>
 #include <net/dst_metadata.h>
 
-int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb)
+int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
 		const struct inet6_protocol *ipprot;
@@ -199,9 +199,8 @@ drop:
  */
 
 
-static int ip6_input_finish(struct sock *sk, struct sk_buff *skb)
+static int ip6_input_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb_dst(skb)->dev);
 	const struct inet6_protocol *ipprot;
 	struct inet6_dev *idev;
 	unsigned int nhoff;
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 96e76ddd4a44..d8d68e81d123 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -121,7 +121,7 @@ static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb)
 	return -EINVAL;
 }
 
-static int ip6_finish_output(struct sock *sk, struct sk_buff *skb)
+static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 	    dst_allfrag(skb_dst(skb)) ||
@@ -225,7 +225,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
 			      IPSTATS_MIB_OUT, skb->len);
 		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 			       net, sk, skb, NULL, dst->dev,
-			       dst_output);
+			       dst_output_okfn);
 	}
 
 	skb->dev = dst->dev;
@@ -317,7 +317,8 @@ static int ip6_forward_proxy_check(struct sk_buff *skb)
 	return 0;
 }
 
-static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6_forward_finish(struct net *net, struct sock *sk,
+				     struct sk_buff *skb)
 {
 	skb_sender_cpu_clear(skb);
 	return dst_output(sk, skb);
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index e830942b2090..5e5d16e7ce85 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -1985,9 +1985,8 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg)
 }
 #endif
 
-static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb)
+static inline int ip6mr_forward2_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct net *net = dev_net(skb_dst(skb)->dev);
 	IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
 			 IPSTATS_MIB_OUTFORWDATAGRAMS);
 	IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)),
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 124338a39e29..a8bf57ca74d3 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -1646,7 +1646,7 @@ static void mld_sendpack(struct sk_buff *skb)
 
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 		      net, net->ipv6.igmp_sk, skb, NULL, skb->dev,
-		      dst_output);
+		      dst_output_okfn);
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT);
@@ -2010,7 +2010,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
 	skb_dst_set(skb, dst);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 		      net, sk, skb, NULL, skb->dev,
-		      dst_output);
+		      dst_output_okfn);
 out:
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index dd2b08d7c8d1..dde5a1e5875a 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -465,7 +465,7 @@ static void ndisc_send_skb(struct sk_buff *skb,
 
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 		      net, sk, skb, NULL, dst->dev,
-		      dst_output);
+		      dst_output_okfn);
 	if (!err) {
 		ICMP6MSGOUT_INC_STATS(net, idev, type);
 		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 9cc9127fb5e7..e77102c4f804 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -151,7 +151,7 @@ static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb)
 
 	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
 		       net, sk, skb, NULL, skb_dst(skb)->dev,
-		       dst_output);
+		       dst_output_okfn);
 }
 
 int __ip6_local_out(struct sk_buff *skb)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index dc65ec198f7c..fec0151522a2 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -655,7 +655,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 
 	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
 	err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
-		      NULL, rt->dst.dev, dst_output);
+		      NULL, rt->dst.dev, dst_output_okfn);
 	if (err > 0)
 		err = net_xmit_errno(err);
 	if (err)
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 431ae2c22234..68a996f8a044 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -131,7 +131,7 @@ int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
 	return xfrm_output(sk, skb);
 }
 
-static int __xfrm6_output(struct sock *sk, struct sk_buff *skb)
+static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct dst_entry *dst = skb_dst(skb);
 	struct xfrm_state *x = dst->xfrm;
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 65c996c14bca..cc7299033af8 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -574,7 +574,7 @@ static inline int ip_vs_nat_send_or_cont(int pf, struct sk_buff *skb,
 		if (!skb->sk)
 			skb_sender_cpu_clear(skb);
 		NF_HOOK(pf, NF_INET_LOCAL_OUT, ip_vs_conn_net(cp), NULL, skb,
-			NULL, skb_dst(skb)->dev, dst_output);
+			NULL, skb_dst(skb)->dev, dst_output_okfn);
 	} else
 		ret = NF_ACCEPT;
 
@@ -596,7 +596,7 @@ static inline int ip_vs_send_or_cont(int pf, struct sk_buff *skb,
 		if (!skb->sk)
 			skb_sender_cpu_clear(skb);
 		NF_HOOK(pf, NF_INET_LOCAL_OUT, ip_vs_conn_net(cp), NULL, skb,
-			NULL, skb_dst(skb)->dev, dst_output);
+			NULL, skb_dst(skb)->dev, dst_output_okfn);
 	} else
 		ret = NF_ACCEPT;
 	return ret;
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 96777f9a9350..9f3c3c25fa73 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -215,7 +215,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 	case NF_ACCEPT:
 	case NF_STOP:
 		local_bh_disable();
-		entry->state.okfn(entry->state.sk, skb);
+		entry->state.okfn(entry->state.net, entry->state.sk, skb);
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index c21f1a02ce13..61ba99f61dc8 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -19,7 +19,7 @@
 #include <net/dst.h>
 #include <net/xfrm.h>
 
-static int xfrm_output2(struct sock *sk, struct sk_buff *skb);
+static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
 
 static int xfrm_skb_check_space(struct sk_buff *skb)
 {
@@ -157,12 +157,12 @@ out:
 }
 EXPORT_SYMBOL_GPL(xfrm_output_resume);
 
-static int xfrm_output2(struct sock *sk, struct sk_buff *skb)
+static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	return xfrm_output_resume(skb, 1);
 }
 
-static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb)
+static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct sk_buff *segs;
 
@@ -178,7 +178,7 @@ static int xfrm_output_gso(struct sock *sk, struct sk_buff *skb)
 		int err;
 
 		segs->next = NULL;
-		err = xfrm_output2(sk, segs);
+		err = xfrm_output2(net, sk, segs);
 
 		if (unlikely(err)) {
 			kfree_skb_list(nskb);
@@ -197,7 +197,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
 	int err;
 
 	if (skb_is_gso(skb))
-		return xfrm_output_gso(sk, skb);
+		return xfrm_output_gso(net, sk, skb);
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		err = skb_checksum_help(skb);
@@ -208,7 +208,7 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
 		}
 	}
 
-	return xfrm_output2(sk, skb);
+	return xfrm_output2(net, sk, skb);
 }
 EXPORT_SYMBOL_GPL(xfrm_output);
 
-- 
cgit v1.2.3


From 007979eaf94d1c888d8c7cf8a5250c2c6c9bd98e Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 29 Sep 2015 20:07:10 -0700
Subject: net: Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER

Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER and update the name of the
netif_is_vrf and netif_index_is_vrf macros.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c         |  6 +++---
 include/linux/netdevice.h | 14 +++++++-------
 include/net/route.h       |  2 +-
 include/net/vrf.h         |  4 ++--
 net/ipv4/ip_output.c      |  2 +-
 net/ipv4/route.c          |  2 +-
 net/ipv4/udp.c            |  2 +-
 7 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 4ecb3a3e516a..2d7418e0b908 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -438,7 +438,7 @@ out_fail:
 
 static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 {
-	if (netif_is_vrf(port_dev) || vrf_is_slave(port_dev))
+	if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev))
 		return -EINVAL;
 
 	return do_vrf_add_slave(dev, port_dev);
@@ -591,7 +591,7 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 
 	vrf->tb_id = nla_get_u32(data[IFLA_VRF_TABLE]);
 
-	dev->priv_flags |= IFF_VRF_MASTER;
+	dev->priv_flags |= IFF_L3MDEV_MASTER;
 
 	err = -ENOMEM;
 	vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL);
@@ -657,7 +657,7 @@ static int vrf_device_event(struct notifier_block *unused,
 		struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr);
 		struct net_device *vrf_dev;
 
-		if (!vrf_ptr || netif_is_vrf(dev))
+		if (!vrf_ptr || netif_is_l3_master(dev))
 			goto out;
 
 		vrf_dev = netdev_master_upper_dev_get(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d2ffeafc9998..99c33e83822f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1258,7 +1258,7 @@ struct net_device_ops {
  * @IFF_LIVE_ADDR_CHANGE: device supports hardware address
  *	change when it's running
  * @IFF_MACVLAN: Macvlan device
- * @IFF_VRF_MASTER: device is a VRF master
+ * @IFF_L3MDEV_MASTER: device is an L3 master device
  * @IFF_NO_QUEUE: device can run without qdisc attached
  * @IFF_OPENVSWITCH: device is a Open vSwitch master
  */
@@ -1283,7 +1283,7 @@ enum netdev_priv_flags {
 	IFF_XMIT_DST_RELEASE_PERM	= 1<<17,
 	IFF_IPVLAN_MASTER		= 1<<18,
 	IFF_IPVLAN_SLAVE		= 1<<19,
-	IFF_VRF_MASTER			= 1<<20,
+	IFF_L3MDEV_MASTER		= 1<<20,
 	IFF_NO_QUEUE			= 1<<21,
 	IFF_OPENVSWITCH			= 1<<22,
 };
@@ -1308,7 +1308,7 @@ enum netdev_priv_flags {
 #define IFF_XMIT_DST_RELEASE_PERM	IFF_XMIT_DST_RELEASE_PERM
 #define IFF_IPVLAN_MASTER		IFF_IPVLAN_MASTER
 #define IFF_IPVLAN_SLAVE		IFF_IPVLAN_SLAVE
-#define IFF_VRF_MASTER			IFF_VRF_MASTER
+#define IFF_L3MDEV_MASTER		IFF_L3MDEV_MASTER
 #define IFF_NO_QUEUE			IFF_NO_QUEUE
 #define IFF_OPENVSWITCH			IFF_OPENVSWITCH
 
@@ -3824,9 +3824,9 @@ static inline bool netif_supports_nofcs(struct net_device *dev)
 	return dev->priv_flags & IFF_SUPP_NOFCS;
 }
 
-static inline bool netif_is_vrf(const struct net_device *dev)
+static inline bool netif_is_l3_master(const struct net_device *dev)
 {
-	return dev->priv_flags & IFF_VRF_MASTER;
+	return dev->priv_flags & IFF_L3MDEV_MASTER;
 }
 
 static inline bool netif_is_bridge_master(const struct net_device *dev)
@@ -3839,7 +3839,7 @@ static inline bool netif_is_ovs_master(const struct net_device *dev)
 	return dev->priv_flags & IFF_OPENVSWITCH;
 }
 
-static inline bool netif_index_is_vrf(struct net *net, int ifindex)
+static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
 {
 	bool rc = false;
 
@@ -3853,7 +3853,7 @@ static inline bool netif_index_is_vrf(struct net *net, int ifindex)
 
 	dev = dev_get_by_index_rcu(net, ifindex);
 	if (dev)
-		rc = netif_is_vrf(dev);
+		rc = netif_is_l3_master(dev);
 
 	rcu_read_unlock();
 #endif
diff --git a/include/net/route.h b/include/net/route.h
index d1bd90bb3187..a565d0dad12c 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -256,7 +256,7 @@ static inline void ip_route_connect_init(struct flowi4 *fl4, __be32 dst, __be32
 	if (inet_sk(sk)->transparent)
 		flow_flags |= FLOWI_FLAG_ANYSRC;
 
-	if (netif_index_is_vrf(sock_net(sk), oif))
+	if (netif_index_is_l3_master(sock_net(sk), oif))
 		flow_flags |= FLOWI_FLAG_VRFSRC | FLOWI_FLAG_SKIP_NH_OIF;
 
 	flowi4_init_output(fl4, oif, sk->sk_mark, tos, RT_SCOPE_UNIVERSE,
diff --git a/include/net/vrf.h b/include/net/vrf.h
index 593e6094ddd4..34bb3f69def2 100644
--- a/include/net/vrf.h
+++ b/include/net/vrf.h
@@ -43,7 +43,7 @@ static inline int vrf_master_ifindex_rcu(const struct net_device *dev)
 	if (!dev)
 		return 0;
 
-	if (netif_is_vrf(dev)) {
+	if (netif_is_l3_master(dev)) {
 		ifindex = dev->ifindex;
 	} else {
 		vrf_ptr = rcu_dereference(dev->vrf_ptr);
@@ -125,7 +125,7 @@ static inline u32 vrf_dev_table_rtnl(const struct net_device *dev)
 	return tb_id;
 }
 
-/* caller has already checked netif_is_vrf(dev) */
+/* caller has already checked netif_is_l3_master(dev) */
 static inline struct rtable *vrf_dev_get_rth(const struct net_device *dev)
 {
 	struct rtable *rth = ERR_PTR(-ENETUNREACH);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 06d2c87ed505..aff6766922e8 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1571,7 +1571,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
 	}
 
 	oif = arg->bound_dev_if;
-	if (!oif && netif_index_is_vrf(net, skb->skb_iif))
+	if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
 		oif = skb->skb_iif;
 
 	flowi4_init_output(&fl4, oif,
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 8c84a6664b30..a670f894ce13 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2124,7 +2124,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 				fl4->saddr = inet_select_addr(dev_out, 0,
 							      RT_SCOPE_HOST);
 		}
-		if (netif_is_vrf(dev_out) &&
+		if (netif_is_l3_master(dev_out) &&
 		    !(fl4->flowi4_flags & FLOWI_FLAG_VRFSRC)) {
 			rth = vrf_dev_get_rth(dev_out);
 			goto out;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index f7d1d5e19e95..156ba75b6000 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1021,7 +1021,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 		 * device lookup source address from VRF table. This mimics
 		 * behavior of ip_route_connect{_init}.
 		 */
-		if (netif_index_is_vrf(net, ipc.oif)) {
+		if (netif_index_is_l3_master(net, ipc.oif)) {
 			flowi4_init_output(fl4, ipc.oif, sk->sk_mark, tos,
 					   RT_SCOPE_UNIVERSE, sk->sk_protocol,
 					   (flow_flags | FLOWI_FLAG_VRFSRC |
-- 
cgit v1.2.3


From 1b69c6d0ae90b7f1a4f61d5c8209d5cb7a55f849 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 29 Sep 2015 20:07:11 -0700
Subject: net: Introduce L3 Master device abstraction

L3 master devices allow users of the abstraction to influence FIB lookups
for enslaved devices. Current API provides a means for the master device
to return a specific FIB table for an enslaved device, to return an
rtable/custom dst and influence the OIF used for fib lookups.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS               |   7 +++
 include/linux/netdevice.h |   3 ++
 include/net/l3mdev.h      | 125 ++++++++++++++++++++++++++++++++++++++++++++++
 net/Kconfig               |   1 +
 net/Makefile              |   3 ++
 net/l3mdev/Kconfig        |  10 ++++
 net/l3mdev/Makefile       |   5 ++
 net/l3mdev/l3mdev.c       |  92 ++++++++++++++++++++++++++++++++++
 8 files changed, 246 insertions(+)
 create mode 100644 include/net/l3mdev.h
 create mode 100644 net/l3mdev/Kconfig
 create mode 100644 net/l3mdev/Makefile
 create mode 100644 net/l3mdev/l3mdev.c

(limited to 'include/linux/netdevice.h')

diff --git a/MAINTAINERS b/MAINTAINERS
index bcd263de4827..3f2d7a9d0bbf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6095,6 +6095,13 @@ F:	Documentation/auxdisplay/ks0108
 F:	drivers/auxdisplay/ks0108.c
 F:	include/linux/ks0108.h
 
+L3MDEV
+M:	David Ahern <dsa@cumulusnetworks.com>
+L:	netdev@vger.kernel.org
+S:	Maintained
+F:	net/l3mdev
+F:	include/net/l3mdev.h
+
 LAPB module
 L:	linux-x25@vger.kernel.org
 S:	Orphan
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 99c33e83822f..c7f14794fe14 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1587,6 +1587,9 @@ struct net_device {
 #ifdef CONFIG_NET_SWITCHDEV
 	const struct switchdev_ops *switchdev_ops;
 #endif
+#ifdef CONFIG_NET_L3_MASTER_DEV
+	const struct l3mdev_ops	*l3mdev_ops;
+#endif
 
 	const struct header_ops *header_ops;
 
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
new file mode 100644
index 000000000000..e382c777bab8
--- /dev/null
+++ b/include/net/l3mdev.h
@@ -0,0 +1,125 @@
+/*
+ * include/net/l3mdev.h - L3 master device API
+ * Copyright (c) 2015 Cumulus Networks
+ * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+#ifndef _NET_L3MDEV_H_
+#define _NET_L3MDEV_H_
+
+/**
+ * struct l3mdev_ops - l3mdev operations
+ *
+ * @l3mdev_fib_table: Get FIB table id to use for lookups
+ *
+ * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device
+ */
+
+struct l3mdev_ops {
+	u32		(*l3mdev_fib_table)(const struct net_device *dev);
+	struct rtable *	(*l3mdev_get_rtable)(const struct net_device *dev,
+					     const struct flowi4 *fl4);
+};
+
+#ifdef CONFIG_NET_L3_MASTER_DEV
+
+int l3mdev_master_ifindex_rcu(struct net_device *dev);
+static inline int l3mdev_master_ifindex(struct net_device *dev)
+{
+	int ifindex;
+
+	rcu_read_lock();
+	ifindex = l3mdev_master_ifindex_rcu(dev);
+	rcu_read_unlock();
+
+	return ifindex;
+}
+
+/* get index of an interface to use for FIB lookups. For devices
+ * enslaved to an L3 master device FIB lookups are based on the
+ * master index
+ */
+static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
+{
+	return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex;
+}
+
+static inline int l3mdev_fib_oif(struct net_device *dev)
+{
+	int oif;
+
+	rcu_read_lock();
+	oif = l3mdev_fib_oif_rcu(dev);
+	rcu_read_unlock();
+
+	return oif;
+}
+
+u32 l3mdev_fib_table_rcu(const struct net_device *dev);
+u32 l3mdev_fib_table_by_index(struct net *net, int ifindex);
+static inline u32 l3mdev_fib_table(const struct net_device *dev)
+{
+	u32 tb_id;
+
+	rcu_read_lock();
+	tb_id = l3mdev_fib_table_rcu(dev);
+	rcu_read_unlock();
+
+	return tb_id;
+}
+
+static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
+					       const struct flowi4 *fl4)
+{
+	if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable)
+		return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4);
+
+	return NULL;
+}
+
+#else
+
+static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
+{
+	return 0;
+}
+static inline int l3mdev_master_ifindex(struct net_device *dev)
+{
+	return 0;
+}
+
+static inline int l3mdev_fib_oif_rcu(struct net_device *dev)
+{
+	return dev ? dev->ifindex : 0;
+}
+static inline int l3mdev_fib_oif(struct net_device *dev)
+{
+	return dev ? dev->ifindex : 0;
+}
+
+static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev)
+{
+	return 0;
+}
+static inline u32 l3mdev_fib_table(const struct net_device *dev)
+{
+	return 0;
+}
+static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
+{
+	return 0;
+}
+
+static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
+					       const struct flowi4 *fl4)
+{
+	return NULL;
+}
+
+#endif
+
+#endif /* _NET_L3MDEV_H_ */
diff --git a/net/Kconfig b/net/Kconfig
index 7021c1bf44d6..127da94ae25e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -232,6 +232,7 @@ source "net/netlink/Kconfig"
 source "net/mpls/Kconfig"
 source "net/hsr/Kconfig"
 source "net/switchdev/Kconfig"
+source "net/l3mdev/Kconfig"
 
 config RPS
 	bool
diff --git a/net/Makefile b/net/Makefile
index 3995613e5510..a5d04098dfce 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -74,3 +74,6 @@ obj-$(CONFIG_HSR)		+= hsr/
 ifneq ($(CONFIG_NET_SWITCHDEV),)
 obj-y				+= switchdev/
 endif
+ifneq ($(CONFIG_NET_L3_MASTER_DEV),)
+obj-y				+= l3mdev/
+endif
diff --git a/net/l3mdev/Kconfig b/net/l3mdev/Kconfig
new file mode 100644
index 000000000000..5d47325037bc
--- /dev/null
+++ b/net/l3mdev/Kconfig
@@ -0,0 +1,10 @@
+#
+# Configuration for L3 master device support
+#
+
+config NET_L3_MASTER_DEV
+	bool "L3 Master device support"
+	depends on INET || IPV6
+	---help---
+	  This module provides glue between core networking code and device
+	  drivers to support L3 master devices like VRF.
diff --git a/net/l3mdev/Makefile b/net/l3mdev/Makefile
new file mode 100644
index 000000000000..84a53a6f609a
--- /dev/null
+++ b/net/l3mdev/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the L3 device API
+#
+
+obj-$(CONFIG_NET_L3_MASTER_DEV) += l3mdev.o
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
new file mode 100644
index 000000000000..ddf75ad41713
--- /dev/null
+++ b/net/l3mdev/l3mdev.c
@@ -0,0 +1,92 @@
+/*
+ * net/l3mdev/l3mdev.c - L3 master device implementation
+ * Copyright (c) 2015 Cumulus Networks
+ * Copyright (c) 2015 David Ahern <dsa@cumulusnetworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <net/l3mdev.h>
+
+/**
+ *	l3mdev_master_ifindex - get index of L3 master device
+ *	@dev: targeted interface
+ */
+
+int l3mdev_master_ifindex_rcu(struct net_device *dev)
+{
+	int ifindex = 0;
+
+	if (!dev)
+		return 0;
+
+	if (netif_is_l3_master(dev)) {
+		ifindex = dev->ifindex;
+	} else if (dev->flags & IFF_SLAVE) {
+		struct net_device *master;
+
+		master = netdev_master_upper_dev_get_rcu(dev);
+		if (master && netif_is_l3_master(master))
+			ifindex = master->ifindex;
+	}
+
+	return ifindex;
+}
+EXPORT_SYMBOL_GPL(l3mdev_master_ifindex_rcu);
+
+/**
+ *	l3mdev_fib_table - get FIB table id associated with an L3
+ *                             master interface
+ *	@dev: targeted interface
+ */
+
+u32 l3mdev_fib_table_rcu(const struct net_device *dev)
+{
+	u32 tb_id = 0;
+
+	if (!dev)
+		return 0;
+
+	if (netif_is_l3_master(dev)) {
+		if (dev->l3mdev_ops->l3mdev_fib_table)
+			tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
+	} else if (dev->flags & IFF_SLAVE) {
+		/* Users of netdev_master_upper_dev_get_rcu need non-const,
+		 * but current inet_*type functions take a const
+		 */
+		struct net_device *_dev = (struct net_device *) dev;
+		const struct net_device *master;
+
+		master = netdev_master_upper_dev_get_rcu(_dev);
+		if (master && netif_is_l3_master(master) &&
+		    master->l3mdev_ops->l3mdev_fib_table)
+			tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
+	}
+
+	return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_rcu);
+
+u32 l3mdev_fib_table_by_index(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+	u32 tb_id = 0;
+
+	if (!ifindex)
+		return 0;
+
+	rcu_read_lock();
+
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (dev)
+		tb_id = l3mdev_fib_table_rcu(dev);
+
+	rcu_read_unlock();
+
+	return tb_id;
+}
+EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index);
-- 
cgit v1.2.3


From 93a7e7e837af6846052481da974320c19ab82e5c Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 29 Sep 2015 20:07:16 -0700
Subject: net: Remove the now unused vrf_ptr

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c         | 32 ++------------------------------
 include/linux/netdevice.h |  2 --
 include/net/vrf.h         |  6 ------
 3 files changed, 2 insertions(+), 38 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 72f1892ebad0..df872f4efb0d 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -396,18 +396,15 @@ static void __vrf_insert_slave(struct slave_queue *queue, struct slave *slave)
 
 static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 {
-	struct net_vrf_dev *vrf_ptr = kmalloc(sizeof(*vrf_ptr), GFP_KERNEL);
 	struct slave *slave = kzalloc(sizeof(*slave), GFP_KERNEL);
 	struct net_vrf *vrf = netdev_priv(dev);
 	struct slave_queue *queue = &vrf->queue;
 	int ret = -ENOMEM;
 
-	if (!slave || !vrf_ptr)
+	if (!slave)
 		goto out_fail;
 
 	slave->dev = port_dev;
-	vrf_ptr->ifindex = dev->ifindex;
-	vrf_ptr->tb_id = vrf->tb_id;
 
 	/* register the packet handler for slave ports */
 	ret = netdev_rx_handler_register(port_dev, vrf_handle_frame, dev);
@@ -424,7 +421,6 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 
 	port_dev->flags |= IFF_SLAVE;
 	__vrf_insert_slave(queue, slave);
-	rcu_assign_pointer(port_dev->vrf_ptr, vrf_ptr);
 	cycle_netdev(port_dev);
 
 	return 0;
@@ -432,7 +428,6 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 out_unregister:
 	netdev_rx_handler_unregister(port_dev);
 out_fail:
-	kfree(vrf_ptr);
 	kfree(slave);
 	return ret;
 }
@@ -448,21 +443,15 @@ static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 /* inverse of do_vrf_add_slave */
 static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
 {
-	struct net_vrf_dev *vrf_ptr = rtnl_dereference(port_dev->vrf_ptr);
 	struct net_vrf *vrf = netdev_priv(dev);
 	struct slave_queue *queue = &vrf->queue;
 	struct slave *slave;
 
-	RCU_INIT_POINTER(port_dev->vrf_ptr, NULL);
-
 	netdev_upper_dev_unlink(port_dev, dev);
 	port_dev->flags &= ~IFF_SLAVE;
 
 	netdev_rx_handler_unregister(port_dev);
 
-	/* after netdev_rx_handler_unregister for synchronize_rcu */
-	kfree(vrf_ptr);
-
 	cycle_netdev(port_dev);
 
 	slave = __vrf_find_slave_dev(queue, port_dev);
@@ -601,10 +590,6 @@ static int vrf_validate(struct nlattr *tb[], struct nlattr *data[])
 
 static void vrf_dellink(struct net_device *dev, struct list_head *head)
 {
-	struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr);
-
-	RCU_INIT_POINTER(dev->vrf_ptr, NULL);
-	kfree_rcu(vrf_ptr, rcu);
 	unregister_netdevice_queue(dev, head);
 }
 
@@ -612,7 +597,6 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 		       struct nlattr *tb[], struct nlattr *data[])
 {
 	struct net_vrf *vrf = netdev_priv(dev);
-	struct net_vrf_dev *vrf_ptr;
 	int err;
 
 	if (!data || !data[IFLA_VRF_TABLE])
@@ -622,24 +606,13 @@ static int vrf_newlink(struct net *src_net, struct net_device *dev,
 
 	dev->priv_flags |= IFF_L3MDEV_MASTER;
 
-	err = -ENOMEM;
-	vrf_ptr = kmalloc(sizeof(*dev->vrf_ptr), GFP_KERNEL);
-	if (!vrf_ptr)
-		goto out_fail;
-
-	vrf_ptr->ifindex = dev->ifindex;
-	vrf_ptr->tb_id = vrf->tb_id;
-
 	err = register_netdevice(dev);
 	if (err < 0)
 		goto out_fail;
 
-	rcu_assign_pointer(dev->vrf_ptr, vrf_ptr);
-
 	return 0;
 
 out_fail:
-	kfree(vrf_ptr);
 	free_netdev(dev);
 	return err;
 }
@@ -683,10 +656,9 @@ static int vrf_device_event(struct notifier_block *unused,
 
 	/* only care about unregister events to drop slave references */
 	if (event == NETDEV_UNREGISTER) {
-		struct net_vrf_dev *vrf_ptr = rtnl_dereference(dev->vrf_ptr);
 		struct net_device *vrf_dev;
 
-		if (!vrf_ptr || netif_is_l3_master(dev))
+		if (netif_is_l3_master(dev))
 			goto out;
 
 		vrf_dev = netdev_master_upper_dev_get(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c7f14794fe14..72bf9e37a2f0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1427,7 +1427,6 @@ enum netdev_priv_flags {
  *	@dn_ptr:	DECnet specific data
  *	@ip6_ptr:	IPv6 specific data
  *	@ax25_ptr:	AX.25 specific data
- *	@vrf_ptr:	VRF specific data
  *	@ieee80211_ptr:	IEEE 802.11 specific data, assign before registering
  *
  *	@last_rx:	Time of last Rx
@@ -1649,7 +1648,6 @@ struct net_device {
 	struct dn_dev __rcu     *dn_ptr;
 	struct inet6_dev __rcu	*ip6_ptr;
 	void			*ax25_ptr;
-	struct net_vrf_dev __rcu *vrf_ptr;
 	struct wireless_dev	*ieee80211_ptr;
 	struct wpan_dev		*ieee802154_ptr;
 #if IS_ENABLED(CONFIG_MPLS_ROUTING)
diff --git a/include/net/vrf.h b/include/net/vrf.h
index 5bba1535ba73..e83fc38770dd 100644
--- a/include/net/vrf.h
+++ b/include/net/vrf.h
@@ -11,12 +11,6 @@
 #ifndef __LINUX_NET_VRF_H
 #define __LINUX_NET_VRF_H
 
-struct net_vrf_dev {
-	struct rcu_head		rcu;
-	int                     ifindex; /* ifindex of master dev */
-	u32                     tb_id;   /* table id for VRF */
-};
-
 struct slave {
 	struct list_head	list;
 	struct net_device	*dev;
-- 
cgit v1.2.3


From 9478d12d33ad12d29c5343ae7346b51bc1f4c5a9 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Tue, 29 Sep 2015 20:07:18 -0700
Subject: net: Move netif_index_is_l3_master to l3mdev.h

Change CONFIG dependency to CONFIG_NET_L3_MASTER_DEV as well.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 21 ---------------------
 include/net/l3mdev.h      | 24 ++++++++++++++++++++++++
 include/net/route.h       |  1 +
 3 files changed, 25 insertions(+), 21 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 72bf9e37a2f0..b9450784ae06 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3840,27 +3840,6 @@ static inline bool netif_is_ovs_master(const struct net_device *dev)
 	return dev->priv_flags & IFF_OPENVSWITCH;
 }
 
-static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
-{
-	bool rc = false;
-
-#if IS_ENABLED(CONFIG_NET_VRF)
-	struct net_device *dev;
-
-	if (ifindex == 0)
-		return false;
-
-	rcu_read_lock();
-
-	dev = dev_get_by_index_rcu(net, ifindex);
-	if (dev)
-		rc = netif_is_l3_master(dev);
-
-	rcu_read_unlock();
-#endif
-	return rc;
-}
-
 /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */
 static inline void netif_keep_dst(struct net_device *dev)
 {
diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h
index e382c777bab8..87cee05a0a17 100644
--- a/include/net/l3mdev.h
+++ b/include/net/l3mdev.h
@@ -81,6 +81,25 @@ static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
 	return NULL;
 }
 
+static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
+{
+	struct net_device *dev;
+	bool rc = false;
+
+	if (ifindex == 0)
+		return false;
+
+	rcu_read_lock();
+
+	dev = dev_get_by_index_rcu(net, ifindex);
+	if (dev)
+		rc = netif_is_l3_master(dev);
+
+	rcu_read_unlock();
+
+	return rc;
+}
+
 #else
 
 static inline int l3mdev_master_ifindex_rcu(struct net_device *dev)
@@ -120,6 +139,11 @@ static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev,
 	return NULL;
 }
 
+static inline bool netif_index_is_l3_master(struct net *net, int ifindex)
+{
+	return false;
+}
+
 #endif
 
 #endif /* _NET_L3MDEV_H_ */
diff --git a/include/net/route.h b/include/net/route.h
index a565d0dad12c..e211dc167db1 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -28,6 +28,7 @@
 #include <net/inetpeer.h>
 #include <net/flow.h>
 #include <net/inet_sock.h>
+#include <net/l3mdev.h>
 #include <linux/in_route.h>
 #include <linux/rtnetlink.h>
 #include <linux/rcupdate.h>
-- 
cgit v1.2.3


From fee6d4c777a125e56de9370db3b2bf359bf958d6 Mon Sep 17 00:00:00 2001
From: David Ahern <dsa@cumulusnetworks.com>
Date: Mon, 5 Oct 2015 08:51:24 -0700
Subject: net: Add netif_is_l3_slave

IPv6 addrconf keys off of IFF_SLAVE so can not use it for L3 slave.
Add a new private flag and add netif_is_l3_slave function for checking
it.

Signed-off-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vrf.c         | 10 ++++------
 include/linux/netdevice.h |  7 +++++++
 net/l3mdev/l3mdev.c       |  8 ++++----
 3 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 4fd5af1acff0..8713317eed86 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -39,8 +39,6 @@
 #define DRV_NAME	"vrf"
 #define DRV_VERSION	"1.0"
 
-#define vrf_is_slave(dev)   ((dev)->flags & IFF_SLAVE)
-
 #define vrf_master_get_rcu(dev) \
 	((struct net_device *)rcu_dereference(dev->rx_handler_data))
 
@@ -433,7 +431,7 @@ static int do_vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 	if (ret < 0)
 		goto out_unregister;
 
-	port_dev->flags |= IFF_SLAVE;
+	port_dev->priv_flags |= IFF_L3MDEV_SLAVE;
 	__vrf_insert_slave(queue, slave);
 	cycle_netdev(port_dev);
 
@@ -448,7 +446,7 @@ out_fail:
 
 static int vrf_add_slave(struct net_device *dev, struct net_device *port_dev)
 {
-	if (netif_is_l3_master(port_dev) || vrf_is_slave(port_dev))
+	if (netif_is_l3_master(port_dev) || netif_is_l3_slave(port_dev))
 		return -EINVAL;
 
 	return do_vrf_add_slave(dev, port_dev);
@@ -462,7 +460,7 @@ static int do_vrf_del_slave(struct net_device *dev, struct net_device *port_dev)
 	struct slave *slave;
 
 	netdev_upper_dev_unlink(port_dev, dev);
-	port_dev->flags &= ~IFF_SLAVE;
+	port_dev->priv_flags &= ~IFF_L3MDEV_SLAVE;
 
 	netdev_rx_handler_unregister(port_dev);
 
@@ -672,7 +670,7 @@ static int vrf_device_event(struct notifier_block *unused,
 	if (event == NETDEV_UNREGISTER) {
 		struct net_device *vrf_dev;
 
-		if (!vrf_is_slave(dev) || netif_is_l3_master(dev))
+		if (!netif_is_l3_slave(dev))
 			goto out;
 
 		vrf_dev = netdev_master_upper_dev_get(dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b9450784ae06..b3374402c1ea 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1261,6 +1261,7 @@ struct net_device_ops {
  * @IFF_L3MDEV_MASTER: device is an L3 master device
  * @IFF_NO_QUEUE: device can run without qdisc attached
  * @IFF_OPENVSWITCH: device is a Open vSwitch master
+ * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
  */
 enum netdev_priv_flags {
 	IFF_802_1Q_VLAN			= 1<<0,
@@ -1286,6 +1287,7 @@ enum netdev_priv_flags {
 	IFF_L3MDEV_MASTER		= 1<<20,
 	IFF_NO_QUEUE			= 1<<21,
 	IFF_OPENVSWITCH			= 1<<22,
+	IFF_L3MDEV_SLAVE		= 1<<23,
 };
 
 #define IFF_802_1Q_VLAN			IFF_802_1Q_VLAN
@@ -3830,6 +3832,11 @@ static inline bool netif_is_l3_master(const struct net_device *dev)
 	return dev->priv_flags & IFF_L3MDEV_MASTER;
 }
 
+static inline bool netif_is_l3_slave(const struct net_device *dev)
+{
+	return dev->priv_flags & IFF_L3MDEV_SLAVE;
+}
+
 static inline bool netif_is_bridge_master(const struct net_device *dev)
 {
 	return dev->priv_flags & IFF_EBRIDGE;
diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c
index ddf75ad41713..8e5ead366e7f 100644
--- a/net/l3mdev/l3mdev.c
+++ b/net/l3mdev/l3mdev.c
@@ -26,11 +26,11 @@ int l3mdev_master_ifindex_rcu(struct net_device *dev)
 
 	if (netif_is_l3_master(dev)) {
 		ifindex = dev->ifindex;
-	} else if (dev->flags & IFF_SLAVE) {
+	} else if (netif_is_l3_slave(dev)) {
 		struct net_device *master;
 
 		master = netdev_master_upper_dev_get_rcu(dev);
-		if (master && netif_is_l3_master(master))
+		if (master)
 			ifindex = master->ifindex;
 	}
 
@@ -54,7 +54,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev)
 	if (netif_is_l3_master(dev)) {
 		if (dev->l3mdev_ops->l3mdev_fib_table)
 			tb_id = dev->l3mdev_ops->l3mdev_fib_table(dev);
-	} else if (dev->flags & IFF_SLAVE) {
+	} else if (netif_is_l3_slave(dev)) {
 		/* Users of netdev_master_upper_dev_get_rcu need non-const,
 		 * but current inet_*type functions take a const
 		 */
@@ -62,7 +62,7 @@ u32 l3mdev_fib_table_rcu(const struct net_device *dev)
 		const struct net_device *master;
 
 		master = netdev_master_upper_dev_get_rcu(_dev);
-		if (master && netif_is_l3_master(master) &&
+		if (master &&
 		    master->l3mdev_ops->l3mdev_fib_table)
 			tb_id = master->l3mdev_ops->l3mdev_fib_table(master);
 	}
-- 
cgit v1.2.3


From 573c7ba006edbecff0714db651dd3602b9d0a6a0 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Fri, 16 Oct 2015 14:01:22 +0200
Subject: net: introduce pre-change upper device notifier

This newly introduced netdevice notifier is called before actual change
upper happens. That provides a possibility for notifier handlers to
know upper change will happen and react to it, including possibility to
forbid the change. That is valuable for drivers which can check if the
upper device linkage is supported and forbid that in case it is not.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 9 +++++++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b3374402c1ea..69fdd427c8cb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2106,6 +2106,7 @@ struct pcpu_sw_netstats {
 #define NETDEV_PRECHANGEMTU	0x0017 /* notify before mtu change happened */
 #define NETDEV_CHANGEINFODATA	0x0018
 #define NETDEV_BONDING_INFO	0x0019
+#define NETDEV_PRECHANGEUPPER	0x001A
 
 int register_netdevice_notifier(struct notifier_block *nb);
 int unregister_netdevice_notifier(struct notifier_block *nb);
diff --git a/net/core/dev.c b/net/core/dev.c
index a229bf0d649d..1225b4be8ed6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5346,6 +5346,12 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	changeupper_info.master = master;
 	changeupper_info.linking = true;
 
+	ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+					    &changeupper_info.info);
+	ret = notifier_to_errno(ret);
+	if (ret)
+		return ret;
+
 	ret = __netdev_adjacent_dev_link_neighbour(dev, upper_dev, private,
 						   master);
 	if (ret)
@@ -5488,6 +5494,9 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 	changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev;
 	changeupper_info.linking = false;
 
+	call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev,
+				      &changeupper_info.info);
+
 	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
 	/* Here is the tricky part. We must remove all dev's lower
-- 
cgit v1.2.3


From fc4099f17240767554ff3a73977acb78ef615404 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Thu, 22 Oct 2015 18:17:16 -0700
Subject: openvswitch: Fix egress tunnel info.

While transitioning to netdev based vport we broke OVS
feature which allows user to retrieve tunnel packet egress
information for lwtunnel devices.  Following patch fixes it
by introducing ndo operation to get the tunnel egress info.
Same ndo operation can be used for lwtunnel devices and compat
ovs-tnl-vport devices. So after adding such device operation
we can remove similar operation from ovs-vport.

Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device").
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c           | 40 ++++++++++++++++++++++++-----
 drivers/net/vxlan.c            | 41 +++++++++++++++++++++++++++++
 include/linux/netdevice.h      |  7 +++++
 include/net/dst_metadata.h     | 32 +++++++++++++++++++++++
 net/core/dev.c                 | 27 ++++++++++++++++++++
 net/ipv4/ip_gre.c              | 46 ++++++++++++++++++++++++++-------
 net/openvswitch/actions.c      |  9 +++----
 net/openvswitch/datapath.c     |  5 ++--
 net/openvswitch/datapath.h     |  1 -
 net/openvswitch/flow_netlink.c | 18 +++++--------
 net/openvswitch/flow_netlink.h |  6 ++---
 net/openvswitch/vport-geneve.c | 13 ----------
 net/openvswitch/vport-gre.c    |  8 ------
 net/openvswitch/vport-vxlan.c  | 19 --------------
 net/openvswitch/vport.c        | 58 ------------------------------------------
 net/openvswitch/vport.h        | 35 -------------------------
 16 files changed, 192 insertions(+), 173 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index cde29f8a37bf..445071c163cb 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -594,14 +594,12 @@ static struct rtable *geneve_get_rt(struct sk_buff *skb,
 	rt = ip_route_output_key(geneve->net, fl4);
 	if (IS_ERR(rt)) {
 		netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
-		dev->stats.tx_carrier_errors++;
-		return rt;
+		return ERR_PTR(-ENETUNREACH);
 	}
 	if (rt->dst.dev == dev) { /* is this necessary? */
 		netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
-		dev->stats.collisions++;
 		ip_rt_put(rt);
-		return ERR_PTR(-EINVAL);
+		return ERR_PTR(-ELOOP);
 	}
 	return rt;
 }
@@ -627,12 +625,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct ip_tunnel_info *info = NULL;
 	struct rtable *rt = NULL;
 	const struct iphdr *iip; /* interior IP header */
+	int err = -EINVAL;
 	struct flowi4 fl4;
 	__u8 tos, ttl;
 	__be16 sport;
 	bool udp_csum;
 	__be16 df;
-	int err;
 
 	if (geneve->collect_md) {
 		info = skb_tunnel_info(skb);
@@ -647,7 +645,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 	rt = geneve_get_rt(skb, dev, &fl4, info);
 	if (IS_ERR(rt)) {
 		netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
-		dev->stats.tx_carrier_errors++;
+		err = PTR_ERR(rt);
 		goto tx_error;
 	}
 
@@ -699,10 +697,37 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 tx_error:
 	dev_kfree_skb(skb);
 err:
-	dev->stats.tx_errors++;
+	if (err == -ELOOP)
+		dev->stats.collisions++;
+	else if (err == -ENETUNREACH)
+		dev->stats.tx_carrier_errors++;
+	else
+		dev->stats.tx_errors++;
 	return NETDEV_TX_OK;
 }
 
+static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ip_tunnel_info *info = skb_tunnel_info(skb);
+	struct geneve_dev *geneve = netdev_priv(dev);
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	if (ip_tunnel_info_af(info) != AF_INET)
+		return -EINVAL;
+
+	rt = geneve_get_rt(skb, dev, &fl4, info);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	ip_rt_put(rt);
+	info->key.u.ipv4.src = fl4.saddr;
+	info->key.tp_src = udp_flow_src_port(geneve->net, skb,
+					     1, USHRT_MAX, true);
+	info->key.tp_dst = geneve->dst_port;
+	return 0;
+}
+
 static const struct net_device_ops geneve_netdev_ops = {
 	.ndo_init		= geneve_init,
 	.ndo_uninit		= geneve_uninit,
@@ -713,6 +738,7 @@ static const struct net_device_ops geneve_netdev_ops = {
 	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_fill_metadata_dst	= geneve_fill_metadata_dst,
 };
 
 static void geneve_get_drvinfo(struct net_device *dev,
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index afdc65fd5bc5..c1587ece28cf 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2337,6 +2337,46 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
+static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
+				struct ip_tunnel_info *info,
+				__be16 sport, __be16 dport)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	memset(&fl4, 0, sizeof(fl4));
+	fl4.flowi4_tos = RT_TOS(info->key.tos);
+	fl4.flowi4_mark = skb->mark;
+	fl4.flowi4_proto = IPPROTO_UDP;
+	fl4.daddr = info->key.u.ipv4.dst;
+
+	rt = ip_route_output_key(vxlan->net, &fl4);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+	ip_rt_put(rt);
+
+	info->key.u.ipv4.src = fl4.saddr;
+	info->key.tp_src = sport;
+	info->key.tp_dst = dport;
+	return 0;
+}
+
+static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct vxlan_dev *vxlan = netdev_priv(dev);
+	struct ip_tunnel_info *info = skb_tunnel_info(skb);
+	__be16 sport, dport;
+
+	sport = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
+				  vxlan->cfg.port_max, true);
+	dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
+
+	if (ip_tunnel_info_af(info) == AF_INET)
+		return egress_ipv4_tun_info(dev, skb, info, sport, dport);
+	return -EINVAL;
+}
+
 static const struct net_device_ops vxlan_netdev_ops = {
 	.ndo_init		= vxlan_init,
 	.ndo_uninit		= vxlan_uninit,
@@ -2351,6 +2391,7 @@ static const struct net_device_ops vxlan_netdev_ops = {
 	.ndo_fdb_add		= vxlan_fdb_add,
 	.ndo_fdb_del		= vxlan_fdb_delete,
 	.ndo_fdb_dump		= vxlan_fdb_dump,
+	.ndo_fill_metadata_dst	= vxlan_fill_metadata_dst,
 };
 
 /* Info for udev, that this is a virtual tunnel endpoint */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2d15e3831440..210d11a75e4f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1054,6 +1054,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  *	This function is used to pass protocol port error state information
  *	to the switch driver. The switch driver can react to the proto_down
  *      by doing a phys down on the associated switch port.
+ * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb);
+ *	This function is used to get egress tunnel information for given skb.
+ *	This is useful for retrieving outer tunnel header parameters while
+ *	sampling packet.
  *
  */
 struct net_device_ops {
@@ -1227,6 +1231,8 @@ struct net_device_ops {
 	int			(*ndo_get_iflink)(const struct net_device *dev);
 	int			(*ndo_change_proto_down)(struct net_device *dev,
 							 bool proto_down);
+	int			(*ndo_fill_metadata_dst)(struct net_device *dev,
+						       struct sk_buff *skb);
 };
 
 /**
@@ -2203,6 +2209,7 @@ void dev_add_offload(struct packet_offload *po);
 void dev_remove_offload(struct packet_offload *po);
 
 int dev_get_iflink(const struct net_device *dev);
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb);
 struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags,
 				      unsigned short mask);
 struct net_device *dev_get_by_name(struct net *net, const char *name);
diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h
index af9d5382f6cb..ce009710120c 100644
--- a/include/net/dst_metadata.h
+++ b/include/net/dst_metadata.h
@@ -60,6 +60,38 @@ static inline struct metadata_dst *tun_rx_dst(int md_size)
 	return tun_dst;
 }
 
+static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb)
+{
+	struct metadata_dst *md_dst = skb_metadata_dst(skb);
+	int md_size = md_dst->u.tun_info.options_len;
+	struct metadata_dst *new_md;
+
+	if (!md_dst)
+		return ERR_PTR(-EINVAL);
+
+	new_md = metadata_dst_alloc(md_size, GFP_ATOMIC);
+	if (!new_md)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(&new_md->u.tun_info, &md_dst->u.tun_info,
+	       sizeof(struct ip_tunnel_info) + md_size);
+	skb_dst_drop(skb);
+	dst_hold(&new_md->dst);
+	skb_dst_set(skb, &new_md->dst);
+	return new_md;
+}
+
+static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb)
+{
+	struct metadata_dst *dst;
+
+	dst = tun_dst_unclone(skb);
+	if (IS_ERR(dst))
+		return NULL;
+
+	return &dst->u.tun_info;
+}
+
 static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb,
 						 __be16 flags,
 						 __be64 tunnel_id,
diff --git a/net/core/dev.c b/net/core/dev.c
index 6bb6470f5b7b..c14748d051e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -99,6 +99,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/stat.h>
 #include <net/dst.h>
+#include <net/dst_metadata.h>
 #include <net/pkt_sched.h>
 #include <net/checksum.h>
 #include <net/xfrm.h>
@@ -681,6 +682,32 @@ int dev_get_iflink(const struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_get_iflink);
 
+/**
+ *	dev_fill_metadata_dst - Retrieve tunnel egress information.
+ *	@dev: targeted interface
+ *	@skb: The packet.
+ *
+ *	For better visibility of tunnel traffic OVS needs to retrieve
+ *	egress tunnel information for a packet. Following API allows
+ *	user to get this info.
+ */
+int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ip_tunnel_info *info;
+
+	if (!dev->netdev_ops  || !dev->netdev_ops->ndo_fill_metadata_dst)
+		return -EINVAL;
+
+	info = skb_tunnel_info_unclone(skb);
+	if (!info)
+		return -ENOMEM;
+	if (unlikely(!(info->mode & IP_TUNNEL_INFO_TX)))
+		return -EINVAL;
+
+	return dev->netdev_ops->ndo_fill_metadata_dst(dev, skb);
+}
+EXPORT_SYMBOL_GPL(dev_fill_metadata_dst);
+
 /**
  *	__dev_get_by_name	- find a device by its name
  *	@net: the applicable net namespace
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index bd0679d90519..614521437e30 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -498,10 +498,26 @@ static struct sk_buff *gre_handle_offloads(struct sk_buff *skb,
 					csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
 }
 
+static struct rtable *gre_get_rt(struct sk_buff *skb,
+				 struct net_device *dev,
+				 struct flowi4 *fl,
+				 const struct ip_tunnel_key *key)
+{
+	struct net *net = dev_net(dev);
+
+	memset(fl, 0, sizeof(*fl));
+	fl->daddr = key->u.ipv4.dst;
+	fl->saddr = key->u.ipv4.src;
+	fl->flowi4_tos = RT_TOS(key->tos);
+	fl->flowi4_mark = skb->mark;
+	fl->flowi4_proto = IPPROTO_GRE;
+
+	return ip_route_output_key(net, fl);
+}
+
 static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_tunnel_info *tun_info;
-	struct net *net = dev_net(dev);
 	const struct ip_tunnel_key *key;
 	struct flowi4 fl;
 	struct rtable *rt;
@@ -516,14 +532,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto err_free_skb;
 
 	key = &tun_info->key;
-	memset(&fl, 0, sizeof(fl));
-	fl.daddr = key->u.ipv4.dst;
-	fl.saddr = key->u.ipv4.src;
-	fl.flowi4_tos = RT_TOS(key->tos);
-	fl.flowi4_mark = skb->mark;
-	fl.flowi4_proto = IPPROTO_GRE;
-
-	rt = ip_route_output_key(net, &fl);
+	rt = gre_get_rt(skb, dev, &fl, key);
 	if (IS_ERR(rt))
 		goto err_free_skb;
 
@@ -566,6 +575,24 @@ err_free_skb:
 	dev->stats.tx_dropped++;
 }
 
+static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ip_tunnel_info *info = skb_tunnel_info(skb);
+	struct rtable *rt;
+	struct flowi4 fl4;
+
+	if (ip_tunnel_info_af(info) != AF_INET)
+		return -EINVAL;
+
+	rt = gre_get_rt(skb, dev, &fl4, &info->key);
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
+
+	ip_rt_put(rt);
+	info->key.u.ipv4.src = fl4.saddr;
+	return 0;
+}
+
 static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
 			      struct net_device *dev)
 {
@@ -1023,6 +1050,7 @@ static const struct net_device_ops gre_tap_netdev_ops = {
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
 	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_fill_metadata_dst	= gre_fill_metadata_dst,
 };
 
 static void ipgre_tap_setup(struct net_device *dev)
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index c6a39bf2c3b9..0bf0f406de52 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -768,7 +768,6 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 			    struct sw_flow_key *key, const struct nlattr *attr,
 			    const struct nlattr *actions, int actions_len)
 {
-	struct ip_tunnel_info info;
 	struct dp_upcall_info upcall;
 	const struct nlattr *a;
 	int rem;
@@ -796,11 +795,9 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 			if (vport) {
 				int err;
 
-				upcall.egress_tun_info = &info;
-				err = ovs_vport_get_egress_tun_info(vport, skb,
-								    &upcall);
-				if (err)
-					upcall.egress_tun_info = NULL;
+				err = dev_fill_metadata_dst(vport->dev, skb);
+				if (!err)
+					upcall.egress_tun_info = skb_tunnel_info(skb);
 			}
 
 			break;
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index b816ff871528..c5d08ee37730 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -490,9 +490,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 
 	if (upcall_info->egress_tun_info) {
 		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_EGRESS_TUN_KEY);
-		err = ovs_nla_put_egress_tunnel_key(user_skb,
-						    upcall_info->egress_tun_info,
-						    upcall_info->egress_tun_opts);
+		err = ovs_nla_put_tunnel_info(user_skb,
+					      upcall_info->egress_tun_info);
 		BUG_ON(err);
 		nla_nest_end(user_skb, nla);
 	}
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index f88038a99f44..67bdecd9fdc1 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -117,7 +117,6 @@ struct ovs_skb_cb {
  */
 struct dp_upcall_info {
 	struct ip_tunnel_info *egress_tun_info;
-	const void *egress_tun_opts;
 	const struct nlattr *userdata;
 	const struct nlattr *actions;
 	int actions_len;
diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c
index bd710bc37469..38536c137c54 100644
--- a/net/openvswitch/flow_netlink.c
+++ b/net/openvswitch/flow_netlink.c
@@ -717,7 +717,7 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb,
 	if ((output->tun_flags & TUNNEL_OAM) &&
 	    nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM))
 		return -EMSGSIZE;
-	if (tun_opts) {
+	if (swkey_tun_opts_len) {
 		if (output->tun_flags & TUNNEL_GENEVE_OPT &&
 		    nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS,
 			    swkey_tun_opts_len, tun_opts))
@@ -749,13 +749,12 @@ static int ipv4_tun_to_nlattr(struct sk_buff *skb,
 	return 0;
 }
 
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *skb,
-				  const struct ip_tunnel_info *egress_tun_info,
-				  const void *egress_tun_opts)
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+			    struct ip_tunnel_info *tun_info)
 {
-	return __ipv4_tun_to_nlattr(skb, &egress_tun_info->key,
-				    egress_tun_opts,
-				    egress_tun_info->options_len);
+	return __ipv4_tun_to_nlattr(skb, &tun_info->key,
+				    ip_tunnel_info_opts(tun_info),
+				    tun_info->options_len);
 }
 
 static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match,
@@ -2383,10 +2382,7 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb)
 		if (!start)
 			return -EMSGSIZE;
 
-		err = ipv4_tun_to_nlattr(skb, &tun_info->key,
-					 tun_info->options_len ?
-					     ip_tunnel_info_opts(tun_info) : NULL,
-					 tun_info->options_len);
+		err = ovs_nla_put_tunnel_info(skb, tun_info);
 		if (err)
 			return err;
 		nla_nest_end(skb, start);
diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h
index 6ca3f0baf449..47dd142eca1c 100644
--- a/net/openvswitch/flow_netlink.h
+++ b/net/openvswitch/flow_netlink.h
@@ -55,9 +55,9 @@ int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb);
 int ovs_nla_get_match(struct net *, struct sw_flow_match *,
 		      const struct nlattr *key, const struct nlattr *mask,
 		      bool log);
-int ovs_nla_put_egress_tunnel_key(struct sk_buff *,
-				  const struct ip_tunnel_info *,
-				  const void *egress_tun_opts);
+
+int ovs_nla_put_tunnel_info(struct sk_buff *skb,
+			    struct ip_tunnel_info *tun_info);
 
 bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log);
 int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid,
diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c
index 2735e9c4a3b8..5f8aaaaa0785 100644
--- a/net/openvswitch/vport-geneve.c
+++ b/net/openvswitch/vport-geneve.c
@@ -52,18 +52,6 @@ static int geneve_get_options(const struct vport *vport,
 	return 0;
 }
 
-static int geneve_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-				      struct dp_upcall_info *upcall)
-{
-	struct geneve_port *geneve_port = geneve_vport(vport);
-	struct net *net = ovs_dp_get_net(vport->dp);
-	__be16 dport = htons(geneve_port->port_no);
-	__be16 sport = udp_flow_src_port(net, skb, 1, USHRT_MAX, true);
-
-	return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
-					  skb, IPPROTO_UDP, sport, dport);
-}
-
 static struct vport *geneve_tnl_create(const struct vport_parms *parms)
 {
 	struct net *net = ovs_dp_get_net(parms->dp);
@@ -130,7 +118,6 @@ static struct vport_ops ovs_geneve_vport_ops = {
 	.get_options	= geneve_get_options,
 	.send		= ovs_netdev_send,
 	.owner          = THIS_MODULE,
-	.get_egress_tun_info	= geneve_get_egress_tun_info,
 };
 
 static int __init ovs_geneve_tnl_init(void)
diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c
index 4d24481669c9..64225bf5eb40 100644
--- a/net/openvswitch/vport-gre.c
+++ b/net/openvswitch/vport-gre.c
@@ -84,18 +84,10 @@ static struct vport *gre_create(const struct vport_parms *parms)
 	return ovs_netdev_link(vport, parms->name);
 }
 
-static int gre_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-				   struct dp_upcall_info *upcall)
-{
-	return ovs_tunnel_get_egress_info(upcall, ovs_dp_get_net(vport->dp),
-					  skb, IPPROTO_GRE, 0, 0);
-}
-
 static struct vport_ops ovs_gre_vport_ops = {
 	.type		= OVS_VPORT_TYPE_GRE,
 	.create		= gre_create,
 	.send		= ovs_netdev_send,
-	.get_egress_tun_info	= gre_get_egress_tun_info,
 	.destroy	= ovs_netdev_tunnel_destroy,
 	.owner		= THIS_MODULE,
 };
diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c
index c11413d5075f..e1c9c0888037 100644
--- a/net/openvswitch/vport-vxlan.c
+++ b/net/openvswitch/vport-vxlan.c
@@ -146,31 +146,12 @@ static struct vport *vxlan_create(const struct vport_parms *parms)
 	return ovs_netdev_link(vport, parms->name);
 }
 
-static int vxlan_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-				     struct dp_upcall_info *upcall)
-{
-	struct vxlan_dev *vxlan = netdev_priv(vport->dev);
-	struct net *net = ovs_dp_get_net(vport->dp);
-	__be16 dst_port = vxlan_dev_dst_port(vxlan);
-	__be16 src_port;
-	int port_min;
-	int port_max;
-
-	inet_get_local_port_range(net, &port_min, &port_max);
-	src_port = udp_flow_src_port(net, skb, 0, 0, true);
-
-	return ovs_tunnel_get_egress_info(upcall, net,
-					  skb, IPPROTO_UDP,
-					  src_port, dst_port);
-}
-
 static struct vport_ops ovs_vxlan_netdev_vport_ops = {
 	.type			= OVS_VPORT_TYPE_VXLAN,
 	.create			= vxlan_create,
 	.destroy		= ovs_netdev_tunnel_destroy,
 	.get_options		= vxlan_get_options,
 	.send			= ovs_netdev_send,
-	.get_egress_tun_info	= vxlan_get_egress_tun_info,
 };
 
 static int __init ovs_vxlan_tnl_init(void)
diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c
index 12a36ac21eda..320c765ce44a 100644
--- a/net/openvswitch/vport.c
+++ b/net/openvswitch/vport.c
@@ -479,61 +479,3 @@ void ovs_vport_deferred_free(struct vport *vport)
 	call_rcu(&vport->rcu, free_vport_rcu);
 }
 EXPORT_SYMBOL_GPL(ovs_vport_deferred_free);
-
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
-			       struct net *net,
-			       struct sk_buff *skb,
-			       u8 ipproto,
-			       __be16 tp_src,
-			       __be16 tp_dst)
-{
-	struct ip_tunnel_info *egress_tun_info = upcall->egress_tun_info;
-	const struct ip_tunnel_info *tun_info = skb_tunnel_info(skb);
-	const struct ip_tunnel_key *tun_key;
-	u32 skb_mark = skb->mark;
-	struct rtable *rt;
-	struct flowi4 fl;
-
-	if (unlikely(!tun_info))
-		return -EINVAL;
-	if (ip_tunnel_info_af(tun_info) != AF_INET)
-		return -EINVAL;
-
-	tun_key = &tun_info->key;
-
-	/* Route lookup to get srouce IP address.
-	 * The process may need to be changed if the corresponding process
-	 * in vports ops changed.
-	 */
-	rt = ovs_tunnel_route_lookup(net, tun_key, skb_mark, &fl, ipproto);
-	if (IS_ERR(rt))
-		return PTR_ERR(rt);
-
-	ip_rt_put(rt);
-
-	/* Generate egress_tun_info based on tun_info,
-	 * saddr, tp_src and tp_dst
-	 */
-	ip_tunnel_key_init(&egress_tun_info->key,
-			   fl.saddr, tun_key->u.ipv4.dst,
-			   tun_key->tos,
-			   tun_key->ttl,
-			   tp_src, tp_dst,
-			   tun_key->tun_id,
-			   tun_key->tun_flags);
-	egress_tun_info->options_len = tun_info->options_len;
-	egress_tun_info->mode = tun_info->mode;
-	upcall->egress_tun_opts = ip_tunnel_info_opts(egress_tun_info);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ovs_tunnel_get_egress_info);
-
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-				  struct dp_upcall_info *upcall)
-{
-	/* get_egress_tun_info() is only implemented on tunnel ports. */
-	if (unlikely(!vport->ops->get_egress_tun_info))
-		return -EINVAL;
-
-	return vport->ops->get_egress_tun_info(vport, skb, upcall);
-}
diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h
index a413f3ae6a7b..d341ad6f3afe 100644
--- a/net/openvswitch/vport.h
+++ b/net/openvswitch/vport.h
@@ -27,7 +27,6 @@
 #include <linux/skbuff.h>
 #include <linux/spinlock.h>
 #include <linux/u64_stats_sync.h>
-#include <net/route.h>
 
 #include "datapath.h"
 
@@ -53,16 +52,6 @@ int ovs_vport_set_upcall_portids(struct vport *, const struct nlattr *pids);
 int ovs_vport_get_upcall_portids(const struct vport *, struct sk_buff *);
 u32 ovs_vport_find_upcall_portid(const struct vport *, struct sk_buff *);
 
-int ovs_tunnel_get_egress_info(struct dp_upcall_info *upcall,
-			       struct net *net,
-			       struct sk_buff *,
-			       u8 ipproto,
-			       __be16 tp_src,
-			       __be16 tp_dst);
-
-int ovs_vport_get_egress_tun_info(struct vport *vport, struct sk_buff *skb,
-				  struct dp_upcall_info *upcall);
-
 /**
  * struct vport_portids - array of netlink portids of a vport.
  *                        must be protected by rcu.
@@ -140,8 +129,6 @@ struct vport_parms {
  * have any configuration.
  * @send: Send a packet on the device.
  * zero for dropped packets or negative for error.
- * @get_egress_tun_info: Get the egress tunnel 5-tuple and other info for
- * a packet.
  */
 struct vport_ops {
 	enum ovs_vport_type type;
@@ -154,9 +141,6 @@ struct vport_ops {
 	int (*get_options)(const struct vport *, struct sk_buff *);
 
 	void (*send)(struct vport *, struct sk_buff *);
-	int (*get_egress_tun_info)(struct vport *, struct sk_buff *,
-				   struct dp_upcall_info *upcall);
-
 	struct module *owner;
 	struct list_head list;
 };
@@ -215,25 +199,6 @@ static inline const char *ovs_vport_name(struct vport *vport)
 int ovs_vport_ops_register(struct vport_ops *ops);
 void ovs_vport_ops_unregister(struct vport_ops *ops);
 
-static inline struct rtable *ovs_tunnel_route_lookup(struct net *net,
-						     const struct ip_tunnel_key *key,
-						     u32 mark,
-						     struct flowi4 *fl,
-						     u8 protocol)
-{
-	struct rtable *rt;
-
-	memset(fl, 0, sizeof(*fl));
-	fl->daddr = key->u.ipv4.dst;
-	fl->saddr = key->u.ipv4.src;
-	fl->flowi4_tos = RT_TOS(key->tos);
-	fl->flowi4_mark = mark;
-	fl->flowi4_proto = protocol;
-
-	rt = ip_route_output_key(net, fl);
-	return rt;
-}
-
 static inline void ovs_vport_send(struct vport *vport, struct sk_buff *skb)
 {
 	vport->ops->send(vport, skb);
-- 
cgit v1.2.3


From dd461d6aa894761fe67c30ddf81eec0d08be216b Mon Sep 17 00:00:00 2001
From: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Date: Fri, 28 Aug 2015 06:57:55 +0000
Subject: if_link: Add control trust VF

Add netlink directives and ndo entry to trust VF user.

This controls the special permission of VF user.
The administrator will dedicatedly trust VF user to use some features
which impacts security and/or performance.

The administrator never turn it on unless VF user is fully trusted.

CC: Sy Jong Choi <sy.jong.choi@intel.com>
Signed-off-by: Hiroshi Shimamoto <h-shimamoto@ct.jp.nec.com>
Acked-by: Greg Rose <gregory.v.rose@intel.com>
Tested-by: Krishneil Singh <Krishneil.k.singh@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/if_link.h      |  1 +
 include/linux/netdevice.h    |  3 +++
 include/uapi/linux/if_link.h |  6 ++++++
 net/core/rtnetlink.c         | 24 +++++++++++++++++++++---
 4 files changed, 31 insertions(+), 3 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index ae5d0d22955d..f923d15b432c 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -24,5 +24,6 @@ struct ifla_vf_info {
 	__u32 min_tx_rate;
 	__u32 max_tx_rate;
 	__u32 rss_query_en;
+	__u32 trusted;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69fdd427c8cb..773383859bd9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -881,6 +881,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate,
  *			  int max_tx_rate);
  * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting);
+ * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
  * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
@@ -1109,6 +1110,8 @@ struct net_device_ops {
 						   int max_tx_rate);
 	int			(*ndo_set_vf_spoofchk)(struct net_device *dev,
 						       int vf, bool setting);
+	int			(*ndo_set_vf_trust)(struct net_device *dev,
+						    int vf, bool setting);
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e3b6217f34f1..a7aea8418abb 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -550,6 +550,7 @@ enum {
 				 * on/off switch
 				 */
 	IFLA_VF_STATS,		/* network device statistics */
+	IFLA_VF_TRUST,		/* Trust VF */
 	__IFLA_VF_MAX,
 };
 
@@ -611,6 +612,11 @@ enum {
 
 #define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
 
+struct ifla_vf_trust {
+	__u32 vf;
+	__u32 setting;
+};
+
 /* VF ports management section
  *
  *	Nested layout of set/get msg is:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7c78b5aca944..504bd17b7456 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -838,7 +838,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 /* IFLA_VF_STATS_BROADCAST */
 			 nla_total_size(sizeof(__u64)) +
 			 /* IFLA_VF_STATS_MULTICAST */
-			 nla_total_size(sizeof(__u64)));
+			 nla_total_size(sizeof(__u64)) +
+			 nla_total_size(sizeof(struct ifla_vf_trust)));
 		return size;
 	} else
 		return 0;
@@ -1161,6 +1162,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_link_state vf_linkstate;
 			struct ifla_vf_rss_query_en vf_rss_query_en;
 			struct ifla_vf_stats vf_stats;
+			struct ifla_vf_trust vf_trust;
 
 			/*
 			 * Not all SR-IOV capable drivers support the
@@ -1170,6 +1172,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			 */
 			ivi.spoofchk = -1;
 			ivi.rss_query_en = -1;
+			ivi.trusted = -1;
 			memset(ivi.mac, 0, sizeof(ivi.mac));
 			/* The default value for VF link state is "auto"
 			 * IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1183,7 +1186,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				vf_tx_rate.vf =
 				vf_spoofchk.vf =
 				vf_linkstate.vf =
-				vf_rss_query_en.vf = ivi.vf;
+				vf_rss_query_en.vf =
+				vf_trust.vf = ivi.vf;
 
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
@@ -1194,6 +1198,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			vf_spoofchk.setting = ivi.spoofchk;
 			vf_linkstate.link_state = ivi.linkstate;
 			vf_rss_query_en.setting = ivi.rss_query_en;
+			vf_trust.setting = ivi.trusted;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
 			if (!vf) {
 				nla_nest_cancel(skb, vfinfo);
@@ -1211,7 +1216,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				    &vf_linkstate) ||
 			    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
 				    sizeof(vf_rss_query_en),
-				    &vf_rss_query_en))
+				    &vf_rss_query_en) ||
+			    nla_put(skb, IFLA_VF_TRUST,
+				    sizeof(vf_trust), &vf_trust))
 				goto nla_put_failure;
 			memset(&vf_stats, 0, sizeof(vf_stats));
 			if (dev->netdev_ops->ndo_get_vf_stats)
@@ -1348,6 +1355,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_LINK_STATE]	= { .len = sizeof(struct ifla_vf_link_state) },
 	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
 	[IFLA_VF_STATS]		= { .type = NLA_NESTED },
+	[IFLA_VF_TRUST]		= { .len = sizeof(struct ifla_vf_trust) },
 };
 
 static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
@@ -1587,6 +1595,16 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
 			return err;
 	}
 
+	if (tb[IFLA_VF_TRUST]) {
+		struct ifla_vf_trust *ivt = nla_data(tb[IFLA_VF_TRUST]);
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_trust)
+			err = ops->ndo_set_vf_trust(dev, ivt->vf, ivt->setting);
+		if (err < 0)
+			return err;
+	}
+
 	return err;
 }
 
-- 
cgit v1.2.3


From c59f419bdd1f056e1ceacbd29f7be7bcff746a5d Mon Sep 17 00:00:00 2001
From: Helge Deller <deller@gmx.de>
Date: Sun, 25 Oct 2015 10:00:32 +0100
Subject: net/xps: Fix calculation of initial number of xps queues

The existing code breaks on architectures where the L1 cache size
(L1_CACHE_BYTES) is smaller or equal the size of struct xps_map.

The new code ensures that we get at minimum one initial xps queue, or even more
as long as it fits into the next multiple of L1_CACHE_SIZE.

Signed-off-by: Helge Deller <deller@gmx.de>
Acked-by: Alexander Duyck <aduyck@mirantis.com>
---
 include/linux/netdevice.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2d15e3831440..2212c8225deb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -718,8 +718,8 @@ struct xps_map {
 	u16 queues[0];
 };
 #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16)))
-#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map))	\
-    / sizeof(u16))
+#define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \
+       - sizeof(struct xps_map)) / sizeof(u16))
 
 /*
  * This structure holds all XPS maps for device.  Maps are indexed by CPU.
-- 
cgit v1.2.3


From 8f25348b65cd073f77945f559ab1e5de83422cd1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Wed, 4 Nov 2015 14:59:06 +0100
Subject: net: add forgotten IFF_L3MDEV_SLAVE define

Fixes: fee6d4c77 ("net: Add netif_is_l3_slave")
Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David Ahern <dsa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 4ac653b7b8ac..2c00772bd136 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1322,6 +1322,7 @@ enum netdev_priv_flags {
 #define IFF_L3MDEV_MASTER		IFF_L3MDEV_MASTER
 #define IFF_NO_QUEUE			IFF_NO_QUEUE
 #define IFF_OPENVSWITCH			IFF_OPENVSWITCH
+#define IFF_L3MDEV_SLAVE		IFF_L3MDEV_SLAVE
 
 /**
  *	struct net_device - The DEVICE structure.
-- 
cgit v1.2.3


From aabc92bbe3cfe4c545f8ccdaaeeea012a46f0abf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 10 Nov 2015 14:31:18 +0100
Subject: net: add __netdev_alloc_pcpu_stats() to indicate gfp flags

nf_tables may create percpu counters from the packet path through its
dynamic set instantiation infrastructure, so we need a way to allocate
this through GFP_ATOMIC.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2c00772bd136..e9d0c8a75380 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2068,20 +2068,23 @@ struct pcpu_sw_netstats {
 	struct u64_stats_sync   syncp;
 };
 
-#define netdev_alloc_pcpu_stats(type)				\
-({								\
-	typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \
-	if (pcpu_stats)	{					\
-		int __cpu;					\
-		for_each_possible_cpu(__cpu) {			\
-			typeof(type) *stat;			\
-			stat = per_cpu_ptr(pcpu_stats, __cpu);	\
-			u64_stats_init(&stat->syncp);		\
-		}						\
-	}							\
-	pcpu_stats;						\
+#define __netdev_alloc_pcpu_stats(type, gfp)				\
+({									\
+	typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\
+	if (pcpu_stats)	{						\
+		int __cpu;						\
+		for_each_possible_cpu(__cpu) {				\
+			typeof(type) *stat;				\
+			stat = per_cpu_ptr(pcpu_stats, __cpu);		\
+			u64_stats_init(&stat->syncp);			\
+		}							\
+	}								\
+	pcpu_stats;							\
 })
 
+#define netdev_alloc_pcpu_stats(type)					\
+	__netdev_alloc_pcpu_stats(type, GFP_KERNEL);
+
 #include <linux/notifier.h>
 
 /* netdevice notifier chain. Please remember to update the rtnetlink
-- 
cgit v1.2.3


From 28f9ee22bcdd84726dbf6267d0b58f254166b900 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Mon, 16 Nov 2015 15:43:45 -0500
Subject: vlan: Do not put vlan headers back on bridge and macvlan ports

When a vlan is configured with REORDER_HEADER set to 0, the vlan
header is put back into the packet and makes it appear that
the vlan header is still there even after it's been processed.
This posses a problem for bridge and macvlan ports.  The packets
passed to those device may be forwarded and at the time of the
forward, vlan headers end up being unexpectedly present.

With the patch, we make sure that we do not put the vlan header
back (when REORDER_HEADER is 0) if a bridge or macvlan has
been configured on top of the vlan device.

Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 5 +++++
 net/8021q/vlan_core.c     | 4 +++-
 2 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux/netdevice.h')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index cc221b967687..67bfac1abfc1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3857,6 +3857,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev)
 	return dev->priv_flags & IFF_EBRIDGE;
 }
 
+static inline bool netif_is_bridge_port(const struct net_device *dev)
+{
+	return dev->priv_flags & IFF_BRIDGE_PORT;
+}
+
 static inline bool netif_is_ovs_master(const struct net_device *dev)
 {
 	return dev->priv_flags & IFF_OPENVSWITCH;
diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c
index 496b27588493..e2ed69850489 100644
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -30,7 +30,9 @@ bool vlan_do_receive(struct sk_buff **skbp)
 			skb->pkt_type = PACKET_HOST;
 	}
 
-	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR)) {
+	if (!(vlan_dev_priv(vlan_dev)->flags & VLAN_FLAG_REORDER_HDR) &&
+	    !netif_is_macvlan_port(vlan_dev) &&
+	    !netif_is_bridge_port(vlan_dev)) {
 		unsigned int offset = skb->data - skb_mac_header(skb);
 
 		/*
-- 
cgit v1.2.3