From 6c5c9581044dd6e0cd284ab653502fb9264f08b6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 28 Aug 2018 14:44:28 +0200 Subject: net: add napi_if_scheduled_mark_missed The function napi_if_scheduled_mark_missed is used to check if the NAPI context is scheduled, if so set NAPIF_STATE_MISSED and return true. Used by the AF_XDP zero-copy i40e Tx code implementation in order to make sure that irq affinity is honored by the napi context. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- include/linux/netdevice.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..4271f6b4e419 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n) barrier(); } +/** + * napi_if_scheduled_mark_missed - if napi is running, set the + * NAPIF_STATE_MISSED + * @n: NAPI context + * + * If napi is running, set the NAPIF_STATE_MISSED, and return true if + * NAPI is scheduled. + **/ +static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (val & NAPIF_STATE_DISABLE) + return true; + + if (!(val & NAPIF_STATE_SCHED)) + return false; + + new = val | NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return true; +} + enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, -- cgit v1.2.3 From fa788d986a3aac5069378ed04697bd06f83d3488 Mon Sep 17 00:00:00 2001 From: Vincent Whitchurch Date: Mon, 3 Sep 2018 16:23:36 +0200 Subject: packet: add sockopt to ignore outgoing packets Currently, the only way to ignore outgoing packets on a packet socket is via the BPF filter. With MSG_ZEROCOPY, packets that are looped into AF_PACKET are copied in dev_queue_xmit_nit(), and this copy happens even if the filter run from packet_rcv() would reject them. So the presence of a packet socket on the interface takes away the benefits of MSG_ZEROCOPY, even if the packet socket is not interested in outgoing packets. (Even when MSG_ZEROCOPY is not used, the skb is unnecessarily cloned, but the cost for that is much lower.) Add a socket option to allow AF_PACKET sockets to ignore outgoing packets to solve this. Note that the *BSDs already have something similar: BIOCSSEESENT/BIOCSDIRECTION and BIOCSDIRFILT. The first intended user is lldpd. Signed-off-by: Vincent Whitchurch Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/uapi/linux/if_packet.h | 1 + net/core/dev.c | 3 +++ net/packet/af_packet.c | 17 +++++++++++++++++ 4 files changed, 22 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4271f6b4e419..e2b3bd750c98 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2343,6 +2343,7 @@ static inline struct sk_buff *call_gro_receive_sk(gro_receive_sk_t cb, struct packet_type { __be16 type; /* This is really htons(ether_type). */ + bool ignore_outgoing; struct net_device *dev; /* NULL is wildcarded here */ int (*func) (struct sk_buff *, struct net_device *, diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h index 67b61d91d89b..467b654bd4c7 100644 --- a/include/uapi/linux/if_packet.h +++ b/include/uapi/linux/if_packet.h @@ -57,6 +57,7 @@ struct sockaddr_ll { #define PACKET_QDISC_BYPASS 20 #define PACKET_ROLLOVER_STATS 21 #define PACKET_FANOUT_DATA 22 +#define PACKET_IGNORE_OUTGOING 23 #define PACKET_FANOUT_HASH 0 #define PACKET_FANOUT_LB 1 diff --git a/net/core/dev.c b/net/core/dev.c index 82114e1111e6..ca78dc5a79a3 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1969,6 +1969,9 @@ void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) rcu_read_lock(); again: list_for_each_entry_rcu(ptype, ptype_list, list) { + if (ptype->ignore_outgoing) + continue; + /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 75c92a87e7b2..f85f67b5c1f4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3805,6 +3805,20 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv return fanout_set_data(po, optval, optlen); } + case PACKET_IGNORE_OUTGOING: + { + int val; + + if (optlen != sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + if (val < 0 || val > 1) + return -EINVAL; + + po->prot_hook.ignore_outgoing = !!val; + return 0; + } case PACKET_TX_HAS_OFF: { unsigned int val; @@ -3928,6 +3942,9 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, ((u32)po->fanout->flags << 24)) : 0); break; + case PACKET_IGNORE_OUTGOING: + val = po->prot_hook.ignore_outgoing; + break; case PACKET_ROLLOVER_STATS: if (!po->rollover) return -EINVAL; -- cgit v1.2.3 From 52bb6677d530d37055092d86b4eab69dce6c166a Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 14 Sep 2018 16:00:51 +0800 Subject: net: move definition of pcpu_lstats to header file pcpu_lstats is defined in several files, so unify them as one and move to header file Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- drivers/net/loopback.c | 6 ------ drivers/net/nlmon.c | 6 ------ drivers/net/vsockmon.c | 14 ++++---------- include/linux/netdevice.h | 6 ++++++ 4 files changed, 10 insertions(+), 22 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index 30612497643c..a7207fa7e451 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -59,12 +59,6 @@ #include #include -struct pcpu_lstats { - u64 packets; - u64 bytes; - struct u64_stats_sync syncp; -}; - /* The higher levels take care of making this non-reentrant (it's * called with bh's disabled). */ diff --git a/drivers/net/nlmon.c b/drivers/net/nlmon.c index 4b22955de191..dd0db7534cb3 100644 --- a/drivers/net/nlmon.c +++ b/drivers/net/nlmon.c @@ -6,12 +6,6 @@ #include #include -struct pcpu_lstats { - u64 packets; - u64 bytes; - struct u64_stats_sync syncp; -}; - static netdev_tx_t nlmon_xmit(struct sk_buff *skb, struct net_device *dev) { int len = skb->len; diff --git a/drivers/net/vsockmon.c b/drivers/net/vsockmon.c index c28bdce14fd5..7bad5c95551f 100644 --- a/drivers/net/vsockmon.c +++ b/drivers/net/vsockmon.c @@ -11,12 +11,6 @@ #define DEFAULT_MTU (VIRTIO_VSOCK_MAX_PKT_BUF_SIZE + \ sizeof(struct af_vsockmon_hdr)) -struct pcpu_lstats { - u64 rx_packets; - u64 rx_bytes; - struct u64_stats_sync syncp; -}; - static int vsockmon_dev_init(struct net_device *dev) { dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); @@ -56,8 +50,8 @@ static netdev_tx_t vsockmon_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats); u64_stats_update_begin(&stats->syncp); - stats->rx_bytes += len; - stats->rx_packets++; + stats->bytes += len; + stats->packets++; u64_stats_update_end(&stats->syncp); dev_kfree_skb(skb); @@ -80,8 +74,8 @@ vsockmon_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) do { start = u64_stats_fetch_begin_irq(&vstats->syncp); - tbytes = vstats->rx_bytes; - tpackets = vstats->rx_packets; + tbytes = vstats->bytes; + tpackets = vstats->packets; } while (u64_stats_fetch_retry_irq(&vstats->syncp, start)); packets += tpackets; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e2b3bd750c98..baed5d5088c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2382,6 +2382,12 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; +struct pcpu_lstats { + u64 packets; + u64 bytes; + struct u64_stats_sync syncp; +}; + #define __netdev_alloc_pcpu_stats(type, gfp) \ ({ \ typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ -- cgit v1.2.3 From 14d73416792afa84f6a7245ee474d2432069da56 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Mon, 17 Sep 2018 18:46:55 +0800 Subject: veth: rename pcpu_vstats as pcpu_lstats struct pcpu_vstats and pcpu_lstats have same members and usage, and pcpu_lstats is used in many files, so rename pcpu_vstats as pcpu_lstats to reduce duplicate definition Signed-off-by: Zhang Yu Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- drivers/net/veth.c | 22 ++++++++-------------- include/linux/netdevice.h | 1 - 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 8fc64b67f01e..224c56a4e2b1 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -37,12 +37,6 @@ #define VETH_XDP_TX BIT(0) #define VETH_XDP_REDIR BIT(1) -struct pcpu_vstats { - u64 packets; - u64 bytes; - struct u64_stats_sync syncp; -}; - struct veth_rq { struct napi_struct xdp_napi; struct net_device *dev; @@ -217,7 +211,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) skb_tx_timestamp(skb); if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) { - struct pcpu_vstats *stats = this_cpu_ptr(dev->vstats); + struct pcpu_lstats *stats = this_cpu_ptr(dev->lstats); u64_stats_update_begin(&stats->syncp); stats->bytes += length; @@ -236,7 +230,7 @@ drop: return NETDEV_TX_OK; } -static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) +static u64 veth_stats_one(struct pcpu_lstats *result, struct net_device *dev) { struct veth_priv *priv = netdev_priv(dev); int cpu; @@ -244,7 +238,7 @@ static u64 veth_stats_one(struct pcpu_vstats *result, struct net_device *dev) result->packets = 0; result->bytes = 0; for_each_possible_cpu(cpu) { - struct pcpu_vstats *stats = per_cpu_ptr(dev->vstats, cpu); + struct pcpu_lstats *stats = per_cpu_ptr(dev->lstats, cpu); u64 packets, bytes; unsigned int start; @@ -264,7 +258,7 @@ static void veth_get_stats64(struct net_device *dev, { struct veth_priv *priv = netdev_priv(dev); struct net_device *peer; - struct pcpu_vstats one; + struct pcpu_lstats one; tot->tx_dropped = veth_stats_one(&one, dev); tot->tx_bytes = one.bytes; @@ -830,13 +824,13 @@ static int veth_dev_init(struct net_device *dev) { int err; - dev->vstats = netdev_alloc_pcpu_stats(struct pcpu_vstats); - if (!dev->vstats) + dev->lstats = netdev_alloc_pcpu_stats(struct pcpu_lstats); + if (!dev->lstats) return -ENOMEM; err = veth_alloc_queues(dev); if (err) { - free_percpu(dev->vstats); + free_percpu(dev->lstats); return err; } @@ -846,7 +840,7 @@ static int veth_dev_init(struct net_device *dev) static void veth_dev_free(struct net_device *dev) { veth_free_queues(dev); - free_percpu(dev->vstats); + free_percpu(dev->lstats); } #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index baed5d5088c5..1cbbf77a685f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2000,7 +2000,6 @@ struct net_device { struct pcpu_lstats __percpu *lstats; struct pcpu_sw_netstats __percpu *tstats; struct pcpu_dstats __percpu *dstats; - struct pcpu_vstats __percpu *vstats; }; #if IS_ENABLED(CONFIG_GARP) -- cgit v1.2.3 From 6194114324139dc16f3251c67ed853bd6d4ae056 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Mon, 24 Sep 2018 21:58:59 +0200 Subject: net: core: add member wol_enabled to struct net_device Add flag wol_enabled to struct net_device indicating whether Wake-on-LAN is enabled. As first user phy_suspend() will use it to decide whether PHY can be suspended or not. Fixes: f1e911d5d0df ("r8169: add basic phylib support") Fixes: e8cfd9d6c772 ("net: phy: call state machine synchronously in phy_stop") Signed-off-by: Heiner Kallweit Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ net/core/ethtool.c | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..c7861e4b402c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1730,6 +1730,8 @@ enum netdev_priv_flags { * switch driver and used to set the phys state of the * switch port. * + * @wol_enabled: Wake-on-LAN is enabled + * * FIXME: cleanup struct net_device such that network protocol info * moves out. */ @@ -2014,6 +2016,7 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; + unsigned wol_enabled:1; }; #define to_net_dev(d) container_of(d, struct net_device, dev) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 234a0ec2e932..0762aaf8e964 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1483,6 +1483,7 @@ static int ethtool_get_wol(struct net_device *dev, char __user *useraddr) static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) { struct ethtool_wolinfo wol; + int ret; if (!dev->ethtool_ops->set_wol) return -EOPNOTSUPP; @@ -1490,7 +1491,13 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (copy_from_user(&wol, useraddr, sizeof(wol))) return -EFAULT; - return dev->ethtool_ops->set_wol(dev, &wol); + ret = dev->ethtool_ops->set_wol(dev, &wol); + if (ret) + return ret; + + dev->wol_enabled = !!wol.wolopts; + + return 0; } static int ethtool_get_eee(struct net_device *dev, char __user *useraddr) -- cgit v1.2.3 From 661b8d1b0e3a745e25f05adef2ebd00d830eeea7 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Mon, 1 Oct 2018 14:51:33 +0200 Subject: net: add umem reference in netdev{_rx}_queue These references to the umem will be used to store information on what kind of AF_XDP umem that is bound to a queue id, if any. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1cbbf77a685f..8318f79586c2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -609,6 +609,9 @@ struct netdev_queue { /* Subordinate device that the queue has been assigned to */ struct net_device *sb_dev; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif /* * write-mostly part */ @@ -738,6 +741,9 @@ struct netdev_rx_queue { struct kobject kobj; struct net_device *dev; struct xdp_rxq_info xdp_rxq; +#ifdef CONFIG_XDP_SOCKETS + struct xdp_umem *umem; +#endif } ____cacheline_aligned_in_smp; /* -- cgit v1.2.3 From af7d6cce53694a88d6a1bb60c9a239a6a5144459 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 9 Oct 2018 17:48:14 +0200 Subject: net: ipv4: update fnhe_pmtu when first hop's MTU changes Since commit 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions"), exceptions get deprecated separately from cached routes. In particular, administrative changes don't clear PMTU anymore. As Stefano described in commit e9fa1495d738 ("ipv6: Reflect MTU changes on PMTU of exceptions for MTU-less routes"), the PMTU discovered before the local MTU change can become stale: - if the local MTU is now lower than the PMTU, that PMTU is now incorrect - if the local MTU was the lowest value in the path, and is increased, we might discover a higher PMTU Similarly to what commit e9fa1495d738 did for IPv6, update PMTU in those cases. If the exception was locked, the discovered PMTU was smaller than the minimal accepted PMTU. In that case, if the new local MTU is smaller than the current PMTU, let PMTU discovery figure out if locking of the exception is still needed. To do this, we need to know the old link MTU in the NETDEV_CHANGEMTU notifier. By the time the notifier is called, dev->mtu has been changed. This patch adds the old MTU as additional information in the notifier structure, and a new call_netdevice_notifiers_u32() function. Fixes: 5aad1de5ea2c ("ipv4: use separate genid for next hop exceptions") Signed-off-by: Sabrina Dubroca Reviewed-by: Stefano Brivio Reviewed-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ include/net/ip_fib.h | 1 + net/core/dev.c | 28 ++++++++++++++++++++++++-- net/ipv4/fib_frontend.c | 12 ++++++++---- net/ipv4/fib_semantics.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 92 insertions(+), 6 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c7861e4b402c..d837dad24b4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2458,6 +2458,13 @@ struct netdev_notifier_info { struct netlink_ext_ack *extack; }; +struct netdev_notifier_info_ext { + struct netdev_notifier_info info; /* must be first */ + union { + u32 mtu; + } ext; +}; + struct netdev_notifier_change_info { struct netdev_notifier_info info; /* must be first */ unsigned int flags_changed; diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 69c91d1934c1..c9b7b136939d 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -394,6 +394,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev); int fib_sync_down_dev(struct net_device *dev, unsigned long event, bool force); int fib_sync_down_addr(struct net_device *dev, __be32 local); int fib_sync_up(struct net_device *dev, unsigned int nh_flags); +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, diff --git a/net/core/dev.c b/net/core/dev.c index 82114e1111e6..93243479085f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1752,6 +1752,28 @@ int call_netdevice_notifiers(unsigned long val, struct net_device *dev) } EXPORT_SYMBOL(call_netdevice_notifiers); +/** + * call_netdevice_notifiers_mtu - call all network notifier blocks + * @val: value passed unmodified to notifier function + * @dev: net_device pointer passed unmodified to notifier function + * @arg: additional u32 argument passed to the notifier function + * + * Call all network notifier blocks. Parameters and return value + * are as for raw_notifier_call_chain(). + */ +static int call_netdevice_notifiers_mtu(unsigned long val, + struct net_device *dev, u32 arg) +{ + struct netdev_notifier_info_ext info = { + .info.dev = dev, + .ext.mtu = arg, + }; + + BUILD_BUG_ON(offsetof(struct netdev_notifier_info_ext, info) != 0); + + return call_netdevice_notifiers_info(val, &info.info); +} + #ifdef CONFIG_NET_INGRESS static DEFINE_STATIC_KEY_FALSE(ingress_needed_key); @@ -7574,14 +7596,16 @@ int dev_set_mtu_ext(struct net_device *dev, int new_mtu, err = __dev_set_mtu(dev, new_mtu); if (!err) { - err = call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + err = call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + orig_mtu); err = notifier_to_errno(err); if (err) { /* setting mtu back and notifying everyone again, * so that they have a chance to revert changes. */ __dev_set_mtu(dev, orig_mtu); - call_netdevice_notifiers(NETDEV_CHANGEMTU, dev); + call_netdevice_notifiers_mtu(NETDEV_CHANGEMTU, dev, + new_mtu); } } return err; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2998b0e47d4b..0113993e9b2c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1243,7 +1243,8 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct netdev_notifier_changeupper_info *info; + struct netdev_notifier_changeupper_info *upper_info = ptr; + struct netdev_notifier_info_ext *info_ext = ptr; struct in_device *in_dev; struct net *net = dev_net(dev); unsigned int flags; @@ -1278,16 +1279,19 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev, RTNH_F_LINKDOWN); else fib_sync_down_dev(dev, event, false); - /* fall through */ + rt_cache_flush(net); + break; case NETDEV_CHANGEMTU: + fib_sync_mtu(dev, info_ext->ext.mtu); rt_cache_flush(net); break; case NETDEV_CHANGEUPPER: - info = ptr; + upper_info = ptr; /* flush all routes if dev is linked to or unlinked from * an L3 master device (e.g., VRF) */ - if (info->upper_dev && netif_is_l3_master(info->upper_dev)) + if (upper_info->upper_dev && + netif_is_l3_master(upper_info->upper_dev)) fib_disable_ip(dev, NETDEV_DOWN, true); break; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f3c89ccf14c5..446204ca7406 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1470,6 +1470,56 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return NOTIFY_DONE; } +/* Update the PMTU of exceptions when: + * - the new MTU of the first hop becomes smaller than the PMTU + * - the old MTU was the same as the PMTU, and it limited discovery of + * larger MTUs on the path. With that limit raised, we can now + * discover larger MTUs + * A special case is locked exceptions, for which the PMTU is smaller + * than the minimal accepted PMTU: + * - if the new MTU is greater than the PMTU, don't make any change + * - otherwise, unlock and set PMTU + */ +static void nh_update_mtu(struct fib_nh *nh, u32 new, u32 orig) +{ + struct fnhe_hash_bucket *bucket; + int i; + + bucket = rcu_dereference_protected(nh->nh_exceptions, 1); + if (!bucket) + return; + + for (i = 0; i < FNHE_HASH_SIZE; i++) { + struct fib_nh_exception *fnhe; + + for (fnhe = rcu_dereference_protected(bucket[i].chain, 1); + fnhe; + fnhe = rcu_dereference_protected(fnhe->fnhe_next, 1)) { + if (fnhe->fnhe_mtu_locked) { + if (new <= fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + fnhe->fnhe_mtu_locked = false; + } + } else if (new < fnhe->fnhe_pmtu || + orig == fnhe->fnhe_pmtu) { + fnhe->fnhe_pmtu = new; + } + } + } +} + +void fib_sync_mtu(struct net_device *dev, u32 orig_mtu) +{ + unsigned int hash = fib_devindex_hashfn(dev->ifindex); + struct hlist_head *head = &fib_info_devhash[hash]; + struct fib_nh *nh; + + hlist_for_each_entry(nh, head, nh_hash) { + if (nh->nh_dev == dev) + nh_update_mtu(nh, dev->mtu, orig_mtu); + } +} + /* Event force Flags Description * NETDEV_CHANGE 0 LINKDOWN Carrier OFF, not for scope host * NETDEV_DOWN 0 LINKDOWN|DEAD Link down, not for scope host -- cgit v1.2.3 From 9f9a742db40f95f4dc20fc7293de4ea6ddb24e47 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Tue, 9 Oct 2018 23:57:49 +0100 Subject: FDDI: defza: Support capturing outgoing SMT traffic DEC FDDIcontroller 700 (DEFZA) uses a Tx/Rx queue pair to communicate SMT frames with adapter's firmware. Any SMT frame received from the RMC via the Rx queue is queued back by the driver to the SMT Rx queue for the firmware to process. Similarly the firmware uses the SMT Tx queue to supply the driver with SMT frames which are queued back to the Tx queue for the RMC to send to the ring. When a network tap is attached to an FDDI interface handled by `defza' any incoming SMT frames captured are queued to our usual processing of network data received, which in turn delivers them to any listening taps. However the outgoing SMT frames produced by the firmware bypass our network protocol stack and are therefore not delivered to taps. This in turn means that taps are missing a part of network traffic sent by the adapter, which may make it more difficult to track down network problems or do general traffic analysis. Call `dev_queue_xmit_nit' then in the SMT Tx path, having checked that a network tap is attached, with a newly-created `dev_nit_active' helper wrapping the usual condition used in the transmit path. Signed-off-by: Maciej W. Rozycki Signed-off-by: David S. Miller --- drivers/net/fddi/defza.c | 33 +++++++++++++++++++++++++++++++-- include/linux/netdevice.h | 1 + net/core/dev.c | 13 ++++++++++++- 3 files changed, 44 insertions(+), 3 deletions(-) (limited to 'include/linux/netdevice.h') diff --git a/drivers/net/fddi/defza.c b/drivers/net/fddi/defza.c index 7d01b70f7ed8..3b7f10a5f06a 100644 --- a/drivers/net/fddi/defza.c +++ b/drivers/net/fddi/defza.c @@ -797,11 +797,40 @@ static void fza_tx_smt(struct net_device *dev) smt_tx_ptr = fp->mmio + readl_u(&fp->ring_smt_tx[i].buffer); len = readl_u(&fp->ring_smt_tx[i].rmc) & FZA_RING_PBC_MASK; - /* Queue the frame to the RMC transmit ring. */ - if (!netif_queue_stopped(dev)) + if (!netif_queue_stopped(dev)) { + if (dev_nit_active(dev)) { + struct sk_buff *skb; + + /* Length must be a multiple of 4 as only word + * reads are permitted! + */ + skb = fza_alloc_skb_irq(dev, (len + 3) & ~3); + if (!skb) + goto err_no_skb; /* Drop. */ + + skb_data_ptr = (struct fza_buffer_tx *) + skb->data; + + fza_reads(smt_tx_ptr, skb_data_ptr, + (len + 3) & ~3); + skb->dev = dev; + skb_reserve(skb, 3); /* Skip over PRH. */ + skb_put(skb, len - 3); + skb_reset_network_header(skb); + + dev_queue_xmit_nit(skb, dev); + + dev_kfree_skb_irq(skb); + +err_no_skb: + ; + } + + /* Queue the frame to the RMC transmit ring. */ fza_do_xmit((union fza_buffer_txp) { .mmio_ptr = smt_tx_ptr }, len, dev, 1); + } writel_o(FZA_RING_OWN_FZA, &fp->ring_smt_tx[i].own); fp->ring_smt_tx_index = diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 22e4ef7bb701..dc1d9ed33b31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3645,6 +3645,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, return 0; } +bool dev_nit_active(struct net_device *dev); void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; diff --git a/net/core/dev.c b/net/core/dev.c index a4d39b87b4e5..8497feea8fb5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1976,6 +1976,17 @@ static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) return false; } +/** + * dev_nit_active - return true if any network interface taps are in use + * + * @dev: network device to check for the presence of taps + */ +bool dev_nit_active(struct net_device *dev) +{ + return !list_empty(&ptype_all) || !list_empty(&dev->ptype_all); +} +EXPORT_SYMBOL_GPL(dev_nit_active); + /* * Support routine. Sends outgoing frames to any network * taps currently in use. @@ -3233,7 +3244,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) + if (dev_nit_active(dev)) dev_queue_xmit_nit(skb, dev); len = skb->len; -- cgit v1.2.3