From fa13a7b8fd68f4c00fc8c6905261b50a4733d8c3 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 5 Feb 2004 21:39:01 -0800 Subject: [NET]: Hash netdevices by name for faster lookup. --- include/linux/netdevice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 84b6fd41e2d2..11723ab7e2c5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -375,6 +375,8 @@ struct net_device atomic_t refcnt; /* delayed register/unregister */ struct list_head todo_list; + /* device name hash chain */ + struct hlist_node name_hlist; /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, -- cgit v1.2.3 From 6434c04175e058429430e2064d3a0ce112ce45ad Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 5 Feb 2004 21:39:44 -0800 Subject: [NET]: Hash netdevices by ifindex for faster lookup. --- include/linux/netdevice.h | 2 ++ net/core/dev.c | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 11723ab7e2c5..1a77c3caf115 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -377,6 +377,8 @@ struct net_device struct list_head todo_list; /* device name hash chain */ struct hlist_node name_hlist; + /* device index hash chain */ + struct hlist_node index_hlist; /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, diff --git a/net/core/dev.c b/net/core/dev.c index d5ba80179da6..17e1fa1b1815 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -188,6 +188,7 @@ EXPORT_SYMBOL(dev_base_lock); #define NETDEV_HASHBITS 8 static struct hlist_head dev_name_head[1<next) + hlist_for_each(p, dev_index_hash(ifindex)) { + struct net_device *dev + = hlist_entry(p, struct net_device, index_hlist); if (dev->ifindex == ifindex) - break; - return dev; + return dev; + } + return NULL; } @@ -2842,6 +2851,7 @@ int 
register_netdevice(struct net_device *dev) *dev_tail = dev; dev_tail = &dev->next; hlist_add_head(&dev->name_hlist, head); + hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); dev->reg_state = NETREG_REGISTERING; write_unlock_bh(&dev_base_lock); @@ -3064,6 +3074,7 @@ int unregister_netdevice(struct net_device *dev) if (d == dev) { write_lock_bh(&dev_base_lock); hlist_del(&dev->name_hlist); + hlist_del(&dev->index_hlist); if (dev_tail == &dev->next) dev_tail = dp; *dp = d->next; @@ -3145,6 +3156,9 @@ static int __init net_dev_init(void) for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) INIT_HLIST_HEAD(&dev_name_head[i]); + for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) + INIT_HLIST_HEAD(&dev_index_head[i]); + /* * Initialise the packet receive queues. */ -- cgit v1.2.3 From f8c11435c19d5cee964370d8fd62d397f2b4c212 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 5 Feb 2004 22:24:10 -0800 Subject: [NET]: Support for lots of netdevs -- faster dev_alloc_name Convert dev_alloc_name from O(n^2) lookup to O(n) by using a page as bitmap to figure out how many devices of that pattern have been allocated. This works for up to 32k devices (PAGE_SIZE*8) on i386, more on other platforms. Correctly handles the boundary cases where number of devices won't fit because name length is limited. Adds strnchr to the string libraries since we need to find the % format character, but only care if it is in the first 15 bytes. 
--- include/linux/string.h | 3 +++ lib/string.c | 16 ++++++++++++ net/core/dev.c | 67 ++++++++++++++++++++++++++++++++++---------------- 3 files changed, 65 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index f37b7a6813d3..6ad4e5c32f22 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -52,6 +52,9 @@ extern int strnicmp(const char *, const char *, __kernel_size_t); #ifndef __HAVE_ARCH_STRCHR extern char * strchr(const char *,int); #endif +#ifndef __HAVE_ARCH_STRNCHR +extern char * strnchr(const char *, size_t, int); +#endif #ifndef __HAVE_ARCH_STRRCHR extern char * strrchr(const char *,int); #endif diff --git a/lib/string.c b/lib/string.c index e660de079a57..d2f23f2c1e69 100644 --- a/lib/string.c +++ b/lib/string.c @@ -273,6 +273,22 @@ char * strrchr(const char * s, int c) } #endif +#ifndef __HAVE_ARCH_STRNCHR +/** + * strnchr - Find a character in a length limited string + * @s: The string to be searched + * @count: The number of characters to be searched + * @c: The character to search for + */ +char *strnchr(const char *s, size_t count, int c) +{ + for (; count-- && *s != '\0'; ++s) + if (*s == (char) c) + return (char *) s; + return NULL; +} +#endif + #ifndef __HAVE_ARCH_STRLEN /** * strlen - Find the length of a string diff --git a/net/core/dev.c b/net/core/dev.c index 17e1fa1b1815..d1dfcef63c5a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -720,30 +720,55 @@ int dev_valid_name(const char *name) int dev_alloc_name(struct net_device *dev, const char *name) { - int i; - char buf[32]; - char *p; - - /* - * Verify the string as this thing may have come from - * the user. There must be either one "%d" and no other "%" - * characters, or no "%" characters at all. 
- */ - p = strchr(name, '%'); - if (p && (p[1] != 'd' || strchr(p + 2, '%'))) - return -EINVAL; + int i = 0; + char buf[IFNAMSIZ]; + const char *p; + const int max_netdevices = 8*PAGE_SIZE; + long *inuse; + struct net_device *d; + + p = strnchr(name, IFNAMSIZ-1, '%'); + if (p) { + /* + * Verify the string as this thing may have come from + * the user. There must be either one "%d" and no other "%" + * characters. + */ + if (p[1] != 'd' || strchr(p + 2, '%')) + return -EINVAL; - /* - * If you need over 100 please also fix the algorithm... - */ - for (i = 0; i < 100; i++) { - snprintf(buf, sizeof(buf), name, i); - if (!__dev_get_by_name(buf)) { - strcpy(dev->name, buf); - return i; + /* Use one page as a bit array of possible slots */ + inuse = (long *) get_zeroed_page(GFP_ATOMIC); + if (!inuse) + return -ENOMEM; + + for (d = dev_base; d; d = d->next) { + if (!sscanf(d->name, name, &i)) + continue; + if (i < 0 || i >= max_netdevices) + continue; + + /* avoid cases where sscanf is not exact inverse of printf */ + snprintf(buf, sizeof(buf), name, i); + if (!strncmp(buf, d->name, IFNAMSIZ)) + set_bit(i, inuse); } + + i = find_first_zero_bit(inuse, max_netdevices); + free_page((unsigned long) inuse); } - return -ENFILE; /* Over 100 of the things .. bail out! */ + + snprintf(buf, sizeof(buf), name, i); + if (!__dev_get_by_name(buf)) { + strlcpy(dev->name, buf, IFNAMSIZ); + return i; + } + + /* It is possible to run out of possible slots + * when the name is long and there isn't enough space left + * for the digits, or if all bits are used. + */ + return -ENFILE; } -- cgit v1.2.3 From 353131f37e48c775ed37e18eafd7a570d0feac15 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 Feb 2004 23:09:25 -0800 Subject: [NET]: Simply net_ratelimit(). Reimplement net_ratelimit() in terms of the new printk_ratelimit(). 
As net_ratelimit() already has it own sysctls we generalise printk_ratelimit() a bit so that networking does not lose its existing sysctls and so that it can use different time constants from the more generic printk_ratelimit(). --- include/linux/kernel.h | 1 + kernel/printk.c | 29 ++++++++++++++++++----------- net/core/utils.c | 30 ++---------------------------- 3 files changed, 21 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 6365a4159514..228182715b1d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -90,6 +90,7 @@ asmlinkage int printk(const char * fmt, ...) unsigned long int_sqrt(unsigned long); extern int printk_ratelimit(void); +extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); static inline void console_silent(void) { diff --git a/kernel/printk.c b/kernel/printk.c index 90be371e4842..8f2905cfa0a6 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -784,12 +784,6 @@ void tty_write_message(struct tty_struct *tty, char *msg) return; } -/* minimum time in jiffies between messages */ -int printk_ratelimit_jiffies = 5*HZ; - -/* number of messages we send before ratelimiting */ -int printk_ratelimit_burst = 10; - /* * printk rate limiting, lifted from the networking subsystem. * @@ -797,7 +791,7 @@ int printk_ratelimit_burst = 10; * every printk_ratelimit_jiffies to make a denial-of-service * attack impossible. 
*/ -int printk_ratelimit(void) +int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst) { static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; static unsigned long toks = 10*5*HZ; @@ -809,12 +803,12 @@ int printk_ratelimit(void) spin_lock_irqsave(&ratelimit_lock, flags); toks += now - last_msg; last_msg = now; - if (toks > (printk_ratelimit_burst * printk_ratelimit_jiffies)) - toks = printk_ratelimit_burst * printk_ratelimit_jiffies; - if (toks >= printk_ratelimit_jiffies) { + if (toks > (ratelimit_burst * ratelimit_jiffies)) + toks = ratelimit_burst * ratelimit_jiffies; + if (toks >= ratelimit_jiffies) { int lost = missed; missed = 0; - toks -= printk_ratelimit_jiffies; + toks -= ratelimit_jiffies; spin_unlock_irqrestore(&ratelimit_lock, flags); if (lost) printk(KERN_WARNING "printk: %d messages suppressed.\n", lost); @@ -824,4 +818,17 @@ int printk_ratelimit(void) spin_unlock_irqrestore(&ratelimit_lock, flags); return 0; } +EXPORT_SYMBOL(__printk_ratelimit); + +/* minimum time in jiffies between messages */ +int printk_ratelimit_jiffies = 5*HZ; + +/* number of messages we send before ratelimiting */ +int printk_ratelimit_burst = 10; + +int printk_ratelimit(void) +{ + return __printk_ratelimit(printk_ratelimit_jiffies, + printk_ratelimit_burst); +} EXPORT_SYMBOL(printk_ratelimit); diff --git a/net/core/utils.c b/net/core/utils.c index 995f2b5168c2..8058d9c5e236 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -41,37 +41,11 @@ int net_msg_cost = 5*HZ; int net_msg_burst = 10; /* - * This enforces a rate limit: not more than one kernel message - * every 5secs to make a denial-of-service attack impossible. - * - * All warning printk()s should be guarded by this function. + * All net warning printk()s should be guarded by this function. 
*/ int net_ratelimit(void) { - static spinlock_t ratelimit_lock = SPIN_LOCK_UNLOCKED; - static unsigned long toks = 10*5*HZ; - static unsigned long last_msg; - static int missed; - unsigned long flags; - unsigned long now = jiffies; - - spin_lock_irqsave(&ratelimit_lock, flags); - toks += now - last_msg; - last_msg = now; - if (toks > net_msg_burst) - toks = net_msg_burst; - if (toks >= net_msg_cost) { - int lost = missed; - missed = 0; - toks -= net_msg_cost; - spin_unlock_irqrestore(&ratelimit_lock, flags); - if (lost) - printk(KERN_WARNING "NET: %d messages suppressed.\n", lost); - return 1; - } - missed++; - spin_unlock_irqrestore(&ratelimit_lock, flags); - return 0; + return __printk_ratelimit(net_msg_cost, net_msg_burst); } EXPORT_SYMBOL(net_random); -- cgit v1.2.3 From 6d7c32165fa2483419d6d4a0e82a7dd0830613e8 Mon Sep 17 00:00:00 2001 From: Shmulik Hen Date: Fri, 6 Feb 2004 00:00:41 -0800 Subject: [IPV4]: Split arp_send into arp_create and arp_xmit, export them. --- include/net/arp.h | 8 +++++++ net/ipv4/arp.c | 66 +++++++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 57 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/arp.h b/include/net/arp.h index f65d245f42cc..61fd735c7017 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -5,6 +5,8 @@ #include #include +#define HAVE_ARP_CREATE + extern struct neigh_table arp_tbl; extern void arp_init(void); @@ -19,6 +21,12 @@ extern int arp_bind_neighbour(struct dst_entry *dst); extern int arp_mc_map(u32 addr, u8 *haddr, struct net_device *dev, int dir); extern void arp_ifdown(struct net_device *dev); +extern struct sk_buff *arp_create(int type, int ptype, u32 dest_ip, + struct net_device *dev, u32 src_ip, + unsigned char *dest_hw, unsigned char *src_hw, + unsigned char *target_hw); +extern void arp_xmit(struct sk_buff *skb); + extern struct neigh_ops arp_broken_ops; #endif /* _ARP_H */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 5c03f63502bc..0de93f953ef9 
100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -67,6 +67,10 @@ * now it is in net/core/neighbour.c. * Krzysztof Halasa: Added Frame Relay ARP support. * Arnaldo C. Melo : convert /proc/net/arp to seq_file + * Shmulik Hen: Split arp_send to arp_create and + * arp_xmit so intermediate drivers like + * bonding can change the skb before + * sending (e.g. insert 8021q tag). */ #include @@ -487,26 +491,18 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) */ /* - * Create and send an arp packet. If (dest_hw == NULL), we create a broadcast + * Create an arp packet. If (dest_hw == NULL), we create a broadcast * message. */ - -void arp_send(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, - unsigned char *dest_hw, unsigned char *src_hw, - unsigned char *target_hw) +struct sk_buff *arp_create(int type, int ptype, u32 dest_ip, + struct net_device *dev, u32 src_ip, + unsigned char *dest_hw, unsigned char *src_hw, + unsigned char *target_hw) { struct sk_buff *skb; struct arphdr *arp; unsigned char *arp_ptr; - /* - * No arp on this interface. - */ - - if (dev->flags&IFF_NOARP) - return; - /* * Allocate a buffer */ @@ -514,7 +510,7 @@ void arp_send(int type, int ptype, u32 dest_ip, skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4) + LL_RESERVED_SPACE(dev), GFP_ATOMIC); if (skb == NULL) - return; + return NULL; skb_reserve(skb, LL_RESERVED_SPACE(dev)); skb->nh.raw = skb->data; @@ -594,12 +590,46 @@ void arp_send(int type, int ptype, u32 dest_ip, arp_ptr+=dev->addr_len; memcpy(arp_ptr, &dest_ip, 4); - /* Send it off, maybe filter it using firewalling first. */ - NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, dev, dev_queue_xmit); - return; + return skb; out: kfree_skb(skb); + return NULL; +} + +/* + * Send an arp packet. + */ +void arp_xmit(struct sk_buff *skb) +{ + /* Send it off, maybe filter it using firewalling first. 
*/ + NF_HOOK(NF_ARP, NF_ARP_OUT, skb, NULL, skb->dev, dev_queue_xmit); +} + +/* + * Create and send an arp packet. + */ +void arp_send(int type, int ptype, u32 dest_ip, + struct net_device *dev, u32 src_ip, + unsigned char *dest_hw, unsigned char *src_hw, + unsigned char *target_hw) +{ + struct sk_buff *skb; + + /* + * No arp on this interface. + */ + + if (dev->flags&IFF_NOARP) + return; + + skb = arp_create(type, ptype, dest_ip, dev, src_ip, + dest_hw, src_hw, target_hw); + if (skb == NULL) { + return; + } + + arp_xmit(skb); } static void parp_redo(struct sk_buff *skb) @@ -1437,6 +1467,8 @@ static int __init arp_proc_init(void) EXPORT_SYMBOL(arp_broken_ops); EXPORT_SYMBOL(arp_find); EXPORT_SYMBOL(arp_rcv); +EXPORT_SYMBOL(arp_create); +EXPORT_SYMBOL(arp_xmit); EXPORT_SYMBOL(arp_send); EXPORT_SYMBOL(arp_tbl); -- cgit v1.2.3 From cdea4568af5eecf3446de415c55a9751822d757d Mon Sep 17 00:00:00 2001 From: Shmulik Hen Date: Fri, 6 Feb 2004 00:20:43 -0800 Subject: [VLAN]: Export VLAN tag get/set functionality. Enable intermediate network drivers like bonding to get or set a VLAN tag in an skb without a need to know about how tagging is done according to a network adapter's capabilities. --- include/linux/if_vlan.h | 146 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 146 insertions(+) (limited to 'include') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 6cb10ed07188..104df1877778 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -200,6 +200,152 @@ static inline int vlan_hwaccel_receive_skb(struct sk_buff *skb, { return __vlan_hwaccel_rx(skb, grp, vlan_tag, 1); } + +/** + * __vlan_put_tag - regular VLAN tag inserting + * @skb: skbuff to tag + * @tag: VLAN tag to insert + * + * Inserts the VLAN tag into @skb as part of the payload + * Returns a VLAN tagged skb. If a new skb is created, @skb is freed. 
+ * + * Following the skb_unshare() example, in case of error, the calling function + * doesn't have to worry about freeing the original skb. + */ +static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, unsigned short tag) +{ + struct vlan_ethhdr *veth; + + if (skb_headroom(skb) < VLAN_HLEN) { + struct sk_buff *sk_tmp = skb; + skb = skb_realloc_headroom(sk_tmp, VLAN_HLEN); + kfree_skb(sk_tmp); + if (!skb) { + printk(KERN_ERR "vlan: failed to realloc headroom\n"); + return NULL; + } + } else { + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) { + printk(KERN_ERR "vlan: failed to unshare skbuff\n"); + return NULL; + } + } + + veth = (struct vlan_ethhdr *)skb_push(skb, VLAN_HLEN); + + /* Move the mac addresses to the beginning of the new header. */ + memmove(skb->data, skb->data + VLAN_HLEN, 2 * VLAN_ETH_ALEN); + + /* first, the ethernet type */ + veth->h_vlan_proto = __constant_htons(ETH_P_8021Q); + + /* now, the tag */ + veth->h_vlan_TCI = htons(tag); + + skb->protocol = __constant_htons(ETH_P_8021Q); + skb->mac.raw -= VLAN_HLEN; + skb->nh.raw -= VLAN_HLEN; + + return skb; +} + +/** + * __vlan_hwaccel_put_tag - hardware accelerated VLAN inserting + * @skb: skbuff to tag + * @tag: VLAN tag to insert + * + * Puts the VLAN tag in @skb->cb[] and lets the device do the rest + */ +static inline struct sk_buff *__vlan_hwaccel_put_tag(struct sk_buff *skb, unsigned short tag) +{ + struct vlan_skb_tx_cookie *cookie; + + cookie = VLAN_TX_SKB_CB(skb); + cookie->magic = VLAN_TX_COOKIE_MAGIC; + cookie->vlan_tag = tag; + + return skb; +} + +#define HAVE_VLAN_PUT_TAG + +/** + * vlan_put_tag - inserts VLAN tag according to device features + * @skb: skbuff to tag + * @tag: VLAN tag to insert + * + * Assumes skb->dev is the target that will xmit this frame. + * Returns a VLAN tagged skb. 
+ */ +static inline struct sk_buff *vlan_put_tag(struct sk_buff *skb, unsigned short tag) +{ + if (skb->dev->features & NETIF_F_HW_VLAN_TX) { + return __vlan_hwaccel_put_tag(skb, tag); + } else { + return __vlan_put_tag(skb, tag); + } +} + +/** + * __vlan_get_tag - get the VLAN ID that is part of the payload + * @skb: skbuff to query + * @tag: buffer to store value + * + * Returns error if the skb is not of VLAN type + */ +static inline int __vlan_get_tag(struct sk_buff *skb, unsigned short *tag) +{ + struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; + + if (veth->h_vlan_proto != __constant_htons(ETH_P_8021Q)) { + return -EINVAL; + } + + *tag = ntohs(veth->h_vlan_TCI); + + return 0; +} + +/** + * __vlan_hwaccel_get_tag - get the VLAN ID that is in @skb->cb[] + * @skb: skbuff to query + * @tag: buffer to store value + * + * Returns error if @skb->cb[] is not set correctly + */ +static inline int __vlan_hwaccel_get_tag(struct sk_buff *skb, unsigned short *tag) +{ + struct vlan_skb_tx_cookie *cookie; + + cookie = VLAN_TX_SKB_CB(skb); + if (cookie->magic == VLAN_TX_COOKIE_MAGIC) { + *tag = cookie->vlan_tag; + return 0; + } else { + *tag = 0; + return -EINVAL; + } +} + +#define HAVE_VLAN_GET_TAG + +/** + * vlan_get_tag - get the VLAN ID from the skb + * @skb: skbuff to query + * @tag: buffer to store value + * + * Returns error if the skb is not VLAN tagged + */ +static inline int vlan_get_tag(struct sk_buff *skb, unsigned short *tag) +{ + if (skb->dev->features & NETIF_F_HW_VLAN_TX) { + return __vlan_hwaccel_get_tag(skb, tag); + } else { + return __vlan_get_tag(skb, tag); + } +} + #endif /* __KERNEL__ */ /* VLAN IOCTLs are found in sockios.h */ -- cgit v1.2.3