diff options
| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-09-07 02:43:04 -0700 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2004-09-07 02:43:04 -0700 |
| commit | f0975ce872bb3f5f6c86837bc84c1e9dfd76e87c (patch) | |
| tree | 9578db69ebff882287ff51860ee1c5244332c84e | |
| parent | 88e1f06c27cc74daececa9e6ad4337cbbfb626a9 (diff) | |
| parent | 8d34d4fde2dfa67d29298f4bb4c636c3ee1914aa (diff) | |
Merge bk://kernel.bkbits.net/davem/net-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
61 files changed, 736 insertions, 608 deletions
@@ -1586,6 +1586,15 @@ D: Backport/Forwardport merge monkey. D: Various Janitor work. S: United Kingdom +N: Martin Josfsson +E: gandalf@wlug.westbo.se +P: 1024D/F6B6D3B1 7610 7CED 5C34 4AA6 DBA2 8BE1 5A6D AF95 F6B6 D3B1 +D: netfilter: SAME target +D: netfilter: helper target +D: netfilter: various other hacks +S: Ronneby +S: Sweden + N: Ani Joshi E: ajoshi@shell.unixbox.com D: fbdev hacking @@ -1597,6 +1606,17 @@ S: Lemnosvej 1, 3.tv S: 2300 Copenhagen S S: Denmark +N: Jozsef Kadlecsik +E: kadlec@blackhole.kfki.hu +P: 1024D/470DB964 4CB3 1A05 713E 9BF7 FAC5 5809 DD8C B7B1 470D B964 +D: netfilter: TCP window tracking code +D: netfilter: raw table +D: netfilter: iprange match +D: netfilter: new logging interfaces +D: netfilter: various other hacks +S: Tata +S: Hungary + N: Bernhard Kaindl E: bkaindl@netway.at E: edv@bartelt.via.at @@ -2146,6 +2166,16 @@ D: OV511 driver S: (address available on request) S: USA +N: Patrick McHardy +E: kaber@trash.net +P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80 +D: netfilter: endless number of bugfixes +D: netfilter: CLASSIFY target +D: netfilter: addrtype match +D: tc: HFSC scheduler +S: Freiburg +S: Germany + N: Mike McLagan E: mike.mclagan@linux.org W: http://www.invlogic.com/~mmclagan @@ -2821,7 +2851,7 @@ S: Germany N: Paul `Rusty' Russell E: rusty@rustcorp.com.au -W: http://www.samba.org/netfilter +W: http://ozlabs.org/~rusty D: Ruggedly handsome. D: netfilter, ipchains with Michael Neuling. S: 52 Moore St @@ -3405,6 +3435,18 @@ S: UC Berkeley S: Berkeley, CA 94720-1776 S: USA +N: Harald Welte +E: laforge@netfilter.org +P: 1024D/30F48BFF DBDE 6912 8831 9A53 879B 9190 5DA5 C655 30F4 8BFF +W: http://gnumonks.org/users/laforge +D: netfilter: new nat helper infrastructure +D: netfilter: ULOG, ECN, DSCP target +D: netfilter: TTL match +D: netfilter: IPv6 mangle table +D: netfilter: various other hacks +S: Berlin +S: Germany + N: Bill Wendling E: wendling@ganymede.isdn.uiuc.edu W: http://www.ncsa.uiuc.edu/~wendling/ diff --git a/MAINTAINERS b/MAINTAINERS index e6030768dddd..c1a5296a74c7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1548,6 +1548,14 @@ M: kaber@coreworks.de L: netdev@oss.sgi.com S: Maintained +IPVS +P: Wensong Zhang +M: wensong@linux-vs.org +P: Julian Anastasov +M: ja@ssi.bg +L: lvs-users@linuxvirtualserver.org +S: Maintained + NFS CLIENT P: Trond Myklebust M: trond.myklebust@fys.uio.no diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 72138612d2fb..6177397c30cb 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -181,6 +181,18 @@ static inline void phy_write(struct gem *gp, int reg, u16 val) __phy_write(gp, gp->mii_phy_addr, reg, val); } +static inline void gem_enable_ints(struct gem *gp) +{ + /* Enable all interrupts but TXDONE */ + writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK); +} + +static inline void gem_disable_ints(struct gem *gp) +{ + /* Disable all interrupts, including TXDONE */ + writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK); +} + static void gem_handle_mif_event(struct gem *gp, u32 reg_val, u32 changed_bits) { if (netif_msg_intr(gp)) @@ -639,7 +651,7 @@ static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_st } gp->net_stats.tx_packets++; - dev_kfree_skb_irq(skb); + dev_kfree_skb(skb); } gp->tx_old = entry; @@ -678,12 +690,12 @@ static __inline__ void gem_post_rxds(struct gem *gp, int limit) } } -static void gem_rx(struct gem *gp) +static int gem_rx(struct gem *gp, int work_to_do) { - int entry, drops; + int entry, drops, work_done = 0; u32 done; - if (netif_msg_intr(gp)) + if (netif_msg_rx_status(gp)) printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n", gp->dev->name, readl(gp->regs + RXDMA_DONE), gp->rx_new); @@ -700,6 +712,9 @@ static void gem_rx(struct gem *gp) if ((status & RXDCTRL_OWN) != 0) break; + if (work_done >= RX_RING_SIZE || work_done >= work_to_do) + break; + /* When writing back RX descriptor, GEM writes status * then buffer address, possibly in seperate transactions. * If we don't wait for the chip to write both, we could @@ -713,6 +728,9 @@ static void gem_rx(struct gem *gp) break; } + /* We can now account for the work we're about to do */ + work_done++; + skb = gp->rx_skbs[entry]; len = (status & RXDCTRL_BUFSZ) >> 16; @@ -775,7 +793,8 @@ static void gem_rx(struct gem *gp) skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff); skb->ip_summed = CHECKSUM_HW; skb->protocol = eth_type_trans(skb, gp->dev); - netif_rx(skb); + + netif_receive_skb(skb); gp->net_stats.rx_packets++; gp->net_stats.rx_bytes += len; @@ -792,32 +811,88 @@ static void gem_rx(struct gem *gp) if (drops) printk(KERN_INFO "%s: Memory squeeze, deferring packet.\n", gp->dev->name); + + return work_done; +} + +static int gem_poll(struct net_device *dev, int *budget) +{ + struct gem *gp = dev->priv; + unsigned long flags; + + spin_lock_irqsave(&gp->lock, flags); + + do { + int work_to_do, work_done; + + /* Handle anomalies */ + if (gp->status & GREG_STAT_ABNORMAL) { + if (gem_abnormal_irq(dev, gp, gp->status)) + break; + } + + /* Run TX completion thread */ + gem_tx(dev, gp, gp->status); + + spin_unlock_irqrestore(&gp->lock, flags); + + /* Run RX thread. We don't use any locking here, + * code willing to do bad things - like cleaning the + * rx ring - must call netif_poll_disable(), which + * schedule_timeout()'s if polling is already disabled. + */ + work_to_do = min(*budget, dev->quota); + + work_done = gem_rx(gp, work_to_do); + + *budget -= work_done; + dev->quota -= work_done; + + if (work_done >= work_to_do) + return 1; + + spin_lock_irqsave(&gp->lock, flags); + + gp->status = readl(gp->regs + GREG_STAT); + } while (gp->status & GREG_STAT_NAPI); + + __netif_rx_complete(dev); + gem_enable_ints(gp); + + spin_unlock_irqrestore(&gp->lock, flags); + return 0; } static irqreturn_t gem_interrupt(int irq, void *dev_id, struct pt_regs *regs) { struct net_device *dev = dev_id; struct gem *gp = dev->priv; - u32 gem_status = readl(gp->regs + GREG_STAT); + unsigned long flags; /* Swallow interrupts when shutting the chip down */ - if (gp->hw_running == 0) - goto out; + if (!gp->hw_running) + return IRQ_HANDLED; - spin_lock(&gp->lock); + spin_lock_irqsave(&gp->lock, flags); + + if (netif_rx_schedule_prep(dev)) { + u32 gem_status = readl(gp->regs + GREG_STAT); - if (gem_status & GREG_STAT_ABNORMAL) { - if (gem_abnormal_irq(dev, gp, gem_status)) - goto out_unlock; + if (gem_status == 0) { + spin_unlock_irqrestore(&gp->lock, flags); + return IRQ_NONE; + } + gp->status = gem_status; + gem_disable_ints(gp); + __netif_rx_schedule(dev); } - if (gem_status & (GREG_STAT_TXALL | GREG_STAT_TXINTME)) - gem_tx(dev, gp, gem_status); - if (gem_status & GREG_STAT_RXDONE) - gem_rx(gp); -out_unlock: - spin_unlock(&gp->lock); -out: + spin_unlock_irqrestore(&gp->lock, flags); + + /* If polling was disabled at the time we received that + * interrupt, we may return IRQ_HANDLED here while we + * should return IRQ_NONE. No big deal... + */ return IRQ_HANDLED; } @@ -1312,19 +1387,12 @@ static void gem_reset_task(void *data) { struct gem *gp = (struct gem *) data; - /* The link went down, we reset the ring, but keep - * DMA stopped. Todo: Use this function for reset - * on error as well. - */ - + netif_poll_disable(gp->dev); spin_lock_irq(&gp->lock); if (gp->hw_running && gp->opened) { - /* Make sure we don't get interrupts or tx packets */ netif_stop_queue(gp->dev); - writel(0xffffffff, gp->regs + GREG_IMASK); - /* Reset the chip & rings */ gem_stop(gp); gem_init_rings(gp); @@ -1337,6 +1405,7 @@ static void gem_reset_task(void *data) gp->reset_task_pending = 0; spin_unlock_irq(&gp->lock); + netif_poll_enable(gp->dev); } static void gem_link_timer(unsigned long data) @@ -2214,11 +2283,15 @@ static int gem_close(struct net_device *dev) /* Make sure we don't get distracted by suspend/resume */ down(&gp->pm_sem); + /* Note: we don't need to call netif_poll_disable() here because + * our caller (dev_close) already did it for us + */ + /* Stop traffic, mark us closed */ spin_lock_irq(&gp->lock); gp->opened = 0; - writel(0xffffffff, gp->regs + GREG_IMASK); + netif_stop_queue(dev); /* Stop chip */ @@ -2247,6 +2320,8 @@ static int gem_suspend(struct pci_dev *pdev, u32 state) struct net_device *dev = pci_get_drvdata(pdev); struct gem *gp = dev->priv; + netif_poll_disable(dev); + /* We hold the PM semaphore during entire driver * sleep time */ @@ -2262,8 +2337,6 @@ static int gem_suspend(struct pci_dev *pdev, u32 state) /* Stop traffic, mark us closed */ netif_device_detach(dev); - writel(0xffffffff, gp->regs + GREG_IMASK); - /* Stop chip */ gem_stop(gp); @@ -2317,6 +2390,8 @@ static int gem_resume(struct pci_dev *pdev) } up(&gp->pm_sem); + netif_poll_enable(dev); + return 0; } #endif /* CONFIG_PM */ @@ -2806,6 +2881,8 @@ static int __devinit gem_init_one(struct pci_dev *pdev, dev->get_stats = gem_get_stats; dev->set_multicast_list = gem_set_multicast; dev->do_ioctl = gem_ioctl; + dev->poll = gem_poll; + dev->weight = 64; dev->ethtool_ops = &gem_ethtool_ops; dev->tx_timeout = gem_tx_timeout; dev->watchdog_timeo = 5 * HZ; diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h index eed77bfe1b60..bc0175acb52e 100644 --- a/drivers/net/sungem.h +++ b/drivers/net/sungem.h @@ -60,6 +60,9 @@ GREG_STAT_PCS | GREG_STAT_TXMAC | GREG_STAT_RXMAC | \ GREG_STAT_MAC | GREG_STAT_MIF | GREG_STAT_PCIERR) +#define GREG_STAT_NAPI (GREG_STAT_TXALL | GREG_STAT_TXINTME | \ + GREG_STAT_RXDONE | GREG_STAT_ABNORMAL) + /* The layout of GREG_IMASK and GREG_IACK is identical to GREG_STAT. * Bits set in GREG_IMASK will prevent that interrupt type from being * signalled to the cpu. GREG_IACK can be used to clear specific top-level @@ -969,6 +972,7 @@ struct gem { struct sk_buff *tx_skbs[RX_RING_SIZE]; u32 msg_enable; + u32 status; struct net_device_stats net_stats; diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index f6740ca511bd..a8e034b156cf 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -6710,19 +6710,26 @@ static int qeth_arp_constructor(struct neighbour *neigh) { struct net_device *dev = neigh->dev; - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev; + struct neigh_parms *parms; - if (in_dev == NULL) - return -EINVAL; if (!qeth_verify_dev(dev)) { - in_dev_put(in_dev); return qeth_old_arp_constructor(neigh); } + rcu_read_lock(); + in_dev = rcu_dereference(__in_dev_get(dev)); + if (in_dev == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + + parms = in_dev->arp_parms; + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + rcu_read_unlock(); + neigh->type = inet_addr_type(*(u32 *) neigh->primary_key); - if (in_dev->arp_parms) - neigh->parms = in_dev->arp_parms; - in_dev_put(in_dev); neigh->nud_state = NUD_NOARP; neigh->ops = arp_direct_ops; neigh->output = neigh->ops->queue_xmit; diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index dc1a2d2c41d5..a493b5b5871b 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -15,6 +15,7 @@ #include <linux/types.h> #include <linux/compat.h> #include <linux/kernel.h> +#include <linux/compiler.h> #include <linux/sched.h> #include <linux/smp.h> #include <linux/smp_lock.h> @@ -407,6 +408,7 @@ out: return err; } +#ifdef CONFIG_NET static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg) { struct compat_timeval __user *up = compat_ptr(arg); @@ -461,7 +463,6 @@ struct ifconf32 { compat_caddr_t ifcbuf; }; -#ifdef CONFIG_NET static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) { struct net_device *dev; @@ -481,7 +482,6 @@ static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg) err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32)); return (err ? -EFAULT : 0); } -#endif static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg) { @@ -797,6 +797,7 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg) return ret; } +#endif struct hd_geometry32 { unsigned char heads; @@ -1872,7 +1873,8 @@ static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg) return -EINVAL; } -static int ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) +static __attribute_used__ int +ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg) { return -EINVAL; } @@ -3162,7 +3164,6 @@ HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob) HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob) #ifdef CONFIG_NET HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32) -#endif HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf) HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc) HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc) @@ -3206,6 +3207,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc) /* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */ HANDLE_IOCTL(SIOCRTMSG, ret_einval) HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp) +#endif HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo) HANDLE_IOCTL(BLKRAGET, w_long) HANDLE_IOCTL(BLKGETSIZE, w_long) diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h index 88912c4c8931..d00259d3dc78 100644 --- a/include/asm-alpha/socket.h +++ b/include/asm-alpha/socket.h @@ -55,20 +55,4 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 20 #define SO_SECURITY_ENCRYPTION_NETWORK 21 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h index b05e717397a1..46d20585d951 100644 --- a/include/asm-arm/socket.h +++ b/include/asm-arm/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h index b05e717397a1..46d20585d951 100644 --- a/include/asm-arm26/socket.h +++ b/include/asm-arm26/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h index b4421808b676..f159b4f165f7 100644 --- a/include/asm-cris/socket.h +++ b/include/asm-cris/socket.h @@ -49,21 +49,6 @@ #define SO_PEERSEC 31 -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h index 070d46d2344e..af33b8525dcf 100644 --- a/include/asm-h8300/socket.h +++ b/include/asm-h8300/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h index 711f906193fd..07f6b38ad140 100644 --- a/include/asm-i386/socket.h +++ b/include/asm-i386/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nasty libc5 fixup - bletch */ -#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h index bf4434e26a81..21a9f10d6baa 100644 --- a/include/asm-ia64/socket.h +++ b/include/asm-ia64/socket.h @@ -56,20 +56,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_IA64_SOCKET_H */ diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h index 68a33bfbae03..8d0b9fc2d07e 100644 --- a/include/asm-m68k/socket.h +++ b/include/asm-m68k/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h index 6556c10be8f0..855b86f3ea0e 100644 --- a/include/asm-mips/socket.h +++ b/include/asm-mips/socket.h @@ -68,20 +68,4 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */ #define SO_PEERSEC 30 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_DGRAM 1 /* datagram (conn.less) socket */ -#define SOCK_STREAM 2 /* stream (connection) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h index fd3f0f29eb3c..4a77996c1862 100644 --- a/include/asm-parisc/socket.h +++ b/include/asm-parisc/socket.h @@ -47,18 +47,4 @@ #define SO_PEERSEC 0x401d -#if defined(__KERNEL__) -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h index bad94c36f1b8..4134376b0f66 100644 --- a/include/asm-ppc/socket.h +++ b/include/asm-ppc/socket.h @@ -53,20 +53,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif /* __KERNEL__ */ - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h index 1021a5268346..59e00dfc8b8e 100644 --- a/include/asm-ppc64/socket.h +++ b/include/asm-ppc64/socket.h @@ -54,20 +54,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h index 1855ec7a112b..0e96eeca4e6b 100644 --- a/include/asm-s390/socket.h +++ b/include/asm-s390/socket.h @@ -55,20 +55,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h index 2b9469472f76..dde696c3b4c7 100644 --- a/include/asm-sh/socket.h +++ b/include/asm-sh/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* __ASM_SH_SOCKET_H */ diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h index cfa529d9bb96..c1154e3ecfdf 100644 --- a/include/asm-sparc/socket.h +++ b/include/asm-sparc/socket.h @@ -52,20 +52,4 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_NETWORK 0x5004 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h index ba2230ba1d7d..865547a23908 100644 --- a/include/asm-sparc64/socket.h +++ b/include/asm-sparc64/socket.h @@ -52,20 +52,4 @@ #define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002 #define SO_SECURITY_ENCRYPTION_NETWORK 0x5004 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h index 7d7f80efa8b3..213b852af53e 100644 --- a/include/asm-v850/socket.h +++ b/include/asm-v850/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nast libc5 fixup - bletch */ -#if defined(__KERNEL__) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* __V850_SOCKET_H__ */ diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h index 373e7abadfb5..d9a252ea8210 100644 --- a/include/asm-x86_64/socket.h +++ b/include/asm-x86_64/socket.h @@ -47,20 +47,4 @@ #define SO_PEERSEC 31 -/* Nasty libc5 fixup - bletch */ -#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) -/* Socket types. */ -#define SOCK_STREAM 1 /* stream (connection) socket */ -#define SOCK_DGRAM 2 /* datagram (conn.less) socket */ -#define SOCK_RAW 3 /* raw socket */ -#define SOCK_RDM 4 /* reliably-delivered message */ -#define SOCK_SEQPACKET 5 /* sequential packet socket */ -#define SOCK_PACKET 10 /* linux specific way of */ - /* getting packets at the dev */ - /* level. For writing rarp and */ - /* other similar things on the */ - /* user level. */ -#define SOCK_MAX (SOCK_PACKET+1) -#endif - #endif /* _ASM_SOCKET_H */ diff --git a/include/linux/net.h b/include/linux/net.h index 80e7fec727e3..0f710b7e4121 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -61,6 +61,27 @@ typedef enum { #define SOCK_ASYNC_WAITDATA 1 #define SOCK_NOSPACE 2 +/** sock_type - Socket types + * + * @SOCK_STREAM - stream (connection) socket + * @SOCK_DGRAM - datagram (conn.less) socket + * @SOCK_RAW - raw socket + * @SOCK_RDM - reliably-delivered message + * @SOCK_SEQPACKET - sequential packet socket + * @SOCK_PACKET - linux specific way of getting packets at the dev level. + * For writing rarp and other similar things on the user level. + */ +enum sock_type { + SOCK_STREAM = 1, + SOCK_DGRAM = 2, + SOCK_RAW = 3, + SOCK_RDM = 4, + SOCK_SEQPACKET = 5, + SOCK_PACKET = 10, +}; + +#define SOCK_MAX (SOCK_PACKET + 1) + /** * struct socket - general BSD socket * @state - socket state (%SS_CONNECTED, etc) diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index 55d57404acb8..6edb801fa51f 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -75,6 +75,7 @@ extern int ip_conntrack_protocol_tcp_init(void); /* Log invalid packets */ extern unsigned int ip_ct_log_invalid; +#ifdef CONFIG_SYSCTL #ifdef DEBUG_INVALID_PACKETS #define LOG_INVALID(proto) \ (ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) @@ -83,5 +84,8 @@ extern unsigned int ip_ct_log_invalid; ((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \ && net_ratelimit()) #endif +#else +#define LOG_INVALID(proto) 0 +#endif /* CONFIG_SYSCTL */ #endif /*_IP_CONNTRACK_PROTOCOL_H*/ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9c42ac0b0322..86ca98c5ef8f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -201,6 +201,10 @@ struct tcp_sack_block { __u32 end_seq; }; +typedef struct tcp_pcount { + __u32 val; +} tcp_pcount_t; + struct tcp_opt { int tcp_header_len; /* Bytes of tcp header to send */ @@ -274,9 +278,9 @@ struct tcp_opt { __u32 rtt_seq; /* sequence number to update rttvar */ __u32 rto; /* retransmit timeout */ - __u32 packets_out; /* Packets which are "in flight" */ - __u32 left_out; /* Packets which leaved network */ - __u32 retrans_out; /* Retransmitted packets out */ + tcp_pcount_t packets_out; /* Packets which are "in flight" */ + tcp_pcount_t left_out; /* Packets which leaved network */ + tcp_pcount_t retrans_out; /* Retransmitted packets out */ /* @@ -337,9 +341,9 @@ struct tcp_opt { __u8 syn_retries; /* num of allowed syn retries */ __u8 ecn_flags; /* ECN status bits. */ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ - __u32 lost_out; /* Lost packets */ - __u32 sacked_out; /* SACK'd packets */ - __u32 fackets_out; /* FACK'd packets */ + tcp_pcount_t lost_out; /* Lost packets */ + tcp_pcount_t sacked_out;/* SACK'd packets */ + tcp_pcount_t fackets_out;/* FACK'd packets */ __u32 high_seq; /* snd_nxt at onset of congestion */ __u32 retrans_stamp; /* Timestamp of the last retransmit, diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 3df412c9f386..8c97932a98e7 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -67,6 +67,8 @@ struct neigh_parms void *sysctl_table; + int dead; + atomic_t refcnt; struct rcu_head rcu_head; int base_reachable_time; @@ -199,6 +201,7 @@ extern struct neighbour *neigh_event_ns(struct neigh_table *tbl, extern struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl); extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms); +extern void neigh_parms_destroy(struct neigh_parms *parms); extern unsigned long neigh_rand_reach_time(unsigned long base); extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p, @@ -220,6 +223,23 @@ extern int neigh_sysctl_register(struct net_device *dev, proc_handler *proc_handler); extern void neigh_sysctl_unregister(struct neigh_parms *p); +static inline void __neigh_parms_put(struct neigh_parms *parms) +{ + atomic_dec(&parms->refcnt); +} + +static inline void neigh_parms_put(struct neigh_parms *parms) +{ + if (atomic_dec_and_test(&parms->refcnt)) + neigh_parms_destroy(parms); +} + +static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms) +{ + atomic_inc(&parms->refcnt); + return parms; +} + /* * Neighbour references */ diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h index 09b899d87629..be5d651e4fe3 100644 --- a/include/net/pkt_act.h +++ b/include/net/pkt_act.h @@ -274,11 +274,11 @@ tcf_hash_create(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int static inline struct tcf_st * tcf_hash_init(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind) { - struct tcf_st *p; - p = tcf_hash_check (parm,a,ovr,bind); - if (NULL == p) { - return tcf_hash_create(parm, est, a, size, ovr, bind); - } + struct tcf_st *p = tcf_hash_check (parm,a,ovr,bind); + + if (!p) + p = tcf_hash_create(parm, est, a, size, ovr, bind); + return p; } #endif diff --git a/include/net/tcp.h b/include/net/tcp.h index a5be63c232e3..1a8a317f2bd5 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1047,13 +1047,16 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long * is not a big flaw. */ -static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large) +static inline unsigned int tcp_current_mss(struct sock *sk, int large) { struct tcp_opt *tp = tcp_sk(sk); struct dst_entry *dst = __sk_dst_get(sk); - int mss_now = large && (sk->sk_route_caps & NETIF_F_TSO) && - !tp->urg_mode ? - tp->mss_cache : tp->mss_cache_std; + int do_large, mss_now; + + do_large = (large && + (sk->sk_route_caps & NETIF_F_TSO) && + !tp->urg_mode); + mss_now = do_large ? tp->mss_cache : tp->mss_cache_std; if (dst) { u32 mtu = dst_pmtu(dst); @@ -1181,12 +1184,76 @@ struct tcp_skb_cb { __u16 urg_ptr; /* Valid w/URG flags is set. */ __u32 ack_seq; /* Sequence number ACK'd */ + __u32 tso_factor; }; #define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0])) #include <net/tcp_ecn.h> +/* Due to TSO, an SKB can be composed of multiple actual + * packets. To keep these tracked properly, we use this. + */ +static inline int tcp_skb_pcount(struct sk_buff *skb) +{ + return TCP_SKB_CB(skb)->tso_factor; +} + +static inline void tcp_inc_pcount(tcp_pcount_t *count, struct sk_buff *skb) +{ + count->val += tcp_skb_pcount(skb); +} + +static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt) +{ + count->val += amt; +} + +static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt) +{ + count->val -= amt; +} + +static inline void tcp_dec_pcount(tcp_pcount_t *count, struct sk_buff *skb) +{ + count->val -= tcp_skb_pcount(skb); +} + +static inline void tcp_dec_pcount_approx(tcp_pcount_t *count, + struct sk_buff *skb) +{ + if (count->val) { + count->val -= tcp_skb_pcount(skb); + if ((int)count->val < 0) + count->val = 0; + } +} + +static inline __u32 tcp_get_pcount(tcp_pcount_t *count) +{ + return count->val; +} + +static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val) +{ + count->val = val; +} + +static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_opt *tp, + struct sk_buff *skb) +{ + int orig = tcp_get_pcount(&tp->packets_out); + + tcp_inc_pcount(&tp->packets_out, skb); + if (!orig) + tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); +} + +static inline void tcp_packets_out_dec(struct tcp_opt *tp, struct sk_buff *skb) +{ + tcp_dec_pcount(&tp->packets_out, skb); +} + /* This determines how many packets are "in the network" to the best * of our knowledge. In many cases it is conservative, but where * detailed information is available from the receiver (via SACK @@ -1203,7 +1270,9 @@ struct tcp_skb_cb { */ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) { - return tp->packets_out - tp->left_out + tp->retrans_out; + return (tcp_get_pcount(&tp->packets_out) - + tcp_get_pcount(&tp->left_out) + + tcp_get_pcount(&tp->retrans_out)); } /* Recalculate snd_ssthresh, we want to set it to: @@ -1304,9 +1373,15 @@ static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp) static inline void tcp_sync_left_out(struct tcp_opt *tp) { - if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out) - tp->sacked_out = tp->packets_out - tp->lost_out; - tp->left_out = tp->sacked_out + tp->lost_out; + if (tp->sack_ok && + (tcp_get_pcount(&tp->sacked_out) >= + tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out))) + tcp_set_pcount(&tp->sacked_out, + (tcp_get_pcount(&tp->packets_out) - + tcp_get_pcount(&tp->lost_out))); + tcp_set_pcount(&tp->left_out, + (tcp_get_pcount(&tp->sacked_out) + + tcp_get_pcount(&tp->lost_out))); } extern void tcp_cwnd_application_limited(struct sock *sk); @@ -1315,14 +1390,16 @@ extern void tcp_cwnd_application_limited(struct sock *sk); static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp) { - if (tp->packets_out >= tp->snd_cwnd) { + __u32 packets_out = tcp_get_pcount(&tp->packets_out); + + if (packets_out >= tp->snd_cwnd) { /* Network is feed fully. */ tp->snd_cwnd_used = 0; tp->snd_cwnd_stamp = tcp_time_stamp; } else { /* Network starves. */ - if (tp->packets_out > tp->snd_cwnd_used) - tp->snd_cwnd_used = tp->packets_out; + if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used) + tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out); if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) tcp_cwnd_application_limited(sk); @@ -1388,16 +1465,25 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && ((nonagle&TCP_NAGLE_CORK) || (!nonagle && - tp->packets_out && + tcp_get_pcount(&tp->packets_out) && tcp_minshall_check(tp)))); } +extern void tcp_set_skb_tso_factor(struct sk_buff *, unsigned int, unsigned int); + /* This checks if the data bearing packet SKB (usually sk->sk_send_head) * should be put on the wire right now. */ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, unsigned cur_mss, int nonagle) { + int pkts = TCP_SKB_CB(skb)->tso_factor; + + if (!pkts) { + tcp_set_skb_tso_factor(skb, cur_mss, tp->mss_cache_std); + pkts = TCP_SKB_CB(skb)->tso_factor; + } + /* RFC 1122 - section 4.2.3.4 * * We must queue if @@ -1424,14 +1510,14 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb, */ return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode || !tcp_nagle_check(tp, skb, cur_mss, nonagle)) && - ((tcp_packets_in_flight(tp) < tp->snd_cwnd) || + (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) || (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) && !after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd)); } static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp) { - if (!tp->packets_out && !tp->pending) + if (!tcp_get_pcount(&tp->packets_out) && !tp->pending) tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); } @@ -1964,7 +2050,7 @@ static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp) { return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) / - (__u32) (tp->mss_cache), + (__u32) (tp->mss_cache_std), 2U); } diff --git a/net/Makefile b/net/Makefile index 61740b47a67d..a46436e0fcc2 100644 --- a/net/Makefile +++ b/net/Makefile @@ -9,7 +9,8 @@ obj-y := nonet.o obj-$(CONFIG_NET) := socket.o core/ -obj-$(CONFIG_COMPAT) += compat.o +tmp-$(CONFIG_COMPAT) := compat.o +obj-$(CONFIG_NET) += $(tmp-y) # LLC has to be linked before the files in net/802/ obj-$(CONFIG_LLC) += llc/ diff --git a/net/atm/clip.c b/net/atm/clip.c index 5de7c1fd73b5..104dd4d19da4 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -26,6 +26,7 @@ #include <linux/bitops.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <linux/rcupdate.h> #include <net/route.h> /* for struct rtable and routing */ #include <net/icmp.h> /* icmp_send */ #include <asm/param.h> /* for HZ */ @@ -311,13 +312,25 @@ static int clip_constructor(struct neighbour *neigh) { struct atmarp_entry *entry = NEIGH2ENTRY(neigh); struct net_device *dev = neigh->dev; - struct in_device *in_dev = dev->ip_ptr; + struct in_device *in_dev; + struct neigh_parms *parms; DPRINTK("clip_constructor (neigh %p, entry %p)\n",neigh,entry); - if (!in_dev) return -EINVAL; neigh->type = inet_addr_type(entry->ip); if (neigh->type != RTN_UNICAST) return -EINVAL; - if (in_dev->arp_parms) neigh->parms = in_dev->arp_parms; + + rcu_read_lock(); + in_dev = rcu_dereference(__in_dev_get(dev)); + if (!in_dev) { + rcu_read_unlock(); + return -EINVAL; + } + + parms = in_dev->arp_parms; + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + rcu_read_unlock(); + neigh->ops = &clip_neigh_ops; neigh->output = neigh->nud_state & NUD_VALID ? neigh->ops->connected_output : neigh->ops->output; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 47fbd98e0e81..3a84182f4474 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1176,13 +1176,16 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, /* check if we can remove this feature. It is broken. */ printk(KERN_WARNING "ax25_connect(): %s uses autobind, please contact jreuter@yaina.de\n", current->comm); - if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) + if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) { + kfree(digi); goto out; + } ax25_fillin_cb(ax25, ax25->ax25_dev); ax25_cb_add(ax25); } else { if (ax25->ax25_dev == NULL) { + kfree(digi); err = -EHOSTUNREACH; goto out; } @@ -1191,8 +1194,7 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_type == SOCK_SEQPACKET && (ax25t=ax25_find_cb(&ax25->source_addr, &fsa->fsa_ax25.sax25_call, digi, ax25->ax25_dev->dev))) { - if (digi != NULL) - kfree(digi); + kfree(digi); err = -EADDRINUSE; /* Already such a connection */ ax25_cb_put(ax25t); goto out; diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c index d0702fbcb21d..f8fb49e34764 100644 --- a/net/bridge/br_notify.c +++ b/net/bridge/br_notify.c @@ -76,10 +76,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v break; case NETDEV_UNREGISTER: + spin_unlock_bh(&br->lock); br_del_if(br, dev); - break; + goto done; } spin_unlock_bh(&br->lock); + done: return NOTIFY_DONE; } diff --git a/net/compat.c b/net/compat.c index 998b21b65363..6080b6439b96 100644 --- a/net/compat.c +++ b/net/compat.c @@ -455,13 +455,15 @@ static int do_set_sock_timeout(int fd, int level, int optname, char __user *optv asmlinkage long compat_sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) { + /* SO_SET_REPLACE seems to be the same in all levels */ if (optname == IPT_SO_SET_REPLACE) return do_netfilter_replace(fd, level, optname, optval, optlen); - if (optname == SO_ATTACH_FILTER) + if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER) return do_set_attach_filter(fd, level, optname, optval, optlen); - if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) + if (level == SOL_SOCKET && + (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)) return do_set_sock_timeout(fd, level, optname, optval, optlen); return sys_setsockopt(fd, level, optname, optval, optlen); diff --git a/net/core/dev.c b/net/core/dev.c index 7a50c543e505..47b3d8497a5d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1249,17 +1249,17 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask) return 0; } -#define HARD_TX_LOCK_BH(dev, cpu) { \ +#define HARD_TX_LOCK(dev, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - spin_lock_bh(&dev->xmit_lock); \ + spin_lock(&dev->xmit_lock); \ dev->xmit_lock_owner = cpu; \ } \ } -#define HARD_TX_UNLOCK_BH(dev) { \ +#define HARD_TX_UNLOCK(dev) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ dev->xmit_lock_owner = -1; \ - spin_unlock_bh(&dev->xmit_lock); \ + spin_unlock(&dev->xmit_lock); \ } \ } @@ -1313,7 +1313,12 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb_checksum_help(&skb, 0)) goto out_kfree_skb; - rcu_read_lock(); + + /* Disable soft irqs for various locks below. Also + * stops preemption for RCU. + */ + local_bh_disable(); + /* Updates of qdisc are serialized by queue_lock. * The struct Qdisc which is pointed to by qdisc is now a * rcu structure - it may be accessed without acquiring @@ -1332,18 +1337,16 @@ int dev_queue_xmit(struct sk_buff *skb) #endif if (q->enqueue) { /* Grab device queue */ - spin_lock_bh(&dev->queue_lock); + spin_lock(&dev->queue_lock); rc = q->enqueue(skb, q); qdisc_run(dev); - spin_unlock_bh(&dev->queue_lock); - rcu_read_unlock(); + spin_unlock(&dev->queue_lock); rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; goto out; } - rcu_read_unlock(); /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... @@ -1358,12 +1361,11 @@ int dev_queue_xmit(struct sk_buff *skb) Either shot noqueue qdisc, it is even simpler 8) */ if (dev->flags & IFF_UP) { - int cpu = get_cpu(); + int cpu = smp_processor_id(); /* ok because BHs are off */ if (dev->xmit_lock_owner != cpu) { - HARD_TX_LOCK_BH(dev, cpu); - put_cpu(); + HARD_TX_LOCK(dev, cpu); if (!netif_queue_stopped(dev)) { if (netdev_nit) @@ -1371,17 +1373,16 @@ int dev_queue_xmit(struct sk_buff *skb) rc = 0; if (!dev->hard_start_xmit(skb, dev)) { - HARD_TX_UNLOCK_BH(dev); + HARD_TX_UNLOCK(dev); goto out; } } - HARD_TX_UNLOCK_BH(dev); + HARD_TX_UNLOCK(dev); if (net_ratelimit()) printk(KERN_CRIT "Virtual device %s asks to " "queue packet!\n", dev->name); goto out_enetdown; } else { - put_cpu(); /* Recursion is detected! It is possible, * unfortunately */ if (net_ratelimit()) @@ -1394,6 +1395,7 @@ out_enetdown: out_kfree_skb: kfree_skb(skb); out: + local_bh_enable(); return rc; } diff --git a/net/core/dv.c b/net/core/dv.c index c1340cc53b75..f8e3f9c6b282 100644 --- a/net/core/dv.c +++ b/net/core/dv.c @@ -553,6 +553,3 @@ void divert_frame(struct sk_buff *skb) break; } } - -EXPORT_SYMBOL(alloc_divert_blk); -EXPORT_SYMBOL(free_divert_blk); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index f5deae1541c4..c9a747e89e5d 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -227,7 +227,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) we must kill timers etc. and move it to safe state. */ - n->parms = &tbl->parms; skb_queue_purge(&n->arp_queue); n->output = neigh_blackhole; if (n->nud_state & NUD_VALID) @@ -273,7 +272,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; - n->parms = &tbl->parms; + n->parms = neigh_parms_clone(&tbl->parms); init_timer(&n->timer); n->timer.function = neigh_timer_handler; n->timer.data = (unsigned long)n; @@ -340,12 +339,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, hash_val = tbl->hash(pkey, dev); write_lock_bh(&tbl->lock); + if (n->parms->dead) { + rc = ERR_PTR(-EINVAL); + goto out_tbl_unlock; + } + for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { neigh_hold(n1); - write_unlock_bh(&tbl->lock); rc = n1; - goto out_neigh_release; + goto out_tbl_unlock; } } @@ -358,6 +361,8 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, rc = n; out: return rc; +out_tbl_unlock: + write_unlock_bh(&tbl->lock); out_neigh_release: neigh_release(n); goto out; @@ -494,6 +499,7 @@ void neigh_destroy(struct neighbour *neigh) skb_queue_purge(&neigh->arp_queue); dev_put(neigh->dev); + neigh_parms_put(neigh->parms); NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); @@ -1120,6 +1126,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev, if (p) { memcpy(p, &tbl->parms, sizeof(*p)); p->tbl = tbl; + atomic_set(&p->refcnt, 1); INIT_RCU_HEAD(&p->rcu_head); p->reachable_time = neigh_rand_reach_time(p->base_reachable_time); @@ -1141,7 +1148,7 @@ static void neigh_rcu_free_parms(struct rcu_head *head) struct neigh_parms *parms = container_of(head, struct neigh_parms, rcu_head); - kfree(parms); + neigh_parms_put(parms); } void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) @@ -1154,6 +1161,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) for (p = &tbl->parms.next; *p; p = &(*p)->next) { if (*p == parms) { *p = parms->next; + parms->dead = 1; write_unlock_bh(&tbl->lock); call_rcu(&parms->rcu_head, neigh_rcu_free_parms); return; @@ -1163,11 +1171,17 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms) NEIGH_PRINTK1("neigh_parms_release: not found\n"); } +void neigh_parms_destroy(struct neigh_parms *parms) +{ + kfree(parms); +} + void neigh_table_init(struct neigh_table *tbl) { unsigned long now = jiffies; + atomic_set(&tbl->parms.refcnt, 1); INIT_RCU_HEAD(&tbl->parms.rcu_head); tbl->parms.reachable_time = neigh_rand_reach_time(tbl->parms.base_reachable_time); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 5a05efb83092..a21a326808b4 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -41,6 +41,7 @@ #include <linux/sysctl.h> #include <linux/notifier.h> #include <asm/uaccess.h> +#include <asm/system.h> #include <net/neighbour.h> #include <net/dst.h> #include <net/flow.h> @@ -1108,6 +1109,7 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err) memset(dn_db, 0, sizeof(struct dn_dev)); memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms)); + smp_wmb(); dev->dn_ptr = dn_db; dn_db->dev = dev; init_timer(&dn_db->timer); @@ -1215,6 +1217,7 @@ static void dn_dev_delete(struct net_device *dev) dev->dn_ptr = NULL; neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms); + neigh_ifdown(&dn_neigh_table, dev); if (dn_db->router) neigh_release(dn_db->router); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index ab64b850c12b..d3d6c592a5cb 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -35,6 +35,7 @@ #include <linux/netfilter_decnet.h> #include <linux/spinlock.h> #include <linux/seq_file.h> +#include <linux/rcupdate.h> #include <asm/atomic.h> #include <net/neighbour.h> #include <net/dst.h> @@ -134,13 +135,25 @@ static int dn_neigh_construct(struct neighbour *neigh) { struct net_device *dev = neigh->dev; struct dn_neigh *dn = (struct dn_neigh *)neigh; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; + struct dn_dev *dn_db; + struct neigh_parms *parms; + + rcu_read_lock(); + dn_db = rcu_dereference(dev->dn_ptr); + if (dn_db == NULL) { + rcu_read_unlock(); + return -EINVAL; + } - if (dn_db == NULL) + parms = dn_db->neigh_parms; + if (!parms) { + rcu_read_unlock(); return -EINVAL; + } - if (dn_db->neigh_parms) - neigh->parms = dn_db->neigh_parms; + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + rcu_read_unlock(); if (dn_db->use_long) neigh->ops = &dn_long_ops; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ed2923791e0e..c859b31fd0f1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -837,7 +837,7 @@ struct proto_ops inet_dgram_ops = { .sendpage = inet_sendpage, }; -struct net_proto_family inet_family_ops = { +static struct net_proto_family inet_family_ops = { .family = PF_INET, .create = inet_create, .owner = THIS_MODULE, @@ -1157,7 +1157,6 @@ EXPORT_SYMBOL(inet_accept); EXPORT_SYMBOL(inet_bind); EXPORT_SYMBOL(inet_dgram_connect); EXPORT_SYMBOL(inet_dgram_ops); -EXPORT_SYMBOL(inet_family_ops); EXPORT_SYMBOL(inet_getname); EXPORT_SYMBOL(inet_ioctl); EXPORT_SYMBOL(inet_listen); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 562702d99ba2..41e726ac3337 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -96,6 +96,7 @@ #include <linux/stat.h> #include <linux/init.h> #include <linux/net.h> +#include <linux/rcupdate.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -237,16 +238,22 @@ static int arp_constructor(struct neighbour *neigh) { u32 addr = *(u32*)neigh->primary_key; struct net_device *dev = neigh->dev; - struct in_device *in_dev = in_dev_get(dev); - - if (in_dev == NULL) - return -EINVAL; + struct in_device *in_dev; + struct neigh_parms *parms; neigh->type = inet_addr_type(addr); - if (in_dev->arp_parms) - neigh->parms = in_dev->arp_parms; - in_dev_put(in_dev); + rcu_read_lock(); + in_dev = rcu_dereference(__in_dev_get(dev)); + if (in_dev == NULL) { + rcu_read_unlock(); + return -EINVAL; + } + + parms = in_dev->arp_parms; + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + rcu_read_unlock(); if (dev->hard_header == NULL) { neigh->nud_state = NUD_NOARP; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index fc9930460864..19eb795a1140 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -184,6 +184,7 @@ static void in_dev_rcu_put(struct rcu_head *head) static void inetdev_destroy(struct in_device *in_dev) { struct in_ifaddr *ifa; + struct net_device *dev; ASSERT_RTNL(); @@ -200,12 +201,15 @@ static void inetdev_destroy(struct in_device *in_dev) devinet_sysctl_unregister(&in_dev->cnf); #endif - in_dev->dev->ip_ptr = NULL; + dev = in_dev->dev; + dev->ip_ptr = NULL; #ifdef CONFIG_SYSCTL neigh_sysctl_unregister(in_dev->arp_parms); #endif neigh_parms_release(&arp_tbl, in_dev->arp_parms); + arp_ifdown(dev); + call_rcu(&in_dev->rcu_head, in_dev_rcu_put); } diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 3a85f7a8d02a..9a8f051208d1 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -127,6 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT(skb, rt) \ do { \ + nf_reset(skb); \ (skb)->nfcache |= NFC_IPVS_PROPERTY; \ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \ (rt)->u.dst.dev, dst_output); \ @@ -201,9 +202,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -280,9 +278,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -418,10 +413,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ - IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -480,9 +471,6 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); LeaveFunction(10); @@ -557,9 +545,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif /* CONFIG_NETFILTER_DEBUG */ IP_VS_XMIT(skb, rt); rc = NF_STOLEN; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index 64755c5aed6e..3e51036e5065 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -623,8 +623,8 @@ int __init init(void) return ret; - cleanup: #ifdef CONFIG_SYSCTL + cleanup: ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp); #endif out: diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index ceff26dbff47..f4c3899771c4 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -54,6 +54,7 @@ static int kill_proto(const struct ip_conntrack *i, void *data) *((u_int8_t *) data)); } +#ifdef CONFIG_PROC_FS static unsigned int print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple, struct ip_conntrack_protocol *proto) @@ -367,6 +368,7 @@ static struct file_operations ct_cpu_seq_fops = { .llseek = seq_lseek, .release = seq_release_private, }; +#endif static unsigned int ip_confirm(unsigned int hooknum, struct sk_buff **pskb, @@ -726,10 +728,15 @@ static ctl_table ip_ct_net_table[] = { }, { .ctl_name = 0 } }; -#endif + +EXPORT_SYMBOL(ip_ct_log_invalid); +#endif /* CONFIG_SYSCTL */ + static int init_or_cleanup(int init) { +#ifdef CONFIG_PROC_FS struct proc_dir_entry *proc, *proc_exp, *proc_stat; +#endif int ret = 0; if (!init) goto cleanup; @@ -738,19 +745,20 @@ static int init_or_cleanup(int init) if (ret < 0) goto cleanup_nothing; - proc = proc_net_create("ip_conntrack", 0440, NULL); +#ifdef CONFIG_PROC_FS + proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); if (!proc) goto cleanup_init; - proc->proc_fops = &ct_file_ops; - proc_exp = proc_net_create("ip_conntrack_expect", 0440, NULL); + proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, + &exp_file_ops); if (!proc_exp) goto cleanup_proc; - proc_exp->proc_fops = &exp_file_ops; proc_stat = proc_net_fops_create("ip_conntrack_stat", S_IRUGO, &ct_cpu_seq_fops); if (!proc_stat) goto cleanup_proc_exp; proc_stat->owner = THIS_MODULE; +#endif ret = nf_register_hook(&ip_conntrack_defrag_ops); if (ret < 0) { @@ -814,12 +822,14 @@ static int init_or_cleanup(int init) local_bh_enable(); nf_unregister_hook(&ip_conntrack_defrag_ops); cleanup_proc_stat: +#ifdef CONFIG_PROC_FS proc_net_remove("ip_conntrack_stat"); cleanup_proc_exp: proc_net_remove("ip_conntrack_exp"); cleanup_proc: proc_net_remove("ip_conntrack"); cleanup_init: +#endif /* CONFIG_PROC_FS */ ip_conntrack_cleanup(); cleanup_nothing: return ret; @@ -912,4 +922,3 @@ EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_put); -EXPORT_SYMBOL(ip_ct_log_invalid); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c index 906b89df2f19..26dca38f692a 100644 --- a/net/ipv4/netfilter/ip_queue.c +++ b/net/ipv4/netfilter/ip_queue.c @@ -619,6 +619,7 @@ static ctl_table ipq_root_table[] = { { .ctl_name = 0 } }; +#ifdef CONFIG_PROC_FS static int ipq_get_info(char *buffer, char **start, off_t offset, int length) { @@ -648,6 +649,7 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length) len = 0; return len; } +#endif /* CONFIG_PROC_FS */ static int init_or_cleanup(int init) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f34bdec2f31e..36953ef7e6c2 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1818,7 +1818,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->backoff = 0; tp->snd_cwnd = 2; tp->probes_out = 0; - tp->packets_out = 0; + tcp_set_pcount(&tp->packets_out, 0); tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; tcp_set_ca_state(tp, TCP_CA_Open); diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 5506944b7e7c..e0f8a7664f7e 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -70,14 +70,14 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rto = (1000000*tp->rto)/HZ; info->tcpi_ato = (1000000*tp->ack.ato)/HZ; - info->tcpi_snd_mss = tp->mss_cache; + info->tcpi_snd_mss = tp->mss_cache_std; info->tcpi_rcv_mss = tp->ack.rcv_mss; - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; - info->tcpi_lost = tp->lost_out; - info->tcpi_retrans = tp->retrans_out; - info->tcpi_fackets = tp->fackets_out; + info->tcpi_unacked = tcp_get_pcount(&tp->packets_out); + info->tcpi_sacked = tcp_get_pcount(&tp->sacked_out); + info->tcpi_lost = tcp_get_pcount(&tp->lost_out); + info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out); + info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out); info->tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ; info->tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 85643472b84d..d7fb3cde4f20 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -897,7 +897,9 @@ static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts) #if FASTRETRANS_DEBUG > 1 printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", tp->sack_ok, tp->ca_state, - tp->reordering, tp->fackets_out, tp->sacked_out, + tp->reordering, + tcp_get_pcount(&tp->fackets_out), + tcp_get_pcount(&tp->sacked_out), tp->undo_marker ? tp->undo_retrans : 0); #endif /* Disable FACK yet. */ @@ -960,7 +962,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; - int reord = tp->packets_out; + int reord = tcp_get_pcount(&tp->packets_out); int prior_fackets; u32 lost_retrans = 0; int flag = 0; @@ -974,9 +976,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->mss_cache = tp->mss_cache_std; } - if (!tp->sacked_out) - tp->fackets_out = 0; - prior_fackets = tp->fackets_out; + if (!tcp_get_pcount(&tp->sacked_out)) + tcp_set_pcount(&tp->fackets_out, 0); + prior_fackets = tcp_get_pcount(&tp->fackets_out); for (i=0; i<num_sacks; i++, sp++) { struct sk_buff *skb; @@ -1074,8 +1076,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ */ if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tp->lost_out--; - tp->retrans_out--; + tcp_dec_pcount(&tp->lost_out, skb); + tcp_dec_pcount(&tp->retrans_out, skb); } } else { /* New sack for not retransmitted frame, @@ -1087,16 +1089,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tp->lost_out--; + tcp_dec_pcount(&tp->lost_out, skb); } } TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; flag |= FLAG_DATA_SACKED; - tp->sacked_out++; + tcp_inc_pcount(&tp->sacked_out, skb); - if (fack_count > tp->fackets_out) - tp->fackets_out = fack_count; + if (fack_count > tcp_get_pcount(&tp->fackets_out)) + tcp_set_pcount(&tp->fackets_out, fack_count); } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); @@ -1110,7 +1112,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (dup_sack && (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); } } } @@ -1134,12 +1136,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ (IsFack(tp) || !before(lost_retrans, TCP_SKB_CB(skb)->ack_seq + tp->reordering * - tp->mss_cache))) { + tp->mss_cache_std))) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; flag |= FLAG_DATA_SACKED; NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); @@ -1148,15 +1150,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } } - tp->left_out = tp->sacked_out + tp->lost_out; + tcp_set_pcount(&tp->left_out, + (tcp_get_pcount(&tp->sacked_out) + + tcp_get_pcount(&tp->lost_out))); - if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, (tp->fackets_out + 1) - reord, 0); + if ((reord < tcp_get_pcount(&tp->fackets_out)) && + tp->ca_state != TCP_CA_Loss) + tcp_update_reordering(tp, + ((tcp_get_pcount(&tp->fackets_out) + 1) - + reord), 0); #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0); BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); #endif return flag; @@ -1186,7 +1193,7 @@ void tcp_enter_frto(struct sock *sk) * If something was really lost, it is eventually caught up * in tcp_enter_frto_loss. */ - tp->retrans_out = 0; + tcp_set_pcount(&tp->retrans_out, 0); tp->undo_marker = tp->snd_una; tp->undo_retrans = 0; @@ -1209,26 +1216,26 @@ static void tcp_enter_frto_loss(struct sock *sk) struct sk_buff *skb; int cnt = 0; - tp->sacked_out = 0; - tp->lost_out = 0; - tp->fackets_out = 0; + tcp_set_pcount(&tp->sacked_out, 0); + tcp_set_pcount(&tp->lost_out, 0); + tcp_set_pcount(&tp->fackets_out, 0); sk_stream_for_retrans_queue(skb, sk) { - cnt++; + cnt += TCP_SKB_CB(skb)->tso_factor;; TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { /* Do not mark those segments lost that were * forward transmitted after RTO */ - if(!after(TCP_SKB_CB(skb)->end_seq, + if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } } else { - tp->sacked_out++; - tp->fackets_out = cnt; + tcp_inc_pcount(&tp->sacked_out, skb); + tcp_set_pcount(&tp->fackets_out, cnt); } } tcp_sync_left_out(tp); @@ -1250,12 +1257,12 @@ static void tcp_enter_frto_loss(struct sock *sk) void tcp_clear_retrans(struct tcp_opt *tp) { - tp->left_out = 0; - tp->retrans_out = 0; + tcp_set_pcount(&tp->left_out, 0); + tcp_set_pcount(&tp->retrans_out, 0); - tp->fackets_out = 0; - tp->sacked_out = 0; - tp->lost_out = 0; + tcp_set_pcount(&tp->fackets_out, 0); + tcp_set_pcount(&tp->sacked_out, 0); + tcp_set_pcount(&tp->lost_out, 0); tp->undo_marker = 0; tp->undo_retrans = 0; @@ -1289,17 +1296,17 @@ void tcp_enter_loss(struct sock *sk, int how) tp->undo_marker = tp->snd_una; sk_stream_for_retrans_queue(skb, sk) { - cnt++; + cnt += TCP_SKB_CB(skb)->tso_factor; if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS) tp->undo_marker = 0; TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } else { - tp->sacked_out++; - tp->fackets_out = cnt; + tcp_inc_pcount(&tp->sacked_out, skb); + tcp_set_pcount(&tp->fackets_out, cnt); } } tcp_sync_left_out(tp); @@ -1336,7 +1343,8 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp) static inline int tcp_fackets_out(struct tcp_opt *tp) { - return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; + return IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 : + tcp_get_pcount(&tp->fackets_out); } static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) @@ -1346,7 +1354,7 @@ static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb) static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp) { - return tp->packets_out && + return tcp_get_pcount(&tp->packets_out) && tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); } @@ -1446,8 +1454,10 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp) static int tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp) { + __u32 packets_out; + /* Trick#1: The loss is proven. */ - if (tp->lost_out) + if (tcp_get_pcount(&tp->lost_out)) return 1; /* Not-A-Trick#2 : Classic rule... */ @@ -1463,8 +1473,9 @@ tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp) /* Trick#4: It is still not OK... But will it be useful to delay * recovery more? */ - if (tp->packets_out <= tp->reordering && - tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) && + packets_out = tcp_get_pcount(&tp->packets_out); + if (packets_out <= tp->reordering && + tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && !tcp_may_send_now(sk, tp)) { /* We have nothing to send. This connection is limited * either by receiver window or by application. @@ -1483,12 +1494,16 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend) { u32 holes; - holes = max(tp->lost_out, 1U); - holes = min(holes, tp->packets_out); + holes = max(tcp_get_pcount(&tp->lost_out), 1U); + holes = min(holes, tcp_get_pcount(&tp->packets_out)); - if (tp->sacked_out + holes > tp->packets_out) { - tp->sacked_out = tp->packets_out - holes; - tcp_update_reordering(tp, tp->packets_out+addend, 0); + if ((tcp_get_pcount(&tp->sacked_out) + holes) > + tcp_get_pcount(&tp->packets_out)) { + tcp_set_pcount(&tp->sacked_out, + (tcp_get_pcount(&tp->packets_out) - holes)); + tcp_update_reordering(tp, + tcp_get_pcount(&tp->packets_out)+addend, + 0); } } @@ -1496,7 +1511,7 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend) static void tcp_add_reno_sack(struct tcp_opt *tp) { - ++tp->sacked_out; + tcp_inc_pcount_explicit(&tp->sacked_out, 1); tcp_check_reno_reordering(tp, 0); tcp_sync_left_out(tp); } @@ -1507,10 +1522,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked { if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tp->sacked_out) - tp->sacked_out = 0; + if (acked-1 >= tcp_get_pcount(&tp->sacked_out)) + tcp_set_pcount(&tp->sacked_out, 0); else - tp->sacked_out -= acked-1; + tcp_dec_pcount_explicit(&tp->sacked_out, acked-1); } tcp_check_reno_reordering(tp, acked); tcp_sync_left_out(tp); @@ -1518,8 +1533,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked static inline void tcp_reset_reno_sack(struct tcp_opt *tp) { - tp->sacked_out = 0; - tp->left_out = tp->lost_out; + tcp_set_pcount(&tp->sacked_out, 0); + tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out)); } /* Mark head of queue up as lost. */ @@ -1529,14 +1544,15 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se struct sk_buff *skb; int cnt = packets; - BUG_TRAP(cnt <= tp->packets_out); + BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out)); sk_stream_for_retrans_queue(skb, sk) { - if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) + cnt -= TCP_SKB_CB(skb)->tso_factor; + if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq)) break; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } } tcp_sync_left_out(tp); @@ -1547,7 +1563,7 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp) { if (IsFack(tp)) { - int lost = tp->fackets_out - tp->reordering; + int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering; if (lost <= 0) lost = 1; tcp_mark_head_lost(sk, tp, lost, tp->high_seq); @@ -1567,7 +1583,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp) if (tcp_skb_timedout(tp, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); } } tcp_sync_left_out(tp); @@ -1632,8 +1648,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg) printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), - tp->snd_cwnd, tp->left_out, - tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out); + tp->snd_cwnd, tcp_get_pcount(&tp->left_out), + tp->snd_ssthresh, tp->prior_ssthresh, + tcp_get_pcount(&tp->packets_out)); } #else #define DBGUNDO(x...) do { } while (0) @@ -1703,13 +1720,13 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp) static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked) { /* Partial ACK arrived. Force Hoe's retransmit. */ - int failed = IsReno(tp) || tp->fackets_out>tp->reordering; + int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering; if (tcp_may_undo(tp)) { /* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */ - if (tp->retrans_out == 0) + if (tcp_get_pcount(&tp->retrans_out) == 0) tp->retrans_stamp = 0; tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); @@ -1736,8 +1753,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp) TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } DBGUNDO(sk, tp, "partial loss"); - tp->lost_out = 0; - tp->left_out = tp->sacked_out; + tcp_set_pcount(&tp->lost_out, 0); + tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out)); tcp_undo_cwr(tp, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); tp->retransmits = 0; @@ -1760,9 +1777,9 @@ static __inline__ void tcp_complete_cwr(struct tcp_opt *tp) static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) { - tp->left_out = tp->sacked_out; + tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out)); - if (tp->retrans_out == 0) + if (tcp_get_pcount(&tp->retrans_out) == 0) tp->retrans_stamp = 0; if (flag&FLAG_ECE) @@ -1771,8 +1788,8 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag) if (tp->ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; - if (tp->left_out || - tp->retrans_out || + if (tcp_get_pcount(&tp->left_out) || + tcp_get_pcount(&tp->retrans_out) || tp->undo_marker) state = TCP_CA_Disorder; @@ -1806,11 +1823,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Some technical things: * 1. Reno does not count dupacks (sacked_out) automatically. */ - if (!tp->packets_out) - tp->sacked_out = 0; + if (!tcp_get_pcount(&tp->packets_out)) + tcp_set_pcount(&tp->sacked_out, 0); /* 2. SACK counts snd_fack in packets inaccurately. */ - if (tp->sacked_out == 0) - tp->fackets_out = 0; + if (tcp_get_pcount(&tp->sacked_out) == 0) + tcp_set_pcount(&tp->fackets_out, 0); /* Now state machine starts. * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ @@ -1818,15 +1835,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ - if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) + if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp)) return; /* C. Process data loss notification, provided it is valid. */ if ((flag&FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && tp->ca_state != TCP_CA_Open && - tp->fackets_out > tp->reordering) { - tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); + tcp_get_pcount(&tp->fackets_out) > tp->reordering) { + tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } @@ -1837,7 +1854,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * when high_seq is ACKed. */ if (tp->ca_state == TCP_CA_Open) { if (!sysctl_tcp_frto) - BUG_TRAP(tp->retrans_out == 0); + BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (tp->ca_state) { @@ -1884,7 +1901,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, if (IsReno(tp) && is_dupack) tcp_add_reno_sack(tp); } else { - int acked = prior_packets - tp->packets_out; + int acked = prior_packets - + tcp_get_pcount(&tp->packets_out); if (IsReno(tp)) tcp_remove_reno_sacks(sk, tp, acked); is_dupack = tcp_try_undo_partial(sk, tp, acked); @@ -1927,7 +1945,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out; + tp->undo_retrans = tcp_get_pcount(&tp->retrans_out); if (tp->ca_state < TCP_CA_CWR) { if (!(flag&FLAG_ECE)) @@ -2156,7 +2174,7 @@ static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt) * is the cwnd during the previous RTT. */ old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) / - tp->mss_cache; + tp->mss_cache_std; old_snd_cwnd = tp->vegas.beg_snd_cwnd; /* Save the extent of the current window so we can use this @@ -2327,7 +2345,7 @@ static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt) static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp) { - if (tp->packets_out==0) { + if (!tcp_get_pcount(&tp->packets_out)) { tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); } else { tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); @@ -2343,7 +2361,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) int acked = 0; __s32 seq_rtt = -1; - while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) { + while ((skb = skb_peek(&sk->sk_write_queue)) && + skb != sk->sk_send_head) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); __u8 sacked = scb->sacked; @@ -2361,7 +2380,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) * connection startup slow start one packet too * quickly. This is severely frowned upon behavior. */ - if(!(scb->flags & TCPCB_FLAG_SYN)) { + if (!(scb->flags & TCPCB_FLAG_SYN)) { acked |= FLAG_DATA_ACKED; } else { acked |= FLAG_SYN_ACKED; @@ -2369,27 +2388,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } if (sacked) { - if(sacked & TCPCB_RETRANS) { + if (sacked & TCPCB_RETRANS) { if(sacked & TCPCB_SACKED_RETRANS) - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); acked |= FLAG_RETRANS_DATA_ACKED; seq_rtt = -1; } else if (seq_rtt < 0) seq_rtt = now - scb->when; - if(sacked & TCPCB_SACKED_ACKED) - tp->sacked_out--; - if(sacked & TCPCB_LOST) - tp->lost_out--; - if(sacked & TCPCB_URG) { + if (sacked & TCPCB_SACKED_ACKED) + tcp_dec_pcount(&tp->sacked_out, skb); + if (sacked & TCPCB_LOST) + tcp_dec_pcount(&tp->lost_out, skb); + if (sacked & TCPCB_URG) { if (tp->urg_mode && !before(scb->end_seq, tp->snd_up)) tp->urg_mode = 0; } } else if (seq_rtt < 0) seq_rtt = now - scb->when; - if (tp->fackets_out) - tp->fackets_out--; - tp->packets_out--; + tcp_dec_pcount_approx(&tp->fackets_out, skb); + tcp_packets_out_dec(tp, skb); __skb_unlink(skb, skb->list); sk_stream_free_skb(sk, skb); } @@ -2400,24 +2418,27 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tp->sacked_out >= 0); - BUG_TRAP((int)tp->lost_out >= 0); - BUG_TRAP((int)tp->retrans_out >= 0); - if (!tp->packets_out && tp->sack_ok) { - if (tp->lost_out) { - printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out, - tp->ca_state); - tp->lost_out = 0; + BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0); + BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0); + if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) { + if (tcp_get_pcount(&tp->lost_out)) { + printk(KERN_DEBUG "Leak l=%u %d\n", + tcp_get_pcount(&tp->lost_out), + tp->ca_state); + tcp_set_pcount(&tp->lost_out, 0); } - if (tp->sacked_out) { - printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out, - tp->ca_state); - tp->sacked_out = 0; + if (tcp_get_pcount(&tp->sacked_out)) { + printk(KERN_DEBUG "Leak s=%u %d\n", + tcp_get_pcount(&tp->sacked_out), + tp->ca_state); + tcp_set_pcount(&tp->sacked_out, 0); } - if (tp->retrans_out) { - printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out, - tp->ca_state); - tp->retrans_out = 0; + if (tcp_get_pcount(&tp->retrans_out)) { + printk(KERN_DEBUG "Leak r=%u %d\n", + tcp_get_pcount(&tp->retrans_out), + tp->ca_state); + tcp_set_pcount(&tp->retrans_out, 0); } } #endif @@ -2712,19 +2733,19 @@ static void westwood_dupack_update(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); - tp->westwood.accounted += tp->mss_cache; - tp->westwood.cumul_ack = tp->mss_cache; + tp->westwood.accounted += tp->mss_cache_std; + tp->westwood.cumul_ack = tp->mss_cache_std; } static inline int westwood_may_change_cumul(struct tcp_opt *tp) { - return ((tp->westwood.cumul_ack) > tp->mss_cache); + return ((tp->westwood.cumul_ack) > tp->mss_cache_std); } static inline void westwood_partial_update(struct tcp_opt *tp) { tp->westwood.accounted -= tp->westwood.cumul_ack; - tp->westwood.cumul_ack = tp->mss_cache; + tp->westwood.cumul_ack = tp->mss_cache_std; } static inline void westwood_complete_update(struct tcp_opt *tp) @@ -2835,7 +2856,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) */ sk->sk_err_soft = 0; tp->rcv_tstamp = tcp_time_stamp; - prior_packets = tp->packets_out; + prior_packets = tcp_get_pcount(&tp->packets_out); if (!prior_packets) goto no_queue; @@ -3857,11 +3878,11 @@ static void tcp_new_space(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); - if (tp->packets_out < tp->snd_cwnd && + if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd && !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && !tcp_memory_pressure && atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { - int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) + + int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache_std) + MAX_TCP_HEADER + 16 + sizeof(struct sk_buff), demanded = max_t(unsigned int, tp->snd_cwnd, tp->reordering + 1); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2cfd74fbf566..73f12904c7c3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2075,7 +2075,7 @@ static int tcp_v4_init_sock(struct sock *sk) */ tp->snd_ssthresh = 0x7fffffff; /* Infinity */ tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache_std = tp->mss_cache = 536; tp->reordering = sysctl_tcp_reordering; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 79c1884c2b8b..ab04144245e5 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -752,11 +752,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->mdev = TCP_TIMEOUT_INIT; newtp->rto = TCP_TIMEOUT_INIT; - newtp->packets_out = 0; - newtp->left_out = 0; - newtp->retrans_out = 0; - newtp->sacked_out = 0; - newtp->fackets_out = 0; + tcp_set_pcount(&newtp->packets_out, 0); + tcp_set_pcount(&newtp->left_out, 0); + tcp_set_pcount(&newtp->retrans_out, 0); + tcp_set_pcount(&newtp->sacked_out, 0); + tcp_set_pcount(&newtp->fackets_out, 0); newtp->snd_ssthresh = 0x7fffffff; /* So many TCP implementations out there (incorrectly) count the diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bd3d0133f724..32174549304e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -52,8 +52,7 @@ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb) if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue) sk->sk_send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - if (tp->packets_out++ == 0) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + tcp_packets_out_inc(sk, tp, skb); } /* SND.NXT, if window was not shrunk. @@ -123,7 +122,8 @@ static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *s { u32 now = tcp_time_stamp; - if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) + if (!tcp_get_pcount(&tp->packets_out) && + (s32)(now - tp->lsndtime) > tp->rto) tcp_cwnd_restart(tp, __sk_dst_get(sk)); tp->lsndtime = now; @@ -259,7 +259,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk) */ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) { - if(skb != NULL) { + if (skb != NULL) { struct inet_opt *inet = inet_sk(sk); struct tcp_opt *tp = tcp_sk(sk); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -268,6 +268,8 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb) int sysctl_flags; int err; + BUG_ON(!TCP_SKB_CB(skb)->tso_factor); + #define SYSCTL_FLAG_TSTAMPS 0x1 #define SYSCTL_FLAG_WSCALE 0x2 #define SYSCTL_FLAG_SACK 0x4 @@ -414,13 +416,29 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss) if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) { sk->sk_send_head = NULL; tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; - if (tp->packets_out++ == 0) - tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); + tcp_packets_out_inc(sk, tp, skb); return; } } } +void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss, + unsigned int mss_std) +{ + if (skb->len <= mss_std) { + /* Avoid the costly divide in the normal + * non-TSO case. + */ + TCP_SKB_CB(skb)->tso_factor = 1; + } else { + unsigned int factor; + + factor = skb->len + (mss_std - 1); + factor /= mss; + TCP_SKB_CB(skb)->tso_factor = factor; + } +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. @@ -453,10 +471,12 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) flags = TCP_SKB_CB(skb)->flags; TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); TCP_SKB_CB(buff)->flags = flags; - TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); + TCP_SKB_CB(buff)->sacked = + (TCP_SKB_CB(skb)->sacked & + (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL)); if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { - tp->lost_out++; - tp->left_out++; + tcp_inc_pcount(&tp->lost_out, buff); + tcp_inc_pcount(&tp->left_out, buff); } TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; @@ -480,6 +500,10 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) */ TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; + /* Fix up tso_factor for both original and new SKB. */ + tcp_set_skb_tso_factor(skb, tp->mss_cache, tp->mss_cache_std); + tcp_set_skb_tso_factor(buff, tp->mss_cache, tp->mss_cache_std); + /* Link BUFF into the send queue. */ __skb_append(skb, buff); @@ -596,7 +620,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) tp->mss_cache = tp->mss_cache_std = mss_now; if (sk->sk_route_caps & NETIF_F_TSO) { - int large_mss; + int large_mss, factor; large_mss = 65535 - tp->af_specific->net_header_len - tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len; @@ -604,8 +628,14 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu) if (tp->max_window && large_mss > (tp->max_window>>1)) large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len); - /* Always keep large mss multiple of real mss. */ - tp->mss_cache = mss_now*(large_mss/mss_now); + /* Always keep large mss multiple of real mss, but + * do not exceed congestion window. + */ + factor = large_mss / mss_now; + if (factor > tp->snd_cwnd) + factor = tp->snd_cwnd; + + tp->mss_cache = mss_now * factor; } return mss_now; @@ -662,7 +692,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle) return 0; } - return !tp->packets_out && sk->sk_send_head; + return !tcp_get_pcount(&tp->packets_out) && sk->sk_send_head; } return 0; } @@ -788,7 +818,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m /* The first test we must make is that neither of these two * SKB's are still referenced by someone else. */ - if(!skb_cloned(skb) && !skb_cloned(next_skb)) { + if (!skb_cloned(skb) && !skb_cloned(next_skb)) { int skb_size = skb->len, next_skb_size = next_skb->len; u16 flags = TCP_SKB_CB(skb)->flags; @@ -831,24 +861,23 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, next_skb); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { - tp->lost_out--; - tp->left_out--; + tcp_dec_pcount(&tp->lost_out, next_skb); + tcp_dec_pcount(&tp->left_out, next_skb); } /* Reno case is special. Sigh... */ - if (!tp->sack_ok && tp->sacked_out) { - tp->sacked_out--; - tp->left_out--; + if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) { + tcp_dec_pcount_approx(&tp->sacked_out, next_skb); + tcp_dec_pcount(&tp->left_out, next_skb); } /* Not quite right: it can be > snd.fack, but * it is better to underestimate fackets. */ - if (tp->fackets_out) - tp->fackets_out--; + tcp_dec_pcount_approx(&tp->fackets_out, next_skb); + tcp_packets_out_dec(tp, next_skb); sk_stream_free_skb(sk, next_skb); - tp->packets_out--; } } @@ -868,11 +897,11 @@ void tcp_simple_retransmit(struct sock *sk) !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tp->retrans_out--; + tcp_dec_pcount(&tp->retrans_out, skb); } if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tp->lost_out++; + tcp_inc_pcount(&tp->lost_out, skb); lost = 1; } } @@ -938,12 +967,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) && TCP_SKB_CB(skb)->seq != tp->snd_una) return -EAGAIN; - if(skb->len > cur_mss) { - if(tcp_fragment(sk, skb, cur_mss)) + if (skb->len > cur_mss) { + if (tcp_fragment(sk, skb, cur_mss)) return -ENOMEM; /* We'll try again later. */ /* New SKB created, account for it. */ - tp->packets_out++; + tcp_inc_pcount(&tp->packets_out, skb); } /* Collapse two adjacent packets if worthwhile and we can. */ @@ -992,7 +1021,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } #endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; - tp->retrans_out++; + tcp_inc_pcount(&tp->retrans_out, skb); /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) @@ -1020,14 +1049,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb; - int packet_cnt = tp->lost_out; + int packet_cnt = tcp_get_pcount(&tp->lost_out); /* First pass: retransmit lost packets. */ if (packet_cnt) { sk_stream_for_retrans_queue(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; + int pkts = TCP_SKB_CB(skb)->tso_factor; + + BUG_ON(!pkts); - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + if ((tcp_packets_in_flight(tp) + (pkts-1)) >= + tp->snd_cwnd) return; if (sacked&TCPCB_LOST) { @@ -1044,7 +1077,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } - if (--packet_cnt <= 0) + packet_cnt -= TCP_SKB_CB(skb)->tso_factor; + if (packet_cnt <= 0) break; } } @@ -1073,17 +1107,22 @@ void tcp_xmit_retransmit_queue(struct sock *sk) packet_cnt = 0; sk_stream_for_retrans_queue(skb, sk) { - if(++packet_cnt > tp->fackets_out) + int pkts = TCP_SKB_CB(skb)->tso_factor; + + BUG_ON(!pkts); + + packet_cnt += pkts; + if (packet_cnt > tcp_get_pcount(&tp->fackets_out)) break; - if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) + if ((tcp_packets_in_flight(tp) + (pkts-1)) >= tp->snd_cwnd) break; - if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) + if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) continue; /* Ok, retransmit it. */ - if(tcp_retransmit_skb(sk, skb)) + if (tcp_retransmit_skb(sk, skb)) break; if (skb == skb_peek(&sk->sk_write_queue)) @@ -1101,13 +1140,13 @@ void tcp_send_fin(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue); - unsigned int mss_now; + int mss_now; /* Optimization, tack on the FIN if we have a queue of * unsent frames. But be careful about outgoing SACKS * and IP options. */ - mss_now = tcp_current_mss(sk, 1); + mss_now = tcp_current_mss(sk, 1); if (sk->sk_send_head != NULL) { TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN; @@ -1127,6 +1166,7 @@ void tcp_send_fin(struct sock *sk) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN); TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tso_factor = 1; /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ TCP_SKB_CB(skb)->seq = tp->write_seq; @@ -1158,6 +1198,7 @@ void tcp_send_active_reset(struct sock *sk, int priority) skb->csum = 0; TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST); TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tso_factor = 1; /* Send it off. */ TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp); @@ -1237,6 +1278,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->dest = req->rmt_port; TCP_SKB_CB(skb)->seq = req->snt_isn; TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1; + TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->tso_factor = 1; th->seq = htonl(TCP_SKB_CB(skb)->seq); th->ack_seq = htonl(req->rcv_isn + 1); if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */ @@ -1338,6 +1381,7 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN; TCP_ECN_send_syn(sk, tp, buff); TCP_SKB_CB(buff)->sacked = 0; + TCP_SKB_CB(buff)->tso_factor = 1; buff->csum = 0; TCP_SKB_CB(buff)->seq = tp->write_seq++; TCP_SKB_CB(buff)->end_seq = tp->write_seq; @@ -1350,7 +1394,7 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = TCP_SKB_CB(buff)->when; __skb_queue_tail(&sk->sk_write_queue, buff); sk_charge_skb(sk, buff); - tp->packets_out++; + tcp_inc_pcount(&tp->packets_out, buff); tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); @@ -1437,6 +1481,7 @@ void tcp_send_ack(struct sock *sk) buff->csum = 0; TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(buff)->sacked = 0; + TCP_SKB_CB(buff)->tso_factor = 1; /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp); @@ -1471,6 +1516,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) skb->csum = 0; TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(skb)->sacked = urgent; + TCP_SKB_CB(skb)->tso_factor = 1; /* Use a previous sequence. This should cause the other * end to send an ack. Don't queue or clone SKB, just @@ -1491,8 +1537,8 @@ int tcp_write_wakeup(struct sock *sk) if ((skb = sk->sk_send_head) != NULL && before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) { int err; - int mss = tcp_current_mss(sk, 0); - int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; + unsigned int mss = tcp_current_mss(sk, 0); + unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq; if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq)) tp->pushed_seq = TCP_SKB_CB(skb)->end_seq; @@ -1514,7 +1560,9 @@ int tcp_write_wakeup(struct sock *sk) sk->sk_route_caps &= ~NETIF_F_TSO; tp->mss_cache = tp->mss_cache_std; } - } + } else if (!TCP_SKB_CB(skb)->tso_factor) + tcp_set_skb_tso_factor(skb, mss, tp->mss_cache_std); + TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH; TCP_SKB_CB(skb)->when = tcp_time_stamp; err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC)); @@ -1542,7 +1590,7 @@ void tcp_send_probe0(struct sock *sk) err = tcp_write_wakeup(sk); - if (tp->packets_out || !sk->sk_send_head) { + if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ tp->probes_out = 0; tp->backoff = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 72a5a50b50ab..c060bb333471 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -121,7 +121,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) * 1. Last segment was sent recently. */ if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. */ - (!tp->snd_wnd && !tp->packets_out)) + (!tp->snd_wnd && !tcp_get_pcount(&tp->packets_out))) do_reset = 1; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); @@ -269,7 +269,7 @@ static void tcp_probe_timer(struct sock *sk) struct tcp_opt *tp = tcp_sk(sk); int max_probes; - if (tp->packets_out || !sk->sk_send_head) { + if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) { tp->probes_out = 0; return; } @@ -316,7 +316,7 @@ static void tcp_retransmit_timer(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); - if (tp->packets_out == 0) + if (!tcp_get_pcount(&tp->packets_out)) goto out; BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue)); @@ -606,7 +606,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ - if (tp->packets_out || sk->sk_send_head) + if (tcp_get_pcount(&tp->packets_out) || sk->sk_send_head) goto resched; elapsed = tcp_time_stamp - tp->rcv_tstamp; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 7150375908a8..d2091c5ce489 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2072,6 +2072,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) neigh_sysctl_unregister(idev->nd_parms); #endif neigh_parms_release(&nd_tbl, idev->nd_parms); + neigh_ifdown(&nd_tbl, dev); in6_dev_put(idev); } return 0; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b278e5a04ca8..e1f5aeb79258 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -58,6 +58,7 @@ #include <linux/in6.h> #include <linux/route.h> #include <linux/init.h> +#include <linux/rcupdate.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -284,14 +285,21 @@ static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key; struct net_device *dev = neigh->dev; - struct inet6_dev *in6_dev = in6_dev_get(dev); + struct inet6_dev *in6_dev; + struct neigh_parms *parms; int is_multicast = ipv6_addr_is_multicast(addr); - if (in6_dev == NULL) + rcu_read_lock(); + in6_dev = in6_dev_get(dev); + if (in6_dev == NULL) { + rcu_read_unlock(); return -EINVAL; + } - if (in6_dev->nd_parms) - neigh->parms = in6_dev->nd_parms; + parms = in6_dev->nd_parms; + __neigh_parms_put(neigh->parms); + neigh->parms = neigh_parms_clone(parms); + rcu_read_unlock(); neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST; if (dev->hard_header == NULL) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 73b34df7fd17..ebed7e197aac 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1929,7 +1929,7 @@ static int tcp_v6_init_sock(struct sock *sk) */ tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_clamp = ~0; - tp->mss_cache = 536; + tp->mss_cache_std = tp->mss_cache = 536; tp->reordering = sysctl_tcp_reordering; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 51d0514fd2a7..1b441a628b71 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -786,11 +786,13 @@ out: static int packet_release(struct socket *sock) { struct sock *sk = sock->sk; - struct packet_opt *po = pkt_sk(sk); + struct packet_opt *po; if (!sk) return 0; + po = pkt_sk(sk); + write_lock_bh(&packet_sklist_lock); sk_del_node_init(sk); write_unlock_bh(&packet_sklist_lock); diff --git a/net/sched/gact.c b/net/sched/gact.c index cd1a58c60485..5607f5e8cd83 100644 --- a/net/sched/gact.c +++ b/net/sched/gact.c @@ -76,7 +76,9 @@ tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,int ov { struct rtattr *tb[TCA_GACT_MAX]; struct tc_gact *parm = NULL; +#ifdef CONFIG_GACT_PROB struct tc_gact_p *p_parm = NULL; +#endif struct tcf_gact *p = NULL; int ret = 0; int size = sizeof (*p); @@ -176,7 +178,9 @@ tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tc_gact opt; +#ifdef CONFIG_GACT_PROB struct tc_gact_p p_opt; +#endif struct tcf_gact *p; struct tcf_t t; diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index fe530156875a..ff61f8e698c9 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -714,3 +714,4 @@ static void __exit atm_exit(void) module_init(atm_init) module_exit(atm_exit) +MODULE_LICENSE("GPL"); diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index bae07708eb01..97f66fd770f4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -81,8 +81,6 @@ static struct sctp_af *sctp_af_v6_specific; kmem_cache_t *sctp_chunk_cachep; kmem_cache_t *sctp_bucket_cachep; -extern struct net_proto_family inet_family_ops; - extern int sctp_snmp_proc_init(void); extern int sctp_snmp_proc_exit(void); extern int sctp_eps_proc_init(void); |
