summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2004-09-07 02:43:04 -0700
committerLinus Torvalds <torvalds@ppc970.osdl.org>2004-09-07 02:43:04 -0700
commitf0975ce872bb3f5f6c86837bc84c1e9dfd76e87c (patch)
tree9578db69ebff882287ff51860ee1c5244332c84e
parent88e1f06c27cc74daececa9e6ad4337cbbfb626a9 (diff)
parent8d34d4fde2dfa67d29298f4bb4c636c3ee1914aa (diff)
Merge bk://kernel.bkbits.net/davem/net-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
-rw-r--r--CREDITS44
-rw-r--r--MAINTAINERS8
-rw-r--r--drivers/net/sungem.c137
-rw-r--r--drivers/net/sungem.h4
-rw-r--r--drivers/s390/net/qeth_main.c21
-rw-r--r--fs/compat_ioctl.c10
-rw-r--r--include/asm-alpha/socket.h16
-rw-r--r--include/asm-arm/socket.h16
-rw-r--r--include/asm-arm26/socket.h16
-rw-r--r--include/asm-cris/socket.h15
-rw-r--r--include/asm-h8300/socket.h16
-rw-r--r--include/asm-i386/socket.h16
-rw-r--r--include/asm-ia64/socket.h16
-rw-r--r--include/asm-m68k/socket.h16
-rw-r--r--include/asm-mips/socket.h16
-rw-r--r--include/asm-parisc/socket.h14
-rw-r--r--include/asm-ppc/socket.h16
-rw-r--r--include/asm-ppc64/socket.h16
-rw-r--r--include/asm-s390/socket.h16
-rw-r--r--include/asm-sh/socket.h16
-rw-r--r--include/asm-sparc/socket.h16
-rw-r--r--include/asm-sparc64/socket.h16
-rw-r--r--include/asm-v850/socket.h16
-rw-r--r--include/asm-x86_64/socket.h16
-rw-r--r--include/linux/net.h21
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_protocol.h4
-rw-r--r--include/linux/tcp.h16
-rw-r--r--include/net/neighbour.h20
-rw-r--r--include/net/pkt_act.h10
-rw-r--r--include/net/tcp.h116
-rw-r--r--net/Makefile3
-rw-r--r--net/atm/clip.c19
-rw-r--r--net/ax25/af_ax25.c8
-rw-r--r--net/bridge/br_notify.c4
-rw-r--r--net/compat.c6
-rw-r--r--net/core/dev.c32
-rw-r--r--net/core/dv.c3
-rw-r--r--net/core/neighbour.c24
-rw-r--r--net/decnet/dn_dev.c3
-rw-r--r--net/decnet/dn_neigh.c21
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/arp.c21
-rw-r--r--net/ipv4/devinet.c6
-rw-r--r--net/ipv4/ipvs/ip_vs_xmit.c17
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c21
-rw-r--r--net/ipv4/netfilter/ip_queue.c2
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_diag.c12
-rw-r--r--net/ipv4/tcp_input.c255
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_minisocks.c10
-rw-r--r--net/ipv4/tcp_output.c134
-rw-r--r--net/ipv4/tcp_timer.c8
-rw-r--r--net/ipv6/addrconf.c1
-rw-r--r--net/ipv6/ndisc.c16
-rw-r--r--net/ipv6/tcp_ipv6.c2
-rw-r--r--net/packet/af_packet.c4
-rw-r--r--net/sched/gact.c4
-rw-r--r--net/sched/sch_atm.c1
-rw-r--r--net/sctp/protocol.c2
61 files changed, 736 insertions, 608 deletions
diff --git a/CREDITS b/CREDITS
index 736a442f2276..bb348015fb77 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1586,6 +1586,15 @@ D: Backport/Forwardport merge monkey.
D: Various Janitor work.
S: United Kingdom
+N: Martin Josfsson
+E: gandalf@wlug.westbo.se
+P: 1024D/F6B6D3B1 7610 7CED 5C34 4AA6 DBA2 8BE1 5A6D AF95 F6B6 D3B1
+D: netfilter: SAME target
+D: netfilter: helper target
+D: netfilter: various other hacks
+S: Ronneby
+S: Sweden
+
N: Ani Joshi
E: ajoshi@shell.unixbox.com
D: fbdev hacking
@@ -1597,6 +1606,17 @@ S: Lemnosvej 1, 3.tv
S: 2300 Copenhagen S
S: Denmark
+N: Jozsef Kadlecsik
+E: kadlec@blackhole.kfki.hu
+P: 1024D/470DB964 4CB3 1A05 713E 9BF7 FAC5 5809 DD8C B7B1 470D B964
+D: netfilter: TCP window tracking code
+D: netfilter: raw table
+D: netfilter: iprange match
+D: netfilter: new logging interfaces
+D: netfilter: various other hacks
+S: Tata
+S: Hungary
+
N: Bernhard Kaindl
E: bkaindl@netway.at
E: edv@bartelt.via.at
@@ -2146,6 +2166,16 @@ D: OV511 driver
S: (address available on request)
S: USA
+N: Patrick McHardy
+E: kaber@trash.net
+P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80
+D: netfilter: endless number of bugfixes
+D: netfilter: CLASSIFY target
+D: netfilter: addrtype match
+D: tc: HFSC scheduler
+S: Freiburg
+S: Germany
+
N: Mike McLagan
E: mike.mclagan@linux.org
W: http://www.invlogic.com/~mmclagan
@@ -2821,7 +2851,7 @@ S: Germany
N: Paul `Rusty' Russell
E: rusty@rustcorp.com.au
-W: http://www.samba.org/netfilter
+W: http://ozlabs.org/~rusty
D: Ruggedly handsome.
D: netfilter, ipchains with Michael Neuling.
S: 52 Moore St
@@ -3405,6 +3435,18 @@ S: UC Berkeley
S: Berkeley, CA 94720-1776
S: USA
+N: Harald Welte
+E: laforge@netfilter.org
+P: 1024D/30F48BFF DBDE 6912 8831 9A53 879B 9190 5DA5 C655 30F4 8BFF
+W: http://gnumonks.org/users/laforge
+D: netfilter: new nat helper infrastructure
+D: netfilter: ULOG, ECN, DSCP target
+D: netfilter: TTL match
+D: netfilter: IPv6 mangle table
+D: netfilter: various other hacks
+S: Berlin
+S: Germany
+
N: Bill Wendling
E: wendling@ganymede.isdn.uiuc.edu
W: http://www.ncsa.uiuc.edu/~wendling/
diff --git a/MAINTAINERS b/MAINTAINERS
index e6030768dddd..c1a5296a74c7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1548,6 +1548,14 @@ M: kaber@coreworks.de
L: netdev@oss.sgi.com
S: Maintained
+IPVS
+P: Wensong Zhang
+M: wensong@linux-vs.org
+P: Julian Anastasov
+M: ja@ssi.bg
+L: lvs-users@linuxvirtualserver.org
+S: Maintained
+
NFS CLIENT
P: Trond Myklebust
M: trond.myklebust@fys.uio.no
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 72138612d2fb..6177397c30cb 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -181,6 +181,18 @@ static inline void phy_write(struct gem *gp, int reg, u16 val)
__phy_write(gp, gp->mii_phy_addr, reg, val);
}
+static inline void gem_enable_ints(struct gem *gp)
+{
+ /* Enable all interrupts but TXDONE */
+ writel(GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
+}
+
+static inline void gem_disable_ints(struct gem *gp)
+{
+ /* Disable all interrupts, including TXDONE */
+ writel(GREG_STAT_NAPI | GREG_STAT_TXDONE, gp->regs + GREG_IMASK);
+}
+
static void gem_handle_mif_event(struct gem *gp, u32 reg_val, u32 changed_bits)
{
if (netif_msg_intr(gp))
@@ -639,7 +651,7 @@ static __inline__ void gem_tx(struct net_device *dev, struct gem *gp, u32 gem_st
}
gp->net_stats.tx_packets++;
- dev_kfree_skb_irq(skb);
+ dev_kfree_skb(skb);
}
gp->tx_old = entry;
@@ -678,12 +690,12 @@ static __inline__ void gem_post_rxds(struct gem *gp, int limit)
}
}
-static void gem_rx(struct gem *gp)
+static int gem_rx(struct gem *gp, int work_to_do)
{
- int entry, drops;
+ int entry, drops, work_done = 0;
u32 done;
- if (netif_msg_intr(gp))
+ if (netif_msg_rx_status(gp))
printk(KERN_DEBUG "%s: rx interrupt, done: %d, rx_new: %d\n",
gp->dev->name, readl(gp->regs + RXDMA_DONE), gp->rx_new);
@@ -700,6 +712,9 @@ static void gem_rx(struct gem *gp)
if ((status & RXDCTRL_OWN) != 0)
break;
+ if (work_done >= RX_RING_SIZE || work_done >= work_to_do)
+ break;
+
/* When writing back RX descriptor, GEM writes status
* then buffer address, possibly in seperate transactions.
* If we don't wait for the chip to write both, we could
@@ -713,6 +728,9 @@ static void gem_rx(struct gem *gp)
break;
}
+ /* We can now account for the work we're about to do */
+ work_done++;
+
skb = gp->rx_skbs[entry];
len = (status & RXDCTRL_BUFSZ) >> 16;
@@ -775,7 +793,8 @@ static void gem_rx(struct gem *gp)
skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff);
skb->ip_summed = CHECKSUM_HW;
skb->protocol = eth_type_trans(skb, gp->dev);
- netif_rx(skb);
+
+ netif_receive_skb(skb);
gp->net_stats.rx_packets++;
gp->net_stats.rx_bytes += len;
@@ -792,32 +811,88 @@ static void gem_rx(struct gem *gp)
if (drops)
printk(KERN_INFO "%s: Memory squeeze, deferring packet.\n",
gp->dev->name);
+
+ return work_done;
+}
+
+static int gem_poll(struct net_device *dev, int *budget)
+{
+ struct gem *gp = dev->priv;
+ unsigned long flags;
+
+ spin_lock_irqsave(&gp->lock, flags);
+
+ do {
+ int work_to_do, work_done;
+
+ /* Handle anomalies */
+ if (gp->status & GREG_STAT_ABNORMAL) {
+ if (gem_abnormal_irq(dev, gp, gp->status))
+ break;
+ }
+
+ /* Run TX completion thread */
+ gem_tx(dev, gp, gp->status);
+
+ spin_unlock_irqrestore(&gp->lock, flags);
+
+ /* Run RX thread. We don't use any locking here,
+ * code willing to do bad things - like cleaning the
+ * rx ring - must call netif_poll_disable(), which
+ * schedule_timeout()'s if polling is already disabled.
+ */
+ work_to_do = min(*budget, dev->quota);
+
+ work_done = gem_rx(gp, work_to_do);
+
+ *budget -= work_done;
+ dev->quota -= work_done;
+
+ if (work_done >= work_to_do)
+ return 1;
+
+ spin_lock_irqsave(&gp->lock, flags);
+
+ gp->status = readl(gp->regs + GREG_STAT);
+ } while (gp->status & GREG_STAT_NAPI);
+
+ __netif_rx_complete(dev);
+ gem_enable_ints(gp);
+
+ spin_unlock_irqrestore(&gp->lock, flags);
+ return 0;
}
static irqreturn_t gem_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
struct net_device *dev = dev_id;
struct gem *gp = dev->priv;
- u32 gem_status = readl(gp->regs + GREG_STAT);
+ unsigned long flags;
/* Swallow interrupts when shutting the chip down */
- if (gp->hw_running == 0)
- goto out;
+ if (!gp->hw_running)
+ return IRQ_HANDLED;
- spin_lock(&gp->lock);
+ spin_lock_irqsave(&gp->lock, flags);
+
+ if (netif_rx_schedule_prep(dev)) {
+ u32 gem_status = readl(gp->regs + GREG_STAT);
- if (gem_status & GREG_STAT_ABNORMAL) {
- if (gem_abnormal_irq(dev, gp, gem_status))
- goto out_unlock;
+ if (gem_status == 0) {
+ spin_unlock_irqrestore(&gp->lock, flags);
+ return IRQ_NONE;
+ }
+ gp->status = gem_status;
+ gem_disable_ints(gp);
+ __netif_rx_schedule(dev);
}
- if (gem_status & (GREG_STAT_TXALL | GREG_STAT_TXINTME))
- gem_tx(dev, gp, gem_status);
- if (gem_status & GREG_STAT_RXDONE)
- gem_rx(gp);
-out_unlock:
- spin_unlock(&gp->lock);
-out:
+ spin_unlock_irqrestore(&gp->lock, flags);
+
+ /* If polling was disabled at the time we received that
+ * interrupt, we may return IRQ_HANDLED here while we
+ * should return IRQ_NONE. No big deal...
+ */
return IRQ_HANDLED;
}
@@ -1312,19 +1387,12 @@ static void gem_reset_task(void *data)
{
struct gem *gp = (struct gem *) data;
- /* The link went down, we reset the ring, but keep
- * DMA stopped. Todo: Use this function for reset
- * on error as well.
- */
-
+ netif_poll_disable(gp->dev);
spin_lock_irq(&gp->lock);
if (gp->hw_running && gp->opened) {
- /* Make sure we don't get interrupts or tx packets */
netif_stop_queue(gp->dev);
- writel(0xffffffff, gp->regs + GREG_IMASK);
-
/* Reset the chip & rings */
gem_stop(gp);
gem_init_rings(gp);
@@ -1337,6 +1405,7 @@ static void gem_reset_task(void *data)
gp->reset_task_pending = 0;
spin_unlock_irq(&gp->lock);
+ netif_poll_enable(gp->dev);
}
static void gem_link_timer(unsigned long data)
@@ -2214,11 +2283,15 @@ static int gem_close(struct net_device *dev)
/* Make sure we don't get distracted by suspend/resume */
down(&gp->pm_sem);
+ /* Note: we don't need to call netif_poll_disable() here because
+ * our caller (dev_close) already did it for us
+ */
+
/* Stop traffic, mark us closed */
spin_lock_irq(&gp->lock);
gp->opened = 0;
- writel(0xffffffff, gp->regs + GREG_IMASK);
+
netif_stop_queue(dev);
/* Stop chip */
@@ -2247,6 +2320,8 @@ static int gem_suspend(struct pci_dev *pdev, u32 state)
struct net_device *dev = pci_get_drvdata(pdev);
struct gem *gp = dev->priv;
+ netif_poll_disable(dev);
+
/* We hold the PM semaphore during entire driver
* sleep time
*/
@@ -2262,8 +2337,6 @@ static int gem_suspend(struct pci_dev *pdev, u32 state)
/* Stop traffic, mark us closed */
netif_device_detach(dev);
- writel(0xffffffff, gp->regs + GREG_IMASK);
-
/* Stop chip */
gem_stop(gp);
@@ -2317,6 +2390,8 @@ static int gem_resume(struct pci_dev *pdev)
}
up(&gp->pm_sem);
+ netif_poll_enable(dev);
+
return 0;
}
#endif /* CONFIG_PM */
@@ -2806,6 +2881,8 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
dev->get_stats = gem_get_stats;
dev->set_multicast_list = gem_set_multicast;
dev->do_ioctl = gem_ioctl;
+ dev->poll = gem_poll;
+ dev->weight = 64;
dev->ethtool_ops = &gem_ethtool_ops;
dev->tx_timeout = gem_tx_timeout;
dev->watchdog_timeo = 5 * HZ;
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index eed77bfe1b60..bc0175acb52e 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -60,6 +60,9 @@
GREG_STAT_PCS | GREG_STAT_TXMAC | GREG_STAT_RXMAC | \
GREG_STAT_MAC | GREG_STAT_MIF | GREG_STAT_PCIERR)
+#define GREG_STAT_NAPI (GREG_STAT_TXALL | GREG_STAT_TXINTME | \
+ GREG_STAT_RXDONE | GREG_STAT_ABNORMAL)
+
/* The layout of GREG_IMASK and GREG_IACK is identical to GREG_STAT.
* Bits set in GREG_IMASK will prevent that interrupt type from being
* signalled to the cpu. GREG_IACK can be used to clear specific top-level
@@ -969,6 +972,7 @@ struct gem {
struct sk_buff *tx_skbs[RX_RING_SIZE];
u32 msg_enable;
+ u32 status;
struct net_device_stats net_stats;
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index f6740ca511bd..a8e034b156cf 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -6710,19 +6710,26 @@ static int
qeth_arp_constructor(struct neighbour *neigh)
{
struct net_device *dev = neigh->dev;
- struct in_device *in_dev = in_dev_get(dev);
+ struct in_device *in_dev;
+ struct neigh_parms *parms;
- if (in_dev == NULL)
- return -EINVAL;
if (!qeth_verify_dev(dev)) {
- in_dev_put(in_dev);
return qeth_old_arp_constructor(neigh);
}
+ rcu_read_lock();
+ in_dev = rcu_dereference(__in_dev_get(dev));
+ if (in_dev == NULL) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ parms = in_dev->arp_parms;
+ __neigh_parms_put(neigh->parms);
+ neigh->parms = neigh_parms_clone(parms);
+ rcu_read_unlock();
+
neigh->type = inet_addr_type(*(u32 *) neigh->primary_key);
- if (in_dev->arp_parms)
- neigh->parms = in_dev->arp_parms;
- in_dev_put(in_dev);
neigh->nud_state = NUD_NOARP;
neigh->ops = arp_direct_ops;
neigh->output = neigh->ops->queue_xmit;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index dc1a2d2c41d5..a493b5b5871b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -15,6 +15,7 @@
#include <linux/types.h>
#include <linux/compat.h>
#include <linux/kernel.h>
+#include <linux/compiler.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
@@ -407,6 +408,7 @@ out:
return err;
}
+#ifdef CONFIG_NET
static int do_siocgstamp(unsigned int fd, unsigned int cmd, unsigned long arg)
{
struct compat_timeval __user *up = compat_ptr(arg);
@@ -461,7 +463,6 @@ struct ifconf32 {
compat_caddr_t ifcbuf;
};
-#ifdef CONFIG_NET
static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
{
struct net_device *dev;
@@ -481,7 +482,6 @@ static int dev_ifname32(unsigned int fd, unsigned int cmd, unsigned long arg)
err = copy_to_user(compat_ptr(arg), &ifr32, sizeof(ifr32));
return (err ? -EFAULT : 0);
}
-#endif
static int dev_ifconf(unsigned int fd, unsigned int cmd, unsigned long arg)
{
@@ -797,6 +797,7 @@ static int routing_ioctl(unsigned int fd, unsigned int cmd, unsigned long arg)
return ret;
}
+#endif
struct hd_geometry32 {
unsigned char heads;
@@ -1872,7 +1873,8 @@ static int do_atm_ioctl(unsigned int fd, unsigned int cmd32, unsigned long arg)
return -EINVAL;
}
-static int ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
+static __attribute_used__ int
+ret_einval(unsigned int fd, unsigned int cmd, unsigned long arg)
{
return -EINVAL;
}
@@ -3162,7 +3164,6 @@ HANDLE_IOCTL(MEMREADOOB32, mtd_rw_oob)
HANDLE_IOCTL(MEMWRITEOOB32, mtd_rw_oob)
#ifdef CONFIG_NET
HANDLE_IOCTL(SIOCGIFNAME, dev_ifname32)
-#endif
HANDLE_IOCTL(SIOCGIFCONF, dev_ifconf)
HANDLE_IOCTL(SIOCGIFFLAGS, dev_ifsioc)
HANDLE_IOCTL(SIOCSIFFLAGS, dev_ifsioc)
@@ -3206,6 +3207,7 @@ HANDLE_IOCTL(SIOCBRDELIF, dev_ifsioc)
/* Note SIOCRTMSG is no longer, so this is safe and * the user would have seen just an -EINVAL anyways. */
HANDLE_IOCTL(SIOCRTMSG, ret_einval)
HANDLE_IOCTL(SIOCGSTAMP, do_siocgstamp)
+#endif
HANDLE_IOCTL(HDIO_GETGEO, hdio_getgeo)
HANDLE_IOCTL(BLKRAGET, w_long)
HANDLE_IOCTL(BLKGETSIZE, w_long)
diff --git a/include/asm-alpha/socket.h b/include/asm-alpha/socket.h
index 88912c4c8931..d00259d3dc78 100644
--- a/include/asm-alpha/socket.h
+++ b/include/asm-alpha/socket.h
@@ -55,20 +55,4 @@
#define SO_SECURITY_ENCRYPTION_TRANSPORT 20
#define SO_SECURITY_ENCRYPTION_NETWORK 21
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-arm/socket.h b/include/asm-arm/socket.h
index b05e717397a1..46d20585d951 100644
--- a/include/asm-arm/socket.h
+++ b/include/asm-arm/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-arm26/socket.h b/include/asm-arm26/socket.h
index b05e717397a1..46d20585d951 100644
--- a/include/asm-arm26/socket.h
+++ b/include/asm-arm26/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-cris/socket.h b/include/asm-cris/socket.h
index b4421808b676..f159b4f165f7 100644
--- a/include/asm-cris/socket.h
+++ b/include/asm-cris/socket.h
@@ -49,21 +49,6 @@
#define SO_PEERSEC 31
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-h8300/socket.h b/include/asm-h8300/socket.h
index 070d46d2344e..af33b8525dcf 100644
--- a/include/asm-h8300/socket.h
+++ b/include/asm-h8300/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-i386/socket.h b/include/asm-i386/socket.h
index 711f906193fd..07f6b38ad140 100644
--- a/include/asm-i386/socket.h
+++ b/include/asm-i386/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nasty libc5 fixup - bletch */
-#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-ia64/socket.h b/include/asm-ia64/socket.h
index bf4434e26a81..21a9f10d6baa 100644
--- a/include/asm-ia64/socket.h
+++ b/include/asm-ia64/socket.h
@@ -56,20 +56,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/include/asm-m68k/socket.h b/include/asm-m68k/socket.h
index 68a33bfbae03..8d0b9fc2d07e 100644
--- a/include/asm-m68k/socket.h
+++ b/include/asm-m68k/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-mips/socket.h b/include/asm-mips/socket.h
index 6556c10be8f0..855b86f3ea0e 100644
--- a/include/asm-mips/socket.h
+++ b/include/asm-mips/socket.h
@@ -68,20 +68,4 @@ To add: #define SO_REUSEPORT 0x0200 /* Allow local address and port reuse. */
#define SO_PEERSEC 30
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_DGRAM 1 /* datagram (conn.less) socket */
-#define SOCK_STREAM 2 /* stream (connection) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-parisc/socket.h b/include/asm-parisc/socket.h
index fd3f0f29eb3c..4a77996c1862 100644
--- a/include/asm-parisc/socket.h
+++ b/include/asm-parisc/socket.h
@@ -47,18 +47,4 @@
#define SO_PEERSEC 0x401d
-#if defined(__KERNEL__)
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-ppc/socket.h b/include/asm-ppc/socket.h
index bad94c36f1b8..4134376b0f66 100644
--- a/include/asm-ppc/socket.h
+++ b/include/asm-ppc/socket.h
@@ -53,20 +53,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif /* __KERNEL__ */
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-ppc64/socket.h b/include/asm-ppc64/socket.h
index 1021a5268346..59e00dfc8b8e 100644
--- a/include/asm-ppc64/socket.h
+++ b/include/asm-ppc64/socket.h
@@ -54,20 +54,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-s390/socket.h b/include/asm-s390/socket.h
index 1855ec7a112b..0e96eeca4e6b 100644
--- a/include/asm-s390/socket.h
+++ b/include/asm-s390/socket.h
@@ -55,20 +55,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-sh/socket.h b/include/asm-sh/socket.h
index 2b9469472f76..dde696c3b4c7 100644
--- a/include/asm-sh/socket.h
+++ b/include/asm-sh/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* __ASM_SH_SOCKET_H */
diff --git a/include/asm-sparc/socket.h b/include/asm-sparc/socket.h
index cfa529d9bb96..c1154e3ecfdf 100644
--- a/include/asm-sparc/socket.h
+++ b/include/asm-sparc/socket.h
@@ -52,20 +52,4 @@
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
#define SO_SECURITY_ENCRYPTION_NETWORK 0x5004
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-sparc64/socket.h b/include/asm-sparc64/socket.h
index ba2230ba1d7d..865547a23908 100644
--- a/include/asm-sparc64/socket.h
+++ b/include/asm-sparc64/socket.h
@@ -52,20 +52,4 @@
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
#define SO_SECURITY_ENCRYPTION_NETWORK 0x5004
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/asm-v850/socket.h b/include/asm-v850/socket.h
index 7d7f80efa8b3..213b852af53e 100644
--- a/include/asm-v850/socket.h
+++ b/include/asm-v850/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nast libc5 fixup - bletch */
-#if defined(__KERNEL__)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* __V850_SOCKET_H__ */
diff --git a/include/asm-x86_64/socket.h b/include/asm-x86_64/socket.h
index 373e7abadfb5..d9a252ea8210 100644
--- a/include/asm-x86_64/socket.h
+++ b/include/asm-x86_64/socket.h
@@ -47,20 +47,4 @@
#define SO_PEERSEC 31
-/* Nasty libc5 fixup - bletch */
-#if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2)
-/* Socket types. */
-#define SOCK_STREAM 1 /* stream (connection) socket */
-#define SOCK_DGRAM 2 /* datagram (conn.less) socket */
-#define SOCK_RAW 3 /* raw socket */
-#define SOCK_RDM 4 /* reliably-delivered message */
-#define SOCK_SEQPACKET 5 /* sequential packet socket */
-#define SOCK_PACKET 10 /* linux specific way of */
- /* getting packets at the dev */
- /* level. For writing rarp and */
- /* other similar things on the */
- /* user level. */
-#define SOCK_MAX (SOCK_PACKET+1)
-#endif
-
#endif /* _ASM_SOCKET_H */
diff --git a/include/linux/net.h b/include/linux/net.h
index 80e7fec727e3..0f710b7e4121 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -61,6 +61,27 @@ typedef enum {
#define SOCK_ASYNC_WAITDATA 1
#define SOCK_NOSPACE 2
+/** sock_type - Socket types
+ *
+ * @SOCK_STREAM - stream (connection) socket
+ * @SOCK_DGRAM - datagram (conn.less) socket
+ * @SOCK_RAW - raw socket
+ * @SOCK_RDM - reliably-delivered message
+ * @SOCK_SEQPACKET - sequential packet socket
+ * @SOCK_PACKET - linux specific way of getting packets at the dev level.
+ * For writing rarp and other similar things on the user level.
+ */
+enum sock_type {
+ SOCK_STREAM = 1,
+ SOCK_DGRAM = 2,
+ SOCK_RAW = 3,
+ SOCK_RDM = 4,
+ SOCK_SEQPACKET = 5,
+ SOCK_PACKET = 10,
+};
+
+#define SOCK_MAX (SOCK_PACKET + 1)
+
/**
* struct socket - general BSD socket
* @state - socket state (%SS_CONNECTED, etc)
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
index 55d57404acb8..6edb801fa51f 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
@@ -75,6 +75,7 @@ extern int ip_conntrack_protocol_tcp_init(void);
/* Log invalid packets */
extern unsigned int ip_ct_log_invalid;
+#ifdef CONFIG_SYSCTL
#ifdef DEBUG_INVALID_PACKETS
#define LOG_INVALID(proto) \
(ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW)
@@ -83,5 +84,8 @@ extern unsigned int ip_ct_log_invalid;
((ip_ct_log_invalid == (proto) || ip_ct_log_invalid == IPPROTO_RAW) \
&& net_ratelimit())
#endif
+#else
+#define LOG_INVALID(proto) 0
+#endif /* CONFIG_SYSCTL */
#endif /*_IP_CONNTRACK_PROTOCOL_H*/
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 9c42ac0b0322..86ca98c5ef8f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -201,6 +201,10 @@ struct tcp_sack_block {
__u32 end_seq;
};
+typedef struct tcp_pcount {
+ __u32 val;
+} tcp_pcount_t;
+
struct tcp_opt {
int tcp_header_len; /* Bytes of tcp header to send */
@@ -274,9 +278,9 @@ struct tcp_opt {
__u32 rtt_seq; /* sequence number to update rttvar */
__u32 rto; /* retransmit timeout */
- __u32 packets_out; /* Packets which are "in flight" */
- __u32 left_out; /* Packets which leaved network */
- __u32 retrans_out; /* Retransmitted packets out */
+ tcp_pcount_t packets_out; /* Packets which are "in flight" */
+ tcp_pcount_t left_out; /* Packets which leaved network */
+ tcp_pcount_t retrans_out; /* Retransmitted packets out */
/*
@@ -337,9 +341,9 @@ struct tcp_opt {
__u8 syn_retries; /* num of allowed syn retries */
__u8 ecn_flags; /* ECN status bits. */
__u16 prior_ssthresh; /* ssthresh saved at recovery start */
- __u32 lost_out; /* Lost packets */
- __u32 sacked_out; /* SACK'd packets */
- __u32 fackets_out; /* FACK'd packets */
+ tcp_pcount_t lost_out; /* Lost packets */
+ tcp_pcount_t sacked_out;/* SACK'd packets */
+ tcp_pcount_t fackets_out;/* FACK'd packets */
__u32 high_seq; /* snd_nxt at onset of congestion */
__u32 retrans_stamp; /* Timestamp of the last retransmit,
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 3df412c9f386..8c97932a98e7 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -67,6 +67,8 @@ struct neigh_parms
void *sysctl_table;
+ int dead;
+ atomic_t refcnt;
struct rcu_head rcu_head;
int base_reachable_time;
@@ -199,6 +201,7 @@ extern struct neighbour *neigh_event_ns(struct neigh_table *tbl,
extern struct neigh_parms *neigh_parms_alloc(struct net_device *dev, struct neigh_table *tbl);
extern void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms);
+extern void neigh_parms_destroy(struct neigh_parms *parms);
extern unsigned long neigh_rand_reach_time(unsigned long base);
extern void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
@@ -220,6 +223,23 @@ extern int neigh_sysctl_register(struct net_device *dev,
proc_handler *proc_handler);
extern void neigh_sysctl_unregister(struct neigh_parms *p);
+static inline void __neigh_parms_put(struct neigh_parms *parms)
+{
+ atomic_dec(&parms->refcnt);
+}
+
+static inline void neigh_parms_put(struct neigh_parms *parms)
+{
+ if (atomic_dec_and_test(&parms->refcnt))
+ neigh_parms_destroy(parms);
+}
+
+static inline struct neigh_parms *neigh_parms_clone(struct neigh_parms *parms)
+{
+ atomic_inc(&parms->refcnt);
+ return parms;
+}
+
/*
* Neighbour references
*/
diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h
index 09b899d87629..be5d651e4fe3 100644
--- a/include/net/pkt_act.h
+++ b/include/net/pkt_act.h
@@ -274,11 +274,11 @@ tcf_hash_create(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int
static inline struct tcf_st *
tcf_hash_init(struct tc_st *parm, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind)
{
- struct tcf_st *p;
- p = tcf_hash_check (parm,a,ovr,bind);
- if (NULL == p) {
- return tcf_hash_create(parm, est, a, size, ovr, bind);
- }
+ struct tcf_st *p = tcf_hash_check (parm,a,ovr,bind);
+
+ if (!p)
+ p = tcf_hash_create(parm, est, a, size, ovr, bind);
+ return p;
}
#endif
diff --git a/include/net/tcp.h b/include/net/tcp.h
index a5be63c232e3..1a8a317f2bd5 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1047,13 +1047,16 @@ static inline void tcp_reset_xmit_timer(struct sock *sk, int what, unsigned long
* is not a big flaw.
*/
-static __inline__ unsigned int tcp_current_mss(struct sock *sk, int large)
+static inline unsigned int tcp_current_mss(struct sock *sk, int large)
{
struct tcp_opt *tp = tcp_sk(sk);
struct dst_entry *dst = __sk_dst_get(sk);
- int mss_now = large && (sk->sk_route_caps & NETIF_F_TSO) &&
- !tp->urg_mode ?
- tp->mss_cache : tp->mss_cache_std;
+ int do_large, mss_now;
+
+ do_large = (large &&
+ (sk->sk_route_caps & NETIF_F_TSO) &&
+ !tp->urg_mode);
+ mss_now = do_large ? tp->mss_cache : tp->mss_cache_std;
if (dst) {
u32 mtu = dst_pmtu(dst);
@@ -1181,12 +1184,76 @@ struct tcp_skb_cb {
__u16 urg_ptr; /* Valid w/URG flags is set. */
__u32 ack_seq; /* Sequence number ACK'd */
+ __u32 tso_factor;
};
#define TCP_SKB_CB(__skb) ((struct tcp_skb_cb *)&((__skb)->cb[0]))
#include <net/tcp_ecn.h>
+/* Due to TSO, an SKB can be composed of multiple actual
+ * packets. To keep these tracked properly, we use this.
+ */
+static inline int tcp_skb_pcount(struct sk_buff *skb)
+{
+ return TCP_SKB_CB(skb)->tso_factor;
+}
+
+static inline void tcp_inc_pcount(tcp_pcount_t *count, struct sk_buff *skb)
+{
+ count->val += tcp_skb_pcount(skb);
+}
+
+static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt)
+{
+ count->val += amt;
+}
+
+static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt)
+{
+ count->val -= amt;
+}
+
+static inline void tcp_dec_pcount(tcp_pcount_t *count, struct sk_buff *skb)
+{
+ count->val -= tcp_skb_pcount(skb);
+}
+
+static inline void tcp_dec_pcount_approx(tcp_pcount_t *count,
+ struct sk_buff *skb)
+{
+ if (count->val) {
+ count->val -= tcp_skb_pcount(skb);
+ if ((int)count->val < 0)
+ count->val = 0;
+ }
+}
+
+static inline __u32 tcp_get_pcount(tcp_pcount_t *count)
+{
+ return count->val;
+}
+
+static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val)
+{
+ count->val = val;
+}
+
+static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_opt *tp,
+ struct sk_buff *skb)
+{
+ int orig = tcp_get_pcount(&tp->packets_out);
+
+ tcp_inc_pcount(&tp->packets_out, skb);
+ if (!orig)
+ tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+}
+
+static inline void tcp_packets_out_dec(struct tcp_opt *tp, struct sk_buff *skb)
+{
+ tcp_dec_pcount(&tp->packets_out, skb);
+}
+
/* This determines how many packets are "in the network" to the best
* of our knowledge. In many cases it is conservative, but where
* detailed information is available from the receiver (via SACK
@@ -1203,7 +1270,9 @@ struct tcp_skb_cb {
*/
static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp)
{
- return tp->packets_out - tp->left_out + tp->retrans_out;
+ return (tcp_get_pcount(&tp->packets_out) -
+ tcp_get_pcount(&tp->left_out) +
+ tcp_get_pcount(&tp->retrans_out));
}
/* Recalculate snd_ssthresh, we want to set it to:
@@ -1304,9 +1373,15 @@ static inline __u32 tcp_current_ssthresh(struct tcp_opt *tp)
static inline void tcp_sync_left_out(struct tcp_opt *tp)
{
- if (tp->sack_ok && tp->sacked_out >= tp->packets_out - tp->lost_out)
- tp->sacked_out = tp->packets_out - tp->lost_out;
- tp->left_out = tp->sacked_out + tp->lost_out;
+ if (tp->sack_ok &&
+ (tcp_get_pcount(&tp->sacked_out) >=
+ tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out)))
+ tcp_set_pcount(&tp->sacked_out,
+ (tcp_get_pcount(&tp->packets_out) -
+ tcp_get_pcount(&tp->lost_out)));
+ tcp_set_pcount(&tp->left_out,
+ (tcp_get_pcount(&tp->sacked_out) +
+ tcp_get_pcount(&tp->lost_out)));
}
extern void tcp_cwnd_application_limited(struct sock *sk);
@@ -1315,14 +1390,16 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_opt *tp)
{
- if (tp->packets_out >= tp->snd_cwnd) {
+ __u32 packets_out = tcp_get_pcount(&tp->packets_out);
+
+ if (packets_out >= tp->snd_cwnd) {
/* Network is feed fully. */
tp->snd_cwnd_used = 0;
tp->snd_cwnd_stamp = tcp_time_stamp;
} else {
/* Network starves. */
- if (tp->packets_out > tp->snd_cwnd_used)
- tp->snd_cwnd_used = tp->packets_out;
+ if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used)
+ tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out);
if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
tcp_cwnd_application_limited(sk);
@@ -1388,16 +1465,25 @@ tcp_nagle_check(struct tcp_opt *tp, struct sk_buff *skb, unsigned mss_now, int n
!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
((nonagle&TCP_NAGLE_CORK) ||
(!nonagle &&
- tp->packets_out &&
+ tcp_get_pcount(&tp->packets_out) &&
tcp_minshall_check(tp))));
}
+extern void tcp_set_skb_tso_factor(struct sk_buff *, unsigned int, unsigned int);
+
/* This checks if the data bearing packet SKB (usually sk->sk_send_head)
* should be put on the wire right now.
*/
static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
unsigned cur_mss, int nonagle)
{
+ int pkts = TCP_SKB_CB(skb)->tso_factor;
+
+ if (!pkts) {
+ tcp_set_skb_tso_factor(skb, cur_mss, tp->mss_cache_std);
+ pkts = TCP_SKB_CB(skb)->tso_factor;
+ }
+
/* RFC 1122 - section 4.2.3.4
*
* We must queue if
@@ -1424,14 +1510,14 @@ static __inline__ int tcp_snd_test(struct tcp_opt *tp, struct sk_buff *skb,
*/
return (((nonagle&TCP_NAGLE_PUSH) || tp->urg_mode
|| !tcp_nagle_check(tp, skb, cur_mss, nonagle)) &&
- ((tcp_packets_in_flight(tp) < tp->snd_cwnd) ||
+ (((tcp_packets_in_flight(tp) + (pkts-1)) < tp->snd_cwnd) ||
(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN)) &&
!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una + tp->snd_wnd));
}
static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_opt *tp)
{
- if (!tp->packets_out && !tp->pending)
+ if (!tcp_get_pcount(&tp->packets_out) && !tp->pending)
tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
}
@@ -1964,7 +2050,7 @@ static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb)
static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp)
{
return max((tp->westwood.bw_est) * (tp->westwood.rtt_min) /
- (__u32) (tp->mss_cache),
+ (__u32) (tp->mss_cache_std),
2U);
}
diff --git a/net/Makefile b/net/Makefile
index 61740b47a67d..a46436e0fcc2 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -9,7 +9,8 @@ obj-y := nonet.o
obj-$(CONFIG_NET) := socket.o core/
-obj-$(CONFIG_COMPAT) += compat.o
+tmp-$(CONFIG_COMPAT) := compat.o
+obj-$(CONFIG_NET) += $(tmp-y)
# LLC has to be linked before the files in net/802/
obj-$(CONFIG_LLC) += llc/
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 5de7c1fd73b5..104dd4d19da4 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -26,6 +26,7 @@
#include <linux/bitops.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
#include <net/route.h> /* for struct rtable and routing */
#include <net/icmp.h> /* icmp_send */
#include <asm/param.h> /* for HZ */
@@ -311,13 +312,25 @@ static int clip_constructor(struct neighbour *neigh)
{
struct atmarp_entry *entry = NEIGH2ENTRY(neigh);
struct net_device *dev = neigh->dev;
- struct in_device *in_dev = dev->ip_ptr;
+ struct in_device *in_dev;
+ struct neigh_parms *parms;
DPRINTK("clip_constructor (neigh %p, entry %p)\n",neigh,entry);
- if (!in_dev) return -EINVAL;
neigh->type = inet_addr_type(entry->ip);
if (neigh->type != RTN_UNICAST) return -EINVAL;
- if (in_dev->arp_parms) neigh->parms = in_dev->arp_parms;
+
+ rcu_read_lock();
+ in_dev = rcu_dereference(__in_dev_get(dev));
+ if (!in_dev) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ parms = in_dev->arp_parms;
+ __neigh_parms_put(neigh->parms);
+ neigh->parms = neigh_parms_clone(parms);
+ rcu_read_unlock();
+
neigh->ops = &clip_neigh_ops;
neigh->output = neigh->nud_state & NUD_VALID ?
neigh->ops->connected_output : neigh->ops->output;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 47fbd98e0e81..3a84182f4474 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1176,13 +1176,16 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr,
/* check if we can remove this feature. It is broken. */
printk(KERN_WARNING "ax25_connect(): %s uses autobind, please contact jreuter@yaina.de\n",
current->comm);
- if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0)
+ if ((err = ax25_rt_autobind(ax25, &fsa->fsa_ax25.sax25_call)) < 0) {
+ kfree(digi);
goto out;
+ }
ax25_fillin_cb(ax25, ax25->ax25_dev);
ax25_cb_add(ax25);
} else {
if (ax25->ax25_dev == NULL) {
+ kfree(digi);
err = -EHOSTUNREACH;
goto out;
}
@@ -1191,8 +1194,7 @@ static int ax25_connect(struct socket *sock, struct sockaddr *uaddr,
if (sk->sk_type == SOCK_SEQPACKET &&
(ax25t=ax25_find_cb(&ax25->source_addr, &fsa->fsa_ax25.sax25_call, digi,
ax25->ax25_dev->dev))) {
- if (digi != NULL)
- kfree(digi);
+ kfree(digi);
err = -EADDRINUSE; /* Already such a connection */
ax25_cb_put(ax25t);
goto out;
diff --git a/net/bridge/br_notify.c b/net/bridge/br_notify.c
index d0702fbcb21d..f8fb49e34764 100644
--- a/net/bridge/br_notify.c
+++ b/net/bridge/br_notify.c
@@ -76,10 +76,12 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v
break;
case NETDEV_UNREGISTER:
+ spin_unlock_bh(&br->lock);
br_del_if(br, dev);
- break;
+ goto done;
}
spin_unlock_bh(&br->lock);
+ done:
return NOTIFY_DONE;
}
diff --git a/net/compat.c b/net/compat.c
index 998b21b65363..6080b6439b96 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -455,13 +455,15 @@ static int do_set_sock_timeout(int fd, int level, int optname, char __user *optv
asmlinkage long compat_sys_setsockopt(int fd, int level, int optname,
char __user *optval, int optlen)
{
+ /* SO_SET_REPLACE seems to be the same in all levels */
if (optname == IPT_SO_SET_REPLACE)
return do_netfilter_replace(fd, level, optname,
optval, optlen);
- if (optname == SO_ATTACH_FILTER)
+ if (level == SOL_SOCKET && optname == SO_ATTACH_FILTER)
return do_set_attach_filter(fd, level, optname,
optval, optlen);
- if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO)
+ if (level == SOL_SOCKET &&
+ (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO))
return do_set_sock_timeout(fd, level, optname, optval, optlen);
return sys_setsockopt(fd, level, optname, optval, optlen);
diff --git a/net/core/dev.c b/net/core/dev.c
index 7a50c543e505..47b3d8497a5d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1249,17 +1249,17 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
return 0;
}
-#define HARD_TX_LOCK_BH(dev, cpu) { \
+#define HARD_TX_LOCK(dev, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
- spin_lock_bh(&dev->xmit_lock); \
+ spin_lock(&dev->xmit_lock); \
dev->xmit_lock_owner = cpu; \
} \
}
-#define HARD_TX_UNLOCK_BH(dev) { \
+#define HARD_TX_UNLOCK(dev) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
dev->xmit_lock_owner = -1; \
- spin_unlock_bh(&dev->xmit_lock); \
+ spin_unlock(&dev->xmit_lock); \
} \
}
@@ -1313,7 +1313,12 @@ int dev_queue_xmit(struct sk_buff *skb)
if (skb_checksum_help(&skb, 0))
goto out_kfree_skb;
- rcu_read_lock();
+
+ /* Disable soft irqs for various locks below. Also
+ * stops preemption for RCU.
+ */
+ local_bh_disable();
+
/* Updates of qdisc are serialized by queue_lock.
* The struct Qdisc which is pointed to by qdisc is now a
* rcu structure - it may be accessed without acquiring
@@ -1332,18 +1337,16 @@ int dev_queue_xmit(struct sk_buff *skb)
#endif
if (q->enqueue) {
/* Grab device queue */
- spin_lock_bh(&dev->queue_lock);
+ spin_lock(&dev->queue_lock);
rc = q->enqueue(skb, q);
qdisc_run(dev);
- spin_unlock_bh(&dev->queue_lock);
- rcu_read_unlock();
+ spin_unlock(&dev->queue_lock);
rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
goto out;
}
- rcu_read_unlock();
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
@@ -1358,12 +1361,11 @@ int dev_queue_xmit(struct sk_buff *skb)
Either shot noqueue qdisc, it is even simpler 8)
*/
if (dev->flags & IFF_UP) {
- int cpu = get_cpu();
+ int cpu = smp_processor_id(); /* ok because BHs are off */
if (dev->xmit_lock_owner != cpu) {
- HARD_TX_LOCK_BH(dev, cpu);
- put_cpu();
+ HARD_TX_LOCK(dev, cpu);
if (!netif_queue_stopped(dev)) {
if (netdev_nit)
@@ -1371,17 +1373,16 @@ int dev_queue_xmit(struct sk_buff *skb)
rc = 0;
if (!dev->hard_start_xmit(skb, dev)) {
- HARD_TX_UNLOCK_BH(dev);
+ HARD_TX_UNLOCK(dev);
goto out;
}
}
- HARD_TX_UNLOCK_BH(dev);
+ HARD_TX_UNLOCK(dev);
if (net_ratelimit())
printk(KERN_CRIT "Virtual device %s asks to "
"queue packet!\n", dev->name);
goto out_enetdown;
} else {
- put_cpu();
/* Recursion is detected! It is possible,
* unfortunately */
if (net_ratelimit())
@@ -1394,6 +1395,7 @@ out_enetdown:
out_kfree_skb:
kfree_skb(skb);
out:
+ local_bh_enable();
return rc;
}
diff --git a/net/core/dv.c b/net/core/dv.c
index c1340cc53b75..f8e3f9c6b282 100644
--- a/net/core/dv.c
+++ b/net/core/dv.c
@@ -553,6 +553,3 @@ void divert_frame(struct sk_buff *skb)
break;
}
}
-
-EXPORT_SYMBOL(alloc_divert_blk);
-EXPORT_SYMBOL(free_divert_blk);
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f5deae1541c4..c9a747e89e5d 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -227,7 +227,6 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
we must kill timers etc. and move
it to safe state.
*/
- n->parms = &tbl->parms;
skb_queue_purge(&n->arp_queue);
n->output = neigh_blackhole;
if (n->nud_state & NUD_VALID)
@@ -273,7 +272,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl)
n->updated = n->used = now;
n->nud_state = NUD_NONE;
n->output = neigh_blackhole;
- n->parms = &tbl->parms;
+ n->parms = neigh_parms_clone(&tbl->parms);
init_timer(&n->timer);
n->timer.function = neigh_timer_handler;
n->timer.data = (unsigned long)n;
@@ -340,12 +339,16 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
hash_val = tbl->hash(pkey, dev);
write_lock_bh(&tbl->lock);
+ if (n->parms->dead) {
+ rc = ERR_PTR(-EINVAL);
+ goto out_tbl_unlock;
+ }
+
for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) {
if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
neigh_hold(n1);
- write_unlock_bh(&tbl->lock);
rc = n1;
- goto out_neigh_release;
+ goto out_tbl_unlock;
}
}
@@ -358,6 +361,8 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey,
rc = n;
out:
return rc;
+out_tbl_unlock:
+ write_unlock_bh(&tbl->lock);
out_neigh_release:
neigh_release(n);
goto out;
@@ -494,6 +499,7 @@ void neigh_destroy(struct neighbour *neigh)
skb_queue_purge(&neigh->arp_queue);
dev_put(neigh->dev);
+ neigh_parms_put(neigh->parms);
NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh);
@@ -1120,6 +1126,7 @@ struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
if (p) {
memcpy(p, &tbl->parms, sizeof(*p));
p->tbl = tbl;
+ atomic_set(&p->refcnt, 1);
INIT_RCU_HEAD(&p->rcu_head);
p->reachable_time =
neigh_rand_reach_time(p->base_reachable_time);
@@ -1141,7 +1148,7 @@ static void neigh_rcu_free_parms(struct rcu_head *head)
struct neigh_parms *parms =
container_of(head, struct neigh_parms, rcu_head);
- kfree(parms);
+ neigh_parms_put(parms);
}
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
@@ -1154,6 +1161,7 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
for (p = &tbl->parms.next; *p; p = &(*p)->next) {
if (*p == parms) {
*p = parms->next;
+ parms->dead = 1;
write_unlock_bh(&tbl->lock);
call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
return;
@@ -1163,11 +1171,17 @@ void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
NEIGH_PRINTK1("neigh_parms_release: not found\n");
}
+void neigh_parms_destroy(struct neigh_parms *parms)
+{
+ kfree(parms);
+}
+
void neigh_table_init(struct neigh_table *tbl)
{
unsigned long now = jiffies;
+ atomic_set(&tbl->parms.refcnt, 1);
INIT_RCU_HEAD(&tbl->parms.rcu_head);
tbl->parms.reachable_time =
neigh_rand_reach_time(tbl->parms.base_reachable_time);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 5a05efb83092..a21a326808b4 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -41,6 +41,7 @@
#include <linux/sysctl.h>
#include <linux/notifier.h>
#include <asm/uaccess.h>
+#include <asm/system.h>
#include <net/neighbour.h>
#include <net/dst.h>
#include <net/flow.h>
@@ -1108,6 +1109,7 @@ struct dn_dev *dn_dev_create(struct net_device *dev, int *err)
memset(dn_db, 0, sizeof(struct dn_dev));
memcpy(&dn_db->parms, p, sizeof(struct dn_dev_parms));
+ smp_wmb();
dev->dn_ptr = dn_db;
dn_db->dev = dev;
init_timer(&dn_db->timer);
@@ -1215,6 +1217,7 @@ static void dn_dev_delete(struct net_device *dev)
dev->dn_ptr = NULL;
neigh_parms_release(&dn_neigh_table, dn_db->neigh_parms);
+ neigh_ifdown(&dn_neigh_table, dev);
if (dn_db->router)
neigh_release(dn_db->router);
diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c
index ab64b850c12b..d3d6c592a5cb 100644
--- a/net/decnet/dn_neigh.c
+++ b/net/decnet/dn_neigh.c
@@ -35,6 +35,7 @@
#include <linux/netfilter_decnet.h>
#include <linux/spinlock.h>
#include <linux/seq_file.h>
+#include <linux/rcupdate.h>
#include <asm/atomic.h>
#include <net/neighbour.h>
#include <net/dst.h>
@@ -134,13 +135,25 @@ static int dn_neigh_construct(struct neighbour *neigh)
{
struct net_device *dev = neigh->dev;
struct dn_neigh *dn = (struct dn_neigh *)neigh;
- struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr;
+ struct dn_dev *dn_db;
+ struct neigh_parms *parms;
+
+ rcu_read_lock();
+ dn_db = rcu_dereference(dev->dn_ptr);
+ if (dn_db == NULL) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
- if (dn_db == NULL)
+ parms = dn_db->neigh_parms;
+ if (!parms) {
+ rcu_read_unlock();
return -EINVAL;
+ }
- if (dn_db->neigh_parms)
- neigh->parms = dn_db->neigh_parms;
+ __neigh_parms_put(neigh->parms);
+ neigh->parms = neigh_parms_clone(parms);
+ rcu_read_unlock();
if (dn_db->use_long)
neigh->ops = &dn_long_ops;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ed2923791e0e..c859b31fd0f1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -837,7 +837,7 @@ struct proto_ops inet_dgram_ops = {
.sendpage = inet_sendpage,
};
-struct net_proto_family inet_family_ops = {
+static struct net_proto_family inet_family_ops = {
.family = PF_INET,
.create = inet_create,
.owner = THIS_MODULE,
@@ -1157,7 +1157,6 @@ EXPORT_SYMBOL(inet_accept);
EXPORT_SYMBOL(inet_bind);
EXPORT_SYMBOL(inet_dgram_connect);
EXPORT_SYMBOL(inet_dgram_ops);
-EXPORT_SYMBOL(inet_family_ops);
EXPORT_SYMBOL(inet_getname);
EXPORT_SYMBOL(inet_ioctl);
EXPORT_SYMBOL(inet_listen);
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index 562702d99ba2..41e726ac3337 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -96,6 +96,7 @@
#include <linux/stat.h>
#include <linux/init.h>
#include <linux/net.h>
+#include <linux/rcupdate.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -237,16 +238,22 @@ static int arp_constructor(struct neighbour *neigh)
{
u32 addr = *(u32*)neigh->primary_key;
struct net_device *dev = neigh->dev;
- struct in_device *in_dev = in_dev_get(dev);
-
- if (in_dev == NULL)
- return -EINVAL;
+ struct in_device *in_dev;
+ struct neigh_parms *parms;
neigh->type = inet_addr_type(addr);
- if (in_dev->arp_parms)
- neigh->parms = in_dev->arp_parms;
- in_dev_put(in_dev);
+ rcu_read_lock();
+ in_dev = rcu_dereference(__in_dev_get(dev));
+ if (in_dev == NULL) {
+ rcu_read_unlock();
+ return -EINVAL;
+ }
+
+ parms = in_dev->arp_parms;
+ __neigh_parms_put(neigh->parms);
+ neigh->parms = neigh_parms_clone(parms);
+ rcu_read_unlock();
if (dev->hard_header == NULL) {
neigh->nud_state = NUD_NOARP;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index fc9930460864..19eb795a1140 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -184,6 +184,7 @@ static void in_dev_rcu_put(struct rcu_head *head)
static void inetdev_destroy(struct in_device *in_dev)
{
struct in_ifaddr *ifa;
+ struct net_device *dev;
ASSERT_RTNL();
@@ -200,12 +201,15 @@ static void inetdev_destroy(struct in_device *in_dev)
devinet_sysctl_unregister(&in_dev->cnf);
#endif
- in_dev->dev->ip_ptr = NULL;
+ dev = in_dev->dev;
+ dev->ip_ptr = NULL;
#ifdef CONFIG_SYSCTL
neigh_sysctl_unregister(in_dev->arp_parms);
#endif
neigh_parms_release(&arp_tbl, in_dev->arp_parms);
+ arp_ifdown(dev);
+
call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
}
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3a85f7a8d02a..9a8f051208d1 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -127,6 +127,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
#define IP_VS_XMIT(skb, rt) \
do { \
+ nf_reset(skb); \
(skb)->nfcache |= NFC_IPVS_PROPERTY; \
NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \
(rt)->u.dst.dev, dst_output); \
@@ -201,9 +202,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
-#ifdef CONFIG_NETFILTER_DEBUG
- skb->nf_debug = 0;
-#endif /* CONFIG_NETFILTER_DEBUG */
IP_VS_XMIT(skb, rt);
LeaveFunction(10);
@@ -280,9 +278,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
-#ifdef CONFIG_NETFILTER_DEBUG
- skb->nf_debug = 0;
-#endif /* CONFIG_NETFILTER_DEBUG */
IP_VS_XMIT(skb, rt);
LeaveFunction(10);
@@ -418,10 +413,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
-#ifdef CONFIG_NETFILTER_DEBUG
- skb->nf_debug = 0;
-#endif /* CONFIG_NETFILTER_DEBUG */
-
IP_VS_XMIT(skb, rt);
LeaveFunction(10);
@@ -480,9 +471,6 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
-#ifdef CONFIG_NETFILTER_DEBUG
- skb->nf_debug = 0;
-#endif /* CONFIG_NETFILTER_DEBUG */
IP_VS_XMIT(skb, rt);
LeaveFunction(10);
@@ -557,9 +545,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
/* Another hack: avoid icmp_send in ip_fragment */
skb->local_df = 1;
-#ifdef CONFIG_NETFILTER_DEBUG
- skb->nf_debug = 0;
-#endif /* CONFIG_NETFILTER_DEBUG */
IP_VS_XMIT(skb, rt);
rc = NF_STOLEN;
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index 64755c5aed6e..3e51036e5065 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -623,8 +623,8 @@ int __init init(void)
return ret;
- cleanup:
#ifdef CONFIG_SYSCTL
+ cleanup:
ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
#endif
out:
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index ceff26dbff47..f4c3899771c4 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -54,6 +54,7 @@ static int kill_proto(const struct ip_conntrack *i, void *data)
*((u_int8_t *) data));
}
+#ifdef CONFIG_PROC_FS
static unsigned int
print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_protocol *proto)
@@ -367,6 +368,7 @@ static struct file_operations ct_cpu_seq_fops = {
.llseek = seq_lseek,
.release = seq_release_private,
};
+#endif
static unsigned int ip_confirm(unsigned int hooknum,
struct sk_buff **pskb,
@@ -726,10 +728,15 @@ static ctl_table ip_ct_net_table[] = {
},
{ .ctl_name = 0 }
};
-#endif
+
+EXPORT_SYMBOL(ip_ct_log_invalid);
+#endif /* CONFIG_SYSCTL */
+
static int init_or_cleanup(int init)
{
+#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+#endif
int ret = 0;
if (!init) goto cleanup;
@@ -738,19 +745,20 @@ static int init_or_cleanup(int init)
if (ret < 0)
goto cleanup_nothing;
- proc = proc_net_create("ip_conntrack", 0440, NULL);
+#ifdef CONFIG_PROC_FS
+ proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
if (!proc) goto cleanup_init;
- proc->proc_fops = &ct_file_ops;
- proc_exp = proc_net_create("ip_conntrack_expect", 0440, NULL);
+ proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
+ &exp_file_ops);
if (!proc_exp) goto cleanup_proc;
- proc_exp->proc_fops = &exp_file_ops;
proc_stat = proc_net_fops_create("ip_conntrack_stat", S_IRUGO,
&ct_cpu_seq_fops);
if (!proc_stat)
goto cleanup_proc_exp;
proc_stat->owner = THIS_MODULE;
+#endif
ret = nf_register_hook(&ip_conntrack_defrag_ops);
if (ret < 0) {
@@ -814,12 +822,14 @@ static int init_or_cleanup(int init)
local_bh_enable();
nf_unregister_hook(&ip_conntrack_defrag_ops);
cleanup_proc_stat:
+#ifdef CONFIG_PROC_FS
proc_net_remove("ip_conntrack_stat");
cleanup_proc_exp:
proc_net_remove("ip_conntrack_exp");
cleanup_proc:
proc_net_remove("ip_conntrack");
cleanup_init:
+#endif /* CONFIG_PROC_FS */
ip_conntrack_cleanup();
cleanup_nothing:
return ret;
@@ -912,4 +922,3 @@ EXPORT_SYMBOL(ip_conntrack_hash);
EXPORT_SYMBOL(ip_conntrack_untracked);
EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
EXPORT_SYMBOL_GPL(ip_conntrack_put);
-EXPORT_SYMBOL(ip_ct_log_invalid);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 906b89df2f19..26dca38f692a 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -619,6 +619,7 @@ static ctl_table ipq_root_table[] = {
{ .ctl_name = 0 }
};
+#ifdef CONFIG_PROC_FS
static int
ipq_get_info(char *buffer, char **start, off_t offset, int length)
{
@@ -648,6 +649,7 @@ ipq_get_info(char *buffer, char **start, off_t offset, int length)
len = 0;
return len;
}
+#endif /* CONFIG_PROC_FS */
static int
init_or_cleanup(int init)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f34bdec2f31e..36953ef7e6c2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1818,7 +1818,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->backoff = 0;
tp->snd_cwnd = 2;
tp->probes_out = 0;
- tp->packets_out = 0;
+ tcp_set_pcount(&tp->packets_out, 0);
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(tp, TCP_CA_Open);
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 5506944b7e7c..e0f8a7664f7e 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -70,14 +70,14 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rto = (1000000*tp->rto)/HZ;
info->tcpi_ato = (1000000*tp->ack.ato)/HZ;
- info->tcpi_snd_mss = tp->mss_cache;
+ info->tcpi_snd_mss = tp->mss_cache_std;
info->tcpi_rcv_mss = tp->ack.rcv_mss;
- info->tcpi_unacked = tp->packets_out;
- info->tcpi_sacked = tp->sacked_out;
- info->tcpi_lost = tp->lost_out;
- info->tcpi_retrans = tp->retrans_out;
- info->tcpi_fackets = tp->fackets_out;
+ info->tcpi_unacked = tcp_get_pcount(&tp->packets_out);
+ info->tcpi_sacked = tcp_get_pcount(&tp->sacked_out);
+ info->tcpi_lost = tcp_get_pcount(&tp->lost_out);
+ info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out);
+ info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out);
info->tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ;
info->tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 85643472b84d..d7fb3cde4f20 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -897,7 +897,9 @@ static void tcp_update_reordering(struct tcp_opt *tp, int metric, int ts)
#if FASTRETRANS_DEBUG > 1
printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
tp->sack_ok, tp->ca_state,
- tp->reordering, tp->fackets_out, tp->sacked_out,
+ tp->reordering,
+ tcp_get_pcount(&tp->fackets_out),
+ tcp_get_pcount(&tp->sacked_out),
tp->undo_marker ? tp->undo_retrans : 0);
#endif
/* Disable FACK yet. */
@@ -960,7 +962,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
- int reord = tp->packets_out;
+ int reord = tcp_get_pcount(&tp->packets_out);
int prior_fackets;
u32 lost_retrans = 0;
int flag = 0;
@@ -974,9 +976,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
tp->mss_cache = tp->mss_cache_std;
}
- if (!tp->sacked_out)
- tp->fackets_out = 0;
- prior_fackets = tp->fackets_out;
+ if (!tcp_get_pcount(&tp->sacked_out))
+ tcp_set_pcount(&tp->fackets_out, 0);
+ prior_fackets = tcp_get_pcount(&tp->fackets_out);
for (i=0; i<num_sacks; i++, sp++) {
struct sk_buff *skb;
@@ -1074,8 +1076,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
*/
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
- tp->lost_out--;
- tp->retrans_out--;
+ tcp_dec_pcount(&tp->lost_out, skb);
+ tcp_dec_pcount(&tp->retrans_out, skb);
}
} else {
/* New sack for not retransmitted frame,
@@ -1087,16 +1089,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
- tp->lost_out--;
+ tcp_dec_pcount(&tp->lost_out, skb);
}
}
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
flag |= FLAG_DATA_SACKED;
- tp->sacked_out++;
+ tcp_inc_pcount(&tp->sacked_out, skb);
- if (fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
+ if (fack_count > tcp_get_pcount(&tp->fackets_out))
+ tcp_set_pcount(&tp->fackets_out, fack_count);
} else {
if (dup_sack && (sacked&TCPCB_RETRANS))
reord = min(fack_count, reord);
@@ -1110,7 +1112,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (dup_sack &&
(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tp->retrans_out--;
+ tcp_dec_pcount(&tp->retrans_out, skb);
}
}
}
@@ -1134,12 +1136,12 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
(IsFack(tp) ||
!before(lost_retrans,
TCP_SKB_CB(skb)->ack_seq + tp->reordering *
- tp->mss_cache))) {
+ tp->mss_cache_std))) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tp->retrans_out--;
+ tcp_dec_pcount(&tp->retrans_out, skb);
if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
- tp->lost_out++;
+ tcp_inc_pcount(&tp->lost_out, skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
flag |= FLAG_DATA_SACKED;
NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
@@ -1148,15 +1150,20 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
}
}
- tp->left_out = tp->sacked_out + tp->lost_out;
+ tcp_set_pcount(&tp->left_out,
+ (tcp_get_pcount(&tp->sacked_out) +
+ tcp_get_pcount(&tp->lost_out)));
- if (reord < tp->fackets_out && tp->ca_state != TCP_CA_Loss)
- tcp_update_reordering(tp, (tp->fackets_out + 1) - reord, 0);
+ if ((reord < tcp_get_pcount(&tp->fackets_out)) &&
+ tp->ca_state != TCP_CA_Loss)
+ tcp_update_reordering(tp,
+ ((tcp_get_pcount(&tp->fackets_out) + 1) -
+ reord), 0);
#if FASTRETRANS_DEBUG > 0
- BUG_TRAP((int)tp->sacked_out >= 0);
- BUG_TRAP((int)tp->lost_out >= 0);
- BUG_TRAP((int)tp->retrans_out >= 0);
+ BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
+ BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
+ BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
return flag;
@@ -1186,7 +1193,7 @@ void tcp_enter_frto(struct sock *sk)
* If something was really lost, it is eventually caught up
* in tcp_enter_frto_loss.
*/
- tp->retrans_out = 0;
+ tcp_set_pcount(&tp->retrans_out, 0);
tp->undo_marker = tp->snd_una;
tp->undo_retrans = 0;
@@ -1209,26 +1216,26 @@ static void tcp_enter_frto_loss(struct sock *sk)
struct sk_buff *skb;
int cnt = 0;
- tp->sacked_out = 0;
- tp->lost_out = 0;
- tp->fackets_out = 0;
+ tcp_set_pcount(&tp->sacked_out, 0);
+ tcp_set_pcount(&tp->lost_out, 0);
+ tcp_set_pcount(&tp->fackets_out, 0);
sk_stream_for_retrans_queue(skb, sk) {
- cnt++;
+ cnt += TCP_SKB_CB(skb)->tso_factor;
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
/* Do not mark those segments lost that were
* forward transmitted after RTO
*/
- if(!after(TCP_SKB_CB(skb)->end_seq,
+ if (!after(TCP_SKB_CB(skb)->end_seq,
tp->frto_highmark)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tp->lost_out++;
+ tcp_inc_pcount(&tp->lost_out, skb);
}
} else {
- tp->sacked_out++;
- tp->fackets_out = cnt;
+ tcp_inc_pcount(&tp->sacked_out, skb);
+ tcp_set_pcount(&tp->fackets_out, cnt);
}
}
tcp_sync_left_out(tp);
@@ -1250,12 +1257,12 @@ static void tcp_enter_frto_loss(struct sock *sk)
void tcp_clear_retrans(struct tcp_opt *tp)
{
- tp->left_out = 0;
- tp->retrans_out = 0;
+ tcp_set_pcount(&tp->left_out, 0);
+ tcp_set_pcount(&tp->retrans_out, 0);
- tp->fackets_out = 0;
- tp->sacked_out = 0;
- tp->lost_out = 0;
+ tcp_set_pcount(&tp->fackets_out, 0);
+ tcp_set_pcount(&tp->sacked_out, 0);
+ tcp_set_pcount(&tp->lost_out, 0);
tp->undo_marker = 0;
tp->undo_retrans = 0;
@@ -1289,17 +1296,17 @@ void tcp_enter_loss(struct sock *sk, int how)
tp->undo_marker = tp->snd_una;
sk_stream_for_retrans_queue(skb, sk) {
- cnt++;
+ cnt += TCP_SKB_CB(skb)->tso_factor;
if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
tp->undo_marker = 0;
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tp->lost_out++;
+ tcp_inc_pcount(&tp->lost_out, skb);
} else {
- tp->sacked_out++;
- tp->fackets_out = cnt;
+ tcp_inc_pcount(&tp->sacked_out, skb);
+ tcp_set_pcount(&tp->fackets_out, cnt);
}
}
tcp_sync_left_out(tp);
@@ -1336,7 +1343,8 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_opt *tp)
static inline int tcp_fackets_out(struct tcp_opt *tp)
{
- return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+ return IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 :
+ tcp_get_pcount(&tp->fackets_out);
}
static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
@@ -1346,7 +1354,7 @@ static inline int tcp_skb_timedout(struct tcp_opt *tp, struct sk_buff *skb)
static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
{
- return tp->packets_out &&
+ return tcp_get_pcount(&tp->packets_out) &&
tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue));
}
@@ -1446,8 +1454,10 @@ static inline int tcp_head_timedout(struct sock *sk, struct tcp_opt *tp)
static int
tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
{
+ __u32 packets_out;
+
/* Trick#1: The loss is proven. */
- if (tp->lost_out)
+ if (tcp_get_pcount(&tp->lost_out))
return 1;
/* Not-A-Trick#2 : Classic rule... */
@@ -1463,8 +1473,9 @@ tcp_time_to_recover(struct sock *sk, struct tcp_opt *tp)
/* Trick#4: It is still not OK... But will it be useful to delay
* recovery more?
*/
- if (tp->packets_out <= tp->reordering &&
- tp->sacked_out >= max_t(__u32, tp->packets_out/2, sysctl_tcp_reordering) &&
+ packets_out = tcp_get_pcount(&tp->packets_out);
+ if (packets_out <= tp->reordering &&
+ tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
!tcp_may_send_now(sk, tp)) {
/* We have nothing to send. This connection is limited
* either by receiver window or by application.
@@ -1483,12 +1494,16 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
{
u32 holes;
- holes = max(tp->lost_out, 1U);
- holes = min(holes, tp->packets_out);
+ holes = max(tcp_get_pcount(&tp->lost_out), 1U);
+ holes = min(holes, tcp_get_pcount(&tp->packets_out));
- if (tp->sacked_out + holes > tp->packets_out) {
- tp->sacked_out = tp->packets_out - holes;
- tcp_update_reordering(tp, tp->packets_out+addend, 0);
+ if ((tcp_get_pcount(&tp->sacked_out) + holes) >
+ tcp_get_pcount(&tp->packets_out)) {
+ tcp_set_pcount(&tp->sacked_out,
+ (tcp_get_pcount(&tp->packets_out) - holes));
+ tcp_update_reordering(tp,
+ tcp_get_pcount(&tp->packets_out)+addend,
+ 0);
}
}
@@ -1496,7 +1511,7 @@ static void tcp_check_reno_reordering(struct tcp_opt *tp, int addend)
static void tcp_add_reno_sack(struct tcp_opt *tp)
{
- ++tp->sacked_out;
+ tcp_inc_pcount_explicit(&tp->sacked_out, 1);
tcp_check_reno_reordering(tp, 0);
tcp_sync_left_out(tp);
}
@@ -1507,10 +1522,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
{
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
- if (acked-1 >= tp->sacked_out)
- tp->sacked_out = 0;
+ if (acked-1 >= tcp_get_pcount(&tp->sacked_out))
+ tcp_set_pcount(&tp->sacked_out, 0);
else
- tp->sacked_out -= acked-1;
+ tcp_dec_pcount_explicit(&tp->sacked_out, acked-1);
}
tcp_check_reno_reordering(tp, acked);
tcp_sync_left_out(tp);
@@ -1518,8 +1533,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_opt *tp, int acked
static inline void tcp_reset_reno_sack(struct tcp_opt *tp)
{
- tp->sacked_out = 0;
- tp->left_out = tp->lost_out;
+ tcp_set_pcount(&tp->sacked_out, 0);
+ tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out));
}
/* Mark head of queue up as lost. */
@@ -1529,14 +1544,15 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
struct sk_buff *skb;
int cnt = packets;
- BUG_TRAP(cnt <= tp->packets_out);
+ BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out));
sk_stream_for_retrans_queue(skb, sk) {
- if (--cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
+ cnt -= TCP_SKB_CB(skb)->tso_factor;
+ if (cnt < 0 || after(TCP_SKB_CB(skb)->end_seq, high_seq))
break;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tp->lost_out++;
+ tcp_inc_pcount(&tp->lost_out, skb);
}
}
tcp_sync_left_out(tp);
@@ -1547,7 +1563,7 @@ tcp_mark_head_lost(struct sock *sk, struct tcp_opt *tp, int packets, u32 high_se
static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
{
if (IsFack(tp)) {
- int lost = tp->fackets_out - tp->reordering;
+ int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering;
if (lost <= 0)
lost = 1;
tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
@@ -1567,7 +1583,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_opt *tp)
if (tcp_skb_timedout(tp, skb) &&
!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tp->lost_out++;
+ tcp_inc_pcount(&tp->lost_out, skb);
}
}
tcp_sync_left_out(tp);
@@ -1632,8 +1648,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_opt *tp, const char *msg)
printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
msg,
NIPQUAD(inet->daddr), ntohs(inet->dport),
- tp->snd_cwnd, tp->left_out,
- tp->snd_ssthresh, tp->prior_ssthresh, tp->packets_out);
+ tp->snd_cwnd, tcp_get_pcount(&tp->left_out),
+ tp->snd_ssthresh, tp->prior_ssthresh,
+ tcp_get_pcount(&tp->packets_out));
}
#else
#define DBGUNDO(x...) do { } while (0)
@@ -1703,13 +1720,13 @@ static void tcp_try_undo_dsack(struct sock *sk, struct tcp_opt *tp)
static int tcp_try_undo_partial(struct sock *sk, struct tcp_opt *tp, int acked)
{
/* Partial ACK arrived. Force Hoe's retransmit. */
- int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
+ int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering;
if (tcp_may_undo(tp)) {
/* Plain luck! Hole if filled with delayed
* packet, rather than with a retransmit.
*/
- if (tp->retrans_out == 0)
+ if (tcp_get_pcount(&tp->retrans_out) == 0)
tp->retrans_stamp = 0;
tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
@@ -1736,8 +1753,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_opt *tp)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
DBGUNDO(sk, tp, "partial loss");
- tp->lost_out = 0;
- tp->left_out = tp->sacked_out;
+ tcp_set_pcount(&tp->lost_out, 0);
+ tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
tcp_undo_cwr(tp, 1);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
tp->retransmits = 0;
@@ -1760,9 +1777,9 @@ static __inline__ void tcp_complete_cwr(struct tcp_opt *tp)
static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
{
- tp->left_out = tp->sacked_out;
+ tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
- if (tp->retrans_out == 0)
+ if (tcp_get_pcount(&tp->retrans_out) == 0)
tp->retrans_stamp = 0;
if (flag&FLAG_ECE)
@@ -1771,8 +1788,8 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_opt *tp, int flag)
if (tp->ca_state != TCP_CA_CWR) {
int state = TCP_CA_Open;
- if (tp->left_out ||
- tp->retrans_out ||
+ if (tcp_get_pcount(&tp->left_out) ||
+ tcp_get_pcount(&tp->retrans_out) ||
tp->undo_marker)
state = TCP_CA_Disorder;
@@ -1806,11 +1823,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* Some technical things:
* 1. Reno does not count dupacks (sacked_out) automatically. */
- if (!tp->packets_out)
- tp->sacked_out = 0;
+ if (!tcp_get_pcount(&tp->packets_out))
+ tcp_set_pcount(&tp->sacked_out, 0);
/* 2. SACK counts snd_fack in packets inaccurately. */
- if (tp->sacked_out == 0)
- tp->fackets_out = 0;
+ if (tcp_get_pcount(&tp->sacked_out) == 0)
+ tcp_set_pcount(&tp->fackets_out, 0);
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -1818,15 +1835,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
- if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
+ if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp))
return;
/* C. Process data loss notification, provided it is valid. */
if ((flag&FLAG_DATA_LOST) &&
before(tp->snd_una, tp->high_seq) &&
tp->ca_state != TCP_CA_Open &&
- tp->fackets_out > tp->reordering) {
- tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
+ tcp_get_pcount(&tp->fackets_out) > tp->reordering) {
+ tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
@@ -1837,7 +1854,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
* when high_seq is ACKed. */
if (tp->ca_state == TCP_CA_Open) {
if (!sysctl_tcp_frto)
- BUG_TRAP(tp->retrans_out == 0);
+ BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (tp->ca_state) {
@@ -1884,7 +1901,8 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
if (IsReno(tp) && is_dupack)
tcp_add_reno_sack(tp);
} else {
- int acked = prior_packets - tp->packets_out;
+ int acked = prior_packets -
+ tcp_get_pcount(&tp->packets_out);
if (IsReno(tp))
tcp_remove_reno_sacks(sk, tp, acked);
is_dupack = tcp_try_undo_partial(sk, tp, acked);
@@ -1927,7 +1945,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tp->high_seq = tp->snd_nxt;
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tp->retrans_out;
+ tp->undo_retrans = tcp_get_pcount(&tp->retrans_out);
if (tp->ca_state < TCP_CA_CWR) {
if (!(flag&FLAG_ECE))
@@ -2156,7 +2174,7 @@ static void vegas_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
* is the cwnd during the previous RTT.
*/
old_wnd = (tp->vegas.beg_snd_nxt - tp->vegas.beg_snd_una) /
- tp->mss_cache;
+ tp->mss_cache_std;
old_snd_cwnd = tp->vegas.beg_snd_cwnd;
/* Save the extent of the current window so we can use this
@@ -2327,7 +2345,7 @@ static inline void tcp_cong_avoid(struct tcp_opt *tp, u32 ack, u32 seq_rtt)
static __inline__ void tcp_ack_packets_out(struct sock *sk, struct tcp_opt *tp)
{
- if (tp->packets_out==0) {
+ if (!tcp_get_pcount(&tp->packets_out)) {
tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
} else {
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
@@ -2343,7 +2361,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
int acked = 0;
__s32 seq_rtt = -1;
- while ((skb = skb_peek(&sk->sk_write_queue)) && skb != sk->sk_send_head) {
+ while ((skb = skb_peek(&sk->sk_write_queue)) &&
+ skb != sk->sk_send_head) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
__u8 sacked = scb->sacked;
@@ -2361,7 +2380,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
* connection startup slow start one packet too
* quickly. This is severely frowned upon behavior.
*/
- if(!(scb->flags & TCPCB_FLAG_SYN)) {
+ if (!(scb->flags & TCPCB_FLAG_SYN)) {
acked |= FLAG_DATA_ACKED;
} else {
acked |= FLAG_SYN_ACKED;
@@ -2369,27 +2388,26 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
if (sacked) {
- if(sacked & TCPCB_RETRANS) {
+ if (sacked & TCPCB_RETRANS) {
if(sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out--;
+ tcp_dec_pcount(&tp->retrans_out, skb);
acked |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
} else if (seq_rtt < 0)
seq_rtt = now - scb->when;
- if(sacked & TCPCB_SACKED_ACKED)
- tp->sacked_out--;
- if(sacked & TCPCB_LOST)
- tp->lost_out--;
- if(sacked & TCPCB_URG) {
+ if (sacked & TCPCB_SACKED_ACKED)
+ tcp_dec_pcount(&tp->sacked_out, skb);
+ if (sacked & TCPCB_LOST)
+ tcp_dec_pcount(&tp->lost_out, skb);
+ if (sacked & TCPCB_URG) {
if (tp->urg_mode &&
!before(scb->end_seq, tp->snd_up))
tp->urg_mode = 0;
}
} else if (seq_rtt < 0)
seq_rtt = now - scb->when;
- if (tp->fackets_out)
- tp->fackets_out--;
- tp->packets_out--;
+ tcp_dec_pcount_approx(&tp->fackets_out, skb);
+ tcp_packets_out_dec(tp, skb);
__skb_unlink(skb, skb->list);
sk_stream_free_skb(sk, skb);
}
@@ -2400,24 +2418,27 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
#if FASTRETRANS_DEBUG > 0
- BUG_TRAP((int)tp->sacked_out >= 0);
- BUG_TRAP((int)tp->lost_out >= 0);
- BUG_TRAP((int)tp->retrans_out >= 0);
- if (!tp->packets_out && tp->sack_ok) {
- if (tp->lost_out) {
- printk(KERN_DEBUG "Leak l=%u %d\n", tp->lost_out,
- tp->ca_state);
- tp->lost_out = 0;
+ BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
+ BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
+ BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
+ if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) {
+ if (tcp_get_pcount(&tp->lost_out)) {
+ printk(KERN_DEBUG "Leak l=%u %d\n",
+ tcp_get_pcount(&tp->lost_out),
+ tp->ca_state);
+ tcp_set_pcount(&tp->lost_out, 0);
}
- if (tp->sacked_out) {
- printk(KERN_DEBUG "Leak s=%u %d\n", tp->sacked_out,
- tp->ca_state);
- tp->sacked_out = 0;
+ if (tcp_get_pcount(&tp->sacked_out)) {
+ printk(KERN_DEBUG "Leak s=%u %d\n",
+ tcp_get_pcount(&tp->sacked_out),
+ tp->ca_state);
+ tcp_set_pcount(&tp->sacked_out, 0);
}
- if (tp->retrans_out) {
- printk(KERN_DEBUG "Leak r=%u %d\n", tp->retrans_out,
- tp->ca_state);
- tp->retrans_out = 0;
+ if (tcp_get_pcount(&tp->retrans_out)) {
+ printk(KERN_DEBUG "Leak r=%u %d\n",
+ tcp_get_pcount(&tp->retrans_out),
+ tp->ca_state);
+ tcp_set_pcount(&tp->retrans_out, 0);
}
}
#endif
@@ -2712,19 +2733,19 @@ static void westwood_dupack_update(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
- tp->westwood.accounted += tp->mss_cache;
- tp->westwood.cumul_ack = tp->mss_cache;
+ tp->westwood.accounted += tp->mss_cache_std;
+ tp->westwood.cumul_ack = tp->mss_cache_std;
}
static inline int westwood_may_change_cumul(struct tcp_opt *tp)
{
- return ((tp->westwood.cumul_ack) > tp->mss_cache);
+ return ((tp->westwood.cumul_ack) > tp->mss_cache_std);
}
static inline void westwood_partial_update(struct tcp_opt *tp)
{
tp->westwood.accounted -= tp->westwood.cumul_ack;
- tp->westwood.cumul_ack = tp->mss_cache;
+ tp->westwood.cumul_ack = tp->mss_cache_std;
}
static inline void westwood_complete_update(struct tcp_opt *tp)
@@ -2835,7 +2856,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
*/
sk->sk_err_soft = 0;
tp->rcv_tstamp = tcp_time_stamp;
- prior_packets = tp->packets_out;
+ prior_packets = tcp_get_pcount(&tp->packets_out);
if (!prior_packets)
goto no_queue;
@@ -3857,11 +3878,11 @@ static void tcp_new_space(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
- if (tp->packets_out < tp->snd_cwnd &&
+ if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd &&
!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
!tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
- int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache) +
+ int sndmem = max_t(u32, tp->mss_clamp, tp->mss_cache_std) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd,
tp->reordering + 1);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2cfd74fbf566..73f12904c7c3 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2075,7 +2075,7 @@ static int tcp_v4_init_sock(struct sock *sk)
*/
tp->snd_ssthresh = 0x7fffffff; /* Infinity */
tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = 536;
+ tp->mss_cache_std = tp->mss_cache = 536;
tp->reordering = sysctl_tcp_reordering;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 79c1884c2b8b..ab04144245e5 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -752,11 +752,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->mdev = TCP_TIMEOUT_INIT;
newtp->rto = TCP_TIMEOUT_INIT;
- newtp->packets_out = 0;
- newtp->left_out = 0;
- newtp->retrans_out = 0;
- newtp->sacked_out = 0;
- newtp->fackets_out = 0;
+ tcp_set_pcount(&newtp->packets_out, 0);
+ tcp_set_pcount(&newtp->left_out, 0);
+ tcp_set_pcount(&newtp->retrans_out, 0);
+ tcp_set_pcount(&newtp->sacked_out, 0);
+ tcp_set_pcount(&newtp->fackets_out, 0);
newtp->snd_ssthresh = 0x7fffffff;
/* So many TCP implementations out there (incorrectly) count the
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index bd3d0133f724..32174549304e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -52,8 +52,7 @@ void update_send_head(struct sock *sk, struct tcp_opt *tp, struct sk_buff *skb)
if (sk->sk_send_head == (struct sk_buff *)&sk->sk_write_queue)
sk->sk_send_head = NULL;
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- if (tp->packets_out++ == 0)
- tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+ tcp_packets_out_inc(sk, tp, skb);
}
/* SND.NXT, if window was not shrunk.
@@ -123,7 +122,8 @@ static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *s
{
u32 now = tcp_time_stamp;
- if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
+ if (!tcp_get_pcount(&tp->packets_out) &&
+ (s32)(now - tp->lsndtime) > tp->rto)
tcp_cwnd_restart(tp, __sk_dst_get(sk));
tp->lsndtime = now;
@@ -259,7 +259,7 @@ static __inline__ u16 tcp_select_window(struct sock *sk)
*/
int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
{
- if(skb != NULL) {
+ if (skb != NULL) {
struct inet_opt *inet = inet_sk(sk);
struct tcp_opt *tp = tcp_sk(sk);
struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
@@ -268,6 +268,8 @@ int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb)
int sysctl_flags;
int err;
+ BUG_ON(!TCP_SKB_CB(skb)->tso_factor);
+
#define SYSCTL_FLAG_TSTAMPS 0x1
#define SYSCTL_FLAG_WSCALE 0x2
#define SYSCTL_FLAG_SACK 0x4
@@ -414,13 +416,29 @@ void tcp_push_one(struct sock *sk, unsigned cur_mss)
if (!tcp_transmit_skb(sk, skb_clone(skb, sk->sk_allocation))) {
sk->sk_send_head = NULL;
tp->snd_nxt = TCP_SKB_CB(skb)->end_seq;
- if (tp->packets_out++ == 0)
- tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
+ tcp_packets_out_inc(sk, tp, skb);
return;
}
}
}
+void tcp_set_skb_tso_factor(struct sk_buff *skb, unsigned int mss,
+ unsigned int mss_std)
+{
+ if (skb->len <= mss_std) {
+ /* Avoid the costly divide in the normal
+ * non-TSO case.
+ */
+ TCP_SKB_CB(skb)->tso_factor = 1;
+ } else {
+ unsigned int factor;
+
+ factor = skb->len + (mss_std - 1);
+ factor /= mss_std;
+ TCP_SKB_CB(skb)->tso_factor = factor;
+ }
+}
+
/* Function to create two new TCP segments. Shrinks the given segment
* to the specified size and appends a new segment with the rest of the
* packet to the list. This won't be called frequently, I hope.
@@ -453,10 +471,12 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
flags = TCP_SKB_CB(skb)->flags;
TCP_SKB_CB(skb)->flags = flags & ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH);
TCP_SKB_CB(buff)->flags = flags;
- TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
+ TCP_SKB_CB(buff)->sacked =
+ (TCP_SKB_CB(skb)->sacked &
+ (TCPCB_LOST | TCPCB_EVER_RETRANS | TCPCB_AT_TAIL));
if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
- tp->lost_out++;
- tp->left_out++;
+ tcp_inc_pcount(&tp->lost_out, buff);
+ tcp_inc_pcount(&tp->left_out, buff);
}
TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL;
@@ -480,6 +500,10 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
*/
TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
+ /* Fix up tso_factor for both original and new SKB. */
+ tcp_set_skb_tso_factor(skb, tp->mss_cache, tp->mss_cache_std);
+ tcp_set_skb_tso_factor(buff, tp->mss_cache, tp->mss_cache_std);
+
/* Link BUFF into the send queue. */
__skb_append(skb, buff);
@@ -596,7 +620,7 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
tp->mss_cache = tp->mss_cache_std = mss_now;
if (sk->sk_route_caps & NETIF_F_TSO) {
- int large_mss;
+ int large_mss, factor;
large_mss = 65535 - tp->af_specific->net_header_len -
tp->ext_header_len - tp->ext2_header_len - tp->tcp_header_len;
@@ -604,8 +628,14 @@ int tcp_sync_mss(struct sock *sk, u32 pmtu)
if (tp->max_window && large_mss > (tp->max_window>>1))
large_mss = max((tp->max_window>>1), 68U - tp->tcp_header_len);
- /* Always keep large mss multiple of real mss. */
- tp->mss_cache = mss_now*(large_mss/mss_now);
+ /* Always keep large mss multiple of real mss, but
+ * do not exceed congestion window.
+ */
+ factor = large_mss / mss_now;
+ if (factor > tp->snd_cwnd)
+ factor = tp->snd_cwnd;
+
+ tp->mss_cache = mss_now * factor;
}
return mss_now;
@@ -662,7 +692,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
return 0;
}
- return !tp->packets_out && sk->sk_send_head;
+ return !tcp_get_pcount(&tp->packets_out) && sk->sk_send_head;
}
return 0;
}
@@ -788,7 +818,7 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
/* The first test we must make is that neither of these two
* SKB's are still referenced by someone else.
*/
- if(!skb_cloned(skb) && !skb_cloned(next_skb)) {
+ if (!skb_cloned(skb) && !skb_cloned(next_skb)) {
int skb_size = skb->len, next_skb_size = next_skb->len;
u16 flags = TCP_SKB_CB(skb)->flags;
@@ -831,24 +861,23 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
*/
TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
- tp->retrans_out--;
+ tcp_dec_pcount(&tp->retrans_out, next_skb);
if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
- tp->lost_out--;
- tp->left_out--;
+ tcp_dec_pcount(&tp->lost_out, next_skb);
+ tcp_dec_pcount(&tp->left_out, next_skb);
}
/* Reno case is special. Sigh... */
- if (!tp->sack_ok && tp->sacked_out) {
- tp->sacked_out--;
- tp->left_out--;
+ if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) {
+ tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
+ tcp_dec_pcount(&tp->left_out, next_skb);
}
/* Not quite right: it can be > snd.fack, but
* it is better to underestimate fackets.
*/
- if (tp->fackets_out)
- tp->fackets_out--;
+ tcp_dec_pcount_approx(&tp->fackets_out, next_skb);
+ tcp_packets_out_dec(tp, next_skb);
sk_stream_free_skb(sk, next_skb);
- tp->packets_out--;
}
}
@@ -868,11 +897,11 @@ void tcp_simple_retransmit(struct sock *sk)
!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tp->retrans_out--;
+ tcp_dec_pcount(&tp->retrans_out, skb);
}
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tp->lost_out++;
+ tcp_inc_pcount(&tp->lost_out, skb);
lost = 1;
}
}
@@ -938,12 +967,12 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
&& TCP_SKB_CB(skb)->seq != tp->snd_una)
return -EAGAIN;
- if(skb->len > cur_mss) {
- if(tcp_fragment(sk, skb, cur_mss))
+ if (skb->len > cur_mss) {
+ if (tcp_fragment(sk, skb, cur_mss))
return -ENOMEM; /* We'll try again later. */
/* New SKB created, account for it. */
- tp->packets_out++;
+ tcp_inc_pcount(&tp->packets_out, skb);
}
/* Collapse two adjacent packets if worthwhile and we can. */
@@ -992,7 +1021,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
}
#endif
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
- tp->retrans_out++;
+ tcp_inc_pcount(&tp->retrans_out, skb);
/* Save stamp of the first retransmit. */
if (!tp->retrans_stamp)
@@ -1020,14 +1049,18 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb;
- int packet_cnt = tp->lost_out;
+ int packet_cnt = tcp_get_pcount(&tp->lost_out);
/* First pass: retransmit lost packets. */
if (packet_cnt) {
sk_stream_for_retrans_queue(skb, sk) {
__u8 sacked = TCP_SKB_CB(skb)->sacked;
+ int pkts = TCP_SKB_CB(skb)->tso_factor;
+
+ BUG_ON(!pkts);
- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+ if ((tcp_packets_in_flight(tp) + (pkts-1)) >=
+ tp->snd_cwnd)
return;
if (sacked&TCPCB_LOST) {
@@ -1044,7 +1077,8 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
}
- if (--packet_cnt <= 0)
+ packet_cnt -= TCP_SKB_CB(skb)->tso_factor;
+ if (packet_cnt <= 0)
break;
}
}
@@ -1073,17 +1107,22 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
packet_cnt = 0;
sk_stream_for_retrans_queue(skb, sk) {
- if(++packet_cnt > tp->fackets_out)
+ int pkts = TCP_SKB_CB(skb)->tso_factor;
+
+ BUG_ON(!pkts);
+
+ packet_cnt += pkts;
+ if (packet_cnt > tcp_get_pcount(&tp->fackets_out))
break;
- if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
+ if ((tcp_packets_in_flight(tp) + (pkts-1)) >= tp->snd_cwnd)
break;
- if(TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS)
continue;
/* Ok, retransmit it. */
- if(tcp_retransmit_skb(sk, skb))
+ if (tcp_retransmit_skb(sk, skb))
break;
if (skb == skb_peek(&sk->sk_write_queue))
@@ -1101,13 +1140,13 @@ void tcp_send_fin(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
struct sk_buff *skb = skb_peek_tail(&sk->sk_write_queue);
- unsigned int mss_now;
+ int mss_now;
/* Optimization, tack on the FIN if we have a queue of
* unsent frames. But be careful about outgoing SACKS
* and IP options.
*/
- mss_now = tcp_current_mss(sk, 1);
+ mss_now = tcp_current_mss(sk, 1);
if (sk->sk_send_head != NULL) {
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_FIN;
@@ -1127,6 +1166,7 @@ void tcp_send_fin(struct sock *sk)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_FIN);
TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->tso_factor = 1;
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
TCP_SKB_CB(skb)->seq = tp->write_seq;
@@ -1158,6 +1198,7 @@ void tcp_send_active_reset(struct sock *sk, int priority)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = (TCPCB_FLAG_ACK | TCPCB_FLAG_RST);
TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->tso_factor = 1;
/* Send it off. */
TCP_SKB_CB(skb)->seq = tcp_acceptable_seq(sk, tp);
@@ -1237,6 +1278,8 @@ struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst,
th->dest = req->rmt_port;
TCP_SKB_CB(skb)->seq = req->snt_isn;
TCP_SKB_CB(skb)->end_seq = TCP_SKB_CB(skb)->seq + 1;
+ TCP_SKB_CB(skb)->sacked = 0;
+ TCP_SKB_CB(skb)->tso_factor = 1;
th->seq = htonl(TCP_SKB_CB(skb)->seq);
th->ack_seq = htonl(req->rcv_isn + 1);
if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
@@ -1338,6 +1381,7 @@ int tcp_connect(struct sock *sk)
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_SYN;
TCP_ECN_send_syn(sk, tp, buff);
TCP_SKB_CB(buff)->sacked = 0;
+ TCP_SKB_CB(buff)->tso_factor = 1;
buff->csum = 0;
TCP_SKB_CB(buff)->seq = tp->write_seq++;
TCP_SKB_CB(buff)->end_seq = tp->write_seq;
@@ -1350,7 +1394,7 @@ int tcp_connect(struct sock *sk)
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
__skb_queue_tail(&sk->sk_write_queue, buff);
sk_charge_skb(sk, buff);
- tp->packets_out++;
+ tcp_inc_pcount(&tp->packets_out, buff);
tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
@@ -1437,6 +1481,7 @@ void tcp_send_ack(struct sock *sk)
buff->csum = 0;
TCP_SKB_CB(buff)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(buff)->sacked = 0;
+ TCP_SKB_CB(buff)->tso_factor = 1;
/* Send it off, this clears delayed acks for us. */
TCP_SKB_CB(buff)->seq = TCP_SKB_CB(buff)->end_seq = tcp_acceptable_seq(sk, tp);
@@ -1471,6 +1516,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
skb->csum = 0;
TCP_SKB_CB(skb)->flags = TCPCB_FLAG_ACK;
TCP_SKB_CB(skb)->sacked = urgent;
+ TCP_SKB_CB(skb)->tso_factor = 1;
/* Use a previous sequence. This should cause the other
* end to send an ack. Don't queue or clone SKB, just
@@ -1491,8 +1537,8 @@ int tcp_write_wakeup(struct sock *sk)
if ((skb = sk->sk_send_head) != NULL &&
before(TCP_SKB_CB(skb)->seq, tp->snd_una+tp->snd_wnd)) {
int err;
- int mss = tcp_current_mss(sk, 0);
- int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
+ unsigned int mss = tcp_current_mss(sk, 0);
+ unsigned int seg_size = tp->snd_una+tp->snd_wnd-TCP_SKB_CB(skb)->seq;
if (before(tp->pushed_seq, TCP_SKB_CB(skb)->end_seq))
tp->pushed_seq = TCP_SKB_CB(skb)->end_seq;
@@ -1514,7 +1560,9 @@ int tcp_write_wakeup(struct sock *sk)
sk->sk_route_caps &= ~NETIF_F_TSO;
tp->mss_cache = tp->mss_cache_std;
}
- }
+ } else if (!TCP_SKB_CB(skb)->tso_factor)
+ tcp_set_skb_tso_factor(skb, mss, tp->mss_cache_std);
+
TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
TCP_SKB_CB(skb)->when = tcp_time_stamp;
err = tcp_transmit_skb(sk, skb_clone(skb, GFP_ATOMIC));
@@ -1542,7 +1590,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk);
- if (tp->packets_out || !sk->sk_send_head) {
+ if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
/* Cancel probe timer, if it is not required. */
tp->probes_out = 0;
tp->backoff = 0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 72a5a50b50ab..c060bb333471 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -121,7 +121,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
* 1. Last segment was sent recently. */
if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
- (!tp->snd_wnd && !tp->packets_out))
+ (!tp->snd_wnd && !tcp_get_pcount(&tp->packets_out)))
do_reset = 1;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -269,7 +269,7 @@ static void tcp_probe_timer(struct sock *sk)
struct tcp_opt *tp = tcp_sk(sk);
int max_probes;
- if (tp->packets_out || !sk->sk_send_head) {
+ if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
tp->probes_out = 0;
return;
}
@@ -316,7 +316,7 @@ static void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_opt *tp = tcp_sk(sk);
- if (tp->packets_out == 0)
+ if (!tcp_get_pcount(&tp->packets_out))
goto out;
BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
@@ -606,7 +606,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
- if (tp->packets_out || sk->sk_send_head)
+ if (tcp_get_pcount(&tp->packets_out) || sk->sk_send_head)
goto resched;
elapsed = tcp_time_stamp - tp->rcv_tstamp;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7150375908a8..d2091c5ce489 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2072,6 +2072,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
neigh_sysctl_unregister(idev->nd_parms);
#endif
neigh_parms_release(&nd_tbl, idev->nd_parms);
+ neigh_ifdown(&nd_tbl, dev);
in6_dev_put(idev);
}
return 0;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index b278e5a04ca8..e1f5aeb79258 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -58,6 +58,7 @@
#include <linux/in6.h>
#include <linux/route.h>
#include <linux/init.h>
+#include <linux/rcupdate.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
@@ -284,14 +285,21 @@ static int ndisc_constructor(struct neighbour *neigh)
{
struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
struct net_device *dev = neigh->dev;
- struct inet6_dev *in6_dev = in6_dev_get(dev);
+ struct inet6_dev *in6_dev;
+ struct neigh_parms *parms;
int is_multicast = ipv6_addr_is_multicast(addr);
- if (in6_dev == NULL)
+ rcu_read_lock();
+ in6_dev = in6_dev_get(dev);
+ if (in6_dev == NULL) {
+ rcu_read_unlock();
return -EINVAL;
+ }
- if (in6_dev->nd_parms)
- neigh->parms = in6_dev->nd_parms;
+ parms = in6_dev->nd_parms;
+ __neigh_parms_put(neigh->parms);
+ neigh->parms = neigh_parms_clone(parms);
+ rcu_read_unlock();
neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
if (dev->hard_header == NULL) {
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 73b34df7fd17..ebed7e197aac 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1929,7 +1929,7 @@ static int tcp_v6_init_sock(struct sock *sk)
*/
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_clamp = ~0;
- tp->mss_cache = 536;
+ tp->mss_cache_std = tp->mss_cache = 536;
tp->reordering = sysctl_tcp_reordering;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 51d0514fd2a7..1b441a628b71 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -786,11 +786,13 @@ out:
static int packet_release(struct socket *sock)
{
struct sock *sk = sock->sk;
- struct packet_opt *po = pkt_sk(sk);
+ struct packet_opt *po;
if (!sk)
return 0;
+ po = pkt_sk(sk);
+
write_lock_bh(&packet_sklist_lock);
sk_del_node_init(sk);
write_unlock_bh(&packet_sklist_lock);
diff --git a/net/sched/gact.c b/net/sched/gact.c
index cd1a58c60485..5607f5e8cd83 100644
--- a/net/sched/gact.c
+++ b/net/sched/gact.c
@@ -76,7 +76,9 @@ tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a,int ov
{
struct rtattr *tb[TCA_GACT_MAX];
struct tc_gact *parm = NULL;
+#ifdef CONFIG_GACT_PROB
struct tc_gact_p *p_parm = NULL;
+#endif
struct tcf_gact *p = NULL;
int ret = 0;
int size = sizeof (*p);
@@ -176,7 +178,9 @@ tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
unsigned char *b = skb->tail;
struct tc_gact opt;
+#ifdef CONFIG_GACT_PROB
struct tc_gact_p p_opt;
+#endif
struct tcf_gact *p;
struct tcf_t t;
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index fe530156875a..ff61f8e698c9 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -714,3 +714,4 @@ static void __exit atm_exit(void)
module_init(atm_init)
module_exit(atm_exit)
+MODULE_LICENSE("GPL");
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index bae07708eb01..97f66fd770f4 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -81,8 +81,6 @@ static struct sctp_af *sctp_af_v6_specific;
kmem_cache_t *sctp_chunk_cachep;
kmem_cache_t *sctp_bucket_cachep;
-extern struct net_proto_family inet_family_ops;
-
extern int sctp_snmp_proc_init(void);
extern int sctp_snmp_proc_exit(void);
extern int sctp_eps_proc_init(void);