summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@ppc970.osdl.org>2005-01-19 15:48:53 -0800
committerLinus Torvalds <torvalds@ppc970.osdl.org>2005-01-19 15:48:53 -0800
commit9472e348190603fffed9a32e19c54a5cf17e4bcc (patch)
treecab35f9e8f975dc7b2a0654035f45012b4fa1227
parent90fcd610926a7d54d8d488faaa313109bb82f88a (diff)
parent4debb9ea116cb46ea91819d0886915f59df58544 (diff)
Merge bk://kernel.bkbits.net/davem/net-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
-rw-r--r--Documentation/networking/netdevices.txt9
-rw-r--r--drivers/atm/ambassador.c75
-rw-r--r--drivers/atm/he.c37
-rw-r--r--drivers/atm/horizon.c19
-rw-r--r--drivers/atm/idt77252.c9
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib.h9
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_ib.c4
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_main.c26
-rw-r--r--drivers/net/e1000/e1000.h1
-rw-r--r--drivers/net/e1000/e1000_main.c33
-rw-r--r--drivers/net/fc/iph5526_ip.h1
-rw-r--r--drivers/net/sungem.c122
-rw-r--r--drivers/net/sungem.h1
-rw-r--r--drivers/net/tg3.c126
-rw-r--r--drivers/net/tg3.h9
-rw-r--r--drivers/s390/net/qeth_main.c26
-rw-r--r--include/linux/atmdev.h4
-rw-r--r--include/linux/fcdevice.h4
-rw-r--r--include/linux/fddidevice.h7
-rw-r--r--include/linux/hippidevice.h21
-rw-r--r--include/linux/ip.h14
-rw-r--r--include/linux/ipv6.h34
-rw-r--r--include/linux/netdevice.h10
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack.h150
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_amanda.h13
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_core.h11
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_ftp.h30
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_helper.h16
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_irc.h18
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_protocol.h4
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_tftp.h4
-rw-r--r--include/linux/netfilter_ipv4/ip_conntrack_tuple.h10
-rw-r--r--include/linux/netfilter_ipv4/ip_nat.h30
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_core.h21
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_helper.h45
-rw-r--r--include/linux/netfilter_ipv4/ip_nat_protocol.h6
-rw-r--r--include/linux/tcp.h16
-rw-r--r--include/linux/trdevice.h4
-rw-r--r--include/net/ipx.h13
-rw-r--r--include/net/sctp/sctp.h19
-rw-r--r--include/net/sctp/structs.h44
-rw-r--r--include/net/tcp.h73
-rw-r--r--net/802/fc.c7
-rw-r--r--net/802/fddi.c7
-rw-r--r--net/802/hippi.c19
-rw-r--r--net/802/tr.c7
-rw-r--r--net/8021q/vlan.c9
-rw-r--r--net/8021q/vlan.h1
-rw-r--r--net/8021q/vlanproc.c2
-rw-r--r--net/atm/addr.c93
-rw-r--r--net/atm/clip.c4
-rw-r--r--net/atm/lec.c322
-rw-r--r--net/atm/lec.h1
-rw-r--r--net/atm/resources.c1
-rw-r--r--net/atm/svc.c11
-rw-r--r--net/core/dev.c4
-rw-r--r--net/core/dev_mcast.c26
-rw-r--r--net/core/netpoll.c6
-rw-r--r--net/core/pktgen.c9
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c56
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c609
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c164
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c71
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c2
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c24
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c25
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c32
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c31
-rw-r--r--net/ipv4/netfilter/ip_nat_amanda.c126
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c575
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c259
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c112
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c223
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c8
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c23
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c24
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c2
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c27
-rw-r--r--net/ipv4/netfilter/ip_nat_snmp_basic.c145
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c72
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c168
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c2
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c4
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c6
-rw-r--r--net/ipv4/netfilter/ipt_helper.c16
-rw-r--r--net/ipv4/tcp.c12
-rw-r--r--net/ipv4/tcp_input.c222
-rw-r--r--net/ipv4/tcp_minisocks.c10
-rw-r--r--net/ipv4/tcp_output.c47
-rw-r--r--net/ipv4/tcp_timer.c8
-rw-r--r--net/ipv6/addrconf.c27
-rw-r--r--net/ipv6/icmp.c32
-rw-r--r--net/ipv6/ndisc.c67
-rw-r--r--net/ipv6/raw.c41
-rw-r--r--net/ipv6/route.c18
-rw-r--r--net/ipx/af_ipx.c69
-rw-r--r--net/ipx/ipx_proc.c4
-rw-r--r--net/ipx/ipx_route.c2
-rw-r--r--net/netlink/af_netlink.c38
-rw-r--r--net/sched/cls_api.c40
-rw-r--r--net/sched/cls_route.c32
-rw-r--r--net/sched/cls_rsvp.h16
-rw-r--r--net/sched/sch_cbq.c79
-rw-r--r--net/sched/sch_generic.c32
-rw-r--r--net/sched/sch_hfsc.c45
-rw-r--r--net/sched/sch_htb.c60
-rw-r--r--net/sched/sch_prio.c74
-rw-r--r--net/sched/sch_teql.c6
-rw-r--r--net/sctp/associola.c8
-rw-r--r--net/sctp/bind_addr.c4
-rw-r--r--net/sctp/chunk.c2
-rw-r--r--net/sctp/endpointola.c2
-rw-r--r--net/sctp/ipv6.c20
-rw-r--r--net/sctp/output.c4
-rw-r--r--net/sctp/protocol.c16
-rw-r--r--net/sctp/sm_make_chunk.c2
-rw-r--r--net/sctp/socket.c64
-rw-r--r--net/sctp/transport.c2
-rw-r--r--net/sctp/ulpqueue.c3
-rw-r--r--net/socket.c12
-rw-r--r--net/xfrm/xfrm_policy.c2
121 files changed, 2090 insertions, 3495 deletions
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
index 1509f3aff968..1450809aed4b 100644
--- a/Documentation/networking/netdevices.txt
+++ b/Documentation/networking/netdevices.txt
@@ -45,10 +45,9 @@ dev->hard_start_xmit:
Synchronization: dev->xmit_lock spinlock.
When the driver sets NETIF_F_LLTX in dev->features this will be
called without holding xmit_lock. In this case the driver
- has to lock by itself when needed. It is recommended to use a try lock
- for this and return -1 when the spin lock fails.
- The locking there should also properly protect against
- set_multicast_list
+ has to execute it's transmission routine in a completely lockless
+ manner. It is recommended only for queueless devices such
+ loopback and tunnels.
Context: BHs disabled
Notes: netif_queue_stopped() is guaranteed false
Return codes:
@@ -56,8 +55,6 @@ dev->hard_start_xmit:
o NETDEV_TX_BUSY Cannot transmit packet, try later
Usually a bug, means queue start/stop flow control is broken in
the driver. Note: the driver must NOT put the skb in its DMA ring.
- o NETDEV_TX_LOCKED Locking failed, please retry quickly.
- Only valid when NETIF_F_LLTX is set.
dev->tx_timeout:
Synchronization: dev->xmit_lock spinlock.
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c
index 924abd2654a0..3870e3787b7b 100644
--- a/drivers/atm/ambassador.c
+++ b/drivers/atm/ambassador.c
@@ -574,7 +574,6 @@ static int command_do (amb_dev * dev, command * cmd) {
amb_cq * cq = &dev->cq;
volatile amb_cq_ptrs * ptrs = &cq->ptrs;
command * my_slot;
- unsigned long timeout;
PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev);
@@ -599,20 +598,14 @@ static int command_do (amb_dev * dev, command * cmd) {
// mail the command
wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in));
- // prepare to wait for cq->pending milliseconds
- // effectively one centisecond on i386
- timeout = (cq->pending*HZ+999)/1000;
-
if (cq->pending > cq->high)
cq->high = cq->pending;
spin_unlock (&cq->lock);
- while (timeout) {
- // go to sleep
- // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
- set_current_state(TASK_UNINTERRUPTIBLE);
- timeout = schedule_timeout (timeout);
- }
+ // these comments were in a while-loop before, msleep removes the loop
+ // go to sleep
+ // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout);
+ msleep(cq->pending);
// wait for my slot to be reached (all waiters are here or above, until...)
while (ptrs->out != my_slot) {
@@ -1799,12 +1792,11 @@ static int __init do_loader_command (volatile loader_block * lb,
// dump_loader_block (lb);
wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask);
- timeout = command_timeouts[cmd] * HZ/100;
+ timeout = command_timeouts[cmd] * 10;
while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS))
if (timeout) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- timeout = schedule_timeout (timeout);
+ timeout = msleep_interruptible(timeout);
} else {
PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd);
dump_registers (dev);
@@ -1814,10 +1806,10 @@ static int __init do_loader_command (volatile loader_block * lb,
if (cmd == adapter_start) {
// wait for start command to acknowledge...
- timeout = HZ/10;
+ timeout = 100;
while (rd_plain (dev, offsetof(amb_mem, doorbell)))
if (timeout) {
- timeout = schedule_timeout (timeout);
+ timeout = msleep_interruptible(timeout);
} else {
PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x",
be32_to_cpu (lb->result));
@@ -1932,17 +1924,12 @@ static int amb_reset (amb_dev * dev, int diags) {
if (diags) {
unsigned long timeout;
// 4.2 second wait
- timeout = HZ*42/10;
- while (timeout) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- timeout = schedule_timeout (timeout);
- }
+ msleep(4200);
// half second time-out
- timeout = HZ/2;
+ timeout = 500;
while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready)))
if (timeout) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- timeout = schedule_timeout (timeout);
+ timeout = msleep_interruptible(timeout);
} else {
PRINTD (DBG_LOAD|DBG_ERR, "reset timed out");
return -ETIMEDOUT;
@@ -2056,14 +2043,12 @@ static int __init amb_talk (amb_dev * dev) {
wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a));
// 2.2 second wait (must not touch doorbell during 2 second DMA test)
- timeout = HZ*22/10;
- while (timeout)
- timeout = schedule_timeout (timeout);
+ msleep(2200);
// give the adapter another half second?
- timeout = HZ/2;
+ timeout = 500;
while (rd_plain (dev, offsetof(amb_mem, doorbell)))
if (timeout) {
- timeout = schedule_timeout (timeout);
+ timeout = msleep_interruptible(timeout);
} else {
PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out");
return -ETIMEDOUT;
@@ -2228,17 +2213,12 @@ static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev)
spin_lock_init (&dev->rxq[pool].lock);
}
-static int setup_pci_dev(struct pci_dev *pci_dev)
+static void setup_pci_dev(struct pci_dev *pci_dev)
{
unsigned char lat;
- int ret;
// enable bus master accesses
pci_set_master(pci_dev);
-
- ret = pci_enable_device(pci_dev);
- if (ret < 0)
- goto out;
// frobnicate latency (upwards, usually)
pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat);
@@ -2251,22 +2231,27 @@ static int setup_pci_dev(struct pci_dev *pci_dev)
lat, pci_lat);
pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat);
}
-out:
- return ret;
}
static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
{
amb_dev * dev;
int err;
+ unsigned int irq;
+
+ err = pci_enable_device(pci_dev);
+ if (err < 0) {
+ PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
+ goto out;
+ }
// read resources from PCI configuration space
- unsigned int irq = pci_dev->irq;
+ irq = pci_dev->irq;
if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) {
PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card");
err = -EINVAL;
- goto out;
+ goto out_disable;
}
PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at"
@@ -2277,7 +2262,7 @@ static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_
err = pci_request_region(pci_dev, 1, DEV_LABEL);
if (err < 0) {
PRINTK (KERN_ERR, "IO range already in use!");
- goto out;
+ goto out_disable;
}
dev = kmalloc (sizeof(amb_dev), GFP_KERNEL);
@@ -2295,15 +2280,13 @@ static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_
goto out_free;
}
- err = setup_pci_dev(pci_dev);
- if (err < 0)
- goto out_reset;
+ setup_pci_dev(pci_dev);
// grab (but share) IRQ and install handler
err = request_irq(irq, interrupt_handler, SA_SHIRQ, DEV_LABEL, dev);
if (err < 0) {
PRINTK (KERN_ERR, "request IRQ failed!");
- goto out_disable;
+ goto out_reset;
}
dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL);
@@ -2337,14 +2320,14 @@ out:
out_free_irq:
free_irq(irq, dev);
-out_disable:
- pci_disable_device(pci_dev);
out_reset:
amb_reset(dev, 0);
out_free:
kfree(dev);
out_release:
pci_release_region(pci_dev, 1);
+out_disable:
+ pci_disable_device(pci_dev);
goto out;
}
diff --git a/drivers/atm/he.c b/drivers/atm/he.c
index 7221439b4937..e64d422470ff 100644
--- a/drivers/atm/he.c
+++ b/drivers/atm/he.c
@@ -86,44 +86,19 @@
#undef USE_RBPL_POOL /* if memory is tight try this */
#define USE_TPD_POOL
/* #undef CONFIG_ATM_HE_USE_SUNI */
-
-/* compatibility */
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,69)
-typedef void irqreturn_t;
-#define IRQ_NONE
-#define IRQ_HANDLED
-#define IRQ_RETVAL(x)
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,9)
-#define __devexit_p(func) func
-#endif
-
-#ifndef MODULE_LICENSE
-#define MODULE_LICENSE(x)
-#endif
-
-#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3)
-#define pci_set_drvdata(pci_dev, data) (pci_dev)->driver_data = (data)
-#define pci_get_drvdata(pci_dev) (pci_dev)->driver_data
-#endif
+/* #undef HE_DEBUG */
#include "he.h"
-
#include "suni.h"
-
#include <linux/atm_he.h>
#define hprintk(fmt,args...) printk(KERN_ERR DEV_LABEL "%d: " fmt, he_dev->number , ##args)
-#undef DEBUG
-#ifdef DEBUG
+#ifdef HE_DEBUG
#define HPRINTK(fmt,args...) printk(KERN_DEBUG DEV_LABEL "%d: " fmt, he_dev->number , ##args)
-#else
+#else /* !HE_DEBUG */
#define HPRINTK(fmt,args...) do { } while (0)
-#endif /* DEBUG */
-
+#endif /* HE_DEBUG */
/* version definition */
@@ -147,8 +122,8 @@ static u8 read_prom_byte(struct he_dev *he_dev, int addr);
/* globals */
-static struct he_dev *he_devs = NULL;
-static int disable64 = 0;
+static struct he_dev *he_devs;
+static int disable64;
static short nvpibits = -1;
static short nvcibits = -1;
static short rx_skb_reserve = 16;
diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c
index 088440b8056a..1c80cc922e4a 100644
--- a/drivers/atm/horizon.c
+++ b/drivers/atm/horizon.c
@@ -2706,18 +2706,18 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
// adapter slot free, read resources from PCI configuration space
u32 iobase = pci_resource_start (pci_dev, 0);
u32 * membase = bus_to_virt (pci_resource_start (pci_dev, 1));
- u8 irq = pci_dev->irq;
+ unsigned int irq;
unsigned char lat;
PRINTD (DBG_FLOW, "hrz_probe");
- /* XXX DEV_LABEL is a guess */
- if (!request_region(iobase, HRZ_IO_EXTENT, DEV_LABEL))
+ if (pci_enable_device(pci_dev))
return -EINVAL;
- if (pci_enable_device(pci_dev)) {
- err = -EINVAL;
- goto out_release;
+ /* XXX DEV_LABEL is a guess */
+ if (!request_region(iobase, HRZ_IO_EXTENT, DEV_LABEL)) {
+ return -EINVAL;
+ goto out_disable;
}
dev = kmalloc(sizeof(hrz_dev), GFP_KERNEL);
@@ -2725,7 +2725,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
// perhaps we should be nice: deregister all adapters and abort?
PRINTD(DBG_ERR, "out of memory");
err = -ENOMEM;
- goto out_disable;
+ goto out_release;
}
memset(dev, 0, sizeof(hrz_dev));
@@ -2733,6 +2733,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
pci_set_drvdata(pci_dev, dev);
// grab IRQ and install handler - move this someplace more sensible
+ irq = pci_dev->irq;
if (request_irq(irq,
interrupt_handler,
SA_SHIRQ, /* irqflags guess */
@@ -2846,10 +2847,10 @@ out_free_irq:
free_irq(dev->irq, dev);
out_free:
kfree(dev);
-out_disable:
- pci_disable_device(pci_dev);
out_release:
release_region(iobase, HRZ_IO_EXTENT);
+out_disable:
+ pci_disable_device(pci_dev);
goto out;
}
diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c
index 80e304cf3169..3d8764ab2825 100644
--- a/drivers/atm/idt77252.c
+++ b/drivers/atm/idt77252.c
@@ -3136,14 +3136,11 @@ deinit_card(struct idt77252_dev *card)
}
}
- if (card->soft_tst)
- vfree(card->soft_tst);
+ vfree(card->soft_tst);
- if (card->scd2vc)
- vfree(card->scd2vc);
+ vfree(card->scd2vc);
- if (card->vcs)
- vfree(card->vcs);
+ vfree(card->vcs);
if (card->raw_cell_hnd) {
pci_free_consistent(card->pcidev, 2 * sizeof(u32),
diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
index 074394d4f8a9..b097522c55e8 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib.h
+++ b/drivers/infiniband/ulp/ipoib/ipoib.h
@@ -104,10 +104,10 @@ struct ipoib_buf {
};
/*
- * Device private locking: tx_lock protects members used in TX fast
- * path (and we use LLTX so upper layers don't do extra locking).
- * lock protects everything else. lock nests inside of tx_lock (ie
- * tx_lock must be acquired first if needed).
+ * Device private locking: netdev->xmit_lock protects members used
+ * in TX fast path.
+ * lock protects everything else. lock nests inside of xmit_lock (ie
+ * xmit_lock must be acquired first if needed).
*/
struct ipoib_dev_priv {
spinlock_t lock;
@@ -150,7 +150,6 @@ struct ipoib_dev_priv {
struct ipoib_buf *rx_ring;
- spinlock_t tx_lock;
struct ipoib_buf *tx_ring;
unsigned tx_head;
unsigned tx_tail;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
index ac550991227e..d70f9f53d9d0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
@@ -247,12 +247,12 @@ static void ipoib_ib_handle_wc(struct net_device *dev,
dev_kfree_skb_any(tx_req->skb);
- spin_lock_irqsave(&priv->tx_lock, flags);
+ spin_lock_irqsave(&dev->xmit_lock, flags);
++priv->tx_tail;
if (netif_queue_stopped(dev) &&
priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2)
netif_wake_queue(dev);
- spin_unlock_irqrestore(&priv->tx_lock, flags);
+ spin_unlock_irqrestore(&dev->xmit_lock, flags);
if (wc->status != IB_WC_SUCCESS &&
wc->status != IB_WC_WR_FLUSH_ERR)
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 63c8168d8af8..90c73a7cea72 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -411,7 +411,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev)
/*
* We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
+ * inside dev->xmit_lock -- no need to save/restore flags.
*/
spin_lock(&priv->lock);
@@ -483,7 +483,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
/*
* We can only be called from ipoib_start_xmit, so we're
- * inside tx_lock -- no need to save/restore flags.
+ * inside dev->xmit_lock -- no need to save/restore flags.
*/
spin_lock(&priv->lock);
@@ -526,27 +526,11 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev,
spin_unlock(&priv->lock);
}
+/* Called with dev->xmit_lock held and IRQs disabled. */
static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct ipoib_dev_priv *priv = netdev_priv(dev);
struct ipoib_neigh *neigh;
- unsigned long flags;
-
- local_irq_save(flags);
- if (!spin_trylock(&priv->tx_lock)) {
- local_irq_restore(flags);
- return NETDEV_TX_LOCKED;
- }
-
- /*
- * Check if our queue is stopped. Since we have the LLTX bit
- * set, we can't rely on netif_stop_queue() preventing our
- * xmit function from being called with a full queue.
- */
- if (unlikely(netif_queue_stopped(dev))) {
- spin_unlock_irqrestore(&priv->tx_lock, flags);
- return NETDEV_TX_BUSY;
- }
if (skb->dst && skb->dst->neighbour) {
if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) {
@@ -601,7 +585,6 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev)
}
out:
- spin_unlock_irqrestore(&priv->tx_lock, flags);
return NETDEV_TX_OK;
}
@@ -797,7 +780,7 @@ static void ipoib_setup(struct net_device *dev)
dev->addr_len = INFINIBAND_ALEN;
dev->type = ARPHRD_INFINIBAND;
dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2;
- dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX;
+ dev->features = NETIF_F_VLAN_CHALLENGED;
/* MTU will be reset when mcast join happens */
dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN;
@@ -812,7 +795,6 @@ static void ipoib_setup(struct net_device *dev)
priv->dev = dev;
spin_lock_init(&priv->lock);
- spin_lock_init(&priv->tx_lock);
init_MUTEX(&priv->mcast_mutex);
init_MUTEX(&priv->vlan_mutex);
diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h
index 77db78960430..0843a7c9c624 100644
--- a/drivers/net/e1000/e1000.h
+++ b/drivers/net/e1000/e1000.h
@@ -209,7 +209,6 @@ struct e1000_adapter {
/* TX */
struct e1000_desc_ring tx_ring;
- spinlock_t tx_lock;
uint32_t txd_cmd;
uint32_t tx_int_delay;
uint32_t tx_abs_int_delay;
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index aa5ad41acf24..3966e55dcd9a 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -291,7 +291,9 @@ e1000_up(struct e1000_adapter *adapter)
e1000_phy_reset(&adapter->hw);
}
+ spin_lock_irq(&netdev->xmit_lock);
e1000_set_multi(netdev);
+ spin_unlock_irq(&netdev->xmit_lock);
e1000_restore_vlan(adapter);
@@ -520,9 +522,6 @@ e1000_probe(struct pci_dev *pdev,
if(pci_using_dac)
netdev->features |= NETIF_F_HIGHDMA;
- /* hard_start_xmit is safe against parallel locking */
- netdev->features |= NETIF_F_LLTX;
-
/* before reading the EEPROM, reset the controller to
* put the device in a known good starting state */
@@ -732,7 +731,6 @@ e1000_sw_init(struct e1000_adapter *adapter)
atomic_set(&adapter->irq_sem, 1);
spin_lock_init(&adapter->stats_lock);
- spin_lock_init(&adapter->tx_lock);
return 0;
}
@@ -1293,6 +1291,8 @@ e1000_set_mac(struct net_device *netdev, void *p)
* list or the network interface flags are updated. This routine is
* responsible for configuring the hardware for proper multicast,
* promiscuous mode, and all-multi behavior.
+ *
+ * Called with netdev->xmit_lock held and IRQs disabled.
**/
static void
@@ -1304,12 +1304,9 @@ e1000_set_multi(struct net_device *netdev)
uint32_t rctl;
uint32_t hash_value;
int i;
- unsigned long flags;
/* Check for Promiscuous and All Multicast modes */
- spin_lock_irqsave(&adapter->tx_lock, flags);
-
rctl = E1000_READ_REG(hw, RCTL);
if(netdev->flags & IFF_PROMISC) {
@@ -1358,8 +1355,6 @@ e1000_set_multi(struct net_device *netdev)
if(hw->mac_type == e1000_82542_rev2_0)
e1000_leave_82542_rst(adapter);
-
- spin_unlock_irqrestore(&adapter->tx_lock, flags);
}
/* Need to wait a few seconds after link up to get diagnostic information from
@@ -1786,6 +1781,8 @@ no_fifo_stall_required:
}
#define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 )
+
+/* Called with dev->xmit_lock held and interrupts disabled. */
static int
e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
{
@@ -1794,7 +1791,6 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
unsigned int max_txd_pwr = E1000_MAX_TXD_PWR;
unsigned int tx_flags = 0;
unsigned int len = skb->len;
- unsigned long flags;
unsigned int nr_frags = 0;
unsigned int mss = 0;
int count = 0;
@@ -1838,18 +1834,10 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
if(adapter->pcix_82544)
count += nr_frags;
- local_irq_save(flags);
- if (!spin_trylock(&adapter->tx_lock)) {
- /* Collision - tell upper layer to requeue */
- local_irq_restore(flags);
- return NETDEV_TX_LOCKED;
- }
-
/* need: count + 2 desc gap to keep tail from touching
* head, otherwise try next time */
if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < count + 2)) {
netif_stop_queue(netdev);
- spin_unlock_irqrestore(&adapter->tx_lock, flags);
return NETDEV_TX_BUSY;
}
@@ -1857,7 +1845,6 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
if(unlikely(e1000_82547_fifo_workaround(adapter, skb))) {
netif_stop_queue(netdev);
mod_timer(&adapter->tx_fifo_stall_timer, jiffies);
- spin_unlock_irqrestore(&adapter->tx_lock, flags);
return NETDEV_TX_BUSY;
}
}
@@ -1884,7 +1871,6 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < MAX_SKB_FRAGS + 2))
netif_stop_queue(netdev);
- spin_unlock_irqrestore(&adapter->tx_lock, flags);
return NETDEV_TX_OK;
}
@@ -2234,13 +2220,13 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter)
tx_ring->next_to_clean = i;
- spin_lock(&adapter->tx_lock);
+ spin_lock(&netdev->xmit_lock);
if(unlikely(cleaned && netif_queue_stopped(netdev) &&
netif_carrier_ok(netdev)))
netif_wake_queue(netdev);
- spin_unlock(&adapter->tx_lock);
+ spin_unlock(&netdev->xmit_lock);
return cleaned;
}
@@ -2819,7 +2805,10 @@ e1000_suspend(struct pci_dev *pdev, uint32_t state)
if(wufc) {
e1000_setup_rctl(adapter);
+
+ spin_lock_irq(&netdev->xmit_lock);
e1000_set_multi(netdev);
+ spin_unlock_irq(&netdev->xmit_lock);
/* turn on all-multi mode if wake on multicast is enabled */
if(adapter->wol & E1000_WUFC_MC) {
diff --git a/drivers/net/fc/iph5526_ip.h b/drivers/net/fc/iph5526_ip.h
index b54f727e140a..9fae3b002fec 100644
--- a/drivers/net/fc/iph5526_ip.h
+++ b/drivers/net/fc/iph5526_ip.h
@@ -18,7 +18,6 @@ static int iph5526_change_mtu(struct net_device *dev, int mtu);
static void rx_net_packet(struct fc_info *fi, u_char *buff_addr, int payload_size);
static void rx_net_mfs_packet(struct fc_info *fi, struct sk_buff *skb);
-unsigned short fc_type_trans(struct sk_buff *skb, struct net_device *dev);
static int tx_ip_packet(struct sk_buff *skb, unsigned long len, struct fc_info *fi);
static int tx_arp_packet(char *data, unsigned long len, struct fc_info *fi);
#endif
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 38e94961e1a4..c5cbe1bda9cb 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -835,9 +835,9 @@ static int gem_poll(struct net_device *dev, int *budget)
}
/* Run TX completion thread */
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
gem_tx(dev, gp, gp->status);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irqrestore(&gp->lock, flags);
@@ -932,12 +932,12 @@ static void gem_tx_timeout(struct net_device *dev)
readl(gp->regs + MAC_RXCFG));
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
gp->reset_task_pending = 2;
schedule_work(&gp->reset_task);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
}
@@ -955,7 +955,6 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
struct gem *gp = dev->priv;
int entry;
u64 ctrl;
- unsigned long flags;
ctrl = 0;
if (skb->ip_summed == CHECKSUM_HW) {
@@ -969,17 +968,9 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
(csum_stuff_off << 21));
}
- local_irq_save(flags);
- if (!spin_trylock(&gp->tx_lock)) {
- /* Tell upper layer to requeue */
- local_irq_restore(flags);
- return NETDEV_TX_LOCKED;
- }
-
/* This is a hard error, log it. */
if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) {
netif_stop_queue(dev);
- spin_unlock_irqrestore(&gp->tx_lock, flags);
printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
dev->name);
return NETDEV_TX_BUSY;
@@ -1066,7 +1057,6 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev)
dev->name, entry, skb->len);
mb();
writel(gp->tx_new, gp->regs + TXDMA_KICK);
- spin_unlock_irqrestore(&gp->tx_lock, flags);
dev->trans_start = jiffies;
@@ -1097,11 +1087,11 @@ static int gem_change_mtu(struct net_device *dev, int new_mtu)
}
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
dev->mtu = new_mtu;
gp->reset_task_pending = 1;
schedule_work(&gp->reset_task);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
flush_scheduled_work();
@@ -1111,7 +1101,7 @@ static int gem_change_mtu(struct net_device *dev, int new_mtu)
#define STOP_TRIES 32
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_stop(struct gem *gp)
{
int limit;
@@ -1137,7 +1127,7 @@ static void gem_stop(struct gem *gp)
printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name);
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_start_dma(struct gem *gp)
{
unsigned long val;
@@ -1162,7 +1152,7 @@ static void gem_start_dma(struct gem *gp)
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
// XXX dbl check what that function should do when called on PCS PHY
static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep)
{
@@ -1249,7 +1239,7 @@ non_mii:
/* A link-up condition has occurred, initialize and enable the
* rest of the chip.
*
- * Must be invoked under gp->lock and gp->tx_lock.
+ * Must be invoked under gp->lock and dev->xmit_lock.
*/
static int gem_set_link_modes(struct gem *gp)
{
@@ -1356,7 +1346,7 @@ static int gem_set_link_modes(struct gem *gp)
return 0;
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static int gem_mdio_link_not_up(struct gem *gp)
{
switch (gp->lstate) {
@@ -1414,7 +1404,7 @@ static void gem_reset_task(void *data)
netif_poll_disable(gp->dev);
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
if (gp->hw_running && gp->opened) {
netif_stop_queue(gp->dev);
@@ -1430,7 +1420,7 @@ static void gem_reset_task(void *data)
}
gp->reset_task_pending = 0;
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
netif_poll_enable(gp->dev);
}
@@ -1444,7 +1434,7 @@ static void gem_link_timer(unsigned long data)
return;
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
/* If the link of task is still pending, we just
* reschedule the link timer
@@ -1514,11 +1504,11 @@ static void gem_link_timer(unsigned long data)
restart:
mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10));
out_unlock:
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_clean_rings(struct gem *gp)
{
struct gem_init_block *gb = gp->init_block;
@@ -1569,7 +1559,7 @@ static void gem_clean_rings(struct gem *gp)
}
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_init_rings(struct gem *gp)
{
struct gem_init_block *gb = gp->init_block;
@@ -1619,7 +1609,7 @@ static void gem_init_rings(struct gem *gp)
wmb();
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_init_phy(struct gem *gp)
{
u32 mifcfg;
@@ -1757,7 +1747,7 @@ static void gem_init_phy(struct gem *gp)
}
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_init_dma(struct gem *gp)
{
u64 desc_dma = (u64) gp->gblock_dvma;
@@ -1795,7 +1785,7 @@ static void gem_init_dma(struct gem *gp)
gp->regs + RXDMA_BLANK);
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under dev->xmit_lock. */
static u32
gem_setup_multicast(struct gem *gp)
{
@@ -1838,7 +1828,7 @@ gem_setup_multicast(struct gem *gp)
return rxcfg;
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_init_mac(struct gem *gp)
{
unsigned char *e = &gp->dev->dev_addr[0];
@@ -1916,7 +1906,7 @@ static void gem_init_mac(struct gem *gp)
writel(0xffffffff, gp->regs + MAC_MCMASK);
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_init_pause_thresholds(struct gem *gp)
{
u32 cfg;
@@ -2052,7 +2042,7 @@ static int gem_check_invariants(struct gem *gp)
return 0;
}
-/* Must be invoked under gp->lock and gp->tx_lock. */
+/* Must be invoked under gp->lock and dev->xmit_lock. */
static void gem_init_hw(struct gem *gp, int restart_link)
{
/* On Apple's gmac, I initialize the PHY only after
@@ -2150,11 +2140,11 @@ static void gem_stop_phy(struct gem *gp)
if (!gp->wake_on_lan) {
spin_lock_irqsave(&gp->lock, flags);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
gem_stop(gp);
writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST);
writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irqrestore(&gp->lock, flags);
}
@@ -2202,9 +2192,9 @@ static void gem_shutdown(struct gem *gp)
unsigned long flags;
spin_lock_irqsave(&gp->lock, flags);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
gem_stop(gp);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irqrestore(&gp->lock, flags);
}
}
@@ -2265,9 +2255,9 @@ static int gem_open(struct net_device *dev)
/* Reset the chip */
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
gem_stop(gp);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
gp->hw_running = 1;
@@ -2281,7 +2271,7 @@ static int gem_open(struct net_device *dev)
printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name);
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
#ifdef CONFIG_PPC_PMAC
if (!hw_was_up && gp->pdev->vendor == PCI_VENDOR_ID_APPLE)
gem_apple_powerdown(gp);
@@ -2290,14 +2280,14 @@ static int gem_open(struct net_device *dev)
gp->pm_timer.expires = jiffies + 10*HZ;
add_timer(&gp->pm_timer);
up(&gp->pm_sem);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
return -EAGAIN;
}
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
/* Allocate & setup ring buffers */
gem_init_rings(gp);
@@ -2307,7 +2297,7 @@ static int gem_open(struct net_device *dev)
gp->opened = 1;
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
up(&gp->pm_sem);
@@ -2328,7 +2318,7 @@ static int gem_close(struct net_device *dev)
/* Stop traffic, mark us closed */
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
gp->opened = 0;
@@ -2343,7 +2333,7 @@ static int gem_close(struct net_device *dev)
/* Bye, the pm timer will finish the job */
free_irq(gp->pdev->irq, (void *) dev);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
/* Fire the PM timer that will shut us down in about 10 seconds */
@@ -2374,7 +2364,7 @@ static int gem_suspend(struct pci_dev *pdev, u32 state)
/* If the driver is opened, we stop the DMA */
if (gp->opened) {
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
/* Stop traffic, mark us closed */
netif_device_detach(dev);
@@ -2385,7 +2375,7 @@ static int gem_suspend(struct pci_dev *pdev, u32 state)
/* Get rid of ring buffers */
gem_clean_rings(gp);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
if (gp->pdev->vendor == PCI_VENDOR_ID_APPLE)
@@ -2419,14 +2409,14 @@ static int gem_resume(struct pci_dev *pdev)
}
#endif /* CONFIG_PPC_PMAC */
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&gp->dev->xmit_lock);
gem_stop(gp);
gp->hw_running = 1;
gem_init_rings(gp);
gem_init_hw(gp, 1);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&gp->dev->xmit_lock);
spin_unlock_irq(&gp->lock);
netif_device_attach(dev);
@@ -2447,7 +2437,7 @@ static struct net_device_stats *gem_get_stats(struct net_device *dev)
struct net_device_stats *stats = &gp->net_stats;
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
if (gp->hw_running) {
stats->rx_crc_errors += readl(gp->regs + MAC_FCSERR);
@@ -2467,12 +2457,13 @@ static struct net_device_stats *gem_get_stats(struct net_device *dev)
writel(0, gp->regs + MAC_LCOLL);
}
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
return &gp->net_stats;
}
+/* Called with dev->xmit_lock held and IRQs disabled. */
static void gem_set_multicast(struct net_device *dev)
{
struct gem *gp = dev->priv;
@@ -2482,9 +2473,6 @@ static void gem_set_multicast(struct net_device *dev)
if (!gp->hw_running)
return;
- spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
-
netif_stop_queue(dev);
rxcfg = readl(gp->regs + MAC_RXCFG);
@@ -2507,9 +2495,6 @@ static void gem_set_multicast(struct net_device *dev)
writel(rxcfg, gp->regs + MAC_RXCFG);
netif_wake_queue(dev);
-
- spin_unlock(&gp->tx_lock);
- spin_unlock_irq(&gp->lock);
}
static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
@@ -2540,7 +2525,7 @@ static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
/* Return current PHY settings */
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
cmd->autoneg = gp->want_autoneg;
cmd->speed = gp->phy_mii.speed;
cmd->duplex = gp->phy_mii.duplex;
@@ -2552,7 +2537,7 @@ static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
*/
if (cmd->advertising == 0)
cmd->advertising = cmd->supported;
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
} else { // XXX PCS ?
cmd->supported =
@@ -2592,9 +2577,9 @@ static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
/* Apply settings and restart link process. */
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
gem_begin_auto_negotiation(gp, cmd);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
return 0;
@@ -2609,9 +2594,9 @@ static int gem_nway_reset(struct net_device *dev)
/* Restart link process. */
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
gem_begin_auto_negotiation(gp, NULL);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
return 0;
@@ -2863,7 +2848,6 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
gp->msg_enable = DEFAULT_MSG;
spin_lock_init(&gp->lock);
- spin_lock_init(&gp->tx_lock);
init_MUTEX(&gp->pm_sem);
init_timer(&gp->link_timer);
@@ -2899,9 +2883,9 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
gem_apple_powerup(gp);
#endif
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
gem_stop(gp);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
/* Fill up the mii_phy structure (even if we won't use it) */
@@ -2967,11 +2951,11 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
/* Detect & init PHY, start autoneg */
spin_lock_irq(&gp->lock);
- spin_lock(&gp->tx_lock);
+ spin_lock(&dev->xmit_lock);
gp->hw_running = 1;
gem_init_phy(gp);
gem_begin_auto_negotiation(gp, NULL);
- spin_unlock(&gp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&gp->lock);
if (gp->phy_type == phy_mii_mdio0 ||
@@ -2982,7 +2966,7 @@ static int __devinit gem_init_one(struct pci_dev *pdev,
pci_set_drvdata(pdev, dev);
/* GEM can do it all... */
- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX;
+ dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
if (pci_using_dac)
dev->features |= NETIF_F_HIGHDMA;
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index 00343226fb71..8bbc104d848f 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -953,7 +953,6 @@ enum link_state {
struct gem {
spinlock_t lock;
- spinlock_t tx_lock;
void __iomem *regs;
int rx_new, rx_old;
int tx_new, tx_old;
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 8a165aca7542..2088143716af 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -60,8 +60,8 @@
#define DRV_MODULE_NAME "tg3"
#define PFX DRV_MODULE_NAME ": "
-#define DRV_MODULE_VERSION "3.15"
-#define DRV_MODULE_RELDATE "January 6, 2005"
+#define DRV_MODULE_VERSION "3.16"
+#define DRV_MODULE_RELDATE "January 17, 2005"
#define TG3_DEF_MAC_MODE 0
#define TG3_DEF_RX_MODE 0
@@ -2706,7 +2706,11 @@ static int tg3_rx(struct tg3 *tp, int budget)
len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */
- if (len > RX_COPY_THRESHOLD) {
+ if (len > RX_COPY_THRESHOLD
+ && tp->rx_offset == 2
+ /* rx_offset != 2 iff this is a 5701 card running
+ * in PCI-X mode [see tg3_get_invariants()] */
+ ) {
int skb_size;
skb_size = tg3_alloc_rx_skb(tp, opaque_key,
@@ -2812,9 +2816,9 @@ static int tg3_poll(struct net_device *netdev, int *budget)
/* run TX completion thread */
if (sblk->idx[0].tx_consumer != tp->tx_cons) {
- spin_lock(&tp->tx_lock);
+ spin_lock(&netdev->xmit_lock);
tg3_tx(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&netdev->xmit_lock);
}
spin_unlock_irqrestore(&tp->lock, flags);
@@ -2935,7 +2939,7 @@ static void tg3_reset_task(void *_data)
tg3_netif_stop(tp);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&tp->dev->xmit_lock);
restart_timer = tp->tg3_flags2 & TG3_FLG2_RESTART_TIMER;
tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER;
@@ -2945,7 +2949,7 @@ static void tg3_reset_task(void *_data)
tg3_netif_start(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&tp->dev->xmit_lock);
spin_unlock_irq(&tp->lock);
if (restart_timer)
@@ -3044,6 +3048,7 @@ static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
(base + len + 8 < base));
}
+/* dev->xmit_lock is held and IRQs are disabled. */
static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct tg3 *tp = netdev_priv(dev);
@@ -3051,39 +3056,12 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
unsigned int i;
u32 len, entry, base_flags, mss;
int would_hit_hwbug;
- unsigned long flags;
len = skb_headlen(skb);
- /* No BH disabling for tx_lock here. We are running in BH disabled
- * context and TX reclaim runs via tp->poll inside of a software
- * interrupt. Rejoice!
- *
- * Actually, things are not so simple. If we are to take a hw
- * IRQ here, we can deadlock, consider:
- *
- * CPU1 CPU2
- * tg3_start_xmit
- * take tp->tx_lock
- * tg3_timer
- * take tp->lock
- * tg3_interrupt
- * spin on tp->lock
- * spin on tp->tx_lock
- *
- * So we really do need to disable interrupts when taking
- * tx_lock here.
- */
- local_irq_save(flags);
- if (!spin_trylock(&tp->tx_lock)) {
- local_irq_restore(flags);
- return NETDEV_TX_LOCKED;
- }
-
/* This is a hard error, log it. */
if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) {
netif_stop_queue(dev);
- spin_unlock_irqrestore(&tp->tx_lock, flags);
printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
dev->name);
return NETDEV_TX_BUSY;
@@ -3220,7 +3198,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
entry, len,
last_plus_one,
&start, mss))
- goto out_unlock;
+ goto out;
entry = start;
}
@@ -3232,9 +3210,8 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1))
netif_stop_queue(dev);
-out_unlock:
+out:
mmiowb();
- spin_unlock_irqrestore(&tp->tx_lock, flags);
dev->trans_start = jiffies;
@@ -3269,7 +3246,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
tg3_netif_stop(tp);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tg3_halt(tp);
@@ -3279,7 +3256,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
tg3_netif_start(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
return 0;
@@ -5570,7 +5547,7 @@ static void tg3_timer(unsigned long __opaque)
unsigned long flags;
spin_lock_irqsave(&tp->lock, flags);
- spin_lock(&tp->tx_lock);
+ spin_lock(&tp->dev->xmit_lock);
/* All of this garbage is because when using non-tagged
* IRQ status the mailbox/status_block protocol the chip
@@ -5586,7 +5563,7 @@ static void tg3_timer(unsigned long __opaque)
if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) {
tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER;
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&tp->dev->xmit_lock);
spin_unlock_irqrestore(&tp->lock, flags);
schedule_work(&tp->reset_task);
return;
@@ -5655,7 +5632,7 @@ static void tg3_timer(unsigned long __opaque)
tp->asf_counter = tp->asf_multiplier;
}
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&tp->dev->xmit_lock);
spin_unlock_irqrestore(&tp->lock, flags);
tp->timer.expires = jiffies + tp->timer_offset;
@@ -5668,12 +5645,12 @@ static int tg3_open(struct net_device *dev)
int err;
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tg3_disable_ints(tp);
tp->tg3_flags &= ~TG3_FLAG_INIT_COMPLETE;
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
/* The placement of this call is tied
@@ -5692,7 +5669,7 @@ static int tg3_open(struct net_device *dev)
}
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
err = tg3_init_hw(tp);
if (err) {
@@ -5712,7 +5689,7 @@ static int tg3_open(struct net_device *dev)
tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE;
}
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
if (err) {
@@ -5722,11 +5699,11 @@ static int tg3_open(struct net_device *dev)
}
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tg3_enable_ints(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
netif_start_queue(dev);
@@ -5974,7 +5951,7 @@ static int tg3_close(struct net_device *dev)
del_timer_sync(&tp->timer);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
#if 0
tg3_dump_state(tp);
#endif
@@ -5988,7 +5965,7 @@ static int tg3_close(struct net_device *dev)
TG3_FLAG_GOT_SERDES_FLOWCTL);
netif_carrier_off(tp->dev);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
free_irq(dev->irq, dev);
@@ -6287,15 +6264,10 @@ static void __tg3_set_rx_mode(struct net_device *dev)
}
}
+/* Called with dev->xmit_lock held and IRQs disabled. */
static void tg3_set_rx_mode(struct net_device *dev)
{
- struct tg3 *tp = netdev_priv(dev);
-
- spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
__tg3_set_rx_mode(dev);
- spin_unlock(&tp->tx_lock);
- spin_unlock_irq(&tp->lock);
}
#define TG3_REGDUMP_LEN (32 * 1024)
@@ -6318,7 +6290,7 @@ static void tg3_get_regs(struct net_device *dev,
memset(p, 0, TG3_REGDUMP_LEN);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
#define __GET_REG32(reg) (*(p)++ = tr32(reg))
#define GET_REG32_LOOP(base,len) \
@@ -6368,7 +6340,7 @@ do { p = (u32 *)(orig_p + (reg)); \
#undef GET_REG32_LOOP
#undef GET_REG32_1
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
}
@@ -6492,7 +6464,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
}
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tp->link_config.autoneg = cmd->autoneg;
if (cmd->autoneg == AUTONEG_ENABLE) {
@@ -6506,7 +6478,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
}
tg3_setup_phy(tp, 1);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
return 0;
@@ -6623,7 +6595,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
tg3_netif_stop(tp);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tp->rx_pending = ering->rx_pending;
@@ -6636,7 +6608,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e
tg3_halt(tp);
tg3_init_hw(tp);
tg3_netif_start(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
return 0;
@@ -6657,7 +6629,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
tg3_netif_stop(tp);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
if (epause->autoneg)
tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG;
else
@@ -6673,7 +6645,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam
tg3_halt(tp);
tg3_init_hw(tp);
tg3_netif_start(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
return 0;
@@ -6799,14 +6771,14 @@ static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp)
struct tg3 *tp = netdev_priv(dev);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tp->vlgrp = grp;
/* Update RX_MODE_KEEP_VLAN_TAG bit in RX_MODE register. */
__tg3_set_rx_mode(dev);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
}
@@ -6815,10 +6787,10 @@ static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid)
struct tg3 *tp = netdev_priv(dev);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
if (tp->vlgrp)
tp->vlgrp->vlan_devices[vid] = NULL;
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
}
#endif
@@ -8237,7 +8209,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
if (pci_using_dac)
dev->features |= NETIF_F_HIGHDMA;
- dev->features |= NETIF_F_LLTX;
#if TG3_VLAN_TAG_USED
dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX;
dev->vlan_rx_register = tg3_vlan_rx_register;
@@ -8279,7 +8250,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA;
#endif
spin_lock_init(&tp->lock);
- spin_lock_init(&tp->tx_lock);
spin_lock_init(&tp->indirect_lock);
INIT_WORK(&tp->reset_task, tg3_reset_task, tp);
@@ -8492,23 +8462,23 @@ static int tg3_suspend(struct pci_dev *pdev, u32 state)
del_timer_sync(&tp->timer);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tg3_disable_ints(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
netif_device_detach(dev);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tg3_halt(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
err = tg3_set_power_state(tp, state);
if (err) {
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tg3_init_hw(tp);
@@ -8518,7 +8488,7 @@ static int tg3_suspend(struct pci_dev *pdev, u32 state)
netif_device_attach(dev);
tg3_netif_start(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
}
@@ -8543,7 +8513,7 @@ static int tg3_resume(struct pci_dev *pdev)
netif_device_attach(dev);
spin_lock_irq(&tp->lock);
- spin_lock(&tp->tx_lock);
+ spin_lock(&dev->xmit_lock);
tg3_init_hw(tp);
@@ -8554,7 +8524,7 @@ static int tg3_resume(struct pci_dev *pdev)
tg3_netif_start(tp);
- spin_unlock(&tp->tx_lock);
+ spin_unlock(&dev->xmit_lock);
spin_unlock_irq(&tp->lock);
return 0;
diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h
index 3b22f53d2579..68b7520784ea 100644
--- a/drivers/net/tg3.h
+++ b/drivers/net/tg3.h
@@ -1980,12 +1980,11 @@ struct tg3 {
* lock: Held during all operations except TX packet
* processing.
*
- * tx_lock: Held during tg3_start_xmit{,_4gbug} and tg3_tx
+ * dev->xmit_lock: Held during tg3_start_xmit and tg3_tx
*
* If you want to shut up all asynchronous processing you must
- * acquire both locks, 'lock' taken before 'tx_lock'. IRQs must
- * be disabled to take 'lock' but only softirq disabling is
- * necessary for acquisition of 'tx_lock'.
+ * acquire both locks, 'lock' taken before 'xmit_lock'. IRQs must
+ * be disabled to take either lock.
*/
spinlock_t lock;
spinlock_t indirect_lock;
@@ -2004,8 +2003,6 @@ struct tg3 {
u32 tx_cons;
u32 tx_pending;
- spinlock_t tx_lock;
-
struct tg3_tx_buffer_desc *tx_ring;
struct tx_ring_info *tx_buffers;
dma_addr_t tx_desc_mapping;
diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c
index 6a10cc610ad4..a84ff2f17599 100644
--- a/drivers/s390/net/qeth_main.c
+++ b/drivers/s390/net/qeth_main.c
@@ -5033,27 +5033,6 @@ qeth_neigh_setup(struct net_device *dev, struct neigh_parms *np)
return 0;
}
-#ifdef CONFIG_QETH_IPV6
-int
-qeth_ipv6_generate_eui64(u8 * eui, struct net_device *dev)
-{
- switch (dev->type) {
- case ARPHRD_ETHER:
- case ARPHRD_FDDI:
- case ARPHRD_IEEE802_TR:
- if (dev->addr_len != ETH_ALEN)
- return -1;
- memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr + 3, 3);
- eui[3] = (dev->dev_id >> 8) & 0xff;
- eui[4] = dev->dev_id & 0xff;
- return 0;
- }
- return -1;
-
-}
-#endif
-
static void
qeth_get_mac_for_ipm(__u32 ipm, char *mac, struct net_device *dev)
{
@@ -5587,11 +5566,8 @@ qeth_netdev_init(struct net_device *dev)
}
#ifdef CONFIG_QETH_IPV6
/*IPv6 address autoconfiguration stuff*/
- card->dev->dev_id = card->info.unique_id & 0xffff;
if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD))
- card->dev->generate_eui64 = qeth_ipv6_generate_eui64;
-
-
+ card->dev->dev_id = card->info.unique_id & 0xffff;
#endif
dev->hard_header_parse = NULL;
dev->set_mac_address = qeth_layer2_set_mac_address;
diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h
index 87a901c75370..bc701ddb88e4 100644
--- a/include/linux/atmdev.h
+++ b/include/linux/atmdev.h
@@ -313,7 +313,7 @@ struct atm_vcc {
struct atm_dev_addr {
struct sockaddr_atmsvc addr; /* ATM address */
- struct atm_dev_addr *next; /* next address */
+ struct list_head entry; /* next address */
};
struct atm_dev {
@@ -325,7 +325,7 @@ struct atm_dev {
void *dev_data; /* per-device data */
void *phy_data; /* private PHY date */
unsigned long flags; /* device flags (ATM_DF_*) */
- struct atm_dev_addr *local; /* local ATM addresses */
+ struct list_head local; /* local ATM addresses */
unsigned char esi[ESI_LEN]; /* ESI ("MAC" addr) */
struct atm_cirange ci_range; /* VPI/VCI range */
struct k_atm_dev_stats stats; /* statistics */
diff --git a/include/linux/fcdevice.h b/include/linux/fcdevice.h
index daccf6c444d0..e42fc78f679e 100644
--- a/include/linux/fcdevice.h
+++ b/include/linux/fcdevice.h
@@ -27,10 +27,6 @@
#include <linux/if_fc.h>
#ifdef __KERNEL__
-extern int fc_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, void *daddr,
- void *saddr, unsigned len);
-extern int fc_rebuild_header(struct sk_buff *skb);
extern unsigned short fc_type_trans(struct sk_buff *skb, struct net_device *dev);
extern struct net_device *alloc_fcdev(int sizeof_priv);
diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h
index b6c55dad37a4..2e5ee47f3e1e 100644
--- a/include/linux/fddidevice.h
+++ b/include/linux/fddidevice.h
@@ -25,13 +25,6 @@
#include <linux/if_fddi.h>
#ifdef __KERNEL__
-extern int fddi_header(struct sk_buff *skb,
- struct net_device *dev,
- unsigned short type,
- void *daddr,
- void *saddr,
- unsigned len);
-extern int fddi_rebuild_header(struct sk_buff *skb);
extern unsigned short fddi_type_trans(struct sk_buff *skb,
struct net_device *dev);
extern struct net_device *alloc_fddidev(int sizeof_priv);
diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h
index 9d594ddfe5f4..89b3a4a5b761 100644
--- a/include/linux/hippidevice.h
+++ b/include/linux/hippidevice.h
@@ -26,30 +26,9 @@
#include <linux/if_hippi.h>
#ifdef __KERNEL__
-extern int hippi_header(struct sk_buff *skb,
- struct net_device *dev,
- unsigned short type,
- void *daddr,
- void *saddr,
- unsigned len);
-
-extern int hippi_rebuild_header(struct sk_buff *skb);
-
extern unsigned short hippi_type_trans(struct sk_buff *skb,
struct net_device *dev);
-extern void hippi_header_cache_bind(struct hh_cache ** hhp,
- struct net_device *dev,
- unsigned short htype,
- __u32 daddr);
-
-extern void hippi_header_cache_update(struct hh_cache *hh,
- struct net_device *dev,
- unsigned char * haddr);
-extern int hippi_header_parse(struct sk_buff *skb, unsigned char *haddr);
-
-extern void hippi_net_init(void);
-
extern struct net_device *alloc_hippi_dev(int sizeof_priv);
#endif
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 3fe93474047d..487152a404f8 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -158,6 +158,20 @@ static inline struct inet_sock *inet_sk(const struct sock *sk)
return (struct inet_sock *)sk;
}
+static inline void __inet_sk_copy_descendant(struct sock *sk_to,
+ const struct sock *sk_from,
+ const int ancestor_size)
+{
+ memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
+ sk_from->sk_prot->slab_obj_size - ancestor_size);
+}
+#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE))
+static inline void inet_sk_copy_descendant(struct sock *sk_to,
+ const struct sock *sk_from)
+{
+ __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock));
+}
+#endif
#endif
struct iphdr {
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f80c4b3f84a3..939942384b78 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -247,27 +247,26 @@ struct ipv6_pinfo {
} cork;
};
-struct raw6_opt {
+/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
+struct raw6_sock {
+ /* inet_sock has to be the first member of raw6_sock */
+ struct inet_sock inet;
__u32 checksum; /* perform checksum */
__u32 offset; /* checksum offset */
-
struct icmp6_filter filter;
-};
-
-/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */
-struct raw6_sock {
- struct inet_sock inet;
- struct raw6_opt raw6;
- struct ipv6_pinfo inet6;
+ /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
+ struct ipv6_pinfo inet6;
};
struct udp6_sock {
struct udp_sock udp;
+ /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */
struct ipv6_pinfo inet6;
};
struct tcp6_sock {
struct tcp_sock tcp;
+ /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */
struct ipv6_pinfo inet6;
};
@@ -277,9 +276,20 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
return inet_sk(__sk)->pinet6;
}
-static inline struct raw6_opt * raw6_sk(const struct sock *__sk)
+static inline struct raw6_sock *raw6_sk(const struct sock *sk)
+{
+ return (struct raw6_sock *)sk;
+}
+
+static inline void inet_sk_copy_descendant(struct sock *sk_to,
+ const struct sock *sk_from)
{
- return &((struct raw6_sock *)__sk)->raw6;
+ int ancestor_size = sizeof(struct inet_sock);
+
+ if (sk_from->sk_family == PF_INET6)
+ ancestor_size += sizeof(struct ipv6_pinfo);
+
+ __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size);
}
#define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only)
@@ -293,7 +303,7 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
return NULL;
}
-static inline struct raw6_opt * raw6_sk(const struct sock *__sk)
+static inline struct raw6_sock *raw6_sk(const struct sock *sk)
{
return NULL;
}
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fca36de5e3fc..48e3d5f4bcde 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -76,7 +76,6 @@ struct ethtool_ops;
/* Driver transmit return codes */
#define NETDEV_TX_OK 0 /* driver took care of packet */
#define NETDEV_TX_BUSY 1 /* driver tx path was busy*/
-#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */
/*
* Compute the worst case header length according to the protocols
@@ -345,6 +344,7 @@ struct net_device
unsigned char broadcast[MAX_ADDR_LEN]; /* hw bcast add */
unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */
unsigned char addr_len; /* hardware address length */
+ unsigned short dev_id; /* for shared network cards */
struct dev_mc_list *mc_list; /* Multicast mac addresses */
int mc_count; /* Number of installed mcasts */
@@ -414,7 +414,7 @@ struct net_device
#define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */
#define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */
#define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */
-#define NETIF_F_LLTX 4096 /* LockLess TX */
+#define NETIF_F_LLTX 4096 /* Do not grab xmit_lock during ->hard_start_xmit */
/* Called after device is detached from network. */
void (*uninit)(struct net_device *dev);
@@ -893,9 +893,11 @@ static inline void __netif_rx_complete(struct net_device *dev)
static inline void netif_tx_disable(struct net_device *dev)
{
- spin_lock_bh(&dev->xmit_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->xmit_lock, flags);
netif_stop_queue(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irqrestore(&dev->xmit_lock, flags);
}
/* These functions live elsewhere (drivers/net/net_init.c, but related) */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index b88b52c33db7..675a01a13398 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -3,13 +3,6 @@
/* Connection state tracking for netfilter. This is separated from,
but required by, the NAT layer; it can also be used by an iptables
extension. */
-
-#include <linux/config.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
-#include <linux/bitops.h>
-#include <linux/compiler.h>
-#include <asm/atomic.h>
-
enum ip_conntrack_info
{
/* Part of an established connection (either direction). */
@@ -47,8 +40,40 @@ enum ip_conntrack_status {
/* Connection is confirmed: originating packet has left box */
IPS_CONFIRMED_BIT = 3,
IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
+
+ /* Connection needs src nat in orig dir. This bit never changed. */
+ IPS_SRC_NAT_BIT = 4,
+ IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT),
+
+ /* Connection needs dst nat in orig dir. This bit never changed. */
+ IPS_DST_NAT_BIT = 5,
+ IPS_DST_NAT = (1 << IPS_DST_NAT_BIT),
+
+ /* Both together. */
+ IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT),
+
+ /* Connection needs TCP sequence adjusted. */
+ IPS_SEQ_ADJUST_BIT = 6,
+ IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT),
+
+ /* NAT initialization bits. */
+ IPS_SRC_NAT_DONE_BIT = 7,
+ IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT),
+
+ IPS_DST_NAT_DONE_BIT = 8,
+ IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT),
+
+ /* Both together */
+ IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE),
};
+#ifdef __KERNEL__
+#include <linux/config.h>
+#include <linux/netfilter_ipv4/ip_conntrack_tuple.h>
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <asm/atomic.h>
+
#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
#include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
#include <linux/netfilter_ipv4/ip_conntrack_sctp.h>
@@ -70,20 +95,6 @@ union ip_conntrack_expect_proto {
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-/* per expectation: application helper private data */
-union ip_conntrack_expect_help {
- /* insert conntrack helper private data (expect) here */
- struct ip_ct_amanda_expect exp_amanda_info;
- struct ip_ct_ftp_expect exp_ftp_info;
- struct ip_ct_irc_expect exp_irc_info;
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
- union {
- /* insert nat helper private data (expect) here */
- } nat;
-#endif
-};
-
/* per conntrack: application helper private data */
union ip_conntrack_help {
/* insert conntrack helper private data (master) here */
@@ -93,15 +104,8 @@ union ip_conntrack_help {
#ifdef CONFIG_IP_NF_NAT_NEEDED
#include <linux/netfilter_ipv4/ip_nat.h>
-
-/* per conntrack: nat application helper private data */
-union ip_conntrack_nat_help {
- /* insert nat helper private data here */
-};
#endif
-#ifdef __KERNEL__
-
#include <linux/types.h>
#include <linux/skbuff.h>
@@ -123,39 +127,26 @@ struct ip_conntrack_expect
/* Internal linked list (global expectation list) */
struct list_head list;
- /* reference count */
- atomic_t use;
-
- /* expectation list for this master */
- struct list_head expected_list;
+ /* We expect this tuple, with the following mask */
+ struct ip_conntrack_tuple tuple, mask;
+
+ /* Function to call after setup and insertion */
+ void (*expectfn)(struct ip_conntrack *new,
+ struct ip_conntrack_expect *this);
/* The conntrack of the master connection */
- struct ip_conntrack *expectant;
-
- /* The conntrack of the sibling connection, set after
- * expectation arrived */
- struct ip_conntrack *sibling;
-
- /* Tuple saved for conntrack */
- struct ip_conntrack_tuple ct_tuple;
+ struct ip_conntrack *master;
/* Timer function; deletes the expectation. */
struct timer_list timeout;
- /* Data filled out by the conntrack helpers follow: */
-
- /* We expect this tuple, with the following mask */
- struct ip_conntrack_tuple tuple, mask;
-
- /* Function to call after setup and insertion */
- int (*expectfn)(struct ip_conntrack *new);
-
- /* At which sequence number did this expectation occur */
- u_int32_t seq;
-
- union ip_conntrack_expect_proto proto;
-
- union ip_conntrack_expect_help help;
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+ /* This is the original per-proto part, used to map the
+ * expected connection the way the recipient expects. */
+ union ip_conntrack_manip_proto saved_proto;
+ /* Direction relative to the master connection. */
+ enum ip_conntrack_dir dir;
+#endif
};
struct ip_conntrack_counter
@@ -182,17 +173,12 @@ struct ip_conntrack
/* Accounting Information (same cache line as other written members) */
struct ip_conntrack_counter counters[IP_CT_DIR_MAX];
#endif
+ /* If we were expected by an expectation, this will be it */
+ struct ip_conntrack *master;
- /* If we're expecting another related connection, this will be
- in expected linked list */
- struct list_head sibling_list;
-
/* Current number of expected connections */
unsigned int expecting;
- /* If we were expected by an expectation, this will be it */
- struct ip_conntrack_expect *master;
-
/* Helper, if any. */
struct ip_conntrack_helper *helper;
@@ -204,7 +190,6 @@ struct ip_conntrack
#ifdef CONFIG_IP_NF_NAT_NEEDED
struct {
struct ip_nat_info info;
- union ip_conntrack_nat_help help;
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
int masq_index;
@@ -221,8 +206,15 @@ struct ip_conntrack
struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX];
};
+static inline struct ip_conntrack *
+tuplehash_to_ctrack(const struct ip_conntrack_tuple_hash *hash)
+{
+ return container_of(hash, struct ip_conntrack,
+ tuplehash[hash->tuple.dst.dir]);
+}
+
/* get master conntrack via master expectation */
-#define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL)
+#define master_ct(conntr) (conntr->master)
/* Alter reply tuple (maybe alter helper). */
extern void
@@ -246,13 +238,6 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
/* decrement reference count on a conntrack */
extern inline void ip_conntrack_put(struct ip_conntrack *ct);
-/* find unconfirmed expectation based on tuple */
-struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple);
-
-/* decrement reference count on an expectation */
-void ip_conntrack_expect_put(struct ip_conntrack_expect *exp);
-
/* call to create an explicit dependency on ip_conntrack. */
extern void need_ip_conntrack(void);
@@ -267,9 +252,9 @@ extern void ip_ct_refresh_acct(struct ip_conntrack *ct,
/* These are for NAT. Icky. */
/* Update TCP window tracking data when NAT mangles the packet */
-extern int ip_conntrack_tcp_update(struct sk_buff *skb,
- struct ip_conntrack *conntrack,
- int dir);
+extern void ip_conntrack_tcp_update(struct sk_buff *skb,
+ struct ip_conntrack *conntrack,
+ enum ip_conntrack_dir dir);
/* Call me when a conntrack is destroyed. */
extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
@@ -316,15 +301,12 @@ struct ip_conntrack_stat
#define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++)
-/* eg. PROVIDES_CONNTRACK(ftp); */
-#define PROVIDES_CONNTRACK(name) \
- int needs_ip_conntrack_##name; \
- EXPORT_SYMBOL(needs_ip_conntrack_##name)
-
-/*. eg. NEEDS_CONNTRACK(ftp); */
-#define NEEDS_CONNTRACK(name) \
- extern int needs_ip_conntrack_##name; \
- static int *need_ip_conntrack_##name __attribute_used__ = &needs_ip_conntrack_##name
-
+static inline int ip_nat_initialized(struct ip_conntrack *conntrack,
+ enum ip_nat_manip_type manip)
+{
+ if (manip == IP_NAT_MANIP_SRC)
+ return test_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
+ return test_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
+}
#endif /* __KERNEL__ */
#endif /* _IP_CONNTRACK_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h b/include/linux/netfilter_ipv4/ip_conntrack_amanda.h
index 75ee293bd088..de3e41f51aec 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_amanda.h
@@ -2,11 +2,10 @@
#define _IP_CONNTRACK_AMANDA_H
/* AMANDA tracking. */
-struct ip_ct_amanda_expect
-{
- u_int16_t port; /* port number of this expectation */
- u_int16_t offset; /* offset of port in ctrl packet */
- u_int16_t len; /* length of the port number string */
-};
-
+struct ip_conntrack_expect;
+extern unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp);
#endif /* _IP_CONNTRACK_AMANDA_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index bb9b11c680ac..d84be02cb4fc 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -34,20 +34,19 @@ struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack);
-extern int __ip_conntrack_confirm(struct sk_buff *skb);
+extern int __ip_conntrack_confirm(struct sk_buff **pskb);
/* Confirm a connection: returns NF_DROP if packet must be dropped. */
-static inline int ip_conntrack_confirm(struct sk_buff *skb)
+static inline int ip_conntrack_confirm(struct sk_buff **pskb)
{
- if (skb->nfct
- && !is_confirmed((struct ip_conntrack *)skb->nfct))
- return __ip_conntrack_confirm(skb);
+ if ((*pskb)->nfct
+ && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct))
+ return __ip_conntrack_confirm(pskb);
return NF_ACCEPT;
}
extern struct list_head *ip_conntrack_hash;
extern struct list_head ip_conntrack_expect_list;
DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
-DECLARE_RWLOCK_EXTERN(ip_conntrack_expect_tuple_lock);
#endif /* _IP_CONNTRACK_CORE_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
index 2f85006c75f9..5f06429b9047 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h
@@ -20,24 +20,24 @@ enum ip_ct_ftp_type
IP_CT_FTP_EPSV,
};
-/* This structure is per expected connection */
-struct ip_ct_ftp_expect
-{
- /* We record seq number and length of ftp ip/port text here: all in
- * host order. */
-
- /* sequence number of IP address in packet is in ip_conntrack_expect */
- u_int32_t len; /* length of IP address */
- enum ip_ct_ftp_type ftptype; /* PORT or PASV ? */
- u_int16_t port; /* TCP port that was to be used */
-};
-
+#define NUM_SEQ_TO_REMEMBER 2
/* This structure exists only once per master */
struct ip_ct_ftp_master {
- /* Next valid seq position for cmd matching after newline */
- u_int32_t seq_aft_nl[IP_CT_DIR_MAX];
+ /* Valid seq positions for cmd matching after newline */
+ u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER];
/* 0 means seq_match_aft_nl not set */
- int seq_aft_nl_set[IP_CT_DIR_MAX];
+ int seq_aft_nl_num[IP_CT_DIR_MAX];
};
+struct ip_conntrack_expect;
+
+/* For NAT to hook in when we find a packet which describes what other
+ * connection we should expect. */
+extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ enum ip_ct_ftp_type type,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp,
+ u32 *seq);
#endif /* _IP_CONNTRACK_FTP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
index fe6268bd1737..b1bbba0a12cb 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h
@@ -5,15 +5,11 @@
struct module;
-/* Reuse expectation when max_expected reached */
-#define IP_CT_HELPER_F_REUSE_EXPECT 0x01
-
struct ip_conntrack_helper
{
struct list_head list; /* Internal use. */
const char *name; /* name of the module */
- unsigned char flags; /* Flags (see above) */
struct module *me; /* pointer to self */
unsigned int max_expected; /* Maximum number of concurrent
* expected connections */
@@ -25,7 +21,7 @@ struct ip_conntrack_helper
/* Function to call when data passes; return verdict, or -1 to
invalidate. */
- int (*help)(struct sk_buff *skb,
+ int (*help)(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info conntrackinfo);
};
@@ -33,17 +29,13 @@ struct ip_conntrack_helper
extern int ip_conntrack_helper_register(struct ip_conntrack_helper *);
extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *);
-extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple);
-
-
/* Allocate space for an expectation: this is mandatory before calling
ip_conntrack_expect_related. */
extern struct ip_conntrack_expect *ip_conntrack_expect_alloc(void);
+extern void ip_conntrack_expect_free(struct ip_conntrack_expect *exp);
+
/* Add an expected connection: can have more than one per connection */
-extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp,
- struct ip_conntrack *related_to);
-extern int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
- struct ip_conntrack_tuple *newtuple);
+extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp);
extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp);
#endif /*_IP_CONNTRACK_HELPER_H*/
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_irc.h b/include/linux/netfilter_ipv4/ip_conntrack_irc.h
index 0cd24a02d360..16601e0d5626 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_irc.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_irc.h
@@ -14,24 +14,16 @@
#ifndef _IP_CONNTRACK_IRC_H
#define _IP_CONNTRACK_IRC_H
-/* We record seq number and length of irc ip/port text here: all in
- host order. */
-
-/* This structure is per expected connection */
-struct ip_ct_irc_expect
-{
- /* length of IP address */
- u_int32_t len;
- /* Port that was to be used */
- u_int16_t port;
-};
-
/* This structure exists only once per master */
struct ip_ct_irc_master {
};
-
#ifdef __KERNEL__
+extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp);
#define IRC_PORT 6667
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
index 0ff067b3fdb0..e20b57c5e1b7 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
@@ -44,10 +44,6 @@ struct ip_conntrack_protocol
/* Called when a conntrack entry is destroyed */
void (*destroy)(struct ip_conntrack *conntrack);
- /* Has to decide if a expectation matches one packet or not */
- int (*exp_matches_pkt)(struct ip_conntrack_expect *exp,
- const struct sk_buff *skb);
-
int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
unsigned int hooknum);
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h b/include/linux/netfilter_ipv4/ip_conntrack_tftp.h
index 8b75b45f1f61..50fbafdf9ed5 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_tftp.h
@@ -13,4 +13,8 @@ struct tftphdr {
#define TFTP_OPCODE_ACK 4
#define TFTP_OPCODE_ERROR 5
+unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack_expect *exp);
+
#endif /* _IP_CT_TFTP */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
index 3a71176e2060..ca1afa8fc693 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h
@@ -64,7 +64,10 @@ struct ip_conntrack_tuple
} u;
/* The protocol. */
- u_int16_t protonum;
+ u8 protonum;
+
+ /* The direction (for tuplehash) */
+ u8 dir;
} dst;
};
@@ -94,7 +97,7 @@ DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n", \
#define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL)
/* If we're the first tuple, it's the original dir. */
-#define DIRECTION(h) ((enum ip_conntrack_dir)(&(h)->ctrack->tuplehash[1] == (h)))
+#define DIRECTION(h) ((enum ip_conntrack_dir)(h)->tuple.dst.dir)
/* Connections have two entries in the hash table: one for each way */
struct ip_conntrack_tuple_hash
@@ -102,9 +105,6 @@ struct ip_conntrack_tuple_hash
struct list_head list;
struct ip_conntrack_tuple tuple;
-
- /* this == &ctrack->tuplehash[DIRECTION(this)]. */
- struct ip_conntrack *ctrack;
};
#endif /* __KERNEL__ */
diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h
index 06e5ad38683e..2b72b86176f0 100644
--- a/include/linux/netfilter_ipv4/ip_nat.h
+++ b/include/linux/netfilter_ipv4/ip_nat.h
@@ -16,8 +16,6 @@ enum ip_nat_manip_type
#define IP_NAT_RANGE_MAP_IPS 1
#define IP_NAT_RANGE_PROTO_SPECIFIED 2
-/* Used internally by get_unique_tuple(). */
-#define IP_NAT_RANGE_FULL 4
/* NAT sequence number modifications */
struct ip_nat_seq {
@@ -50,24 +48,6 @@ struct ip_nat_multi_range_compat
struct ip_nat_range range[1];
};
-/* Worst case: local-out manip + 1 post-routing, and reverse dirn. */
-#define IP_NAT_MAX_MANIPS (2*3)
-
-struct ip_nat_info_manip
-{
- /* The direction. */
- u_int8_t direction;
-
- /* Which hook the manipulation happens on. */
- u_int8_t hooknum;
-
- /* The manipulation type. */
- u_int8_t maniptype;
-
- /* Manipulations to occur at each conntrack in this dirn. */
- struct ip_conntrack_manip manip;
-};
-
#ifdef __KERNEL__
#include <linux/list.h>
#include <linux/netfilter_ipv4/lockhelp.h>
@@ -78,14 +58,6 @@ DECLARE_RWLOCK_EXTERN(ip_nat_lock);
/* The structure embedded in the conntrack structure. */
struct ip_nat_info
{
- /* Set to zero when conntrack created: bitmask of maniptypes */
- u_int16_t initialized;
-
- u_int16_t num_manips;
-
- /* Manipulations to be done on this conntrack. */
- struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS];
-
struct list_head bysource;
/* Helper (NULL if none). */
@@ -94,6 +66,8 @@ struct ip_nat_info
struct ip_nat_seq seq[IP_CT_DIR_MAX];
};
+struct ip_conntrack;
+
/* Set up the info structure to map into this range. */
extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack,
const struct ip_nat_range *range,
diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h
index 4f01f882f2fc..3b50eb91f007 100644
--- a/include/linux/netfilter_ipv4/ip_nat_core.h
+++ b/include/linux/netfilter_ipv4/ip_nat_core.h
@@ -8,20 +8,13 @@
extern int ip_nat_init(void);
extern void ip_nat_cleanup(void);
-extern unsigned int do_bindings(struct ip_conntrack *ct,
- enum ip_conntrack_info conntrackinfo,
- struct ip_nat_info *info,
- unsigned int hooknum,
- struct sk_buff **pskb);
+extern unsigned int nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info conntrackinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb);
extern int icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *conntrack,
- unsigned int hooknum,
- int dir);
-
-extern void replace_in_hashes(struct ip_conntrack *conntrack,
- struct ip_nat_info *info);
-extern void place_in_hashes(struct ip_conntrack *conntrack,
- struct ip_nat_info *info);
-
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir);
#endif /* _IP_NAT_CORE_H */
diff --git a/include/linux/netfilter_ipv4/ip_nat_helper.h b/include/linux/netfilter_ipv4/ip_nat_helper.h
index b34e4ce9ee6a..bf9cb105c885 100644
--- a/include/linux/netfilter_ipv4/ip_nat_helper.h
+++ b/include/linux/netfilter_ipv4/ip_nat_helper.h
@@ -7,46 +7,6 @@
struct sk_buff;
-/* Flags */
-/* NAT helper must be called on every packet (for TCP) */
-#define IP_NAT_HELPER_F_ALWAYS 0x01
-
-struct ip_nat_helper
-{
- struct list_head list; /* Internal use */
-
- const char *name; /* name of the module */
- unsigned char flags; /* Flags (see above) */
- struct module *me; /* pointer to self */
-
- /* Mask of things we will help: vs. tuple from server */
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_tuple mask;
-
- /* Helper function: returns verdict */
- unsigned int (*help)(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp,
- struct ip_nat_info *info,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb);
-
- /* Returns verdict and sets up NAT for this connection */
- unsigned int (*expect)(struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info);
-};
-
-extern int ip_nat_helper_register(struct ip_nat_helper *me);
-extern void ip_nat_helper_unregister(struct ip_nat_helper *me);
-
-extern struct ip_nat_helper *
-ip_nat_find_helper(const struct ip_conntrack_tuple *tuple);
-
-extern struct ip_nat_helper *
-__ip_nat_find_helper(const struct ip_conntrack_tuple *tuple);
-
/* These return true or false. */
extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb,
struct ip_conntrack *ct,
@@ -65,4 +25,9 @@ extern int ip_nat_mangle_udp_packet(struct sk_buff **skb,
extern int ip_nat_seq_adjust(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo);
+
+/* Setup NAT on this expected conntrack so it follows master, but goes
+ * to port ct->master->saved_proto. */
+extern void ip_nat_follow_master(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *this);
#endif
diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h
index f343239cd4ea..129708c22386 100644
--- a/include/linux/netfilter_ipv4/ip_nat_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h
@@ -15,11 +15,11 @@ struct ip_nat_protocol
/* Protocol number. */
unsigned int protonum;
- /* Do a packet translation according to the ip_nat_proto_manip
- * and manip type. Return true if succeeded. */
+ /* Translate a packet to the target according to manip type.
+ Return true if succeeded. */
int (*manip_pkt)(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype);
/* Is the manipable part of the tuple between min and max incl? */
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index b8d7df3916a5..b31ca0400372 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -203,10 +203,6 @@ struct tcp_sack_block {
__u32 end_seq;
};
-typedef struct tcp_pcount {
- __u32 val;
-} tcp_pcount_t;
-
enum tcp_congestion_algo {
TCP_RENO=0,
TCP_VEGAS,
@@ -289,9 +285,9 @@ struct tcp_sock {
__u32 rtt_seq; /* sequence number to update rttvar */
__u32 rto; /* retransmit timeout */
- tcp_pcount_t packets_out; /* Packets which are "in flight" */
- tcp_pcount_t left_out; /* Packets which leaved network */
- tcp_pcount_t retrans_out; /* Retransmitted packets out */
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 left_out; /* Packets which leaved network */
+ __u32 retrans_out; /* Retransmitted packets out */
/*
@@ -352,9 +348,9 @@ struct tcp_sock {
__u8 syn_retries; /* num of allowed syn retries */
__u8 ecn_flags; /* ECN status bits. */
__u16 prior_ssthresh; /* ssthresh saved at recovery start */
- tcp_pcount_t lost_out; /* Lost packets */
- tcp_pcount_t sacked_out;/* SACK'd packets */
- tcp_pcount_t fackets_out;/* FACK'd packets */
+ __u32 lost_out; /* Lost packets */
+ __u32 sacked_out; /* SACK'd packets */
+ __u32 fackets_out; /* FACK'd packets */
__u32 high_seq; /* snd_nxt at onset of congestion */
__u32 retrans_stamp; /* Timestamp of the last retransmit,
diff --git a/include/linux/trdevice.h b/include/linux/trdevice.h
index 2662f57568d4..aaa1f337edcb 100644
--- a/include/linux/trdevice.h
+++ b/include/linux/trdevice.h
@@ -28,10 +28,6 @@
#include <linux/if_tr.h>
#ifdef __KERNEL__
-extern int tr_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, void *daddr,
- void *saddr, unsigned len);
-extern int tr_rebuild_header(struct sk_buff *skb);
extern unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev);
extern void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct net_device *dev);
extern struct net_device *alloc_trdev(int sizeof_priv);
diff --git a/include/net/ipx.h b/include/net/ipx.h
index 006a31bfa2d1..5c0cf33826c5 100644
--- a/include/net/ipx.h
+++ b/include/net/ipx.h
@@ -90,7 +90,11 @@ struct ipx_cb {
} last_hop;
};
-struct ipx_opt {
+#include <net/sock.h>
+
+struct ipx_sock {
+ /* struct sock has to be the first member of ipx_sock */
+ struct sock sk;
struct ipx_address dest_addr;
struct ipx_interface *intrfc;
unsigned short port;
@@ -105,9 +109,14 @@ struct ipx_opt {
unsigned short ipx_ncp_conn;
};
-#define ipx_sk(__sk) ((struct ipx_opt *)(__sk)->sk_protinfo)
+static inline struct ipx_sock *ipx_sk(struct sock *sk)
+{
+ return (struct ipx_sock *)sk;
+}
+
#define IPX_SKB_CB(__skb) ((struct ipx_cb *)&((__skb)->cb[0]))
#endif
+
#define IPX_MIN_EPHEMERAL_SOCKET 0x4000
#define IPX_MAX_EPHEMERAL_SOCKET 0x7fff
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index cca17d427c9d..960abfa48d68 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -423,7 +423,7 @@ static inline __s32 sctp_jitter(__u32 rto)
}
/* Break down data chunks at this point. */
-static inline int sctp_frag_point(const struct sctp_opt *sp, int pmtu)
+static inline int sctp_frag_point(const struct sctp_sock *sp, int pmtu)
{
int frag = pmtu;
@@ -576,23 +576,6 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag)
return (h & (sctp_assoc_hashsize-1));
}
-/* WARNING: Do not change the layout of the members in sctp_sock! */
-struct sctp_sock {
- struct inet_sock inet;
- struct sctp_opt sctp;
-};
-
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-struct sctp6_sock {
- struct inet_sock inet;
- struct sctp_opt sctp;
- struct ipv6_pinfo inet6;
-};
-#endif /* CONFIG_IPV6 */
-
-#define sctp_sk(__sk) (&((struct sctp_sock *)__sk)->sctp)
-#define sctp_opt2sk(__sp) &container_of(__sp, struct sctp_sock, sctp)->inet.sk
-
/* Is a socket of this style? */
#define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style))
static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index dfa0dc43fb17..7e64cf6bda1e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -58,6 +58,7 @@
#include <linux/socket.h> /* linux/in.h needs this!! */
#include <linux/in.h> /* We get struct sockaddr_in. */
#include <linux/in6.h> /* We get struct in6_addr */
+#include <linux/ipv6.h>
#include <asm/param.h> /* We get MAXHOSTNAMELEN. */
#include <asm/atomic.h> /* This gets us atomic counters. */
#include <linux/skbuff.h> /* We need sk_buff_head. */
@@ -84,7 +85,6 @@ struct sctp_inq;
struct sctp_outq;
struct sctp_bind_addr;
struct sctp_ulpq;
-struct sctp_opt;
struct sctp_ep_common;
struct sctp_ssnmap;
@@ -234,7 +234,9 @@ typedef enum {
} sctp_socket_type_t;
/* Per socket SCTP information. */
-struct sctp_opt {
+struct sctp_sock {
+ /* inet_sock has to be the first member of sctp_sock */
+ struct inet_sock inet;
/* What kind of a socket is this? */
sctp_socket_type_t type;
@@ -272,6 +274,22 @@ struct sctp_opt {
struct sk_buff_head pd_lobby;
};
+static inline struct sctp_sock *sctp_sk(const struct sock *sk)
+{
+ return (struct sctp_sock *)sk;
+}
+
+static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp)
+{
+ return (struct sock *)sp;
+}
+
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+struct sctp6_sock {
+ struct sctp_sock sctp;
+ struct ipv6_pinfo inet6;
+};
+#endif /* CONFIG_IPV6 */
/* This is our APPLICATION-SPECIFIC state cookie.
@@ -487,12 +505,12 @@ struct sctp_af {
int (*to_addr_param) (const union sctp_addr *,
union sctp_addr_param *);
int (*addr_valid) (union sctp_addr *,
- struct sctp_opt *);
+ struct sctp_sock *);
sctp_scope_t (*scope) (union sctp_addr *);
void (*inaddr_any) (union sctp_addr *, unsigned short);
int (*is_any) (const union sctp_addr *);
int (*available) (union sctp_addr *,
- struct sctp_opt *);
+ struct sctp_sock *);
int (*skb_iif) (const struct sk_buff *sk);
int (*is_ce) (const struct sk_buff *sk);
void (*seq_dump_addr)(struct seq_file *seq,
@@ -510,16 +528,16 @@ int sctp_register_af(struct sctp_af *);
struct sctp_pf {
void (*event_msgname)(struct sctp_ulpevent *, char *, int *);
void (*skb_msgname) (struct sk_buff *, char *, int *);
- int (*af_supported) (sa_family_t, struct sctp_opt *);
+ int (*af_supported) (sa_family_t, struct sctp_sock *);
int (*cmp_addr) (const union sctp_addr *,
const union sctp_addr *,
- struct sctp_opt *);
- int (*bind_verify) (struct sctp_opt *, union sctp_addr *);
- int (*send_verify) (struct sctp_opt *, union sctp_addr *);
- int (*supported_addrs)(const struct sctp_opt *, __u16 *);
+ struct sctp_sock *);
+ int (*bind_verify) (struct sctp_sock *, union sctp_addr *);
+ int (*send_verify) (struct sctp_sock *, union sctp_addr *);
+ int (*supported_addrs)(const struct sctp_sock *, __u16 *);
struct sock *(*create_accept_sk) (struct sock *sk,
struct sctp_association *asoc);
- void (*addr_v4map) (struct sctp_opt *, union sctp_addr *);
+ void (*addr_v4map) (struct sctp_sock *, union sctp_addr *);
struct sctp_af *af;
};
@@ -922,7 +940,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *, int);
void sctp_transport_set_owner(struct sctp_transport *,
struct sctp_association *);
void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
- struct sctp_opt *);
+ struct sctp_sock *);
void sctp_transport_pmtu(struct sctp_transport *);
void sctp_transport_free(struct sctp_transport *);
void sctp_transport_reset_timers(struct sctp_transport *);
@@ -1071,11 +1089,11 @@ int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *,
int gfp);
int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *);
int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *,
- struct sctp_opt *);
+ struct sctp_sock *);
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
const union sctp_addr *addrs,
int addrcnt,
- struct sctp_opt *opt);
+ struct sctp_sock *opt);
union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp,
int *addrs_len, int gfp);
int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 8987a316fe91..1a54cdba14ba 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1176,55 +1176,23 @@ static inline int tcp_skb_mss(const struct sk_buff *skb)
return skb_shinfo(skb)->tso_size;
}
-static inline void tcp_inc_pcount(tcp_pcount_t *count,
- const struct sk_buff *skb)
-{
- count->val += tcp_skb_pcount(skb);
-}
-
-static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt)
-{
- count->val += amt;
-}
-
-static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt)
-{
- count->val -= amt;
-}
-
-static inline void tcp_dec_pcount(tcp_pcount_t *count,
- const struct sk_buff *skb)
-{
- count->val -= tcp_skb_pcount(skb);
-}
-
-static inline void tcp_dec_pcount_approx(tcp_pcount_t *count,
+static inline void tcp_dec_pcount_approx(__u32 *count,
const struct sk_buff *skb)
{
- if (count->val) {
- count->val -= tcp_skb_pcount(skb);
- if ((int)count->val < 0)
- count->val = 0;
+ if (*count) {
+ *count -= tcp_skb_pcount(skb);
+ if ((int)*count < 0)
+ *count = 0;
}
}
-static inline __u32 tcp_get_pcount(const tcp_pcount_t *count)
-{
- return count->val;
-}
-
-static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val)
-{
- count->val = val;
-}
-
static inline void tcp_packets_out_inc(struct sock *sk,
struct tcp_sock *tp,
const struct sk_buff *skb)
{
- int orig = tcp_get_pcount(&tp->packets_out);
+ int orig = tp->packets_out;
- tcp_inc_pcount(&tp->packets_out, skb);
+ tp->packets_out += tcp_skb_pcount(skb);
if (!orig)
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
}
@@ -1232,7 +1200,7 @@ static inline void tcp_packets_out_inc(struct sock *sk,
static inline void tcp_packets_out_dec(struct tcp_sock *tp,
const struct sk_buff *skb)
{
- tcp_dec_pcount(&tp->packets_out, skb);
+ tp->packets_out -= tcp_skb_pcount(skb);
}
/* This determines how many packets are "in the network" to the best
@@ -1251,9 +1219,7 @@ static inline void tcp_packets_out_dec(struct tcp_sock *tp,
*/
static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
- return (tcp_get_pcount(&tp->packets_out) -
- tcp_get_pcount(&tp->left_out) +
- tcp_get_pcount(&tp->retrans_out));
+ return (tp->packets_out - tp->left_out + tp->retrans_out);
}
/*
@@ -1357,14 +1323,9 @@ static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp)
static inline void tcp_sync_left_out(struct tcp_sock *tp)
{
if (tp->sack_ok &&
- (tcp_get_pcount(&tp->sacked_out) >=
- tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out)))
- tcp_set_pcount(&tp->sacked_out,
- (tcp_get_pcount(&tp->packets_out) -
- tcp_get_pcount(&tp->lost_out)));
- tcp_set_pcount(&tp->left_out,
- (tcp_get_pcount(&tp->sacked_out) +
- tcp_get_pcount(&tp->lost_out)));
+ (tp->sacked_out >= tp->packets_out - tp->lost_out))
+ tp->sacked_out = tp->packets_out - tp->lost_out;
+ tp->left_out = tp->sacked_out + tp->lost_out;
}
extern void tcp_cwnd_application_limited(struct sock *sk);
@@ -1373,7 +1334,7 @@ extern void tcp_cwnd_application_limited(struct sock *sk);
static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
{
- __u32 packets_out = tcp_get_pcount(&tp->packets_out);
+ __u32 packets_out = tp->packets_out;
if (packets_out >= tp->snd_cwnd) {
/* Network is feed fully. */
@@ -1381,8 +1342,8 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp)
tp->snd_cwnd_stamp = tcp_time_stamp;
} else {
/* Network starves. */
- if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used)
- tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out);
+ if (tp->packets_out > tp->snd_cwnd_used)
+ tp->snd_cwnd_used = tp->packets_out;
if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto)
tcp_cwnd_application_limited(sk);
@@ -1450,7 +1411,7 @@ tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb,
!(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) &&
((nonagle&TCP_NAGLE_CORK) ||
(!nonagle &&
- tcp_get_pcount(&tp->packets_out) &&
+ tp->packets_out &&
tcp_minshall_check(tp))));
}
@@ -1503,7 +1464,7 @@ static __inline__ int tcp_snd_test(const struct tcp_sock *tp,
static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp)
{
- if (!tcp_get_pcount(&tp->packets_out) && !tp->pending)
+ if (!tp->packets_out && !tp->pending)
tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto);
}
diff --git a/net/802/fc.c b/net/802/fc.c
index 9a502820f7f8..16702377958e 100644
--- a/net/802/fc.c
+++ b/net/802/fc.c
@@ -35,8 +35,9 @@
* Put the headers on a Fibre Channel packet.
*/
-int fc_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
- void *daddr, void *saddr, unsigned len)
+static int fc_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ void *daddr, void *saddr, unsigned len)
{
struct fch_hdr *fch;
int hdr_len;
@@ -81,7 +82,7 @@ int fc_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
* can now send the packet.
*/
-int fc_rebuild_header(struct sk_buff *skb)
+static int fc_rebuild_header(struct sk_buff *skb)
{
struct fch_hdr *fch=(struct fch_hdr *)skb->data;
struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr));
diff --git a/net/802/fddi.c b/net/802/fddi.c
index 752d77d37d3e..f9a31a9f70f1 100644
--- a/net/802/fddi.c
+++ b/net/802/fddi.c
@@ -52,8 +52,9 @@
* daddr=NULL means leave destination address (eg unresolved arp)
*/
-int fddi_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
- void *daddr, void *saddr, unsigned len)
+static int fddi_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ void *daddr, void *saddr, unsigned len)
{
int hl = FDDI_K_SNAP_HLEN;
struct fddihdr *fddi;
@@ -96,7 +97,7 @@ int fddi_header(struct sk_buff *skb, struct net_device *dev, unsigned short type
* this sk_buff. We now let ARP fill in the other fields.
*/
-int fddi_rebuild_header(struct sk_buff *skb)
+static int fddi_rebuild_header(struct sk_buff *skb)
{
struct fddihdr *fddi = (struct fddihdr *)skb->data;
diff --git a/net/802/hippi.c b/net/802/hippi.c
index bb66e0315276..4eb135c0afbb 100644
--- a/net/802/hippi.c
+++ b/net/802/hippi.c
@@ -40,26 +40,15 @@
#include <asm/system.h>
/*
- * hippi_net_init()
- *
- * Do nothing, this is just to pursuade the stupid linker to behave.
- */
-
-void hippi_net_init(void)
-{
- return;
-}
-
-/*
* Create the HIPPI MAC header for an arbitrary protocol layer
*
* saddr=NULL means use device source address
* daddr=NULL means leave destination address (eg unresolved arp)
*/
-int hippi_header(struct sk_buff *skb, struct net_device *dev,
- unsigned short type, void *daddr, void *saddr,
- unsigned len)
+static int hippi_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type, void *daddr, void *saddr,
+ unsigned len)
{
struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN);
@@ -107,7 +96,7 @@ int hippi_header(struct sk_buff *skb, struct net_device *dev,
* completed on this sk_buff. We now let ARP fill in the other fields.
*/
-int hippi_rebuild_header(struct sk_buff *skb)
+static int hippi_rebuild_header(struct sk_buff *skb)
{
struct hippi_hdr *hip = (struct hippi_hdr *)skb->data;
diff --git a/net/802/tr.c b/net/802/tr.c
index 94cd2668768e..85293ccf7efc 100644
--- a/net/802/tr.c
+++ b/net/802/tr.c
@@ -98,8 +98,9 @@ static inline unsigned long rif_hash(const unsigned char *addr)
* makes this a little more exciting than on ethernet.
*/
-int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
- void *daddr, void *saddr, unsigned len)
+static int tr_header(struct sk_buff *skb, struct net_device *dev,
+ unsigned short type,
+ void *daddr, void *saddr, unsigned len)
{
struct trh_hdr *trh;
int hdr_len;
@@ -153,7 +154,7 @@ int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
* can now send the packet.
*/
-int tr_rebuild_header(struct sk_buff *skb)
+static int tr_rebuild_header(struct sk_buff *skb)
{
struct trh_hdr *trh=(struct trh_hdr *)skb->data;
struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr));
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index 87b211585e91..1f6d31670bc7 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -40,7 +40,7 @@
/* Global VLAN variables */
/* Our listing of VLAN group(s) */
-struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
+static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
#define vlan_grp_hashfn(IDX) ((((IDX) >> VLAN_GRP_HASH_SHIFT) ^ (IDX)) & VLAN_GRP_HASH_MASK)
static char vlan_fullname[] = "802.1Q VLAN Support";
@@ -52,7 +52,7 @@ static int vlan_device_event(struct notifier_block *, unsigned long, void *);
static int vlan_ioctl_handler(void __user *);
static int unregister_vlan_dev(struct net_device *, unsigned short );
-struct notifier_block vlan_notifier_block = {
+static struct notifier_block vlan_notifier_block = {
.notifier_call = vlan_device_event,
};
@@ -61,9 +61,6 @@ struct notifier_block vlan_notifier_block = {
/* Determines interface naming scheme. */
unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD;
-/* DO reorder the header by default */
-unsigned short vlan_default_dev_flags = 1;
-
static struct packet_type vlan_packet_type = {
.type = __constant_htons(ETH_P_8021Q),
.func = vlan_skb_recv, /* VLAN receive method */
@@ -490,7 +487,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name,
VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */
VLAN_DEV_INFO(new_dev)->real_dev = real_dev;
VLAN_DEV_INFO(new_dev)->dent = NULL;
- VLAN_DEV_INFO(new_dev)->flags = vlan_default_dev_flags;
+ VLAN_DEV_INFO(new_dev)->flags = 1;
#ifdef VLAN_DEBUG
printk(VLAN_DBG "About to go find the group for idx: %i\n",
diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h
index 0a76d1f0d029..508b1fa14546 100644
--- a/net/8021q/vlan.h
+++ b/net/8021q/vlan.h
@@ -33,7 +33,6 @@ extern unsigned short vlan_name_type;
#define VLAN_GRP_HASH_SHIFT 5
#define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT)
#define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1)
-extern struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE];
/* Find a VLAN device by the MAC address of its Ethernet device, and
* it's VLAN ID. The default configuration is to have VLAN's scope
diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c
index 5a5f9cd17d81..c32d27af0a3f 100644
--- a/net/8021q/vlanproc.c
+++ b/net/8021q/vlanproc.c
@@ -239,7 +239,7 @@ int vlan_proc_rem_dev(struct net_device *vlandev)
*/
/* starting at dev, find a VLAN device */
-struct net_device *vlan_skip(struct net_device *dev)
+static struct net_device *vlan_skip(struct net_device *dev)
{
while (dev && !(dev->priv_flags & IFF_802_1Q_VLAN))
dev = dev->next;
diff --git a/net/atm/addr.c b/net/atm/addr.c
index 96407a0bb609..225f6843c32f 100644
--- a/net/atm/addr.c
+++ b/net/atm/addr.c
@@ -2,7 +2,6 @@
/* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */
-
#include <linux/atm.h>
#include <linux/atmdev.h>
#include <linux/sched.h>
@@ -11,127 +10,121 @@
#include "signaling.h"
#include "addr.h"
-
static int check_addr(struct sockaddr_atmsvc *addr)
{
int i;
- if (addr->sas_family != AF_ATMSVC) return -EAFNOSUPPORT;
+ if (addr->sas_family != AF_ATMSVC)
+ return -EAFNOSUPPORT;
if (!*addr->sas_addr.pub)
return *addr->sas_addr.prv ? 0 : -EINVAL;
- for (i = 1; i < ATM_E164_LEN+1; i++) /* make sure it's \0-terminated */
- if (!addr->sas_addr.pub[i]) return 0;
+ for (i = 1; i < ATM_E164_LEN + 1; i++) /* make sure it's \0-terminated */
+ if (!addr->sas_addr.pub[i])
+ return 0;
return -EINVAL;
}
-
-static int identical(struct sockaddr_atmsvc *a,struct sockaddr_atmsvc *b)
+static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b)
{
if (*a->sas_addr.prv)
- if (memcmp(a->sas_addr.prv,b->sas_addr.prv,ATM_ESA_LEN))
+ if (memcmp(a->sas_addr.prv, b->sas_addr.prv, ATM_ESA_LEN))
return 0;
- if (!*a->sas_addr.pub) return !*b->sas_addr.pub;
- if (!*b->sas_addr.pub) return 0;
- return !strcmp(a->sas_addr.pub,b->sas_addr.pub);
+ if (!*a->sas_addr.pub)
+ return !*b->sas_addr.pub;
+ if (!*b->sas_addr.pub)
+ return 0;
+ return !strcmp(a->sas_addr.pub, b->sas_addr.pub);
}
-
static void notify_sigd(struct atm_dev *dev)
{
struct sockaddr_atmpvc pvc;
pvc.sap_addr.itf = dev->number;
- sigd_enq(NULL,as_itf_notify,NULL,&pvc,NULL);
+ sigd_enq(NULL, as_itf_notify, NULL, &pvc, NULL);
}
-
void atm_reset_addr(struct atm_dev *dev)
{
unsigned long flags;
- struct atm_dev_addr *this;
+ struct atm_dev_addr *this, *p;
spin_lock_irqsave(&dev->lock, flags);
- while (dev->local) {
- this = dev->local;
- dev->local = this->next;
- kfree(this);
- }
+ list_for_each_entry_safe(this, p, &dev->local, entry)
+ kfree(this);
spin_unlock_irqrestore(&dev->lock, flags);
notify_sigd(dev);
}
-
-int atm_add_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr)
+int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr)
{
unsigned long flags;
- struct atm_dev_addr **walk;
+ struct atm_dev_addr *this;
int error;
error = check_addr(addr);
if (error)
return error;
spin_lock_irqsave(&dev->lock, flags);
- for (walk = &dev->local; *walk; walk = &(*walk)->next)
- if (identical(&(*walk)->addr,addr)) {
+ list_for_each_entry(this, &dev->local, entry) {
+ if (identical(&this->addr, addr)) {
spin_unlock_irqrestore(&dev->lock, flags);
return -EEXIST;
}
- *walk = kmalloc(sizeof(struct atm_dev_addr), GFP_ATOMIC);
- if (!*walk) {
+ }
+ this = kmalloc(sizeof(struct atm_dev_addr), GFP_ATOMIC);
+ if (!this) {
spin_unlock_irqrestore(&dev->lock, flags);
return -ENOMEM;
}
- (*walk)->addr = *addr;
- (*walk)->next = NULL;
+ this->addr = *addr;
+ list_add(&this->entry, &dev->local);
spin_unlock_irqrestore(&dev->lock, flags);
notify_sigd(dev);
return 0;
}
-
-int atm_del_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr)
+int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr)
{
unsigned long flags;
- struct atm_dev_addr **walk,*this;
+ struct atm_dev_addr *this;
int error;
error = check_addr(addr);
if (error)
return error;
spin_lock_irqsave(&dev->lock, flags);
- for (walk = &dev->local; *walk; walk = &(*walk)->next)
- if (identical(&(*walk)->addr,addr)) break;
- if (!*walk) {
- spin_unlock_irqrestore(&dev->lock, flags);
- return -ENOENT;
+ list_for_each_entry(this, &dev->local, entry) {
+ if (identical(&this->addr, addr)) {
+ list_del(&this->entry);
+ spin_unlock_irqrestore(&dev->lock, flags);
+ kfree(this);
+ notify_sigd(dev);
+ return 0;
+ }
}
- this = *walk;
- *walk = this->next;
- kfree(this);
spin_unlock_irqrestore(&dev->lock, flags);
- notify_sigd(dev);
- return 0;
+ return -ENOENT;
}
-
-int atm_get_addr(struct atm_dev *dev,struct sockaddr_atmsvc __user *buf,int size)
+int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user * buf,
+ int size)
{
unsigned long flags;
- struct atm_dev_addr *walk;
+ struct atm_dev_addr *this;
int total = 0, error;
struct sockaddr_atmsvc *tmp_buf, *tmp_bufp;
-
spin_lock_irqsave(&dev->lock, flags);
- for (walk = dev->local; walk; walk = walk->next)
- total += sizeof(struct sockaddr_atmsvc);
+ list_for_each_entry(this, &dev->local, entry)
+ total += sizeof(struct sockaddr_atmsvc);
tmp_buf = tmp_bufp = kmalloc(total, GFP_ATOMIC);
if (!tmp_buf) {
spin_unlock_irqrestore(&dev->lock, flags);
return -ENOMEM;
}
- for (walk = dev->local; walk; walk = walk->next)
- memcpy(tmp_bufp++, &walk->addr, sizeof(struct sockaddr_atmsvc));
+ list_for_each_entry(this, &dev->local, entry)
+ memcpy(tmp_bufp++, &this->addr, sizeof(struct sockaddr_atmsvc));
spin_unlock_irqrestore(&dev->lock, flags);
error = total > size ? -E2BIG : total;
if (copy_to_user(buf, tmp_buf, total < size ? total : size))
diff --git a/net/atm/clip.c b/net/atm/clip.c
index 8db42d467af3..53aac1833182 100644
--- a/net/atm/clip.c
+++ b/net/atm/clip.c
@@ -97,7 +97,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc);
return;
}
- spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */
+ spin_lock_irq(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */
entry->neigh->used = jiffies;
for (walk = &entry->vccs; *walk; walk = &(*walk)->next)
if (*walk == clip_vcc) {
@@ -121,7 +121,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc)
printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc "
"0x%p)\n",entry,clip_vcc);
out:
- spin_unlock_bh(&entry->neigh->dev->xmit_lock);
+ spin_unlock_irq(&entry->neigh->dev->xmit_lock);
}
/* The neighbour entry n->lock is held. */
diff --git a/net/atm/lec.c b/net/atm/lec.c
index a920b9246f08..bf2c45af6835 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -422,6 +422,7 @@ lec_get_stats(struct net_device *dev)
static int
lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
{
+ unsigned long flags;
struct net_device *dev = (struct net_device*)vcc->proto_data;
struct lec_priv *priv = (struct lec_priv*)dev->priv;
struct atmlec_msg *mesg;
@@ -456,8 +457,10 @@ lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb)
lec_flush_complete(priv, mesg->content.normal.flag);
break;
case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
entry = lec_arp_find(priv, mesg->content.normal.mac_addr);
lec_arp_remove(priv, entry);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
if (mesg->content.normal.no_source_le_narp)
break;
@@ -1222,17 +1225,20 @@ module_exit(lane_module_cleanup);
static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force,
u8 **tlvs, u32 *sizeoftlvs)
{
+ unsigned long flags;
struct lec_priv *priv = (struct lec_priv *)dev->priv;
struct lec_arp_table *table;
struct sk_buff *skb;
int retval;
if (force == 0) {
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
table = lec_arp_find(priv, dst_mac);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
if(table == NULL)
return -1;
- *tlvs = kmalloc(table->sizeoftlvs, GFP_KERNEL);
+ *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC);
if (*tlvs == NULL)
return -1;
@@ -1377,18 +1383,6 @@ void dump_arp_table(struct lec_priv *priv);
#define HASH(ch) (ch & (LEC_ARP_TABLE_SIZE -1))
-static __inline__ void
-lec_arp_get(struct lec_priv *priv)
-{
- atomic_inc(&priv->lec_arp_users);
-}
-
-static __inline__ void
-lec_arp_put(struct lec_priv *priv)
-{
- atomic_dec(&priv->lec_arp_users);
-}
-
/*
* Initialization of arp-cache
*/
@@ -1397,12 +1391,12 @@ lec_arp_init(struct lec_priv *priv)
{
unsigned short i;
- for (i=0;i<LEC_ARP_TABLE_SIZE;i++) {
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
priv->lec_arp_tables[i] = NULL;
}
spin_lock_init(&priv->lec_arp_lock);
init_timer(&priv->lec_arp_timer);
- priv->lec_arp_timer.expires = jiffies+LEC_ARP_REFRESH_INTERVAL;
+ priv->lec_arp_timer.expires = jiffies + LEC_ARP_REFRESH_INTERVAL;
priv->lec_arp_timer.data = (unsigned long)priv;
priv->lec_arp_timer.function = lec_arp_check_expire;
add_timer(&priv->lec_arp_timer);
@@ -1439,12 +1433,9 @@ lec_arp_clear_vccs(struct lec_arp_table *entry)
static inline void
lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add)
{
- unsigned long flags;
unsigned short place;
struct lec_arp_table *tmp;
- spin_lock_irqsave(&priv->lec_arp_lock, flags);
-
place = HASH(to_add->mac_addr[ETH_ALEN-1]);
tmp = priv->lec_arp_tables[place];
to_add->next = NULL;
@@ -1457,8 +1448,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add)
tmp->next = to_add;
}
- spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
-
DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
0xff&to_add->mac_addr[0], 0xff&to_add->mac_addr[1],
0xff&to_add->mac_addr[2], 0xff&to_add->mac_addr[3],
@@ -1472,15 +1461,11 @@ static int
lec_arp_remove(struct lec_priv *priv,
struct lec_arp_table *to_remove)
{
- unsigned long flags;
unsigned short place;
struct lec_arp_table *tmp;
int remove_vcc=1;
- spin_lock_irqsave(&priv->lec_arp_lock, flags);
-
if (!to_remove) {
- spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
return -1;
}
place = HASH(to_remove->mac_addr[ETH_ALEN-1]);
@@ -1492,7 +1477,6 @@ lec_arp_remove(struct lec_priv *priv,
tmp = tmp->next;
}
if (!tmp) {/* Entry was not found */
- spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
return -1;
}
}
@@ -1505,7 +1489,7 @@ lec_arp_remove(struct lec_priv *priv,
/*
* ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT
*/
- for(place=0;place<LEC_ARP_TABLE_SIZE;place++) {
+ for(place = 0; place < LEC_ARP_TABLE_SIZE; place++) {
for(tmp = priv->lec_arp_tables[place]; tmp != NULL; tmp = tmp->next) {
if (memcmp(tmp->atm_addr, to_remove->atm_addr,
ATM_ESA_LEN)==0) {
@@ -1519,8 +1503,6 @@ lec_arp_remove(struct lec_priv *priv,
}
skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */
- spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
-
DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
0xff&to_remove->mac_addr[0], 0xff&to_remove->mac_addr[1],
0xff&to_remove->mac_addr[2], 0xff&to_remove->mac_addr[3],
@@ -1704,6 +1686,7 @@ dump_arp_table(struct lec_priv *priv)
void
lec_arp_destroy(struct lec_priv *priv)
{
+ unsigned long flags;
struct lec_arp_table *entry, *next;
int i;
@@ -1712,8 +1695,10 @@ lec_arp_destroy(struct lec_priv *priv)
/*
* Remove all entries
*/
- for (i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- for(entry =priv->lec_arp_tables[i];entry != NULL; entry=next) {
+
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ for(entry = priv->lec_arp_tables[i]; entry != NULL; entry=next) {
next = entry->next;
lec_arp_remove(priv, entry);
kfree(entry);
@@ -1748,7 +1733,8 @@ lec_arp_destroy(struct lec_priv *priv)
priv->mcast_fwds = NULL;
priv->mcast_vcc = NULL;
memset(priv->lec_arp_tables, 0,
- sizeof(struct lec_arp_table*)*LEC_ARP_TABLE_SIZE);
+ sizeof(struct lec_arp_table *) * LEC_ARP_TABLE_SIZE);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
@@ -1765,18 +1751,15 @@ lec_arp_find(struct lec_priv *priv,
DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n",
mac_addr[0]&0xff, mac_addr[1]&0xff, mac_addr[2]&0xff,
mac_addr[3]&0xff, mac_addr[4]&0xff, mac_addr[5]&0xff);
- lec_arp_get(priv);
place = HASH(mac_addr[ETH_ALEN-1]);
to_return = priv->lec_arp_tables[place];
while(to_return) {
if (memcmp(mac_addr, to_return->mac_addr, ETH_ALEN) == 0) {
- lec_arp_put(priv);
return to_return;
}
to_return = to_return->next;
}
- lec_arp_put(priv);
return NULL;
}
@@ -1785,17 +1768,17 @@ make_entry(struct lec_priv *priv, unsigned char *mac_addr)
{
struct lec_arp_table *to_return;
- to_return=(struct lec_arp_table *)kmalloc(sizeof(struct lec_arp_table),
- GFP_ATOMIC);
+ to_return = (struct lec_arp_table *) kmalloc(sizeof(struct lec_arp_table),
+ GFP_ATOMIC);
if (!to_return) {
printk("LEC: Arp entry kmalloc failed\n");
return NULL;
}
- memset(to_return,0,sizeof(struct lec_arp_table));
+ memset(to_return, 0, sizeof(struct lec_arp_table));
memcpy(to_return->mac_addr, mac_addr, ETH_ALEN);
init_timer(&to_return->timer);
to_return->timer.function = lec_arp_expire_arp;
- to_return->timer.data = (unsigned long)to_return;
+ to_return->timer.data = (unsigned long) to_return;
to_return->last_used = jiffies;
to_return->priv = priv;
skb_queue_head_init(&to_return->tx_wait);
@@ -1835,6 +1818,7 @@ lec_arp_expire_arp(unsigned long data)
static void
lec_arp_expire_vcc(unsigned long data)
{
+	unsigned long flags;
struct lec_arp_table *to_remove = (struct lec_arp_table*)data;
struct lec_priv *priv = (struct lec_priv *)to_remove->priv;
struct lec_arp_table *entry = NULL;
@@ -1846,6 +1830,8 @@ lec_arp_expire_vcc(unsigned long data)
to_remove->vcc?to_remove->recv_vcc->vpi:0,
to_remove->vcc?to_remove->recv_vcc->vci:0);
DPRINTK("eo:%p nf:%p\n",priv->lec_arp_empty_ones,priv->lec_no_forward);
+
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
if (to_remove == priv->lec_arp_empty_ones)
priv->lec_arp_empty_ones = to_remove->next;
else {
@@ -1866,6 +1852,8 @@ lec_arp_expire_vcc(unsigned long data)
entry->next = to_remove->next;
}
}
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+
lec_arp_clear_vccs(to_remove);
kfree(to_remove);
}
@@ -1889,69 +1877,67 @@ lec_arp_expire_vcc(unsigned long data)
static void
lec_arp_check_expire(unsigned long data)
{
+ unsigned long flags;
struct lec_priv *priv = (struct lec_priv *)data;
struct lec_arp_table *entry, *next;
unsigned long now;
unsigned long time_to_check;
int i;
- DPRINTK("lec_arp_check_expire %p,%d\n",priv,
- atomic_read(&priv->lec_arp_users));
+ DPRINTK("lec_arp_check_expire %p\n",priv);
DPRINTK("expire: eo:%p nf:%p\n",priv->lec_arp_empty_ones,
priv->lec_no_forward);
- if (!atomic_read(&priv->lec_arp_users)) {
- lec_arp_get(priv);
- now = jiffies;
- for(i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- for(entry = priv->lec_arp_tables[i]; entry != NULL; ) {
- if ((entry->flags) & LEC_REMOTE_FLAG &&
- priv->topology_change)
- time_to_check=priv->forward_delay_time;
- else
- time_to_check = priv->aging_time;
-
- DPRINTK("About to expire: %lx - %lx > %lx\n",
- now,entry->last_used, time_to_check);
- if( time_after(now, entry->last_used+
- time_to_check) &&
- !(entry->flags & LEC_PERMANENT_FLAG) &&
- !(entry->mac_addr[0] & 0x01) ) { /* LANE2: 7.1.20 */
- /* Remove entry */
- DPRINTK("LEC:Entry timed out\n");
- next = entry->next;
- lec_arp_remove(priv, entry);
- kfree(entry);
- entry = next;
- } else {
- /* Something else */
- if ((entry->status == ESI_VC_PENDING ||
- entry->status == ESI_ARP_PENDING)
- && time_after_eq(now,
- entry->timestamp +
- priv->max_unknown_frame_time)) {
- entry->timestamp = jiffies;
- entry->packets_flooded = 0;
- if (entry->status == ESI_VC_PENDING)
- send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL);
- }
- if (entry->status == ESI_FLUSH_PENDING
- &&
- time_after_eq(now, entry->timestamp+
- priv->path_switching_delay)) {
- struct sk_buff *skb;
-
- while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
- lec_send(entry->vcc, skb, entry->priv);
- entry->last_used = jiffies;
- entry->status =
- ESI_FORWARD_DIRECT;
- }
- entry = entry->next;
- }
- }
- }
- lec_arp_put(priv);
- }
+ now = jiffies;
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
+ for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ for(entry = priv->lec_arp_tables[i]; entry != NULL; ) {
+ if ((entry->flags) & LEC_REMOTE_FLAG &&
+ priv->topology_change)
+ time_to_check = priv->forward_delay_time;
+ else
+ time_to_check = priv->aging_time;
+
+ DPRINTK("About to expire: %lx - %lx > %lx\n",
+ now,entry->last_used, time_to_check);
+ if( time_after(now, entry->last_used+
+ time_to_check) &&
+ !(entry->flags & LEC_PERMANENT_FLAG) &&
+ !(entry->mac_addr[0] & 0x01) ) { /* LANE2: 7.1.20 */
+ /* Remove entry */
+ DPRINTK("LEC:Entry timed out\n");
+ next = entry->next;
+ lec_arp_remove(priv, entry);
+ kfree(entry);
+ entry = next;
+ } else {
+ /* Something else */
+ if ((entry->status == ESI_VC_PENDING ||
+ entry->status == ESI_ARP_PENDING)
+ && time_after_eq(now,
+ entry->timestamp +
+ priv->max_unknown_frame_time)) {
+ entry->timestamp = jiffies;
+ entry->packets_flooded = 0;
+ if (entry->status == ESI_VC_PENDING)
+ send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL);
+ }
+ if (entry->status == ESI_FLUSH_PENDING
+ &&
+ time_after_eq(now, entry->timestamp+
+ priv->path_switching_delay)) {
+ struct sk_buff *skb;
+
+ while ((skb = skb_dequeue(&entry->tx_wait)) != NULL)
+ lec_send(entry->vcc, skb, entry->priv);
+ entry->last_used = jiffies;
+ entry->status =
+ ESI_FORWARD_DIRECT;
+ }
+ entry = entry->next;
+ }
+ }
+ }
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
mod_timer(&priv->lec_arp_timer, jiffies + LEC_ARP_REFRESH_INTERVAL);
}
@@ -1963,9 +1949,11 @@ struct atm_vcc*
lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc,
struct lec_arp_table **ret_entry)
{
+ unsigned long flags;
struct lec_arp_table *entry;
+ struct atm_vcc *found;
- if (mac_to_find[0]&0x01) {
+ if (mac_to_find[0] & 0x01) {
switch (priv->lane_version) {
case 1:
return priv->mcast_vcc;
@@ -1979,6 +1967,7 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc,
}
}
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
entry = lec_arp_find(priv, mac_to_find);
if (entry) {
@@ -1986,7 +1975,8 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc,
/* Connection Ok */
entry->last_used = jiffies;
*ret_entry = entry;
- return entry->vcc;
+ found = entry->vcc;
+ goto out;
}
/* Data direct VC not yet set up, check to see if the unknown
frame count is greater than the limit. If the limit has
@@ -1996,7 +1986,8 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc,
entry->packets_flooded<priv->maximum_unknown_frame_count) {
entry->packets_flooded++;
DPRINTK("LEC_ARP: Flooding..\n");
- return priv->mcast_vcc;
+ found = priv->mcast_vcc;
+ goto out;
}
/* We got here because entry->status == ESI_FLUSH_PENDING
* or BUS flood limit was reached for an entry which is
@@ -2004,13 +1995,14 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc,
*/
*ret_entry = entry;
DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc);
- return NULL;
+ found = NULL;
} else {
/* No matching entry was found */
entry = make_entry(priv, mac_to_find);
DPRINTK("LEC_ARP: Making entry\n");
if (!entry) {
- return priv->mcast_vcc;
+ found = priv->mcast_vcc;
+ goto out;
}
lec_arp_add(priv, entry);
/* We want arp-request(s) to be sent */
@@ -2026,33 +2018,38 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc,
entry->timer.expires = jiffies + (1*HZ);
entry->timer.function = lec_arp_expire_arp;
add_timer(&entry->timer);
- return priv->mcast_vcc;
+ found = priv->mcast_vcc;
}
+
+out:
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+ return found;
}
int
lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr,
unsigned long permanent)
{
+ unsigned long flags;
struct lec_arp_table *entry, *next;
int i;
- lec_arp_get(priv);
DPRINTK("lec_addr_delete\n");
- for(i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- for(entry=priv->lec_arp_tables[i];entry != NULL; entry=next) {
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
+ for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ for(entry = priv->lec_arp_tables[i]; entry != NULL; entry = next) {
next = entry->next;
if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)
&& (permanent ||
!(entry->flags & LEC_PERMANENT_FLAG))) {
- lec_arp_remove(priv, entry);
+ lec_arp_remove(priv, entry);
kfree(entry);
}
- lec_arp_put(priv);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
return 0;
}
}
- lec_arp_put(priv);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
return -1;
}
@@ -2064,6 +2061,7 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
unsigned char *atm_addr, unsigned long remoteflag,
unsigned int targetless_le_arp)
{
+ unsigned long flags;
struct lec_arp_table *entry, *tmp;
int i;
@@ -2072,12 +2070,12 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
mac_addr[0],mac_addr[1],mac_addr[2],mac_addr[3],
mac_addr[4],mac_addr[5]);
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
entry = lec_arp_find(priv, mac_addr);
if (entry == NULL && targetless_le_arp)
- return; /* LANE2: ignore targetless LE_ARPs for which
- * we have no entry in the cache. 7.1.30
- */
- lec_arp_get(priv);
+ goto out; /* LANE2: ignore targetless LE_ARPs for which
+ * we have no entry in the cache. 7.1.30
+ */
if (priv->lec_arp_empty_ones) {
entry = priv->lec_arp_empty_ones;
if (!memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN)) {
@@ -2117,27 +2115,24 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
entry->flags|=LEC_REMOTE_FLAG;
else
entry->flags&=~LEC_REMOTE_FLAG;
- lec_arp_put(priv);
DPRINTK("After update\n");
dump_arp_table(priv);
- return;
+ goto out;
}
}
entry = lec_arp_find(priv, mac_addr);
if (!entry) {
entry = make_entry(priv, mac_addr);
- if (!entry) {
- lec_arp_put(priv);
- return;
- }
+ if (!entry)
+ goto out;
entry->status = ESI_UNKNOWN;
lec_arp_add(priv, entry);
/* Temporary, changes before end of function */
}
memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN);
del_timer(&entry->timer);
- for(i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- for(tmp=priv->lec_arp_tables[i];tmp;tmp=tmp->next) {
+ for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ for(tmp = priv->lec_arp_tables[i]; tmp; tmp=tmp->next) {
if (entry != tmp &&
!memcmp(tmp->atm_addr, atm_addr,
ATM_ESA_LEN)) {
@@ -2166,7 +2161,8 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr,
}
DPRINTK("After update2\n");
dump_arp_table(priv);
- lec_arp_put(priv);
+out:
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
/*
@@ -2177,10 +2173,11 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
struct atm_vcc *vcc,
void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb))
{
+ unsigned long flags;
struct lec_arp_table *entry;
int i, found_entry=0;
- lec_arp_get(priv);
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
if (ioc_data->receive == 2) {
/* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */
@@ -2189,26 +2186,22 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
entry = lec_arp_find(priv, bus_mac);
if (!entry) {
printk("LEC_ARP: Multicast entry not found!\n");
- lec_arp_put(priv);
- return;
+ goto out;
}
memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
entry->recv_vcc = vcc;
entry->old_recv_push = old_push;
#endif
entry = make_entry(priv, bus_mac);
- if (entry == NULL) {
- lec_arp_put(priv);
- return;
- }
+ if (entry == NULL)
+ goto out;
del_timer(&entry->timer);
memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
entry->recv_vcc = vcc;
entry->old_recv_push = old_push;
entry->next = priv->mcast_fwds;
priv->mcast_fwds = entry;
- lec_arp_put(priv);
- return;
+ goto out;
} else if (ioc_data->receive == 1) {
/* Vcc which we don't want to make default vcc, attach it
anyway. */
@@ -2224,10 +2217,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
ioc_data->atm_addr[16],ioc_data->atm_addr[17],
ioc_data->atm_addr[18],ioc_data->atm_addr[19]);
entry = make_entry(priv, bus_mac);
- if (entry == NULL) {
- lec_arp_put(priv);
- return;
- }
+ if (entry == NULL)
+ goto out;
memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
memset(entry->mac_addr, 0, ETH_ALEN);
entry->recv_vcc = vcc;
@@ -2238,9 +2229,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
add_timer(&entry->timer);
entry->next = priv->lec_no_forward;
priv->lec_no_forward = entry;
- lec_arp_put(priv);
dump_arp_table(priv);
- return;
+ goto out;
}
DPRINTK("LEC_ARP:Attaching data direct, default:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n",
ioc_data->atm_addr[0],ioc_data->atm_addr[1],
@@ -2253,8 +2243,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
ioc_data->atm_addr[14],ioc_data->atm_addr[15],
ioc_data->atm_addr[16],ioc_data->atm_addr[17],
ioc_data->atm_addr[18],ioc_data->atm_addr[19]);
- for (i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- for (entry = priv->lec_arp_tables[i];entry;entry=entry->next) {
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) {
if (memcmp(ioc_data->atm_addr, entry->atm_addr,
ATM_ESA_LEN)==0) {
DPRINTK("LEC_ARP: Attaching data direct\n");
@@ -2297,18 +2287,15 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
}
}
if (found_entry) {
- lec_arp_put(priv);
DPRINTK("After vcc was added\n");
dump_arp_table(priv);
- return;
+ goto out;
}
/* Not found, snatch address from first data packet that arrives from
this vcc */
entry = make_entry(priv, bus_mac);
- if (!entry) {
- lec_arp_put(priv);
- return;
- }
+ if (!entry)
+ goto out;
entry->vcc = vcc;
entry->old_push = old_push;
memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN);
@@ -2319,20 +2306,23 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data,
entry->timer.expires = jiffies + priv->vcc_timeout_period;
entry->timer.function = lec_arp_expire_vcc;
add_timer(&entry->timer);
- lec_arp_put(priv);
DPRINTK("After vcc was added\n");
dump_arp_table(priv);
+out:
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
void
lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
{
+ unsigned long flags;
struct lec_arp_table *entry;
int i;
DPRINTK("LEC:lec_flush_complete %lx\n",tran_id);
- for (i=0;i<LEC_ARP_TABLE_SIZE;i++) {
- for (entry=priv->lec_arp_tables[i];entry;entry=entry->next) {
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) {
+ for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) {
if (entry->flush_tran_id == tran_id &&
entry->status == ESI_FLUSH_PENDING) {
struct sk_buff *skb;
@@ -2344,6 +2334,7 @@ lec_flush_complete(struct lec_priv *priv, unsigned long tran_id)
}
}
}
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
dump_arp_table(priv);
}
@@ -2351,24 +2342,29 @@ void
lec_set_flush_tran_id(struct lec_priv *priv,
unsigned char *atm_addr, unsigned long tran_id)
{
+ unsigned long flags;
struct lec_arp_table *entry;
int i;
- for (i=0;i<LEC_ARP_TABLE_SIZE;i++)
- for(entry=priv->lec_arp_tables[i];entry;entry=entry->next)
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
+ for (i = 0; i < LEC_ARP_TABLE_SIZE; i++)
+ for(entry = priv->lec_arp_tables[i]; entry; entry=entry->next)
if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) {
entry->flush_tran_id = tran_id;
DPRINTK("Set flush transaction id to %lx for %p\n",tran_id,entry);
}
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
int
lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc)
{
+ unsigned long flags;
unsigned char mac_addr[] = {
0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
struct lec_arp_table *to_add;
struct lec_vcc_priv *vpriv;
+ int err = 0;
if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL)))
return -ENOMEM;
@@ -2376,13 +2372,13 @@ lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc)
vpriv->old_pop = vcc->pop;
vcc->user_back = vpriv;
vcc->pop = lec_pop;
- lec_arp_get(priv);
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
to_add = make_entry(priv, mac_addr);
if (!to_add) {
- lec_arp_put(priv);
vcc->pop = vpriv->old_pop;
kfree(vpriv);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto out;
}
memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN);
to_add->status = ESI_FORWARD_DIRECT;
@@ -2392,19 +2388,21 @@ lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc)
vcc->push = lec_push;
priv->mcast_vcc = vcc;
lec_arp_add(priv, to_add);
- lec_arp_put(priv);
- return 0;
+out:
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
+ return err;
}
void
lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
{
+ unsigned long flags;
struct lec_arp_table *entry, *next;
int i;
DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n",vcc->vpi,vcc->vci);
dump_arp_table(priv);
- lec_arp_get(priv);
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
for(i=0;i<LEC_ARP_TABLE_SIZE;i++) {
for(entry = priv->lec_arp_tables[i];entry; entry=next) {
next = entry->next;
@@ -2466,7 +2464,7 @@ lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc)
entry = next;
}
- lec_arp_put(priv);
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
dump_arp_table(priv);
}
@@ -2486,26 +2484,22 @@ lec_arp_check_empties(struct lec_priv *priv,
#endif
src = hdr->h_source;
- lec_arp_get(priv);
+ spin_lock_irqsave(&priv->lec_arp_lock, flags);
entry = priv->lec_arp_empty_ones;
if (vcc == entry->vcc) {
- spin_lock_irqsave(&priv->lec_arp_lock, flags);
del_timer(&entry->timer);
memcpy(entry->mac_addr, src, ETH_ALEN);
entry->status = ESI_FORWARD_DIRECT;
entry->last_used = jiffies;
priv->lec_arp_empty_ones = entry->next;
- spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
/* We might have got an entry */
- if ((prev=lec_arp_find(priv,src))) {
+ if ((prev = lec_arp_find(priv,src))) {
lec_arp_remove(priv, prev);
kfree(prev);
}
lec_arp_add(priv, entry);
- lec_arp_put(priv);
- return;
+ goto out;
}
- spin_lock_irqsave(&priv->lec_arp_lock, flags);
prev = entry;
entry = entry->next;
while (entry && entry->vcc != vcc) {
@@ -2514,21 +2508,19 @@ lec_arp_check_empties(struct lec_priv *priv,
}
if (!entry) {
DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n");
- lec_arp_put(priv);
- spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
- return;
+ goto out;
}
del_timer(&entry->timer);
memcpy(entry->mac_addr, src, ETH_ALEN);
entry->status = ESI_FORWARD_DIRECT;
entry->last_used = jiffies;
prev->next = entry->next;
- spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
if ((prev = lec_arp_find(priv, src))) {
lec_arp_remove(priv, prev);
kfree(prev);
}
lec_arp_add(priv, entry);
- lec_arp_put(priv);
+out:
+ spin_unlock_irqrestore(&priv->lec_arp_lock, flags);
}
MODULE_LICENSE("GPL");
diff --git a/net/atm/lec.h b/net/atm/lec.h
index 9c190210982e..34a64f4b63e3 100644
--- a/net/atm/lec.h
+++ b/net/atm/lec.h
@@ -95,7 +95,6 @@ struct lec_priv {
establishes multiple Multicast Forward VCCs to us. This list
collects all those VCCs. LANEv1 client has only one item in this
list. These entries are not aged out. */
- atomic_t lec_arp_users;
spinlock_t lec_arp_lock;
struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */
struct atm_vcc *lecd;
diff --git a/net/atm/resources.c b/net/atm/resources.c
index f030fea2ea60..4cadbfa6ecbd 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -38,6 +38,7 @@ static struct atm_dev *__alloc_atm_dev(const char *type)
dev->signal = ATM_PHY_SIG_UNKNOWN;
dev->link_rate = ATM_OC3_PCR;
spin_lock_init(&dev->lock);
+ INIT_LIST_HEAD(&dev->local);
return dev;
}
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 859e57b05b92..3465678faf2f 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -71,9 +71,7 @@ static void svc_disconnect(struct atm_vcc *vcc)
sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0);
dev_kfree_skb(skb);
}
- clear_bit(ATM_VF_REGIS,&vcc->flags);
- clear_bit(ATM_VF_RELEASED,&vcc->flags);
- clear_bit(ATM_VF_CLOSE,&vcc->flags);
+ clear_bit(ATM_VF_REGIS, &vcc->flags);
/* ... may retry later */
}
@@ -90,10 +88,8 @@ static int svc_release(struct socket *sock)
/* VCC pointer is used as a reference, so we must not free it
(thereby subjecting it to re-use) before all pending connections
are closed */
- sock_hold(sk);
- vcc_release(sock);
svc_disconnect(vcc);
- sock_put(sk);
+ vcc_release(sock);
}
return 0;
}
@@ -286,7 +282,8 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr,
*/
if (!(error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci)))
sock->state = SS_CONNECTED;
- else (void) svc_disconnect(vcc);
+ else
+ (void) svc_disconnect(vcc);
out:
release_sock(sk);
return error;
diff --git a/net/core/dev.c b/net/core/dev.c
index 4dc01e26f158..2fdd7e27e22d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1190,7 +1190,7 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
#define HARD_TX_LOCK(dev, cpu) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
- spin_lock(&dev->xmit_lock); \
+ spin_lock_irq(&dev->xmit_lock); \
dev->xmit_lock_owner = cpu; \
} \
}
@@ -1198,7 +1198,7 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask)
#define HARD_TX_UNLOCK(dev) { \
if ((dev->features & NETIF_F_LLTX) == 0) { \
dev->xmit_lock_owner = -1; \
- spin_unlock(&dev->xmit_lock); \
+ spin_unlock_irq(&dev->xmit_lock); \
} \
}
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
index db098ff3cd6a..769dcf76eb6e 100644
--- a/net/core/dev_mcast.c
+++ b/net/core/dev_mcast.c
@@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_device *dev)
void dev_mc_upload(struct net_device *dev)
{
- spin_lock_bh(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
__dev_mc_upload(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
}
/*
@@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
int err = 0;
struct dev_mc_list *dmi, **dmip;
- spin_lock_bh(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) {
/*
@@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
*/
__dev_mc_upload(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
return 0;
}
}
err = -ENOENT;
done:
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
return err;
}
@@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC);
- spin_lock_bh(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) {
if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 &&
dmi->dmi_addrlen == alen) {
@@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
}
if ((dmi = dmi1) == NULL) {
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
return -ENOMEM;
}
memcpy(dmi->dmi_addr, addr, alen);
@@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
__dev_mc_upload(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
return 0;
done:
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
if (dmi1)
kfree(dmi1);
return err;
@@ -205,7 +205,7 @@ done:
void dev_mc_discard(struct net_device *dev)
{
- spin_lock_bh(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
while (dev->mc_list != NULL) {
struct dev_mc_list *tmp = dev->mc_list;
@@ -216,7 +216,7 @@ void dev_mc_discard(struct net_device *dev)
}
dev->mc_count = 0;
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
}
#ifdef CONFIG_PROC_FS
@@ -251,7 +251,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
struct dev_mc_list *m;
struct net_device *dev = v;
- spin_lock_bh(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
for (m = dev->mc_list; m; m = m->next) {
int i;
@@ -263,7 +263,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v)
seq_putc(seq, '\n');
}
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
return 0;
}
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 83697277a851..af02bba906a1 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -188,7 +188,7 @@ repeat:
return;
}
- spin_lock(&np->dev->xmit_lock);
+ spin_lock_irq(&np->dev->xmit_lock);
np->dev->xmit_lock_owner = smp_processor_id();
/*
@@ -197,7 +197,7 @@ repeat:
*/
if (netif_queue_stopped(np->dev)) {
np->dev->xmit_lock_owner = -1;
- spin_unlock(&np->dev->xmit_lock);
+ spin_unlock_irq(&np->dev->xmit_lock);
netpoll_poll(np);
goto repeat;
@@ -205,7 +205,7 @@ repeat:
status = np->dev->hard_start_xmit(skb, np->dev);
np->dev->xmit_lock_owner = -1;
- spin_unlock(&np->dev->xmit_lock);
+ spin_unlock_irq(&np->dev->xmit_lock);
/* transmit busy */
if(status) {
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 9bee84a7fbf8..3364ee3269e6 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2664,12 +2664,11 @@ __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev)
}
}
- spin_lock_bh(&odev->xmit_lock);
+ spin_lock_irq(&odev->xmit_lock);
if (!netif_queue_stopped(odev)) {
u64 now;
atomic_inc(&(pkt_dev->skb->users));
-retry_now:
ret = odev->hard_start_xmit(pkt_dev->skb, odev);
if (likely(ret == NETDEV_TX_OK)) {
pkt_dev->last_ok = 1;
@@ -2677,10 +2676,6 @@ retry_now:
pkt_dev->seq_num++;
pkt_dev->tx_bytes += pkt_dev->cur_pkt_size;
- } else if (ret == NETDEV_TX_LOCKED
- && (odev->features & NETIF_F_LLTX)) {
- cpu_relax();
- goto retry_now;
} else { /* Retry it next time */
atomic_dec(&(pkt_dev->skb->users));
@@ -2716,7 +2711,7 @@ retry_now:
pkt_dev->next_tx_ns = 0;
}
- spin_unlock_bh(&odev->xmit_lock);
+ spin_unlock_irq(&odev->xmit_lock);
/* If pkt_dev->count is zero, then run forever */
if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) {
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index fc741925911a..3dbddd062605 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -44,14 +44,21 @@ static char *conns[] = { "DATA ", "MESG ", "INDEX " };
static char amanda_buffer[65536];
static DECLARE_LOCK(amanda_buffer_lock);
-static int help(struct sk_buff *skb,
+unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
+
+static int help(struct sk_buff **pskb,
struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
{
struct ip_conntrack_expect *exp;
- struct ip_ct_amanda_expect *exp_amanda_info;
char *data, *data_limit, *tmp;
unsigned int dataoff, i;
u_int16_t port, len;
+ int ret = NF_ACCEPT;
/* Only look at packets from the Amanda server */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
@@ -62,17 +69,17 @@ static int help(struct sk_buff *skb,
ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ);
/* No data? */
- dataoff = skb->nh.iph->ihl*4 + sizeof(struct udphdr);
- if (dataoff >= skb->len) {
+ dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
+ if (dataoff >= (*pskb)->len) {
if (net_ratelimit())
- printk("amanda_help: skblen = %u\n", skb->len);
+ printk("amanda_help: skblen = %u\n", (*pskb)->len);
return NF_ACCEPT;
}
LOCK_BH(&amanda_buffer_lock);
- skb_copy_bits(skb, dataoff, amanda_buffer, skb->len - dataoff);
+ skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff);
data = amanda_buffer;
- data_limit = amanda_buffer + skb->len - dataoff;
+ data_limit = amanda_buffer + (*pskb)->len - dataoff;
*data_limit = '\0';
/* Search for the CONNECT string */
@@ -96,36 +103,44 @@ static int help(struct sk_buff *skb,
break;
exp = ip_conntrack_expect_alloc();
- if (exp == NULL)
+ if (exp == NULL) {
+ ret = NF_DROP;
goto out;
+ }
+
+ exp->expectfn = NULL;
+ exp->master = ct;
exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
+ exp->tuple.src.u.tcp.port = 0;
exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
exp->tuple.dst.protonum = IPPROTO_TCP;
+ exp->tuple.dst.u.tcp.port = htons(port);
+
exp->mask.src.ip = 0xFFFFFFFF;
+ exp->mask.src.u.tcp.port = 0;
exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.protonum = 0xFFFF;
+ exp->mask.dst.protonum = 0xFF;
exp->mask.dst.u.tcp.port = 0xFFFF;
- exp_amanda_info = &exp->help.exp_amanda_info;
- exp_amanda_info->offset = tmp - amanda_buffer;
- exp_amanda_info->port = port;
- exp_amanda_info->len = len;
-
- exp->tuple.dst.u.tcp.port = htons(port);
-
- ip_conntrack_expect_related(exp, ct);
+ if (ip_nat_amanda_hook)
+ ret = ip_nat_amanda_hook(pskb, ctinfo,
+ tmp - amanda_buffer,
+ len, exp);
+ else if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ }
}
out:
UNLOCK_BH(&amanda_buffer_lock);
- return NF_ACCEPT;
+ return ret;
}
static struct ip_conntrack_helper amanda_helper = {
.max_expected = ARRAY_SIZE(conns),
.timeout = 180,
- .flags = IP_CT_HELPER_F_REUSE_EXPECT,
.me = THIS_MODULE,
.help = help,
.name = "amanda",
@@ -134,7 +149,7 @@ static struct ip_conntrack_helper amanda_helper = {
.dst = { .protonum = IPPROTO_UDP },
},
.mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFFFF },
+ .dst = { .protonum = 0xFF },
},
};
@@ -148,6 +163,5 @@ static int __init init(void)
return ip_conntrack_helper_register(&amanda_helper);
}
-PROVIDES_CONNTRACK(amanda);
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 247301938778..0eaafec43dd0 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -58,7 +58,6 @@
#endif
DECLARE_RWLOCK(ip_conntrack_lock);
-DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);
@@ -79,7 +78,7 @@ static int ip_conntrack_vmalloc;
DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-inline void
+void
ip_conntrack_put(struct ip_conntrack *ct)
{
IP_NF_ASSERT(ct);
@@ -118,6 +117,7 @@ ip_ct_get_tuple(const struct iphdr *iph,
tuple->src.ip = iph->saddr;
tuple->dst.ip = iph->daddr;
tuple->dst.protonum = iph->protocol;
+ tuple->dst.dir = IP_CT_DIR_ORIGINAL;
return protocol->pkt_to_tuple(skb, dataoff, tuple);
}
@@ -130,135 +130,76 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
inverse->src.ip = orig->dst.ip;
inverse->dst.ip = orig->src.ip;
inverse->dst.protonum = orig->dst.protonum;
+ inverse->dst.dir = !orig->dst.dir;
return protocol->invert_tuple(inverse, orig);
}
/* ip_conntrack_expect helper functions */
-
-/* Compare tuple parts depending on mask. */
-static inline int expect_cmp(const struct ip_conntrack_expect *i,
- const struct ip_conntrack_tuple *tuple)
-{
- MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
- return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
-}
-
-static void
-destroy_expect(struct ip_conntrack_expect *exp)
+static void destroy_expect(struct ip_conntrack_expect *exp)
{
- DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
- IP_NF_ASSERT(atomic_read(&exp->use) == 0);
+ ip_conntrack_put(exp->master);
IP_NF_ASSERT(!timer_pending(&exp->timeout));
-
kmem_cache_free(ip_conntrack_expect_cachep, exp);
CONNTRACK_STAT_INC(expect_delete);
}
-inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
+static void unlink_expect(struct ip_conntrack_expect *exp)
{
- IP_NF_ASSERT(exp);
-
- if (atomic_dec_and_test(&exp->use)) {
- /* usage count dropped to zero */
- destroy_expect(exp);
- }
-}
-
-static inline struct ip_conntrack_expect *
-__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
-{
- MUST_BE_READ_LOCKED(&ip_conntrack_lock);
- MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
- return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
- struct ip_conntrack_expect *, tuple);
-}
-
-/* Find a expectation corresponding to a tuple. */
-struct ip_conntrack_expect *
-ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *exp;
-
- READ_LOCK(&ip_conntrack_lock);
- READ_LOCK(&ip_conntrack_expect_tuple_lock);
- exp = __ip_ct_expect_find(tuple);
- if (exp)
- atomic_inc(&exp->use);
- READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
- READ_UNLOCK(&ip_conntrack_lock);
-
- return exp;
+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+ list_del(&exp->list);
+ /* Logically in destroy_expect, but we hold the lock here. */
+ exp->master->expecting--;
}
-/* remove one specific expectation from all lists and drop refcount,
- * does _NOT_ delete the timer. */
-static void __unexpect_related(struct ip_conntrack_expect *expect)
+static void expectation_timed_out(unsigned long ul_expect)
{
- DEBUGP("unexpect_related(%p)\n", expect);
- MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
-
- /* we're not allowed to unexpect a confirmed expectation! */
- IP_NF_ASSERT(!expect->sibling);
-
- /* delete from global and local lists */
- list_del(&expect->list);
- list_del(&expect->expected_list);
+ struct ip_conntrack_expect *exp = (void *)ul_expect;
- /* decrement expect-count of master conntrack */
- if (expect->expectant)
- expect->expectant->expecting--;
-
- ip_conntrack_expect_put(expect);
+ WRITE_LOCK(&ip_conntrack_lock);
+ unlink_expect(exp);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ destroy_expect(exp);
}
-/* remove one specific expecatation from all lists, drop refcount
- * and expire timer.
- * This function can _NOT_ be called for confirmed expects! */
-static void unexpect_related(struct ip_conntrack_expect *expect)
+/* If an expectation for this connection is found, it gets deleted from
+ * the global list, then returned. */
+static struct ip_conntrack_expect *
+find_expectation(const struct ip_conntrack_tuple *tuple)
{
- IP_NF_ASSERT(expect->expectant);
- IP_NF_ASSERT(expect->expectant->helper);
- /* if we are supposed to have a timer, but we can't delete
- * it: race condition. __unexpect_related will
- * be calledd by timeout function */
- if (expect->expectant->helper->timeout
- && !del_timer(&expect->timeout))
- return;
+ struct ip_conntrack_expect *i;
- __unexpect_related(expect);
+ list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+ master ct never got confirmed, we'd hold a reference to it
+ and weird things would happen to future packets). */
+ if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
+ && is_confirmed(i->master)
+ && del_timer(&i->timeout)) {
+ unlink_expect(i);
+ return i;
+ }
+ }
+ return NULL;
}
-/* delete all unconfirmed expectations for this conntrack */
-static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
+/* delete all expectations for this conntrack */
+static void remove_expectations(struct ip_conntrack *ct)
{
- struct list_head *exp_entry, *next;
- struct ip_conntrack_expect *exp;
+ struct ip_conntrack_expect *i, *tmp;
- DEBUGP("remove_expectations(%p)\n", ct);
-
- list_for_each_safe(exp_entry, next, &ct->sibling_list) {
- exp = list_entry(exp_entry, struct ip_conntrack_expect,
- expected_list);
+ /* Optimization: most connections never expect any others. */
+ if (ct->expecting == 0)
+ return;
- /* we skip established expectations, as we want to delete
- * the un-established ones only */
- if (exp->sibling) {
- DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
- if (drop_refcount) {
- /* Indicate that this expectations parent is dead */
- ip_conntrack_put(exp->expectant);
- exp->expectant = NULL;
- }
- continue;
+ list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
+ if (i->master == ct && del_timer(&i->timeout)) {
+ unlink_expect(i);
+ destroy_expect(i);
}
-
- IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
- IP_NF_ASSERT(exp->expectant == ct);
-
- /* delete expectation from global and private lists */
- unexpect_related(exp);
}
}
@@ -275,14 +216,14 @@ clean_from_lists(struct ip_conntrack *ct)
LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
- /* Destroy all un-established, pending expectations */
- remove_expectations(ct, 1);
+ /* Destroy all pending expectations */
+ remove_expectations(ct);
}
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
- struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
+ struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
struct ip_conntrack_protocol *proto;
DEBUGP("destroy_conntrack(%p)\n", ct);
@@ -304,8 +245,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
* too. */
- if (ct->expecting)
- remove_expectations(ct, 1);
+ remove_expectations(ct);
/* We overload first tuple to link into unconfirmed list. */
if (!is_confirmed(ct)) {
@@ -313,21 +253,11 @@ destroy_conntrack(struct nf_conntrack *nfct)
list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
}
- /* Delete our master expectation */
- if (ct->master) {
- if (ct->master->expectant) {
- /* can't call __unexpect_related here,
- * since it would screw up expect_list */
- list_del(&ct->master->expected_list);
- master = ct->master->expectant;
- }
- kmem_cache_free(ip_conntrack_expect_cachep, ct->master);
- }
CONNTRACK_STAT_INC(delete);
WRITE_UNLOCK(&ip_conntrack_lock);
- if (master)
- ip_conntrack_put(master);
+ if (ct->master)
+ ip_conntrack_put(ct->master);
DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
kmem_cache_free(ip_conntrack_cachep, ct);
@@ -353,7 +283,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
const struct ip_conntrack *ignored_conntrack)
{
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
- return i->ctrack != ignored_conntrack
+ return tuplehash_to_ctrack(i) != ignored_conntrack
&& ip_ct_tuple_equal(tuple, &i->tuple);
}
@@ -386,7 +316,7 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
READ_LOCK(&ip_conntrack_lock);
h = __ip_conntrack_find(tuple, ignored_conntrack);
if (h)
- atomic_inc(&h->ctrack->ct_general.use);
+ atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
READ_UNLOCK(&ip_conntrack_lock);
return h;
@@ -394,13 +324,13 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
/* Confirm a connection given skb; places it in hash table */
int
-__ip_conntrack_confirm(struct sk_buff *skb)
+__ip_conntrack_confirm(struct sk_buff **pskb)
{
unsigned int hash, repl_hash;
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
- ct = ip_conntrack_get(skb, &ctinfo);
+ ct = ip_conntrack_get(*pskb, &ctinfo);
/* ipt_REJECT uses ip_conntrack_attach to attach related
ICMP/TCP RST packets in other direction. Actual packet
@@ -479,30 +409,33 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
connection. Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
- return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
+ return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
}
static int early_drop(struct list_head *chain)
{
/* Traverse backwards: gives us oldest, which is roughly LRU */
struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct = NULL;
int dropped = 0;
READ_LOCK(&ip_conntrack_lock);
h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
- if (h)
- atomic_inc(&h->ctrack->ct_general.use);
+ if (h) {
+ ct = tuplehash_to_ctrack(h);
+ atomic_inc(&ct->ct_general.use);
+ }
READ_UNLOCK(&ip_conntrack_lock);
- if (!h)
+ if (!ct)
return dropped;
- if (del_timer(&h->ctrack->timeout)) {
- death_by_timeout((unsigned long)h->ctrack);
+ if (del_timer(&ct->timeout)) {
+ death_by_timeout((unsigned long)ct);
dropped = 1;
CONNTRACK_STAT_INC(early_drop);
}
- ip_conntrack_put(h->ctrack);
+ ip_conntrack_put(ct);
return dropped;
}
@@ -512,7 +445,7 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i,
return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
}
-struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
+static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
{
return LIST_FIND(&helpers, helper_cmp,
struct ip_conntrack_helper *,
@@ -529,7 +462,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack *conntrack;
struct ip_conntrack_tuple repl_tuple;
size_t hash;
- struct ip_conntrack_expect *expected;
+ struct ip_conntrack_expect *exp;
if (!ip_conntrack_hash_rnd_initted) {
get_random_bytes(&ip_conntrack_hash_rnd, 4);
@@ -565,9 +498,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
atomic_set(&conntrack->ct_general.use, 1);
conntrack->ct_general.destroy = destroy_conntrack;
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
- conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
if (!protocol->new(conntrack, skb)) {
kmem_cache_free(ip_conntrack_cachep, conntrack);
return NULL;
@@ -577,73 +508,39 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
- INIT_LIST_HEAD(&conntrack->sibling_list);
-
WRITE_LOCK(&ip_conntrack_lock);
- /* Need finding and deleting of expected ONLY if we win race */
- READ_LOCK(&ip_conntrack_expect_tuple_lock);
- expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
- struct ip_conntrack_expect *, tuple);
- READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
-
- if (expected) {
- /* If master is not in hash table yet (ie. packet hasn't left
- this machine yet), how can other end know about expected?
- Hence these are not the droids you are looking for (if
- master ct never got confirmed, we'd hold a reference to it
- and weird things would happen to future packets). */
- if (!is_confirmed(expected->expectant)) {
- conntrack->helper = ip_ct_find_helper(&repl_tuple);
- goto end;
- }
-
- /* Expectation is dying... */
- if (expected->expectant->helper->timeout
- && !del_timer(&expected->timeout))
- goto end;
+ exp = find_expectation(tuple);
+ if (exp) {
DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
- conntrack, expected);
+ conntrack, exp);
/* Welcome, Mr. Bond. We've been expecting you... */
- IP_NF_ASSERT(expected->expectant);
__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
- conntrack->master = expected;
- expected->sibling = conntrack;
+ conntrack->master = exp->master;
#if CONFIG_IP_NF_CONNTRACK_MARK
- conntrack->mark = expected->expectant->mark;
+ conntrack->mark = exp->master->mark;
#endif
- LIST_DELETE(&ip_conntrack_expect_list, expected);
- expected->expectant->expecting--;
- nf_conntrack_get(&master_ct(conntrack)->ct_general);
-
- /* this is a braindead... --pablo */
- atomic_inc(&ip_conntrack_count);
-
- /* Overload tuple linked list to put us in unconfirmed list. */
- list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list,
- &unconfirmed);
- WRITE_UNLOCK(&ip_conntrack_lock);
-
- if (expected->expectfn)
- expected->expectfn(conntrack);
-
+ nf_conntrack_get(&conntrack->master->ct_general);
CONNTRACK_STAT_INC(expect_new);
-
- goto ret;
- } else {
+ } else {
conntrack->helper = ip_ct_find_helper(&repl_tuple);
CONNTRACK_STAT_INC(new);
}
-end:
/* Overload tuple linked list to put us in unconfirmed list. */
list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
-ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
+ if (exp) {
+ if (exp->expectfn)
+ exp->expectfn(conntrack, exp);
+ destroy_expect(exp);
+ }
+
+ return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
@@ -656,6 +553,7 @@ resolve_normal_ct(struct sk_buff *skb,
{
struct ip_conntrack_tuple tuple;
struct ip_conntrack_tuple_hash *h;
+ struct ip_conntrack *ct;
IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
@@ -672,6 +570,7 @@ resolve_normal_ct(struct sk_buff *skb,
if (IS_ERR(h))
return (void *)h;
}
+ ct = tuplehash_to_ctrack(h);
/* It exists; we have (non-exclusive) reference. */
if (DIRECTION(h) == IP_CT_DIR_REPLY) {
@@ -680,24 +579,24 @@ resolve_normal_ct(struct sk_buff *skb,
*set_reply = 1;
} else {
/* Once we've had two way comms, always ESTABLISHED. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
+ if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
DEBUGP("ip_conntrack_in: normal packet for %p\n",
- h->ctrack);
+ ct);
*ctinfo = IP_CT_ESTABLISHED;
- } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
+ } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
DEBUGP("ip_conntrack_in: related packet for %p\n",
- h->ctrack);
+ ct);
*ctinfo = IP_CT_RELATED;
} else {
DEBUGP("ip_conntrack_in: new packet for %p\n",
- h->ctrack);
+ ct);
*ctinfo = IP_CT_NEW;
}
*set_reply = 0;
}
- skb->nfct = &h->ctrack->ct_general;
+ skb->nfct = &ct->ct_general;
skb->nfctinfo = *ctinfo;
- return h->ctrack;
+ return ct;
}
/* Netfilter hook itself. */
@@ -782,16 +681,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
return -ret;
}
- if (ret != NF_DROP && ct->helper) {
- ret = ct->helper->help(*pskb, ct, ctinfo);
- if (ret == -1) {
- /* Invalid */
- CONNTRACK_STAT_INC(invalid);
- nf_conntrack_put((*pskb)->nfct);
- (*pskb)->nfct = NULL;
- return NF_ACCEPT;
- }
- }
if (set_reply)
set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
@@ -805,55 +694,49 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse,
ip_ct_find_proto(orig->dst.protonum));
}
-static inline int resent_expect(const struct ip_conntrack_expect *i,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *mask)
-{
- DEBUGP("resent_expect\n");
- DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
- DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
- DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
- return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
- || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
- && ip_ct_tuple_equal(&i->mask, mask));
-}
-
/* Would two expected things clash? */
-static inline int expect_clash(const struct ip_conntrack_expect *i,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *mask)
+static inline int expect_clash(const struct ip_conntrack_expect *a,
+ const struct ip_conntrack_expect *b)
{
/* Part covered by intersection of masks must be unequal,
otherwise they clash */
struct ip_conntrack_tuple intersect_mask
- = { { i->mask.src.ip & mask->src.ip,
- { i->mask.src.u.all & mask->src.u.all } },
- { i->mask.dst.ip & mask->dst.ip,
- { i->mask.dst.u.all & mask->dst.u.all },
- i->mask.dst.protonum & mask->dst.protonum } };
+ = { { a->mask.src.ip & b->mask.src.ip,
+ { a->mask.src.u.all & b->mask.src.u.all } },
+ { a->mask.dst.ip & b->mask.dst.ip,
+ { a->mask.dst.u.all & b->mask.dst.u.all },
+ a->mask.dst.protonum & b->mask.dst.protonum } };
- return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
+ return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
-inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
+static inline int expect_matches(const struct ip_conntrack_expect *a,
+ const struct ip_conntrack_expect *b)
{
- WRITE_LOCK(&ip_conntrack_lock);
- unexpect_related(expect);
- WRITE_UNLOCK(&ip_conntrack_lock);
+ return a->master == b->master
+ && ip_ct_tuple_equal(&a->tuple, &b->tuple)
+ && ip_ct_tuple_equal(&a->mask, &b->mask);
}
-
-static void expectation_timed_out(unsigned long ul_expect)
+
+/* Generally a bad idea to call this: could have matched already. */
+void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
- struct ip_conntrack_expect *expect = (void *) ul_expect;
+ struct ip_conntrack_expect *i;
- DEBUGP("expectation %p timed out\n", expect);
WRITE_LOCK(&ip_conntrack_lock);
- __unexpect_related(expect);
+ /* choose the oldest expectation to evict */
+ list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+ if (expect_matches(i, exp) && del_timer(&i->timeout)) {
+ unlink_expect(i);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ destroy_expect(i);
+ return;
+ }
+ }
WRITE_UNLOCK(&ip_conntrack_lock);
}
-struct ip_conntrack_expect *
-ip_conntrack_expect_alloc(void)
+struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
{
struct ip_conntrack_expect *new;
@@ -862,178 +745,95 @@ ip_conntrack_expect_alloc(void)
DEBUGP("expect_related: OOM allocating expect\n");
return NULL;
}
-
- /* tuple_cmp compares whole union, we have to initialized cleanly */
- memset(new, 0, sizeof(struct ip_conntrack_expect));
- atomic_set(&new->use, 1);
-
+ new->master = NULL;
return new;
}
-static void
-ip_conntrack_expect_insert(struct ip_conntrack_expect *new,
- struct ip_conntrack *related_to)
+void ip_conntrack_expect_free(struct ip_conntrack_expect *expect)
{
- DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
- new->expectant = related_to;
- new->sibling = NULL;
-
- /* add to expected list for this connection */
- list_add_tail(&new->expected_list, &related_to->sibling_list);
- /* add to global list of expectations */
- list_prepend(&ip_conntrack_expect_list, &new->list);
- /* add and start timer if required */
- if (related_to->helper->timeout) {
- init_timer(&new->timeout);
- new->timeout.data = (unsigned long)new;
- new->timeout.function = expectation_timed_out;
- new->timeout.expires = jiffies +
- related_to->helper->timeout * HZ;
- add_timer(&new->timeout);
- }
- related_to->expecting++;
+ kmem_cache_free(ip_conntrack_expect_cachep, expect);
}
-/* Add a related connection. */
-int ip_conntrack_expect_related(struct ip_conntrack_expect *expect,
- struct ip_conntrack *related_to)
+static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
- struct ip_conntrack_expect *old;
- int ret = 0;
+ atomic_inc(&exp->master->ct_general.use);
+ exp->master->expecting++;
+ list_add(&exp->list, &ip_conntrack_expect_list);
+
+ if (exp->master->helper->timeout) {
+ init_timer(&exp->timeout);
+ exp->timeout.data = (unsigned long)exp;
+ exp->timeout.function = expectation_timed_out;
+ exp->timeout.expires
+ = jiffies + exp->master->helper->timeout * HZ;
+ add_timer(&exp->timeout);
+ } else
+ exp->timeout.function = NULL;
- WRITE_LOCK(&ip_conntrack_lock);
- /* Because of the write lock, no reader can walk the lists,
- * so there is no need to use the tuple lock too */
+ CONNTRACK_STAT_INC(expect_create);
+}
- DEBUGP("ip_conntrack_expect_related %p\n", related_to);
- DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
- DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
+/* Race with expectations being used means we could have none to find; OK. */
+static void evict_oldest_expect(struct ip_conntrack *master)
+{
+ struct ip_conntrack_expect *i;
- old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
- struct ip_conntrack_expect *, &expect->tuple,
- &expect->mask);
- if (old) {
- /* Helper private data may contain offsets but no pointers
- pointing into the payload - otherwise we should have to copy
- the data filled out by the helper over the old one */
- DEBUGP("expect_related: resent packet\n");
- if (related_to->helper->timeout) {
- if (!del_timer(&old->timeout)) {
- /* expectation is dying. Fall through */
- goto out;
- } else {
- old->timeout.expires = jiffies +
- related_to->helper->timeout * HZ;
- add_timer(&old->timeout);
+ list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
+ if (i->master == master) {
+ if (del_timer(&i->timeout)) {
+ unlink_expect(i);
+ destroy_expect(i);
}
+ break;
}
-
- WRITE_UNLOCK(&ip_conntrack_lock);
- /* This expectation is not inserted so no need to lock */
- kmem_cache_free(ip_conntrack_expect_cachep, expect);
- return -EEXIST;
-
- } else if (related_to->helper->max_expected &&
- related_to->expecting >= related_to->helper->max_expected) {
- /* old == NULL */
- if (!(related_to->helper->flags &
- IP_CT_HELPER_F_REUSE_EXPECT)) {
- WRITE_UNLOCK(&ip_conntrack_lock);
- if (net_ratelimit())
- printk(KERN_WARNING
- "ip_conntrack: max number of expected "
- "connections %i of %s reached for "
- "%u.%u.%u.%u->%u.%u.%u.%u\n",
- related_to->helper->max_expected,
- related_to->helper->name,
- NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
- NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
- kmem_cache_free(ip_conntrack_expect_cachep, expect);
- return -EPERM;
- }
- DEBUGP("ip_conntrack: max number of expected "
- "connections %i of %s reached for "
- "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
- related_to->helper->max_expected,
- related_to->helper->name,
- NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
- NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
-
- /* choose the the oldest expectation to evict */
- list_for_each_entry(old, &related_to->sibling_list,
- expected_list)
- if (old->sibling == NULL)
- break;
-
- /* We cannot fail since related_to->expecting is the number
- * of unconfirmed expectations */
- IP_NF_ASSERT(old && old->sibling == NULL);
-
- /* newnat14 does not reuse the real allocated memory
- * structures but rather unexpects the old and
- * allocates a new. unexpect_related will decrement
- * related_to->expecting.
- */
- unexpect_related(old);
- ret = -EPERM;
- } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
- struct ip_conntrack_expect *, &expect->tuple,
- &expect->mask)) {
- WRITE_UNLOCK(&ip_conntrack_lock);
- DEBUGP("expect_related: busy!\n");
-
- kmem_cache_free(ip_conntrack_expect_cachep, expect);
- return -EBUSY;
}
+}
-out: ip_conntrack_expect_insert(expect, related_to);
-
- WRITE_UNLOCK(&ip_conntrack_lock);
-
- CONNTRACK_STAT_INC(expect_create);
+static inline int refresh_timer(struct ip_conntrack_expect *i)
+{
+ if (!del_timer(&i->timeout))
+ return 0;
- return ret;
+ i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
+ add_timer(&i->timeout);
+ return 1;
}
-/* Change tuple in an existing expectation */
-int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
- struct ip_conntrack_tuple *newtuple)
+int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
+ struct ip_conntrack_expect *i;
int ret;
- MUST_BE_READ_LOCKED(&ip_conntrack_lock);
- WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
-
- DEBUGP("change_expect:\n");
- DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
- DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask);
- DEBUGP("newtuple: "); DUMP_TUPLE(newtuple);
- if (expect->ct_tuple.dst.protonum == 0) {
- /* Never seen before */
- DEBUGP("change expect: never seen before\n");
- if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
- && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
- struct ip_conntrack_expect *, newtuple, &expect->mask)) {
- /* Force NAT to find an unused tuple */
- ret = -1;
- } else {
- memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
- memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
- ret = 0;
- }
- } else {
- /* Resent packet */
- DEBUGP("change expect: resent packet\n");
- if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
- ret = 0;
- } else {
- /* Force NAT to choose again the same port */
- ret = -1;
+ DEBUGP("ip_conntrack_expect_related %p\n", related_to);
+ DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
+ DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ list_for_each_entry(i, &ip_conntrack_expect_list, list) {
+ if (expect_matches(i, expect)) {
+ /* Refresh timer: if it's dying, ignore.. */
+ if (refresh_timer(i)) {
+ ret = 0;
+ /* We don't need the one they've given us. */
+ ip_conntrack_expect_free(expect);
+ goto out;
+ }
+ } else if (expect_clash(i, expect)) {
+ ret = -EBUSY;
+ goto out;
}
}
- WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
-
- return ret;
+
+ /* Will be over limit? */
+ if (expect->master->helper->max_expected &&
+ expect->master->expecting >= expect->master->helper->max_expected)
+ evict_oldest_expect(expect->master);
+
+ ip_conntrack_expect_insert(expect);
+ ret = 0;
+out:
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return ret;
}
/* Alter reply tuple (maybe alter helper). This is for NAT, and is
@@ -1049,13 +849,14 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
DUMP_TUPLE(newreply);
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- if (!conntrack->master && list_empty(&conntrack->sibling_list))
+ if (!conntrack->master && conntrack->expecting == 0)
conntrack->helper = ip_ct_find_helper(newreply);
WRITE_UNLOCK(&ip_conntrack_lock);
}
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
+ BUG_ON(me->timeout == 0);
WRITE_LOCK(&ip_conntrack_lock);
list_prepend(&helpers, me);
WRITE_UNLOCK(&ip_conntrack_lock);
@@ -1066,23 +867,27 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
static inline int unhelp(struct ip_conntrack_tuple_hash *i,
const struct ip_conntrack_helper *me)
{
- if (i->ctrack->helper == me) {
- /* Get rid of any expected. */
- remove_expectations(i->ctrack, 0);
- /* And *then* set helper to NULL */
- i->ctrack->helper = NULL;
- }
+ if (tuplehash_to_ctrack(i)->helper == me)
+ tuplehash_to_ctrack(i)->helper = NULL;
return 0;
}
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
unsigned int i;
+ struct ip_conntrack_expect *exp, *tmp;
/* Need write lock here, to delete helper. */
WRITE_LOCK(&ip_conntrack_lock);
LIST_DELETE(&helpers, me);
+ /* Get rid of expectations */
+ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
+ if (exp->master->helper == me && del_timer(&exp->timeout)) {
+ unlink_expect(exp);
+ destroy_expect(exp);
+ }
+ }
/* Get rid of expecteds, set helpers to NULL. */
LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
for (i = 0; i < ip_conntrack_htable_size; i++)
@@ -1201,7 +1006,7 @@ do_iter(const struct ip_conntrack_tuple_hash *i,
int (*iter)(struct ip_conntrack *i, void *data),
void *data)
{
- return iter(i->ctrack, data);
+ return iter(tuplehash_to_ctrack(i), data);
}
/* Bring out ya dead! */
@@ -1222,7 +1027,7 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
h = LIST_FIND_W(&unconfirmed, do_iter,
struct ip_conntrack_tuple_hash *, iter, data);
if (h)
- atomic_inc(&h->ctrack->ct_general.use);
+ atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
WRITE_UNLOCK(&ip_conntrack_lock);
return h;
@@ -1235,12 +1040,13 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
unsigned int bucket = 0;
while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
+ struct ip_conntrack *ct = tuplehash_to_ctrack(h);
/* Time to push up daises... */
- if (del_timer(&h->ctrack->timeout))
- death_by_timeout((unsigned long)h->ctrack);
+ if (del_timer(&ct->timeout))
+ death_by_timeout((unsigned long)ct);
/* ... else the timer will get him soon. */
- ip_conntrack_put(h->ctrack);
+ ip_conntrack_put(ct);
}
}
@@ -1277,16 +1083,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
h = ip_conntrack_find_get(&tuple, NULL);
if (h) {
struct sockaddr_in sin;
+ struct ip_conntrack *ct = tuplehash_to_ctrack(h);
sin.sin_family = AF_INET;
- sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.u.tcp.port;
- sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.ip;
DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
- ip_conntrack_put(h->ctrack);
+ ip_conntrack_put(ct);
if (copy_to_user(user, &sin, sizeof(sin)) != 0)
return -EFAULT;
else
@@ -1398,7 +1205,7 @@ int __init ip_conntrack_init(void)
ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
sizeof(struct ip_conntrack), 0,
- SLAB_HWCACHE_ALIGN, NULL,NULL);
+ 0, NULL, NULL);
if (!ip_conntrack_cachep) {
printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
goto err_free_hash;
@@ -1406,7 +1213,7 @@ int __init ip_conntrack_init(void)
ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
sizeof(struct ip_conntrack_expect),
- 0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+ 0, 0, NULL, NULL);
if (!ip_conntrack_expect_cachep) {
printk(KERN_ERR "Unable to create ip_expect slab cache\n");
goto err_free_conntrack_slab;
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
index 2d6ffa497997..f3818d71c7a2 100644
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_ftp.c
@@ -29,7 +29,6 @@ MODULE_DESCRIPTION("ftp connection tracking helper");
static char ftp_buffer[65536];
static DECLARE_LOCK(ip_ftp_lock);
-struct module *ip_conntrack_ftp = THIS_MODULE;
#define MAX_PORTS 8
static int ports[MAX_PORTS];
@@ -39,6 +38,15 @@ module_param_array(ports, int, &ports_c, 0400);
static int loose;
module_param(loose, int, 0600);
+unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ enum ip_ct_ftp_type type,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp,
+ u32 *seq);
+EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
+
#if 0
#define DEBUGP printk
#else
@@ -243,24 +251,53 @@ static int find_pattern(const char *data, size_t dlen,
return 1;
}
-static int help(struct sk_buff *skb,
+/* Look up to see if we're just after a \n. */
+static int find_nl_seq(u16 seq, const struct ip_ct_ftp_master *info, int dir)
+{
+ unsigned int i;
+
+ for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
+ if (info->seq_aft_nl[dir][i] == seq)
+ return 1;
+ return 0;
+}
+
+/* We don't update if it's older than what we have. */
+static void update_nl_seq(u16 nl_seq, struct ip_ct_ftp_master *info, int dir)
+{
+ unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
+
+ /* Look for oldest: if we find exact match, we're done. */
+ for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
+ if (info->seq_aft_nl[dir][i] == nl_seq)
+ return;
+
+ if (oldest == info->seq_aft_nl_num[dir]
+ || before(info->seq_aft_nl[dir][i], oldest))
+ oldest = i;
+ }
+
+ if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER)
+ info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
+ else if (oldest != NUM_SEQ_TO_REMEMBER)
+ info->seq_aft_nl[dir][oldest] = nl_seq;
+}
+
+static int help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
unsigned int dataoff, datalen;
struct tcphdr _tcph, *th;
char *fb_ptr;
- u_int32_t old_seq_aft_nl;
- int old_seq_aft_nl_set, ret;
- u_int32_t array[6] = { 0 };
+ int ret;
+ u32 seq, array[6] = { 0 };
int dir = CTINFO2DIR(ctinfo);
unsigned int matchlen, matchoff;
struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
struct ip_conntrack_expect *exp;
- struct ip_ct_ftp_expect *exp_ftp_info;
-
unsigned int i;
- int found = 0;
+ int found = 0, ends_in_nl;
/* Until there's been traffic both ways, don't look in packets. */
if (ctinfo != IP_CT_ESTABLISHED
@@ -269,46 +306,35 @@ static int help(struct sk_buff *skb,
return NF_ACCEPT;
}
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
- dataoff = skb->nh.iph->ihl*4 + th->doff*4;
+ dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
/* No data? */
- if (dataoff >= skb->len) {
- DEBUGP("ftp: skblen = %u\n", skb->len);
+ if (dataoff >= (*pskb)->len) {
+ DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
return NF_ACCEPT;
}
- datalen = skb->len - dataoff;
+ datalen = (*pskb)->len - dataoff;
LOCK_BH(&ip_ftp_lock);
- fb_ptr = skb_header_pointer(skb, dataoff,
- skb->len - dataoff, ftp_buffer);
+ fb_ptr = skb_header_pointer(*pskb, dataoff,
+ (*pskb)->len - dataoff, ftp_buffer);
BUG_ON(fb_ptr == NULL);
- old_seq_aft_nl_set = ct_ftp_info->seq_aft_nl_set[dir];
- old_seq_aft_nl = ct_ftp_info->seq_aft_nl[dir];
-
- DEBUGP("conntrack_ftp: datalen %u\n", datalen);
- if (fb_ptr[datalen - 1] == '\n') {
- DEBUGP("conntrack_ftp: datalen %u ends in \\n\n", datalen);
- if (!old_seq_aft_nl_set
- || after(ntohl(th->seq) + datalen, old_seq_aft_nl)) {
- DEBUGP("conntrack_ftp: updating nl to %u\n",
- ntohl(th->seq) + datalen);
- ct_ftp_info->seq_aft_nl[dir] =
- ntohl(th->seq) + datalen;
- ct_ftp_info->seq_aft_nl_set[dir] = 1;
- }
- }
+ ends_in_nl = (fb_ptr[datalen - 1] == '\n');
+ seq = ntohl(th->seq) + datalen;
- if(!old_seq_aft_nl_set ||
- (ntohl(th->seq) != old_seq_aft_nl)) {
- DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u)\n",
+ /* Look up to see if we're just after a \n. */
+ if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
+ /* Now if this ends in \n, update ftp info. */
+ DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
+ ct_ftp_info->seq_aft_nl[0][dir]
old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
ret = NF_ACCEPT;
- goto out;
+ goto out_update_nl;
}
/* Initialize IP array to expected address (it's not mentioned
@@ -321,7 +347,7 @@ static int help(struct sk_buff *skb,
for (i = 0; i < ARRAY_SIZE(search); i++) {
if (search[i].dir != dir) continue;
- found = find_pattern(fb_ptr, skb->len - dataoff,
+ found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
search[i].pattern,
search[i].plen,
search[i].skip,
@@ -344,7 +370,7 @@ static int help(struct sk_buff *skb,
goto out;
} else if (found == 0) { /* No match */
ret = NF_ACCEPT;
- goto out;
+ goto out_update_nl;
}
DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n",
@@ -354,20 +380,17 @@ static int help(struct sk_buff *skb,
/* Allocate expectation which will be inserted */
exp = ip_conntrack_expect_alloc();
if (exp == NULL) {
- ret = NF_ACCEPT;
+ ret = NF_DROP;
goto out;
}
- exp_ftp_info = &exp->help.exp_ftp_info;
+ /* We refer to the reverse direction ("!dir") tuples here,
+ * because we're expecting something in the other direction.
+ * Doesn't matter unless NAT is happening. */
+ exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
- /* Update the ftp info */
if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
- == ct->tuplehash[dir].tuple.src.ip) {
- exp->seq = ntohl(th->seq) + matchoff;
- exp_ftp_info->len = matchlen;
- exp_ftp_info->ftptype = search[i].ftptype;
- exp_ftp_info->port = array[4] << 8 | array[5];
- } else {
+ != ct->tuplehash[dir].tuple.src.ip) {
/* Enrico Scholz's passive FTP to partially RNAT'd ftp
server: it really wants us to connect to a
different IP address. Simply don't record it for
@@ -381,28 +404,44 @@ static int help(struct sk_buff *skb,
problem (DMZ machines opening holes to internal
networks, or the packet filter itself). */
if (!loose) {
- ip_conntrack_expect_put(exp);
ret = NF_ACCEPT;
- goto out;
+ ip_conntrack_expect_free(exp);
+ goto out_update_nl;
}
+ exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
+ | (array[2] << 8) | array[3]);
}
- exp->tuple = ((struct ip_conntrack_tuple)
- { { ct->tuplehash[!dir].tuple.src.ip,
- { 0 } },
- { htonl((array[0] << 24) | (array[1] << 16)
- | (array[2] << 8) | array[3]),
- { .tcp = { htons(array[4] << 8 | array[5]) } },
- IPPROTO_TCP }});
+ exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
+ exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
+ exp->tuple.src.u.tcp.port = 0; /* Don't care. */
+ exp->tuple.dst.protonum = IPPROTO_TCP;
exp->mask = ((struct ip_conntrack_tuple)
{ { 0xFFFFFFFF, { 0 } },
- { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
+ { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
exp->expectfn = NULL;
+ exp->master = ct;
+
+ /* Now, NAT might want to mangle the packet, and register the
+ * (possibly changed) expectation itself. */
+ if (ip_nat_ftp_hook)
+ ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
+ matchoff, matchlen, exp, &seq);
+ else {
+ /* Can't expect this? Best to drop packet now. */
+ if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ } else
+ ret = NF_ACCEPT;
+ }
- /* Ignore failure; should only happen with NAT */
- ip_conntrack_expect_related(exp, ct);
- ret = NF_ACCEPT;
+out_update_nl:
+ /* Now if this ends in \n, update ftp info. Seq may have been
+ * adjusted by NAT code. */
+ if (ends_in_nl)
+ update_nl_seq(seq, ct_ftp_info,dir);
out:
UNLOCK_BH(&ip_ftp_lock);
return ret;
@@ -434,11 +473,10 @@ static int __init init(void)
ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
ftp[i].tuple.dst.protonum = IPPROTO_TCP;
ftp[i].mask.src.u.tcp.port = 0xFFFF;
- ftp[i].mask.dst.protonum = 0xFFFF;
+ ftp[i].mask.dst.protonum = 0xFF;
ftp[i].max_expected = 1;
- ftp[i].timeout = 0;
- ftp[i].flags = IP_CT_HELPER_F_REUSE_EXPECT;
- ftp[i].me = ip_conntrack_ftp;
+ ftp[i].timeout = 5 * 60; /* 5 minutes */
+ ftp[i].me = THIS_MODULE;
ftp[i].help = help;
tmpname = &ftp_names[i][0];
@@ -460,7 +498,5 @@ static int __init init(void)
return 0;
}
-PROVIDES_CONNTRACK(ftp);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
index ec79c08a13fd..54ef2dab5de0 100644
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ b/net/ipv4/netfilter/ip_conntrack_irc.c
@@ -43,6 +43,13 @@ static unsigned int dcc_timeout = 300;
static char irc_buffer[65536];
static DECLARE_LOCK(irc_buffer_lock);
+unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
+
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
MODULE_LICENSE("GPL");
@@ -56,8 +63,6 @@ MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
#define MINMATCHLEN 5
-struct module *ip_conntrack_irc = THIS_MODULE;
-
#if 0
#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
__FILE__, __FUNCTION__ , ## args)
@@ -98,7 +103,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
return 0;
}
-static int help(struct sk_buff *skb,
+static int help(struct sk_buff **pskb,
struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
{
unsigned int dataoff;
@@ -106,11 +111,10 @@ static int help(struct sk_buff *skb,
char *data, *data_limit, *ib_ptr;
int dir = CTINFO2DIR(ctinfo);
struct ip_conntrack_expect *exp;
- struct ip_ct_irc_expect *exp_irc_info = NULL;
-
+ u32 seq;
u_int32_t dcc_ip;
u_int16_t dcc_port;
- int i;
+ int i, ret = NF_ACCEPT;
char *addr_beg_p, *addr_end_p;
DEBUGP("entered\n");
@@ -127,23 +131,23 @@ static int help(struct sk_buff *skb,
}
/* Not a full tcp header? */
- th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
+ th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
sizeof(_tcph), &_tcph);
if (th == NULL)
return NF_ACCEPT;
/* No data? */
- dataoff = skb->nh.iph->ihl*4 + th->doff*4;
- if (dataoff >= skb->len)
+ dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
+ if (dataoff >= (*pskb)->len)
return NF_ACCEPT;
LOCK_BH(&irc_buffer_lock);
- ib_ptr = skb_header_pointer(skb, dataoff,
- skb->len - dataoff, irc_buffer);
+ ib_ptr = skb_header_pointer(*pskb, dataoff,
+ (*pskb)->len - dataoff, irc_buffer);
BUG_ON(ib_ptr == NULL);
data = ib_ptr;
- data_limit = ib_ptr + skb->len - dataoff;
+ data_limit = ib_ptr + (*pskb)->len - dataoff;
/* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
* 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
@@ -195,19 +199,15 @@ static int help(struct sk_buff *skb,
}
exp = ip_conntrack_expect_alloc();
- if (exp == NULL)
+ if (exp == NULL) {
+ ret = NF_DROP;
goto out;
-
- exp_irc_info = &exp->help.exp_irc_info;
+ }
/* save position of address in dcc string,
* necessary for NAT */
DEBUGP("tcph->seq = %u\n", th->seq);
- exp->seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
- exp_irc_info->len = (addr_end_p - addr_beg_p);
- exp_irc_info->port = dcc_port;
- DEBUGP("wrote info seq=%u (ofs=%u), len=%d\n",
- exp->seq, (addr_end_p - _data), exp_irc_info->len);
+ seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
exp->tuple = ((struct ip_conntrack_tuple)
{ { 0, { 0 } },
@@ -215,25 +215,25 @@ static int help(struct sk_buff *skb,
IPPROTO_TCP }});
exp->mask = ((struct ip_conntrack_tuple)
{ { 0, { 0 } },
- { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }});
-
+ { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
exp->expectfn = NULL;
-
- DEBUGP("expect_related %u.%u.%u.%u:%u-%u.%u.%u.%u:%u\n",
- NIPQUAD(exp->tuple.src.ip),
- ntohs(exp->tuple.src.u.tcp.port),
- NIPQUAD(exp->tuple.dst.ip),
- ntohs(exp->tuple.dst.u.tcp.port));
-
- ip_conntrack_expect_related(exp, ct);
-
+ exp->master = ct;
+ if (ip_nat_irc_hook)
+ ret = ip_nat_irc_hook(pskb, ctinfo,
+ addr_beg_p - ib_ptr,
+ addr_end_p - addr_beg_p,
+ exp);
+ else if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ }
goto out;
} /* for .. NUM_DCCPROTO */
} /* while data < ... */
out:
UNLOCK_BH(&irc_buffer_lock);
- return NF_ACCEPT;
+ return ret;
}
static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
@@ -265,11 +265,10 @@ static int __init init(void)
hlpr->tuple.src.u.tcp.port = htons(ports[i]);
hlpr->tuple.dst.protonum = IPPROTO_TCP;
hlpr->mask.src.u.tcp.port = 0xFFFF;
- hlpr->mask.dst.protonum = 0xFFFF;
+ hlpr->mask.dst.protonum = 0xFF;
hlpr->max_expected = max_dcc_channels;
hlpr->timeout = dcc_timeout;
- hlpr->flags = IP_CT_HELPER_F_REUSE_EXPECT;
- hlpr->me = ip_conntrack_irc;
+ hlpr->me = THIS_MODULE;
hlpr->help = help;
tmpname = &irc_names[i][0];
@@ -305,7 +304,5 @@ static void fini(void)
}
}
-PROVIDES_CONNTRACK(irc);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 344820a514ca..602c74db3252 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -196,7 +196,7 @@ icmp_error_message(struct sk_buff *skb,
}
/* Update skb to refer to this connection */
- skb->nfct = &h->ctrack->ct_general;
+ skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
skb->nfctinfo = *ctinfo;
return -NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
index aa49bea4aa56..7d9f8ea14a5e 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
@@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = {
#define HOURS * 60 MINS
#define DAYS * 24 HOURS
-unsigned long ip_ct_sctp_timeout_closed = 10 SECS;
-unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS;
-unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
-unsigned long ip_ct_sctp_timeout_established = 5 DAYS;
-unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
-unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
-unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
+static unsigned long ip_ct_sctp_timeout_closed = 10 SECS;
+static unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS;
+static unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
+static unsigned long ip_ct_sctp_timeout_established = 5 DAYS;
+static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
+static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
+static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
static unsigned long * sctp_timeouts[]
= { NULL, /* SCTP_CONNTRACK_NONE */
@@ -494,14 +494,7 @@ static int sctp_new(struct ip_conntrack *conntrack,
return 1;
}
-static int sctp_exp_matches_pkt(struct ip_conntrack_expect *exp,
- const struct sk_buff *skb)
-{
- /* To be implemented */
- return 0;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
+static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
.proto = IPPROTO_SCTP,
.name = "sctp",
.pkt_to_tuple = sctp_pkt_to_tuple,
@@ -511,7 +504,6 @@ struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
.packet = sctp_packet,
.new = sctp_new,
.destroy = NULL,
- .exp_matches_pkt = sctp_exp_matches_pkt,
.me = THIS_MODULE
};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index d327678d3dbd..54a2f2fe1cb1 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -707,9 +707,9 @@ static int tcp_in_window(struct ip_ct_tcp *state,
#ifdef CONFIG_IP_NF_NAT_NEEDED
/* Update sender->td_end after NAT successfully mangled the packet */
-int ip_conntrack_tcp_update(struct sk_buff *skb,
- struct ip_conntrack *conntrack,
- int dir)
+void ip_conntrack_tcp_update(struct sk_buff *skb,
+ struct ip_conntrack *conntrack,
+ enum ip_conntrack_dir dir)
{
struct iphdr *iph = skb->nh.iph;
struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
@@ -735,8 +735,6 @@ int ip_conntrack_tcp_update(struct sk_buff *skb,
sender->td_scale,
receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
receiver->td_scale);
-
- return 1;
}
#endif
@@ -1061,22 +1059,6 @@ static int tcp_new(struct ip_conntrack *conntrack,
return 1;
}
-static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp,
- const struct sk_buff *skb)
-{
- const struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th, _tcph;
- unsigned int datalen;
-
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return 0;
- datalen = skb->len - iph->ihl*4 - th->doff*4;
-
- return between(exp->seq, ntohl(th->seq), ntohl(th->seq) + datalen);
-}
-
struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
{
.proto = IPPROTO_TCP,
@@ -1087,6 +1069,5 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
.print_conntrack = tcp_print_conntrack,
.packet = tcp_packet,
.new = tcp_new,
- .exp_matches_pkt = tcp_exp_matches_pkt,
.error = tcp_error,
};
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dc796520c158..b1b002d94983 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -66,7 +66,8 @@ print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
#ifdef CONFIG_IP_NF_CT_ACCT
static unsigned int
-seq_print_counters(struct seq_file *s, struct ip_conntrack_counter *counter)
+seq_print_counters(struct seq_file *s,
+ const struct ip_conntrack_counter *counter)
{
return seq_printf(s, "packets=%llu bytes=%llu ",
(unsigned long long)counter->packets,
@@ -99,7 +100,7 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash,
struct seq_file *s)
{
- struct ip_conntrack *conntrack = hash->ctrack;
+ const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
struct ip_conntrack_protocol *proto;
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
@@ -200,7 +201,6 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
/* strange seq_file api calls stop even if we fail,
* thus we need to grab lock since stop unlocks */
READ_LOCK(&ip_conntrack_lock);
- READ_LOCK(&ip_conntrack_expect_tuple_lock);
if (list_empty(e))
return NULL;
@@ -227,7 +227,6 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void exp_seq_stop(struct seq_file *s, void *v)
{
- READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
READ_UNLOCK(&ip_conntrack_lock);
}
@@ -235,14 +234,13 @@ static int exp_seq_show(struct seq_file *s, void *v)
{
struct ip_conntrack_expect *expect = v;
- if (expect->expectant->helper->timeout)
+ if (expect->timeout.function)
seq_printf(s, "%lu ", timer_pending(&expect->timeout)
? (expect->timeout.expires - jiffies)/HZ : 0);
else
seq_printf(s, "- ");
- seq_printf(s, "use=%u proto=%u ", atomic_read(&expect->use),
- expect->tuple.dst.protonum);
+ seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
print_tuple(s, &expect->tuple,
ip_ct_find_proto(expect->tuple.dst.protonum));
@@ -364,8 +362,20 @@ static unsigned int ip_confirm(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+
+ /* This is where we call the helper: as the packet goes out. */
+ ct = ip_conntrack_get(*pskb, &ctinfo);
+ if (ct && ct->helper) {
+ unsigned int ret;
+ ret = ct->helper->help(pskb, ct, ctinfo);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+
/* We've seen it coming out the other side: confirm it */
- return ip_conntrack_confirm(*pskb);
+ return ip_conntrack_confirm(pskb);
}
static unsigned int ip_conntrack_defrag(unsigned int hooknum,
@@ -896,17 +906,13 @@ EXPORT_SYMBOL(ip_ct_iterate_cleanup);
EXPORT_SYMBOL(ip_ct_refresh_acct);
EXPORT_SYMBOL(ip_ct_protos);
EXPORT_SYMBOL(ip_ct_find_proto);
-EXPORT_SYMBOL(ip_ct_find_helper);
EXPORT_SYMBOL(ip_conntrack_expect_alloc);
+EXPORT_SYMBOL(ip_conntrack_expect_free);
EXPORT_SYMBOL(ip_conntrack_expect_related);
-EXPORT_SYMBOL(ip_conntrack_change_expect);
EXPORT_SYMBOL(ip_conntrack_unexpect_related);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_put);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
EXPORT_SYMBOL(ip_conntrack_htable_size);
-EXPORT_SYMBOL(ip_conntrack_expect_list);
EXPORT_SYMBOL(ip_conntrack_lock);
EXPORT_SYMBOL(ip_conntrack_hash);
EXPORT_SYMBOL(ip_conntrack_untracked);
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
index 01a5e53e81bb..992fac3e36ee 100644
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ b/net/ipv4/netfilter/ip_conntrack_tftp.c
@@ -38,15 +38,21 @@ MODULE_PARM_DESC(ports, "port numbers of tftp servers");
#define DEBUGP(format, args...)
#endif
-static int tftp_help(struct sk_buff *skb,
+unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack_expect *exp);
+EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
+
+static int tftp_help(struct sk_buff **pskb,
struct ip_conntrack *ct,
enum ip_conntrack_info ctinfo)
{
struct tftphdr _tftph, *tfh;
struct ip_conntrack_expect *exp;
+ unsigned int ret = NF_ACCEPT;
- tfh = skb_header_pointer(skb,
- skb->nh.iph->ihl * 4 + sizeof(struct udphdr),
+ tfh = skb_header_pointer(*pskb,
+ (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
sizeof(_tftph), &_tftph);
if (tfh == NULL)
return NF_ACCEPT;
@@ -61,19 +67,25 @@ static int tftp_help(struct sk_buff *skb,
exp = ip_conntrack_expect_alloc();
if (exp == NULL)
- return NF_ACCEPT;
+ return NF_DROP;
exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
exp->mask.src.ip = 0xffffffff;
exp->mask.dst.ip = 0xffffffff;
exp->mask.dst.u.udp.port = 0xffff;
- exp->mask.dst.protonum = 0xffff;
+ exp->mask.dst.protonum = 0xff;
exp->expectfn = NULL;
+ exp->master = ct;
DEBUGP("expect: ");
DUMP_TUPLE(&exp->tuple);
DUMP_TUPLE(&exp->mask);
- ip_conntrack_expect_related(exp, ct);
+ if (ip_nat_tftp_hook)
+ ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
+ else if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
+ ret = NF_DROP;
+ }
break;
case TFTP_OPCODE_DATA:
case TFTP_OPCODE_ACK:
@@ -116,11 +128,10 @@ static int __init init(void)
tftp[i].tuple.dst.protonum = IPPROTO_UDP;
tftp[i].tuple.src.u.udp.port = htons(ports[i]);
- tftp[i].mask.dst.protonum = 0xFFFF;
+ tftp[i].mask.dst.protonum = 0xFF;
tftp[i].mask.src.u.udp.port = 0xFFFF;
tftp[i].max_expected = 1;
- tftp[i].timeout = 0;
- tftp[i].flags = IP_CT_HELPER_F_REUSE_EXPECT;
+ tftp[i].timeout = 5 * 60; /* 5 minutes */
tftp[i].me = THIS_MODULE;
tftp[i].help = tftp_help;
@@ -144,7 +155,5 @@ static int __init init(void)
return(0);
}
-PROVIDES_CONNTRACK(tftp);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
index 144e32f3582d..da1f412583ed 100644
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ b/net/ipv4/netfilter/ip_nat_amanda.c
@@ -31,118 +31,58 @@ MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
MODULE_DESCRIPTION("Amanda NAT helper");
MODULE_LICENSE("GPL");
-static unsigned int
-amanda_nat_expected(struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
+static unsigned int help(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp)
{
- struct ip_conntrack *master = master_ct(ct);
- struct ip_ct_amanda_expect *exp_amanda_info;
- struct ip_nat_range range;
- u_int32_t newip;
-
- IP_NF_ASSERT(info);
- IP_NF_ASSERT(master);
- IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
+ char buffer[sizeof("65535")];
+ u_int16_t port;
+ unsigned int ret;
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
- newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- else
- newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
+ /* Connection comes from client. */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_ORIGINAL;
- /* We don't want to manip the per-protocol, just the IPs. */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = newip;
+ /* When you see the packet, we need to NAT it the same as the
+ * this one (ie. same IP: it will be TCP and master is UDP). */
+ exp->expectfn = ip_nat_follow_master;
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
- exp_amanda_info = &ct->master->help.exp_amanda_info;
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max
- = ((union ip_conntrack_manip_proto)
- { .udp = { htons(exp_amanda_info->port) } });
+ /* Try to get same port: if not, try to change it. */
+ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ exp->tuple.dst.u.tcp.port = htons(port);
+ if (ip_conntrack_expect_related(exp) == 0)
+ break;
}
- return ip_nat_setup_info(ct, &range, hooknum);
-}
-
-static int amanda_data_fixup(struct ip_conntrack *ct,
- struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp)
-{
- struct ip_ct_amanda_expect *exp_amanda_info;
- struct ip_conntrack_tuple t = exp->tuple;
- char buffer[sizeof("65535")];
- u_int16_t port;
-
- /* Alter conntrack's expectations. */
- exp_amanda_info = &exp->help.exp_amanda_info;
- t.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- for (port = exp_amanda_info->port; port != 0; port++) {
- t.dst.u.tcp.port = htons(port);
- if (ip_conntrack_change_expect(exp, &t) == 0)
- break;
+ if (port == 0) {
+ ip_conntrack_expect_free(exp);
+ return NF_DROP;
}
- if (port == 0)
- return 0;
sprintf(buffer, "%u", port);
- return ip_nat_mangle_udp_packet(pskb, ct, ctinfo,
- exp_amanda_info->offset,
- exp_amanda_info->len,
- buffer, strlen(buffer));
-}
-
-static unsigned int help(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp,
- struct ip_nat_info *info,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- int dir = CTINFO2DIR(ctinfo);
- int ret = NF_ACCEPT;
-
- /* Only mangle things once: original direction in POST_ROUTING
- and reply direction on PRE_ROUTING. */
- if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL)
- || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY)))
- return NF_ACCEPT;
-
- /* if this exectation has a "offset" the packet needs to be mangled */
- if (exp->help.exp_amanda_info.offset != 0)
- if (!amanda_data_fixup(ct, pskb, ctinfo, exp))
- ret = NF_DROP;
- exp->help.exp_amanda_info.offset = 0;
-
+ ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
+ matchoff, matchlen,
+ buffer, strlen(buffer));
+ if (ret != NF_ACCEPT)
+ ip_conntrack_unexpect_related(exp);
return ret;
}
-static struct ip_nat_helper ip_nat_amanda_helper;
-
static void __exit fini(void)
{
- ip_nat_helper_unregister(&ip_nat_amanda_helper);
+ ip_nat_amanda_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
}
static int __init init(void)
{
- struct ip_nat_helper *hlpr = &ip_nat_amanda_helper;
-
- hlpr->tuple.dst.protonum = IPPROTO_UDP;
- hlpr->tuple.src.u.udp.port = htons(10080);
- hlpr->mask.src.u.udp.port = 0xFFFF;
- hlpr->mask.dst.protonum = 0xFFFF;
- hlpr->help = help;
- hlpr->flags = 0;
- hlpr->me = THIS_MODULE;
- hlpr->expect = amanda_nat_expected;
- hlpr->name = "amanda";
-
- return ip_nat_helper_register(hlpr);
+ BUG_ON(ip_nat_amanda_hook);
+ ip_nat_amanda_hook = help;
+ return 0;
}
-NEEDS_CONNTRACK(amanda);
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 84c1e0e2cda6..96b24c024f9c 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -42,7 +42,6 @@
#endif
DECLARE_RWLOCK(ip_nat_lock);
-DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
/* Calculated at init based on memory size */
static unsigned int ip_nat_htable_size;
@@ -51,42 +50,23 @@ static struct list_head *bysource;
struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
-/* We keep extra hashes for each conntrack, for fast searching. */
-static inline size_t
-hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto)
-{
- /* Modified src and dst, to ensure we don't create two
- identical streams. */
- return (src + dst + proto) % ip_nat_htable_size;
-}
-
-static inline size_t
-hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
+/* We keep an extra hash for each conntrack, for fast searching. */
+static inline unsigned int
+hash_by_src(const struct ip_conntrack_tuple *tuple)
{
/* Original src, to ensure we map it consistently if poss. */
- return (manip->ip + manip->u.all + proto) % ip_nat_htable_size;
+ return jhash_3words(tuple->src.ip, tuple->src.u.all,
+ tuple->dst.protonum, 0) % ip_nat_htable_size;
}
/* Noone using conntrack by the time this called. */
static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
{
- struct ip_nat_info *info = &conn->nat.info;
- unsigned int hs, hp;
-
- if (!info->initialized)
+ if (!(conn->status & IPS_NAT_DONE_MASK))
return;
- hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src,
- conn->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
-
- hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
- conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
- conn->tuplehash[IP_CT_DIR_REPLY]
- .tuple.dst.protonum);
-
WRITE_LOCK(&ip_nat_lock);
- list_del(&info->bysource);
+ list_del(&conn->nat.info.bysource);
WRITE_UNLOCK(&ip_nat_lock);
}
@@ -117,25 +97,6 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
}
-/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(u32 dstip, u32 srcip)
-{
- static int warned = 0;
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
- struct rtable *rt;
-
- if (ip_route_output_key(&rt, &fl) != 0)
- return;
-
- if (rt->rt_src != srcip && !warned) {
- printk("NAT: no longer support implicit source local NAT\n");
- printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
- NIPQUAD(srcip), NIPQUAD(dstip));
- warned = 1;
- }
- ip_rt_put(rt);
-}
-
/* If we source map this tuple so reply looks like reply_tuple, will
* that meet the constraints of range. */
static int
@@ -178,11 +139,10 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_tuple *result,
const struct ip_nat_range *range)
{
- unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum);
+ unsigned int h = hash_by_src(tuple);
struct ip_conntrack *ct;
- MUST_BE_READ_LOCKED(&ip_nat_lock);
-
+ READ_LOCK(&ip_nat_lock);
list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
if (same_src(ct, tuple)) {
/* Copy source part from reply tuple. */
@@ -190,10 +150,13 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
result->dst = tuple->dst;
- if (in_range(result, range))
+ if (in_range(result, range)) {
+ READ_UNLOCK(&ip_nat_lock);
return 1;
+ }
}
}
+ READ_UNLOCK(&ip_nat_lock);
return 0;
}
@@ -207,7 +170,7 @@ static void
find_best_ips_proto(struct ip_conntrack_tuple *tuple,
const struct ip_nat_range *range,
const struct ip_conntrack *conntrack,
- unsigned int hooknum)
+ enum ip_nat_manip_type maniptype)
{
u_int32_t *var_ipp;
/* Host order */
@@ -217,7 +180,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
return;
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
+ if (maniptype == IP_NAT_MANIP_SRC)
var_ipp = &tuple->src.ip;
else
var_ipp = &tuple->dst.ip;
@@ -232,7 +195,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
* spread in practice (if there are a small number of IPs
* involved, there usually aren't that many connections
* anyway). The consistency means that servers see the same
- * client coming from the same IP (some Internet Backing sites
+ * client coming from the same IP (some Internet Banking sites
* like this), even across reboots. */
minip = ntohl(range->min_ip);
maxip = ntohl(range->max_ip);
@@ -251,7 +214,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
const struct ip_conntrack_tuple *orig_tuple,
const struct ip_nat_range *range,
struct ip_conntrack *conntrack,
- unsigned int hooknum)
+ enum ip_nat_manip_type maniptype)
{
struct ip_nat_protocol *proto
= ip_nat_find_proto(orig_tuple->dst.protonum);
@@ -263,7 +226,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
This is only required for source (ie. NAT/masq) mappings.
So far, we don't do local source mappings, so multiple
manips not an issue. */
- if (hooknum == NF_IP_POST_ROUTING) {
+ if (maniptype == IP_NAT_MANIP_SRC) {
if (find_appropriate_src(orig_tuple, tuple, range)) {
DEBUGP("get_unique_tuple: Found current src map\n");
if (!ip_nat_used_tuple(tuple, conntrack))
@@ -274,375 +237,172 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple,
/* 2) Select the least-used IP/proto combination in the given
range. */
*tuple = *orig_tuple;
- find_best_ips_proto(tuple, range, conntrack, hooknum);
-
- if (hooknum == NF_IP_LOCAL_OUT && tuple->dst.ip != orig_tuple->dst.ip)
- warn_if_extra_mangle(tuple->src.ip, tuple->dst.ip);
+ find_best_ips_proto(tuple, range, conntrack, maniptype);
/* 3) The per-protocol part of the manip is made to map into
the range to make a unique tuple. */
/* Only bother mapping if it's not already in range and unique */
if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, HOOK2MANIP(hooknum),
- &range->min, &range->max))
+ || proto->in_range(tuple, maniptype, &range->min, &range->max))
&& !ip_nat_used_tuple(tuple, conntrack))
return;
/* Last change: get protocol to try to obtain unique tuple. */
- proto->unique_tuple(tuple, range, HOOK2MANIP(hooknum), conntrack);
+ proto->unique_tuple(tuple, range, maniptype, conntrack);
}
-/* Where to manip the reply packets (will be reverse manip). */
-static unsigned int opposite_hook[NF_IP_NUMHOOKS]
-= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
- [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING,
- [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN,
- [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT,
-};
-
unsigned int
ip_nat_setup_info(struct ip_conntrack *conntrack,
const struct ip_nat_range *range,
unsigned int hooknum)
{
- struct ip_conntrack_tuple new_tuple, inv_tuple, reply;
- struct ip_conntrack_tuple orig_tp;
+ struct ip_conntrack_tuple curr_tuple, new_tuple;
struct ip_nat_info *info = &conntrack->nat.info;
- int in_hashes = info->initialized;
+ int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
+ enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
- MUST_BE_WRITE_LOCKED(&ip_nat_lock);
IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
|| hooknum == NF_IP_POST_ROUTING
|| hooknum == NF_IP_LOCAL_IN
|| hooknum == NF_IP_LOCAL_OUT);
- IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
- IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
+ BUG_ON(ip_nat_initialized(conntrack, maniptype));
/* What we've got will look like inverse of reply. Normally
this is what is in the conntrack, except for prior
manipulations (future optimization: if num_manips == 0,
orig_tp =
conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
- invert_tuplepr(&orig_tp,
+ invert_tuplepr(&curr_tuple,
&conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
-#if 0
- {
- unsigned int i;
-
- DEBUGP("Hook %u (%s), ", hooknum,
- HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST");
- DUMP_TUPLE(&orig_tp);
- DEBUGP("Range %p: ", mr);
- for (i = 0; i < mr->rangesize; i++) {
- DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n",
- i,
- (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS)
- ? " MAP_IPS" : "",
- (mr->range[i].flags
- & IP_NAT_RANGE_PROTO_SPECIFIED)
- ? " PROTO_SPECIFIED" : "",
- (mr->range[i].flags & IP_NAT_RANGE_FULL)
- ? " FULL" : "",
- NIPQUAD(mr->range[i].min_ip),
- NIPQUAD(mr->range[i].max_ip),
- mr->range[i].min.all,
- mr->range[i].max.all);
- }
- }
-#endif
-
- get_unique_tuple(&new_tuple, &orig_tp, range, conntrack, hooknum);
+ get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
- /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT):
- the original (A/B/C/D') and the mangled one (E/F/G/H').
+ if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
+ struct ip_conntrack_tuple reply;
- We're only allowed to work with the SRC per-proto
- part, so we create inverses of both to start, then
- derive the other fields we need. */
+ /* Alter conntrack table so will recognize replies. */
+ invert_tuplepr(&reply, &new_tuple);
+ ip_conntrack_alter_reply(conntrack, &reply);
- /* Reply connection: simply invert the new tuple
- (G/H/E/F') */
- invert_tuplepr(&reply, &new_tuple);
-
- /* Alter conntrack table so will recognize replies. */
- ip_conntrack_alter_reply(conntrack, &reply);
-
- /* FIXME: We can simply used existing conntrack reply tuple
- here --RR */
- /* Create inverse of original: C/D/A/B' */
- invert_tuplepr(&inv_tuple, &orig_tp);
-
- /* Has source changed?. */
- if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {
- IP_NF_ASSERT(HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC);
- IP_NF_ASSERT(ip_ct_tuple_dst_equal(&new_tuple, &orig_tp));
-
- /* In this direction, a source manip. */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_ORIGINAL, hooknum,
- IP_NAT_MANIP_SRC, new_tuple.src });
-
- IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
-
- /* In the reverse direction, a destination manip. */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_REPLY, opposite_hook[hooknum],
- IP_NAT_MANIP_DST, orig_tp.src });
- IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
+ /* Non-atomic: we own this at the moment. */
+ if (maniptype == IP_NAT_MANIP_SRC)
+ conntrack->status |= IPS_SRC_NAT;
+ else
+ conntrack->status |= IPS_DST_NAT;
}
- /* Has destination changed? */
- if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {
- IP_NF_ASSERT(HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST);
-
- /* In this direction, a destination manip */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_ORIGINAL, hooknum,
- IP_NAT_MANIP_DST, reply.src });
-
- IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
-
- /* In the reverse direction, a source manip. */
- info->manips[info->num_manips++] =
- ((struct ip_nat_info_manip)
- { IP_CT_DIR_REPLY, opposite_hook[hooknum],
- IP_NAT_MANIP_SRC, inv_tuple.src });
- IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
+ /* Place in source hash if this is the first time. */
+ if (have_to_hash) {
+ unsigned int srchash
+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+ .tuple);
+ WRITE_LOCK(&ip_nat_lock);
+ list_add(&info->bysource, &bysource[srchash]);
+ WRITE_UNLOCK(&ip_nat_lock);
}
- /* If there's a helper, assign it; based on new tuple. */
- if (!conntrack->master)
- info->helper = __ip_nat_find_helper(&reply);
-
/* It's done. */
- info->initialized |= (1 << HOOK2MANIP(hooknum));
-
- if (in_hashes)
- replace_in_hashes(conntrack, info);
+ if (maniptype == IP_NAT_MANIP_DST)
+ set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
else
- place_in_hashes(conntrack, info);
+ set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
return NF_ACCEPT;
}
-void replace_in_hashes(struct ip_conntrack *conntrack,
- struct ip_nat_info *info)
-{
- /* Source has changed, so replace in hashes. */
- unsigned int srchash
- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.src,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
- MUST_BE_WRITE_LOCKED(&ip_nat_lock);
- list_move(&info->bysource, &bysource[srchash]);
-}
-
-void place_in_hashes(struct ip_conntrack *conntrack,
- struct ip_nat_info *info)
-{
- unsigned int srchash
- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.src,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.protonum);
- MUST_BE_WRITE_LOCKED(&ip_nat_lock);
- list_add(&info->bysource, &bysource[srchash]);
-}
-
/* Returns true if succeeded. */
static int
manip_pkt(u_int16_t proto,
struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *target,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph;
(*pskb)->nfcache |= NFC_ALTERED;
- if (!skb_ip_make_writable(pskb, iphdroff+sizeof(*iph)))
+ if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph)))
return 0;
iph = (void *)(*pskb)->data + iphdroff;
/* Manipulate protcol part. */
if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff,
- manip, maniptype))
+ target, maniptype))
return 0;
iph = (void *)(*pskb)->data + iphdroff;
if (maniptype == IP_NAT_MANIP_SRC) {
- iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip,
+ iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
iph->check);
- iph->saddr = manip->ip;
+ iph->saddr = target->src.ip;
} else {
- iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip,
+ iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
iph->check);
- iph->daddr = manip->ip;
+ iph->daddr = target->dst.ip;
}
return 1;
}
-static inline int exp_for_packet(struct ip_conntrack_expect *exp,
- struct sk_buff *skb)
+/* Do packet manipulations according to ip_nat_setup_info. */
+unsigned int nat_packet(struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum,
+ struct sk_buff **pskb)
{
- struct ip_conntrack_protocol *proto;
- int ret = 1;
-
- MUST_BE_READ_LOCKED(&ip_conntrack_lock);
- proto = ip_ct_find_proto(skb->nh.iph->protocol);
- if (proto->exp_matches_pkt)
- ret = proto->exp_matches_pkt(exp, skb);
-
- return ret;
-}
-
-/* Do packet manipulations according to binding. */
-unsigned int
-do_bindings(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct ip_nat_info *info,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- unsigned int i;
- struct ip_nat_helper *helper;
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- int proto = (*pskb)->nh.iph->protocol;
-
- /* Need nat lock to protect against modification, but neither
- conntrack (referenced) and helper (deleted with
- synchronize_bh()) can vanish. */
- READ_LOCK(&ip_nat_lock);
- for (i = 0; i < info->num_manips; i++) {
- if (info->manips[i].direction == dir
- && info->manips[i].hooknum == hooknum) {
- DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
- *pskb,
- info->manips[i].maniptype == IP_NAT_MANIP_SRC
- ? "SRC" : "DST",
- NIPQUAD(info->manips[i].manip.ip),
- htons(info->manips[i].manip.u.all));
- if (!manip_pkt(proto, pskb, 0,
- &info->manips[i].manip,
- info->manips[i].maniptype)) {
- READ_UNLOCK(&ip_nat_lock);
- return NF_DROP;
- }
- }
+ unsigned long statusbit;
+ enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
+
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)
+ && (hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN)) {
+ DEBUGP("ip_nat_core: adjusting sequence number\n");
+ /* future: put this in a l4-proto specific function,
+ * and call this function here. */
+ if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
+ return NF_DROP;
}
- helper = info->helper;
- READ_UNLOCK(&ip_nat_lock);
- if (helper) {
- struct ip_conntrack_expect *exp = NULL;
- struct list_head *cur_item;
- int ret = NF_ACCEPT;
- int helper_called = 0;
-
- DEBUGP("do_bindings: helper existing for (%p)\n", ct);
-
- /* Always defragged for helpers */
- IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
- & htons(IP_MF|IP_OFFSET)));
-
- /* Have to grab read lock before sibling_list traversal */
- READ_LOCK(&ip_conntrack_lock);
- list_for_each_prev(cur_item, &ct->sibling_list) {
- exp = list_entry(cur_item, struct ip_conntrack_expect,
- expected_list);
-
- /* if this expectation is already established, skip */
- if (exp->sibling)
- continue;
-
- if (exp_for_packet(exp, *pskb)) {
- /* FIXME: May be true multiple times in the
- * case of UDP!! */
- DEBUGP("calling nat helper (exp=%p) for packet\n", exp);
- ret = helper->help(ct, exp, info, ctinfo,
- hooknum, pskb);
- if (ret != NF_ACCEPT) {
- READ_UNLOCK(&ip_conntrack_lock);
- return ret;
- }
- helper_called = 1;
- }
- }
- /* Helper might want to manip the packet even when there is no
- * matching expectation for this packet */
- if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) {
- DEBUGP("calling nat helper for packet without expectation\n");
- ret = helper->help(ct, NULL, info, ctinfo,
- hooknum, pskb);
- if (ret != NF_ACCEPT) {
- READ_UNLOCK(&ip_conntrack_lock);
- return ret;
- }
- }
- READ_UNLOCK(&ip_conntrack_lock);
-
- /* Adjust sequence number only once per packet
- * (helper is called at all hooks) */
- if (proto == IPPROTO_TCP
- && (hooknum == NF_IP_POST_ROUTING
- || hooknum == NF_IP_LOCAL_IN)) {
- DEBUGP("ip_nat_core: adjusting sequence number\n");
- /* future: put this in a l4-proto specific function,
- * and call this function here. */
- if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
- ret = NF_DROP;
- }
+ if (mtype == IP_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
- return ret;
+ /* Invert if this is reply dir. */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
- } else
- return NF_ACCEPT;
+ /* Non-atomic: these bits don't change. */
+ if (ct->status & statusbit) {
+ struct ip_conntrack_tuple target;
- /* not reached */
-}
+ /* We are aiming to look like inverse of other direction. */
+ invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-static inline int tuple_src_equal_dst(const struct ip_conntrack_tuple *t1,
- const struct ip_conntrack_tuple *t2)
-{
- if (t1->dst.protonum != t2->dst.protonum || t1->src.ip != t2->dst.ip)
- return 0;
- if (t1->dst.protonum != IPPROTO_ICMP)
- return t1->src.u.all == t2->dst.u.all;
- else {
- struct ip_conntrack_tuple inv;
-
- /* ICMP tuples are asymetric */
- invert_tuplepr(&inv, t1);
- return inv.src.u.all == t2->src.u.all &&
- inv.dst.u.all == t2->dst.u.all;
+ if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
+ return NF_DROP;
}
+ return NF_ACCEPT;
}
-int
-icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *conntrack,
- unsigned int hooknum,
- int dir)
+/* Dir is direction ICMP is coming from (opposite to packet it contains) */
+int icmp_reply_translation(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_nat_manip_type manip,
+ enum ip_conntrack_dir dir)
{
struct {
struct icmphdr icmp;
struct iphdr ip;
} *inside;
- unsigned int i;
- struct ip_nat_info *info = &conntrack->nat.info;
- struct ip_conntrack_tuple *cttuple, innertuple;
- int hdrlen;
+ struct ip_conntrack_tuple inner, target;
+ int hdrlen = (*pskb)->nh.iph->ihl * 4;
- if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside)))
+ if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside)))
return 0;
+
inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
/* We're actually going to mangle it beyond trivial checksum
@@ -662,93 +422,53 @@ icmp_reply_translation(struct sk_buff **pskb,
start talking to each other without our translation, and be
confused... --RR */
if (inside->icmp.type == ICMP_REDIRECT) {
- /* Don't care about races here. */
- if (info->initialized
- != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST))
- || info->num_manips != 0)
+ /* If NAT isn't finished, assume it and drop. */
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+
+ if (ct->status & IPS_NAT_MASK)
return 0;
}
- DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n",
- *pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
- /* Note: May not be from a NAT'd host, but probably safest to
- do translation always as if it came from the host itself
- (even though a "host unreachable" coming from the host
- itself is a bit weird).
-
- More explanation: some people use NAT for anonymizing.
- Also, CERT recommends dropping all packets from private IP
- addresses (although ICMP errors from internal links with
- such addresses are not too uncommon, as Alan Cox points
- out) */
+ DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
+ *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
sizeof(struct icmphdr) + inside->ip.ihl*4,
- &innertuple,
- ip_ct_find_proto(inside->ip.protocol)))
+ &inner, ip_ct_find_proto(inside->ip.protocol)))
return 0;
- cttuple = &conntrack->tuplehash[dir].tuple;
- READ_LOCK(&ip_nat_lock);
- for (i = 0; i < info->num_manips; i++) {
- DEBUGP("icmp_reply: manip %u dir %s hook %u\n",
- i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ?
- "ORIG" : "REPLY", info->manips[i].hooknum);
-
- if (info->manips[i].direction != dir)
- continue;
-
- /* Mapping the inner packet is just like a normal packet, except
- * it was never src/dst reversed, so where we would normally
- * apply a dst manip, we apply a src, and vice versa. */
-
- /* Only true for forwarded packets, locally generated packets
- * never hit PRE_ROUTING, we need to apply their PRE_ROUTING
- * manips in LOCAL_OUT. */
- if (hooknum == NF_IP_LOCAL_OUT &&
- info->manips[i].hooknum == NF_IP_PRE_ROUTING)
- hooknum = info->manips[i].hooknum;
-
- if (info->manips[i].hooknum != hooknum)
- continue;
-
- /* ICMP errors may be generated locally for packets that
- * don't have all NAT manips applied yet. Verify manips
- * have been applied before reversing them */
- if (info->manips[i].maniptype == IP_NAT_MANIP_SRC) {
- if (!tuple_src_equal_dst(cttuple, &innertuple))
- continue;
- } else {
- if (!tuple_src_equal_dst(&innertuple, cttuple))
- continue;
- }
+ /* Change inner back to look like incoming packet. We do the
+ opposite manip on this hook to normal, because it might not
+ pass all hooks (locally-generated ICMP). Consider incoming
+ packet: PREROUTING (DST manip), routing produces ICMP, goes
+ through POSTROUTING (which must correct the DST manip). */
+ if (!manip_pkt(inside->ip.protocol, pskb,
+ (*pskb)->nh.iph->ihl*4
+ + sizeof(inside->icmp),
+ &ct->tuplehash[!dir].tuple,
+ !manip))
+ return 0;
- DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n",
- info->manips[i].maniptype == IP_NAT_MANIP_SRC
- ? "DST" : "SRC", NIPQUAD(info->manips[i].manip.ip),
- ntohs(info->manips[i].manip.u.udp.port));
- if (!manip_pkt(inside->ip.protocol, pskb,
- (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp),
- &info->manips[i].manip,
- !info->manips[i].maniptype))
- goto unlock_fail;
-
- /* Outer packet needs to have IP header NATed like
- it's a reply. */
-
- /* Use mapping to map outer packet: 0 give no
- per-proto mapping */
- DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n",
- info->manips[i].maniptype == IP_NAT_MANIP_SRC
- ? "SRC" : "DST", NIPQUAD(info->manips[i].manip.ip));
- if (!manip_pkt(0, pskb, 0, &info->manips[i].manip,
- info->manips[i].maniptype))
- goto unlock_fail;
- }
- READ_UNLOCK(&ip_nat_lock);
+ /* Change outer to look the reply to an incoming packet
+ * (proto 0 means don't invert per-proto part). */
- hdrlen = (*pskb)->nh.iph->ihl * 4;
+ /* Obviously, we need to NAT destination IP, but source IP
+ should be NAT'ed only if it is from a NAT'd host.
+ Explanation: some people use NAT for anonymizing. Also,
+ CERT recommends dropping all packets from private IP
+ addresses (although ICMP errors from internal links with
+ such addresses are not too uncommon, as Alan Cox points
+ out) */
+ if (manip != IP_NAT_MANIP_SRC
+ || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
+ invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ if (!manip_pkt(0, pskb, 0, &target, manip))
+ return 0;
+ }
+
+ /* Reloading "inside" here since manip_pkt inner. */
inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
inside->icmp.checksum = 0;
@@ -756,10 +476,33 @@ icmp_reply_translation(struct sk_buff **pskb,
(*pskb)->len - hdrlen,
0));
return 1;
+}
- unlock_fail:
- READ_UNLOCK(&ip_nat_lock);
- return 0;
+/* Protocol registration. */
+int ip_nat_protocol_register(struct ip_nat_protocol *proto)
+{
+ int ret = 0;
+
+ WRITE_LOCK(&ip_nat_lock);
+ if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
+ ret = -EBUSY;
+ goto out;
+ }
+ ip_nat_protos[proto->protonum] = proto;
+ out:
+ WRITE_UNLOCK(&ip_nat_lock);
+ return ret;
+}
+
+/* No one stores the protocol anywhere; simply delete it. */
+void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
+{
+ WRITE_LOCK(&ip_nat_lock);
+ ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
+ WRITE_UNLOCK(&ip_nat_lock);
+
+ /* Someone could be still looking at the proto in a bh. */
+ synchronize_net();
}
int __init ip_nat_init(void)
@@ -790,11 +533,9 @@ int __init ip_nat_init(void)
/* FIXME: Man, this is a hack. <SIGH> */
IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
-
- /* Initialize fake conntrack so that NAT will skip it */
- ip_conntrack_untracked.nat.info.initialized |=
- (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
+ /* Initialize fake conntrack so that NAT will skip it */
+ ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
return 0;
}
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index b488b5e1fca2..e4799f2da77a 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -30,71 +30,8 @@ MODULE_DESCRIPTION("ftp NAT helper");
#define DEBUGP(format, args...)
#endif
-#define MAX_PORTS 8
-static int ports[MAX_PORTS];
-static int ports_c;
-
-module_param_array(ports, int, &ports_c, 0400);
-
/* FIXME: Time out? --RR */
-static unsigned int
-ftp_nat_expected(struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- struct ip_nat_range range;
- u_int32_t newdstip, newsrcip, newip;
- struct ip_ct_ftp_expect *exp_ftp_info;
-
- struct ip_conntrack *master = master_ct(ct);
-
- IP_NF_ASSERT(info);
- IP_NF_ASSERT(master);
-
- IP_NF_ASSERT(!(info->initialized & (1<<HOOK2MANIP(hooknum))));
-
- DEBUGP("nat_expected: We have a connection!\n");
- exp_ftp_info = &ct->master->help.exp_ftp_info;
-
- if (exp_ftp_info->ftptype == IP_CT_FTP_PORT
- || exp_ftp_info->ftptype == IP_CT_FTP_EPRT) {
- /* PORT command: make connection go to the client. */
- newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- DEBUGP("nat_expected: PORT cmd. %u.%u.%u.%u->%u.%u.%u.%u\n",
- NIPQUAD(newsrcip), NIPQUAD(newdstip));
- } else {
- /* PASV command: make the connection go to the server */
- newdstip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- DEBUGP("nat_expected: PASV cmd. %u.%u.%u.%u->%u.%u.%u.%u\n",
- NIPQUAD(newsrcip), NIPQUAD(newdstip));
- }
-
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
- newip = newsrcip;
- else
- newip = newdstip;
-
- DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", NIPQUAD(newip));
-
- /* We don't want to manip the per-protocol, just the IPs... */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = newip;
-
- /* ... unless we're doing a MANIP_DST, in which case, make
- sure we map to the correct port */
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max
- = ((union ip_conntrack_manip_proto)
- { .tcp = { htons(exp_ftp_info->port) } });
- }
- return ip_nat_setup_info(ct, &range, hooknum);
-}
-
static int
mangle_rfc959_packet(struct sk_buff **pskb,
u_int32_t newip,
@@ -102,7 +39,8 @@ mangle_rfc959_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ u32 *seq)
{
char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
@@ -111,6 +49,7 @@ mangle_rfc959_packet(struct sk_buff **pskb,
DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+ *seq += strlen(buffer) - matchlen;
return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
@@ -123,7 +62,8 @@ mangle_eprt_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ u32 *seq)
{
char buffer[sizeof("|1|255.255.255.255|65535|")];
@@ -131,6 +71,7 @@ mangle_eprt_packet(struct sk_buff **pskb,
DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+ *seq += strlen(buffer) - matchlen;
return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
@@ -143,7 +84,8 @@ mangle_epsv_packet(struct sk_buff **pskb,
unsigned int matchoff,
unsigned int matchlen,
struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
+ enum ip_conntrack_info ctinfo,
+ u32 *seq)
{
char buffer[sizeof("|||65535|")];
@@ -151,6 +93,7 @@ mangle_epsv_packet(struct sk_buff **pskb,
DEBUGP("calling ip_nat_mangle_tcp_packet\n");
+ *seq += strlen(buffer) - matchlen;
return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
matchlen, buffer, strlen(buffer));
}
@@ -159,181 +102,73 @@ static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
unsigned int,
unsigned int,
struct ip_conntrack *,
- enum ip_conntrack_info)
+ enum ip_conntrack_info,
+ u32 *seq)
= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
[IP_CT_FTP_PASV] = mangle_rfc959_packet,
[IP_CT_FTP_EPRT] = mangle_eprt_packet,
[IP_CT_FTP_EPSV] = mangle_epsv_packet
};
-static int ftp_data_fixup(const struct ip_ct_ftp_expect *exp_ftp_info,
- struct ip_conntrack *ct,
- struct sk_buff **pskb,
- u32 tcp_seq,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *expect)
+/* So, this packet has hit the connection tracking matching code.
+ Mangle it, and change the expectation to match the new version. */
+static unsigned int ip_nat_ftp(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ enum ip_ct_ftp_type type,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp,
+ u32 *seq)
{
u_int32_t newip;
u_int16_t port;
- struct ip_conntrack_tuple newtuple;
+ int dir = CTINFO2DIR(ctinfo);
+ struct ip_conntrack *ct = exp->master;
- DEBUGP("FTP_NAT: seq %u + %u in %u\n",
- expect->seq, exp_ftp_info->len, tcp_seq);
+ DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
- /* Change address inside packet to match way we're mapping
- this connection. */
- if (exp_ftp_info->ftptype == IP_CT_FTP_PASV
- || exp_ftp_info->ftptype == IP_CT_FTP_EPSV) {
- /* PASV/EPSV response: must be where client thinks server
- is */
- newip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- /* Expect something from client->server */
- newtuple.src.ip =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- newtuple.dst.ip =
- ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- } else {
- /* PORT command: must be where server thinks client is */
- newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- /* Expect something from server->client */
- newtuple.src.ip =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- newtuple.dst.ip =
- ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- }
- newtuple.dst.protonum = IPPROTO_TCP;
- newtuple.src.u.tcp.port = expect->tuple.src.u.tcp.port;
+ /* Connection will come from wherever this packet goes, hence !dir */
+ newip = ct->tuplehash[!dir].tuple.dst.ip;
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = !dir;
- /* Try to get same port: if not, try to change it. */
- for (port = exp_ftp_info->port; port != 0; port++) {
- newtuple.dst.u.tcp.port = htons(port);
+ /* When you see the packet, we need to NAT it the same as
+ * this one. */
+ exp->expectfn = ip_nat_follow_master;
- if (ip_conntrack_change_expect(expect, &newtuple) == 0)
+ /* Try to get same port: if not, try to change it. */
+ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ exp->tuple.dst.u.tcp.port = htons(port);
+ if (ip_conntrack_expect_related(exp) == 0)
break;
}
- if (port == 0)
- return 0;
-
- if (!mangle[exp_ftp_info->ftptype](pskb, newip, port,
- expect->seq - tcp_seq,
- exp_ftp_info->len, ct, ctinfo))
- return 0;
- return 1;
-}
-
-static unsigned int help(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp,
- struct ip_nat_info *info,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct tcphdr _tcph, *tcph;
- unsigned int datalen;
- int dir;
- struct ip_ct_ftp_expect *exp_ftp_info;
-
- if (!exp)
- DEBUGP("ip_nat_ftp: no exp!!");
-
- exp_ftp_info = &exp->help.exp_ftp_info;
-
- /* Only mangle things once: original direction in POST_ROUTING
- and reply direction on PRE_ROUTING. */
- dir = CTINFO2DIR(ctinfo);
- if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL)
- || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) {
- DEBUGP("nat_ftp: Not touching dir %s at hook %s\n",
- dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY",
- hooknum == NF_IP_POST_ROUTING ? "POSTROUTING"
- : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING"
- : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "???");
- return NF_ACCEPT;
+ if (port == 0) {
+ ip_conntrack_expect_free(exp);
+ return NF_DROP;
}
- /* We passed tcp tracking, plus ftp helper: this must succeed. */
- tcph = skb_header_pointer(*pskb, iph->ihl * 4, sizeof(_tcph), &_tcph);
- BUG_ON(!tcph);
-
- datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4;
- /* If it's in the right range... */
- if (between(exp->seq + exp_ftp_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen)) {
- if (!ftp_data_fixup(exp_ftp_info, ct, pskb, ntohl(tcph->seq),
- ctinfo, exp))
- return NF_DROP;
- } else {
- /* Half a match? This means a partial retransmisison.
- It's a cracker being funky. */
- if (net_ratelimit()) {
- printk("FTP_NAT: partial packet %u/%u in %u/%u\n",
- exp->seq, exp_ftp_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- }
+ if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
+ seq)) {
+ ip_conntrack_unexpect_related(exp);
return NF_DROP;
}
return NF_ACCEPT;
}
-static struct ip_nat_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][10];
-
-/* Not __exit: called from init() */
-static void fini(void)
+static void __exit fini(void)
{
- int i;
-
- for (i = 0; i < ports_c; i++) {
- DEBUGP("ip_nat_ftp: unregistering port %d\n", ports[i]);
- ip_nat_helper_unregister(&ftp[i]);
- }
+ ip_nat_ftp_hook = NULL;
+ /* Make sure no one calls it, meanwhile. */
+ synchronize_net();
}
static int __init init(void)
{
- int i, ret = 0;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = FTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- ftp[i].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
- ftp[i].mask.dst.protonum = 0xFFFF;
- ftp[i].mask.src.u.tcp.port = 0xFFFF;
- ftp[i].help = help;
- ftp[i].me = THIS_MODULE;
- ftp[i].flags = 0;
- ftp[i].expect = ftp_nat_expected;
-
- tmpname = &ftp_names[i][0];
- if (ports[i] == FTP_PORT)
- sprintf(tmpname, "ftp");
- else
- sprintf(tmpname, "ftp-%d", i);
- ftp[i].name = tmpname;
-
- DEBUGP("ip_nat_ftp: Trying to register for port %d\n",
- ports[i]);
- ret = ip_nat_helper_register(&ftp[i]);
-
- if (ret) {
- printk("ip_nat_ftp: error registering "
- "helper for port %d\n", ports[i]);
- fini();
- return ret;
- }
- }
-
- return ret;
+ BUG_ON(ip_nat_ftp_hook);
+ ip_nat_ftp_hook = ip_nat_ftp;
+ return 0;
}
-NEEDS_CONNTRACK(ftp);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index 69b759fe99c7..1637b96d8c01 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -47,8 +47,7 @@
#define DUMP_OFFSET(x)
#endif
-static LIST_HEAD(helpers);
-DECLARE_LOCK(ip_nat_seqofs_lock);
+static DECLARE_LOCK(ip_nat_seqofs_lock);
/* Setup TCP sequence correction given this change at this sequence */
static inline void
@@ -193,9 +192,14 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
csum_partial((char *)tcph, datalen, 0));
- adjust_tcp_sequence(ntohl(tcph->seq),
- (int)rep_len - (int)match_len,
- ct, ctinfo);
+ if (rep_len != match_len) {
+ set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
+ adjust_tcp_sequence(ntohl(tcph->seq),
+ (int)rep_len - (int)match_len,
+ ct, ctinfo);
+ /* Tell TCP window tracking about seq change */
+ ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
+ }
return 1;
}
@@ -362,11 +366,6 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
this_way = &ct->nat.info.seq[dir];
other_way = &ct->nat.info.seq[!dir];
- /* No adjustments to make? Very common case. */
- if (!this_way->offset_before && !this_way->offset_after
- && !other_way->offset_before && !other_way->offset_after)
- return 1;
-
if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
return 0;
@@ -404,75 +403,28 @@ ip_nat_seq_adjust(struct sk_buff **pskb,
return 1;
}
-static inline int
-helper_cmp(const struct ip_nat_helper *helper,
- const struct ip_conntrack_tuple *tuple)
+/* Setup NAT on this expected conntrack so it follows master. */
+/* If we fail to get a free NAT slot, we'll get dropped on confirm */
+void ip_nat_follow_master(struct ip_conntrack *ct,
+ struct ip_conntrack_expect *exp)
{
- return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
-}
-
-int ip_nat_helper_register(struct ip_nat_helper *me)
-{
- int ret = 0;
-
- WRITE_LOCK(&ip_nat_lock);
- if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple))
- ret = -EBUSY;
- else
- list_prepend(&helpers, me);
- WRITE_UNLOCK(&ip_nat_lock);
-
- return ret;
-}
-
-struct ip_nat_helper *
-__ip_nat_find_helper(const struct ip_conntrack_tuple *tuple)
-{
- return LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, tuple);
-}
-
-struct ip_nat_helper *
-ip_nat_find_helper(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_nat_helper *h;
-
- READ_LOCK(&ip_nat_lock);
- h = __ip_nat_find_helper(tuple);
- READ_UNLOCK(&ip_nat_lock);
-
- return h;
-}
-
-static int
-kill_helper(struct ip_conntrack *i, void *helper)
-{
- int ret;
-
- READ_LOCK(&ip_nat_lock);
- ret = (i->nat.info.helper == helper);
- READ_UNLOCK(&ip_nat_lock);
-
- return ret;
-}
-
-void ip_nat_helper_unregister(struct ip_nat_helper *me)
-{
- WRITE_LOCK(&ip_nat_lock);
- /* Autoloading conntrack helper might have failed */
- if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) {
- LIST_DELETE(&helpers, me);
- }
- WRITE_UNLOCK(&ip_nat_lock);
-
- /* Someone could be still looking at the helper in a bh. */
- synchronize_net();
-
- /* Find anything using it, and umm, kill them. We can't turn
- them into normal connections: if we've adjusted SYNs, then
- they'll ackstorm. So we just drop it. We used to just
- bump module count when a connection existed, but that
- forces admins to gen fake RSTs or bounce box, either of
- which is just a long-winded way of making things
- worse. --RR */
- ip_ct_iterate_cleanup(kill_helper, me);
+ struct ip_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = IP_NAT_RANGE_MAP_IPS;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
+ /* hook doesn't matter, but it has to do source manip */
+ ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
+ range.min = range.max = exp->saved_proto;
+ range.min_ip = range.max_ip
+ = ct->master->tuplehash[!exp->dir].tuple.src.ip;
+ /* hook doesn't matter, but it has to do destination manip */
+ ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
}
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index fa884374b5dc..7c2c3762888e 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -1,5 +1,6 @@
/* IRC extension for TCP NAT alteration.
* (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
* based on a copy of RR's ip_nat_ftp.c
*
* ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
@@ -8,12 +9,6 @@
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
- *
- * Module load syntax:
- * insmod ip_nat_irc.o ports=port1,port2,...port<MAX_PORTS>
- *
- * please give the ports of all IRC servers You wish to connect to.
- * If You don't specify ports, the default will be port 6667
*/
#include <linux/module.h>
@@ -35,66 +30,18 @@
#define DEBUGP(format, args...)
#endif
-#define MAX_PORTS 8
-static int ports[MAX_PORTS];
-static int ports_c;
-
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("IRC (DCC) NAT helper");
MODULE_LICENSE("GPL");
-module_param_array(ports, int, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-
-/* FIXME: Time out? --RR */
-
-static unsigned int
-irc_nat_expected(struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- struct ip_nat_range range;
- u_int32_t newdstip, newsrcip, newip;
-
- struct ip_conntrack *master = master_ct(ct);
-
- IP_NF_ASSERT(info);
- IP_NF_ASSERT(master);
-
- IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
-
- DEBUGP("nat_expected: We have a connection!\n");
-
- newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- newsrcip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- DEBUGP("nat_expected: DCC cmd. %u.%u.%u.%u->%u.%u.%u.%u\n",
- NIPQUAD(newsrcip), NIPQUAD(newdstip));
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
- newip = newsrcip;
- else
- newip = newdstip;
-
- DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", NIPQUAD(newip));
-
- /* We don't want to manip the per-protocol, just the IPs. */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = newip;
-
- return ip_nat_setup_info(ct, &range, hooknum);
-}
-
-static int irc_data_fixup(const struct ip_ct_irc_expect *exp_irc_info,
- struct ip_conntrack *ct,
- struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *expect)
+static unsigned int help(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ unsigned int matchoff,
+ unsigned int matchlen,
+ struct ip_conntrack_expect *exp)
{
- u_int32_t newip;
- struct ip_conntrack_tuple t;
- struct iphdr *iph = (*pskb)->nh.iph;
- struct tcphdr *tcph = (void *) iph + iph->ihl * 4;
u_int16_t port;
+ unsigned int ret;
/* "4294967296 65635 " */
char buffer[18];
@@ -103,21 +50,25 @@ static int irc_data_fixup(const struct ip_ct_irc_expect *exp_irc_info,
expect->seq, exp_irc_info->len,
ntohl(tcph->seq));
- newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
+ /* Reply comes from server. */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_REPLY;
+
+ /* When you see the packet, we need to NAT it the same as
+ * this one. */
+ exp->expectfn = ip_nat_follow_master;
- /* Alter conntrack's expectations. */
- t = expect->tuple;
- t.dst.ip = newip;
- for (port = exp_irc_info->port; port != 0; port++) {
- t.dst.u.tcp.port = htons(port);
- if (ip_conntrack_change_expect(expect, &t) == 0) {
- DEBUGP("using port %d", port);
+ /* Try to get same port: if not, try to change it. */
+ for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
+ exp->tuple.dst.u.tcp.port = htons(port);
+ if (ip_conntrack_expect_related(exp) == 0)
break;
- }
+ }
+ if (port == 0) {
+ ip_conntrack_expect_free(exp);
+ return NF_DROP;
}
- if (port == 0)
- return 0;
/* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
* strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
@@ -132,131 +83,31 @@ static int irc_data_fixup(const struct ip_ct_irc_expect *exp_irc_info,
* 0x01, \n: terminators
*/
- sprintf(buffer, "%u %u", ntohl(newip), port);
+ sprintf(buffer, "%u %u", ntohl(exp->tuple.src.ip), port);
DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
- buffer, NIPQUAD(newip), port);
-
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- expect->seq - ntohl(tcph->seq),
- exp_irc_info->len, buffer,
- strlen(buffer));
-}
-
-static unsigned int help(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp,
- struct ip_nat_info *info,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct tcphdr *tcph = (void *) iph + iph->ihl * 4;
- unsigned int datalen;
- int dir;
- struct ip_ct_irc_expect *exp_irc_info;
-
- if (!exp)
- DEBUGP("ip_nat_irc: no exp!!");
-
- exp_irc_info = &exp->help.exp_irc_info;
+ buffer, NIPQUAD(exp->tuple.src.ip), port);
- /* Only mangle things once: original direction in POST_ROUTING
- and reply direction on PRE_ROUTING. */
- dir = CTINFO2DIR(ctinfo);
- if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL)
- || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) {
- DEBUGP("nat_irc: Not touching dir %s at hook %s\n",
- dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY",
- hooknum == NF_IP_POST_ROUTING ? "POSTROUTING"
- : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING"
- : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "???");
- return NF_ACCEPT;
- }
- DEBUGP("got beyond not touching\n");
-
- datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4;
- /* Check whether the whole IP/address pattern is carried in the payload */
- if (between(exp->seq + exp_irc_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen)) {
- if (!irc_data_fixup(exp_irc_info, ct, pskb, ctinfo, exp))
- return NF_DROP;
- } else {
- /* Half a match? This means a partial retransmisison.
- It's a cracker being funky. */
- if (net_ratelimit()) {
- printk
- ("IRC_NAT: partial packet %u/%u in %u/%u\n",
- exp->seq, exp_irc_info->len,
- ntohl(tcph->seq),
- ntohl(tcph->seq) + datalen);
- }
- return NF_DROP;
- }
- return NF_ACCEPT;
+ ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
+ matchoff, matchlen, buffer,
+ strlen(buffer));
+ if (ret != NF_ACCEPT)
+ ip_conntrack_unexpect_related(exp);
+ return ret;
}
-static struct ip_nat_helper ip_nat_irc_helpers[MAX_PORTS];
-static char irc_names[MAX_PORTS][10];
-
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by init() */
-static void fini(void)
+static void __exit fini(void)
{
- int i;
-
- for (i = 0; i < ports_c; i++) {
- DEBUGP("ip_nat_irc: unregistering helper for port %d\n",
- ports[i]);
- ip_nat_helper_unregister(&ip_nat_irc_helpers[i]);
- }
+ ip_nat_irc_hook = NULL;
+ /* Make sure no one calls it, meanwhile. */
+ synchronize_net();
}
static int __init init(void)
{
- int ret = 0;
- int i;
- struct ip_nat_helper *hlpr;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = IRC_PORT;
-
- for (i = 0; i < ports_c; i++) {
- hlpr = &ip_nat_irc_helpers[i];
- hlpr->tuple.dst.protonum = IPPROTO_TCP;
- hlpr->tuple.src.u.tcp.port = htons(ports[i]);
- hlpr->mask.src.u.tcp.port = 0xFFFF;
- hlpr->mask.dst.protonum = 0xFFFF;
- hlpr->help = help;
- hlpr->flags = 0;
- hlpr->me = THIS_MODULE;
- hlpr->expect = irc_nat_expected;
-
- tmpname = &irc_names[i][0];
- if (ports[i] == IRC_PORT)
- sprintf(tmpname, "irc");
- else
- sprintf(tmpname, "irc-%d", i);
- hlpr->name = tmpname;
-
- DEBUGP
- ("ip_nat_irc: Trying to register helper for port %d: name %s\n",
- ports[i], hlpr->name);
- ret = ip_nat_helper_register(hlpr);
-
- if (ret) {
- printk
- ("ip_nat_irc: error registering helper for port %d\n",
- ports[i]);
- fini();
- return 1;
- }
- }
- return ret;
+ BUG_ON(ip_nat_irc_hook);
+ ip_nat_irc_hook = help;
+ return 0;
}
-NEEDS_CONNTRACK(irc);
-
module_init(init);
module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
index 7cbe08819b0e..a558cf0eee8a 100644
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c
@@ -54,7 +54,7 @@ icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
icmp_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
@@ -64,12 +64,12 @@ icmp_manip_pkt(struct sk_buff **pskb,
if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
return 0;
- hdr = (void *)(*pskb)->data + hdroff;
+ hdr = (struct icmphdr *)((*pskb)->data + hdroff);
hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
- manip->u.icmp.id,
+ tuple->src.u.icmp.id,
hdr->checksum);
- hdr->un.echo.id = manip->u.icmp.id;
+ hdr->un.echo.id = tuple->src.u.icmp.id;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
index fb21a0875fa4..694838c0acd0 100644
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c
@@ -85,14 +85,14 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
tcp_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
struct tcphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, oldsrc = iph->saddr, olddst = iph->daddr;
- u16 *portptr, oldport;
+ u32 oldip, newip;
+ u16 *portptr, newport, oldport;
int hdrsize = 8; /* TCP connection tracking guarantees this much */
/* this could be a inner header returned in icmp packet; in such
@@ -104,27 +104,32 @@ tcp_manip_pkt(struct sk_buff **pskb,
if (!skb_ip_make_writable(pskb, hdroff + hdrsize))
return 0;
- hdr = (void *)(*pskb)->data + hdroff;
+ iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ hdr = (struct tcphdr *)((*pskb)->data + iph->ihl*4);
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
- oldip = oldsrc;
+ oldip = iph->saddr;
+ newip = tuple->src.ip;
+ newport = tuple->src.u.tcp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst ip and dst pt */
- oldip = olddst;
+ oldip = iph->daddr;
+ newip = tuple->dst.ip;
+ newport = tuple->dst.u.tcp.port;
portptr = &hdr->dest;
}
oldport = *portptr;
- *portptr = manip->u.tcp.port;
+ *portptr = newport;
if (hdrsize < sizeof(*hdr))
return 1;
- hdr->check = ip_nat_cheat_check(~oldip, manip->ip,
+ hdr->check = ip_nat_cheat_check(~oldip, newip,
ip_nat_cheat_check(oldport ^ 0xFFFF,
- manip->u.tcp.port,
+ newport,
hdr->check));
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
index 3c492530863c..c669e3b5f5d0 100644
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ b/net/ipv4/netfilter/ip_nat_proto_udp.c
@@ -84,34 +84,40 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
udp_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
struct udphdr *hdr;
unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, oldsrc = iph->saddr, olddst = iph->daddr;
- u16 *portptr;
+ u32 oldip, newip;
+ u16 *portptr, newport;
if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr)))
return 0;
- hdr = (void *)(*pskb)->data + hdroff;
+ iph = (struct iphdr *)((*pskb)->data + iphdroff);
+ hdr = (struct udphdr *)((*pskb)->data + hdroff);
+
if (maniptype == IP_NAT_MANIP_SRC) {
/* Get rid of src ip and src pt */
- oldip = oldsrc;
+ oldip = iph->saddr;
+ newip = tuple->src.ip;
+ newport = tuple->src.u.udp.port;
portptr = &hdr->source;
} else {
/* Get rid of dst ip and dst pt */
- oldip = olddst;
+ oldip = iph->daddr;
+ newip = tuple->dst.ip;
+ newport = tuple->dst.u.udp.port;
portptr = &hdr->dest;
}
if (hdr->check) /* 0 is a special case meaning no checksum */
- hdr->check = ip_nat_cheat_check(~oldip, manip->ip,
+ hdr->check = ip_nat_cheat_check(~oldip, newip,
ip_nat_cheat_check(*portptr ^ 0xFFFF,
- manip->u.udp.port,
+ newport,
hdr->check));
- *portptr = manip->u.udp.port;
+ *portptr = newport;
return 1;
}
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
index 8f2e7ddbbdc8..f5525bd58d16 100644
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c
@@ -40,7 +40,7 @@ static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
static int
unknown_manip_pkt(struct sk_buff **pskb,
unsigned int iphdroff,
- const struct ip_conntrack_manip *manip,
+ const struct ip_conntrack_tuple *tuple,
enum ip_nat_manip_type maniptype)
{
return 1;
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index 80773588d8ad..4c204714a3a4 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -16,6 +16,7 @@
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <net/checksum.h>
+#include <net/route.h>
#include <linux/bitops.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
@@ -120,6 +121,25 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb,
return ip_nat_setup_info(ct, &mr->range[0], hooknum);
}
+/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
+static void warn_if_extra_mangle(u32 dstip, u32 srcip)
+{
+ static int warned = 0;
+ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
+ struct rtable *rt;
+
+ if (ip_route_output_key(&rt, &fl) != 0)
+ return;
+
+ if (rt->rt_src != srcip && !warned) {
+ printk("NAT: no longer support implicit source local NAT\n");
+ printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
+ NIPQUAD(srcip), NIPQUAD(dstip));
+ warned = 1;
+ }
+ ip_rt_put(rt);
+}
+
static unsigned int ipt_dnat_target(struct sk_buff **pskb,
const struct net_device *in,
const struct net_device *out,
@@ -139,6 +159,11 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb,
/* Connection must be valid and new. */
IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
+ if (hooknum == NF_IP_LOCAL_OUT
+ && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
+ warn_if_extra_mangle((*pskb)->nh.iph->daddr,
+ mr->range[0].min_ip);
+
return ip_nat_setup_info(ct, &mr->range[0], hooknum);
}
@@ -242,7 +267,7 @@ int ip_nat_rule_find(struct sk_buff **pskb,
ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
if (ret == NF_ACCEPT) {
- if (!(info->initialized & (1 << HOOK2MANIP(hooknum))))
+ if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
/* NUL mapping */
ret = alloc_null_binding(ct, info, hooknum);
}
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index b18e79332169..2a48b6e635ae 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -50,6 +50,7 @@
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_nat.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/ip.h>
#include <net/checksum.h>
@@ -1203,9 +1204,7 @@ static int snmp_parse_mangle(unsigned char *msg,
* SNMP translation routine.
*/
static int snmp_translate(struct ip_conntrack *ct,
- struct ip_nat_info *info,
enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
struct sk_buff **pskb)
{
struct iphdr *iph = (*pskb)->nh.iph;
@@ -1234,101 +1233,86 @@ static int snmp_translate(struct ip_conntrack *ct,
if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
paylen, &map, &udph->check)) {
- printk(KERN_WARNING "bsalg: parser failed\n");
+ if (net_ratelimit())
+ printk(KERN_WARNING "bsalg: parser failed\n");
return NF_DROP;
}
return NF_ACCEPT;
}
-/*
- * NAT helper function, packets arrive here from NAT code.
- */
-static unsigned int nat_help(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp,
- struct ip_nat_info *info,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
+/* We don't actually set up expectations, just adjust internal IP
+ * addresses if this is being NATted */
+static int help(struct sk_buff **pskb,
+ struct ip_conntrack *ct,
+ enum ip_conntrack_info ctinfo)
{
int dir = CTINFO2DIR(ctinfo);
+ unsigned int ret;
struct iphdr *iph = (*pskb)->nh.iph;
struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
-
- if (!skb_ip_make_writable(pskb, (*pskb)->len))
- return NF_DROP;
- spin_lock_bh(&snmp_lock);
-
- /*
- * Translate snmp replies on pre-routing (DNAT) and snmp traps
- * on post routing (SNAT).
- */
- if (!((dir == IP_CT_DIR_REPLY && hooknum == NF_IP_PRE_ROUTING &&
- udph->source == ntohs(SNMP_PORT)) ||
- (dir == IP_CT_DIR_ORIGINAL && hooknum == NF_IP_POST_ROUTING &&
- udph->dest == ntohs(SNMP_TRAP_PORT)))) {
- spin_unlock_bh(&snmp_lock);
+ /* SNMP replies and originating SNMP traps get mangled */
+ if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ return NF_ACCEPT;
+ if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ /* No NAT? */
+ if (!(ct->status & IPS_NAT_MASK))
return NF_ACCEPT;
- }
- if (debug > 1) {
- printk(KERN_DEBUG "bsalg: dir=%s hook=%d manip=%s len=%d "
- "src=%u.%u.%u.%u:%u dst=%u.%u.%u.%u:%u "
- "osrc=%u.%u.%u.%u odst=%u.%u.%u.%u "
- "rsrc=%u.%u.%u.%u rdst=%u.%u.%u.%u "
- "\n",
- dir == IP_CT_DIR_REPLY ? "reply" : "orig", hooknum,
- HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? "snat" :
- "dnat", (*pskb)->len,
- NIPQUAD(iph->saddr), ntohs(udph->source),
- NIPQUAD(iph->daddr), ntohs(udph->dest),
- NIPQUAD(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
- NIPQUAD(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip),
- NIPQUAD(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip),
- NIPQUAD(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip));
- }
-
/*
* Make sure the packet length is ok. So far, we were only guaranteed
* to have a valid length IP header plus 8 bytes, which means we have
* enough room for a UDP header. Just verify the UDP length field so we
* can mess around with the payload.
*/
- if (ntohs(udph->len) == (*pskb)->len - (iph->ihl << 2)) {
- int ret = snmp_translate(ct, info, ctinfo, hooknum, pskb);
- spin_unlock_bh(&snmp_lock);
- return ret;
+ if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
+ if (net_ratelimit())
+ printk(KERN_WARNING "SNMP: dropping malformed packet "
+ "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
+ NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+ return NF_DROP;
}
-
- if (net_ratelimit())
- printk(KERN_WARNING "bsalg: dropping malformed packet "
- "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
- NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+
+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
+ return NF_DROP;
+
+ spin_lock_bh(&snmp_lock);
+ ret = snmp_translate(ct, ctinfo, pskb);
spin_unlock_bh(&snmp_lock);
- return NF_DROP;
+ return ret;
}
-static struct ip_nat_helper snmp = {
- { NULL, NULL },
- "snmp",
- 0,
- THIS_MODULE,
- { { 0, { .udp = { __constant_htons(SNMP_PORT) } } },
- { 0, { 0 }, IPPROTO_UDP } },
- { { 0, { .udp = { 0xFFFF } } },
- { 0, { 0 }, 0xFFFF } },
- nat_help, NULL };
-
-static struct ip_nat_helper snmp_trap = {
- { NULL, NULL },
- "snmp_trap",
- 0,
- THIS_MODULE,
- { { 0, { .udp = { __constant_htons(SNMP_TRAP_PORT) } } },
- { 0, { 0 }, IPPROTO_UDP } },
- { { 0, { .udp = { 0xFFFF } } },
- { 0, { 0 }, 0xFFFF } },
- nat_help, NULL };
+static struct ip_conntrack_helper snmp_helper = {
+ .max_expected = 0,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = help,
+ .name = "snmp",
+
+ .tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } },
+ .dst = { .protonum = IPPROTO_UDP },
+ },
+ .mask = { .src = { .u = { 0xFFFF } },
+ .dst = { .protonum = 0xFF },
+ },
+};
+
+static struct ip_conntrack_helper snmp_trap_helper = {
+ .max_expected = 0,
+ .timeout = 180,
+ .me = THIS_MODULE,
+ .help = help,
+ .name = "snmp_trap",
+
+ .tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } },
+ .dst = { .protonum = IPPROTO_UDP },
+ },
+ .mask = { .src = { .u = { 0xFFFF } },
+ .dst = { .protonum = 0xFF },
+ },
+};
/*****************************************************************************
*
@@ -1340,12 +1324,12 @@ static int __init init(void)
{
int ret = 0;
- ret = ip_nat_helper_register(&snmp);
+ ret = ip_conntrack_helper_register(&snmp_helper);
if (ret < 0)
return ret;
- ret = ip_nat_helper_register(&snmp_trap);
+ ret = ip_conntrack_helper_register(&snmp_trap_helper);
if (ret < 0) {
- ip_nat_helper_unregister(&snmp);
+ ip_conntrack_helper_unregister(&snmp_helper);
return ret;
}
return ret;
@@ -1353,9 +1337,8 @@ static int __init init(void)
static void __exit fini(void)
{
- ip_nat_helper_unregister(&snmp);
- ip_nat_helper_unregister(&snmp_trap);
- synchronize_net();
+ ip_conntrack_helper_unregister(&snmp_helper);
+ ip_conntrack_helper_unregister(&snmp_trap_helper);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index 0ef8efffb91b..0efc4c8292d0 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -55,15 +55,6 @@
: ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
: "*ERROR*")))
-static inline int call_expect(struct ip_conntrack *master,
- struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- return master->nat.info.helper->expect(pskb, hooknum, ct, info);
-}
-
static unsigned int
ip_nat_fn(unsigned int hooknum,
struct sk_buff **pskb,
@@ -115,7 +106,7 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_RELATED:
case IP_CT_RELATED+IP_CT_IS_REPLY:
if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!icmp_reply_translation(pskb, ct, hooknum,
+ if (!icmp_reply_translation(pskb, ct, maniptype,
CTINFO2DIR(ctinfo)))
return NF_DROP;
else
@@ -125,37 +116,26 @@ ip_nat_fn(unsigned int hooknum,
case IP_CT_NEW:
info = &ct->nat.info;
- WRITE_LOCK(&ip_nat_lock);
/* Seen it before? This can happen for loopback, retrans,
or local packets.. */
- if (!(info->initialized & (1 << maniptype))) {
+ if (!ip_nat_initialized(ct, maniptype)) {
unsigned int ret;
- if (ct->master
- && master_ct(ct)->nat.info.helper
- && master_ct(ct)->nat.info.helper->expect) {
- ret = call_expect(master_ct(ct), pskb,
- hooknum, ct, info);
- } else {
- /* LOCAL_IN hook doesn't have a chain! */
- if (hooknum == NF_IP_LOCAL_IN)
- ret = alloc_null_binding(ct, info,
- hooknum);
- else
- ret = ip_nat_rule_find(pskb, hooknum,
- in, out, ct,
- info);
- }
+ /* LOCAL_IN hook doesn't have a chain! */
+ if (hooknum == NF_IP_LOCAL_IN)
+ ret = alloc_null_binding(ct, info, hooknum);
+ else
+ ret = ip_nat_rule_find(pskb, hooknum,
+ in, out, ct,
+ info);
if (ret != NF_ACCEPT) {
- WRITE_UNLOCK(&ip_nat_lock);
return ret;
}
} else
DEBUGP("Already setup manip %s for ct %p\n",
maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
- WRITE_UNLOCK(&ip_nat_lock);
break;
default:
@@ -166,7 +146,7 @@ ip_nat_fn(unsigned int hooknum,
}
IP_NF_ASSERT(info);
- return do_bindings(ct, ctinfo, info, hooknum, pskb);
+ return nat_packet(ct, ctinfo, hooknum, pskb);
}
static unsigned int
@@ -288,33 +268,6 @@ static struct nf_hook_ops ip_nat_local_in_ops = {
.priority = NF_IP_PRI_NAT_SRC,
};
-/* Protocol registration. */
-int ip_nat_protocol_register(struct ip_nat_protocol *proto)
-{
- int ret = 0;
-
- WRITE_LOCK(&ip_nat_lock);
- if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
- ret = -EBUSY;
- goto out;
- }
- ip_nat_protos[proto->protonum] = proto;
- out:
- WRITE_UNLOCK(&ip_nat_lock);
- return ret;
-}
-
-/* Noone stores the protocol anywhere; simply delete it. */
-void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
-{
- WRITE_LOCK(&ip_nat_lock);
- ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
- WRITE_UNLOCK(&ip_nat_lock);
-
- /* Someone could be still looking at the proto in a bh. */
- synchronize_net();
-}
-
static int init_or_cleanup(int init)
{
int ret = 0;
@@ -388,12 +341,9 @@ module_exit(fini);
EXPORT_SYMBOL(ip_nat_setup_info);
EXPORT_SYMBOL(ip_nat_protocol_register);
EXPORT_SYMBOL(ip_nat_protocol_unregister);
-EXPORT_SYMBOL(ip_nat_helper_register);
-EXPORT_SYMBOL(ip_nat_helper_unregister);
EXPORT_SYMBOL(ip_nat_cheat_check);
EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
EXPORT_SYMBOL(ip_nat_used_tuple);
-EXPORT_SYMBOL(ip_nat_find_helper);
-EXPORT_SYMBOL(__ip_nat_find_helper);
+EXPORT_SYMBOL(ip_nat_follow_master);
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
index e173191031c0..0343e0d64674 100644
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ b/net/ipv4/netfilter/ip_nat_tftp.c
@@ -38,168 +38,32 @@ MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
MODULE_DESCRIPTION("tftp NAT helper");
MODULE_LICENSE("GPL");
-#define MAX_PORTS 8
-
-static int ports[MAX_PORTS];
-static int ports_c = 0;
-module_param_array(ports, int, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of tftp servers");
-
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-static unsigned int
-tftp_nat_help(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp,
- struct ip_nat_info *info,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
+static unsigned int help(struct sk_buff **pskb,
+ enum ip_conntrack_info ctinfo,
+ struct ip_conntrack_expect *exp)
{
- int dir = CTINFO2DIR(ctinfo);
- struct tftphdr _tftph, *tfh;
- struct ip_conntrack_tuple repl;
-
- if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL)
- || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY)))
- return NF_ACCEPT;
-
- if (!exp) {
- DEBUGP("no conntrack expectation to modify\n");
- return NF_ACCEPT;
- }
-
- tfh = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
- sizeof(_tftph), &_tftph);
- if (tfh == NULL)
+ exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port;
+ exp->dir = IP_CT_DIR_REPLY;
+ exp->expectfn = ip_nat_follow_master;
+ if (ip_conntrack_expect_related(exp) != 0) {
+ ip_conntrack_expect_free(exp);
return NF_DROP;
-
- switch (ntohs(tfh->opcode)) {
- /* RRQ and WRQ works the same way */
- case TFTP_OPCODE_READ:
- case TFTP_OPCODE_WRITE:
- repl = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- DEBUGP("");
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- DEBUGP("expecting: ");
- DUMP_TUPLE(&repl);
- DUMP_TUPLE(&exp->mask);
- ip_conntrack_change_expect(exp, &repl);
- break;
- default:
- DEBUGP("Unknown opcode\n");
- }
-
- return NF_ACCEPT;
-}
-
-static unsigned int
-tftp_nat_expected(struct sk_buff **pskb,
- unsigned int hooknum,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- const struct ip_conntrack *master = ct->master->expectant;
- const struct ip_conntrack_tuple *orig =
- &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- struct ip_nat_range range;
-#if 0
- const struct ip_conntrack_tuple *repl =
- &master->tuplehash[IP_CT_DIR_REPLY].tuple;
- struct udphdr _udph, *uh;
-
- uh = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4,
- sizeof(_udph), &_udph);
- if (uh == NULL)
- return NF_DROP;
-#endif
-
- IP_NF_ASSERT(info);
- IP_NF_ASSERT(master);
- IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
-
- range.flags = IP_NAT_RANGE_MAP_IPS;
-
- if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
- range.min_ip = range.max_ip = orig->dst.ip;
- DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
- "newsrc: %u.%u.%u.%u\n",
- NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source),
- NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest),
- NIPQUAD(orig->dst.ip));
- } else {
- range.min_ip = range.max_ip = orig->src.ip;
- range.min.udp.port = range.max.udp.port = orig->src.u.udp.port;
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-
- DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u "
- "newdst: %u.%u.%u.%u:%u\n",
- NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source),
- NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest),
- NIPQUAD(orig->src.ip), ntohs(orig->src.u.udp.port));
}
-
- return ip_nat_setup_info(ct, &range, hooknum);
+ return NF_ACCEPT;
}
-static struct ip_nat_helper tftp[MAX_PORTS];
-static char tftp_names[MAX_PORTS][10];
-
-static void fini(void)
+static void __exit fini(void)
{
- int i;
-
- for (i = 0 ; i < ports_c; i++) {
- DEBUGP("unregistering helper for port %d\n", ports[i]);
- ip_nat_helper_unregister(&tftp[i]);
- }
+ ip_nat_tftp_hook = NULL;
+ /* Make sure noone calls it, meanwhile. */
+ synchronize_net();
}
static int __init init(void)
{
- int i, ret = 0;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = TFTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- memset(&tftp[i], 0, sizeof(struct ip_nat_helper));
-
- tftp[i].tuple.dst.protonum = IPPROTO_UDP;
- tftp[i].tuple.src.u.udp.port = htons(ports[i]);
- tftp[i].mask.dst.protonum = 0xFFFF;
- tftp[i].mask.src.u.udp.port = 0xFFFF;
- tftp[i].help = tftp_nat_help;
- tftp[i].flags = 0;
- tftp[i].me = THIS_MODULE;
- tftp[i].expect = tftp_nat_expected;
-
- tmpname = &tftp_names[i][0];
- if (ports[i] == TFTP_PORT)
- sprintf(tmpname, "tftp");
- else
- sprintf(tmpname, "tftp-%d", i);
- tftp[i].name = tmpname;
-
- DEBUGP("ip_nat_tftp: registering for port %d: name %s\n",
- ports[i], tftp[i].name);
- ret = ip_nat_helper_register(&tftp[i]);
-
- if (ret) {
- printk("ip_nat_tftp: unable to register for port %d\n",
- ports[i]);
- fini();
- return ret;
- }
- }
- return ret;
+ BUG_ON(ip_nat_tftp_hook);
+ ip_nat_tftp_hook = help;
+ return 0;
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 68002ff25454..b3dda712f1f7 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -66,7 +66,7 @@ static LIST_HEAD(clusterip_configs);
/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
* data within all structurses (num_local_nodes, local_nodes[]) */
-DECLARE_RWLOCK(clusterip_lock);
+static DECLARE_RWLOCK(clusterip_lock);
#ifdef CONFIG_PROC_FS
static struct file_operations clusterip_proc_fops;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 56d018940954..6f2cefbe16cd 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -100,7 +100,7 @@ typedef struct {
static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
static struct sock *nflognl; /* our socket */
-DECLARE_LOCK(ulog_lock); /* spinlock */
+static DECLARE_LOCK(ulog_lock); /* spinlock */
/* send one ulog_buff_t to userspace */
static void ulog_send(unsigned int nlgroupnum)
@@ -140,7 +140,7 @@ static void ulog_timer(unsigned long data)
UNLOCK_BH(&ulog_lock);
}
-struct sk_buff *ulog_alloc_skb(unsigned int size)
+static struct sk_buff *ulog_alloc_skb(unsigned int size)
{
struct sk_buff *skb;
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
index 04c95d874886..50d76aa4cb99 100644
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ b/net/ipv4/netfilter/ipt_hashlimit.c
@@ -97,7 +97,7 @@ struct ipt_hashlimit_htable {
struct list_head hash[0]; /* hashtable itself */
};
-DECLARE_RWLOCK(hashlimit_lock); /* protects htables list */
+static DECLARE_RWLOCK(hashlimit_lock); /* protects htables list */
static LIST_HEAD(hashlimit_htables);
static kmem_cache_t *hashlimit_cachep;
@@ -668,11 +668,9 @@ static int init_or_fini(int fini)
goto cleanup_nothing;
}
- /* FIXME: do we really want HWCACHE_ALIGN since our objects are
- * quite small ? */
hashlimit_cachep = kmem_cache_create("ipt_hashlimit",
sizeof(struct dsthash_ent), 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ 0, NULL, NULL);
if (!hashlimit_cachep) {
printk(KERN_ERR "Unable to create ipt_hashlimit slab cache\n");
ret = -ENOMEM;
diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c
index 1ea5c1e46ba2..33fdf364d3d3 100644
--- a/net/ipv4/netfilter/ipt_helper.c
+++ b/net/ipv4/netfilter/ipt_helper.c
@@ -38,7 +38,6 @@ match(const struct sk_buff *skb,
int *hotdrop)
{
const struct ipt_helper_info *info = matchinfo;
- struct ip_conntrack_expect *exp;
struct ip_conntrack *ct;
enum ip_conntrack_info ctinfo;
int ret = info->invert;
@@ -54,28 +53,21 @@ match(const struct sk_buff *skb,
return ret;
}
- exp = ct->master;
READ_LOCK(&ip_conntrack_lock);
- if (!exp->expectant) {
- DEBUGP("ipt_helper: expectation %p without expectant !?!\n",
- exp);
- goto out_unlock;
- }
-
- if (!exp->expectant->helper) {
+ if (!ct->master->helper) {
DEBUGP("ipt_helper: master ct %p has no helper\n",
exp->expectant);
goto out_unlock;
}
DEBUGP("master's name = %s , info->name = %s\n",
- exp->expectant->helper->name, info->name);
+ ct->master->helper->name, info->name);
if (info->name[0] == '\0')
ret ^= 1;
else
- ret ^= !strncmp(exp->expectant->helper->name, info->name,
- strlen(exp->expectant->helper->name));
+ ret ^= !strncmp(ct->master->helper->name, info->name,
+ strlen(ct->master->helper->name));
out_unlock:
READ_UNLOCK(&ip_conntrack_lock);
return ret;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c495e7ccd0c8..d34035d63c75 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1822,7 +1822,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->backoff = 0;
tp->snd_cwnd = 2;
tp->probes_out = 0;
- tcp_set_pcount(&tp->packets_out, 0);
+ tp->packets_out = 0;
tp->snd_ssthresh = 0x7fffffff;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(tp, TCP_CA_Open);
@@ -2137,11 +2137,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_snd_mss = tp->mss_cache_std;
info->tcpi_rcv_mss = tp->ack.rcv_mss;
- info->tcpi_unacked = tcp_get_pcount(&tp->packets_out);
- info->tcpi_sacked = tcp_get_pcount(&tp->sacked_out);
- info->tcpi_lost = tcp_get_pcount(&tp->lost_out);
- info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out);
- info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out);
+ info->tcpi_unacked = tp->packets_out;
+ info->tcpi_sacked = tp->sacked_out;
+ info->tcpi_lost = tp->lost_out;
+ info->tcpi_retrans = tp->retrans_out;
+ info->tcpi_fackets = tp->fackets_out;
info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime);
info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 5a8085e92302..bfcd43832cb0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -902,8 +902,8 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts)
printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n",
tp->sack_ok, tp->ca_state,
tp->reordering,
- tcp_get_pcount(&tp->fackets_out),
- tcp_get_pcount(&tp->sacked_out),
+ tp->fackets_out,
+ tp->sacked_out,
tp->undo_marker ? tp->undo_retrans : 0);
#endif
/* Disable FACK yet. */
@@ -966,7 +966,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked;
struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
- int reord = tcp_get_pcount(&tp->packets_out);
+ int reord = tp->packets_out;
int prior_fackets;
u32 lost_retrans = 0;
int flag = 0;
@@ -980,9 +980,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
tp->mss_cache = tp->mss_cache_std;
}
- if (!tcp_get_pcount(&tp->sacked_out))
- tcp_set_pcount(&tp->fackets_out, 0);
- prior_fackets = tcp_get_pcount(&tp->fackets_out);
+ if (!tp->sacked_out)
+ tp->fackets_out = 0;
+ prior_fackets = tp->fackets_out;
for (i=0; i<num_sacks; i++, sp++) {
struct sk_buff *skb;
@@ -1080,8 +1080,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
*/
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
- tcp_dec_pcount(&tp->lost_out, skb);
- tcp_dec_pcount(&tp->retrans_out, skb);
+ tp->lost_out -= tcp_skb_pcount(skb);
+ tp->retrans_out -= tcp_skb_pcount(skb);
}
} else {
/* New sack for not retransmitted frame,
@@ -1093,16 +1093,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (sacked & TCPCB_LOST) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
- tcp_dec_pcount(&tp->lost_out, skb);
+ tp->lost_out -= tcp_skb_pcount(skb);
}
}
TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
flag |= FLAG_DATA_SACKED;
- tcp_inc_pcount(&tp->sacked_out, skb);
+ tp->sacked_out += tcp_skb_pcount(skb);
- if (fack_count > tcp_get_pcount(&tp->fackets_out))
- tcp_set_pcount(&tp->fackets_out, fack_count);
+ if (fack_count > tp->fackets_out)
+ tp->fackets_out = fack_count;
} else {
if (dup_sack && (sacked&TCPCB_RETRANS))
reord = min(fack_count, reord);
@@ -1116,7 +1116,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
if (dup_sack &&
(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tcp_dec_pcount(&tp->retrans_out, skb);
+ tp->retrans_out -= tcp_skb_pcount(skb);
}
}
}
@@ -1142,10 +1142,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
TCP_SKB_CB(skb)->ack_seq + tp->reordering *
tp->mss_cache_std))) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tcp_dec_pcount(&tp->retrans_out, skb);
+ tp->retrans_out -= tcp_skb_pcount(skb);
if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) {
- tcp_inc_pcount(&tp->lost_out, skb);
+ tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
flag |= FLAG_DATA_SACKED;
NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
@@ -1154,20 +1154,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_
}
}
- tcp_set_pcount(&tp->left_out,
- (tcp_get_pcount(&tp->sacked_out) +
- tcp_get_pcount(&tp->lost_out)));
+ tp->left_out = tp->sacked_out + tp->lost_out;
- if ((reord < tcp_get_pcount(&tp->fackets_out)) &&
- tp->ca_state != TCP_CA_Loss)
- tcp_update_reordering(tp,
- ((tcp_get_pcount(&tp->fackets_out) + 1) -
- reord), 0);
+ if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss)
+ tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0);
#if FASTRETRANS_DEBUG > 0
- BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
- BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
- BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
+ BUG_TRAP((int)tp->sacked_out >= 0);
+ BUG_TRAP((int)tp->lost_out >= 0);
+ BUG_TRAP((int)tp->retrans_out >= 0);
BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0);
#endif
return flag;
@@ -1197,7 +1192,7 @@ void tcp_enter_frto(struct sock *sk)
* If something was really lost, it is eventually caught up
* in tcp_enter_frto_loss.
*/
- tcp_set_pcount(&tp->retrans_out, 0);
+ tp->retrans_out = 0;
tp->undo_marker = tp->snd_una;
tp->undo_retrans = 0;
@@ -1220,9 +1215,9 @@ static void tcp_enter_frto_loss(struct sock *sk)
struct sk_buff *skb;
int cnt = 0;
- tcp_set_pcount(&tp->sacked_out, 0);
- tcp_set_pcount(&tp->lost_out, 0);
- tcp_set_pcount(&tp->fackets_out, 0);
+ tp->sacked_out = 0;
+ tp->lost_out = 0;
+ tp->fackets_out = 0;
sk_stream_for_retrans_queue(skb, sk) {
cnt += tcp_skb_pcount(skb);
@@ -1235,11 +1230,11 @@ static void tcp_enter_frto_loss(struct sock *sk)
if (!after(TCP_SKB_CB(skb)->end_seq,
tp->frto_highmark)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tcp_inc_pcount(&tp->lost_out, skb);
+ tp->lost_out += tcp_skb_pcount(skb);
}
} else {
- tcp_inc_pcount(&tp->sacked_out, skb);
- tcp_set_pcount(&tp->fackets_out, cnt);
+ tp->sacked_out += tcp_skb_pcount(skb);
+ tp->fackets_out = cnt;
}
}
tcp_sync_left_out(tp);
@@ -1261,12 +1256,12 @@ static void tcp_enter_frto_loss(struct sock *sk)
void tcp_clear_retrans(struct tcp_sock *tp)
{
- tcp_set_pcount(&tp->left_out, 0);
- tcp_set_pcount(&tp->retrans_out, 0);
+ tp->left_out = 0;
+ tp->retrans_out = 0;
- tcp_set_pcount(&tp->fackets_out, 0);
- tcp_set_pcount(&tp->sacked_out, 0);
- tcp_set_pcount(&tp->lost_out, 0);
+ tp->fackets_out = 0;
+ tp->sacked_out = 0;
+ tp->lost_out = 0;
tp->undo_marker = 0;
tp->undo_retrans = 0;
@@ -1307,10 +1302,10 @@ void tcp_enter_loss(struct sock *sk, int how)
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED;
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tcp_inc_pcount(&tp->lost_out, skb);
+ tp->lost_out += tcp_skb_pcount(skb);
} else {
- tcp_inc_pcount(&tp->sacked_out, skb);
- tcp_set_pcount(&tp->fackets_out, cnt);
+ tp->sacked_out += tcp_skb_pcount(skb);
+ tp->fackets_out = cnt;
}
}
tcp_sync_left_out(tp);
@@ -1347,8 +1342,7 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp)
static inline int tcp_fackets_out(struct tcp_sock *tp)
{
- return IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 :
- tcp_get_pcount(&tp->fackets_out);
+ return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out;
}
static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb)
@@ -1358,7 +1352,7 @@ static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb)
static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp)
{
- return tcp_get_pcount(&tp->packets_out) &&
+ return tp->packets_out &&
tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue));
}
@@ -1460,7 +1454,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
__u32 packets_out;
/* Trick#1: The loss is proven. */
- if (tcp_get_pcount(&tp->lost_out))
+ if (tp->lost_out)
return 1;
/* Not-A-Trick#2 : Classic rule... */
@@ -1476,9 +1470,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp)
/* Trick#4: It is still not OK... But will it be useful to delay
* recovery more?
*/
- packets_out = tcp_get_pcount(&tp->packets_out);
+ packets_out = tp->packets_out;
if (packets_out <= tp->reordering &&
- tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
+ tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) &&
!tcp_may_send_now(sk, tp)) {
/* We have nothing to send. This connection is limited
* either by receiver window or by application.
@@ -1497,16 +1491,12 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
{
u32 holes;
- holes = max(tcp_get_pcount(&tp->lost_out), 1U);
- holes = min(holes, tcp_get_pcount(&tp->packets_out));
+ holes = max(tp->lost_out, 1U);
+ holes = min(holes, tp->packets_out);
- if ((tcp_get_pcount(&tp->sacked_out) + holes) >
- tcp_get_pcount(&tp->packets_out)) {
- tcp_set_pcount(&tp->sacked_out,
- (tcp_get_pcount(&tp->packets_out) - holes));
- tcp_update_reordering(tp,
- tcp_get_pcount(&tp->packets_out)+addend,
- 0);
+ if ((tp->sacked_out + holes) > tp->packets_out) {
+ tp->sacked_out = tp->packets_out - holes;
+ tcp_update_reordering(tp, tp->packets_out+addend, 0);
}
}
@@ -1514,7 +1504,7 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend)
static void tcp_add_reno_sack(struct tcp_sock *tp)
{
- tcp_inc_pcount_explicit(&tp->sacked_out, 1);
+ tp->sacked_out++;
tcp_check_reno_reordering(tp, 0);
tcp_sync_left_out(tp);
}
@@ -1525,10 +1515,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke
{
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
- if (acked-1 >= tcp_get_pcount(&tp->sacked_out))
- tcp_set_pcount(&tp->sacked_out, 0);
+ if (acked-1 >= tp->sacked_out)
+ tp->sacked_out = 0;
else
- tcp_dec_pcount_explicit(&tp->sacked_out, acked-1);
+ tp->sacked_out -= acked-1;
}
tcp_check_reno_reordering(tp, acked);
tcp_sync_left_out(tp);
@@ -1536,8 +1526,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke
static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
{
- tcp_set_pcount(&tp->sacked_out, 0);
- tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out));
+ tp->sacked_out = 0;
+ tp->left_out = tp->lost_out;
}
/* Mark head of queue up as lost. */
@@ -1547,7 +1537,7 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
struct sk_buff *skb;
int cnt = packets;
- BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out));
+ BUG_TRAP(cnt <= tp->packets_out);
sk_stream_for_retrans_queue(skb, sk) {
cnt -= tcp_skb_pcount(skb);
@@ -1555,7 +1545,7 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
break;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tcp_inc_pcount(&tp->lost_out, skb);
+ tp->lost_out += tcp_skb_pcount(skb);
}
}
tcp_sync_left_out(tp);
@@ -1566,7 +1556,7 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp,
static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
{
if (IsFack(tp)) {
- int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering;
+ int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
tcp_mark_head_lost(sk, tp, lost, tp->high_seq);
@@ -1586,7 +1576,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp)
if (tcp_skb_timedout(tp, skb) &&
!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tcp_inc_pcount(&tp->lost_out, skb);
+ tp->lost_out += tcp_skb_pcount(skb);
}
}
tcp_sync_left_out(tp);
@@ -1651,9 +1641,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg)
printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
msg,
NIPQUAD(inet->daddr), ntohs(inet->dport),
- tp->snd_cwnd, tcp_get_pcount(&tp->left_out),
+ tp->snd_cwnd, tp->left_out,
tp->snd_ssthresh, tp->prior_ssthresh,
- tcp_get_pcount(&tp->packets_out));
+ tp->packets_out);
}
#else
#define DBGUNDO(x...) do { } while (0)
@@ -1724,13 +1714,13 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp,
int acked)
{
/* Partial ACK arrived. Force Hoe's retransmit. */
- int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering;
+ int failed = IsReno(tp) || tp->fackets_out>tp->reordering;
if (tcp_may_undo(tp)) {
/* Plain luck! Hole if filled with delayed
* packet, rather than with a retransmit.
*/
- if (tcp_get_pcount(&tp->retrans_out) == 0)
+ if (tp->retrans_out == 0)
tp->retrans_stamp = 0;
tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1);
@@ -1757,8 +1747,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp)
TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
}
DBGUNDO(sk, tp, "partial loss");
- tcp_set_pcount(&tp->lost_out, 0);
- tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
+ tp->lost_out = 0;
+ tp->left_out = tp->sacked_out;
tcp_undo_cwr(tp, 1);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO);
tp->retransmits = 0;
@@ -1781,9 +1771,9 @@ static inline void tcp_complete_cwr(struct tcp_sock *tp)
static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
{
- tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out));
+ tp->left_out = tp->sacked_out;
- if (tcp_get_pcount(&tp->retrans_out) == 0)
+ if (tp->retrans_out == 0)
tp->retrans_stamp = 0;
if (flag&FLAG_ECE)
@@ -1792,9 +1782,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag)
if (tp->ca_state != TCP_CA_CWR) {
int state = TCP_CA_Open;
- if (tcp_get_pcount(&tp->left_out) ||
- tcp_get_pcount(&tp->retrans_out) ||
- tp->undo_marker)
+ if (tp->left_out || tp->retrans_out || tp->undo_marker)
state = TCP_CA_Disorder;
if (tp->ca_state != state) {
@@ -1827,11 +1815,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
/* Some technical things:
* 1. Reno does not count dupacks (sacked_out) automatically. */
- if (!tcp_get_pcount(&tp->packets_out))
- tcp_set_pcount(&tp->sacked_out, 0);
+ if (!tp->packets_out)
+ tp->sacked_out = 0;
/* 2. SACK counts snd_fack in packets inaccurately. */
- if (tcp_get_pcount(&tp->sacked_out) == 0)
- tcp_set_pcount(&tp->fackets_out, 0);
+ if (tp->sacked_out == 0)
+ tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
@@ -1839,15 +1827,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
- if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp))
+ if (tp->sacked_out && tcp_check_sack_reneging(sk, tp))
return;
/* C. Process data loss notification, provided it is valid. */
if ((flag&FLAG_DATA_LOST) &&
before(tp->snd_una, tp->high_seq) &&
tp->ca_state != TCP_CA_Open &&
- tcp_get_pcount(&tp->fackets_out) > tp->reordering) {
- tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq);
+ tp->fackets_out > tp->reordering) {
+ tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq);
NET_INC_STATS_BH(LINUX_MIB_TCPLOSS);
}
@@ -1858,7 +1846,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
* when high_seq is ACKed. */
if (tp->ca_state == TCP_CA_Open) {
if (!sysctl_tcp_frto)
- BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0);
+ BUG_TRAP(tp->retrans_out == 0);
tp->retrans_stamp = 0;
} else if (!before(tp->snd_una, tp->high_seq)) {
switch (tp->ca_state) {
@@ -1905,8 +1893,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
if (IsReno(tp) && is_dupack)
tcp_add_reno_sack(tp);
} else {
- int acked = prior_packets -
- tcp_get_pcount(&tp->packets_out);
+ int acked = prior_packets - tp->packets_out;
if (IsReno(tp))
tcp_remove_reno_sacks(sk, tp, acked);
is_dupack = tcp_try_undo_partial(sk, tp, acked);
@@ -1949,7 +1936,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una,
tp->high_seq = tp->snd_nxt;
tp->prior_ssthresh = 0;
tp->undo_marker = tp->snd_una;
- tp->undo_retrans = tcp_get_pcount(&tp->retrans_out);
+ tp->undo_retrans = tp->retrans_out;
if (tp->ca_state < TCP_CA_CWR) {
if (!(flag&FLAG_ECE))
@@ -2349,7 +2336,7 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 seq_rtt)
static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp)
{
- if (!tcp_get_pcount(&tp->packets_out)) {
+ if (!tp->packets_out) {
tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS);
} else {
tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto);
@@ -2391,18 +2378,15 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
if (sacked) {
if (sacked & TCPCB_RETRANS) {
if (sacked & TCPCB_SACKED_RETRANS)
- tcp_dec_pcount_explicit(&tp->retrans_out,
- packets_acked);
+ tp->retrans_out -= packets_acked;
acked |= FLAG_RETRANS_DATA_ACKED;
*seq_rtt = -1;
} else if (*seq_rtt < 0)
*seq_rtt = now - scb->when;
if (sacked & TCPCB_SACKED_ACKED)
- tcp_dec_pcount_explicit(&tp->sacked_out,
- packets_acked);
+ tp->sacked_out -= packets_acked;
if (sacked & TCPCB_LOST)
- tcp_dec_pcount_explicit(&tp->lost_out,
- packets_acked);
+ tp->lost_out -= packets_acked;
if (sacked & TCPCB_URG) {
if (tp->urg_mode &&
!before(seq, tp->snd_up))
@@ -2411,12 +2395,11 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb,
} else if (*seq_rtt < 0)
*seq_rtt = now - scb->when;
- if (tcp_get_pcount(&tp->fackets_out)) {
- __u32 dval = min(tcp_get_pcount(&tp->fackets_out),
- packets_acked);
- tcp_dec_pcount_explicit(&tp->fackets_out, dval);
+ if (tp->fackets_out) {
+ __u32 dval = min(tp->fackets_out, packets_acked);
+ tp->fackets_out -= dval;
}
- tcp_dec_pcount_explicit(&tp->packets_out, packets_acked);
+ tp->packets_out -= packets_acked;
BUG_ON(tcp_skb_pcount(skb) == 0);
BUG_ON(!before(scb->seq, scb->end_seq));
@@ -2468,15 +2451,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
if (sacked) {
if (sacked & TCPCB_RETRANS) {
if(sacked & TCPCB_SACKED_RETRANS)
- tcp_dec_pcount(&tp->retrans_out, skb);
+ tp->retrans_out -= tcp_skb_pcount(skb);
acked |= FLAG_RETRANS_DATA_ACKED;
seq_rtt = -1;
} else if (seq_rtt < 0)
seq_rtt = now - scb->when;
if (sacked & TCPCB_SACKED_ACKED)
- tcp_dec_pcount(&tp->sacked_out, skb);
+ tp->sacked_out -= tcp_skb_pcount(skb);
if (sacked & TCPCB_LOST)
- tcp_dec_pcount(&tp->lost_out, skb);
+ tp->lost_out -= tcp_skb_pcount(skb);
if (sacked & TCPCB_URG) {
if (tp->urg_mode &&
!before(scb->end_seq, tp->snd_up))
@@ -2496,27 +2479,24 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
}
#if FASTRETRANS_DEBUG > 0
- BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0);
- BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0);
- BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0);
- if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) {
- if (tcp_get_pcount(&tp->lost_out)) {
+ BUG_TRAP((int)tp->sacked_out >= 0);
+ BUG_TRAP((int)tp->lost_out >= 0);
+ BUG_TRAP((int)tp->retrans_out >= 0);
+ if (!tp->packets_out && tp->sack_ok) {
+ if (tp->lost_out) {
printk(KERN_DEBUG "Leak l=%u %d\n",
- tcp_get_pcount(&tp->lost_out),
- tp->ca_state);
- tcp_set_pcount(&tp->lost_out, 0);
+ tp->lost_out, tp->ca_state);
+ tp->lost_out = 0;
}
- if (tcp_get_pcount(&tp->sacked_out)) {
+ if (tp->sacked_out) {
printk(KERN_DEBUG "Leak s=%u %d\n",
- tcp_get_pcount(&tp->sacked_out),
- tp->ca_state);
- tcp_set_pcount(&tp->sacked_out, 0);
+ tp->sacked_out, tp->ca_state);
+ tp->sacked_out = 0;
}
- if (tcp_get_pcount(&tp->retrans_out)) {
+ if (tp->retrans_out) {
printk(KERN_DEBUG "Leak r=%u %d\n",
- tcp_get_pcount(&tp->retrans_out),
- tp->ca_state);
- tcp_set_pcount(&tp->retrans_out, 0);
+ tp->retrans_out, tp->ca_state);
+ tp->retrans_out = 0;
}
}
#endif
@@ -2943,7 +2923,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag)
*/
sk->sk_err_soft = 0;
tp->rcv_tstamp = tcp_time_stamp;
- prior_packets = tcp_get_pcount(&tp->packets_out);
+ prior_packets = tp->packets_out;
if (!prior_packets)
goto no_queue;
@@ -3964,7 +3944,7 @@ static void tcp_new_space(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd &&
+ if (tp->packets_out < tp->snd_cwnd &&
!(sk->sk_userlocks & SOCK_SNDBUF_LOCK) &&
!tcp_memory_pressure &&
atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) {
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 748224b44399..2876f505674d 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -754,11 +754,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req,
newtp->mdev = TCP_TIMEOUT_INIT;
newtp->rto = TCP_TIMEOUT_INIT;
- tcp_set_pcount(&newtp->packets_out, 0);
- tcp_set_pcount(&newtp->left_out, 0);
- tcp_set_pcount(&newtp->retrans_out, 0);
- tcp_set_pcount(&newtp->sacked_out, 0);
- tcp_set_pcount(&newtp->fackets_out, 0);
+ newtp->packets_out = 0;
+ newtp->left_out = 0;
+ newtp->retrans_out = 0;
+ newtp->sacked_out = 0;
+ newtp->fackets_out = 0;
newtp->snd_ssthresh = 0x7fffffff;
/* So many TCP implementations out there (incorrectly) count the
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 7443293b862d..56947f62a198 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -129,8 +129,7 @@ static inline void tcp_event_data_sent(struct tcp_sock *tp,
{
u32 now = tcp_time_stamp;
- if (!tcp_get_pcount(&tp->packets_out) &&
- (s32)(now - tp->lsndtime) > tp->rto)
+ if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
tcp_cwnd_restart(tp, __sk_dst_get(sk));
tp->lsndtime = now;
@@ -509,8 +508,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when;
if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
- tcp_dec_pcount(&tp->lost_out, skb);
- tcp_dec_pcount(&tp->left_out, skb);
+ tp->lost_out -= tcp_skb_pcount(skb);
+ tp->left_out -= tcp_skb_pcount(skb);
}
/* Fix up tso_factor for both original and new SKB. */
@@ -518,13 +517,13 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len)
tcp_set_skb_tso_segs(buff, tp->mss_cache_std);
if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) {
- tcp_inc_pcount(&tp->lost_out, skb);
- tcp_inc_pcount(&tp->left_out, skb);
+ tp->lost_out += tcp_skb_pcount(skb);
+ tp->left_out += tcp_skb_pcount(skb);
}
if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) {
- tcp_inc_pcount(&tp->lost_out, buff);
- tcp_inc_pcount(&tp->left_out, buff);
+ tp->lost_out += tcp_skb_pcount(buff);
+ tp->left_out += tcp_skb_pcount(buff);
}
/* Link BUFF into the send queue. */
@@ -773,7 +772,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle)
return 0;
}
- return !tcp_get_pcount(&tp->packets_out) && sk->sk_send_head;
+ return !tp->packets_out && sk->sk_send_head;
}
return 0;
}
@@ -945,15 +944,15 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m
*/
TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL);
if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS)
- tcp_dec_pcount(&tp->retrans_out, next_skb);
+ tp->retrans_out -= tcp_skb_pcount(next_skb);
if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) {
- tcp_dec_pcount(&tp->lost_out, next_skb);
- tcp_dec_pcount(&tp->left_out, next_skb);
+ tp->lost_out -= tcp_skb_pcount(next_skb);
+ tp->left_out -= tcp_skb_pcount(next_skb);
}
/* Reno case is special. Sigh... */
- if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) {
+ if (!tp->sack_ok && tp->sacked_out) {
tcp_dec_pcount_approx(&tp->sacked_out, next_skb);
- tcp_dec_pcount(&tp->left_out, next_skb);
+ tp->left_out -= tcp_skb_pcount(next_skb);
}
/* Not quite right: it can be > snd.fack, but
@@ -981,11 +980,11 @@ void tcp_simple_retransmit(struct sock *sk)
!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) {
if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) {
TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tcp_dec_pcount(&tp->retrans_out, skb);
+ tp->retrans_out -= tcp_skb_pcount(skb);
}
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- tcp_inc_pcount(&tp->lost_out, skb);
+ tp->lost_out += tcp_skb_pcount(skb);
lost = 1;
}
}
@@ -1060,9 +1059,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
/* New SKB created, account for it. */
new_factor = tcp_skb_pcount(skb);
- tcp_dec_pcount_explicit(&tp->packets_out,
- old_factor - new_factor);
- tcp_inc_pcount(&tp->packets_out, skb->next);
+ tp->packets_out -= old_factor - new_factor;
+ tp->packets_out += tcp_skb_pcount(skb->next);
}
/* Collapse two adjacent packets if worthwhile and we can. */
@@ -1071,6 +1069,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
(skb->next != sk->sk_send_head) &&
(skb->next != (struct sk_buff *)&sk->sk_write_queue) &&
(skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) &&
+ (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) &&
(sysctl_tcp_retrans_collapse != 0))
tcp_retrans_try_collapse(sk, skb, cur_mss);
@@ -1115,7 +1114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
}
#endif
TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS;
- tcp_inc_pcount(&tp->retrans_out, skb);
+ tp->retrans_out += tcp_skb_pcount(skb);
/* Save stamp of the first retransmit. */
if (!tp->retrans_stamp)
@@ -1143,7 +1142,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int packet_cnt = tcp_get_pcount(&tp->lost_out);
+ int packet_cnt = tp->lost_out;
/* First pass: retransmit lost packets. */
if (packet_cnt) {
@@ -1210,7 +1209,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
* real MSS sized packet because tcp_retransmit_skb()
* will fragment it if necessary.
*/
- if (++packet_cnt > tcp_get_pcount(&tp->fackets_out))
+ if (++packet_cnt > tp->fackets_out)
break;
if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
@@ -1496,7 +1495,7 @@ int tcp_connect(struct sock *sk)
tp->retrans_stamp = TCP_SKB_CB(buff)->when;
__skb_queue_tail(&sk->sk_write_queue, buff);
sk_charge_skb(sk, buff);
- tcp_inc_pcount(&tp->packets_out, buff);
+ tp->packets_out += tcp_skb_pcount(buff);
tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL));
TCP_INC_STATS(TCP_MIB_ACTIVEOPENS);
@@ -1694,7 +1693,7 @@ void tcp_send_probe0(struct sock *sk)
err = tcp_write_wakeup(sk);
- if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
+ if (tp->packets_out || !sk->sk_send_head) {
/* Cancel probe timer, if it is not required. */
tp->probes_out = 0;
tp->backoff = 0;
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index a2799d1678af..c3751508ed24 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -123,7 +123,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset)
* 1. Last segment was sent recently. */
if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN ||
/* 2. Window is closed. */
- (!tp->snd_wnd && !tcp_get_pcount(&tp->packets_out)))
+ (!tp->snd_wnd && !tp->packets_out))
do_reset = 1;
if (do_reset)
tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -271,7 +271,7 @@ static void tcp_probe_timer(struct sock *sk)
struct tcp_sock *tp = tcp_sk(sk);
int max_probes;
- if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) {
+ if (tp->packets_out || !sk->sk_send_head) {
tp->probes_out = 0;
return;
}
@@ -318,7 +318,7 @@ static void tcp_retransmit_timer(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (!tcp_get_pcount(&tp->packets_out))
+ if (!tp->packets_out)
goto out;
BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue));
@@ -608,7 +608,7 @@ static void tcp_keepalive_timer (unsigned long data)
elapsed = keepalive_time_when(tp);
/* It is alive without keepalive 8) */
- if (tcp_get_pcount(&tp->packets_out) || sk->sk_send_head)
+ if (tp->packets_out || sk->sk_send_head)
goto resched;
elapsed = tcp_time_stamp - tp->rcv_tstamp;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fecf022809ae..f4eeb8629a0e 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1079,10 +1079,29 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)
if (dev->addr_len != ETH_ALEN)
return -1;
memcpy(eui, dev->dev_addr, 3);
- memcpy(eui + 5, dev->dev_addr+3, 3);
- eui[3] = 0xFF;
- eui[4] = 0xFE;
- eui[0] ^= 2;
+ memcpy(eui + 5, dev->dev_addr + 3, 3);
+
+ /*
+ * The zSeries OSA network cards can be shared among various
+ * OS instances, but the OSA cards have only one MAC address.
+ * This leads to duplicate address conflicts in conjunction
+ * with IPv6 if more than one instance uses the same card.
+ *
+ * The driver for these cards can deliver a unique 16-bit
+ * identifier for each instance sharing the same card. It is
+ * placed instead of 0xFFFE in the interface identifier. The
+ * "u" bit of the interface identifier is not inverted in this
+ * case. Hence the resulting interface identifier has local
+ * scope according to RFC2373.
+ */
+ if (dev->dev_id) {
+ eui[3] = (dev->dev_id >> 8) & 0xFF;
+ eui[4] = dev->dev_id & 0xFF;
+ } else {
+ eui[3] = 0xFF;
+ eui[4] = 0xFE;
+ eui[0] ^= 2;
+ }
return 0;
case ARPHRD_ARCNET:
/* XXX: inherit EUI-64 from other interface -- yoshfuji */
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index e9eca00e0e41..18637221a750 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -287,7 +287,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
int iif = 0;
int addr_type = 0;
int len;
- int hlimit = -1;
+ int hlimit;
int err = 0;
if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
@@ -375,14 +375,12 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info,
if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
goto out_dst_release;
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl.fl6_dst))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = dst_metric(dst, RTAX_HOPLIMIT);
- }
+ if (ipv6_addr_is_multicast(&fl.fl6_dst))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = dst_metric(dst, RTAX_HOPLIMIT);
msg.skb = skb;
msg.offset = skb->nh.raw - skb->data;
@@ -433,7 +431,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct icmpv6_msg msg;
struct dst_entry *dst;
int err = 0;
- int hlimit = -1;
+ int hlimit;
saddr = &skb->nh.ipv6h->daddr;
@@ -463,14 +461,12 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
goto out_dst_release;
- if (hlimit < 0) {
- if (ipv6_addr_is_multicast(&fl.fl6_dst))
- hlimit = np->mcast_hops;
- else
- hlimit = np->hop_limit;
- if (hlimit < 0)
- hlimit = dst_metric(dst, RTAX_HOPLIMIT);
- }
+ if (ipv6_addr_is_multicast(&fl.fl6_dst))
+ hlimit = np->mcast_hops;
+ else
+ hlimit = np->hop_limit;
+ if (hlimit < 0)
+ hlimit = dst_metric(dst, RTAX_HOPLIMIT);
idev = in6_dev_get(skb->dev);
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 55110651a3f2..0b45f8da2950 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -169,12 +169,33 @@ struct ndisc_options {
#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
-static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len)
+/*
+ * Return the padding between the option length and the start of the
+ * link addr. Currently only IP-over-InfiniBand needs this, although
+ * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may
+ * also need a pad of 2.
+ */
+static int ndisc_addr_option_pad(unsigned short type)
+{
+ switch (type) {
+ case ARPHRD_INFINIBAND: return 2;
+ default: return 0;
+ }
+}
+
+static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
+ unsigned short addr_type)
{
int space = NDISC_OPT_SPACE(data_len);
+ int pad = ndisc_addr_option_pad(addr_type);
opt[0] = type;
opt[1] = space>>3;
+
+ memset(opt + 2, 0, pad);
+ opt += pad;
+ space -= pad;
+
memcpy(opt+2, data, data_len);
data_len += 2;
opt += data_len;
@@ -453,7 +474,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
ipv6_addr_copy(&msg->target, solicited_addr);
if (inc_opt)
- ndisc_fill_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len);
+ ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr,
+ dev->addr_len, dev->type);
/* checksum */
msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len,
@@ -536,7 +558,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
ipv6_addr_copy(&msg->target, solicit);
if (send_llinfo)
- ndisc_fill_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
+ ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
+ dev->addr_len, dev->type);
/* checksum */
msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr,
@@ -610,7 +633,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
opt = (u8*) (hdr + 1);
if (dev->addr_len)
- ndisc_fill_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len);
+ ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr,
+ dev->addr_len, dev->type);
/* checksum */
hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len,
@@ -717,7 +741,8 @@ static void ndisc_recv_ns(struct sk_buff *skb)
}
if (ndopts.nd_opts_src_lladdr) {
- lladdr = (u8*)(ndopts.nd_opts_src_lladdr + 1);
+ lladdr = (u8*)(ndopts.nd_opts_src_lladdr + 1) +
+ ndisc_addr_option_pad(dev->type);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len)) {
ND_PRINTK2(KERN_WARNING
@@ -874,7 +899,8 @@ static void ndisc_recv_na(struct sk_buff *skb)
return;
}
if (ndopts.nd_opts_tgt_lladdr) {
- lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1);
+ lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1) +
+ ndisc_addr_option_pad(dev->type);
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len)) {
ND_PRINTK2(KERN_WARNING
@@ -903,6 +929,9 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (neigh) {
u8 old_flags = neigh->flags;
+ if (neigh->nud_state & NUD_FAILED)
+ goto out;
+
neigh_update(neigh, lladdr,
msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
NEIGH_UPDATE_F_WEAK_OVERRIDE|
@@ -920,6 +949,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
ip6_del_rt(rt, NULL, NULL);
}
+out:
neigh_release(neigh);
}
}
@@ -964,7 +994,8 @@ static void ndisc_recv_rs(struct sk_buff *skb)
}
if (ndopts.nd_opts_src_lladdr) {
- lladdr = (u8 *)(ndopts.nd_opts_src_lladdr + 1);
+ lladdr = (u8 *)(ndopts.nd_opts_src_lladdr + 1) +
+ ndisc_addr_option_pad(skb->dev->type);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
if (lladdrlen != NDISC_OPT_SPACE(skb->dev->addr_len))
goto out;
@@ -985,7 +1016,7 @@ out:
static void ndisc_router_discovery(struct sk_buff *skb)
{
struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw;
- struct neighbour *neigh;
+ struct neighbour *neigh = NULL;
struct inet6_dev *in6_dev;
struct rt6_info *rt;
int lifetime;
@@ -1053,7 +1084,11 @@ static void ndisc_router_discovery(struct sk_buff *skb)
rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+ if (rt)
+ neigh = rt->rt6i_nexthop;
+
if (rt && lifetime == 0) {
+ neigh_clone(neigh);
ip6_del_rt(rt, NULL, NULL);
rt = NULL;
}
@@ -1126,11 +1161,15 @@ static void ndisc_router_discovery(struct sk_buff *skb)
* Process options.
*/
- if (rt && (neigh = rt->rt6i_nexthop) != NULL) {
+ if (!neigh)
+ neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr,
+ skb->dev, 1);
+ if (neigh) {
u8 *lladdr = NULL;
int lladdrlen;
if (ndopts.nd_opts_src_lladdr) {
- lladdr = (u8*)((ndopts.nd_opts_src_lladdr)+1);
+ lladdr = (u8*)((ndopts.nd_opts_src_lladdr)+1) +
+ ndisc_addr_option_pad(skb->dev->type);
lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
if (lladdrlen != NDISC_OPT_SPACE(skb->dev->addr_len)) {
ND_PRINTK2(KERN_WARNING
@@ -1181,6 +1220,8 @@ static void ndisc_router_discovery(struct sk_buff *skb)
out:
if (rt)
dst_release(&rt->u.dst);
+ else if (neigh)
+ neigh_release(neigh);
in6_dev_put(in6_dev);
}
@@ -1250,7 +1291,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
return;
}
if (ndopts.nd_opts_tgt_lladdr) {
- lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1);
+ lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1) +
+ ndisc_addr_option_pad(skb->dev->type);
lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3;
if (lladdrlen != NDISC_OPT_SPACE(skb->dev->addr_len)) {
ND_PRINTK2(KERN_WARNING
@@ -1379,7 +1421,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
*/
if (dev->addr_len)
- opt = ndisc_fill_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha, dev->addr_len);
+ opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha,
+ dev->addr_len, dev->type);
/*
* build redirect option and copy skb over to the new packet.
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 9e0a2e169f7b..be23939b8f8e 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -115,10 +115,10 @@ found:
static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb)
{
struct icmp6hdr *icmph;
- struct raw6_opt *opt = raw6_sk(sk);
+ struct raw6_sock *rp = raw6_sk(sk);
if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) {
- __u32 *data = &opt->filter.data[0];
+ __u32 *data = &rp->filter.data[0];
int bit_nr;
icmph = (struct icmp6hdr *) skb->data;
@@ -315,14 +315,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb)
int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
{
struct inet_sock *inet = inet_sk(sk);
- struct raw6_opt *raw_opt = raw6_sk(sk);
+ struct raw6_sock *rp = raw6_sk(sk);
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
kfree_skb(skb);
return NET_RX_DROP;
}
- if (!raw_opt->checksum)
+ if (!rp->checksum)
skb->ip_summed = CHECKSUM_UNNECESSARY;
if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
@@ -451,21 +451,22 @@ csum_copy_err:
goto out_free;
}
-static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_opt *opt, int len)
+static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
+ struct raw6_sock *rp, int len)
{
struct sk_buff *skb;
int err = 0;
u16 *csum;
u32 tmp_csum;
- if (!opt->checksum)
+ if (!rp->checksum)
goto send;
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
- if (opt->offset + 1 < len)
- csum = (u16 *)(skb->h.raw + opt->offset);
+ if (rp->offset + 1 < len)
+ csum = (u16 *)(skb->h.raw + rp->offset);
else {
err = -EINVAL;
goto out;
@@ -609,7 +610,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct in6_addr *daddr, *final_p = NULL, final;
struct inet_sock *inet = inet_sk(sk);
struct ipv6_pinfo *np = inet6_sk(sk);
- struct raw6_opt *raw_opt = raw6_sk(sk);
+ struct raw6_sock *rp = raw6_sk(sk);
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct dst_entry *dst = NULL;
@@ -771,7 +772,7 @@ back_from_confirm:
if (err)
ip6_flush_pending_frames(sk);
else if (!(msg->msg_flags & MSG_MORE))
- err = rawv6_push_pending_frames(sk, &fl, raw_opt, len);
+ err = rawv6_push_pending_frames(sk, &fl, rp, len);
}
done:
ip6_dst_store(sk, dst,
@@ -838,7 +839,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname,
static int rawv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
- struct raw6_opt *opt = raw6_sk(sk);
+ struct raw6_sock *rp = raw6_sk(sk);
int val;
switch(level) {
@@ -868,10 +869,10 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
if (val > 0 && (val&1))
return(-EINVAL);
if (val < 0) {
- opt->checksum = 0;
+ rp->checksum = 0;
} else {
- opt->checksum = 1;
- opt->offset = val;
+ rp->checksum = 1;
+ rp->offset = val;
}
return 0;
@@ -885,7 +886,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname,
static int rawv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
- struct raw6_opt *opt = raw6_sk(sk);
+ struct raw6_sock *rp = raw6_sk(sk);
int val, len;
switch(level) {
@@ -910,10 +911,10 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case IPV6_CHECKSUM:
- if (opt->checksum == 0)
+ if (rp->checksum == 0)
val = -1;
else
- val = opt->offset;
+ val = rp->offset;
break;
default:
@@ -966,9 +967,9 @@ static void rawv6_close(struct sock *sk, long timeout)
static int rawv6_init_sk(struct sock *sk)
{
if (inet_sk(sk)->num == IPPROTO_ICMPV6) {
- struct raw6_opt *opt = raw6_sk(sk);
- opt->checksum = 1;
- opt->offset = 2;
+ struct raw6_sock *rp = raw6_sk(sk);
+ rp->checksum = 1;
+ rp->offset = 2;
}
return(0);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 316644b92cb3..06ba75fcfff4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -167,6 +167,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, int how)
}
}
+static __inline__ int rt6_check_expired(const struct rt6_info *rt)
+{
+ return (rt->rt6i_flags & RTF_EXPIRES &&
+ time_after(jiffies, rt->rt6i_expires));
+}
+
/*
* Route lookup. Any rt6_lock is implied.
*/
@@ -237,8 +243,7 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
sprt->rt6i_dev->ifindex == oif))
m += 8;
- if ((sprt->rt6i_flags & RTF_EXPIRES) &&
- time_after(jiffies, sprt->rt6i_expires))
+ if (rt6_check_expired(sprt))
continue;
if (sprt == rt6_dflt_pointer)
@@ -296,7 +301,8 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
for (sprt = rt6_dflt_pointer->u.next;
sprt; sprt = sprt->u.next) {
if (sprt->u.dst.obsolete <= 0 &&
- sprt->u.dst.error == 0) {
+ sprt->u.dst.error == 0 &&
+ !rt6_check_expired(sprt)) {
match = sprt;
break;
}
@@ -305,7 +311,8 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
!match && sprt;
sprt = sprt->u.next) {
if (sprt->u.dst.obsolete <= 0 &&
- sprt->u.dst.error == 0) {
+ sprt->u.dst.error == 0 &&
+ !rt6_check_expired(sprt)) {
match = sprt;
break;
}
@@ -331,7 +338,8 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
*/
for (sprt = ip6_routing_table.leaf;
sprt; sprt = sprt->u.next) {
- if ((sprt->rt6i_flags & RTF_DEFAULT) &&
+ if (!rt6_check_expired(sprt) &&
+ (sprt->rt6i_flags & RTF_DEFAULT) &&
(!oif ||
(sprt->rt6i_dev &&
sprt->rt6i_dev->ifindex == oif))) {
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 5fbd19f82c11..344be7e9ea40 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -80,6 +80,8 @@ static struct proto_ops ipx_dgram_ops;
LIST_HEAD(ipx_interfaces);
DEFINE_SPINLOCK(ipx_interfaces_lock);
+static kmem_cache_t *ipx_sk_slab;
+
struct ipx_interface *ipx_primary_net;
struct ipx_interface *ipx_internal_net;
@@ -277,7 +279,7 @@ static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc,
spin_lock_bh(&intrfc->if_sklist_lock);
sk_for_each(s, node, &intrfc->if_sklist) {
- struct ipx_opt *ipxs = ipx_sk(s);
+ struct ipx_sock *ipxs = ipx_sk(s);
if (ipxs->port == port &&
!memcmp(ipx_node, ipxs->node, IPX_NODE_LEN))
@@ -302,7 +304,7 @@ static void __ipxitf_down(struct ipx_interface *intrfc)
spin_lock_bh(&intrfc->if_sklist_lock);
/* error sockets */
sk_for_each_safe(s, node, t, &intrfc->if_sklist) {
- struct ipx_opt *ipxs = ipx_sk(s);
+ struct ipx_sock *ipxs = ipx_sk(s);
s->sk_err = ENOLINK;
s->sk_error_report(s);
@@ -400,7 +402,7 @@ static int ipxitf_demux_socket(struct ipx_interface *intrfc,
spin_lock_bh(&intrfc->if_sklist_lock);
sk_for_each(s, node, &intrfc->if_sklist) {
- struct ipx_opt *ipxs = ipx_sk(s);
+ struct ipx_sock *ipxs = ipx_sk(s);
if (ipxs->port == ipx->ipx_dest.sock &&
(is_broadcast || !memcmp(ipx->ipx_dest.node,
@@ -1348,32 +1350,21 @@ out:
static int ipx_create(struct socket *sock, int protocol)
{
int rc = -ESOCKTNOSUPPORT;
- struct ipx_opt *ipx = NULL;
struct sock *sk;
- switch (sock->type) {
- case SOCK_DGRAM:
- sk = sk_alloc(PF_IPX, GFP_KERNEL, 1, NULL);
- rc = -ENOMEM;
- if (!sk)
- goto out;
- ipx = sk->sk_protinfo = kmalloc(sizeof(*ipx), GFP_KERNEL);
- if (!ipx)
- goto outsk;
- memset(ipx, 0, sizeof(*ipx));
- sock->ops = &ipx_dgram_ops;
- break;
- case SOCK_SEQPACKET:
- /*
- * SPX support is not anymore in the kernel sources. If
- * you want to ressurrect it, completing it and making
- * it understand shared skbs, be fully multithreaded,
- * etc, grab the sources in an early 2.5 kernel tree.
- */
- case SOCK_STREAM: /* Allow higher levels to piggyback */
- default:
+ /*
+ * SPX support is not anymore in the kernel sources. If you want to
+ * ressurrect it, completing it and making it understand shared skbs,
+ * be fully multithreaded, etc, grab the sources in an early 2.5 kernel
+ * tree.
+ */
+ if (sock->type != SOCK_DGRAM)
+ goto out;
+
+ sk = sk_alloc(PF_IPX, GFP_KERNEL, sizeof(struct ipx_sock), ipx_sk_slab);
+ rc = -ENOMEM;
+ if (!sk)
goto out;
- }
#ifdef IPX_REFCNT_DEBUG
atomic_inc(&ipx_sock_nr);
printk(KERN_DEBUG "IPX socket %p created, now we have %d alive\n", sk,
@@ -1382,12 +1373,10 @@ static int ipx_create(struct socket *sock, int protocol)
sock_init_data(sock, sk);
sk_set_owner(sk, THIS_MODULE);
sk->sk_no_check = 1; /* Checksum off by default */
+ sock->ops = &ipx_dgram_ops;
rc = 0;
out:
return rc;
-outsk:
- sk_free(sk);
- goto out;
}
static int ipx_release(struct socket *sock)
@@ -1433,7 +1422,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc)
static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sock *sk = sock->sk;
- struct ipx_opt *ipxs = ipx_sk(sk);
+ struct ipx_sock *ipxs = ipx_sk(sk);
struct ipx_interface *intrfc;
struct sockaddr_ipx *addr = (struct sockaddr_ipx *)uaddr;
int rc = -EINVAL;
@@ -1529,7 +1518,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr,
int addr_len, int flags)
{
struct sock *sk = sock->sk;
- struct ipx_opt *ipxs = ipx_sk(sk);
+ struct ipx_sock *ipxs = ipx_sk(sk);
struct sockaddr_ipx *addr;
int rc = -EINVAL;
struct ipx_route *rt;
@@ -1593,7 +1582,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr,
struct ipx_address *addr;
struct sockaddr_ipx sipx;
struct sock *sk = sock->sk;
- struct ipx_opt *ipxs = ipx_sk(sk);
+ struct ipx_sock *ipxs = ipx_sk(sk);
int rc;
*uaddr_len = sizeof(struct sockaddr_ipx);
@@ -1693,7 +1682,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
- struct ipx_opt *ipxs = ipx_sk(sk);
+ struct ipx_sock *ipxs = ipx_sk(sk);
struct sockaddr_ipx *usipx = (struct sockaddr_ipx *)msg->msg_name;
struct sockaddr_ipx local_sipx;
int rc = -EINVAL;
@@ -1758,7 +1747,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
struct msghdr *msg, size_t size, int flags)
{
struct sock *sk = sock->sk;
- struct ipx_opt *ipxs = ipx_sk(sk);
+ struct ipx_sock *ipxs = ipx_sk(sk);
struct sockaddr_ipx *sipx = (struct sockaddr_ipx *)msg->msg_name;
struct ipxhdr *ipx = NULL;
struct sk_buff *skb;
@@ -1965,6 +1954,13 @@ static char ipx_snap_err_msg[] __initdata =
static int __init ipx_init(void)
{
+ ipx_sk_slab = kmem_cache_create("ipx_sock",
+ sizeof(struct ipx_sock), 0,
+ SLAB_HWCACHE_ALIGN, NULL, NULL);
+
+ if (ipx_sk_slab == NULL)
+ return -ENOMEM;
+
sock_register(&ipx_family_ops);
pEII_datalink = make_EII_client();
@@ -2016,6 +2012,11 @@ static void __exit ipx_proto_finito(void)
destroy_EII_client(pEII_datalink);
pEII_datalink = NULL;
+ if (ipx_sk_slab != NULL) {
+ kmem_cache_destroy(ipx_sk_slab);
+ ipx_sk_slab = NULL;
+ }
+
sock_unregister(ipx_family_ops.family);
}
diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c
index 6b3cb469fc96..b6761913445a 100644
--- a/net/ipx/ipx_proc.c
+++ b/net/ipx/ipx_proc.c
@@ -202,7 +202,7 @@ static void *ipx_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct sock* sk, *next;
struct ipx_interface *i;
- struct ipx_opt *ipxs;
+ struct ipx_sock *ipxs;
++*pos;
if (v == SEQ_START_TOKEN) {
@@ -243,7 +243,7 @@ out:
static int ipx_seq_socket_show(struct seq_file *seq, void *v)
{
struct sock *s;
- struct ipx_opt *ipxs;
+ struct ipx_sock *ipxs;
if (v == SEQ_START_TOKEN) {
#ifdef CONFIG_IPX_INTERN
diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c
index c85e682f7f66..67774448efd9 100644
--- a/net/ipx/ipx_route.c
+++ b/net/ipx/ipx_route.c
@@ -172,7 +172,7 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx,
struct iovec *iov, size_t len, int noblock)
{
struct sk_buff *skb;
- struct ipx_opt *ipxs = ipx_sk(sk);
+ struct ipx_sock *ipxs = ipx_sk(sk);
struct ipx_interface *intrfc;
struct ipxhdr *ipx;
size_t size;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 74764e224c85..2dab5095ea89 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -629,7 +629,6 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t
}
return 1;
}
- skb_orphan(skb);
skb_set_owner_r(skb, sk);
return 0;
}
@@ -661,21 +660,28 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb)
sock_put(sk);
}
-static inline void netlink_trim(struct sk_buff *skb, int allocation)
+static inline struct sk_buff *netlink_trim(struct sk_buff *skb, int allocation)
{
- int delta = skb->end - skb->tail;
+ int delta;
- /* If the packet is charged to a socket, the modification
- * of truesize below is illegal and will corrupt socket
- * buffer accounting state.
- */
- BUG_ON(skb->list != NULL);
+ skb_orphan(skb);
+ delta = skb->end - skb->tail;
if (delta * 2 < skb->truesize)
- return;
- if (pskb_expand_head(skb, 0, -delta, allocation))
- return;
- skb->truesize -= delta;
+ return skb;
+
+ if (skb_shared(skb)) {
+ struct sk_buff *nskb = skb_clone(skb, allocation);
+ if (!nskb)
+ return skb;
+ kfree_skb(skb);
+ skb = nskb;
+ }
+
+ if (!pskb_expand_head(skb, 0, -delta, allocation))
+ skb->truesize -= delta;
+
+ return skb;
}
int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock)
@@ -684,7 +690,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock
int err;
long timeo;
- netlink_trim(skb, gfp_any());
+ skb = netlink_trim(skb, gfp_any());
timeo = sock_sndtimeo(ssk, nonblock);
retry:
@@ -707,14 +713,12 @@ static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff
struct netlink_opt *nlk = nlk_sk(sk);
#ifdef NL_EMULATE_DEV
if (nlk->handler) {
- skb_orphan(skb);
nlk->handler(sk->sk_protocol, skb);
return 0;
} else
#endif
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
!test_bit(0, &nlk->state)) {
- skb_orphan(skb);
skb_set_owner_r(skb, sk);
skb_queue_tail(&sk->sk_receive_queue, skb);
sk->sk_data_ready(sk, skb->len);
@@ -784,6 +788,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
struct hlist_node *node;
struct sock *sk;
+ skb = netlink_trim(skb, allocation);
+
info.exclude_sk = ssk;
info.pid = pid;
info.group = group;
@@ -794,8 +800,6 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
info.skb = skb;
info.skb2 = NULL;
- netlink_trim(skb, allocation);
-
/* While we sleep in clone, do not allow to change socket list */
netlink_lock_table();
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a780c51defea..fe85d5588b46 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -239,8 +239,10 @@ replay:
* replay the request. We indicate this using
* -EAGAIN.
*/
- if (tp_ops != NULL)
+ if (tp_ops != NULL) {
+ module_put(tp_ops->owner);
err = -EAGAIN;
+ }
}
#endif
kfree(tp);
@@ -486,24 +488,26 @@ tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb,
memset(exts, 0, sizeof(*exts));
#ifdef CONFIG_NET_CLS_ACT
- int err;
- struct tc_action *act;
+ {
+ int err;
+ struct tc_action *act;
- if (map->police && tb[map->police-1]) {
- act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police",
- TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
- if (act == NULL)
- return err;
-
- act->type = TCA_OLD_COMPAT;
- exts->action = act;
- } else if (map->action && tb[map->action-1]) {
- act = tcf_action_init(tb[map->action-1], rate_tlv, NULL,
- TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
- if (act == NULL)
- return err;
-
- exts->action = act;
+ if (map->police && tb[map->police-1]) {
+ act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police",
+ TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
+ if (act == NULL)
+ return err;
+
+ act->type = TCA_OLD_COMPAT;
+ exts->action = act;
+ } else if (map->action && tb[map->action-1]) {
+ act = tcf_action_init(tb[map->action-1], rate_tlv, NULL,
+ TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err);
+ if (act == NULL)
+ return err;
+
+ exts->action = act;
+ }
}
#elif defined CONFIG_NET_CLS_POLICE
if (map->police && tb[map->police-1]) {
diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c
index d057789645ce..02996ac05c75 100644
--- a/net/sched/cls_route.c
+++ b/net/sched/cls_route.c
@@ -125,20 +125,20 @@ static __inline__ int route4_hash_wild(void)
return 32;
}
-#define ROUTE4_APPLY_RESULT() \
- do { \
- *res = f->res; \
- if (tcf_exts_is_available(&f->exts)) { \
- int r = tcf_exts_exec(skb, &f->exts, res); \
- if (r < 0) { \
- dont_cache = 1; \
- continue; \
- } \
- return r; \
- } else if (!dont_cache) \
- route4_set_fastmap(head, id, iif, f); \
- return 0; \
- } while(0)
+#define ROUTE4_APPLY_RESULT() \
+{ \
+ *res = f->res; \
+ if (tcf_exts_is_available(&f->exts)) { \
+ int r = tcf_exts_exec(skb, &f->exts, res); \
+ if (r < 0) { \
+ dont_cache = 1; \
+ continue; \
+ } \
+ return r; \
+ } else if (!dont_cache) \
+ route4_set_fastmap(head, id, iif, f); \
+ return 0; \
+}
static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
@@ -384,9 +384,9 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base,
id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]);
if (id > 0x7FFF)
goto errout;
- nhandle = (id | 0x8000) << 16;
+ nhandle |= (id | 0x8000) << 16;
} else
- nhandle = 0xFFFF << 16;
+ nhandle |= 0xFFFF << 16;
if (handle && new) {
nhandle |= handle & 0x7F00;
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
index 7b74ad5ccd08..232fb9196810 100644
--- a/net/sched/cls_rsvp.h
+++ b/net/sched/cls_rsvp.h
@@ -123,14 +123,14 @@ static struct tcf_ext_map rsvp_ext_map = {
.action = TCA_RSVP_ACT
};
-#define RSVP_APPLY_RESULT() \
- do { \
- int r = tcf_exts_exec(skb, &f->exts, res); \
- if (r < 0) \
- continue; \
- else if (r > 0) \
- return r; \
- } while(0)
+#define RSVP_APPLY_RESULT() \
+{ \
+ int r = tcf_exts_exec(skb, &f->exts, res); \
+ if (r < 0) \
+ continue; \
+ else if (r > 0) \
+ return r; \
+}
static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
struct tcf_result *res)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index b4c1c1f15dc2..d43e3b8cbf6a 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -241,7 +241,7 @@ cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
*/
static struct cbq_class *
-cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
+cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
struct cbq_sched_data *q = qdisc_priv(sch);
struct cbq_class *head = &q->link;
@@ -255,13 +255,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
*/
if (TC_H_MAJ(prio^sch->handle) == 0 &&
(cl = cbq_class_lookup(q, prio)) != NULL)
- return cl;
+ return cl;
+ *qerr = NET_XMIT_DROP;
for (;;) {
int result = 0;
-#ifdef CONFIG_NET_CLS_ACT
- int terminal = 0;
-#endif
defmap = head->defaults;
/*
@@ -282,27 +280,13 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
#ifdef CONFIG_NET_CLS_ACT
switch (result) {
- case TC_ACT_SHOT: /* Stop and kfree */
- *qres = NET_XMIT_DROP;
- terminal = 1;
- break;
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- terminal = 1;
- break;
- case TC_ACT_RECLASSIFY: /* Things look good */
- case TC_ACT_OK:
- case TC_ACT_UNSPEC:
- default:
- break;
- }
-
- if (terminal) {
- kfree_skb(skb);
+ *qerr = NET_XMIT_SUCCESS;
+ case TC_ACT_SHOT:
return NULL;
}
-#else
-#ifdef CONFIG_NET_CLS_POLICE
+#elif defined(CONFIG_NET_CLS_POLICE)
switch (result) {
case TC_POLICE_RECLASSIFY:
return cbq_reclassify(skb, cl);
@@ -312,7 +296,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
break;
}
#endif
-#endif
if (cl->level == 0)
return cl;
@@ -423,45 +406,35 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct cbq_sched_data *q = qdisc_priv(sch);
int len = skb->len;
- int ret = NET_XMIT_SUCCESS;
- struct cbq_class *cl = cbq_classify(skb, sch,&ret);
+ int ret;
+ struct cbq_class *cl = cbq_classify(skb, sch, &ret);
#ifdef CONFIG_NET_CLS_POLICE
q->rx_class = cl;
#endif
- if (cl) {
-#ifdef CONFIG_NET_CLS_POLICE
- cl->q->__parent = sch;
-#endif
- if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- sch->bstats.packets++;
- sch->bstats.bytes+=len;
- cbq_mark_toplevel(q, cl);
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return ret;
- }
- }
-
-#ifndef CONFIG_NET_CLS_ACT
- sch->qstats.drops++;
- if (cl == NULL)
+ if (cl == NULL) {
+ if (ret == NET_XMIT_DROP)
+ sch->qstats.drops++;
kfree_skb(skb);
- else {
- cbq_mark_toplevel(q, cl);
- cl->qstats.drops++;
- }
-#else
- if ( NET_XMIT_DROP == ret) {
- sch->qstats.drops++;
+ return ret;
}
- if (cl != NULL) {
+#ifdef CONFIG_NET_CLS_POLICE
+ cl->q->__parent = sch;
+#endif
+ if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
+ sch->q.qlen++;
+ sch->bstats.packets++;
+ sch->bstats.bytes+=len;
cbq_mark_toplevel(q, cl);
- cl->qstats.drops++;
+ if (!cl->next_alive)
+ cbq_activate_class(cl);
+ return ret;
}
-#endif
+
+ sch->qstats.drops++;
+ cbq_mark_toplevel(q, cl);
+ cl->qstats.drops++;
return ret;
}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 8c01e023f02e..e9f7dba9a5a4 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -99,17 +99,11 @@ int qdisc_restart(struct net_device *dev)
if ((skb = q->dequeue(q)) != NULL) {
unsigned nolock = (dev->features & NETIF_F_LLTX);
/*
- * When the driver has LLTX set it does its own locking
- * in start_xmit. No need to add additional overhead by
- * locking again. These checks are worth it because
- * even uncongested locks can be quite expensive.
- * The driver can do trylock like here too, in case
- * of lock congestion it should return -1 and the packet
- * will be requeued.
+ * When the driver has LLTX set it does not require any
+ * locking in start_xmit.
*/
if (!nolock) {
- if (!spin_trylock(&dev->xmit_lock)) {
- collision:
+ if (!spin_trylock_irq(&dev->xmit_lock)) {
/* So, someone grabbed the driver. */
/* It may be transient configuration error,
@@ -143,22 +137,18 @@ int qdisc_restart(struct net_device *dev)
if (ret == NETDEV_TX_OK) {
if (!nolock) {
dev->xmit_lock_owner = -1;
- spin_unlock(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
}
spin_lock(&dev->queue_lock);
return -1;
}
- if (ret == NETDEV_TX_LOCKED && nolock) {
- spin_lock(&dev->queue_lock);
- goto collision;
- }
}
/* NETDEV_TX_BUSY - we need to requeue */
/* Release the driver */
if (!nolock) {
dev->xmit_lock_owner = -1;
- spin_unlock(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
}
spin_lock(&dev->queue_lock);
q = dev->qdisc;
@@ -186,7 +176,7 @@ static void dev_watchdog(unsigned long arg)
{
struct net_device *dev = (struct net_device *)arg;
- spin_lock(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
if (dev->qdisc != &noop_qdisc) {
if (netif_device_present(dev) &&
netif_running(dev) &&
@@ -200,7 +190,7 @@ static void dev_watchdog(unsigned long arg)
dev_hold(dev);
}
}
- spin_unlock(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
dev_put(dev);
}
@@ -224,17 +214,17 @@ void __netdev_watchdog_up(struct net_device *dev)
static void dev_watchdog_up(struct net_device *dev)
{
- spin_lock_bh(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
__netdev_watchdog_up(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
}
static void dev_watchdog_down(struct net_device *dev)
{
- spin_lock_bh(&dev->xmit_lock);
+ spin_lock_irq(&dev->xmit_lock);
if (del_timer(&dev->watchdog_timer))
__dev_put(dev);
- spin_unlock_bh(&dev->xmit_lock);
+ spin_unlock_irq(&dev->xmit_lock);
}
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index d09e0b0cb5f9..c26764bc4103 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1214,7 +1214,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
}
static struct hfsc_class *
-hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
+hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
struct hfsc_sched *q = qdisc_priv(sch);
struct hfsc_class *cl;
@@ -1227,36 +1227,21 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
if (cl->level == 0)
return cl;
+ *qerr = NET_XMIT_DROP;
tcf = q->root.filter_list;
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
- int terminal = 0;
switch (result) {
- case TC_ACT_SHOT:
- *qres = NET_XMIT_DROP;
- terminal = 1;
- break;
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- terminal = 1;
- break;
- case TC_ACT_RECLASSIFY:
- case TC_ACT_OK:
- case TC_ACT_UNSPEC:
- default:
- break;
- }
-
- if (terminal) {
- kfree_skb(skb);
+ *qerr = NET_XMIT_SUCCESS;
+ case TC_ACT_SHOT:
return NULL;
}
-#else
-#ifdef CONFIG_NET_CLS_POLICE
+#elif defined(CONFIG_NET_CLS_POLICE)
if (result == TC_POLICE_SHOT)
return NULL;
#endif
-#endif
if ((cl = (struct hfsc_class *)res.class) == NULL) {
if ((cl = hfsc_find_class(res.classid, sch)) == NULL)
break; /* filter selected invalid classid */
@@ -1652,27 +1637,19 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb)
static int
hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
- int ret = NET_XMIT_SUCCESS;
- struct hfsc_class *cl = hfsc_classify(skb, sch, &ret);
- unsigned int len = skb->len;
+ struct hfsc_class *cl;
+ unsigned int len;
int err;
-
-#ifdef CONFIG_NET_CLS_ACT
+ cl = hfsc_classify(skb, sch, &err);
if (cl == NULL) {
- if (NET_XMIT_DROP == ret) {
+ if (err == NET_XMIT_DROP)
sch->qstats.drops++;
- }
- return ret;
- }
-#else
- if (cl == NULL) {
kfree_skb(skb);
- sch->qstats.drops++;
- return NET_XMIT_DROP;
+ return err;
}
-#endif
+ len = skb->len;
err = cl->qdisc->enqueue(skb, cl->qdisc);
if (unlikely(err != NET_XMIT_SUCCESS)) {
cl->qstats.drops++;
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 116028554ae4..a85935e7d53d 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -305,7 +305,7 @@ static inline u32 htb_classid(struct htb_class *cl)
return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC;
}
-static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres)
+static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl;
@@ -321,35 +321,20 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in
if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0)
return cl;
+ *qerr = NET_XMIT_DROP;
tcf = q->filter_list;
while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
- int terminal = 0;
switch (result) {
- case TC_ACT_SHOT: /* Stop and kfree */
- *qres = NET_XMIT_DROP;
- terminal = 1;
- break;
case TC_ACT_QUEUED:
case TC_ACT_STOLEN:
- terminal = 1;
- break;
- case TC_ACT_RECLASSIFY: /* Things look good */
- case TC_ACT_OK:
- case TC_ACT_UNSPEC:
- default:
- break;
- }
-
- if (terminal) {
- kfree_skb(skb);
+ *qerr = NET_XMIT_SUCCESS;
+ case TC_ACT_SHOT:
return NULL;
}
-#else
-#ifdef CONFIG_NET_CLS_POLICE
+#elif defined(CONFIG_NET_CLS_POLICE)
if (result == TC_POLICE_SHOT)
- return NULL;
-#endif
+ return HTB_DIRECT;
#endif
if ((cl = (void*)res.class) == NULL) {
if (res.classid == sch->handle)
@@ -723,37 +708,24 @@ htb_deactivate(struct htb_sched *q,struct htb_class *cl)
static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
- int ret = NET_XMIT_SUCCESS;
+ int ret;
struct htb_sched *q = qdisc_priv(sch);
struct htb_class *cl = htb_classify(skb,sch,&ret);
-
-#ifdef CONFIG_NET_CLS_ACT
- if (cl == HTB_DIRECT ) {
- if (q->direct_queue.qlen < q->direct_qlen ) {
- __skb_queue_tail(&q->direct_queue, skb);
- q->direct_pkts++;
- }
- } else if (!cl) {
- if (NET_XMIT_DROP == ret) {
- sch->qstats.drops++;
- }
- return ret;
- }
-#else
- if (cl == HTB_DIRECT || !cl) {
+ if (cl == HTB_DIRECT) {
/* enqueue to helper queue */
- if (q->direct_queue.qlen < q->direct_qlen && cl) {
+ if (q->direct_queue.qlen < q->direct_qlen) {
__skb_queue_tail(&q->direct_queue, skb);
q->direct_pkts++;
- } else {
- kfree_skb (skb);
- sch->qstats.drops++;
- return NET_XMIT_DROP;
}
- }
+#ifdef CONFIG_NET_CLS_ACT
+ } else if (!cl) {
+ if (ret == NET_XMIT_DROP)
+ sch->qstats.drops++;
+ kfree_skb (skb);
+ return ret;
#endif
- else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
+ } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) {
sch->qstats.drops++;
cl->qstats.drops++;
return NET_XMIT_DROP;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 2f06270cad4b..3ac0f495bad0 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -47,37 +47,23 @@ struct prio_sched_data
};
-static struct Qdisc *prio_classify(struct sk_buff *skb,
- struct Qdisc *sch, int *r)
+static struct Qdisc *
+prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
{
struct prio_sched_data *q = qdisc_priv(sch);
u32 band = skb->priority;
struct tcf_result res;
+ *qerr = NET_XMIT_DROP;
if (TC_H_MAJ(skb->priority) != sch->handle) {
#ifdef CONFIG_NET_CLS_ACT
- int result = 0, terminal = 0;
- result = tc_classify(skb, q->filter_list, &res);
-
- switch (result) {
- case TC_ACT_SHOT:
- *r = NET_XMIT_DROP;
- terminal = 1;
- break;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- terminal = 1;
- break;
- case TC_ACT_RECLASSIFY:
- case TC_ACT_OK:
- case TC_ACT_UNSPEC:
- default:
- break;
- };
- if (terminal) {
- kfree_skb(skb);
+ switch (tc_classify(skb, q->filter_list, &res)) {
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ *qerr = NET_XMIT_SUCCESS;
+ case TC_ACT_SHOT:
return NULL;
- }
+ };
if (!q->filter_list ) {
#else
@@ -97,15 +83,20 @@ static struct Qdisc *prio_classify(struct sk_buff *skb,
}
static int
-prio_enqueue(struct sk_buff *skb, struct Qdisc* sch)
+prio_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
struct Qdisc *qdisc;
- int ret = NET_XMIT_SUCCESS;
+ int ret;
qdisc = prio_classify(skb, sch, &ret);
-
- if (NULL == qdisc)
- goto dropped;
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+ if (ret == NET_XMIT_DROP)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) {
sch->bstats.bytes += skb->len;
@@ -113,17 +104,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc* sch)
sch->q.qlen++;
return NET_XMIT_SUCCESS;
}
-
-dropped:
-#ifdef CONFIG_NET_CLS_ACT
- if (NET_XMIT_DROP == ret) {
-#endif
- sch->qstats.drops++;
-#ifdef CONFIG_NET_CLS_ACT
- } else {
- sch->qstats.overlimits++; /* abuse, but noone uses it */
- }
-#endif
+ sch->qstats.drops++;
return ret;
}
@@ -132,18 +113,23 @@ static int
prio_requeue(struct sk_buff *skb, struct Qdisc* sch)
{
struct Qdisc *qdisc;
- int ret = NET_XMIT_DROP;
+ int ret;
qdisc = prio_classify(skb, sch, &ret);
- if (qdisc == NULL)
- goto dropped;
+#ifdef CONFIG_NET_CLS_ACT
+ if (qdisc == NULL) {
+ if (ret == NET_XMIT_DROP)
+ sch->qstats.drops++;
+ kfree_skb(skb);
+ return ret;
+ }
+#endif
- if ((ret = qdisc->ops->requeue(skb, qdisc)) == 0) {
+ if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) {
sch->q.qlen++;
sch->qstats.requeues++;
return 0;
}
-dropped:
sch->qstats.drops++;
return NET_XMIT_DROP;
}
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6cf0342706b5..b88cbf028397 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -301,12 +301,12 @@ restart:
switch (teql_resolve(skb, skb_res, slave)) {
case 0:
- if (spin_trylock(&slave->xmit_lock)) {
+ if (spin_trylock_irq(&slave->xmit_lock)) {
slave->xmit_lock_owner = smp_processor_id();
if (!netif_queue_stopped(slave) &&
slave->hard_start_xmit(skb, slave) == 0) {
slave->xmit_lock_owner = -1;
- spin_unlock(&slave->xmit_lock);
+ spin_unlock_irq(&slave->xmit_lock);
master->slaves = NEXT_SLAVE(q);
netif_wake_queue(dev);
master->stats.tx_packets++;
@@ -314,7 +314,7 @@ restart:
return 0;
}
slave->xmit_lock_owner = -1;
- spin_unlock(&slave->xmit_lock);
+ spin_unlock_irq(&slave->xmit_lock);
}
if (netif_queue_stopped(dev))
busy = 1;
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index fda3bc435c7c..663843d97a92 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -73,7 +73,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a
sctp_scope_t scope,
int gfp)
{
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
int i;
/* Retrieve the SCTP per socket area. */
@@ -434,7 +434,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
int gfp)
{
struct sctp_transport *peer;
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
unsigned short port;
sp = sctp_sk(asoc->base.sk);
@@ -886,7 +886,7 @@ static void sctp_assoc_bh_rcv(struct sctp_association *asoc)
/* This routine moves an association from its old sk to a new sk. */
void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk)
{
- struct sctp_opt *newsp = sctp_sk(newsk);
+ struct sctp_sock *newsp = sctp_sk(newsk);
struct sock *oldsk = assoc->base.sk;
/* Delete the association from the old endpoint's list of
@@ -1059,7 +1059,7 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc)
}
if (pmtu) {
- struct sctp_opt *sp = sctp_sk(asoc->base.sk);
+ struct sctp_sock *sp = sctp_sk(asoc->base.sk);
asoc->pmtu = pmtu;
asoc->frag_point = sctp_frag_point(sp, pmtu);
}
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index ec2d7450fb18..f90eadfb60a2 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -293,7 +293,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list,
/* Does this contain a specified address? Allow wildcarding. */
int sctp_bind_addr_match(struct sctp_bind_addr *bp,
const union sctp_addr *addr,
- struct sctp_opt *opt)
+ struct sctp_sock *opt)
{
struct sctp_sockaddr_entry *laddr;
struct list_head *pos;
@@ -313,7 +313,7 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp,
union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp,
const union sctp_addr *addrs,
int addrcnt,
- struct sctp_opt *opt)
+ struct sctp_sock *opt)
{
struct sctp_sockaddr_entry *laddr;
union sctp_addr *addr;
diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c
index 6e500f8ca47f..0c2ab7885058 100644
--- a/net/sctp/chunk.c
+++ b/net/sctp/chunk.c
@@ -77,7 +77,7 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg)
{
struct list_head *pos, *temp;
struct sctp_chunk *chunk;
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
struct sctp_ulpevent *ev;
struct sctp_association *asoc = NULL;
int error = 0, notify;
diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c
index cfeb033841dc..b4631b3001a3 100644
--- a/net/sctp/endpointola.c
+++ b/net/sctp/endpointola.c
@@ -69,7 +69,7 @@ static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep);
static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep,
struct sock *sk, int gfp)
{
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
memset(ep, 0, sizeof(struct sctp_endpoint));
/* Initialize the base structure. */
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f23de6bd5b52..efe44d19d2c4 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -502,7 +502,7 @@ static int sctp_v6_is_any(const union sctp_addr *addr)
}
/* Should this be available for binding? */
-static int sctp_v6_available(union sctp_addr *addr, struct sctp_opt *sp)
+static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
{
int type;
struct in6_addr *in6 = (struct in6_addr *)&addr->v6.sin6_addr;
@@ -531,14 +531,14 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_opt *sp)
* Return 0 - If the address is a non-unicast or an illegal address.
* Return 1 - If the address is a unicast.
*/
-static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_opt *sp)
+static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_sock *sp)
{
int ret = ipv6_addr_type(&addr->v6.sin6_addr);
/* Support v4-mapped-v6 address. */
if (ret == IPV6_ADDR_MAPPED) {
/* Note: This routine is used in input, so v4-mapped-v6
- * are disallowed here when there is no sctp_opt.
+ * are disallowed here when there is no sctp_sock.
*/
if (!sp || !sp->v4mapped)
return 0;
@@ -616,7 +616,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newsk->sk_shutdown = sk->sk_shutdown;
newsctp6sk = (struct sctp6_sock *)newsk;
- newsctp6sk->inet.pinet6 = &newsctp6sk->inet6;
+ inet_sk(newsk)->pinet6 = &newsctp6sk->inet6;
newinet = inet_sk(newsk);
newnp = inet6_sk(newsk);
@@ -661,7 +661,7 @@ out:
}
/* Map v4 address to mapped v6 address */
-static void sctp_v6_addr_v4map(struct sctp_opt *sp, union sctp_addr *addr)
+static void sctp_v6_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr)
{
if (sp->v4mapped && AF_INET == addr->sa.sa_family)
sctp_v4_map_v6(addr);
@@ -766,7 +766,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname,
}
/* Do we support this AF? */
-static int sctp_inet6_af_supported(sa_family_t family, struct sctp_opt *sp)
+static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp)
{
switch (family) {
case AF_INET6:
@@ -786,7 +786,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_opt *sp)
*/
static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
const union sctp_addr *addr2,
- struct sctp_opt *opt)
+ struct sctp_sock *opt)
{
struct sctp_af *af1, *af2;
@@ -808,7 +808,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1,
/* Verify that the provided sockaddr looks bindable. Common verification,
* has already been taken care of.
*/
-static int sctp_inet6_bind_verify(struct sctp_opt *opt, union sctp_addr *addr)
+static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
{
struct sctp_af *af;
@@ -838,7 +838,7 @@ static int sctp_inet6_bind_verify(struct sctp_opt *opt, union sctp_addr *addr)
/* Verify that the provided sockaddr looks bindable. Common verification,
* has already been taken care of.
*/
-static int sctp_inet6_send_verify(struct sctp_opt *opt, union sctp_addr *addr)
+static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
{
struct sctp_af *af = NULL;
@@ -872,7 +872,7 @@ static int sctp_inet6_send_verify(struct sctp_opt *opt, union sctp_addr *addr)
* addresses.
* Returns number of addresses supported.
*/
-static int sctp_inet6_supported_addrs(const struct sctp_opt *opt,
+static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
__u16 *types)
{
types[0] = SCTP_PARAM_IPV4_ADDRESS;
diff --git a/net/sctp/output.c b/net/sctp/output.c
index 14009b01c433..9013f64f5219 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -110,7 +110,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet,
packet->destination_port = dport;
skb_queue_head_init(&packet->chunks);
if (asoc) {
- struct sctp_opt *sp = sctp_sk(asoc->base.sk);
+ struct sctp_sock *sp = sctp_sk(asoc->base.sk);
overhead = sp->pf->af->net_header_len;
} else {
overhead = sizeof(struct ipv6hdr);
@@ -534,7 +534,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet,
struct sctp_transport *transport = packet->transport;
__u32 max_burst_bytes;
struct sctp_association *asoc = transport->asoc;
- struct sctp_opt *sp = sctp_sk(asoc->base.sk);
+ struct sctp_sock *sp = sctp_sk(asoc->base.sk);
struct sctp_outq *q = &asoc->outqueue;
/* RFC 2960 6.1 Transmission of DATA Chunks
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 817f14f97a4f..c1ee92a662b7 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -364,7 +364,7 @@ static int sctp_v4_is_any(const union sctp_addr *addr)
* Return 0 - If the address is a non-unicast or an illegal address.
* Return 1 - If the address is a unicast.
*/
-static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_opt *sp)
+static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_sock *sp)
{
/* Is this a non-unicast address or a unusable SCTP address? */
if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr))
@@ -374,7 +374,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_opt *sp)
}
/* Should this be available for binding? */
-static int sctp_v4_available(union sctp_addr *addr, struct sctp_opt *sp)
+static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp)
{
int ret = inet_addr_type(addr->v4.sin_addr.s_addr);
@@ -608,7 +608,7 @@ out:
}
/* Map address, empty for v4 family */
-static void sctp_v4_addr_v4map(struct sctp_opt *sp, union sctp_addr *addr)
+static void sctp_v4_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr)
{
/* Empty */
}
@@ -745,7 +745,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len)
}
/* Do we support this AF? */
-static int sctp_inet_af_supported(sa_family_t family, struct sctp_opt *sp)
+static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp)
{
/* PF_INET only supports AF_INET addresses. */
return (AF_INET == family);
@@ -754,7 +754,7 @@ static int sctp_inet_af_supported(sa_family_t family, struct sctp_opt *sp)
/* Address matching with wildcards allowed. */
static int sctp_inet_cmp_addr(const union sctp_addr *addr1,
const union sctp_addr *addr2,
- struct sctp_opt *opt)
+ struct sctp_sock *opt)
{
/* PF_INET only supports AF_INET addresses. */
if (addr1->sa.sa_family != addr2->sa.sa_family)
@@ -771,7 +771,7 @@ static int sctp_inet_cmp_addr(const union sctp_addr *addr1,
/* Verify that provided sockaddr looks bindable. Common verification has
* already been taken care of.
*/
-static int sctp_inet_bind_verify(struct sctp_opt *opt, union sctp_addr *addr)
+static int sctp_inet_bind_verify(struct sctp_sock *opt, union sctp_addr *addr)
{
return sctp_v4_available(addr, opt);
}
@@ -779,7 +779,7 @@ static int sctp_inet_bind_verify(struct sctp_opt *opt, union sctp_addr *addr)
/* Verify that sockaddr looks sendable. Common verification has already
* been taken care of.
*/
-static int sctp_inet_send_verify(struct sctp_opt *opt, union sctp_addr *addr)
+static int sctp_inet_send_verify(struct sctp_sock *opt, union sctp_addr *addr)
{
return 1;
}
@@ -787,7 +787,7 @@ static int sctp_inet_send_verify(struct sctp_opt *opt, union sctp_addr *addr)
/* Fill in Supported Address Type information for INIT and INIT-ACK
* chunks. Returns number of addresses supported.
*/
-static int sctp_inet_supported_addrs(const struct sctp_opt *opt,
+static int sctp_inet_supported_addrs(const struct sctp_sock *opt,
__u16 *types)
{
types[0] = SCTP_PARAM_IPV4_ADDRESS;
diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c
index 7a0c6fa4c33c..1db12cc18cf7 100644
--- a/net/sctp/sm_make_chunk.c
+++ b/net/sctp/sm_make_chunk.c
@@ -181,7 +181,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc,
size_t chunksize;
struct sctp_chunk *retval = NULL;
int num_types, addrs_len = 0;
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
sctp_supported_addrs_param_t sat;
__u16 types[2];
sctp_adaption_ind_param_t aiparam;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 30b921b90d00..6f66ee490784 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -93,7 +93,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p);
static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p);
static int sctp_wait_for_accept(struct sock *sk, long timeo);
static void sctp_wait_for_close(struct sock *sk, long timeo);
-static struct sctp_af *sctp_sockaddr_af(struct sctp_opt *opt,
+static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
union sctp_addr *addr, int len);
static int sctp_bindx_add(struct sock *, struct sockaddr *, int);
static int sctp_bindx_rem(struct sock *, struct sockaddr *, int);
@@ -269,7 +269,7 @@ SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
static long sctp_get_port_local(struct sock *, union sctp_addr *);
/* Verify this is a valid sockaddr. */
-static struct sctp_af *sctp_sockaddr_af(struct sctp_opt *opt,
+static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt,
union sctp_addr *addr, int len)
{
struct sctp_af *af;
@@ -294,7 +294,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_opt *opt,
/* Bind a local address either to an endpoint or to an association. */
SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
{
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
struct sctp_bind_addr *bp = &ep->base.bind_addr;
struct sctp_af *af;
@@ -467,7 +467,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
struct sctp_bind_addr *bp;
@@ -572,7 +572,7 @@ out:
*/
int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt)
{
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
int cnt;
struct sctp_bind_addr *bp = &ep->base.bind_addr;
@@ -656,7 +656,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk,
struct sockaddr *addrs,
int addrcnt)
{
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
struct sctp_bind_addr *bp;
@@ -1051,7 +1051,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *);
SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t msg_len)
{
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *new_asoc=NULL, *asoc=NULL;
struct sctp_transport *transport, *chunk_tp;
@@ -1492,7 +1492,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
int flags, int *addr_len)
{
struct sctp_ulpevent *event = NULL;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
struct sk_buff *skb;
int copied;
int err = 0;
@@ -1637,7 +1637,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval,
static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval,
int optlen)
{
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
/* Applicable to UDP-style socket only */
if (sctp_style(sk, TCP))
@@ -1779,7 +1779,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk,
static int sctp_setsockopt_initmsg(struct sock *sk, char __user *optval, int optlen)
{
struct sctp_initmsg sinit;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (optlen != sizeof(struct sctp_initmsg))
return -EINVAL;
@@ -1817,7 +1817,7 @@ static int sctp_setsockopt_default_send_param(struct sock *sk,
{
struct sctp_sndrcvinfo info;
struct sctp_association *asoc;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (optlen != sizeof(struct sctp_sndrcvinfo))
return -EINVAL;
@@ -1934,7 +1934,7 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, int opt
/* If there is no association or the association-id = 0
* set the values to the endpoint.
*/
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (rtoinfo.srto_initial != 0)
sp->rtoinfo.srto_initial = rtoinfo.srto_initial;
@@ -1987,7 +1987,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o
}
} else {
/* Set the values to the endpoint */
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (assocparams.sasoc_asocmaxrxt != 0)
sp->assocparams.sasoc_asocmaxrxt =
@@ -2012,7 +2012,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o
static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, int optlen)
{
int val;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (optlen < sizeof(int))
return -EINVAL;
@@ -2040,7 +2040,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl
{
struct sctp_association *asoc;
struct list_head *pos;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
int val;
if (optlen < sizeof(int))
@@ -2074,7 +2074,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl
static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval,
int optlen)
{
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc = NULL;
struct sctp_setpeerprim prim;
@@ -2269,7 +2269,7 @@ out_nounlock:
SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr,
int addr_len)
{
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sctp_association *asoc;
struct sctp_transport *transport;
@@ -2390,7 +2390,7 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags)
*/
SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err)
{
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
struct sctp_endpoint *ep;
struct sock *newsk = NULL;
struct sctp_association *asoc;
@@ -2453,7 +2453,7 @@ SCTP_STATIC int sctp_ioctl(struct sock *sk, int cmd, unsigned long arg)
SCTP_STATIC int sctp_init_sock(struct sock *sk)
{
struct sctp_endpoint *ep;
- struct sctp_opt *sp;
+ struct sctp_sock *sp;
SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk);
@@ -3007,7 +3007,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len,
struct sctp_transport *from;
void __user *to;
union sctp_addr temp;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
if (len != sizeof(struct sctp_getaddrs))
@@ -3164,7 +3164,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len,
struct sctp_sockaddr_entry *addr;
void __user *to;
union sctp_addr temp;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
int addrlen;
rwlock_t *addr_lock;
int err = 0;
@@ -3250,7 +3250,7 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len,
{
struct sctp_prim prim;
struct sctp_association *asoc;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (len != sizeof(struct sctp_prim))
return -EINVAL;
@@ -3329,7 +3329,7 @@ static int sctp_getsockopt_default_send_param(struct sock *sk,
{
struct sctp_sndrcvinfo info;
struct sctp_association *asoc;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (len != sizeof(struct sctp_sndrcvinfo))
return -EINVAL;
@@ -3423,7 +3423,7 @@ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len,
rtoinfo.srto_min = jiffies_to_msecs(asoc->rto_min);
} else {
/* Values corresponding to the endpoint. */
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
rtoinfo.srto_initial = sp->rtoinfo.srto_initial;
rtoinfo.srto_max = sp->rtoinfo.srto_max;
@@ -3489,7 +3489,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len,
assocparams.sasoc_number_peer_destinations = cnt;
} else {
/* Values corresponding to the endpoint */
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
assocparams.sasoc_asocmaxrxt = sp->assocparams.sasoc_asocmaxrxt;
assocparams.sasoc_peer_rwnd = sp->assocparams.sasoc_peer_rwnd;
@@ -3524,7 +3524,7 @@ static int sctp_getsockopt_mappedv4(struct sock *sk, int len,
char __user *optval, int __user *optlen)
{
int val;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
if (len < sizeof(int))
return -EINVAL;
@@ -3876,7 +3876,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum)
*/
SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
{
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
/* Only UDP style sockets that are not peeled off are allowed to
@@ -3925,7 +3925,7 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog)
*/
SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog)
{
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
struct sctp_endpoint *ep = sp->ep;
/* If backlog is zero, disable listening. */
@@ -4026,7 +4026,7 @@ cleanup:
unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
{
struct sock *sk = sock->sk;
- struct sctp_opt *sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
unsigned int mask;
poll_wait(file, sk->sk_sleep, wait);
@@ -4654,8 +4654,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
struct sctp_association *assoc,
sctp_socket_type_t type)
{
- struct sctp_opt *oldsp = sctp_sk(oldsk);
- struct sctp_opt *newsp = sctp_sk(newsk);
+ struct sctp_sock *oldsp = sctp_sk(oldsk);
+ struct sctp_sock *newsp = sctp_sk(newsk);
struct sctp_bind_bucket *pp; /* hash list port iterator */
struct sctp_endpoint *newep = newsp->ep;
struct sk_buff *skb, *tmp;
@@ -4667,7 +4667,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk,
newsk->sk_sndbuf = oldsk->sk_sndbuf;
newsk->sk_rcvbuf = oldsk->sk_rcvbuf;
/* Brute force copy old sctp opt. */
- memcpy(newsp, oldsp, sizeof(struct sctp_opt));
+ inet_sk_copy_descendant(newsk, oldsk);
/* Restore the ep value that was overwritten with the above structure
* copy.
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 3fcca5ec314b..0e0c0f8f1911 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -237,7 +237,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport)
* address.
*/
void sctp_transport_route(struct sctp_transport *transport,
- union sctp_addr *saddr, struct sctp_opt *opt)
+ union sctp_addr *saddr, struct sctp_sock *opt)
{
struct sctp_association *asoc = transport->asoc;
struct sctp_af *af = transport->af_specific;
diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c
index 47a43580f05b..d5dd2cf7ac4a 100644
--- a/net/sctp/ulpqueue.c
+++ b/net/sctp/ulpqueue.c
@@ -138,8 +138,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk,
*/
int sctp_clear_pd(struct sock *sk)
{
- struct sctp_opt *sp;
- sp = sctp_sk(sk);
+ struct sctp_sock *sp = sctp_sk(sk);
sp->pd_mode = 0;
if (!skb_queue_empty(&sp->pd_lobby)) {
diff --git a/net/socket.c b/net/socket.c
index 4223cbad8224..88145eb4adc9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -104,7 +104,7 @@ static int sock_mmap(struct file *file, struct vm_area_struct * vma);
static int sock_close(struct inode *inode, struct file *file);
static unsigned int sock_poll(struct file *file,
struct poll_table_struct *wait);
-static int sock_ioctl(struct inode *inode, struct file *file,
+static long sock_ioctl(struct file *file,
unsigned int cmd, unsigned long arg);
static int sock_fasync(int fd, struct file *filp, int on);
static ssize_t sock_readv(struct file *file, const struct iovec *vector,
@@ -126,7 +126,7 @@ static struct file_operations socket_file_ops = {
.aio_read = sock_aio_read,
.aio_write = sock_aio_write,
.poll = sock_poll,
- .ioctl = sock_ioctl,
+ .unlocked_ioctl = sock_ioctl,
.mmap = sock_mmap,
.open = sock_no_open, /* special open code to disallow open via /proc */
.release = sock_close,
@@ -829,15 +829,13 @@ EXPORT_SYMBOL(dlci_ioctl_set);
* what to do with it - that's up to the protocol still.
*/
-static int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
- unsigned long arg)
+static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
struct socket *sock;
void __user *argp = (void __user *)arg;
int pid, err;
- unlock_kernel();
- sock = SOCKET_I(inode);
+ sock = SOCKET_I(file->f_dentry->d_inode);
if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) {
err = dev_ioctl(cmd, argp);
} else
@@ -903,8 +901,6 @@ static int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
err = sock->ops->ioctl(sock, cmd, arg);
break;
}
- lock_kernel();
-
return err;
}
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index 4851b46b046e..3ec936879f38 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -549,8 +549,6 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
}
}
-EXPORT_SYMBOL(xfrm_policy_delete);
-
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
struct xfrm_policy *old_pol;