diff options
| author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-01-19 15:48:53 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-01-19 15:48:53 -0800 |
| commit | 9472e348190603fffed9a32e19c54a5cf17e4bcc (patch) | |
| tree | cab35f9e8f975dc7b2a0654035f45012b4fa1227 | |
| parent | 90fcd610926a7d54d8d488faaa313109bb82f88a (diff) | |
| parent | 4debb9ea116cb46ea91819d0886915f59df58544 (diff) | |
Merge bk://kernel.bkbits.net/davem/net-2.6
into ppc970.osdl.org:/home/torvalds/v2.6/linux
121 files changed, 2090 insertions, 3495 deletions
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt index 1509f3aff968..1450809aed4b 100644 --- a/Documentation/networking/netdevices.txt +++ b/Documentation/networking/netdevices.txt @@ -45,10 +45,9 @@ dev->hard_start_xmit: Synchronization: dev->xmit_lock spinlock. When the driver sets NETIF_F_LLTX in dev->features this will be called without holding xmit_lock. In this case the driver - has to lock by itself when needed. It is recommended to use a try lock - for this and return -1 when the spin lock fails. - The locking there should also properly protect against - set_multicast_list + has to execute its transmission routine in a completely lockless + manner. It is recommended only for queueless devices such as + loopback and tunnels. Context: BHs disabled Notes: netif_queue_stopped() is guaranteed false Return codes: @@ -56,8 +55,6 @@ dev->hard_start_xmit: o NETDEV_TX_BUSY Cannot transmit packet, try later Usually a bug, means queue start/stop flow control is broken in the driver. Note: the driver must NOT put the skb in its DMA ring. - o NETDEV_TX_LOCKED Locking failed, please retry quickly. - Only valid when NETIF_F_LLTX is set. dev->tx_timeout: Synchronization: dev->xmit_lock spinlock. 
diff --git a/drivers/atm/ambassador.c b/drivers/atm/ambassador.c index 924abd2654a0..3870e3787b7b 100644 --- a/drivers/atm/ambassador.c +++ b/drivers/atm/ambassador.c @@ -574,7 +574,6 @@ static int command_do (amb_dev * dev, command * cmd) { amb_cq * cq = &dev->cq; volatile amb_cq_ptrs * ptrs = &cq->ptrs; command * my_slot; - unsigned long timeout; PRINTD (DBG_FLOW|DBG_CMD, "command_do %p", dev); @@ -599,20 +598,14 @@ static int command_do (amb_dev * dev, command * cmd) { // mail the command wr_mem (dev, offsetof(amb_mem, mb.adapter.cmd_address), virt_to_bus (ptrs->in)); - // prepare to wait for cq->pending milliseconds - // effectively one centisecond on i386 - timeout = (cq->pending*HZ+999)/1000; - if (cq->pending > cq->high) cq->high = cq->pending; spin_unlock (&cq->lock); - while (timeout) { - // go to sleep - // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout); - set_current_state(TASK_UNINTERRUPTIBLE); - timeout = schedule_timeout (timeout); - } + // these comments were in a while-loop before, msleep removes the loop + // go to sleep + // PRINTD (DBG_CMD, "wait: sleeping %lu for command", timeout); + msleep(cq->pending); // wait for my slot to be reached (all waiters are here or above, until...) while (ptrs->out != my_slot) { @@ -1799,12 +1792,11 @@ static int __init do_loader_command (volatile loader_block * lb, // dump_loader_block (lb); wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (lb) & ~onegigmask); - timeout = command_timeouts[cmd] * HZ/100; + timeout = command_timeouts[cmd] * 10; while (!lb->result || lb->result == cpu_to_be32 (COMMAND_IN_PROGRESS)) if (timeout) { - set_current_state(TASK_UNINTERRUPTIBLE); - timeout = schedule_timeout (timeout); + timeout = msleep_interruptible(timeout); } else { PRINTD (DBG_LOAD|DBG_ERR, "command %d timed out", cmd); dump_registers (dev); @@ -1814,10 +1806,10 @@ static int __init do_loader_command (volatile loader_block * lb, if (cmd == adapter_start) { // wait for start command to acknowledge... 
- timeout = HZ/10; + timeout = 100; while (rd_plain (dev, offsetof(amb_mem, doorbell))) if (timeout) { - timeout = schedule_timeout (timeout); + timeout = msleep_interruptible(timeout); } else { PRINTD (DBG_LOAD|DBG_ERR, "start command did not clear doorbell, res=%08x", be32_to_cpu (lb->result)); @@ -1932,17 +1924,12 @@ static int amb_reset (amb_dev * dev, int diags) { if (diags) { unsigned long timeout; // 4.2 second wait - timeout = HZ*42/10; - while (timeout) { - set_current_state(TASK_UNINTERRUPTIBLE); - timeout = schedule_timeout (timeout); - } + msleep(4200); // half second time-out - timeout = HZ/2; + timeout = 500; while (!rd_plain (dev, offsetof(amb_mem, mb.loader.ready))) if (timeout) { - set_current_state(TASK_UNINTERRUPTIBLE); - timeout = schedule_timeout (timeout); + timeout = msleep_interruptible(timeout); } else { PRINTD (DBG_LOAD|DBG_ERR, "reset timed out"); return -ETIMEDOUT; @@ -2056,14 +2043,12 @@ static int __init amb_talk (amb_dev * dev) { wr_mem (dev, offsetof(amb_mem, doorbell), virt_to_bus (&a)); // 2.2 second wait (must not touch doorbell during 2 second DMA test) - timeout = HZ*22/10; - while (timeout) - timeout = schedule_timeout (timeout); + msleep(2200); // give the adapter another half second? 
- timeout = HZ/2; + timeout = 500; while (rd_plain (dev, offsetof(amb_mem, doorbell))) if (timeout) { - timeout = schedule_timeout (timeout); + timeout = msleep_interruptible(timeout); } else { PRINTD (DBG_INIT|DBG_ERR, "adapter init timed out"); return -ETIMEDOUT; @@ -2228,17 +2213,12 @@ static void setup_dev(amb_dev *dev, struct pci_dev *pci_dev) spin_lock_init (&dev->rxq[pool].lock); } -static int setup_pci_dev(struct pci_dev *pci_dev) +static void setup_pci_dev(struct pci_dev *pci_dev) { unsigned char lat; - int ret; // enable bus master accesses pci_set_master(pci_dev); - - ret = pci_enable_device(pci_dev); - if (ret < 0) - goto out; // frobnicate latency (upwards, usually) pci_read_config_byte (pci_dev, PCI_LATENCY_TIMER, &lat); @@ -2251,22 +2231,27 @@ static int setup_pci_dev(struct pci_dev *pci_dev) lat, pci_lat); pci_write_config_byte(pci_dev, PCI_LATENCY_TIMER, pci_lat); } -out: - return ret; } static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent) { amb_dev * dev; int err; + unsigned int irq; + + err = pci_enable_device(pci_dev); + if (err < 0) { + PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card"); + goto out; + } // read resources from PCI configuration space - unsigned int irq = pci_dev->irq; + irq = pci_dev->irq; if (pci_dev->device == PCI_DEVICE_ID_MADGE_AMBASSADOR_BAD) { PRINTK (KERN_ERR, "skipped broken (PLX rev 2) card"); err = -EINVAL; - goto out; + goto out_disable; } PRINTD (DBG_INFO, "found Madge ATM adapter (amb) at" @@ -2277,7 +2262,7 @@ static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_ err = pci_request_region(pci_dev, 1, DEV_LABEL); if (err < 0) { PRINTK (KERN_ERR, "IO range already in use!"); - goto out; + goto out_disable; } dev = kmalloc (sizeof(amb_dev), GFP_KERNEL); @@ -2295,15 +2280,13 @@ static int __devinit amb_probe(struct pci_dev *pci_dev, const struct pci_device_ goto out_free; } - err = setup_pci_dev(pci_dev); - if (err < 0) - goto out_reset; + 
setup_pci_dev(pci_dev); // grab (but share) IRQ and install handler err = request_irq(irq, interrupt_handler, SA_SHIRQ, DEV_LABEL, dev); if (err < 0) { PRINTK (KERN_ERR, "request IRQ failed!"); - goto out_disable; + goto out_reset; } dev->atm_dev = atm_dev_register (DEV_LABEL, &amb_ops, -1, NULL); @@ -2337,14 +2320,14 @@ out: out_free_irq: free_irq(irq, dev); -out_disable: - pci_disable_device(pci_dev); out_reset: amb_reset(dev, 0); out_free: kfree(dev); out_release: pci_release_region(pci_dev, 1); +out_disable: + pci_disable_device(pci_dev); goto out; } diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 7221439b4937..e64d422470ff 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -86,44 +86,19 @@ #undef USE_RBPL_POOL /* if memory is tight try this */ #define USE_TPD_POOL /* #undef CONFIG_ATM_HE_USE_SUNI */ - -/* compatibility */ - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,5,69) -typedef void irqreturn_t; -#define IRQ_NONE -#define IRQ_HANDLED -#define IRQ_RETVAL(x) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,9) -#define __devexit_p(func) func -#endif - -#ifndef MODULE_LICENSE -#define MODULE_LICENSE(x) -#endif - -#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) -#define pci_set_drvdata(pci_dev, data) (pci_dev)->driver_data = (data) -#define pci_get_drvdata(pci_dev) (pci_dev)->driver_data -#endif +/* #undef HE_DEBUG */ #include "he.h" - #include "suni.h" - #include <linux/atm_he.h> #define hprintk(fmt,args...) printk(KERN_ERR DEV_LABEL "%d: " fmt, he_dev->number , ##args) -#undef DEBUG -#ifdef DEBUG +#ifdef HE_DEBUG #define HPRINTK(fmt,args...) printk(KERN_DEBUG DEV_LABEL "%d: " fmt, he_dev->number , ##args) -#else +#else /* !HE_DEBUG */ #define HPRINTK(fmt,args...) 
do { } while (0) -#endif /* DEBUG */ - +#endif /* HE_DEBUG */ /* version definition */ @@ -147,8 +122,8 @@ static u8 read_prom_byte(struct he_dev *he_dev, int addr); /* globals */ -static struct he_dev *he_devs = NULL; -static int disable64 = 0; +static struct he_dev *he_devs; +static int disable64; static short nvpibits = -1; static short nvcibits = -1; static short rx_skb_reserve = 16; diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 088440b8056a..1c80cc922e4a 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -2706,18 +2706,18 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_ // adapter slot free, read resources from PCI configuration space u32 iobase = pci_resource_start (pci_dev, 0); u32 * membase = bus_to_virt (pci_resource_start (pci_dev, 1)); - u8 irq = pci_dev->irq; + unsigned int irq; unsigned char lat; PRINTD (DBG_FLOW, "hrz_probe"); - /* XXX DEV_LABEL is a guess */ - if (!request_region(iobase, HRZ_IO_EXTENT, DEV_LABEL)) + if (pci_enable_device(pci_dev)) return -EINVAL; - if (pci_enable_device(pci_dev)) { - err = -EINVAL; - goto out_release; + /* XXX DEV_LABEL is a guess */ + if (!request_region(iobase, HRZ_IO_EXTENT, DEV_LABEL)) { + return -EINVAL; + goto out_disable; } dev = kmalloc(sizeof(hrz_dev), GFP_KERNEL); @@ -2725,7 +2725,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_ // perhaps we should be nice: deregister all adapters and abort? 
PRINTD(DBG_ERR, "out of memory"); err = -ENOMEM; - goto out_disable; + goto out_release; } memset(dev, 0, sizeof(hrz_dev)); @@ -2733,6 +2733,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_ pci_set_drvdata(pci_dev, dev); // grab IRQ and install handler - move this someplace more sensible + irq = pci_dev->irq; if (request_irq(irq, interrupt_handler, SA_SHIRQ, /* irqflags guess */ @@ -2846,10 +2847,10 @@ out_free_irq: free_irq(dev->irq, dev); out_free: kfree(dev); -out_disable: - pci_disable_device(pci_dev); out_release: release_region(iobase, HRZ_IO_EXTENT); +out_disable: + pci_disable_device(pci_dev); goto out; } diff --git a/drivers/atm/idt77252.c b/drivers/atm/idt77252.c index 80e304cf3169..3d8764ab2825 100644 --- a/drivers/atm/idt77252.c +++ b/drivers/atm/idt77252.c @@ -3136,14 +3136,11 @@ deinit_card(struct idt77252_dev *card) } } - if (card->soft_tst) - vfree(card->soft_tst); + vfree(card->soft_tst); - if (card->scd2vc) - vfree(card->scd2vc); + vfree(card->scd2vc); - if (card->vcs) - vfree(card->vcs); + vfree(card->vcs); if (card->raw_cell_hnd) { pci_free_consistent(card->pcidev, 2 * sizeof(u32), diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h index 074394d4f8a9..b097522c55e8 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib.h +++ b/drivers/infiniband/ulp/ipoib/ipoib.h @@ -104,10 +104,10 @@ struct ipoib_buf { }; /* - * Device private locking: tx_lock protects members used in TX fast - * path (and we use LLTX so upper layers don't do extra locking). - * lock protects everything else. lock nests inside of tx_lock (ie - * tx_lock must be acquired first if needed). + * Device private locking: netdev->xmit_lock protects members used + * in TX fast path. + * lock protects everything else. lock nests inside of xmit_lock (ie + * xmit_lock must be acquired first if needed). 
*/ struct ipoib_dev_priv { spinlock_t lock; @@ -150,7 +150,6 @@ struct ipoib_dev_priv { struct ipoib_buf *rx_ring; - spinlock_t tx_lock; struct ipoib_buf *tx_ring; unsigned tx_head; unsigned tx_tail; diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c index ac550991227e..d70f9f53d9d0 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c @@ -247,12 +247,12 @@ static void ipoib_ib_handle_wc(struct net_device *dev, dev_kfree_skb_any(tx_req->skb); - spin_lock_irqsave(&priv->tx_lock, flags); + spin_lock_irqsave(&dev->xmit_lock, flags); ++priv->tx_tail; if (netif_queue_stopped(dev) && priv->tx_head - priv->tx_tail <= IPOIB_TX_RING_SIZE / 2) netif_wake_queue(dev); - spin_unlock_irqrestore(&priv->tx_lock, flags); + spin_unlock_irqrestore(&dev->xmit_lock, flags); if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c index 63c8168d8af8..90c73a7cea72 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_main.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c @@ -411,7 +411,7 @@ static void neigh_add_path(struct sk_buff *skb, struct net_device *dev) /* * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. + * inside dev->xmit_lock -- no need to save/restore flags. */ spin_lock(&priv->lock); @@ -483,7 +483,7 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, /* * We can only be called from ipoib_start_xmit, so we're - * inside tx_lock -- no need to save/restore flags. + * inside dev->xmit_lock -- no need to save/restore flags. */ spin_lock(&priv->lock); @@ -526,27 +526,11 @@ static void unicast_arp_send(struct sk_buff *skb, struct net_device *dev, spin_unlock(&priv->lock); } +/* Called with dev->xmit_lock held and IRQs disabled. 
*/ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct ipoib_dev_priv *priv = netdev_priv(dev); struct ipoib_neigh *neigh; - unsigned long flags; - - local_irq_save(flags); - if (!spin_trylock(&priv->tx_lock)) { - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } - - /* - * Check if our queue is stopped. Since we have the LLTX bit - * set, we can't rely on netif_stop_queue() preventing our - * xmit function from being called with a full queue. - */ - if (unlikely(netif_queue_stopped(dev))) { - spin_unlock_irqrestore(&priv->tx_lock, flags); - return NETDEV_TX_BUSY; - } if (skb->dst && skb->dst->neighbour) { if (unlikely(!*to_ipoib_neigh(skb->dst->neighbour))) { @@ -601,7 +585,6 @@ static int ipoib_start_xmit(struct sk_buff *skb, struct net_device *dev) } out: - spin_unlock_irqrestore(&priv->tx_lock, flags); return NETDEV_TX_OK; } @@ -797,7 +780,7 @@ static void ipoib_setup(struct net_device *dev) dev->addr_len = INFINIBAND_ALEN; dev->type = ARPHRD_INFINIBAND; dev->tx_queue_len = IPOIB_TX_RING_SIZE * 2; - dev->features = NETIF_F_VLAN_CHALLENGED | NETIF_F_LLTX; + dev->features = NETIF_F_VLAN_CHALLENGED; /* MTU will be reset when mcast join happens */ dev->mtu = IPOIB_PACKET_SIZE - IPOIB_ENCAP_LEN; @@ -812,7 +795,6 @@ static void ipoib_setup(struct net_device *dev) priv->dev = dev; spin_lock_init(&priv->lock); - spin_lock_init(&priv->tx_lock); init_MUTEX(&priv->mcast_mutex); init_MUTEX(&priv->vlan_mutex); diff --git a/drivers/net/e1000/e1000.h b/drivers/net/e1000/e1000.h index 77db78960430..0843a7c9c624 100644 --- a/drivers/net/e1000/e1000.h +++ b/drivers/net/e1000/e1000.h @@ -209,7 +209,6 @@ struct e1000_adapter { /* TX */ struct e1000_desc_ring tx_ring; - spinlock_t tx_lock; uint32_t txd_cmd; uint32_t tx_int_delay; uint32_t tx_abs_int_delay; diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index aa5ad41acf24..3966e55dcd9a 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c 
@@ -291,7 +291,9 @@ e1000_up(struct e1000_adapter *adapter) e1000_phy_reset(&adapter->hw); } + spin_lock_irq(&netdev->xmit_lock); e1000_set_multi(netdev); + spin_unlock_irq(&netdev->xmit_lock); e1000_restore_vlan(adapter); @@ -520,9 +522,6 @@ e1000_probe(struct pci_dev *pdev, if(pci_using_dac) netdev->features |= NETIF_F_HIGHDMA; - /* hard_start_xmit is safe against parallel locking */ - netdev->features |= NETIF_F_LLTX; - /* before reading the EEPROM, reset the controller to * put the device in a known good starting state */ @@ -732,7 +731,6 @@ e1000_sw_init(struct e1000_adapter *adapter) atomic_set(&adapter->irq_sem, 1); spin_lock_init(&adapter->stats_lock); - spin_lock_init(&adapter->tx_lock); return 0; } @@ -1293,6 +1291,8 @@ e1000_set_mac(struct net_device *netdev, void *p) * list or the network interface flags are updated. This routine is * responsible for configuring the hardware for proper multicast, * promiscuous mode, and all-multi behavior. + * + * Called with netdev->xmit_lock held and IRQs disabled. **/ static void @@ -1304,12 +1304,9 @@ e1000_set_multi(struct net_device *netdev) uint32_t rctl; uint32_t hash_value; int i; - unsigned long flags; /* Check for Promiscuous and All Multicast modes */ - spin_lock_irqsave(&adapter->tx_lock, flags); - rctl = E1000_READ_REG(hw, RCTL); if(netdev->flags & IFF_PROMISC) { @@ -1358,8 +1355,6 @@ e1000_set_multi(struct net_device *netdev) if(hw->mac_type == e1000_82542_rev2_0) e1000_leave_82542_rst(adapter); - - spin_unlock_irqrestore(&adapter->tx_lock, flags); } /* Need to wait a few seconds after link up to get diagnostic information from @@ -1786,6 +1781,8 @@ no_fifo_stall_required: } #define TXD_USE_COUNT(S, X) (((S) >> (X)) + 1 ) + +/* Called with dev->xmit_lock held and interrupts disabled. 
*/ static int e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { @@ -1794,7 +1791,6 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; unsigned int tx_flags = 0; unsigned int len = skb->len; - unsigned long flags; unsigned int nr_frags = 0; unsigned int mss = 0; int count = 0; @@ -1838,18 +1834,10 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if(adapter->pcix_82544) count += nr_frags; - local_irq_save(flags); - if (!spin_trylock(&adapter->tx_lock)) { - /* Collision - tell upper layer to requeue */ - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } - /* need: count + 2 desc gap to keep tail from touching * head, otherwise try next time */ if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < count + 2)) { netif_stop_queue(netdev); - spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_BUSY; } @@ -1857,7 +1845,6 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if(unlikely(e1000_82547_fifo_workaround(adapter, skb))) { netif_stop_queue(netdev); mod_timer(&adapter->tx_fifo_stall_timer, jiffies); - spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_BUSY; } } @@ -1884,7 +1871,6 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) if(unlikely(E1000_DESC_UNUSED(&adapter->tx_ring) < MAX_SKB_FRAGS + 2)) netif_stop_queue(netdev); - spin_unlock_irqrestore(&adapter->tx_lock, flags); return NETDEV_TX_OK; } @@ -2234,13 +2220,13 @@ e1000_clean_tx_irq(struct e1000_adapter *adapter) tx_ring->next_to_clean = i; - spin_lock(&adapter->tx_lock); + spin_lock(&netdev->xmit_lock); if(unlikely(cleaned && netif_queue_stopped(netdev) && netif_carrier_ok(netdev))) netif_wake_queue(netdev); - spin_unlock(&adapter->tx_lock); + spin_unlock(&netdev->xmit_lock); return cleaned; } @@ -2819,7 +2805,10 @@ e1000_suspend(struct pci_dev *pdev, uint32_t state) if(wufc) { e1000_setup_rctl(adapter); + + spin_lock_irq(&netdev->xmit_lock); 
e1000_set_multi(netdev); + spin_unlock_irq(&netdev->xmit_lock); /* turn on all-multi mode if wake on multicast is enabled */ if(adapter->wol & E1000_WUFC_MC) { diff --git a/drivers/net/fc/iph5526_ip.h b/drivers/net/fc/iph5526_ip.h index b54f727e140a..9fae3b002fec 100644 --- a/drivers/net/fc/iph5526_ip.h +++ b/drivers/net/fc/iph5526_ip.h @@ -18,7 +18,6 @@ static int iph5526_change_mtu(struct net_device *dev, int mtu); static void rx_net_packet(struct fc_info *fi, u_char *buff_addr, int payload_size); static void rx_net_mfs_packet(struct fc_info *fi, struct sk_buff *skb); -unsigned short fc_type_trans(struct sk_buff *skb, struct net_device *dev); static int tx_ip_packet(struct sk_buff *skb, unsigned long len, struct fc_info *fi); static int tx_arp_packet(char *data, unsigned long len, struct fc_info *fi); #endif diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c index 38e94961e1a4..c5cbe1bda9cb 100644 --- a/drivers/net/sungem.c +++ b/drivers/net/sungem.c @@ -835,9 +835,9 @@ static int gem_poll(struct net_device *dev, int *budget) } /* Run TX completion thread */ - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); gem_tx(dev, gp, gp->status); - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irqrestore(&gp->lock, flags); @@ -932,12 +932,12 @@ static void gem_tx_timeout(struct net_device *dev) readl(gp->regs + MAC_RXCFG)); spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); gp->reset_task_pending = 2; schedule_work(&gp->reset_task); - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&gp->lock); } @@ -955,7 +955,6 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) struct gem *gp = dev->priv; int entry; u64 ctrl; - unsigned long flags; ctrl = 0; if (skb->ip_summed == CHECKSUM_HW) { @@ -969,17 +968,9 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) (csum_stuff_off << 21)); } - local_irq_save(flags); - if (!spin_trylock(&gp->tx_lock)) { 
- /* Tell upper layer to requeue */ - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } - /* This is a hard error, log it. */ if (TX_BUFFS_AVAIL(gp) <= (skb_shinfo(skb)->nr_frags + 1)) { netif_stop_queue(dev); - spin_unlock_irqrestore(&gp->tx_lock, flags); printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n", dev->name); return NETDEV_TX_BUSY; @@ -1066,7 +1057,6 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) dev->name, entry, skb->len); mb(); writel(gp->tx_new, gp->regs + TXDMA_KICK); - spin_unlock_irqrestore(&gp->tx_lock, flags); dev->trans_start = jiffies; @@ -1097,11 +1087,11 @@ static int gem_change_mtu(struct net_device *dev, int new_mtu) } spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); dev->mtu = new_mtu; gp->reset_task_pending = 1; schedule_work(&gp->reset_task); - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&gp->lock); flush_scheduled_work(); @@ -1111,7 +1101,7 @@ static int gem_change_mtu(struct net_device *dev, int new_mtu) #define STOP_TRIES 32 -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_stop(struct gem *gp) { int limit; @@ -1137,7 +1127,7 @@ static void gem_stop(struct gem *gp) printk(KERN_ERR "%s: SW reset is ghetto.\n", gp->dev->name); } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_start_dma(struct gem *gp) { unsigned long val; @@ -1162,7 +1152,7 @@ static void gem_start_dma(struct gem *gp) } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ // XXX dbl check what that function should do when called on PCS PHY static void gem_begin_auto_negotiation(struct gem *gp, struct ethtool_cmd *ep) { @@ -1249,7 +1239,7 @@ non_mii: /* A link-up condition has occurred, initialize and enable the * rest of the chip. 
* - * Must be invoked under gp->lock and gp->tx_lock. + * Must be invoked under gp->lock and dev->xmit_lock. */ static int gem_set_link_modes(struct gem *gp) { @@ -1356,7 +1346,7 @@ static int gem_set_link_modes(struct gem *gp) return 0; } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static int gem_mdio_link_not_up(struct gem *gp) { switch (gp->lstate) { @@ -1414,7 +1404,7 @@ static void gem_reset_task(void *data) netif_poll_disable(gp->dev); spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); if (gp->hw_running && gp->opened) { netif_stop_queue(gp->dev); @@ -1430,7 +1420,7 @@ static void gem_reset_task(void *data) } gp->reset_task_pending = 0; - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irq(&gp->lock); netif_poll_enable(gp->dev); } @@ -1444,7 +1434,7 @@ static void gem_link_timer(unsigned long data) return; spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); /* If the link of task is still pending, we just * reschedule the link timer @@ -1514,11 +1504,11 @@ static void gem_link_timer(unsigned long data) restart: mod_timer(&gp->link_timer, jiffies + ((12 * HZ) / 10)); out_unlock: - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irq(&gp->lock); } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_clean_rings(struct gem *gp) { struct gem_init_block *gb = gp->init_block; @@ -1569,7 +1559,7 @@ static void gem_clean_rings(struct gem *gp) } } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_init_rings(struct gem *gp) { struct gem_init_block *gb = gp->init_block; @@ -1619,7 +1609,7 @@ static void gem_init_rings(struct gem *gp) wmb(); } -/* Must be invoked under gp->lock and gp->tx_lock. 
*/ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_init_phy(struct gem *gp) { u32 mifcfg; @@ -1757,7 +1747,7 @@ static void gem_init_phy(struct gem *gp) } } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_init_dma(struct gem *gp) { u64 desc_dma = (u64) gp->gblock_dvma; @@ -1795,7 +1785,7 @@ static void gem_init_dma(struct gem *gp) gp->regs + RXDMA_BLANK); } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under dev->xmit_lock. */ static u32 gem_setup_multicast(struct gem *gp) { @@ -1838,7 +1828,7 @@ gem_setup_multicast(struct gem *gp) return rxcfg; } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_init_mac(struct gem *gp) { unsigned char *e = &gp->dev->dev_addr[0]; @@ -1916,7 +1906,7 @@ static void gem_init_mac(struct gem *gp) writel(0xffffffff, gp->regs + MAC_MCMASK); } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. */ static void gem_init_pause_thresholds(struct gem *gp) { u32 cfg; @@ -2052,7 +2042,7 @@ static int gem_check_invariants(struct gem *gp) return 0; } -/* Must be invoked under gp->lock and gp->tx_lock. */ +/* Must be invoked under gp->lock and dev->xmit_lock. 
*/ static void gem_init_hw(struct gem *gp, int restart_link) { /* On Apple's gmac, I initialize the PHY only after @@ -2150,11 +2140,11 @@ static void gem_stop_phy(struct gem *gp) if (!gp->wake_on_lan) { spin_lock_irqsave(&gp->lock, flags); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); gem_stop(gp); writel(MAC_TXRST_CMD, gp->regs + MAC_TXRST); writel(MAC_RXRST_CMD, gp->regs + MAC_RXRST); - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irqrestore(&gp->lock, flags); } @@ -2202,9 +2192,9 @@ static void gem_shutdown(struct gem *gp) unsigned long flags; spin_lock_irqsave(&gp->lock, flags); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); gem_stop(gp); - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irqrestore(&gp->lock, flags); } } @@ -2265,9 +2255,9 @@ static int gem_open(struct net_device *dev) /* Reset the chip */ spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); gem_stop(gp); - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irq(&gp->lock); gp->hw_running = 1; @@ -2281,7 +2271,7 @@ static int gem_open(struct net_device *dev) printk(KERN_ERR "%s: failed to request irq !\n", gp->dev->name); spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); #ifdef CONFIG_PPC_PMAC if (!hw_was_up && gp->pdev->vendor == PCI_VENDOR_ID_APPLE) gem_apple_powerdown(gp); @@ -2290,14 +2280,14 @@ static int gem_open(struct net_device *dev) gp->pm_timer.expires = jiffies + 10*HZ; add_timer(&gp->pm_timer); up(&gp->pm_sem); - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irq(&gp->lock); return -EAGAIN; } spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); /* Allocate & setup ring buffers */ gem_init_rings(gp); @@ -2307,7 +2297,7 @@ static int gem_open(struct net_device *dev) gp->opened = 1; - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); 
spin_unlock_irq(&gp->lock); up(&gp->pm_sem); @@ -2328,7 +2318,7 @@ static int gem_close(struct net_device *dev) /* Stop traffic, mark us closed */ spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); gp->opened = 0; @@ -2343,7 +2333,7 @@ static int gem_close(struct net_device *dev) /* Bye, the pm timer will finish the job */ free_irq(gp->pdev->irq, (void *) dev); - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irq(&gp->lock); /* Fire the PM timer that will shut us down in about 10 seconds */ @@ -2374,7 +2364,7 @@ static int gem_suspend(struct pci_dev *pdev, u32 state) /* If the driver is opened, we stop the DMA */ if (gp->opened) { spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); /* Stop traffic, mark us closed */ netif_device_detach(dev); @@ -2385,7 +2375,7 @@ static int gem_suspend(struct pci_dev *pdev, u32 state) /* Get rid of ring buffers */ gem_clean_rings(gp); - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irq(&gp->lock); if (gp->pdev->vendor == PCI_VENDOR_ID_APPLE) @@ -2419,14 +2409,14 @@ static int gem_resume(struct pci_dev *pdev) } #endif /* CONFIG_PPC_PMAC */ spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&gp->dev->xmit_lock); gem_stop(gp); gp->hw_running = 1; gem_init_rings(gp); gem_init_hw(gp, 1); - spin_unlock(&gp->tx_lock); + spin_unlock(&gp->dev->xmit_lock); spin_unlock_irq(&gp->lock); netif_device_attach(dev); @@ -2447,7 +2437,7 @@ static struct net_device_stats *gem_get_stats(struct net_device *dev) struct net_device_stats *stats = &gp->net_stats; spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); if (gp->hw_running) { stats->rx_crc_errors += readl(gp->regs + MAC_FCSERR); @@ -2467,12 +2457,13 @@ static struct net_device_stats *gem_get_stats(struct net_device *dev) writel(0, gp->regs + MAC_LCOLL); } - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); 
spin_unlock_irq(&gp->lock); return &gp->net_stats; } +/* Called with dev->xmit_lock held and IRQs disabled. */ static void gem_set_multicast(struct net_device *dev) { struct gem *gp = dev->priv; @@ -2482,9 +2473,6 @@ static void gem_set_multicast(struct net_device *dev) if (!gp->hw_running) return; - spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); - netif_stop_queue(dev); rxcfg = readl(gp->regs + MAC_RXCFG); @@ -2507,9 +2495,6 @@ static void gem_set_multicast(struct net_device *dev) writel(rxcfg, gp->regs + MAC_RXCFG); netif_wake_queue(dev); - - spin_unlock(&gp->tx_lock); - spin_unlock_irq(&gp->lock); } static void gem_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) @@ -2540,7 +2525,7 @@ static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) /* Return current PHY settings */ spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); cmd->autoneg = gp->want_autoneg; cmd->speed = gp->phy_mii.speed; cmd->duplex = gp->phy_mii.duplex; @@ -2552,7 +2537,7 @@ static int gem_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) */ if (cmd->advertising == 0) cmd->advertising = cmd->supported; - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&gp->lock); } else { // XXX PCS ? cmd->supported = @@ -2592,9 +2577,9 @@ static int gem_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) /* Apply settings and restart link process. */ spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); gem_begin_auto_negotiation(gp, cmd); - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&gp->lock); return 0; @@ -2609,9 +2594,9 @@ static int gem_nway_reset(struct net_device *dev) /* Restart link process. 
*/ spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); gem_begin_auto_negotiation(gp, NULL); - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&gp->lock); return 0; @@ -2863,7 +2848,6 @@ static int __devinit gem_init_one(struct pci_dev *pdev, gp->msg_enable = DEFAULT_MSG; spin_lock_init(&gp->lock); - spin_lock_init(&gp->tx_lock); init_MUTEX(&gp->pm_sem); init_timer(&gp->link_timer); @@ -2899,9 +2883,9 @@ static int __devinit gem_init_one(struct pci_dev *pdev, gem_apple_powerup(gp); #endif spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); gem_stop(gp); - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&gp->lock); /* Fill up the mii_phy structure (even if we won't use it) */ @@ -2967,11 +2951,11 @@ static int __devinit gem_init_one(struct pci_dev *pdev, /* Detect & init PHY, start autoneg */ spin_lock_irq(&gp->lock); - spin_lock(&gp->tx_lock); + spin_lock(&dev->xmit_lock); gp->hw_running = 1; gem_init_phy(gp); gem_begin_auto_negotiation(gp, NULL); - spin_unlock(&gp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&gp->lock); if (gp->phy_type == phy_mii_mdio0 || @@ -2982,7 +2966,7 @@ static int __devinit gem_init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); /* GEM can do it all... 
*/ - dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_LLTX; + dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM; if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h index 00343226fb71..8bbc104d848f 100644 --- a/drivers/net/sungem.h +++ b/drivers/net/sungem.h @@ -953,7 +953,6 @@ enum link_state { struct gem { spinlock_t lock; - spinlock_t tx_lock; void __iomem *regs; int rx_new, rx_old; int tx_new, tx_old; diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 8a165aca7542..2088143716af 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -60,8 +60,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.15" -#define DRV_MODULE_RELDATE "January 6, 2005" +#define DRV_MODULE_VERSION "3.16" +#define DRV_MODULE_RELDATE "January 17, 2005" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -2706,7 +2706,11 @@ static int tg3_rx(struct tg3 *tp, int budget) len = ((desc->idx_len & RXD_LEN_MASK) >> RXD_LEN_SHIFT) - 4; /* omit crc */ - if (len > RX_COPY_THRESHOLD) { + if (len > RX_COPY_THRESHOLD + && tp->rx_offset == 2 + /* rx_offset != 2 iff this is a 5701 card running + * in PCI-X mode [see tg3_get_invariants()] */ + ) { int skb_size; skb_size = tg3_alloc_rx_skb(tp, opaque_key, @@ -2812,9 +2816,9 @@ static int tg3_poll(struct net_device *netdev, int *budget) /* run TX completion thread */ if (sblk->idx[0].tx_consumer != tp->tx_cons) { - spin_lock(&tp->tx_lock); + spin_lock(&netdev->xmit_lock); tg3_tx(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&netdev->xmit_lock); } spin_unlock_irqrestore(&tp->lock, flags); @@ -2935,7 +2939,7 @@ static void tg3_reset_task(void *_data) tg3_netif_stop(tp); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&tp->dev->xmit_lock); restart_timer = tp->tg3_flags2 & TG3_FLG2_RESTART_TIMER; tp->tg3_flags2 &= ~TG3_FLG2_RESTART_TIMER; @@ -2945,7 +2949,7 @@ static void tg3_reset_task(void *_data) tg3_netif_start(tp); - 
spin_unlock(&tp->tx_lock); + spin_unlock(&tp->dev->xmit_lock); spin_unlock_irq(&tp->lock); if (restart_timer) @@ -3044,6 +3048,7 @@ static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len) (base + len + 8 < base)); } +/* dev->xmit_lock is held and IRQs are disabled. */ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); @@ -3051,39 +3056,12 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int i; u32 len, entry, base_flags, mss; int would_hit_hwbug; - unsigned long flags; len = skb_headlen(skb); - /* No BH disabling for tx_lock here. We are running in BH disabled - * context and TX reclaim runs via tp->poll inside of a software - * interrupt. Rejoice! - * - * Actually, things are not so simple. If we are to take a hw - * IRQ here, we can deadlock, consider: - * - * CPU1 CPU2 - * tg3_start_xmit - * take tp->tx_lock - * tg3_timer - * take tp->lock - * tg3_interrupt - * spin on tp->lock - * spin on tp->tx_lock - * - * So we really do need to disable interrupts when taking - * tx_lock here. - */ - local_irq_save(flags); - if (!spin_trylock(&tp->tx_lock)) { - local_irq_restore(flags); - return NETDEV_TX_LOCKED; - } - /* This is a hard error, log it. */ if (unlikely(TX_BUFFS_AVAIL(tp) <= (skb_shinfo(skb)->nr_frags + 1))) { netif_stop_queue(dev); - spin_unlock_irqrestore(&tp->tx_lock, flags); printk(KERN_ERR PFX "%s: BUG! 
Tx Ring full when queue awake!\n", dev->name); return NETDEV_TX_BUSY; @@ -3220,7 +3198,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) entry, len, last_plus_one, &start, mss)) - goto out_unlock; + goto out; entry = start; } @@ -3232,9 +3210,8 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (TX_BUFFS_AVAIL(tp) <= (MAX_SKB_FRAGS + 1)) netif_stop_queue(dev); -out_unlock: +out: mmiowb(); - spin_unlock_irqrestore(&tp->tx_lock, flags); dev->trans_start = jiffies; @@ -3269,7 +3246,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_netif_stop(tp); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tg3_halt(tp); @@ -3279,7 +3256,7 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu) tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); return 0; @@ -5570,7 +5547,7 @@ static void tg3_timer(unsigned long __opaque) unsigned long flags; spin_lock_irqsave(&tp->lock, flags); - spin_lock(&tp->tx_lock); + spin_lock(&tp->dev->xmit_lock); /* All of this garbage is because when using non-tagged * IRQ status the mailbox/status_block protocol the chip @@ -5586,7 +5563,7 @@ static void tg3_timer(unsigned long __opaque) if (!(tr32(WDMAC_MODE) & WDMAC_MODE_ENABLE)) { tp->tg3_flags2 |= TG3_FLG2_RESTART_TIMER; - spin_unlock(&tp->tx_lock); + spin_unlock(&tp->dev->xmit_lock); spin_unlock_irqrestore(&tp->lock, flags); schedule_work(&tp->reset_task); return; @@ -5655,7 +5632,7 @@ static void tg3_timer(unsigned long __opaque) tp->asf_counter = tp->asf_multiplier; } - spin_unlock(&tp->tx_lock); + spin_unlock(&tp->dev->xmit_lock); spin_unlock_irqrestore(&tp->lock, flags); tp->timer.expires = jiffies + tp->timer_offset; @@ -5668,12 +5645,12 @@ static int tg3_open(struct net_device *dev) int err; spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tg3_disable_ints(tp); tp->tg3_flags &= 
~TG3_FLAG_INIT_COMPLETE; - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); /* The placement of this call is tied @@ -5692,7 +5669,7 @@ static int tg3_open(struct net_device *dev) } spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); err = tg3_init_hw(tp); if (err) { @@ -5712,7 +5689,7 @@ static int tg3_open(struct net_device *dev) tp->tg3_flags |= TG3_FLAG_INIT_COMPLETE; } - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); if (err) { @@ -5722,11 +5699,11 @@ static int tg3_open(struct net_device *dev) } spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tg3_enable_ints(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); netif_start_queue(dev); @@ -5974,7 +5951,7 @@ static int tg3_close(struct net_device *dev) del_timer_sync(&tp->timer); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); #if 0 tg3_dump_state(tp); #endif @@ -5988,7 +5965,7 @@ static int tg3_close(struct net_device *dev) TG3_FLAG_GOT_SERDES_FLOWCTL); netif_carrier_off(tp->dev); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); free_irq(dev->irq, dev); @@ -6287,15 +6264,10 @@ static void __tg3_set_rx_mode(struct net_device *dev) } } +/* Called with dev->xmit_lock held and IRQs disabled. 
*/ static void tg3_set_rx_mode(struct net_device *dev) { - struct tg3 *tp = netdev_priv(dev); - - spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); __tg3_set_rx_mode(dev); - spin_unlock(&tp->tx_lock); - spin_unlock_irq(&tp->lock); } #define TG3_REGDUMP_LEN (32 * 1024) @@ -6318,7 +6290,7 @@ static void tg3_get_regs(struct net_device *dev, memset(p, 0, TG3_REGDUMP_LEN); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); #define __GET_REG32(reg) (*(p)++ = tr32(reg)) #define GET_REG32_LOOP(base,len) \ @@ -6368,7 +6340,7 @@ do { p = (u32 *)(orig_p + (reg)); \ #undef GET_REG32_LOOP #undef GET_REG32_1 - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); } @@ -6492,7 +6464,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) } spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tp->link_config.autoneg = cmd->autoneg; if (cmd->autoneg == AUTONEG_ENABLE) { @@ -6506,7 +6478,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) } tg3_setup_phy(tp, 1); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); return 0; @@ -6623,7 +6595,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e tg3_netif_stop(tp); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tp->rx_pending = ering->rx_pending; @@ -6636,7 +6608,7 @@ static int tg3_set_ringparam(struct net_device *dev, struct ethtool_ringparam *e tg3_halt(tp); tg3_init_hw(tp); tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); return 0; @@ -6657,7 +6629,7 @@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam tg3_netif_stop(tp); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); if (epause->autoneg) tp->tg3_flags |= TG3_FLAG_PAUSE_AUTONEG; else @@ -6673,7 +6645,7 
@@ static int tg3_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam tg3_halt(tp); tg3_init_hw(tp); tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); return 0; @@ -6799,14 +6771,14 @@ static void tg3_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) struct tg3 *tp = netdev_priv(dev); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tp->vlgrp = grp; /* Update RX_MODE_KEEP_VLAN_TAG bit in RX_MODE register. */ __tg3_set_rx_mode(dev); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); } @@ -6815,10 +6787,10 @@ static void tg3_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) struct tg3 *tp = netdev_priv(dev); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); if (tp->vlgrp) tp->vlgrp->vlan_devices[vid] = NULL; - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); } #endif @@ -8237,7 +8209,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, if (pci_using_dac) dev->features |= NETIF_F_HIGHDMA; - dev->features |= NETIF_F_LLTX; #if TG3_VLAN_TAG_USED dev->features |= NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX; dev->vlan_rx_register = tg3_vlan_rx_register; @@ -8279,7 +8250,6 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, tp->grc_mode |= GRC_MODE_BSWAP_NONFRM_DATA; #endif spin_lock_init(&tp->lock); - spin_lock_init(&tp->tx_lock); spin_lock_init(&tp->indirect_lock); INIT_WORK(&tp->reset_task, tg3_reset_task, tp); @@ -8492,23 +8462,23 @@ static int tg3_suspend(struct pci_dev *pdev, u32 state) del_timer_sync(&tp->timer); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tg3_disable_ints(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); netif_device_detach(dev); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); 
tg3_halt(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); err = tg3_set_power_state(tp, state); if (err) { spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tg3_init_hw(tp); @@ -8518,7 +8488,7 @@ static int tg3_suspend(struct pci_dev *pdev, u32 state) netif_device_attach(dev); tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); } @@ -8543,7 +8513,7 @@ static int tg3_resume(struct pci_dev *pdev) netif_device_attach(dev); spin_lock_irq(&tp->lock); - spin_lock(&tp->tx_lock); + spin_lock(&dev->xmit_lock); tg3_init_hw(tp); @@ -8554,7 +8524,7 @@ static int tg3_resume(struct pci_dev *pdev) tg3_netif_start(tp); - spin_unlock(&tp->tx_lock); + spin_unlock(&dev->xmit_lock); spin_unlock_irq(&tp->lock); return 0; diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 3b22f53d2579..68b7520784ea 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -1980,12 +1980,11 @@ struct tg3 { * lock: Held during all operations except TX packet * processing. * - * tx_lock: Held during tg3_start_xmit{,_4gbug} and tg3_tx + * dev->xmit_lock: Held during tg3_start_xmit and tg3_tx * * If you want to shut up all asynchronous processing you must - * acquire both locks, 'lock' taken before 'tx_lock'. IRQs must - * be disabled to take 'lock' but only softirq disabling is - * necessary for acquisition of 'tx_lock'. + * acquire both locks, 'lock' taken before 'xmit_lock'. IRQs must + * be disabled to take either lock. 
*/ spinlock_t lock; spinlock_t indirect_lock; @@ -2004,8 +2003,6 @@ struct tg3 { u32 tx_cons; u32 tx_pending; - spinlock_t tx_lock; - struct tg3_tx_buffer_desc *tx_ring; struct tx_ring_info *tx_buffers; dma_addr_t tx_desc_mapping; diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 6a10cc610ad4..a84ff2f17599 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -5033,27 +5033,6 @@ qeth_neigh_setup(struct net_device *dev, struct neigh_parms *np) return 0; } -#ifdef CONFIG_QETH_IPV6 -int -qeth_ipv6_generate_eui64(u8 * eui, struct net_device *dev) -{ - switch (dev->type) { - case ARPHRD_ETHER: - case ARPHRD_FDDI: - case ARPHRD_IEEE802_TR: - if (dev->addr_len != ETH_ALEN) - return -1; - memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr + 3, 3); - eui[3] = (dev->dev_id >> 8) & 0xff; - eui[4] = dev->dev_id & 0xff; - return 0; - } - return -1; - -} -#endif - static void qeth_get_mac_for_ipm(__u32 ipm, char *mac, struct net_device *dev) { @@ -5587,11 +5566,8 @@ qeth_netdev_init(struct net_device *dev) } #ifdef CONFIG_QETH_IPV6 /*IPv6 address autoconfiguration stuff*/ - card->dev->dev_id = card->info.unique_id & 0xffff; if (!(card->info.unique_id & UNIQUE_ID_NOT_BY_CARD)) - card->dev->generate_eui64 = qeth_ipv6_generate_eui64; - - + card->dev->dev_id = card->info.unique_id & 0xffff; #endif dev->hard_header_parse = NULL; dev->set_mac_address = qeth_layer2_set_mac_address; diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index 87a901c75370..bc701ddb88e4 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -313,7 +313,7 @@ struct atm_vcc { struct atm_dev_addr { struct sockaddr_atmsvc addr; /* ATM address */ - struct atm_dev_addr *next; /* next address */ + struct list_head entry; /* next address */ }; struct atm_dev { @@ -325,7 +325,7 @@ struct atm_dev { void *dev_data; /* per-device data */ void *phy_data; /* private PHY date */ unsigned long flags; /* device flags (ATM_DF_*) */ - 
struct atm_dev_addr *local; /* local ATM addresses */ + struct list_head local; /* local ATM addresses */ unsigned char esi[ESI_LEN]; /* ESI ("MAC" addr) */ struct atm_cirange ci_range; /* VPI/VCI range */ struct k_atm_dev_stats stats; /* statistics */ diff --git a/include/linux/fcdevice.h b/include/linux/fcdevice.h index daccf6c444d0..e42fc78f679e 100644 --- a/include/linux/fcdevice.h +++ b/include/linux/fcdevice.h @@ -27,10 +27,6 @@ #include <linux/if_fc.h> #ifdef __KERNEL__ -extern int fc_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, - void *saddr, unsigned len); -extern int fc_rebuild_header(struct sk_buff *skb); extern unsigned short fc_type_trans(struct sk_buff *skb, struct net_device *dev); extern struct net_device *alloc_fcdev(int sizeof_priv); diff --git a/include/linux/fddidevice.h b/include/linux/fddidevice.h index b6c55dad37a4..2e5ee47f3e1e 100644 --- a/include/linux/fddidevice.h +++ b/include/linux/fddidevice.h @@ -25,13 +25,6 @@ #include <linux/if_fddi.h> #ifdef __KERNEL__ -extern int fddi_header(struct sk_buff *skb, - struct net_device *dev, - unsigned short type, - void *daddr, - void *saddr, - unsigned len); -extern int fddi_rebuild_header(struct sk_buff *skb); extern unsigned short fddi_type_trans(struct sk_buff *skb, struct net_device *dev); extern struct net_device *alloc_fddidev(int sizeof_priv); diff --git a/include/linux/hippidevice.h b/include/linux/hippidevice.h index 9d594ddfe5f4..89b3a4a5b761 100644 --- a/include/linux/hippidevice.h +++ b/include/linux/hippidevice.h @@ -26,30 +26,9 @@ #include <linux/if_hippi.h> #ifdef __KERNEL__ -extern int hippi_header(struct sk_buff *skb, - struct net_device *dev, - unsigned short type, - void *daddr, - void *saddr, - unsigned len); - -extern int hippi_rebuild_header(struct sk_buff *skb); - extern unsigned short hippi_type_trans(struct sk_buff *skb, struct net_device *dev); -extern void hippi_header_cache_bind(struct hh_cache ** hhp, - struct net_device *dev, - 
unsigned short htype, - __u32 daddr); - -extern void hippi_header_cache_update(struct hh_cache *hh, - struct net_device *dev, - unsigned char * haddr); -extern int hippi_header_parse(struct sk_buff *skb, unsigned char *haddr); - -extern void hippi_net_init(void); - extern struct net_device *alloc_hippi_dev(int sizeof_priv); #endif diff --git a/include/linux/ip.h b/include/linux/ip.h index 3fe93474047d..487152a404f8 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -158,6 +158,20 @@ static inline struct inet_sock *inet_sk(const struct sock *sk) return (struct inet_sock *)sk; } +static inline void __inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from, + const int ancestor_size) +{ + memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1, + sk_from->sk_prot->slab_obj_size - ancestor_size); +} +#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)) +static inline void inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) +{ + __inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock)); +} +#endif #endif struct iphdr { diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f80c4b3f84a3..939942384b78 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -247,27 +247,26 @@ struct ipv6_pinfo { } cork; }; -struct raw6_opt { +/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ +struct raw6_sock { + /* inet_sock has to be the first member of raw6_sock */ + struct inet_sock inet; __u32 checksum; /* perform checksum */ __u32 offset; /* checksum offset */ - struct icmp6_filter filter; -}; - -/* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! 
*/ -struct raw6_sock { - struct inet_sock inet; - struct raw6_opt raw6; - struct ipv6_pinfo inet6; + /* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */ + struct ipv6_pinfo inet6; }; struct udp6_sock { struct udp_sock udp; + /* ipv6_pinfo has to be the last member of udp6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; struct tcp6_sock { struct tcp_sock tcp; + /* ipv6_pinfo has to be the last member of tcp6_sock, see inet6_sk_generic */ struct ipv6_pinfo inet6; }; @@ -277,9 +276,20 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return inet_sk(__sk)->pinet6; } -static inline struct raw6_opt * raw6_sk(const struct sock *__sk) +static inline struct raw6_sock *raw6_sk(const struct sock *sk) +{ + return (struct raw6_sock *)sk; +} + +static inline void inet_sk_copy_descendant(struct sock *sk_to, + const struct sock *sk_from) { - return &((struct raw6_sock *)__sk)->raw6; + int ancestor_size = sizeof(struct inet_sock); + + if (sk_from->sk_family == PF_INET6) + ancestor_size += sizeof(struct ipv6_pinfo); + + __inet_sk_copy_descendant(sk_to, sk_from, ancestor_size); } #define __ipv6_only_sock(sk) (inet6_sk(sk)->ipv6only) @@ -293,7 +303,7 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) return NULL; } -static inline struct raw6_opt * raw6_sk(const struct sock *__sk) +static inline struct raw6_sock *raw6_sk(const struct sock *sk) { return NULL; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fca36de5e3fc..48e3d5f4bcde 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -76,7 +76,6 @@ struct ethtool_ops; /* Driver transmit return codes */ #define NETDEV_TX_OK 0 /* driver took care of packet */ #define NETDEV_TX_BUSY 1 /* driver tx path was busy*/ -#define NETDEV_TX_LOCKED -1 /* driver tx lock was already taken */ /* * Compute the worst case header length according to the protocols @@ -345,6 +344,7 @@ struct net_device unsigned char 
broadcast[MAX_ADDR_LEN]; /* hw bcast add */ unsigned char dev_addr[MAX_ADDR_LEN]; /* hw address */ unsigned char addr_len; /* hardware address length */ + unsigned short dev_id; /* for shared network cards */ struct dev_mc_list *mc_list; /* Multicast mac addresses */ int mc_count; /* Number of installed mcasts */ @@ -414,7 +414,7 @@ struct net_device #define NETIF_F_HW_VLAN_FILTER 512 /* Receive filtering on VLAN */ #define NETIF_F_VLAN_CHALLENGED 1024 /* Device cannot handle VLAN packets */ #define NETIF_F_TSO 2048 /* Can offload TCP/IP segmentation */ -#define NETIF_F_LLTX 4096 /* LockLess TX */ +#define NETIF_F_LLTX 4096 /* Do not grab xmit_lock during ->hard_start_xmit */ /* Called after device is detached from network. */ void (*uninit)(struct net_device *dev); @@ -893,9 +893,11 @@ static inline void __netif_rx_complete(struct net_device *dev) static inline void netif_tx_disable(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + unsigned long flags; + + spin_lock_irqsave(&dev->xmit_lock, flags); netif_stop_queue(dev); - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irqrestore(&dev->xmit_lock, flags); } /* These functions live elsewhere (drivers/net/net_init.c, but related) */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index b88b52c33db7..675a01a13398 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -3,13 +3,6 @@ /* Connection state tracking for netfilter. This is separated from, but required by, the NAT layer; it can also be used by an iptables extension. */ - -#include <linux/config.h> -#include <linux/netfilter_ipv4/ip_conntrack_tuple.h> -#include <linux/bitops.h> -#include <linux/compiler.h> -#include <asm/atomic.h> - enum ip_conntrack_info { /* Part of an established connection (either direction). 
*/ @@ -47,8 +40,40 @@ enum ip_conntrack_status { /* Connection is confirmed: originating packet has left box */ IPS_CONFIRMED_BIT = 3, IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT), + + /* Connection needs src nat in orig dir. This bit never changed. */ + IPS_SRC_NAT_BIT = 4, + IPS_SRC_NAT = (1 << IPS_SRC_NAT_BIT), + + /* Connection needs dst nat in orig dir. This bit never changed. */ + IPS_DST_NAT_BIT = 5, + IPS_DST_NAT = (1 << IPS_DST_NAT_BIT), + + /* Both together. */ + IPS_NAT_MASK = (IPS_DST_NAT | IPS_SRC_NAT), + + /* Connection needs TCP sequence adjusted. */ + IPS_SEQ_ADJUST_BIT = 6, + IPS_SEQ_ADJUST = (1 << IPS_SEQ_ADJUST_BIT), + + /* NAT initialization bits. */ + IPS_SRC_NAT_DONE_BIT = 7, + IPS_SRC_NAT_DONE = (1 << IPS_SRC_NAT_DONE_BIT), + + IPS_DST_NAT_DONE_BIT = 8, + IPS_DST_NAT_DONE = (1 << IPS_DST_NAT_DONE_BIT), + + /* Both together */ + IPS_NAT_DONE_MASK = (IPS_DST_NAT_DONE | IPS_SRC_NAT_DONE), }; +#ifdef __KERNEL__ +#include <linux/config.h> +#include <linux/netfilter_ipv4/ip_conntrack_tuple.h> +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <asm/atomic.h> + #include <linux/netfilter_ipv4/ip_conntrack_tcp.h> #include <linux/netfilter_ipv4/ip_conntrack_icmp.h> #include <linux/netfilter_ipv4/ip_conntrack_sctp.h> @@ -70,20 +95,6 @@ union ip_conntrack_expect_proto { #include <linux/netfilter_ipv4/ip_conntrack_ftp.h> #include <linux/netfilter_ipv4/ip_conntrack_irc.h> -/* per expectation: application helper private data */ -union ip_conntrack_expect_help { - /* insert conntrack helper private data (expect) here */ - struct ip_ct_amanda_expect exp_amanda_info; - struct ip_ct_ftp_expect exp_ftp_info; - struct ip_ct_irc_expect exp_irc_info; - -#ifdef CONFIG_IP_NF_NAT_NEEDED - union { - /* insert nat helper private data (expect) here */ - } nat; -#endif -}; - /* per conntrack: application helper private data */ union ip_conntrack_help { /* insert conntrack helper private data (master) here */ @@ -93,15 +104,8 @@ union ip_conntrack_help { #ifdef 
CONFIG_IP_NF_NAT_NEEDED #include <linux/netfilter_ipv4/ip_nat.h> - -/* per conntrack: nat application helper private data */ -union ip_conntrack_nat_help { - /* insert nat helper private data here */ -}; #endif -#ifdef __KERNEL__ - #include <linux/types.h> #include <linux/skbuff.h> @@ -123,39 +127,26 @@ struct ip_conntrack_expect /* Internal linked list (global expectation list) */ struct list_head list; - /* reference count */ - atomic_t use; - - /* expectation list for this master */ - struct list_head expected_list; + /* We expect this tuple, with the following mask */ + struct ip_conntrack_tuple tuple, mask; + + /* Function to call after setup and insertion */ + void (*expectfn)(struct ip_conntrack *new, + struct ip_conntrack_expect *this); /* The conntrack of the master connection */ - struct ip_conntrack *expectant; - - /* The conntrack of the sibling connection, set after - * expectation arrived */ - struct ip_conntrack *sibling; - - /* Tuple saved for conntrack */ - struct ip_conntrack_tuple ct_tuple; + struct ip_conntrack *master; /* Timer function; deletes the expectation. */ struct timer_list timeout; - /* Data filled out by the conntrack helpers follow: */ - - /* We expect this tuple, with the following mask */ - struct ip_conntrack_tuple tuple, mask; - - /* Function to call after setup and insertion */ - int (*expectfn)(struct ip_conntrack *new); - - /* At which sequence number did this expectation occur */ - u_int32_t seq; - - union ip_conntrack_expect_proto proto; - - union ip_conntrack_expect_help help; +#ifdef CONFIG_IP_NF_NAT_NEEDED + /* This is the original per-proto part, used to map the + * expected connection the way the recipient expects. */ + union ip_conntrack_manip_proto saved_proto; + /* Direction relative to the master connection. 
*/ + enum ip_conntrack_dir dir; +#endif }; struct ip_conntrack_counter @@ -182,17 +173,12 @@ struct ip_conntrack /* Accounting Information (same cache line as other written members) */ struct ip_conntrack_counter counters[IP_CT_DIR_MAX]; #endif + /* If we were expected by an expectation, this will be it */ + struct ip_conntrack *master; - /* If we're expecting another related connection, this will be - in expected linked list */ - struct list_head sibling_list; - /* Current number of expected connections */ unsigned int expecting; - /* If we were expected by an expectation, this will be it */ - struct ip_conntrack_expect *master; - /* Helper, if any. */ struct ip_conntrack_helper *helper; @@ -204,7 +190,6 @@ struct ip_conntrack #ifdef CONFIG_IP_NF_NAT_NEEDED struct { struct ip_nat_info info; - union ip_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) int masq_index; @@ -221,8 +206,15 @@ struct ip_conntrack struct ip_conntrack_tuple_hash tuplehash[IP_CT_DIR_MAX]; }; +static inline struct ip_conntrack * +tuplehash_to_ctrack(const struct ip_conntrack_tuple_hash *hash) +{ + return container_of(hash, struct ip_conntrack, + tuplehash[hash->tuple.dst.dir]); +} + /* get master conntrack via master expectation */ -#define master_ct(conntr) (conntr->master ? conntr->master->expectant : NULL) +#define master_ct(conntr) (conntr->master) /* Alter reply tuple (maybe alter helper). 
*/ extern void @@ -246,13 +238,6 @@ ip_conntrack_get(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo) /* decrement reference count on a conntrack */ extern inline void ip_conntrack_put(struct ip_conntrack *ct); -/* find unconfirmed expectation based on tuple */ -struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple); - -/* decrement reference count on an expectation */ -void ip_conntrack_expect_put(struct ip_conntrack_expect *exp); - /* call to create an explicit dependency on ip_conntrack. */ extern void need_ip_conntrack(void); @@ -267,9 +252,9 @@ extern void ip_ct_refresh_acct(struct ip_conntrack *ct, /* These are for NAT. Icky. */ /* Update TCP window tracking data when NAT mangles the packet */ -extern int ip_conntrack_tcp_update(struct sk_buff *skb, - struct ip_conntrack *conntrack, - int dir); +extern void ip_conntrack_tcp_update(struct sk_buff *skb, + struct ip_conntrack *conntrack, + enum ip_conntrack_dir dir); /* Call me when a conntrack is destroyed. */ extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack); @@ -316,15 +301,12 @@ struct ip_conntrack_stat #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) -/* eg. PROVIDES_CONNTRACK(ftp); */ -#define PROVIDES_CONNTRACK(name) \ - int needs_ip_conntrack_##name; \ - EXPORT_SYMBOL(needs_ip_conntrack_##name) - -/*. eg. 
NEEDS_CONNTRACK(ftp); */ -#define NEEDS_CONNTRACK(name) \ - extern int needs_ip_conntrack_##name; \ - static int *need_ip_conntrack_##name __attribute_used__ = &needs_ip_conntrack_##name - +static inline int ip_nat_initialized(struct ip_conntrack *conntrack, + enum ip_nat_manip_type manip) +{ + if (manip == IP_NAT_MANIP_SRC) + return test_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status); + return test_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status); +} #endif /* __KERNEL__ */ #endif /* _IP_CONNTRACK_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h b/include/linux/netfilter_ipv4/ip_conntrack_amanda.h index 75ee293bd088..de3e41f51aec 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_amanda.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_amanda.h @@ -2,11 +2,10 @@ #define _IP_CONNTRACK_AMANDA_H /* AMANDA tracking. */ -struct ip_ct_amanda_expect -{ - u_int16_t port; /* port number of this expectation */ - u_int16_t offset; /* offset of port in ctrl packet */ - u_int16_t len; /* length of the port number string */ -}; - +struct ip_conntrack_expect; +extern unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp); #endif /* _IP_CONNTRACK_AMANDA_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h index bb9b11c680ac..d84be02cb4fc 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_core.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h @@ -34,20 +34,19 @@ struct ip_conntrack_tuple_hash * ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack); -extern int __ip_conntrack_confirm(struct sk_buff *skb); +extern int __ip_conntrack_confirm(struct sk_buff **pskb); /* Confirm a connection: returns NF_DROP if packet must be dropped. 
*/ -static inline int ip_conntrack_confirm(struct sk_buff *skb) +static inline int ip_conntrack_confirm(struct sk_buff **pskb) { - if (skb->nfct - && !is_confirmed((struct ip_conntrack *)skb->nfct)) - return __ip_conntrack_confirm(skb); + if ((*pskb)->nfct + && !is_confirmed((struct ip_conntrack *)(*pskb)->nfct)) + return __ip_conntrack_confirm(pskb); return NF_ACCEPT; } extern struct list_head *ip_conntrack_hash; extern struct list_head ip_conntrack_expect_list; DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); -DECLARE_RWLOCK_EXTERN(ip_conntrack_expect_tuple_lock); #endif /* _IP_CONNTRACK_CORE_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h index 2f85006c75f9..5f06429b9047 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_ftp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_ftp.h @@ -20,24 +20,24 @@ enum ip_ct_ftp_type IP_CT_FTP_EPSV, }; -/* This structure is per expected connection */ -struct ip_ct_ftp_expect -{ - /* We record seq number and length of ftp ip/port text here: all in - * host order. */ - - /* sequence number of IP address in packet is in ip_conntrack_expect */ - u_int32_t len; /* length of IP address */ - enum ip_ct_ftp_type ftptype; /* PORT or PASV ? */ - u_int16_t port; /* TCP port that was to be used */ -}; - +#define NUM_SEQ_TO_REMEMBER 2 /* This structure exists only once per master */ struct ip_ct_ftp_master { - /* Next valid seq position for cmd matching after newline */ - u_int32_t seq_aft_nl[IP_CT_DIR_MAX]; + /* Valid seq positions for cmd matching after newline */ + u_int32_t seq_aft_nl[IP_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER]; /* 0 means seq_match_aft_nl not set */ - int seq_aft_nl_set[IP_CT_DIR_MAX]; + int seq_aft_nl_num[IP_CT_DIR_MAX]; }; +struct ip_conntrack_expect; + +/* For NAT to hook in when we find a packet which describes what other + * connection we should expect. 
*/ +extern unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + enum ip_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp, + u32 *seq); #endif /* _IP_CONNTRACK_FTP_H */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/include/linux/netfilter_ipv4/ip_conntrack_helper.h index fe6268bd1737..b1bbba0a12cb 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -5,15 +5,11 @@ struct module; -/* Reuse expectation when max_expected reached */ -#define IP_CT_HELPER_F_REUSE_EXPECT 0x01 - struct ip_conntrack_helper { struct list_head list; /* Internal use. */ const char *name; /* name of the module */ - unsigned char flags; /* Flags (see above) */ struct module *me; /* pointer to self */ unsigned int max_expected; /* Maximum number of concurrent * expected connections */ @@ -25,7 +21,7 @@ struct ip_conntrack_helper /* Function to call when data passes; return verdict, or -1 to invalidate. */ - int (*help)(struct sk_buff *skb, + int (*help)(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); }; @@ -33,17 +29,13 @@ struct ip_conntrack_helper extern int ip_conntrack_helper_register(struct ip_conntrack_helper *); extern void ip_conntrack_helper_unregister(struct ip_conntrack_helper *); -extern struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple); - - /* Allocate space for an expectation: this is mandatory before calling ip_conntrack_expect_related. 
*/ extern struct ip_conntrack_expect *ip_conntrack_expect_alloc(void); +extern void ip_conntrack_expect_free(struct ip_conntrack_expect *exp); + /* Add an expected connection: can have more than one per connection */ -extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp, - struct ip_conntrack *related_to); -extern int ip_conntrack_change_expect(struct ip_conntrack_expect *expect, - struct ip_conntrack_tuple *newtuple); +extern int ip_conntrack_expect_related(struct ip_conntrack_expect *exp); extern void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp); #endif /*_IP_CONNTRACK_HELPER_H*/ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_irc.h b/include/linux/netfilter_ipv4/ip_conntrack_irc.h index 0cd24a02d360..16601e0d5626 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_irc.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_irc.h @@ -14,24 +14,16 @@ #ifndef _IP_CONNTRACK_IRC_H #define _IP_CONNTRACK_IRC_H -/* We record seq number and length of irc ip/port text here: all in - host order. 
*/ - -/* This structure is per expected connection */ -struct ip_ct_irc_expect -{ - /* length of IP address */ - u_int32_t len; - /* Port that was to be used */ - u_int16_t port; -}; - /* This structure exists only once per master */ struct ip_ct_irc_master { }; - #ifdef __KERNEL__ +extern unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp); #define IRC_PORT 6667 diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h index 0ff067b3fdb0..e20b57c5e1b7 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h @@ -44,10 +44,6 @@ struct ip_conntrack_protocol /* Called when a conntrack entry is destroyed */ void (*destroy)(struct ip_conntrack *conntrack); - /* Has to decide if a expectation matches one packet or not */ - int (*exp_matches_pkt)(struct ip_conntrack_expect *exp, - const struct sk_buff *skb); - int (*error)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, unsigned int hooknum); diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h b/include/linux/netfilter_ipv4/ip_conntrack_tftp.h index 8b75b45f1f61..50fbafdf9ed5 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tftp.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tftp.h @@ -13,4 +13,8 @@ struct tftphdr { #define TFTP_OPCODE_ACK 4 #define TFTP_OPCODE_ERROR 5 +unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp); + #endif /* _IP_CT_TFTP */ diff --git a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h index 3a71176e2060..ca1afa8fc693 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack_tuple.h +++ b/include/linux/netfilter_ipv4/ip_conntrack_tuple.h @@ -64,7 +64,10 @@ struct ip_conntrack_tuple } u; /* The protocol. 
*/ - u_int16_t protonum; + u8 protonum; + + /* The direction (for tuplehash) */ + u8 dir; } dst; }; @@ -94,7 +97,7 @@ DEBUGP("tuple %p: %u %u.%u.%u.%u:%hu -> %u.%u.%u.%u:%hu\n", \ #define CTINFO2DIR(ctinfo) ((ctinfo) >= IP_CT_IS_REPLY ? IP_CT_DIR_REPLY : IP_CT_DIR_ORIGINAL) /* If we're the first tuple, it's the original dir. */ -#define DIRECTION(h) ((enum ip_conntrack_dir)(&(h)->ctrack->tuplehash[1] == (h))) +#define DIRECTION(h) ((enum ip_conntrack_dir)(h)->tuple.dst.dir) /* Connections have two entries in the hash table: one for each way */ struct ip_conntrack_tuple_hash @@ -102,9 +105,6 @@ struct ip_conntrack_tuple_hash struct list_head list; struct ip_conntrack_tuple tuple; - - /* this == &ctrack->tuplehash[DIRECTION(this)]. */ - struct ip_conntrack *ctrack; }; #endif /* __KERNEL__ */ diff --git a/include/linux/netfilter_ipv4/ip_nat.h b/include/linux/netfilter_ipv4/ip_nat.h index 06e5ad38683e..2b72b86176f0 100644 --- a/include/linux/netfilter_ipv4/ip_nat.h +++ b/include/linux/netfilter_ipv4/ip_nat.h @@ -16,8 +16,6 @@ enum ip_nat_manip_type #define IP_NAT_RANGE_MAP_IPS 1 #define IP_NAT_RANGE_PROTO_SPECIFIED 2 -/* Used internally by get_unique_tuple(). */ -#define IP_NAT_RANGE_FULL 4 /* NAT sequence number modifications */ struct ip_nat_seq { @@ -50,24 +48,6 @@ struct ip_nat_multi_range_compat struct ip_nat_range range[1]; }; -/* Worst case: local-out manip + 1 post-routing, and reverse dirn. */ -#define IP_NAT_MAX_MANIPS (2*3) - -struct ip_nat_info_manip -{ - /* The direction. */ - u_int8_t direction; - - /* Which hook the manipulation happens on. */ - u_int8_t hooknum; - - /* The manipulation type. */ - u_int8_t maniptype; - - /* Manipulations to occur at each conntrack in this dirn. */ - struct ip_conntrack_manip manip; -}; - #ifdef __KERNEL__ #include <linux/list.h> #include <linux/netfilter_ipv4/lockhelp.h> @@ -78,14 +58,6 @@ DECLARE_RWLOCK_EXTERN(ip_nat_lock); /* The structure embedded in the conntrack structure. 
*/ struct ip_nat_info { - /* Set to zero when conntrack created: bitmask of maniptypes */ - u_int16_t initialized; - - u_int16_t num_manips; - - /* Manipulations to be done on this conntrack. */ - struct ip_nat_info_manip manips[IP_NAT_MAX_MANIPS]; - struct list_head bysource; /* Helper (NULL if none). */ @@ -94,6 +66,8 @@ struct ip_nat_info struct ip_nat_seq seq[IP_CT_DIR_MAX]; }; +struct ip_conntrack; + /* Set up the info structure to map into this range. */ extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, const struct ip_nat_range *range, diff --git a/include/linux/netfilter_ipv4/ip_nat_core.h b/include/linux/netfilter_ipv4/ip_nat_core.h index 4f01f882f2fc..3b50eb91f007 100644 --- a/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/include/linux/netfilter_ipv4/ip_nat_core.h @@ -8,20 +8,13 @@ extern int ip_nat_init(void); extern void ip_nat_cleanup(void); -extern unsigned int do_bindings(struct ip_conntrack *ct, - enum ip_conntrack_info conntrackinfo, - struct ip_nat_info *info, - unsigned int hooknum, - struct sk_buff **pskb); +extern unsigned int nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info conntrackinfo, + unsigned int hooknum, + struct sk_buff **pskb); extern int icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *conntrack, - unsigned int hooknum, - int dir); - -extern void replace_in_hashes(struct ip_conntrack *conntrack, - struct ip_nat_info *info); -extern void place_in_hashes(struct ip_conntrack *conntrack, - struct ip_nat_info *info); - + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir); #endif /* _IP_NAT_CORE_H */ diff --git a/include/linux/netfilter_ipv4/ip_nat_helper.h b/include/linux/netfilter_ipv4/ip_nat_helper.h index b34e4ce9ee6a..bf9cb105c885 100644 --- a/include/linux/netfilter_ipv4/ip_nat_helper.h +++ b/include/linux/netfilter_ipv4/ip_nat_helper.h @@ -7,46 +7,6 @@ struct sk_buff; -/* Flags */ -/* NAT helper must be called on every packet (for TCP) */ 
-#define IP_NAT_HELPER_F_ALWAYS 0x01 - -struct ip_nat_helper -{ - struct list_head list; /* Internal use */ - - const char *name; /* name of the module */ - unsigned char flags; /* Flags (see above) */ - struct module *me; /* pointer to self */ - - /* Mask of things we will help: vs. tuple from server */ - struct ip_conntrack_tuple tuple; - struct ip_conntrack_tuple mask; - - /* Helper function: returns verdict */ - unsigned int (*help)(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp, - struct ip_nat_info *info, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb); - - /* Returns verdict and sets up NAT for this connection */ - unsigned int (*expect)(struct sk_buff **pskb, - unsigned int hooknum, - struct ip_conntrack *ct, - struct ip_nat_info *info); -}; - -extern int ip_nat_helper_register(struct ip_nat_helper *me); -extern void ip_nat_helper_unregister(struct ip_nat_helper *me); - -extern struct ip_nat_helper * -ip_nat_find_helper(const struct ip_conntrack_tuple *tuple); - -extern struct ip_nat_helper * -__ip_nat_find_helper(const struct ip_conntrack_tuple *tuple); - /* These return true or false. */ extern int ip_nat_mangle_tcp_packet(struct sk_buff **skb, struct ip_conntrack *ct, @@ -65,4 +25,9 @@ extern int ip_nat_mangle_udp_packet(struct sk_buff **skb, extern int ip_nat_seq_adjust(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo); + +/* Setup NAT on this expected conntrack so it follows master, but goes + * to port ct->master->saved_proto. */ +extern void ip_nat_follow_master(struct ip_conntrack *ct, + struct ip_conntrack_expect *this); #endif diff --git a/include/linux/netfilter_ipv4/ip_nat_protocol.h b/include/linux/netfilter_ipv4/ip_nat_protocol.h index f343239cd4ea..129708c22386 100644 --- a/include/linux/netfilter_ipv4/ip_nat_protocol.h +++ b/include/linux/netfilter_ipv4/ip_nat_protocol.h @@ -15,11 +15,11 @@ struct ip_nat_protocol /* Protocol number. 
*/ unsigned int protonum; - /* Do a packet translation according to the ip_nat_proto_manip - * and manip type. Return true if succeeded. */ + /* Translate a packet to the target according to manip type. + Return true if succeeded. */ int (*manip_pkt)(struct sk_buff **pskb, unsigned int iphdroff, - const struct ip_conntrack_manip *manip, + const struct ip_conntrack_tuple *tuple, enum ip_nat_manip_type maniptype); /* Is the manipable part of the tuple between min and max incl? */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index b8d7df3916a5..b31ca0400372 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -203,10 +203,6 @@ struct tcp_sack_block { __u32 end_seq; }; -typedef struct tcp_pcount { - __u32 val; -} tcp_pcount_t; - enum tcp_congestion_algo { TCP_RENO=0, TCP_VEGAS, @@ -289,9 +285,9 @@ struct tcp_sock { __u32 rtt_seq; /* sequence number to update rttvar */ __u32 rto; /* retransmit timeout */ - tcp_pcount_t packets_out; /* Packets which are "in flight" */ - tcp_pcount_t left_out; /* Packets which leaved network */ - tcp_pcount_t retrans_out; /* Retransmitted packets out */ + __u32 packets_out; /* Packets which are "in flight" */ + __u32 left_out; /* Packets which leaved network */ + __u32 retrans_out; /* Retransmitted packets out */ /* @@ -352,9 +348,9 @@ struct tcp_sock { __u8 syn_retries; /* num of allowed syn retries */ __u8 ecn_flags; /* ECN status bits. 
*/ __u16 prior_ssthresh; /* ssthresh saved at recovery start */ - tcp_pcount_t lost_out; /* Lost packets */ - tcp_pcount_t sacked_out;/* SACK'd packets */ - tcp_pcount_t fackets_out;/* FACK'd packets */ + __u32 lost_out; /* Lost packets */ + __u32 sacked_out; /* SACK'd packets */ + __u32 fackets_out; /* FACK'd packets */ __u32 high_seq; /* snd_nxt at onset of congestion */ __u32 retrans_stamp; /* Timestamp of the last retransmit, diff --git a/include/linux/trdevice.h b/include/linux/trdevice.h index 2662f57568d4..aaa1f337edcb 100644 --- a/include/linux/trdevice.h +++ b/include/linux/trdevice.h @@ -28,10 +28,6 @@ #include <linux/if_tr.h> #ifdef __KERNEL__ -extern int tr_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, - void *saddr, unsigned len); -extern int tr_rebuild_header(struct sk_buff *skb); extern unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev); extern void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct net_device *dev); extern struct net_device *alloc_trdev(int sizeof_priv); diff --git a/include/net/ipx.h b/include/net/ipx.h index 006a31bfa2d1..5c0cf33826c5 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -90,7 +90,11 @@ struct ipx_cb { } last_hop; }; -struct ipx_opt { +#include <net/sock.h> + +struct ipx_sock { + /* struct sock has to be the first member of ipx_sock */ + struct sock sk; struct ipx_address dest_addr; struct ipx_interface *intrfc; unsigned short port; @@ -105,9 +109,14 @@ struct ipx_opt { unsigned short ipx_ncp_conn; }; -#define ipx_sk(__sk) ((struct ipx_opt *)(__sk)->sk_protinfo) +static inline struct ipx_sock *ipx_sk(struct sock *sk) +{ + return (struct ipx_sock *)sk; +} + #define IPX_SKB_CB(__skb) ((struct ipx_cb *)&((__skb)->cb[0])) #endif + #define IPX_MIN_EPHEMERAL_SOCKET 0x4000 #define IPX_MAX_EPHEMERAL_SOCKET 0x7fff diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index cca17d427c9d..960abfa48d68 100644 --- 
a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -423,7 +423,7 @@ static inline __s32 sctp_jitter(__u32 rto) } /* Break down data chunks at this point. */ -static inline int sctp_frag_point(const struct sctp_opt *sp, int pmtu) +static inline int sctp_frag_point(const struct sctp_sock *sp, int pmtu) { int frag = pmtu; @@ -576,23 +576,6 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) return (h & (sctp_assoc_hashsize-1)); } -/* WARNING: Do not change the layout of the members in sctp_sock! */ -struct sctp_sock { - struct inet_sock inet; - struct sctp_opt sctp; -}; - -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -struct sctp6_sock { - struct inet_sock inet; - struct sctp_opt sctp; - struct ipv6_pinfo inet6; -}; -#endif /* CONFIG_IPV6 */ - -#define sctp_sk(__sk) (&((struct sctp_sock *)__sk)->sctp) -#define sctp_opt2sk(__sp) &container_of(__sp, struct sctp_sock, sctp)->inet.sk - /* Is a socket of this style? */ #define sctp_style(sk, style) __sctp_style((sk), (SCTP_SOCKET_##style)) static inline int __sctp_style(const struct sock *sk, sctp_socket_type_t style) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index dfa0dc43fb17..7e64cf6bda1e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -58,6 +58,7 @@ #include <linux/socket.h> /* linux/in.h needs this!! */ #include <linux/in.h> /* We get struct sockaddr_in. */ #include <linux/in6.h> /* We get struct in6_addr */ +#include <linux/ipv6.h> #include <asm/param.h> /* We get MAXHOSTNAMELEN. */ #include <asm/atomic.h> /* This gets us atomic counters. */ #include <linux/skbuff.h> /* We need sk_buff_head. */ @@ -84,7 +85,6 @@ struct sctp_inq; struct sctp_outq; struct sctp_bind_addr; struct sctp_ulpq; -struct sctp_opt; struct sctp_ep_common; struct sctp_ssnmap; @@ -234,7 +234,9 @@ typedef enum { } sctp_socket_type_t; /* Per socket SCTP information. 
*/ -struct sctp_opt { +struct sctp_sock { + /* inet_sock has to be the first member of sctp_sock */ + struct inet_sock inet; /* What kind of a socket is this? */ sctp_socket_type_t type; @@ -272,6 +274,22 @@ struct sctp_opt { struct sk_buff_head pd_lobby; }; +static inline struct sctp_sock *sctp_sk(const struct sock *sk) +{ + return (struct sctp_sock *)sk; +} + +static inline struct sock *sctp_opt2sk(const struct sctp_sock *sp) +{ + return (struct sock *)sp; +} + +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +struct sctp6_sock { + struct sctp_sock sctp; + struct ipv6_pinfo inet6; +}; +#endif /* CONFIG_IPV6 */ /* This is our APPLICATION-SPECIFIC state cookie. @@ -487,12 +505,12 @@ struct sctp_af { int (*to_addr_param) (const union sctp_addr *, union sctp_addr_param *); int (*addr_valid) (union sctp_addr *, - struct sctp_opt *); + struct sctp_sock *); sctp_scope_t (*scope) (union sctp_addr *); void (*inaddr_any) (union sctp_addr *, unsigned short); int (*is_any) (const union sctp_addr *); int (*available) (union sctp_addr *, - struct sctp_opt *); + struct sctp_sock *); int (*skb_iif) (const struct sk_buff *sk); int (*is_ce) (const struct sk_buff *sk); void (*seq_dump_addr)(struct seq_file *seq, @@ -510,16 +528,16 @@ int sctp_register_af(struct sctp_af *); struct sctp_pf { void (*event_msgname)(struct sctp_ulpevent *, char *, int *); void (*skb_msgname) (struct sk_buff *, char *, int *); - int (*af_supported) (sa_family_t, struct sctp_opt *); + int (*af_supported) (sa_family_t, struct sctp_sock *); int (*cmp_addr) (const union sctp_addr *, const union sctp_addr *, - struct sctp_opt *); - int (*bind_verify) (struct sctp_opt *, union sctp_addr *); - int (*send_verify) (struct sctp_opt *, union sctp_addr *); - int (*supported_addrs)(const struct sctp_opt *, __u16 *); + struct sctp_sock *); + int (*bind_verify) (struct sctp_sock *, union sctp_addr *); + int (*send_verify) (struct sctp_sock *, union sctp_addr *); + int (*supported_addrs)(const struct sctp_sock 
*, __u16 *); struct sock *(*create_accept_sk) (struct sock *sk, struct sctp_association *asoc); - void (*addr_v4map) (struct sctp_opt *, union sctp_addr *); + void (*addr_v4map) (struct sctp_sock *, union sctp_addr *); struct sctp_af *af; }; @@ -922,7 +940,7 @@ struct sctp_transport *sctp_transport_new(const union sctp_addr *, int); void sctp_transport_set_owner(struct sctp_transport *, struct sctp_association *); void sctp_transport_route(struct sctp_transport *, union sctp_addr *, - struct sctp_opt *); + struct sctp_sock *); void sctp_transport_pmtu(struct sctp_transport *); void sctp_transport_free(struct sctp_transport *); void sctp_transport_reset_timers(struct sctp_transport *); @@ -1071,11 +1089,11 @@ int sctp_add_bind_addr(struct sctp_bind_addr *, union sctp_addr *, int gfp); int sctp_del_bind_addr(struct sctp_bind_addr *, union sctp_addr *); int sctp_bind_addr_match(struct sctp_bind_addr *, const union sctp_addr *, - struct sctp_opt *); + struct sctp_sock *); union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, const union sctp_addr *addrs, int addrcnt, - struct sctp_opt *opt); + struct sctp_sock *opt); union sctp_params sctp_bind_addrs_to_raw(const struct sctp_bind_addr *bp, int *addrs_len, int gfp); int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw, int len, diff --git a/include/net/tcp.h b/include/net/tcp.h index 8987a316fe91..1a54cdba14ba 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1176,55 +1176,23 @@ static inline int tcp_skb_mss(const struct sk_buff *skb) return skb_shinfo(skb)->tso_size; } -static inline void tcp_inc_pcount(tcp_pcount_t *count, - const struct sk_buff *skb) -{ - count->val += tcp_skb_pcount(skb); -} - -static inline void tcp_inc_pcount_explicit(tcp_pcount_t *count, int amt) -{ - count->val += amt; -} - -static inline void tcp_dec_pcount_explicit(tcp_pcount_t *count, int amt) -{ - count->val -= amt; -} - -static inline void tcp_dec_pcount(tcp_pcount_t *count, - const struct sk_buff *skb) -{ - 
count->val -= tcp_skb_pcount(skb); -} - -static inline void tcp_dec_pcount_approx(tcp_pcount_t *count, +static inline void tcp_dec_pcount_approx(__u32 *count, const struct sk_buff *skb) { - if (count->val) { - count->val -= tcp_skb_pcount(skb); - if ((int)count->val < 0) - count->val = 0; + if (*count) { + *count -= tcp_skb_pcount(skb); + if ((int)*count < 0) + *count = 0; } } -static inline __u32 tcp_get_pcount(const tcp_pcount_t *count) -{ - return count->val; -} - -static inline void tcp_set_pcount(tcp_pcount_t *count, __u32 val) -{ - count->val = val; -} - static inline void tcp_packets_out_inc(struct sock *sk, struct tcp_sock *tp, const struct sk_buff *skb) { - int orig = tcp_get_pcount(&tp->packets_out); + int orig = tp->packets_out; - tcp_inc_pcount(&tp->packets_out, skb); + tp->packets_out += tcp_skb_pcount(skb); if (!orig) tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); } @@ -1232,7 +1200,7 @@ static inline void tcp_packets_out_inc(struct sock *sk, static inline void tcp_packets_out_dec(struct tcp_sock *tp, const struct sk_buff *skb) { - tcp_dec_pcount(&tp->packets_out, skb); + tp->packets_out -= tcp_skb_pcount(skb); } /* This determines how many packets are "in the network" to the best @@ -1251,9 +1219,7 @@ static inline void tcp_packets_out_dec(struct tcp_sock *tp, */ static __inline__ unsigned int tcp_packets_in_flight(const struct tcp_sock *tp) { - return (tcp_get_pcount(&tp->packets_out) - - tcp_get_pcount(&tp->left_out) + - tcp_get_pcount(&tp->retrans_out)); + return (tp->packets_out - tp->left_out + tp->retrans_out); } /* @@ -1357,14 +1323,9 @@ static inline __u32 tcp_current_ssthresh(struct tcp_sock *tp) static inline void tcp_sync_left_out(struct tcp_sock *tp) { if (tp->sack_ok && - (tcp_get_pcount(&tp->sacked_out) >= - tcp_get_pcount(&tp->packets_out) - tcp_get_pcount(&tp->lost_out))) - tcp_set_pcount(&tp->sacked_out, - (tcp_get_pcount(&tp->packets_out) - - tcp_get_pcount(&tp->lost_out))); - tcp_set_pcount(&tp->left_out, - 
(tcp_get_pcount(&tp->sacked_out) + - tcp_get_pcount(&tp->lost_out))); + (tp->sacked_out >= tp->packets_out - tp->lost_out)) + tp->sacked_out = tp->packets_out - tp->lost_out; + tp->left_out = tp->sacked_out + tp->lost_out; } extern void tcp_cwnd_application_limited(struct sock *sk); @@ -1373,7 +1334,7 @@ extern void tcp_cwnd_application_limited(struct sock *sk); static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) { - __u32 packets_out = tcp_get_pcount(&tp->packets_out); + __u32 packets_out = tp->packets_out; if (packets_out >= tp->snd_cwnd) { /* Network is feed fully. */ @@ -1381,8 +1342,8 @@ static inline void tcp_cwnd_validate(struct sock *sk, struct tcp_sock *tp) tp->snd_cwnd_stamp = tcp_time_stamp; } else { /* Network starves. */ - if (tcp_get_pcount(&tp->packets_out) > tp->snd_cwnd_used) - tp->snd_cwnd_used = tcp_get_pcount(&tp->packets_out); + if (tp->packets_out > tp->snd_cwnd_used) + tp->snd_cwnd_used = tp->packets_out; if ((s32)(tcp_time_stamp - tp->snd_cwnd_stamp) >= tp->rto) tcp_cwnd_application_limited(sk); @@ -1450,7 +1411,7 @@ tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb, !(TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) && ((nonagle&TCP_NAGLE_CORK) || (!nonagle && - tcp_get_pcount(&tp->packets_out) && + tp->packets_out && tcp_minshall_check(tp)))); } @@ -1503,7 +1464,7 @@ static __inline__ int tcp_snd_test(const struct tcp_sock *tp, static __inline__ void tcp_check_probe_timer(struct sock *sk, struct tcp_sock *tp) { - if (!tcp_get_pcount(&tp->packets_out) && !tp->pending) + if (!tp->packets_out && !tp->pending) tcp_reset_xmit_timer(sk, TCP_TIME_PROBE0, tp->rto); } diff --git a/net/802/fc.c b/net/802/fc.c index 9a502820f7f8..16702377958e 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -35,8 +35,9 @@ * Put the headers on a Fibre Channel packet. 
*/ -int fc_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int fc_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + void *daddr, void *saddr, unsigned len) { struct fch_hdr *fch; int hdr_len; @@ -81,7 +82,7 @@ int fc_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, * can now send the packet. */ -int fc_rebuild_header(struct sk_buff *skb) +static int fc_rebuild_header(struct sk_buff *skb) { struct fch_hdr *fch=(struct fch_hdr *)skb->data; struct fcllc *fcllc=(struct fcllc *)(skb->data+sizeof(struct fch_hdr)); diff --git a/net/802/fddi.c b/net/802/fddi.c index 752d77d37d3e..f9a31a9f70f1 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -52,8 +52,9 @@ * daddr=NULL means leave destination address (eg unresolved arp) */ -int fddi_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int fddi_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + void *daddr, void *saddr, unsigned len) { int hl = FDDI_K_SNAP_HLEN; struct fddihdr *fddi; @@ -96,7 +97,7 @@ int fddi_header(struct sk_buff *skb, struct net_device *dev, unsigned short type * this sk_buff. We now let ARP fill in the other fields. */ -int fddi_rebuild_header(struct sk_buff *skb) +static int fddi_rebuild_header(struct sk_buff *skb) { struct fddihdr *fddi = (struct fddihdr *)skb->data; diff --git a/net/802/hippi.c b/net/802/hippi.c index bb66e0315276..4eb135c0afbb 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -40,26 +40,15 @@ #include <asm/system.h> /* - * hippi_net_init() - * - * Do nothing, this is just to pursuade the stupid linker to behave. 
- */ - -void hippi_net_init(void) -{ - return; -} - -/* * Create the HIPPI MAC header for an arbitrary protocol layer * * saddr=NULL means use device source address * daddr=NULL means leave destination address (eg unresolved arp) */ -int hippi_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, void *daddr, void *saddr, - unsigned len) +static int hippi_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, void *daddr, void *saddr, + unsigned len) { struct hippi_hdr *hip = (struct hippi_hdr *)skb_push(skb, HIPPI_HLEN); @@ -107,7 +96,7 @@ int hippi_header(struct sk_buff *skb, struct net_device *dev, * completed on this sk_buff. We now let ARP fill in the other fields. */ -int hippi_rebuild_header(struct sk_buff *skb) +static int hippi_rebuild_header(struct sk_buff *skb) { struct hippi_hdr *hip = (struct hippi_hdr *)skb->data; diff --git a/net/802/tr.c b/net/802/tr.c index 94cd2668768e..85293ccf7efc 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -98,8 +98,9 @@ static inline unsigned long rif_hash(const unsigned char *addr) * makes this a little more exciting than on ethernet. */ -int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) +static int tr_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + void *daddr, void *saddr, unsigned len) { struct trh_hdr *trh; int hdr_len; @@ -153,7 +154,7 @@ int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, * can now send the packet. 
*/ -int tr_rebuild_header(struct sk_buff *skb) +static int tr_rebuild_header(struct sk_buff *skb) { struct trh_hdr *trh=(struct trh_hdr *)skb->data; struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr)); diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 87b211585e91..1f6d31670bc7 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -40,7 +40,7 @@ /* Global VLAN variables */ /* Our listing of VLAN group(s) */ -struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]; +static struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]; #define vlan_grp_hashfn(IDX) ((((IDX) >> VLAN_GRP_HASH_SHIFT) ^ (IDX)) & VLAN_GRP_HASH_MASK) static char vlan_fullname[] = "802.1Q VLAN Support"; @@ -52,7 +52,7 @@ static int vlan_device_event(struct notifier_block *, unsigned long, void *); static int vlan_ioctl_handler(void __user *); static int unregister_vlan_dev(struct net_device *, unsigned short ); -struct notifier_block vlan_notifier_block = { +static struct notifier_block vlan_notifier_block = { .notifier_call = vlan_device_event, }; @@ -61,9 +61,6 @@ struct notifier_block vlan_notifier_block = { /* Determines interface naming scheme. 
*/ unsigned short vlan_name_type = VLAN_NAME_TYPE_RAW_PLUS_VID_NO_PAD; -/* DO reorder the header by default */ -unsigned short vlan_default_dev_flags = 1; - static struct packet_type vlan_packet_type = { .type = __constant_htons(ETH_P_8021Q), .func = vlan_skb_recv, /* VLAN receive method */ @@ -490,7 +487,7 @@ static struct net_device *register_vlan_device(const char *eth_IF_name, VLAN_DEV_INFO(new_dev)->vlan_id = VLAN_ID; /* 1 through VLAN_VID_MASK */ VLAN_DEV_INFO(new_dev)->real_dev = real_dev; VLAN_DEV_INFO(new_dev)->dent = NULL; - VLAN_DEV_INFO(new_dev)->flags = vlan_default_dev_flags; + VLAN_DEV_INFO(new_dev)->flags = 1; #ifdef VLAN_DEBUG printk(VLAN_DBG "About to go find the group for idx: %i\n", diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index 0a76d1f0d029..508b1fa14546 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -33,7 +33,6 @@ extern unsigned short vlan_name_type; #define VLAN_GRP_HASH_SHIFT 5 #define VLAN_GRP_HASH_SIZE (1 << VLAN_GRP_HASH_SHIFT) #define VLAN_GRP_HASH_MASK (VLAN_GRP_HASH_SIZE - 1) -extern struct hlist_head vlan_group_hash[VLAN_GRP_HASH_SIZE]; /* Find a VLAN device by the MAC address of its Ethernet device, and * it's VLAN ID. 
The default configuration is to have VLAN's scope diff --git a/net/8021q/vlanproc.c b/net/8021q/vlanproc.c index 5a5f9cd17d81..c32d27af0a3f 100644 --- a/net/8021q/vlanproc.c +++ b/net/8021q/vlanproc.c @@ -239,7 +239,7 @@ int vlan_proc_rem_dev(struct net_device *vlandev) */ /* starting at dev, find a VLAN device */ -struct net_device *vlan_skip(struct net_device *dev) +static struct net_device *vlan_skip(struct net_device *dev) { while (dev && !(dev->priv_flags & IFF_802_1Q_VLAN)) dev = dev->next; diff --git a/net/atm/addr.c b/net/atm/addr.c index 96407a0bb609..225f6843c32f 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -2,7 +2,6 @@ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ - #include <linux/atm.h> #include <linux/atmdev.h> #include <linux/sched.h> @@ -11,127 +10,121 @@ #include "signaling.h" #include "addr.h" - static int check_addr(struct sockaddr_atmsvc *addr) { int i; - if (addr->sas_family != AF_ATMSVC) return -EAFNOSUPPORT; + if (addr->sas_family != AF_ATMSVC) + return -EAFNOSUPPORT; if (!*addr->sas_addr.pub) return *addr->sas_addr.prv ? 
0 : -EINVAL; - for (i = 1; i < ATM_E164_LEN+1; i++) /* make sure it's \0-terminated */ - if (!addr->sas_addr.pub[i]) return 0; + for (i = 1; i < ATM_E164_LEN + 1; i++) /* make sure it's \0-terminated */ + if (!addr->sas_addr.pub[i]) + return 0; return -EINVAL; } - -static int identical(struct sockaddr_atmsvc *a,struct sockaddr_atmsvc *b) +static int identical(struct sockaddr_atmsvc *a, struct sockaddr_atmsvc *b) { if (*a->sas_addr.prv) - if (memcmp(a->sas_addr.prv,b->sas_addr.prv,ATM_ESA_LEN)) + if (memcmp(a->sas_addr.prv, b->sas_addr.prv, ATM_ESA_LEN)) return 0; - if (!*a->sas_addr.pub) return !*b->sas_addr.pub; - if (!*b->sas_addr.pub) return 0; - return !strcmp(a->sas_addr.pub,b->sas_addr.pub); + if (!*a->sas_addr.pub) + return !*b->sas_addr.pub; + if (!*b->sas_addr.pub) + return 0; + return !strcmp(a->sas_addr.pub, b->sas_addr.pub); } - static void notify_sigd(struct atm_dev *dev) { struct sockaddr_atmpvc pvc; pvc.sap_addr.itf = dev->number; - sigd_enq(NULL,as_itf_notify,NULL,&pvc,NULL); + sigd_enq(NULL, as_itf_notify, NULL, &pvc, NULL); } - void atm_reset_addr(struct atm_dev *dev) { unsigned long flags; - struct atm_dev_addr *this; + struct atm_dev_addr *this, *p; spin_lock_irqsave(&dev->lock, flags); - while (dev->local) { - this = dev->local; - dev->local = this->next; - kfree(this); - } + list_for_each_entry_safe(this, p, &dev->local, entry) + kfree(this); spin_unlock_irqrestore(&dev->lock, flags); notify_sigd(dev); } - -int atm_add_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr) +int atm_add_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) { unsigned long flags; - struct atm_dev_addr **walk; + struct atm_dev_addr *this; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - for (walk = &dev->local; *walk; walk = &(*walk)->next) - if (identical(&(*walk)->addr,addr)) { + list_for_each_entry(this, &dev->local, entry) { + if (identical(&this->addr, addr)) { spin_unlock_irqrestore(&dev->lock, flags); 
return -EEXIST; } - *walk = kmalloc(sizeof(struct atm_dev_addr), GFP_ATOMIC); - if (!*walk) { + } + this = kmalloc(sizeof(struct atm_dev_addr), GFP_ATOMIC); + if (!this) { spin_unlock_irqrestore(&dev->lock, flags); return -ENOMEM; } - (*walk)->addr = *addr; - (*walk)->next = NULL; + this->addr = *addr; + list_add(&this->entry, &dev->local); spin_unlock_irqrestore(&dev->lock, flags); notify_sigd(dev); return 0; } - -int atm_del_addr(struct atm_dev *dev,struct sockaddr_atmsvc *addr) +int atm_del_addr(struct atm_dev *dev, struct sockaddr_atmsvc *addr) { unsigned long flags; - struct atm_dev_addr **walk,*this; + struct atm_dev_addr *this; int error; error = check_addr(addr); if (error) return error; spin_lock_irqsave(&dev->lock, flags); - for (walk = &dev->local; *walk; walk = &(*walk)->next) - if (identical(&(*walk)->addr,addr)) break; - if (!*walk) { - spin_unlock_irqrestore(&dev->lock, flags); - return -ENOENT; + list_for_each_entry(this, &dev->local, entry) { + if (identical(&this->addr, addr)) { + list_del(&this->entry); + spin_unlock_irqrestore(&dev->lock, flags); + kfree(this); + notify_sigd(dev); + return 0; + } } - this = *walk; - *walk = this->next; - kfree(this); spin_unlock_irqrestore(&dev->lock, flags); - notify_sigd(dev); - return 0; + return -ENOENT; } - -int atm_get_addr(struct atm_dev *dev,struct sockaddr_atmsvc __user *buf,int size) +int atm_get_addr(struct atm_dev *dev, struct sockaddr_atmsvc __user * buf, + int size) { unsigned long flags; - struct atm_dev_addr *walk; + struct atm_dev_addr *this; int total = 0, error; struct sockaddr_atmsvc *tmp_buf, *tmp_bufp; - spin_lock_irqsave(&dev->lock, flags); - for (walk = dev->local; walk; walk = walk->next) - total += sizeof(struct sockaddr_atmsvc); + list_for_each_entry(this, &dev->local, entry) + total += sizeof(struct sockaddr_atmsvc); tmp_buf = tmp_bufp = kmalloc(total, GFP_ATOMIC); if (!tmp_buf) { spin_unlock_irqrestore(&dev->lock, flags); return -ENOMEM; } - for (walk = dev->local; walk; walk = 
walk->next) - memcpy(tmp_bufp++, &walk->addr, sizeof(struct sockaddr_atmsvc)); + list_for_each_entry(this, &dev->local, entry) + memcpy(tmp_bufp++, &this->addr, sizeof(struct sockaddr_atmsvc)); spin_unlock_irqrestore(&dev->lock, flags); error = total > size ? -E2BIG : total; if (copy_to_user(buf, tmp_buf, total < size ? total : size)) diff --git a/net/atm/clip.c b/net/atm/clip.c index 8db42d467af3..53aac1833182 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -97,7 +97,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) printk(KERN_CRIT "!clip_vcc->entry (clip_vcc %p)\n",clip_vcc); return; } - spin_lock_bh(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */ + spin_lock_irq(&entry->neigh->dev->xmit_lock); /* block clip_start_xmit() */ entry->neigh->used = jiffies; for (walk = &entry->vccs; *walk; walk = &(*walk)->next) if (*walk == clip_vcc) { @@ -121,7 +121,7 @@ static void unlink_clip_vcc(struct clip_vcc *clip_vcc) printk(KERN_CRIT "ATMARP: unlink_clip_vcc failed (entry %p, vcc " "0x%p)\n",entry,clip_vcc); out: - spin_unlock_bh(&entry->neigh->dev->xmit_lock); + spin_unlock_irq(&entry->neigh->dev->xmit_lock); } /* The neighbour entry n->lock is held. 
*/ diff --git a/net/atm/lec.c b/net/atm/lec.c index a920b9246f08..bf2c45af6835 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -422,6 +422,7 @@ lec_get_stats(struct net_device *dev) static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) { + unsigned long flags; struct net_device *dev = (struct net_device*)vcc->proto_data; struct lec_priv *priv = (struct lec_priv*)dev->priv; struct atmlec_msg *mesg; @@ -456,8 +457,10 @@ lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) lec_flush_complete(priv, mesg->content.normal.flag); break; case l_narp_req: /* LANE2: see 7.1.35 in the lane2 spec */ + spin_lock_irqsave(&priv->lec_arp_lock, flags); entry = lec_arp_find(priv, mesg->content.normal.mac_addr); lec_arp_remove(priv, entry); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); if (mesg->content.normal.no_source_le_narp) break; @@ -1222,17 +1225,20 @@ module_exit(lane_module_cleanup); static int lane2_resolve(struct net_device *dev, u8 *dst_mac, int force, u8 **tlvs, u32 *sizeoftlvs) { + unsigned long flags; struct lec_priv *priv = (struct lec_priv *)dev->priv; struct lec_arp_table *table; struct sk_buff *skb; int retval; if (force == 0) { + spin_lock_irqsave(&priv->lec_arp_lock, flags); table = lec_arp_find(priv, dst_mac); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); if(table == NULL) return -1; - *tlvs = kmalloc(table->sizeoftlvs, GFP_KERNEL); + *tlvs = kmalloc(table->sizeoftlvs, GFP_ATOMIC); if (*tlvs == NULL) return -1; @@ -1377,18 +1383,6 @@ void dump_arp_table(struct lec_priv *priv); #define HASH(ch) (ch & (LEC_ARP_TABLE_SIZE -1)) -static __inline__ void -lec_arp_get(struct lec_priv *priv) -{ - atomic_inc(&priv->lec_arp_users); -} - -static __inline__ void -lec_arp_put(struct lec_priv *priv) -{ - atomic_dec(&priv->lec_arp_users); -} - /* * Initialization of arp-cache */ @@ -1397,12 +1391,12 @@ lec_arp_init(struct lec_priv *priv) { unsigned short i; - for (i=0;i<LEC_ARP_TABLE_SIZE;i++) { + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { 
priv->lec_arp_tables[i] = NULL; } spin_lock_init(&priv->lec_arp_lock); init_timer(&priv->lec_arp_timer); - priv->lec_arp_timer.expires = jiffies+LEC_ARP_REFRESH_INTERVAL; + priv->lec_arp_timer.expires = jiffies + LEC_ARP_REFRESH_INTERVAL; priv->lec_arp_timer.data = (unsigned long)priv; priv->lec_arp_timer.function = lec_arp_check_expire; add_timer(&priv->lec_arp_timer); @@ -1439,12 +1433,9 @@ lec_arp_clear_vccs(struct lec_arp_table *entry) static inline void lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add) { - unsigned long flags; unsigned short place; struct lec_arp_table *tmp; - spin_lock_irqsave(&priv->lec_arp_lock, flags); - place = HASH(to_add->mac_addr[ETH_ALEN-1]); tmp = priv->lec_arp_tables[place]; to_add->next = NULL; @@ -1457,8 +1448,6 @@ lec_arp_add(struct lec_priv *priv, struct lec_arp_table *to_add) tmp->next = to_add; } - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - DPRINTK("LEC_ARP: Added entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff&to_add->mac_addr[0], 0xff&to_add->mac_addr[1], 0xff&to_add->mac_addr[2], 0xff&to_add->mac_addr[3], @@ -1472,15 +1461,11 @@ static int lec_arp_remove(struct lec_priv *priv, struct lec_arp_table *to_remove) { - unsigned long flags; unsigned short place; struct lec_arp_table *tmp; int remove_vcc=1; - spin_lock_irqsave(&priv->lec_arp_lock, flags); - if (!to_remove) { - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); return -1; } place = HASH(to_remove->mac_addr[ETH_ALEN-1]); @@ -1492,7 +1477,6 @@ lec_arp_remove(struct lec_priv *priv, tmp = tmp->next; } if (!tmp) {/* Entry was not found */ - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); return -1; } } @@ -1505,7 +1489,7 @@ lec_arp_remove(struct lec_priv *priv, /* * ESI_FLUSH_PENDING, ESI_FORWARD_DIRECT */ - for(place=0;place<LEC_ARP_TABLE_SIZE;place++) { + for(place = 0; place < LEC_ARP_TABLE_SIZE; place++) { for(tmp = priv->lec_arp_tables[place]; tmp != NULL; tmp = tmp->next) { if (memcmp(tmp->atm_addr, to_remove->atm_addr, 
ATM_ESA_LEN)==0) { @@ -1519,8 +1503,6 @@ lec_arp_remove(struct lec_priv *priv, } skb_queue_purge(&to_remove->tx_wait); /* FIXME: good place for this? */ - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - DPRINTK("LEC_ARP: Removed entry:%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", 0xff&to_remove->mac_addr[0], 0xff&to_remove->mac_addr[1], 0xff&to_remove->mac_addr[2], 0xff&to_remove->mac_addr[3], @@ -1704,6 +1686,7 @@ dump_arp_table(struct lec_priv *priv) void lec_arp_destroy(struct lec_priv *priv) { + unsigned long flags; struct lec_arp_table *entry, *next; int i; @@ -1712,8 +1695,10 @@ lec_arp_destroy(struct lec_priv *priv) /* * Remove all entries */ - for (i=0;i<LEC_ARP_TABLE_SIZE;i++) { - for(entry =priv->lec_arp_tables[i];entry != NULL; entry=next) { + + spin_lock_irqsave(&priv->lec_arp_lock, flags); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for(entry = priv->lec_arp_tables[i]; entry != NULL; entry=next) { next = entry->next; lec_arp_remove(priv, entry); kfree(entry); @@ -1748,7 +1733,8 @@ lec_arp_destroy(struct lec_priv *priv) priv->mcast_fwds = NULL; priv->mcast_vcc = NULL; memset(priv->lec_arp_tables, 0, - sizeof(struct lec_arp_table*)*LEC_ARP_TABLE_SIZE); + sizeof(struct lec_arp_table *) * LEC_ARP_TABLE_SIZE); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } @@ -1765,18 +1751,15 @@ lec_arp_find(struct lec_priv *priv, DPRINTK("LEC_ARP: lec_arp_find :%2.2x %2.2x %2.2x %2.2x %2.2x %2.2x\n", mac_addr[0]&0xff, mac_addr[1]&0xff, mac_addr[2]&0xff, mac_addr[3]&0xff, mac_addr[4]&0xff, mac_addr[5]&0xff); - lec_arp_get(priv); place = HASH(mac_addr[ETH_ALEN-1]); to_return = priv->lec_arp_tables[place]; while(to_return) { if (memcmp(mac_addr, to_return->mac_addr, ETH_ALEN) == 0) { - lec_arp_put(priv); return to_return; } to_return = to_return->next; } - lec_arp_put(priv); return NULL; } @@ -1785,17 +1768,17 @@ make_entry(struct lec_priv *priv, unsigned char *mac_addr) { struct lec_arp_table *to_return; - to_return=(struct lec_arp_table *)kmalloc(sizeof(struct 
lec_arp_table), - GFP_ATOMIC); + to_return = (struct lec_arp_table *) kmalloc(sizeof(struct lec_arp_table), + GFP_ATOMIC); if (!to_return) { printk("LEC: Arp entry kmalloc failed\n"); return NULL; } - memset(to_return,0,sizeof(struct lec_arp_table)); + memset(to_return, 0, sizeof(struct lec_arp_table)); memcpy(to_return->mac_addr, mac_addr, ETH_ALEN); init_timer(&to_return->timer); to_return->timer.function = lec_arp_expire_arp; - to_return->timer.data = (unsigned long)to_return; + to_return->timer.data = (unsigned long) to_return; to_return->last_used = jiffies; to_return->priv = priv; skb_queue_head_init(&to_return->tx_wait); @@ -1835,6 +1818,7 @@ lec_arp_expire_arp(unsigned long data) static void lec_arp_expire_vcc(unsigned long data) { + unsigned flags; struct lec_arp_table *to_remove = (struct lec_arp_table*)data; struct lec_priv *priv = (struct lec_priv *)to_remove->priv; struct lec_arp_table *entry = NULL; @@ -1846,6 +1830,8 @@ lec_arp_expire_vcc(unsigned long data) to_remove->vcc?to_remove->recv_vcc->vpi:0, to_remove->vcc?to_remove->recv_vcc->vci:0); DPRINTK("eo:%p nf:%p\n",priv->lec_arp_empty_ones,priv->lec_no_forward); + + spin_lock_irqsave(&priv->lec_arp_lock, flags); if (to_remove == priv->lec_arp_empty_ones) priv->lec_arp_empty_ones = to_remove->next; else { @@ -1866,6 +1852,8 @@ lec_arp_expire_vcc(unsigned long data) entry->next = to_remove->next; } } + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + lec_arp_clear_vccs(to_remove); kfree(to_remove); } @@ -1889,69 +1877,67 @@ lec_arp_expire_vcc(unsigned long data) static void lec_arp_check_expire(unsigned long data) { + unsigned long flags; struct lec_priv *priv = (struct lec_priv *)data; struct lec_arp_table *entry, *next; unsigned long now; unsigned long time_to_check; int i; - DPRINTK("lec_arp_check_expire %p,%d\n",priv, - atomic_read(&priv->lec_arp_users)); + DPRINTK("lec_arp_check_expire %p\n",priv); DPRINTK("expire: eo:%p nf:%p\n",priv->lec_arp_empty_ones, priv->lec_no_forward); - if 
(!atomic_read(&priv->lec_arp_users)) { - lec_arp_get(priv); - now = jiffies; - for(i=0;i<LEC_ARP_TABLE_SIZE;i++) { - for(entry = priv->lec_arp_tables[i]; entry != NULL; ) { - if ((entry->flags) & LEC_REMOTE_FLAG && - priv->topology_change) - time_to_check=priv->forward_delay_time; - else - time_to_check = priv->aging_time; - - DPRINTK("About to expire: %lx - %lx > %lx\n", - now,entry->last_used, time_to_check); - if( time_after(now, entry->last_used+ - time_to_check) && - !(entry->flags & LEC_PERMANENT_FLAG) && - !(entry->mac_addr[0] & 0x01) ) { /* LANE2: 7.1.20 */ - /* Remove entry */ - DPRINTK("LEC:Entry timed out\n"); - next = entry->next; - lec_arp_remove(priv, entry); - kfree(entry); - entry = next; - } else { - /* Something else */ - if ((entry->status == ESI_VC_PENDING || - entry->status == ESI_ARP_PENDING) - && time_after_eq(now, - entry->timestamp + - priv->max_unknown_frame_time)) { - entry->timestamp = jiffies; - entry->packets_flooded = 0; - if (entry->status == ESI_VC_PENDING) - send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL); - } - if (entry->status == ESI_FLUSH_PENDING - && - time_after_eq(now, entry->timestamp+ - priv->path_switching_delay)) { - struct sk_buff *skb; - - while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) - lec_send(entry->vcc, skb, entry->priv); - entry->last_used = jiffies; - entry->status = - ESI_FORWARD_DIRECT; - } - entry = entry->next; - } - } - } - lec_arp_put(priv); - } + now = jiffies; + spin_lock_irqsave(&priv->lec_arp_lock, flags); + for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for(entry = priv->lec_arp_tables[i]; entry != NULL; ) { + if ((entry->flags) & LEC_REMOTE_FLAG && + priv->topology_change) + time_to_check = priv->forward_delay_time; + else + time_to_check = priv->aging_time; + + DPRINTK("About to expire: %lx - %lx > %lx\n", + now,entry->last_used, time_to_check); + if( time_after(now, entry->last_used+ + time_to_check) && + !(entry->flags & LEC_PERMANENT_FLAG) && + !(entry->mac_addr[0] & 
0x01) ) { /* LANE2: 7.1.20 */ + /* Remove entry */ + DPRINTK("LEC:Entry timed out\n"); + next = entry->next; + lec_arp_remove(priv, entry); + kfree(entry); + entry = next; + } else { + /* Something else */ + if ((entry->status == ESI_VC_PENDING || + entry->status == ESI_ARP_PENDING) + && time_after_eq(now, + entry->timestamp + + priv->max_unknown_frame_time)) { + entry->timestamp = jiffies; + entry->packets_flooded = 0; + if (entry->status == ESI_VC_PENDING) + send_to_lecd(priv, l_svc_setup, entry->mac_addr, entry->atm_addr, NULL); + } + if (entry->status == ESI_FLUSH_PENDING + && + time_after_eq(now, entry->timestamp+ + priv->path_switching_delay)) { + struct sk_buff *skb; + + while ((skb = skb_dequeue(&entry->tx_wait)) != NULL) + lec_send(entry->vcc, skb, entry->priv); + entry->last_used = jiffies; + entry->status = + ESI_FORWARD_DIRECT; + } + entry = entry->next; + } + } + } + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); mod_timer(&priv->lec_arp_timer, jiffies + LEC_ARP_REFRESH_INTERVAL); } @@ -1963,9 +1949,11 @@ struct atm_vcc* lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, struct lec_arp_table **ret_entry) { + unsigned long flags; struct lec_arp_table *entry; + struct atm_vcc *found; - if (mac_to_find[0]&0x01) { + if (mac_to_find[0] & 0x01) { switch (priv->lane_version) { case 1: return priv->mcast_vcc; @@ -1979,6 +1967,7 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, } } + spin_lock_irqsave(&priv->lec_arp_lock, flags); entry = lec_arp_find(priv, mac_to_find); if (entry) { @@ -1986,7 +1975,8 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, /* Connection Ok */ entry->last_used = jiffies; *ret_entry = entry; - return entry->vcc; + found = entry->vcc; + goto out; } /* Data direct VC not yet set up, check to see if the unknown frame count is greater than the limit. 
If the limit has @@ -1996,7 +1986,8 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, entry->packets_flooded<priv->maximum_unknown_frame_count) { entry->packets_flooded++; DPRINTK("LEC_ARP: Flooding..\n"); - return priv->mcast_vcc; + found = priv->mcast_vcc; + goto out; } /* We got here because entry->status == ESI_FLUSH_PENDING * or BUS flood limit was reached for an entry which is @@ -2004,13 +1995,14 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, */ *ret_entry = entry; DPRINTK("lec: entry->status %d entry->vcc %p\n", entry->status, entry->vcc); - return NULL; + found = NULL; } else { /* No matching entry was found */ entry = make_entry(priv, mac_to_find); DPRINTK("LEC_ARP: Making entry\n"); if (!entry) { - return priv->mcast_vcc; + found = priv->mcast_vcc; + goto out; } lec_arp_add(priv, entry); /* We want arp-request(s) to be sent */ @@ -2026,33 +2018,38 @@ lec_arp_resolve(struct lec_priv *priv, unsigned char *mac_to_find, int is_rdesc, entry->timer.expires = jiffies + (1*HZ); entry->timer.function = lec_arp_expire_arp; add_timer(&entry->timer); - return priv->mcast_vcc; + found = priv->mcast_vcc; } + +out: + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + return found; } int lec_addr_delete(struct lec_priv *priv, unsigned char *atm_addr, unsigned long permanent) { + unsigned long flags; struct lec_arp_table *entry, *next; int i; - lec_arp_get(priv); DPRINTK("lec_addr_delete\n"); - for(i=0;i<LEC_ARP_TABLE_SIZE;i++) { - for(entry=priv->lec_arp_tables[i];entry != NULL; entry=next) { + spin_lock_irqsave(&priv->lec_arp_lock, flags); + for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for(entry = priv->lec_arp_tables[i]; entry != NULL; entry = next) { next = entry->next; if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN) && (permanent || !(entry->flags & LEC_PERMANENT_FLAG))) { - lec_arp_remove(priv, entry); + lec_arp_remove(priv, entry); kfree(entry); } - lec_arp_put(priv); + 
spin_unlock_irqrestore(&priv->lec_arp_lock, flags); return 0; } } - lec_arp_put(priv); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); return -1; } @@ -2064,6 +2061,7 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, unsigned char *atm_addr, unsigned long remoteflag, unsigned int targetless_le_arp) { + unsigned long flags; struct lec_arp_table *entry, *tmp; int i; @@ -2072,12 +2070,12 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, mac_addr[0],mac_addr[1],mac_addr[2],mac_addr[3], mac_addr[4],mac_addr[5]); + spin_lock_irqsave(&priv->lec_arp_lock, flags); entry = lec_arp_find(priv, mac_addr); if (entry == NULL && targetless_le_arp) - return; /* LANE2: ignore targetless LE_ARPs for which - * we have no entry in the cache. 7.1.30 - */ - lec_arp_get(priv); + goto out; /* LANE2: ignore targetless LE_ARPs for which + * we have no entry in the cache. 7.1.30 + */ if (priv->lec_arp_empty_ones) { entry = priv->lec_arp_empty_ones; if (!memcmp(entry->atm_addr, atm_addr, ATM_ESA_LEN)) { @@ -2117,27 +2115,24 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, entry->flags|=LEC_REMOTE_FLAG; else entry->flags&=~LEC_REMOTE_FLAG; - lec_arp_put(priv); DPRINTK("After update\n"); dump_arp_table(priv); - return; + goto out; } } entry = lec_arp_find(priv, mac_addr); if (!entry) { entry = make_entry(priv, mac_addr); - if (!entry) { - lec_arp_put(priv); - return; - } + if (!entry) + goto out; entry->status = ESI_UNKNOWN; lec_arp_add(priv, entry); /* Temporary, changes before end of function */ } memcpy(entry->atm_addr, atm_addr, ATM_ESA_LEN); del_timer(&entry->timer); - for(i=0;i<LEC_ARP_TABLE_SIZE;i++) { - for(tmp=priv->lec_arp_tables[i];tmp;tmp=tmp->next) { + for(i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for(tmp = priv->lec_arp_tables[i]; tmp; tmp=tmp->next) { if (entry != tmp && !memcmp(tmp->atm_addr, atm_addr, ATM_ESA_LEN)) { @@ -2166,7 +2161,8 @@ lec_arp_update(struct lec_priv *priv, unsigned char *mac_addr, } DPRINTK("After 
update2\n"); dump_arp_table(priv); - lec_arp_put(priv); +out: + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } /* @@ -2177,10 +2173,11 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, struct atm_vcc *vcc, void (*old_push)(struct atm_vcc *vcc, struct sk_buff *skb)) { + unsigned long flags; struct lec_arp_table *entry; int i, found_entry=0; - lec_arp_get(priv); + spin_lock_irqsave(&priv->lec_arp_lock, flags); if (ioc_data->receive == 2) { /* Vcc for Multicast Forward. No timer, LANEv2 7.1.20 and 2.3.5.3 */ @@ -2189,26 +2186,22 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, entry = lec_arp_find(priv, bus_mac); if (!entry) { printk("LEC_ARP: Multicast entry not found!\n"); - lec_arp_put(priv); - return; + goto out; } memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); entry->recv_vcc = vcc; entry->old_recv_push = old_push; #endif entry = make_entry(priv, bus_mac); - if (entry == NULL) { - lec_arp_put(priv); - return; - } + if (entry == NULL) + goto out; del_timer(&entry->timer); memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); entry->recv_vcc = vcc; entry->old_recv_push = old_push; entry->next = priv->mcast_fwds; priv->mcast_fwds = entry; - lec_arp_put(priv); - return; + goto out; } else if (ioc_data->receive == 1) { /* Vcc which we don't want to make default vcc, attach it anyway. 
*/ @@ -2224,10 +2217,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, ioc_data->atm_addr[16],ioc_data->atm_addr[17], ioc_data->atm_addr[18],ioc_data->atm_addr[19]); entry = make_entry(priv, bus_mac); - if (entry == NULL) { - lec_arp_put(priv); - return; - } + if (entry == NULL) + goto out; memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); memset(entry->mac_addr, 0, ETH_ALEN); entry->recv_vcc = vcc; @@ -2238,9 +2229,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, add_timer(&entry->timer); entry->next = priv->lec_no_forward; priv->lec_no_forward = entry; - lec_arp_put(priv); dump_arp_table(priv); - return; + goto out; } DPRINTK("LEC_ARP:Attaching data direct, default:%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x%2.2x\n", ioc_data->atm_addr[0],ioc_data->atm_addr[1], @@ -2253,8 +2243,8 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, ioc_data->atm_addr[14],ioc_data->atm_addr[15], ioc_data->atm_addr[16],ioc_data->atm_addr[17], ioc_data->atm_addr[18],ioc_data->atm_addr[19]); - for (i=0;i<LEC_ARP_TABLE_SIZE;i++) { - for (entry = priv->lec_arp_tables[i];entry;entry=entry->next) { + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) { if (memcmp(ioc_data->atm_addr, entry->atm_addr, ATM_ESA_LEN)==0) { DPRINTK("LEC_ARP: Attaching data direct\n"); @@ -2297,18 +2287,15 @@ lec_vcc_added(struct lec_priv *priv, struct atmlec_ioc *ioc_data, } } if (found_entry) { - lec_arp_put(priv); DPRINTK("After vcc was added\n"); dump_arp_table(priv); - return; + goto out; } /* Not found, snatch address from first data packet that arrives from this vcc */ entry = make_entry(priv, bus_mac); - if (!entry) { - lec_arp_put(priv); - return; - } + if (!entry) + goto out; entry->vcc = vcc; entry->old_push = old_push; memcpy(entry->atm_addr, ioc_data->atm_addr, ATM_ESA_LEN); @@ -2319,20 +2306,23 @@ lec_vcc_added(struct 
lec_priv *priv, struct atmlec_ioc *ioc_data, entry->timer.expires = jiffies + priv->vcc_timeout_period; entry->timer.function = lec_arp_expire_vcc; add_timer(&entry->timer); - lec_arp_put(priv); DPRINTK("After vcc was added\n"); dump_arp_table(priv); +out: + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } void lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) { + unsigned long flags; struct lec_arp_table *entry; int i; DPRINTK("LEC:lec_flush_complete %lx\n",tran_id); - for (i=0;i<LEC_ARP_TABLE_SIZE;i++) { - for (entry=priv->lec_arp_tables[i];entry;entry=entry->next) { + spin_lock_irqsave(&priv->lec_arp_lock, flags); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) { + for (entry = priv->lec_arp_tables[i]; entry; entry=entry->next) { if (entry->flush_tran_id == tran_id && entry->status == ESI_FLUSH_PENDING) { struct sk_buff *skb; @@ -2344,6 +2334,7 @@ lec_flush_complete(struct lec_priv *priv, unsigned long tran_id) } } } + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); dump_arp_table(priv); } @@ -2351,24 +2342,29 @@ void lec_set_flush_tran_id(struct lec_priv *priv, unsigned char *atm_addr, unsigned long tran_id) { + unsigned long flags; struct lec_arp_table *entry; int i; - for (i=0;i<LEC_ARP_TABLE_SIZE;i++) - for(entry=priv->lec_arp_tables[i];entry;entry=entry->next) + spin_lock_irqsave(&priv->lec_arp_lock, flags); + for (i = 0; i < LEC_ARP_TABLE_SIZE; i++) + for(entry = priv->lec_arp_tables[i]; entry; entry=entry->next) if (!memcmp(atm_addr, entry->atm_addr, ATM_ESA_LEN)) { entry->flush_tran_id = tran_id; DPRINTK("Set flush transaction id to %lx for %p\n",tran_id,entry); } + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } int lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) { + unsigned long flags; unsigned char mac_addr[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; struct lec_arp_table *to_add; struct lec_vcc_priv *vpriv; + int err = 0; if (!(vpriv = kmalloc(sizeof(struct lec_vcc_priv), GFP_KERNEL))) return -ENOMEM; @@ -2376,13 
+2372,13 @@ lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) vpriv->old_pop = vcc->pop; vcc->user_back = vpriv; vcc->pop = lec_pop; - lec_arp_get(priv); + spin_lock_irqsave(&priv->lec_arp_lock, flags); to_add = make_entry(priv, mac_addr); if (!to_add) { - lec_arp_put(priv); vcc->pop = vpriv->old_pop; kfree(vpriv); - return -ENOMEM; + err = -ENOMEM; + goto out; } memcpy(to_add->atm_addr, vcc->remote.sas_addr.prv, ATM_ESA_LEN); to_add->status = ESI_FORWARD_DIRECT; @@ -2392,19 +2388,21 @@ lec_mcast_make(struct lec_priv *priv, struct atm_vcc *vcc) vcc->push = lec_push; priv->mcast_vcc = vcc; lec_arp_add(priv, to_add); - lec_arp_put(priv); - return 0; +out: + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); + return err; } void lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) { + unsigned long flags; struct lec_arp_table *entry, *next; int i; DPRINTK("LEC_ARP: lec_vcc_close vpi:%d vci:%d\n",vcc->vpi,vcc->vci); dump_arp_table(priv); - lec_arp_get(priv); + spin_lock_irqsave(&priv->lec_arp_lock, flags); for(i=0;i<LEC_ARP_TABLE_SIZE;i++) { for(entry = priv->lec_arp_tables[i];entry; entry=next) { next = entry->next; @@ -2466,7 +2464,7 @@ lec_vcc_close(struct lec_priv *priv, struct atm_vcc *vcc) entry = next; } - lec_arp_put(priv); + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); dump_arp_table(priv); } @@ -2486,26 +2484,22 @@ lec_arp_check_empties(struct lec_priv *priv, #endif src = hdr->h_source; - lec_arp_get(priv); + spin_lock_irqsave(&priv->lec_arp_lock, flags); entry = priv->lec_arp_empty_ones; if (vcc == entry->vcc) { - spin_lock_irqsave(&priv->lec_arp_lock, flags); del_timer(&entry->timer); memcpy(entry->mac_addr, src, ETH_ALEN); entry->status = ESI_FORWARD_DIRECT; entry->last_used = jiffies; priv->lec_arp_empty_ones = entry->next; - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); /* We might have got an entry */ - if ((prev=lec_arp_find(priv,src))) { + if ((prev = lec_arp_find(priv,src))) { lec_arp_remove(priv, prev); kfree(prev); } 
lec_arp_add(priv, entry); - lec_arp_put(priv); - return; + goto out; } - spin_lock_irqsave(&priv->lec_arp_lock, flags); prev = entry; entry = entry->next; while (entry && entry->vcc != vcc) { @@ -2514,21 +2508,19 @@ lec_arp_check_empties(struct lec_priv *priv, } if (!entry) { DPRINTK("LEC_ARP: Arp_check_empties: entry not found!\n"); - lec_arp_put(priv); - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); - return; + goto out; } del_timer(&entry->timer); memcpy(entry->mac_addr, src, ETH_ALEN); entry->status = ESI_FORWARD_DIRECT; entry->last_used = jiffies; prev->next = entry->next; - spin_unlock_irqrestore(&priv->lec_arp_lock, flags); if ((prev = lec_arp_find(priv, src))) { lec_arp_remove(priv, prev); kfree(prev); } lec_arp_add(priv, entry); - lec_arp_put(priv); +out: + spin_unlock_irqrestore(&priv->lec_arp_lock, flags); } MODULE_LICENSE("GPL"); diff --git a/net/atm/lec.h b/net/atm/lec.h index 9c190210982e..34a64f4b63e3 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -95,7 +95,6 @@ struct lec_priv { establishes multiple Multicast Forward VCCs to us. This list collects all those VCCs. LANEv1 client has only one item in this list. These entries are not aged out. 
*/ - atomic_t lec_arp_users; spinlock_t lec_arp_lock; struct atm_vcc *mcast_vcc; /* Default Multicast Send VCC */ struct atm_vcc *lecd; diff --git a/net/atm/resources.c b/net/atm/resources.c index f030fea2ea60..4cadbfa6ecbd 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -38,6 +38,7 @@ static struct atm_dev *__alloc_atm_dev(const char *type) dev->signal = ATM_PHY_SIG_UNKNOWN; dev->link_rate = ATM_OC3_PCR; spin_lock_init(&dev->lock); + INIT_LIST_HEAD(&dev->local); return dev; } diff --git a/net/atm/svc.c b/net/atm/svc.c index 859e57b05b92..3465678faf2f 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -71,9 +71,7 @@ static void svc_disconnect(struct atm_vcc *vcc) sigd_enq2(NULL,as_reject,vcc,NULL,NULL,&vcc->qos,0); dev_kfree_skb(skb); } - clear_bit(ATM_VF_REGIS,&vcc->flags); - clear_bit(ATM_VF_RELEASED,&vcc->flags); - clear_bit(ATM_VF_CLOSE,&vcc->flags); + clear_bit(ATM_VF_REGIS, &vcc->flags); /* ... may retry later */ } @@ -90,10 +88,8 @@ static int svc_release(struct socket *sock) /* VCC pointer is used as a reference, so we must not free it (thereby subjecting it to re-use) before all pending connections are closed */ - sock_hold(sk); - vcc_release(sock); svc_disconnect(vcc); - sock_put(sk); + vcc_release(sock); } return 0; } @@ -286,7 +282,8 @@ static int svc_connect(struct socket *sock,struct sockaddr *sockaddr, */ if (!(error = vcc_connect(sock, vcc->itf, vcc->vpi, vcc->vci))) sock->state = SS_CONNECTED; - else (void) svc_disconnect(vcc); + else + (void) svc_disconnect(vcc); out: release_sock(sk); return error; diff --git a/net/core/dev.c b/net/core/dev.c index 4dc01e26f158..2fdd7e27e22d 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1190,7 +1190,7 @@ int __skb_linearize(struct sk_buff *skb, int gfp_mask) #define HARD_TX_LOCK(dev, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - spin_lock(&dev->xmit_lock); \ + spin_lock_irq(&dev->xmit_lock); \ dev->xmit_lock_owner = cpu; \ } \ } @@ -1198,7 +1198,7 @@ int __skb_linearize(struct 
sk_buff *skb, int gfp_mask) #define HARD_TX_UNLOCK(dev) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ dev->xmit_lock_owner = -1; \ - spin_unlock(&dev->xmit_lock); \ + spin_unlock_irq(&dev->xmit_lock); \ } \ } diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c index db098ff3cd6a..769dcf76eb6e 100644 --- a/net/core/dev_mcast.c +++ b/net/core/dev_mcast.c @@ -93,9 +93,9 @@ static void __dev_mc_upload(struct net_device *dev) void dev_mc_upload(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); __dev_mc_upload(dev); - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); } /* @@ -107,7 +107,7 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) int err = 0; struct dev_mc_list *dmi, **dmip; - spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); for (dmip = &dev->mc_list; (dmi = *dmip) != NULL; dmip = &dmi->next) { /* @@ -139,13 +139,13 @@ int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl) */ __dev_mc_upload(dev); - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); return 0; } } err = -ENOENT; done: - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); return err; } @@ -160,7 +160,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) dmi1 = (struct dev_mc_list *)kmalloc(sizeof(*dmi), GFP_ATOMIC); - spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); for (dmi = dev->mc_list; dmi != NULL; dmi = dmi->next) { if (memcmp(dmi->dmi_addr, addr, dmi->dmi_addrlen) == 0 && dmi->dmi_addrlen == alen) { @@ -176,7 +176,7 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) } if ((dmi = dmi1) == NULL) { - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); return -ENOMEM; } memcpy(dmi->dmi_addr, addr, alen); @@ -189,11 +189,11 @@ int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl) __dev_mc_upload(dev); - spin_unlock_bh(&dev->xmit_lock); + 
spin_unlock_irq(&dev->xmit_lock); return 0; done: - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); if (dmi1) kfree(dmi1); return err; @@ -205,7 +205,7 @@ done: void dev_mc_discard(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); while (dev->mc_list != NULL) { struct dev_mc_list *tmp = dev->mc_list; @@ -216,7 +216,7 @@ void dev_mc_discard(struct net_device *dev) } dev->mc_count = 0; - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); } #ifdef CONFIG_PROC_FS @@ -251,7 +251,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) struct dev_mc_list *m; struct net_device *dev = v; - spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); for (m = dev->mc_list; m; m = m->next) { int i; @@ -263,7 +263,7 @@ static int dev_mc_seq_show(struct seq_file *seq, void *v) seq_putc(seq, '\n'); } - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); return 0; } diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 83697277a851..af02bba906a1 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -188,7 +188,7 @@ repeat: return; } - spin_lock(&np->dev->xmit_lock); + spin_lock_irq(&np->dev->xmit_lock); np->dev->xmit_lock_owner = smp_processor_id(); /* @@ -197,7 +197,7 @@ repeat: */ if (netif_queue_stopped(np->dev)) { np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); + spin_unlock_irq(&np->dev->xmit_lock); netpoll_poll(np); goto repeat; @@ -205,7 +205,7 @@ repeat: status = np->dev->hard_start_xmit(skb, np->dev); np->dev->xmit_lock_owner = -1; - spin_unlock(&np->dev->xmit_lock); + spin_unlock_irq(&np->dev->xmit_lock); /* transmit busy */ if(status) { diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 9bee84a7fbf8..3364ee3269e6 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2664,12 +2664,11 @@ __inline__ void pktgen_xmit(struct pktgen_dev *pkt_dev) } } - spin_lock_bh(&odev->xmit_lock); + spin_lock_irq(&odev->xmit_lock); if 
(!netif_queue_stopped(odev)) { u64 now; atomic_inc(&(pkt_dev->skb->users)); -retry_now: ret = odev->hard_start_xmit(pkt_dev->skb, odev); if (likely(ret == NETDEV_TX_OK)) { pkt_dev->last_ok = 1; @@ -2677,10 +2676,6 @@ retry_now: pkt_dev->seq_num++; pkt_dev->tx_bytes += pkt_dev->cur_pkt_size; - } else if (ret == NETDEV_TX_LOCKED - && (odev->features & NETIF_F_LLTX)) { - cpu_relax(); - goto retry_now; } else { /* Retry it next time */ atomic_dec(&(pkt_dev->skb->users)); @@ -2716,7 +2711,7 @@ retry_now: pkt_dev->next_tx_ns = 0; } - spin_unlock_bh(&odev->xmit_lock); + spin_unlock_irq(&odev->xmit_lock); /* If pkt_dev->count is zero, then run forever */ if ((pkt_dev->count != 0) && (pkt_dev->sofar >= pkt_dev->count)) { diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c index fc741925911a..3dbddd062605 100644 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ b/net/ipv4/netfilter/ip_conntrack_amanda.c @@ -44,14 +44,21 @@ static char *conns[] = { "DATA ", "MESG ", "INDEX " }; static char amanda_buffer[65536]; static DECLARE_LOCK(amanda_buffer_lock); -static int help(struct sk_buff *skb, +unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp); +EXPORT_SYMBOL_GPL(ip_nat_amanda_hook); + +static int help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { struct ip_conntrack_expect *exp; - struct ip_ct_amanda_expect *exp_amanda_info; char *data, *data_limit, *tmp; unsigned int dataoff, i; u_int16_t port, len; + int ret = NF_ACCEPT; /* Only look at packets from the Amanda server */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) @@ -62,17 +69,17 @@ static int help(struct sk_buff *skb, ip_ct_refresh_acct(ct, ctinfo, NULL, master_timeout * HZ); /* No data? 
*/ - dataoff = skb->nh.iph->ihl*4 + sizeof(struct udphdr); - if (dataoff >= skb->len) { + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + if (dataoff >= (*pskb)->len) { if (net_ratelimit()) - printk("amanda_help: skblen = %u\n", skb->len); + printk("amanda_help: skblen = %u\n", (*pskb)->len); return NF_ACCEPT; } LOCK_BH(&amanda_buffer_lock); - skb_copy_bits(skb, dataoff, amanda_buffer, skb->len - dataoff); + skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff); data = amanda_buffer; - data_limit = amanda_buffer + skb->len - dataoff; + data_limit = amanda_buffer + (*pskb)->len - dataoff; *data_limit = '\0'; /* Search for the CONNECT string */ @@ -96,36 +103,44 @@ static int help(struct sk_buff *skb, break; exp = ip_conntrack_expect_alloc(); - if (exp == NULL) + if (exp == NULL) { + ret = NF_DROP; goto out; + } + + exp->expectfn = NULL; + exp->master = ct; exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + exp->tuple.src.u.tcp.port = 0; exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; exp->tuple.dst.protonum = IPPROTO_TCP; + exp->tuple.dst.u.tcp.port = htons(port); + exp->mask.src.ip = 0xFFFFFFFF; + exp->mask.src.u.tcp.port = 0; exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.protonum = 0xFFFF; + exp->mask.dst.protonum = 0xFF; exp->mask.dst.u.tcp.port = 0xFFFF; - exp_amanda_info = &exp->help.exp_amanda_info; - exp_amanda_info->offset = tmp - amanda_buffer; - exp_amanda_info->port = port; - exp_amanda_info->len = len; - - exp->tuple.dst.u.tcp.port = htons(port); - - ip_conntrack_expect_related(exp, ct); + if (ip_nat_amanda_hook) + ret = ip_nat_amanda_hook(pskb, ctinfo, + tmp - amanda_buffer, + len, exp); + else if (ip_conntrack_expect_related(exp) != 0) { + ip_conntrack_expect_free(exp); + ret = NF_DROP; + } } out: UNLOCK_BH(&amanda_buffer_lock); - return NF_ACCEPT; + return ret; } static struct ip_conntrack_helper amanda_helper = { .max_expected = ARRAY_SIZE(conns), .timeout = 180, - .flags = 
IP_CT_HELPER_F_REUSE_EXPECT, .me = THIS_MODULE, .help = help, .name = "amanda", @@ -134,7 +149,7 @@ static struct ip_conntrack_helper amanda_helper = { .dst = { .protonum = IPPROTO_UDP }, }, .mask = { .src = { .u = { 0xFFFF } }, - .dst = { .protonum = 0xFFFF }, + .dst = { .protonum = 0xFF }, }, }; @@ -148,6 +163,5 @@ static int __init init(void) return ip_conntrack_helper_register(&amanda_helper); } -PROVIDES_CONNTRACK(amanda); module_init(init); module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 247301938778..0eaafec43dd0 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -58,7 +58,6 @@ #endif DECLARE_RWLOCK(ip_conntrack_lock); -DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock); /* ip_conntrack_standalone needs this */ atomic_t ip_conntrack_count = ATOMIC_INIT(0); @@ -79,7 +78,7 @@ static int ip_conntrack_vmalloc; DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); -inline void +void ip_conntrack_put(struct ip_conntrack *ct) { IP_NF_ASSERT(ct); @@ -118,6 +117,7 @@ ip_ct_get_tuple(const struct iphdr *iph, tuple->src.ip = iph->saddr; tuple->dst.ip = iph->daddr; tuple->dst.protonum = iph->protocol; + tuple->dst.dir = IP_CT_DIR_ORIGINAL; return protocol->pkt_to_tuple(skb, dataoff, tuple); } @@ -130,135 +130,76 @@ ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, inverse->src.ip = orig->dst.ip; inverse->dst.ip = orig->src.ip; inverse->dst.protonum = orig->dst.protonum; + inverse->dst.dir = !orig->dst.dir; return protocol->invert_tuple(inverse, orig); } /* ip_conntrack_expect helper functions */ - -/* Compare tuple parts depending on mask. 
*/ -static inline int expect_cmp(const struct ip_conntrack_expect *i, - const struct ip_conntrack_tuple *tuple) -{ - MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock); - return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask); -} - -static void -destroy_expect(struct ip_conntrack_expect *exp) +static void destroy_expect(struct ip_conntrack_expect *exp) { - DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use)); - IP_NF_ASSERT(atomic_read(&exp->use) == 0); + ip_conntrack_put(exp->master); IP_NF_ASSERT(!timer_pending(&exp->timeout)); - kmem_cache_free(ip_conntrack_expect_cachep, exp); CONNTRACK_STAT_INC(expect_delete); } -inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) +static void unlink_expect(struct ip_conntrack_expect *exp) { - IP_NF_ASSERT(exp); - - if (atomic_dec_and_test(&exp->use)) { - /* usage count dropped to zero */ - destroy_expect(exp); - } -} - -static inline struct ip_conntrack_expect * -__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple) -{ - MUST_BE_READ_LOCKED(&ip_conntrack_lock); - MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock); - return LIST_FIND(&ip_conntrack_expect_list, expect_cmp, - struct ip_conntrack_expect *, tuple); -} - -/* Find a expectation corresponding to a tuple. */ -struct ip_conntrack_expect * -ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) -{ - struct ip_conntrack_expect *exp; - - READ_LOCK(&ip_conntrack_lock); - READ_LOCK(&ip_conntrack_expect_tuple_lock); - exp = __ip_ct_expect_find(tuple); - if (exp) - atomic_inc(&exp->use); - READ_UNLOCK(&ip_conntrack_expect_tuple_lock); - READ_UNLOCK(&ip_conntrack_lock); - - return exp; + MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); + list_del(&exp->list); + /* Logically in destroy_expect, but we hold the lock here. */ + exp->master->expecting--; } -/* remove one specific expectation from all lists and drop refcount, - * does _NOT_ delete the timer. 
*/ -static void __unexpect_related(struct ip_conntrack_expect *expect) +static void expectation_timed_out(unsigned long ul_expect) { - DEBUGP("unexpect_related(%p)\n", expect); - MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); - - /* we're not allowed to unexpect a confirmed expectation! */ - IP_NF_ASSERT(!expect->sibling); - - /* delete from global and local lists */ - list_del(&expect->list); - list_del(&expect->expected_list); + struct ip_conntrack_expect *exp = (void *)ul_expect; - /* decrement expect-count of master conntrack */ - if (expect->expectant) - expect->expectant->expecting--; - - ip_conntrack_expect_put(expect); + WRITE_LOCK(&ip_conntrack_lock); + unlink_expect(exp); + WRITE_UNLOCK(&ip_conntrack_lock); + destroy_expect(exp); } -/* remove one specific expecatation from all lists, drop refcount - * and expire timer. - * This function can _NOT_ be called for confirmed expects! */ -static void unexpect_related(struct ip_conntrack_expect *expect) +/* If an expectation for this connection is found, it gets delete from + * global list then returned. */ +static struct ip_conntrack_expect * +find_expectation(const struct ip_conntrack_tuple *tuple) { - IP_NF_ASSERT(expect->expectant); - IP_NF_ASSERT(expect->expectant->helper); - /* if we are supposed to have a timer, but we can't delete - * it: race condition. __unexpect_related will - * be calledd by timeout function */ - if (expect->expectant->helper->timeout - && !del_timer(&expect->timeout)) - return; + struct ip_conntrack_expect *i; - __unexpect_related(expect); + list_for_each_entry(i, &ip_conntrack_expect_list, list) { + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). 
*/ + if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) + && is_confirmed(i->master) + && del_timer(&i->timeout)) { + unlink_expect(i); + return i; + } + } + return NULL; } -/* delete all unconfirmed expectations for this conntrack */ -static void remove_expectations(struct ip_conntrack *ct, int drop_refcount) +/* delete all expectations for this conntrack */ +static void remove_expectations(struct ip_conntrack *ct) { - struct list_head *exp_entry, *next; - struct ip_conntrack_expect *exp; + struct ip_conntrack_expect *i, *tmp; - DEBUGP("remove_expectations(%p)\n", ct); - - list_for_each_safe(exp_entry, next, &ct->sibling_list) { - exp = list_entry(exp_entry, struct ip_conntrack_expect, - expected_list); + /* Optimization: most connection never expect any others. */ + if (ct->expecting == 0) + return; - /* we skip established expectations, as we want to delete - * the un-established ones only */ - if (exp->sibling) { - DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct); - if (drop_refcount) { - /* Indicate that this expectations parent is dead */ - ip_conntrack_put(exp->expectant); - exp->expectant = NULL; - } - continue; + list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) { + if (i->master == ct && del_timer(&i->timeout)) { + unlink_expect(i); + destroy_expect(i); } - - IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp)); - IP_NF_ASSERT(exp->expectant == ct); - - /* delete expectation from global and private lists */ - unexpect_related(exp); } } @@ -275,14 +216,14 @@ clean_from_lists(struct ip_conntrack *ct) LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); - /* Destroy all un-established, pending expectations */ - remove_expectations(ct, 1); + /* Destroy all pending expectations */ + remove_expectations(ct); } static void destroy_conntrack(struct nf_conntrack *nfct) { - struct ip_conntrack *ct = (struct ip_conntrack 
*)nfct, *master = NULL; + struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; DEBUGP("destroy_conntrack(%p)\n", ct); @@ -304,8 +245,7 @@ destroy_conntrack(struct nf_conntrack *nfct) * except TFTP can create an expectation on the first packet, * before connection is in the list, so we need to clean here, * too. */ - if (ct->expecting) - remove_expectations(ct, 1); + remove_expectations(ct); /* We overload first tuple to link into unconfirmed list. */ if (!is_confirmed(ct)) { @@ -313,21 +253,11 @@ destroy_conntrack(struct nf_conntrack *nfct) list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); } - /* Delete our master expectation */ - if (ct->master) { - if (ct->master->expectant) { - /* can't call __unexpect_related here, - * since it would screw up expect_list */ - list_del(&ct->master->expected_list); - master = ct->master->expectant; - } - kmem_cache_free(ip_conntrack_expect_cachep, ct->master); - } CONNTRACK_STAT_INC(delete); WRITE_UNLOCK(&ip_conntrack_lock); - if (master) - ip_conntrack_put(master); + if (ct->master) + ip_conntrack_put(ct->master); DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); kmem_cache_free(ip_conntrack_cachep, ct); @@ -353,7 +283,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, const struct ip_conntrack *ignored_conntrack) { MUST_BE_READ_LOCKED(&ip_conntrack_lock); - return i->ctrack != ignored_conntrack + return tuplehash_to_ctrack(i) != ignored_conntrack && ip_ct_tuple_equal(tuple, &i->tuple); } @@ -386,7 +316,7 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, READ_LOCK(&ip_conntrack_lock); h = __ip_conntrack_find(tuple, ignored_conntrack); if (h) - atomic_inc(&h->ctrack->ct_general.use); + atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); READ_UNLOCK(&ip_conntrack_lock); return h; @@ -394,13 +324,13 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, /* Confirm a connection given skb; places it in hash table */ int 
-__ip_conntrack_confirm(struct sk_buff *skb) +__ip_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; - ct = ip_conntrack_get(skb, &ctinfo); + ct = ip_conntrack_get(*pskb, &ctinfo); /* ipt_REJECT uses ip_conntrack_attach to attach related ICMP/TCP RST packets in other direction. Actual packet @@ -479,30 +409,33 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, connection. Too bad: we're in trouble anyway. */ static inline int unreplied(const struct ip_conntrack_tuple_hash *i) { - return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status)); + return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); } static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct = NULL; int dropped = 0; READ_LOCK(&ip_conntrack_lock); h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) - atomic_inc(&h->ctrack->ct_general.use); + if (h) { + ct = tuplehash_to_ctrack(h); + atomic_inc(&ct->ct_general.use); + } READ_UNLOCK(&ip_conntrack_lock); - if (!h) + if (!ct) return dropped; - if (del_timer(&h->ctrack->timeout)) { - death_by_timeout((unsigned long)h->ctrack); + if (del_timer(&ct->timeout)) { + death_by_timeout((unsigned long)ct); dropped = 1; CONNTRACK_STAT_INC(early_drop); } - ip_conntrack_put(h->ctrack); + ip_conntrack_put(ct); return dropped; } @@ -512,7 +445,7 @@ static inline int helper_cmp(const struct ip_conntrack_helper *i, return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); } -struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) +static struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) { return LIST_FIND(&helpers, helper_cmp, struct ip_conntrack_helper *, @@ -529,7 +462,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, struct ip_conntrack *conntrack; 
struct ip_conntrack_tuple repl_tuple; size_t hash; - struct ip_conntrack_expect *expected; + struct ip_conntrack_expect *exp; if (!ip_conntrack_hash_rnd_initted) { get_random_bytes(&ip_conntrack_hash_rnd, 4); @@ -565,9 +498,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, atomic_set(&conntrack->ct_general.use, 1); conntrack->ct_general.destroy = destroy_conntrack; conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; - conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; if (!protocol->new(conntrack, skb)) { kmem_cache_free(ip_conntrack_cachep, conntrack); return NULL; @@ -577,73 +508,39 @@ init_conntrack(const struct ip_conntrack_tuple *tuple, conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; - INIT_LIST_HEAD(&conntrack->sibling_list); - WRITE_LOCK(&ip_conntrack_lock); - /* Need finding and deleting of expected ONLY if we win race */ - READ_LOCK(&ip_conntrack_expect_tuple_lock); - expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp, - struct ip_conntrack_expect *, tuple); - READ_UNLOCK(&ip_conntrack_expect_tuple_lock); - - if (expected) { - /* If master is not in hash table yet (ie. packet hasn't left - this machine yet), how can other end know about expected? - Hence these are not the droids you are looking for (if - master ct never got confirmed, we'd hold a reference to it - and weird things would happen to future packets). */ - if (!is_confirmed(expected->expectant)) { - conntrack->helper = ip_ct_find_helper(&repl_tuple); - goto end; - } - - /* Expectation is dying... */ - if (expected->expectant->helper->timeout - && !del_timer(&expected->timeout)) - goto end; + exp = find_expectation(tuple); + if (exp) { DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", - conntrack, expected); + conntrack, exp); /* Welcome, Mr. Bond. We've been expecting you... 
*/ - IP_NF_ASSERT(expected->expectant); __set_bit(IPS_EXPECTED_BIT, &conntrack->status); - conntrack->master = expected; - expected->sibling = conntrack; + conntrack->master = exp->master; #if CONFIG_IP_NF_CONNTRACK_MARK - conntrack->mark = expected->expectant->mark; + conntrack->mark = exp->master->mark; #endif - LIST_DELETE(&ip_conntrack_expect_list, expected); - expected->expectant->expecting--; - nf_conntrack_get(&master_ct(conntrack)->ct_general); - - /* this is a braindead... --pablo */ - atomic_inc(&ip_conntrack_count); - - /* Overload tuple linked list to put us in unconfirmed list. */ - list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, - &unconfirmed); - WRITE_UNLOCK(&ip_conntrack_lock); - - if (expected->expectfn) - expected->expectfn(conntrack); - + nf_conntrack_get(&conntrack->master->ct_general); CONNTRACK_STAT_INC(expect_new); - - goto ret; - } else { + } else { conntrack->helper = ip_ct_find_helper(&repl_tuple); CONNTRACK_STAT_INC(new); } -end: /* Overload tuple linked list to put us in unconfirmed list. */ list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); atomic_inc(&ip_conntrack_count); WRITE_UNLOCK(&ip_conntrack_lock); -ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; + if (exp) { + if (exp->expectfn) + exp->expectfn(conntrack, exp); + destroy_expect(exp); + } + + return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; } /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ @@ -656,6 +553,7 @@ resolve_normal_ct(struct sk_buff *skb, { struct ip_conntrack_tuple tuple; struct ip_conntrack_tuple_hash *h; + struct ip_conntrack *ct; IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); @@ -672,6 +570,7 @@ resolve_normal_ct(struct sk_buff *skb, if (IS_ERR(h)) return (void *)h; } + ct = tuplehash_to_ctrack(h); /* It exists; we have (non-exclusive) reference. 
*/ if (DIRECTION(h) == IP_CT_DIR_REPLY) { @@ -680,24 +579,24 @@ resolve_normal_ct(struct sk_buff *skb, *set_reply = 1; } else { /* Once we've had two way comms, always ESTABLISHED. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) { + if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { DEBUGP("ip_conntrack_in: normal packet for %p\n", - h->ctrack); + ct); *ctinfo = IP_CT_ESTABLISHED; - } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) { + } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { DEBUGP("ip_conntrack_in: related packet for %p\n", - h->ctrack); + ct); *ctinfo = IP_CT_RELATED; } else { DEBUGP("ip_conntrack_in: new packet for %p\n", - h->ctrack); + ct); *ctinfo = IP_CT_NEW; } *set_reply = 0; } - skb->nfct = &h->ctrack->ct_general; + skb->nfct = &ct->ct_general; skb->nfctinfo = *ctinfo; - return h->ctrack; + return ct; } /* Netfilter hook itself. */ @@ -782,16 +681,6 @@ unsigned int ip_conntrack_in(unsigned int hooknum, return -ret; } - if (ret != NF_DROP && ct->helper) { - ret = ct->helper->help(*pskb, ct, ctinfo); - if (ret == -1) { - /* Invalid */ - CONNTRACK_STAT_INC(invalid); - nf_conntrack_put((*pskb)->nfct); - (*pskb)->nfct = NULL; - return NF_ACCEPT; - } - } if (set_reply) set_bit(IPS_SEEN_REPLY_BIT, &ct->status); @@ -805,55 +694,49 @@ int invert_tuplepr(struct ip_conntrack_tuple *inverse, ip_ct_find_proto(orig->dst.protonum)); } -static inline int resent_expect(const struct ip_conntrack_expect *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *mask) -{ - DEBUGP("resent_expect\n"); - DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple); - DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple); - DEBUGP("test tuple: "); DUMP_TUPLE(tuple); - return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple)) - || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple))) - && ip_ct_tuple_equal(&i->mask, mask)); -} - /* Would two expected things clash? 
*/ -static inline int expect_clash(const struct ip_conntrack_expect *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *mask) +static inline int expect_clash(const struct ip_conntrack_expect *a, + const struct ip_conntrack_expect *b) { /* Part covered by intersection of masks must be unequal, otherwise they clash */ struct ip_conntrack_tuple intersect_mask - = { { i->mask.src.ip & mask->src.ip, - { i->mask.src.u.all & mask->src.u.all } }, - { i->mask.dst.ip & mask->dst.ip, - { i->mask.dst.u.all & mask->dst.u.all }, - i->mask.dst.protonum & mask->dst.protonum } }; + = { { a->mask.src.ip & b->mask.src.ip, + { a->mask.src.u.all & b->mask.src.u.all } }, + { a->mask.dst.ip & b->mask.dst.ip, + { a->mask.dst.u.all & b->mask.dst.u.all }, + a->mask.dst.protonum & b->mask.dst.protonum } }; - return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask); + return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); } -inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect) +static inline int expect_matches(const struct ip_conntrack_expect *a, + const struct ip_conntrack_expect *b) { - WRITE_LOCK(&ip_conntrack_lock); - unexpect_related(expect); - WRITE_UNLOCK(&ip_conntrack_lock); + return a->master == b->master + && ip_ct_tuple_equal(&a->tuple, &b->tuple) + && ip_ct_tuple_equal(&a->mask, &b->mask); } - -static void expectation_timed_out(unsigned long ul_expect) + +/* Generally a bad idea to call this: could have matched already. 
*/ +void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp) { - struct ip_conntrack_expect *expect = (void *) ul_expect; + struct ip_conntrack_expect *i; - DEBUGP("expectation %p timed out\n", expect); WRITE_LOCK(&ip_conntrack_lock); - __unexpect_related(expect); + /* choose the the oldest expectation to evict */ + list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { + if (expect_matches(i, exp) && del_timer(&i->timeout)) { + unlink_expect(i); + WRITE_UNLOCK(&ip_conntrack_lock); + destroy_expect(i); + return; + } + } WRITE_UNLOCK(&ip_conntrack_lock); } -struct ip_conntrack_expect * -ip_conntrack_expect_alloc(void) +struct ip_conntrack_expect *ip_conntrack_expect_alloc(void) { struct ip_conntrack_expect *new; @@ -862,178 +745,95 @@ ip_conntrack_expect_alloc(void) DEBUGP("expect_related: OOM allocating expect\n"); return NULL; } - - /* tuple_cmp compares whole union, we have to initialized cleanly */ - memset(new, 0, sizeof(struct ip_conntrack_expect)); - atomic_set(&new->use, 1); - + new->master = NULL; return new; } -static void -ip_conntrack_expect_insert(struct ip_conntrack_expect *new, - struct ip_conntrack *related_to) +void ip_conntrack_expect_free(struct ip_conntrack_expect *expect) { - DEBUGP("new expectation %p of conntrack %p\n", new, related_to); - new->expectant = related_to; - new->sibling = NULL; - - /* add to expected list for this connection */ - list_add_tail(&new->expected_list, &related_to->sibling_list); - /* add to global list of expectations */ - list_prepend(&ip_conntrack_expect_list, &new->list); - /* add and start timer if required */ - if (related_to->helper->timeout) { - init_timer(&new->timeout); - new->timeout.data = (unsigned long)new; - new->timeout.function = expectation_timed_out; - new->timeout.expires = jiffies + - related_to->helper->timeout * HZ; - add_timer(&new->timeout); - } - related_to->expecting++; + kmem_cache_free(ip_conntrack_expect_cachep, expect); } -/* Add a related connection. 
*/ -int ip_conntrack_expect_related(struct ip_conntrack_expect *expect, - struct ip_conntrack *related_to) +static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) { - struct ip_conntrack_expect *old; - int ret = 0; + atomic_inc(&exp->master->ct_general.use); + exp->master->expecting++; + list_add(&exp->list, &ip_conntrack_expect_list); + + if (exp->master->helper->timeout) { + init_timer(&exp->timeout); + exp->timeout.data = (unsigned long)exp; + exp->timeout.function = expectation_timed_out; + exp->timeout.expires + = jiffies + exp->master->helper->timeout * HZ; + add_timer(&exp->timeout); + } else + exp->timeout.function = NULL; - WRITE_LOCK(&ip_conntrack_lock); - /* Because of the write lock, no reader can walk the lists, - * so there is no need to use the tuple lock too */ + CONNTRACK_STAT_INC(expect_create); +} - DEBUGP("ip_conntrack_expect_related %p\n", related_to); - DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); - DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); +/* Race with expectations being used means we could have none to find; OK. */ +static void evict_oldest_expect(struct ip_conntrack *master) +{ + struct ip_conntrack_expect *i; - old = LIST_FIND(&ip_conntrack_expect_list, resent_expect, - struct ip_conntrack_expect *, &expect->tuple, - &expect->mask); - if (old) { - /* Helper private data may contain offsets but no pointers - pointing into the payload - otherwise we should have to copy - the data filled out by the helper over the old one */ - DEBUGP("expect_related: resent packet\n"); - if (related_to->helper->timeout) { - if (!del_timer(&old->timeout)) { - /* expectation is dying. 
Fall through */ - goto out; - } else { - old->timeout.expires = jiffies + - related_to->helper->timeout * HZ; - add_timer(&old->timeout); + list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { + if (i->master == master) { + if (del_timer(&i->timeout)) { + unlink_expect(i); + destroy_expect(i); } + break; } - - WRITE_UNLOCK(&ip_conntrack_lock); - /* This expectation is not inserted so no need to lock */ - kmem_cache_free(ip_conntrack_expect_cachep, expect); - return -EEXIST; - - } else if (related_to->helper->max_expected && - related_to->expecting >= related_to->helper->max_expected) { - /* old == NULL */ - if (!(related_to->helper->flags & - IP_CT_HELPER_F_REUSE_EXPECT)) { - WRITE_UNLOCK(&ip_conntrack_lock); - if (net_ratelimit()) - printk(KERN_WARNING - "ip_conntrack: max number of expected " - "connections %i of %s reached for " - "%u.%u.%u.%u->%u.%u.%u.%u\n", - related_to->helper->max_expected, - related_to->helper->name, - NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), - NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); - kmem_cache_free(ip_conntrack_expect_cachep, expect); - return -EPERM; - } - DEBUGP("ip_conntrack: max number of expected " - "connections %i of %s reached for " - "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n", - related_to->helper->max_expected, - related_to->helper->name, - NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), - NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); - - /* choose the the oldest expectation to evict */ - list_for_each_entry(old, &related_to->sibling_list, - expected_list) - if (old->sibling == NULL) - break; - - /* We cannot fail since related_to->expecting is the number - * of unconfirmed expectations */ - IP_NF_ASSERT(old && old->sibling == NULL); - - /* newnat14 does not reuse the real allocated memory - * structures but rather unexpects the old and - * allocates a new. unexpect_related will decrement - * related_to->expecting. 
- */ - unexpect_related(old); - ret = -EPERM; - } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash, - struct ip_conntrack_expect *, &expect->tuple, - &expect->mask)) { - WRITE_UNLOCK(&ip_conntrack_lock); - DEBUGP("expect_related: busy!\n"); - - kmem_cache_free(ip_conntrack_expect_cachep, expect); - return -EBUSY; } +} -out: ip_conntrack_expect_insert(expect, related_to); - - WRITE_UNLOCK(&ip_conntrack_lock); - - CONNTRACK_STAT_INC(expect_create); +static inline int refresh_timer(struct ip_conntrack_expect *i) +{ + if (!del_timer(&i->timeout)) + return 0; - return ret; + i->timeout.expires = jiffies + i->master->helper->timeout*HZ; + add_timer(&i->timeout); + return 1; } -/* Change tuple in an existing expectation */ -int ip_conntrack_change_expect(struct ip_conntrack_expect *expect, - struct ip_conntrack_tuple *newtuple) +int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) { + struct ip_conntrack_expect *i; int ret; - MUST_BE_READ_LOCKED(&ip_conntrack_lock); - WRITE_LOCK(&ip_conntrack_expect_tuple_lock); - - DEBUGP("change_expect:\n"); - DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple); - DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask); - DEBUGP("newtuple: "); DUMP_TUPLE(newtuple); - if (expect->ct_tuple.dst.protonum == 0) { - /* Never seen before */ - DEBUGP("change expect: never seen before\n"); - if (!ip_ct_tuple_equal(&expect->tuple, newtuple) - && LIST_FIND(&ip_conntrack_expect_list, expect_clash, - struct ip_conntrack_expect *, newtuple, &expect->mask)) { - /* Force NAT to find an unused tuple */ - ret = -1; - } else { - memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple)); - memcpy(&expect->tuple, newtuple, sizeof(expect->tuple)); - ret = 0; - } - } else { - /* Resent packet */ - DEBUGP("change expect: resent packet\n"); - if (ip_ct_tuple_equal(&expect->tuple, newtuple)) { - ret = 0; - } else { - /* Force NAT to choose again the same port */ - ret = -1; + DEBUGP("ip_conntrack_expect_related %p\n", related_to); + 
DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); + DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); + + WRITE_LOCK(&ip_conntrack_lock); + list_for_each_entry(i, &ip_conntrack_expect_list, list) { + if (expect_matches(i, expect)) { + /* Refresh timer: if it's dying, ignore.. */ + if (refresh_timer(i)) { + ret = 0; + /* We don't need the one they've given us. */ + ip_conntrack_expect_free(expect); + goto out; + } + } else if (expect_clash(i, expect)) { + ret = -EBUSY; + goto out; } } - WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock); - - return ret; + + /* Will be over limit? */ + if (expect->master->helper->max_expected && + expect->master->expecting >= expect->master->helper->max_expected) + evict_oldest_expect(expect->master); + + ip_conntrack_expect_insert(expect); + ret = 0; +out: + WRITE_UNLOCK(&ip_conntrack_lock); + return ret; } /* Alter reply tuple (maybe alter helper). This is for NAT, and is @@ -1049,13 +849,14 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, DUMP_TUPLE(newreply); conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (!conntrack->master && list_empty(&conntrack->sibling_list)) + if (!conntrack->master && conntrack->expecting == 0) conntrack->helper = ip_ct_find_helper(newreply); WRITE_UNLOCK(&ip_conntrack_lock); } int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { + BUG_ON(me->timeout == 0); WRITE_LOCK(&ip_conntrack_lock); list_prepend(&helpers, me); WRITE_UNLOCK(&ip_conntrack_lock); @@ -1066,23 +867,27 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) static inline int unhelp(struct ip_conntrack_tuple_hash *i, const struct ip_conntrack_helper *me) { - if (i->ctrack->helper == me) { - /* Get rid of any expected. 
*/ - remove_expectations(i->ctrack, 0); - /* And *then* set helper to NULL */ - i->ctrack->helper = NULL; - } + if (tuplehash_to_ctrack(i)->helper == me) + tuplehash_to_ctrack(i)->helper = NULL; return 0; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) { unsigned int i; + struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ WRITE_LOCK(&ip_conntrack_lock); LIST_DELETE(&helpers, me); + /* Get rid of expectations */ + list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { + if (exp->master->helper == me && del_timer(&exp->timeout)) { + unlink_expect(exp); + destroy_expect(exp); + } + } /* Get rid of expecteds, set helpers to NULL. */ LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); for (i = 0; i < ip_conntrack_htable_size; i++) @@ -1201,7 +1006,7 @@ do_iter(const struct ip_conntrack_tuple_hash *i, int (*iter)(struct ip_conntrack *i, void *data), void *data) { - return iter(i->ctrack, data); + return iter(tuplehash_to_ctrack(i), data); } /* Bring out ya dead! */ @@ -1222,7 +1027,7 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), h = LIST_FIND_W(&unconfirmed, do_iter, struct ip_conntrack_tuple_hash *, iter, data); if (h) - atomic_inc(&h->ctrack->ct_general.use); + atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); WRITE_UNLOCK(&ip_conntrack_lock); return h; @@ -1235,12 +1040,13 @@ ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) unsigned int bucket = 0; while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { + struct ip_conntrack *ct = tuplehash_to_ctrack(h); /* Time to push up daises... */ - if (del_timer(&h->ctrack->timeout)) - death_by_timeout((unsigned long)h->ctrack); + if (del_timer(&ct->timeout)) + death_by_timeout((unsigned long)ct); /* ... else the timer will get him soon. 
*/ - ip_conntrack_put(h->ctrack); + ip_conntrack_put(ct); } } @@ -1277,16 +1083,17 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) h = ip_conntrack_find_get(&tuple, NULL); if (h) { struct sockaddr_in sin; + struct ip_conntrack *ct = tuplehash_to_ctrack(h); sin.sin_family = AF_INET; - sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.u.tcp.port; - sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.ip; DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); - ip_conntrack_put(h->ctrack); + ip_conntrack_put(ct); if (copy_to_user(user, &sin, sizeof(sin)) != 0) return -EFAULT; else @@ -1398,7 +1205,7 @@ int __init ip_conntrack_init(void) ip_conntrack_cachep = kmem_cache_create("ip_conntrack", sizeof(struct ip_conntrack), 0, - SLAB_HWCACHE_ALIGN, NULL,NULL); + 0, NULL, NULL); if (!ip_conntrack_cachep) { printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); goto err_free_hash; @@ -1406,7 +1213,7 @@ int __init ip_conntrack_init(void) ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect", sizeof(struct ip_conntrack_expect), - 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, 0, NULL, NULL); if (!ip_conntrack_expect_cachep) { printk(KERN_ERR "Unable to create ip_expect slab cache\n"); goto err_free_conntrack_slab; diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c index 2d6ffa497997..f3818d71c7a2 100644 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ b/net/ipv4/netfilter/ip_conntrack_ftp.c @@ -29,7 +29,6 @@ MODULE_DESCRIPTION("ftp connection tracking helper"); static char ftp_buffer[65536]; static DECLARE_LOCK(ip_ftp_lock); -struct module *ip_conntrack_ftp = THIS_MODULE; #define MAX_PORTS 8 static int ports[MAX_PORTS]; @@ -39,6 +38,15 @@ module_param_array(ports, int, &ports_c, 0400); static int loose; 
module_param(loose, int, 0600); +unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + enum ip_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp, + u32 *seq); +EXPORT_SYMBOL_GPL(ip_nat_ftp_hook); + #if 0 #define DEBUGP printk #else @@ -243,24 +251,53 @@ static int find_pattern(const char *data, size_t dlen, return 1; } -static int help(struct sk_buff *skb, +/* Look up to see if we're just after a \n. */ +static int find_nl_seq(u16 seq, const struct ip_ct_ftp_master *info, int dir) +{ + unsigned int i; + + for (i = 0; i < info->seq_aft_nl_num[dir]; i++) + if (info->seq_aft_nl[dir][i] == seq) + return 1; + return 0; +} + +/* We don't update if it's older than what we have. */ +static void update_nl_seq(u16 nl_seq, struct ip_ct_ftp_master *info, int dir) +{ + unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; + + /* Look for oldest: if we find exact match, we're done. */ + for (i = 0; i < info->seq_aft_nl_num[dir]; i++) { + if (info->seq_aft_nl[dir][i] == nl_seq) + return; + + if (oldest == info->seq_aft_nl_num[dir] + || before(info->seq_aft_nl[dir][i], oldest)) + oldest = i; + } + + if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) + info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; + else if (oldest != NUM_SEQ_TO_REMEMBER) + info->seq_aft_nl[dir][oldest] = nl_seq; +} + +static int help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff, datalen; struct tcphdr _tcph, *th; char *fb_ptr; - u_int32_t old_seq_aft_nl; - int old_seq_aft_nl_set, ret; - u_int32_t array[6] = { 0 }; + int ret; + u32 seq, array[6] = { 0 }; int dir = CTINFO2DIR(ctinfo); unsigned int matchlen, matchoff; struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info; struct ip_conntrack_expect *exp; - struct ip_ct_ftp_expect *exp_ftp_info; - unsigned int i; - int found = 0; + int found = 0, ends_in_nl; /* Until there's been traffic both ways, don't look 
in packets. */ if (ctinfo != IP_CT_ESTABLISHED @@ -269,46 +306,35 @@ static int help(struct sk_buff *skb, return NF_ACCEPT; } - th = skb_header_pointer(skb, skb->nh.iph->ihl*4, + th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; - dataoff = skb->nh.iph->ihl*4 + th->doff*4; + dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4; /* No data? */ - if (dataoff >= skb->len) { - DEBUGP("ftp: skblen = %u\n", skb->len); + if (dataoff >= (*pskb)->len) { + DEBUGP("ftp: pskblen = %u\n", (*pskb)->len); return NF_ACCEPT; } - datalen = skb->len - dataoff; + datalen = (*pskb)->len - dataoff; LOCK_BH(&ip_ftp_lock); - fb_ptr = skb_header_pointer(skb, dataoff, - skb->len - dataoff, ftp_buffer); + fb_ptr = skb_header_pointer(*pskb, dataoff, + (*pskb)->len - dataoff, ftp_buffer); BUG_ON(fb_ptr == NULL); - old_seq_aft_nl_set = ct_ftp_info->seq_aft_nl_set[dir]; - old_seq_aft_nl = ct_ftp_info->seq_aft_nl[dir]; - - DEBUGP("conntrack_ftp: datalen %u\n", datalen); - if (fb_ptr[datalen - 1] == '\n') { - DEBUGP("conntrack_ftp: datalen %u ends in \\n\n", datalen); - if (!old_seq_aft_nl_set - || after(ntohl(th->seq) + datalen, old_seq_aft_nl)) { - DEBUGP("conntrack_ftp: updating nl to %u\n", - ntohl(th->seq) + datalen); - ct_ftp_info->seq_aft_nl[dir] = - ntohl(th->seq) + datalen; - ct_ftp_info->seq_aft_nl_set[dir] = 1; - } - } + ends_in_nl = (fb_ptr[datalen - 1] == '\n'); + seq = ntohl(th->seq) + datalen; - if(!old_seq_aft_nl_set || - (ntohl(th->seq) != old_seq_aft_nl)) { - DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u)\n", + /* Look up to see if we're just after a \n. */ + if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { + /* Now if this ends in \n, update ftp info. */ + DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", + ct_ftp_info->seq_aft_nl[0][dir] old_seq_aft_nl_set ? 
"":"(UNSET) ", old_seq_aft_nl); ret = NF_ACCEPT; - goto out; + goto out_update_nl; } /* Initialize IP array to expected address (it's not mentioned @@ -321,7 +347,7 @@ static int help(struct sk_buff *skb, for (i = 0; i < ARRAY_SIZE(search); i++) { if (search[i].dir != dir) continue; - found = find_pattern(fb_ptr, skb->len - dataoff, + found = find_pattern(fb_ptr, (*pskb)->len - dataoff, search[i].pattern, search[i].plen, search[i].skip, @@ -344,7 +370,7 @@ static int help(struct sk_buff *skb, goto out; } else if (found == 0) { /* No match */ ret = NF_ACCEPT; - goto out; + goto out_update_nl; } DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", @@ -354,20 +380,17 @@ static int help(struct sk_buff *skb, /* Allocate expectation which will be inserted */ exp = ip_conntrack_expect_alloc(); if (exp == NULL) { - ret = NF_ACCEPT; + ret = NF_DROP; goto out; } - exp_ftp_info = &exp->help.exp_ftp_info; + /* We refer to the reverse direction ("!dir") tuples here, + * because we're expecting something in the other direction. + * Doesn't matter unless NAT is happening. */ + exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; - /* Update the ftp info */ if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]) - == ct->tuplehash[dir].tuple.src.ip) { - exp->seq = ntohl(th->seq) + matchoff; - exp_ftp_info->len = matchlen; - exp_ftp_info->ftptype = search[i].ftptype; - exp_ftp_info->port = array[4] << 8 | array[5]; - } else { + != ct->tuplehash[dir].tuple.src.ip) { /* Enrico Scholz's passive FTP to partially RNAT'd ftp server: it really wants us to connect to a different IP address. Simply don't record it for @@ -381,28 +404,44 @@ static int help(struct sk_buff *skb, problem (DMZ machines opening holes to internal networks, or the packet filter itself). 
*/ if (!loose) { - ip_conntrack_expect_put(exp); ret = NF_ACCEPT; - goto out; + ip_conntrack_expect_free(exp); + goto out_update_nl; } + exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16) + | (array[2] << 8) | array[3]); } - exp->tuple = ((struct ip_conntrack_tuple) - { { ct->tuplehash[!dir].tuple.src.ip, - { 0 } }, - { htonl((array[0] << 24) | (array[1] << 16) - | (array[2] << 8) | array[3]), - { .tcp = { htons(array[4] << 8 | array[5]) } }, - IPPROTO_TCP }}); + exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; + exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]); + exp->tuple.src.u.tcp.port = 0; /* Don't care. */ + exp->tuple.dst.protonum = IPPROTO_TCP; exp->mask = ((struct ip_conntrack_tuple) { { 0xFFFFFFFF, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }}); + { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); exp->expectfn = NULL; + exp->master = ct; + + /* Now, NAT might want to mangle the packet, and register the + * (possibly changed) expectation itself. */ + if (ip_nat_ftp_hook) + ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, + matchoff, matchlen, exp, &seq); + else { + /* Can't expect this? Best to drop packet now. */ + if (ip_conntrack_expect_related(exp) != 0) { + ip_conntrack_expect_free(exp); + ret = NF_DROP; + } else + ret = NF_ACCEPT; + } - /* Ignore failure; should only happen with NAT */ - ip_conntrack_expect_related(exp, ct); - ret = NF_ACCEPT; +out_update_nl: + /* Now if this ends in \n, update ftp info. Seq may have been + * adjusted by NAT code. 
*/ + if (ends_in_nl) + update_nl_seq(seq, ct_ftp_info,dir); out: UNLOCK_BH(&ip_ftp_lock); return ret; @@ -434,11 +473,10 @@ static int __init init(void) ftp[i].tuple.src.u.tcp.port = htons(ports[i]); ftp[i].tuple.dst.protonum = IPPROTO_TCP; ftp[i].mask.src.u.tcp.port = 0xFFFF; - ftp[i].mask.dst.protonum = 0xFFFF; + ftp[i].mask.dst.protonum = 0xFF; ftp[i].max_expected = 1; - ftp[i].timeout = 0; - ftp[i].flags = IP_CT_HELPER_F_REUSE_EXPECT; - ftp[i].me = ip_conntrack_ftp; + ftp[i].timeout = 5 * 60; /* 5 minutes */ + ftp[i].me = THIS_MODULE; ftp[i].help = help; tmpname = &ftp_names[i][0]; @@ -460,7 +498,5 @@ static int __init init(void) return 0; } -PROVIDES_CONNTRACK(ftp); - module_init(init); module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c index ec79c08a13fd..54ef2dab5de0 100644 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ b/net/ipv4/netfilter/ip_conntrack_irc.c @@ -43,6 +43,13 @@ static unsigned int dcc_timeout = 300; static char irc_buffer[65536]; static DECLARE_LOCK(irc_buffer_lock); +unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp); +EXPORT_SYMBOL_GPL(ip_nat_irc_hook); + MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); MODULE_LICENSE("GPL"); @@ -56,8 +63,6 @@ MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels"); static char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " }; #define MINMATCHLEN 5 -struct module *ip_conntrack_irc = THIS_MODULE; - #if 0 #define DEBUGP(format, args...) 
printk(KERN_DEBUG "%s:%s:" format, \ __FILE__, __FUNCTION__ , ## args) @@ -98,7 +103,7 @@ static int parse_dcc(char *data, char *data_end, u_int32_t *ip, return 0; } -static int help(struct sk_buff *skb, +static int help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { unsigned int dataoff; @@ -106,11 +111,10 @@ static int help(struct sk_buff *skb, char *data, *data_limit, *ib_ptr; int dir = CTINFO2DIR(ctinfo); struct ip_conntrack_expect *exp; - struct ip_ct_irc_expect *exp_irc_info = NULL; - + u32 seq; u_int32_t dcc_ip; u_int16_t dcc_port; - int i; + int i, ret = NF_ACCEPT; char *addr_beg_p, *addr_end_p; DEBUGP("entered\n"); @@ -127,23 +131,23 @@ static int help(struct sk_buff *skb, } /* Not a full tcp header? */ - th = skb_header_pointer(skb, skb->nh.iph->ihl*4, + th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, sizeof(_tcph), &_tcph); if (th == NULL) return NF_ACCEPT; /* No data? */ - dataoff = skb->nh.iph->ihl*4 + th->doff*4; - if (dataoff >= skb->len) + dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4; + if (dataoff >= (*pskb)->len) return NF_ACCEPT; LOCK_BH(&irc_buffer_lock); - ib_ptr = skb_header_pointer(skb, dataoff, - skb->len - dataoff, irc_buffer); + ib_ptr = skb_header_pointer(*pskb, dataoff, + (*pskb)->len - dataoff, irc_buffer); BUG_ON(ib_ptr == NULL); data = ib_ptr; - data_limit = ib_ptr + skb->len - dataoff; + data_limit = ib_ptr + (*pskb)->len - dataoff; /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ @@ -195,19 +199,15 @@ static int help(struct sk_buff *skb, } exp = ip_conntrack_expect_alloc(); - if (exp == NULL) + if (exp == NULL) { + ret = NF_DROP; goto out; - - exp_irc_info = &exp->help.exp_irc_info; + } /* save position of address in dcc string, * necessary for NAT */ DEBUGP("tcph->seq = %u\n", th->seq); - exp->seq = ntohl(th->seq) + (addr_beg_p - ib_ptr); - exp_irc_info->len = (addr_end_p - addr_beg_p); - exp_irc_info->port = dcc_port; - DEBUGP("wrote info seq=%u 
(ofs=%u), len=%d\n", - exp->seq, (addr_end_p - _data), exp_irc_info->len); + seq = ntohl(th->seq) + (addr_beg_p - ib_ptr); exp->tuple = ((struct ip_conntrack_tuple) { { 0, { 0 } }, @@ -215,25 +215,25 @@ static int help(struct sk_buff *skb, IPPROTO_TCP }}); exp->mask = ((struct ip_conntrack_tuple) { { 0, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFFFF }}); - + { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); exp->expectfn = NULL; - - DEBUGP("expect_related %u.%u.%u.%u:%u-%u.%u.%u.%u:%u\n", - NIPQUAD(exp->tuple.src.ip), - ntohs(exp->tuple.src.u.tcp.port), - NIPQUAD(exp->tuple.dst.ip), - ntohs(exp->tuple.dst.u.tcp.port)); - - ip_conntrack_expect_related(exp, ct); - + exp->master = ct; + if (ip_nat_irc_hook) + ret = ip_nat_irc_hook(pskb, ctinfo, + addr_beg_p - ib_ptr, + addr_end_p - addr_beg_p, + exp); + else if (ip_conntrack_expect_related(exp) != 0) { + ip_conntrack_expect_free(exp); + ret = NF_DROP; + } goto out; } /* for .. NUM_DCCPROTO */ } /* while data < ... */ out: UNLOCK_BH(&irc_buffer_lock); - return NF_ACCEPT; + return ret; } static struct ip_conntrack_helper irc_helpers[MAX_PORTS]; @@ -265,11 +265,10 @@ static int __init init(void) hlpr->tuple.src.u.tcp.port = htons(ports[i]); hlpr->tuple.dst.protonum = IPPROTO_TCP; hlpr->mask.src.u.tcp.port = 0xFFFF; - hlpr->mask.dst.protonum = 0xFFFF; + hlpr->mask.dst.protonum = 0xFF; hlpr->max_expected = max_dcc_channels; hlpr->timeout = dcc_timeout; - hlpr->flags = IP_CT_HELPER_F_REUSE_EXPECT; - hlpr->me = ip_conntrack_irc; + hlpr->me = THIS_MODULE; hlpr->help = help; tmpname = &irc_names[i][0]; @@ -305,7 +304,5 @@ static void fini(void) } } -PROVIDES_CONNTRACK(irc); - module_init(init); module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 344820a514ca..602c74db3252 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -196,7 +196,7 @@ icmp_error_message(struct sk_buff *skb, } /* 
Update skb to refer to this connection */ - skb->nfct = &h->ctrack->ct_general; + skb->nfct = &tuplehash_to_ctrack(h)->ct_general; skb->nfctinfo = *ctinfo; return -NF_ACCEPT; } diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index aa49bea4aa56..7d9f8ea14a5e 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned long ip_ct_sctp_timeout_closed = 10 SECS; -unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS; -unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS; -unsigned long ip_ct_sctp_timeout_established = 5 DAYS; -unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; -unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; -unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; +static unsigned long ip_ct_sctp_timeout_closed = 10 SECS; +static unsigned long ip_ct_sctp_timeout_cookie_wait = 3 SECS; +static unsigned long ip_ct_sctp_timeout_cookie_echoed = 3 SECS; +static unsigned long ip_ct_sctp_timeout_established = 5 DAYS; +static unsigned long ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; +static unsigned long ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; +static unsigned long ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; static unsigned long * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ @@ -494,14 +494,7 @@ static int sctp_new(struct ip_conntrack *conntrack, return 1; } -static int sctp_exp_matches_pkt(struct ip_conntrack_expect *exp, - const struct sk_buff *skb) -{ - /* To be implemented */ - return 0; -} - -struct ip_conntrack_protocol ip_conntrack_protocol_sctp = { +static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = { .proto = IPPROTO_SCTP, .name = "sctp", .pkt_to_tuple = sctp_pkt_to_tuple, @@ -511,7 +504,6 @@ struct ip_conntrack_protocol 
ip_conntrack_protocol_sctp = { .packet = sctp_packet, .new = sctp_new, .destroy = NULL, - .exp_matches_pkt = sctp_exp_matches_pkt, .me = THIS_MODULE }; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index d327678d3dbd..54a2f2fe1cb1 100644 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -707,9 +707,9 @@ static int tcp_in_window(struct ip_ct_tcp *state, #ifdef CONFIG_IP_NF_NAT_NEEDED /* Update sender->td_end after NAT successfully mangled the packet */ -int ip_conntrack_tcp_update(struct sk_buff *skb, - struct ip_conntrack *conntrack, - int dir) +void ip_conntrack_tcp_update(struct sk_buff *skb, + struct ip_conntrack *conntrack, + enum ip_conntrack_dir dir) { struct iphdr *iph = skb->nh.iph; struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4; @@ -735,8 +735,6 @@ int ip_conntrack_tcp_update(struct sk_buff *skb, sender->td_scale, receiver->td_end, receiver->td_maxend, receiver->td_maxwin, receiver->td_scale); - - return 1; } #endif @@ -1061,22 +1059,6 @@ static int tcp_new(struct ip_conntrack *conntrack, return 1; } -static int tcp_exp_matches_pkt(struct ip_conntrack_expect *exp, - const struct sk_buff *skb) -{ - const struct iphdr *iph = skb->nh.iph; - struct tcphdr *th, _tcph; - unsigned int datalen; - - th = skb_header_pointer(skb, iph->ihl * 4, - sizeof(_tcph), &_tcph); - if (th == NULL) - return 0; - datalen = skb->len - iph->ihl*4 - th->doff*4; - - return between(exp->seq, ntohl(th->seq), ntohl(th->seq) + datalen); -} - struct ip_conntrack_protocol ip_conntrack_protocol_tcp = { .proto = IPPROTO_TCP, @@ -1087,6 +1069,5 @@ struct ip_conntrack_protocol ip_conntrack_protocol_tcp = .print_conntrack = tcp_print_conntrack, .packet = tcp_packet, .new = tcp_new, - .exp_matches_pkt = tcp_exp_matches_pkt, .error = tcp_error, }; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 
dc796520c158..b1b002d94983 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -66,7 +66,8 @@ print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple, #ifdef CONFIG_IP_NF_CT_ACCT static unsigned int -seq_print_counters(struct seq_file *s, struct ip_conntrack_counter *counter) +seq_print_counters(struct seq_file *s, + const struct ip_conntrack_counter *counter) { return seq_printf(s, "packets=%llu bytes=%llu ", (unsigned long long)counter->packets, @@ -99,7 +100,7 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) static int ct_seq_real_show(const struct ip_conntrack_tuple_hash *hash, struct seq_file *s) { - struct ip_conntrack *conntrack = hash->ctrack; + const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); struct ip_conntrack_protocol *proto; MUST_BE_READ_LOCKED(&ip_conntrack_lock); @@ -200,7 +201,6 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos) /* strange seq_file api calls stop even if we fail, * thus we need to grab lock since stop unlocks */ READ_LOCK(&ip_conntrack_lock); - READ_LOCK(&ip_conntrack_expect_tuple_lock); if (list_empty(e)) return NULL; @@ -227,7 +227,6 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) static void exp_seq_stop(struct seq_file *s, void *v) { - READ_UNLOCK(&ip_conntrack_expect_tuple_lock); READ_UNLOCK(&ip_conntrack_lock); } @@ -235,14 +234,13 @@ static int exp_seq_show(struct seq_file *s, void *v) { struct ip_conntrack_expect *expect = v; - if (expect->expectant->helper->timeout) + if (expect->timeout.function) seq_printf(s, "%lu ", timer_pending(&expect->timeout) ? 
(expect->timeout.expires - jiffies)/HZ : 0); else seq_printf(s, "- "); - seq_printf(s, "use=%u proto=%u ", atomic_read(&expect->use), - expect->tuple.dst.protonum); + seq_printf(s, "proto=%u ", expect->tuple.dst.protonum); print_tuple(s, &expect->tuple, ip_ct_find_proto(expect->tuple.dst.protonum)); @@ -364,8 +362,20 @@ static unsigned int ip_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + /* This is where we call the helper: as the packet goes out. */ + ct = ip_conntrack_get(*pskb, &ctinfo); + if (ct && ct->helper) { + unsigned int ret; + ret = ct->helper->help(pskb, ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + } + /* We've seen it coming out the other side: confirm it */ - return ip_conntrack_confirm(*pskb); + return ip_conntrack_confirm(pskb); } static unsigned int ip_conntrack_defrag(unsigned int hooknum, @@ -896,17 +906,13 @@ EXPORT_SYMBOL(ip_ct_iterate_cleanup); EXPORT_SYMBOL(ip_ct_refresh_acct); EXPORT_SYMBOL(ip_ct_protos); EXPORT_SYMBOL(ip_ct_find_proto); -EXPORT_SYMBOL(ip_ct_find_helper); EXPORT_SYMBOL(ip_conntrack_expect_alloc); +EXPORT_SYMBOL(ip_conntrack_expect_free); EXPORT_SYMBOL(ip_conntrack_expect_related); -EXPORT_SYMBOL(ip_conntrack_change_expect); EXPORT_SYMBOL(ip_conntrack_unexpect_related); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_put); EXPORT_SYMBOL(ip_conntrack_tuple_taken); EXPORT_SYMBOL(ip_ct_gather_frags); EXPORT_SYMBOL(ip_conntrack_htable_size); -EXPORT_SYMBOL(ip_conntrack_expect_list); EXPORT_SYMBOL(ip_conntrack_lock); EXPORT_SYMBOL(ip_conntrack_hash); EXPORT_SYMBOL(ip_conntrack_untracked); diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c index 01a5e53e81bb..992fac3e36ee 100644 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ b/net/ipv4/netfilter/ip_conntrack_tftp.c @@ -38,15 +38,21 @@ MODULE_PARM_DESC(ports, "port numbers of tftp 
servers"); #define DEBUGP(format, args...) #endif -static int tftp_help(struct sk_buff *skb, +unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp); +EXPORT_SYMBOL_GPL(ip_nat_tftp_hook); + +static int tftp_help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { struct tftphdr _tftph, *tfh; struct ip_conntrack_expect *exp; + unsigned int ret = NF_ACCEPT; - tfh = skb_header_pointer(skb, - skb->nh.iph->ihl * 4 + sizeof(struct udphdr), + tfh = skb_header_pointer(*pskb, + (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr), sizeof(_tftph), &_tftph); if (tfh == NULL) return NF_ACCEPT; @@ -61,19 +67,25 @@ static int tftp_help(struct sk_buff *skb, exp = ip_conntrack_expect_alloc(); if (exp == NULL) - return NF_ACCEPT; + return NF_DROP; exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; exp->mask.src.ip = 0xffffffff; exp->mask.dst.ip = 0xffffffff; exp->mask.dst.u.udp.port = 0xffff; - exp->mask.dst.protonum = 0xffff; + exp->mask.dst.protonum = 0xff; exp->expectfn = NULL; + exp->master = ct; DEBUGP("expect: "); DUMP_TUPLE(&exp->tuple); DUMP_TUPLE(&exp->mask); - ip_conntrack_expect_related(exp, ct); + if (ip_nat_tftp_hook) + ret = ip_nat_tftp_hook(pskb, ctinfo, exp); + else if (ip_conntrack_expect_related(exp) != 0) { + ip_conntrack_expect_free(exp); + ret = NF_DROP; + } break; case TFTP_OPCODE_DATA: case TFTP_OPCODE_ACK: @@ -116,11 +128,10 @@ static int __init init(void) tftp[i].tuple.dst.protonum = IPPROTO_UDP; tftp[i].tuple.src.u.udp.port = htons(ports[i]); - tftp[i].mask.dst.protonum = 0xFFFF; + tftp[i].mask.dst.protonum = 0xFF; tftp[i].mask.src.u.udp.port = 0xFFFF; tftp[i].max_expected = 1; - tftp[i].timeout = 0; - tftp[i].flags = IP_CT_HELPER_F_REUSE_EXPECT; + tftp[i].timeout = 5 * 60; /* 5 minutes */ tftp[i].me = THIS_MODULE; tftp[i].help = tftp_help; @@ -144,7 +155,5 @@ static int __init init(void) return(0); } -PROVIDES_CONNTRACK(tftp); - module_init(init); module_exit(fini); 
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c index 144e32f3582d..da1f412583ed 100644 --- a/net/ipv4/netfilter/ip_nat_amanda.c +++ b/net/ipv4/netfilter/ip_nat_amanda.c @@ -31,118 +31,58 @@ MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); MODULE_DESCRIPTION("Amanda NAT helper"); MODULE_LICENSE("GPL"); -static unsigned int -amanda_nat_expected(struct sk_buff **pskb, - unsigned int hooknum, - struct ip_conntrack *ct, - struct ip_nat_info *info) +static unsigned int help(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp) { - struct ip_conntrack *master = master_ct(ct); - struct ip_ct_amanda_expect *exp_amanda_info; - struct ip_nat_range range; - u_int32_t newip; - - IP_NF_ASSERT(info); - IP_NF_ASSERT(master); - IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); + char buffer[sizeof("65535")]; + u_int16_t port; + unsigned int ret; - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) - newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - else - newip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; + /* Connection comes from client. */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = IP_CT_DIR_ORIGINAL; - /* We don't want to manip the per-protocol, just the IPs. */ - range.flags = IP_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = newip; + /* When you see the packet, we need to NAT it the same as the + * this one (ie. same IP: it will be TCP and master is UDP). */ + exp->expectfn = ip_nat_follow_master; - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) { - exp_amanda_info = &ct->master->help.exp_amanda_info; - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max - = ((union ip_conntrack_manip_proto) - { .udp = { htons(exp_amanda_info->port) } }); + /* Try to get same port: if not, try to change it. 
*/ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + exp->tuple.dst.u.tcp.port = htons(port); + if (ip_conntrack_expect_related(exp) == 0) + break; } - return ip_nat_setup_info(ct, &range, hooknum); -} - -static int amanda_data_fixup(struct ip_conntrack *ct, - struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - struct ip_conntrack_expect *exp) -{ - struct ip_ct_amanda_expect *exp_amanda_info; - struct ip_conntrack_tuple t = exp->tuple; - char buffer[sizeof("65535")]; - u_int16_t port; - - /* Alter conntrack's expectations. */ - exp_amanda_info = &exp->help.exp_amanda_info; - t.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - for (port = exp_amanda_info->port; port != 0; port++) { - t.dst.u.tcp.port = htons(port); - if (ip_conntrack_change_expect(exp, &t) == 0) - break; + if (port == 0) { + ip_conntrack_expect_free(exp); + return NF_DROP; } - if (port == 0) - return 0; sprintf(buffer, "%u", port); - return ip_nat_mangle_udp_packet(pskb, ct, ctinfo, - exp_amanda_info->offset, - exp_amanda_info->len, - buffer, strlen(buffer)); -} - -static unsigned int help(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp, - struct ip_nat_info *info, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) -{ - int dir = CTINFO2DIR(ctinfo); - int ret = NF_ACCEPT; - - /* Only mangle things once: original direction in POST_ROUTING - and reply direction on PRE_ROUTING. 
*/ - if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) - || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) - return NF_ACCEPT; - - /* if this exectation has a "offset" the packet needs to be mangled */ - if (exp->help.exp_amanda_info.offset != 0) - if (!amanda_data_fixup(ct, pskb, ctinfo, exp)) - ret = NF_DROP; - exp->help.exp_amanda_info.offset = 0; - + ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo, + matchoff, matchlen, + buffer, strlen(buffer)); + if (ret != NF_ACCEPT) + ip_conntrack_unexpect_related(exp); return ret; } -static struct ip_nat_helper ip_nat_amanda_helper; - static void __exit fini(void) { - ip_nat_helper_unregister(&ip_nat_amanda_helper); + ip_nat_amanda_hook = NULL; + /* Make sure noone calls it, meanwhile. */ + synchronize_net(); } static int __init init(void) { - struct ip_nat_helper *hlpr = &ip_nat_amanda_helper; - - hlpr->tuple.dst.protonum = IPPROTO_UDP; - hlpr->tuple.src.u.udp.port = htons(10080); - hlpr->mask.src.u.udp.port = 0xFFFF; - hlpr->mask.dst.protonum = 0xFFFF; - hlpr->help = help; - hlpr->flags = 0; - hlpr->me = THIS_MODULE; - hlpr->expect = amanda_nat_expected; - hlpr->name = "amanda"; - - return ip_nat_helper_register(hlpr); + BUG_ON(ip_nat_amanda_hook); + ip_nat_amanda_hook = help; + return 0; } -NEEDS_CONNTRACK(amanda); module_init(init); module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 84c1e0e2cda6..96b24c024f9c 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -42,7 +42,6 @@ #endif DECLARE_RWLOCK(ip_nat_lock); -DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); /* Calculated at init based on memory size */ static unsigned int ip_nat_htable_size; @@ -51,42 +50,23 @@ static struct list_head *bysource; struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; -/* We keep extra hashes for each conntrack, for fast searching. 
*/ -static inline size_t -hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto) -{ - /* Modified src and dst, to ensure we don't create two - identical streams. */ - return (src + dst + proto) % ip_nat_htable_size; -} - -static inline size_t -hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto) +/* We keep an extra hash for each conntrack, for fast searching. */ +static inline unsigned int +hash_by_src(const struct ip_conntrack_tuple *tuple) { /* Original src, to ensure we map it consistently if poss. */ - return (manip->ip + manip->u.all + proto) % ip_nat_htable_size; + return jhash_3words(tuple->src.ip, tuple->src.u.all, + tuple->dst.protonum, 0) % ip_nat_htable_size; } /* Noone using conntrack by the time this called. */ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) { - struct ip_nat_info *info = &conn->nat.info; - unsigned int hs, hp; - - if (!info->initialized) + if (!(conn->status & IPS_NAT_DONE_MASK)) return; - hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src, - conn->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); - - hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, - conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, - conn->tuplehash[IP_CT_DIR_REPLY] - .tuple.dst.protonum); - WRITE_LOCK(&ip_nat_lock); - list_del(&info->bysource); + list_del(&conn->nat.info.bysource); WRITE_UNLOCK(&ip_nat_lock); } @@ -117,25 +97,6 @@ ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, return ip_conntrack_tuple_taken(&reply, ignored_conntrack); } -/* Before 2.6.11 we did implicit source NAT if required. Warn about change. 
*/ -static void warn_if_extra_mangle(u32 dstip, u32 srcip) -{ - static int warned = 0; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; - struct rtable *rt; - - if (ip_route_output_key(&rt, &fl) != 0) - return; - - if (rt->rt_src != srcip && !warned) { - printk("NAT: no longer support implicit source local NAT\n"); - printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", - NIPQUAD(srcip), NIPQUAD(dstip)); - warned = 1; - } - ip_rt_put(rt); -} - /* If we source map this tuple so reply looks like reply_tuple, will * that meet the constraints of range. */ static int @@ -178,11 +139,10 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, struct ip_conntrack_tuple *result, const struct ip_nat_range *range) { - unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum); + unsigned int h = hash_by_src(tuple); struct ip_conntrack *ct; - MUST_BE_READ_LOCKED(&ip_nat_lock); - + READ_LOCK(&ip_nat_lock); list_for_each_entry(ct, &bysource[h], nat.info.bysource) { if (same_src(ct, tuple)) { /* Copy source part from reply tuple. 
*/ @@ -190,10 +150,13 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); result->dst = tuple->dst; - if (in_range(result, range)) + if (in_range(result, range)) { + READ_UNLOCK(&ip_nat_lock); return 1; + } } } + READ_UNLOCK(&ip_nat_lock); return 0; } @@ -207,7 +170,7 @@ static void find_best_ips_proto(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range, const struct ip_conntrack *conntrack, - unsigned int hooknum) + enum ip_nat_manip_type maniptype) { u_int32_t *var_ipp; /* Host order */ @@ -217,7 +180,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) return; - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) + if (maniptype == IP_NAT_MANIP_SRC) var_ipp = &tuple->src.ip; else var_ipp = &tuple->dst.ip; @@ -232,7 +195,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple, * spread in practice (if there are a small number of IPs * involved, there usually aren't that many connections * anyway). The consistency means that servers see the same - * client coming from the same IP (some Internet Backing sites + * client coming from the same IP (some Internet Banking sites * like this), even across reboots. */ minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); @@ -251,7 +214,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_conntrack_tuple *orig_tuple, const struct ip_nat_range *range, struct ip_conntrack *conntrack, - unsigned int hooknum) + enum ip_nat_manip_type maniptype) { struct ip_nat_protocol *proto = ip_nat_find_proto(orig_tuple->dst.protonum); @@ -263,7 +226,7 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, This is only required for source (ie. NAT/masq) mappings. So far, we don't do local source mappings, so multiple manips not an issue. 
*/ - if (hooknum == NF_IP_POST_ROUTING) { + if (maniptype == IP_NAT_MANIP_SRC) { if (find_appropriate_src(orig_tuple, tuple, range)) { DEBUGP("get_unique_tuple: Found current src map\n"); if (!ip_nat_used_tuple(tuple, conntrack)) @@ -274,375 +237,172 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, /* 2) Select the least-used IP/proto combination in the given range. */ *tuple = *orig_tuple; - find_best_ips_proto(tuple, range, conntrack, hooknum); - - if (hooknum == NF_IP_LOCAL_OUT && tuple->dst.ip != orig_tuple->dst.ip) - warn_if_extra_mangle(tuple->src.ip, tuple->dst.ip); + find_best_ips_proto(tuple, range, conntrack, maniptype); /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) - || proto->in_range(tuple, HOOK2MANIP(hooknum), - &range->min, &range->max)) + || proto->in_range(tuple, maniptype, &range->min, &range->max)) && !ip_nat_used_tuple(tuple, conntrack)) return; /* Last change: get protocol to try to obtain unique tuple. */ - proto->unique_tuple(tuple, range, HOOK2MANIP(hooknum), conntrack); + proto->unique_tuple(tuple, range, maniptype, conntrack); } -/* Where to manip the reply packets (will be reverse manip). 
*/ -static unsigned int opposite_hook[NF_IP_NUMHOOKS] -= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, - [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING, - [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN, - [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT, -}; - unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, const struct ip_nat_range *range, unsigned int hooknum) { - struct ip_conntrack_tuple new_tuple, inv_tuple, reply; - struct ip_conntrack_tuple orig_tp; + struct ip_conntrack_tuple curr_tuple, new_tuple; struct ip_nat_info *info = &conntrack->nat.info; - int in_hashes = info->initialized; + int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK); + enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); - MUST_BE_WRITE_LOCKED(&ip_nat_lock); IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN || hooknum == NF_IP_LOCAL_OUT); - IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); - IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); + BUG_ON(ip_nat_initialized(conntrack, maniptype)); /* What we've got will look like inverse of reply. Normally this is what is in the conntrack, except for prior manipulations (future optimization: if num_manips == 0, orig_tp = conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ - invert_tuplepr(&orig_tp, + invert_tuplepr(&curr_tuple, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple); -#if 0 - { - unsigned int i; - - DEBUGP("Hook %u (%s), ", hooknum, - HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST"); - DUMP_TUPLE(&orig_tp); - DEBUGP("Range %p: ", mr); - for (i = 0; i < mr->rangesize; i++) { - DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n", - i, - (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) - ? " MAP_IPS" : "", - (mr->range[i].flags - & IP_NAT_RANGE_PROTO_SPECIFIED) - ? " PROTO_SPECIFIED" : "", - (mr->range[i].flags & IP_NAT_RANGE_FULL) - ? 
" FULL" : "", - NIPQUAD(mr->range[i].min_ip), - NIPQUAD(mr->range[i].max_ip), - mr->range[i].min.all, - mr->range[i].max.all); - } - } -#endif - - get_unique_tuple(&new_tuple, &orig_tp, range, conntrack, hooknum); + get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype); - /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT): - the original (A/B/C/D') and the mangled one (E/F/G/H'). + if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) { + struct ip_conntrack_tuple reply; - We're only allowed to work with the SRC per-proto - part, so we create inverses of both to start, then - derive the other fields we need. */ + /* Alter conntrack table so will recognize replies. */ + invert_tuplepr(&reply, &new_tuple); + ip_conntrack_alter_reply(conntrack, &reply); - /* Reply connection: simply invert the new tuple - (G/H/E/F') */ - invert_tuplepr(&reply, &new_tuple); - - /* Alter conntrack table so will recognize replies. */ - ip_conntrack_alter_reply(conntrack, &reply); - - /* FIXME: We can simply used existing conntrack reply tuple - here --RR */ - /* Create inverse of original: C/D/A/B' */ - invert_tuplepr(&inv_tuple, &orig_tp); - - /* Has source changed?. */ - if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) { - IP_NF_ASSERT(HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC); - IP_NF_ASSERT(ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)); - - /* In this direction, a source manip. */ - info->manips[info->num_manips++] = - ((struct ip_nat_info_manip) - { IP_CT_DIR_ORIGINAL, hooknum, - IP_NAT_MANIP_SRC, new_tuple.src }); - - IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); - - /* In the reverse direction, a destination manip. */ - info->manips[info->num_manips++] = - ((struct ip_nat_info_manip) - { IP_CT_DIR_REPLY, opposite_hook[hooknum], - IP_NAT_MANIP_DST, orig_tp.src }); - IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + /* Non-atomic: we own this at the moment. 
*/ + if (maniptype == IP_NAT_MANIP_SRC) + conntrack->status |= IPS_SRC_NAT; + else + conntrack->status |= IPS_DST_NAT; } - /* Has destination changed? */ - if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) { - IP_NF_ASSERT(HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST); - - /* In this direction, a destination manip */ - info->manips[info->num_manips++] = - ((struct ip_nat_info_manip) - { IP_CT_DIR_ORIGINAL, hooknum, - IP_NAT_MANIP_DST, reply.src }); - - IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); - - /* In the reverse direction, a source manip. */ - info->manips[info->num_manips++] = - ((struct ip_nat_info_manip) - { IP_CT_DIR_REPLY, opposite_hook[hooknum], - IP_NAT_MANIP_SRC, inv_tuple.src }); - IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + /* Place in source hash if this is the first time. */ + if (have_to_hash) { + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple); + WRITE_LOCK(&ip_nat_lock); + list_add(&info->bysource, &bysource[srchash]); + WRITE_UNLOCK(&ip_nat_lock); } - /* If there's a helper, assign it; based on new tuple. */ - if (!conntrack->master) - info->helper = __ip_nat_find_helper(&reply); - /* It's done. */ - info->initialized |= (1 << HOOK2MANIP(hooknum)); - - if (in_hashes) - replace_in_hashes(conntrack, info); + if (maniptype == IP_NAT_MANIP_DST) + set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status); else - place_in_hashes(conntrack, info); + set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status); return NF_ACCEPT; } -void replace_in_hashes(struct ip_conntrack *conntrack, - struct ip_nat_info *info) -{ - /* Source has changed, so replace in hashes. 
*/ - unsigned int srchash - = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.src, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); - MUST_BE_WRITE_LOCKED(&ip_nat_lock); - list_move(&info->bysource, &bysource[srchash]); -} - -void place_in_hashes(struct ip_conntrack *conntrack, - struct ip_nat_info *info) -{ - unsigned int srchash - = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.src, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.protonum); - MUST_BE_WRITE_LOCKED(&ip_nat_lock); - list_add(&info->bysource, &bysource[srchash]); -} - /* Returns true if succeeded. */ static int manip_pkt(u_int16_t proto, struct sk_buff **pskb, unsigned int iphdroff, - const struct ip_conntrack_manip *manip, + const struct ip_conntrack_tuple *target, enum ip_nat_manip_type maniptype) { struct iphdr *iph; (*pskb)->nfcache |= NFC_ALTERED; - if (!skb_ip_make_writable(pskb, iphdroff+sizeof(*iph))) + if (!skb_ip_make_writable(pskb, iphdroff + sizeof(*iph))) return 0; iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ if (!ip_nat_find_proto(proto)->manip_pkt(pskb, iphdroff, - manip, maniptype)) + target, maniptype)) return 0; iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip, + iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, iph->check); - iph->saddr = manip->ip; + iph->saddr = target->src.ip; } else { - iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip, + iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, iph->check); - iph->daddr = manip->ip; + iph->daddr = target->dst.ip; } return 1; } -static inline int exp_for_packet(struct ip_conntrack_expect *exp, - struct sk_buff *skb) +/* Do packet manipulations according to ip_nat_setup_info. 
*/ +unsigned int nat_packet(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + struct sk_buff **pskb) { - struct ip_conntrack_protocol *proto; - int ret = 1; - - MUST_BE_READ_LOCKED(&ip_conntrack_lock); - proto = ip_ct_find_proto(skb->nh.iph->protocol); - if (proto->exp_matches_pkt) - ret = proto->exp_matches_pkt(exp, skb); - - return ret; -} - -/* Do packet manipulations according to binding. */ -unsigned int -do_bindings(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct ip_nat_info *info, - unsigned int hooknum, - struct sk_buff **pskb) -{ - unsigned int i; - struct ip_nat_helper *helper; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - int proto = (*pskb)->nh.iph->protocol; - - /* Need nat lock to protect against modification, but neither - conntrack (referenced) and helper (deleted with - synchronize_bh()) can vanish. */ - READ_LOCK(&ip_nat_lock); - for (i = 0; i < info->num_manips; i++) { - if (info->manips[i].direction == dir - && info->manips[i].hooknum == hooknum) { - DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n", - *pskb, - info->manips[i].maniptype == IP_NAT_MANIP_SRC - ? "SRC" : "DST", - NIPQUAD(info->manips[i].manip.ip), - htons(info->manips[i].manip.u.all)); - if (!manip_pkt(proto, pskb, 0, - &info->manips[i].manip, - info->manips[i].maniptype)) { - READ_UNLOCK(&ip_nat_lock); - return NF_DROP; - } - } + unsigned long statusbit; + enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum); + + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) + && (hooknum == NF_IP_POST_ROUTING || hooknum == NF_IP_LOCAL_IN)) { + DEBUGP("ip_nat_core: adjusting sequence number\n"); + /* future: put this in a l4-proto specific function, + * and call this function here. 
*/ + if (!ip_nat_seq_adjust(pskb, ct, ctinfo)) + return NF_DROP; } - helper = info->helper; - READ_UNLOCK(&ip_nat_lock); - if (helper) { - struct ip_conntrack_expect *exp = NULL; - struct list_head *cur_item; - int ret = NF_ACCEPT; - int helper_called = 0; - - DEBUGP("do_bindings: helper existing for (%p)\n", ct); - - /* Always defragged for helpers */ - IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off - & htons(IP_MF|IP_OFFSET))); - - /* Have to grab read lock before sibling_list traversal */ - READ_LOCK(&ip_conntrack_lock); - list_for_each_prev(cur_item, &ct->sibling_list) { - exp = list_entry(cur_item, struct ip_conntrack_expect, - expected_list); - - /* if this expectation is already established, skip */ - if (exp->sibling) - continue; - - if (exp_for_packet(exp, *pskb)) { - /* FIXME: May be true multiple times in the - * case of UDP!! */ - DEBUGP("calling nat helper (exp=%p) for packet\n", exp); - ret = helper->help(ct, exp, info, ctinfo, - hooknum, pskb); - if (ret != NF_ACCEPT) { - READ_UNLOCK(&ip_conntrack_lock); - return ret; - } - helper_called = 1; - } - } - /* Helper might want to manip the packet even when there is no - * matching expectation for this packet */ - if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) { - DEBUGP("calling nat helper for packet without expectation\n"); - ret = helper->help(ct, NULL, info, ctinfo, - hooknum, pskb); - if (ret != NF_ACCEPT) { - READ_UNLOCK(&ip_conntrack_lock); - return ret; - } - } - READ_UNLOCK(&ip_conntrack_lock); - - /* Adjust sequence number only once per packet - * (helper is called at all hooks) */ - if (proto == IPPROTO_TCP - && (hooknum == NF_IP_POST_ROUTING - || hooknum == NF_IP_LOCAL_IN)) { - DEBUGP("ip_nat_core: adjusting sequence number\n"); - /* future: put this in a l4-proto specific function, - * and call this function here. 
*/ - if (!ip_nat_seq_adjust(pskb, ct, ctinfo)) - ret = NF_DROP; - } + if (mtype == IP_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; - return ret; + /* Invert if this is reply dir. */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; - } else - return NF_ACCEPT; + /* Non-atomic: these bits don't change. */ + if (ct->status & statusbit) { + struct ip_conntrack_tuple target; - /* not reached */ -} + /* We are aiming to look like inverse of other direction. */ + invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); -static inline int tuple_src_equal_dst(const struct ip_conntrack_tuple *t1, - const struct ip_conntrack_tuple *t2) -{ - if (t1->dst.protonum != t2->dst.protonum || t1->src.ip != t2->dst.ip) - return 0; - if (t1->dst.protonum != IPPROTO_ICMP) - return t1->src.u.all == t2->dst.u.all; - else { - struct ip_conntrack_tuple inv; - - /* ICMP tuples are asymetric */ - invert_tuplepr(&inv, t1); - return inv.src.u.all == t2->src.u.all && - inv.dst.u.all == t2->dst.u.all; + if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) + return NF_DROP; } + return NF_ACCEPT; } -int -icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *conntrack, - unsigned int hooknum, - int dir) +/* Dir is direction ICMP is coming from (opposite to packet it contains) */ +int icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir) { struct { struct icmphdr icmp; struct iphdr ip; } *inside; - unsigned int i; - struct ip_nat_info *info = &conntrack->nat.info; - struct ip_conntrack_tuple *cttuple, innertuple; - int hdrlen; + struct ip_conntrack_tuple inner, target; + int hdrlen = (*pskb)->nh.iph->ihl * 4; - if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside))) + if (!skb_ip_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; /* We're actually going to mangle it beyond trivial 
checksum @@ -662,93 +422,53 @@ icmp_reply_translation(struct sk_buff **pskb, start talking to each other without our translation, and be confused... --RR */ if (inside->icmp.type == ICMP_REDIRECT) { - /* Don't care about races here. */ - if (info->initialized - != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST)) - || info->num_manips != 0) + /* If NAT isn't finished, assume it and drop. */ + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + + if (ct->status & IPS_NAT_MASK) return 0; } - DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n", - *pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); - /* Note: May not be from a NAT'd host, but probably safest to - do translation always as if it came from the host itself - (even though a "host unreachable" coming from the host - itself is a bit weird). - - More explanation: some people use NAT for anonymizing. - Also, CERT recommends dropping all packets from private IP - addresses (although ICMP errors from internal links with - such addresses are not too uncommon, as Alan Cox points - out) */ + DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", + *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr) + inside->ip.ihl*4, - &innertuple, - ip_ct_find_proto(inside->ip.protocol))) + &inner, ip_ct_find_proto(inside->ip.protocol))) return 0; - cttuple = &conntrack->tuplehash[dir].tuple; - READ_LOCK(&ip_nat_lock); - for (i = 0; i < info->num_manips; i++) { - DEBUGP("icmp_reply: manip %u dir %s hook %u\n", - i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ? - "ORIG" : "REPLY", info->manips[i].hooknum); - - if (info->manips[i].direction != dir) - continue; - - /* Mapping the inner packet is just like a normal packet, except - * it was never src/dst reversed, so where we would normally - * apply a dst manip, we apply a src, and vice versa. 
*/ - - /* Only true for forwarded packets, locally generated packets - * never hit PRE_ROUTING, we need to apply their PRE_ROUTING - * manips in LOCAL_OUT. */ - if (hooknum == NF_IP_LOCAL_OUT && - info->manips[i].hooknum == NF_IP_PRE_ROUTING) - hooknum = info->manips[i].hooknum; - - if (info->manips[i].hooknum != hooknum) - continue; - - /* ICMP errors may be generated locally for packets that - * don't have all NAT manips applied yet. Verify manips - * have been applied before reversing them */ - if (info->manips[i].maniptype == IP_NAT_MANIP_SRC) { - if (!tuple_src_equal_dst(cttuple, &innertuple)) - continue; - } else { - if (!tuple_src_equal_dst(&innertuple, cttuple)) - continue; - } + /* Change inner back to look like incoming packet. We do the + opposite manip on this hook to normal, because it might not + pass all hooks (locally-generated ICMP). Consider incoming + packet: PREROUTING (DST manip), routing produces ICMP, goes + through POSTROUTING (which must correct the DST manip). */ + if (!manip_pkt(inside->ip.protocol, pskb, + (*pskb)->nh.iph->ihl*4 + + sizeof(inside->icmp), + &ct->tuplehash[!dir].tuple, + !manip)) + return 0; - DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", - info->manips[i].maniptype == IP_NAT_MANIP_SRC - ? "DST" : "SRC", NIPQUAD(info->manips[i].manip.ip), - ntohs(info->manips[i].manip.u.udp.port)); - if (!manip_pkt(inside->ip.protocol, pskb, - (*pskb)->nh.iph->ihl*4 + sizeof(inside->icmp), - &info->manips[i].manip, - !info->manips[i].maniptype)) - goto unlock_fail; - - /* Outer packet needs to have IP header NATed like - it's a reply. */ - - /* Use mapping to map outer packet: 0 give no - per-proto mapping */ - DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n", - info->manips[i].maniptype == IP_NAT_MANIP_SRC - ? 
"SRC" : "DST", NIPQUAD(info->manips[i].manip.ip)); - if (!manip_pkt(0, pskb, 0, &info->manips[i].manip, - info->manips[i].maniptype)) - goto unlock_fail; - } - READ_UNLOCK(&ip_nat_lock); + /* Change outer to look the reply to an incoming packet + * (proto 0 means don't invert per-proto part). */ - hdrlen = (*pskb)->nh.iph->ihl * 4; + /* Obviously, we need to NAT destination IP, but source IP + should be NAT'ed only if it is from a NAT'd host. + Explanation: some people use NAT for anonymizing. Also, + CERT recommends dropping all packets from private IP + addresses (although ICMP errors from internal links with + such addresses are not too uncommon, as Alan Cox points + out) */ + if (manip != IP_NAT_MANIP_SRC + || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) { + invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + if (!manip_pkt(0, pskb, 0, &target, manip)) + return 0; + } + + /* Reloading "inside" here since manip_pkt inner. */ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; inside->icmp.checksum = 0; @@ -756,10 +476,33 @@ icmp_reply_translation(struct sk_buff **pskb, (*pskb)->len - hdrlen, 0)); return 1; +} - unlock_fail: - READ_UNLOCK(&ip_nat_lock); - return 0; +/* Protocol registration. */ +int ip_nat_protocol_register(struct ip_nat_protocol *proto) +{ + int ret = 0; + + WRITE_LOCK(&ip_nat_lock); + if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { + ret = -EBUSY; + goto out; + } + ip_nat_protos[proto->protonum] = proto; + out: + WRITE_UNLOCK(&ip_nat_lock); + return ret; +} + +/* Noone stores the protocol anywhere; simply delete it. */ +void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) +{ + WRITE_LOCK(&ip_nat_lock); + ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; + WRITE_UNLOCK(&ip_nat_lock); + + /* Someone could be still looking at the proto in a bh. */ + synchronize_net(); } int __init ip_nat_init(void) @@ -790,11 +533,9 @@ int __init ip_nat_init(void) /* FIXME: Man, this is a hack. 
<SIGH> */ IP_NF_ASSERT(ip_conntrack_destroyed == NULL); ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; - - /* Initialize fake conntrack so that NAT will skip it */ - ip_conntrack_untracked.nat.info.initialized |= - (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST); + /* Initialize fake conntrack so that NAT will skip it */ + ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK; return 0; } diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c index b488b5e1fca2..e4799f2da77a 100644 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ b/net/ipv4/netfilter/ip_nat_ftp.c @@ -30,71 +30,8 @@ MODULE_DESCRIPTION("ftp NAT helper"); #define DEBUGP(format, args...) #endif -#define MAX_PORTS 8 -static int ports[MAX_PORTS]; -static int ports_c; - -module_param_array(ports, int, &ports_c, 0400); - /* FIXME: Time out? --RR */ -static unsigned int -ftp_nat_expected(struct sk_buff **pskb, - unsigned int hooknum, - struct ip_conntrack *ct, - struct ip_nat_info *info) -{ - struct ip_nat_range range; - u_int32_t newdstip, newsrcip, newip; - struct ip_ct_ftp_expect *exp_ftp_info; - - struct ip_conntrack *master = master_ct(ct); - - IP_NF_ASSERT(info); - IP_NF_ASSERT(master); - - IP_NF_ASSERT(!(info->initialized & (1<<HOOK2MANIP(hooknum)))); - - DEBUGP("nat_expected: We have a connection!\n"); - exp_ftp_info = &ct->master->help.exp_ftp_info; - - if (exp_ftp_info->ftptype == IP_CT_FTP_PORT - || exp_ftp_info->ftptype == IP_CT_FTP_EPRT) { - /* PORT command: make connection go to the client. */ - newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - newsrcip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - DEBUGP("nat_expected: PORT cmd. %u.%u.%u.%u->%u.%u.%u.%u\n", - NIPQUAD(newsrcip), NIPQUAD(newdstip)); - } else { - /* PASV command: make the connection go to the server */ - newdstip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - DEBUGP("nat_expected: PASV cmd. 
%u.%u.%u.%u->%u.%u.%u.%u\n", - NIPQUAD(newsrcip), NIPQUAD(newdstip)); - } - - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) - newip = newsrcip; - else - newip = newdstip; - - DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", NIPQUAD(newip)); - - /* We don't want to manip the per-protocol, just the IPs... */ - range.flags = IP_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = newip; - - /* ... unless we're doing a MANIP_DST, in which case, make - sure we map to the correct port */ - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) { - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max - = ((union ip_conntrack_manip_proto) - { .tcp = { htons(exp_ftp_info->port) } }); - } - return ip_nat_setup_info(ct, &range, hooknum); -} - static int mangle_rfc959_packet(struct sk_buff **pskb, u_int32_t newip, @@ -102,7 +39,8 @@ mangle_rfc959_packet(struct sk_buff **pskb, unsigned int matchoff, unsigned int matchlen, struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) + enum ip_conntrack_info ctinfo, + u32 *seq) { char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; @@ -111,6 +49,7 @@ mangle_rfc959_packet(struct sk_buff **pskb, DEBUGP("calling ip_nat_mangle_tcp_packet\n"); + *seq += strlen(buffer) - matchlen; return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } @@ -123,7 +62,8 @@ mangle_eprt_packet(struct sk_buff **pskb, unsigned int matchoff, unsigned int matchlen, struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) + enum ip_conntrack_info ctinfo, + u32 *seq) { char buffer[sizeof("|1|255.255.255.255|65535|")]; @@ -131,6 +71,7 @@ mangle_eprt_packet(struct sk_buff **pskb, DEBUGP("calling ip_nat_mangle_tcp_packet\n"); + *seq += strlen(buffer) - matchlen; return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } @@ -143,7 +84,8 @@ mangle_epsv_packet(struct sk_buff **pskb, unsigned int matchoff, unsigned int matchlen, struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) + enum 
ip_conntrack_info ctinfo, + u32 *seq) { char buffer[sizeof("|||65535|")]; @@ -151,6 +93,7 @@ mangle_epsv_packet(struct sk_buff **pskb, DEBUGP("calling ip_nat_mangle_tcp_packet\n"); + *seq += strlen(buffer) - matchlen; return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, matchlen, buffer, strlen(buffer)); } @@ -159,181 +102,73 @@ static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t, unsigned int, unsigned int, struct ip_conntrack *, - enum ip_conntrack_info) + enum ip_conntrack_info, + u32 *seq) = { [IP_CT_FTP_PORT] = mangle_rfc959_packet, [IP_CT_FTP_PASV] = mangle_rfc959_packet, [IP_CT_FTP_EPRT] = mangle_eprt_packet, [IP_CT_FTP_EPSV] = mangle_epsv_packet }; -static int ftp_data_fixup(const struct ip_ct_ftp_expect *exp_ftp_info, - struct ip_conntrack *ct, - struct sk_buff **pskb, - u32 tcp_seq, - enum ip_conntrack_info ctinfo, - struct ip_conntrack_expect *expect) +/* So, this packet has hit the connection tracking matching code. + Mangle it, and change the expectation to match the new version. */ +static unsigned int ip_nat_ftp(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + enum ip_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp, + u32 *seq) { u_int32_t newip; u_int16_t port; - struct ip_conntrack_tuple newtuple; + int dir = CTINFO2DIR(ctinfo); + struct ip_conntrack *ct = exp->master; - DEBUGP("FTP_NAT: seq %u + %u in %u\n", - expect->seq, exp_ftp_info->len, tcp_seq); + DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); - /* Change address inside packet to match way we're mapping - this connection. 
*/ - if (exp_ftp_info->ftptype == IP_CT_FTP_PASV - || exp_ftp_info->ftptype == IP_CT_FTP_EPSV) { - /* PASV/EPSV response: must be where client thinks server - is */ - newip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - /* Expect something from client->server */ - newtuple.src.ip = - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - newtuple.dst.ip = - ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - } else { - /* PORT command: must be where server thinks client is */ - newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - /* Expect something from server->client */ - newtuple.src.ip = - ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - newtuple.dst.ip = - ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - } - newtuple.dst.protonum = IPPROTO_TCP; - newtuple.src.u.tcp.port = expect->tuple.src.u.tcp.port; + /* Connection will come from wherever this packet goes, hence !dir */ + newip = ct->tuplehash[!dir].tuple.dst.ip; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = !dir; - /* Try to get same port: if not, try to change it. */ - for (port = exp_ftp_info->port; port != 0; port++) { - newtuple.dst.u.tcp.port = htons(port); + /* When you see the packet, we need to NAT it the same as the + * this one. */ + exp->expectfn = ip_nat_follow_master; - if (ip_conntrack_change_expect(expect, &newtuple) == 0) + /* Try to get same port: if not, try to change it. 
*/ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + exp->tuple.dst.u.tcp.port = htons(port); + if (ip_conntrack_expect_related(exp) == 0) break; } - if (port == 0) - return 0; - - if (!mangle[exp_ftp_info->ftptype](pskb, newip, port, - expect->seq - tcp_seq, - exp_ftp_info->len, ct, ctinfo)) - return 0; - return 1; -} - -static unsigned int help(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp, - struct ip_nat_info *info, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct tcphdr _tcph, *tcph; - unsigned int datalen; - int dir; - struct ip_ct_ftp_expect *exp_ftp_info; - - if (!exp) - DEBUGP("ip_nat_ftp: no exp!!"); - - exp_ftp_info = &exp->help.exp_ftp_info; - - /* Only mangle things once: original direction in POST_ROUTING - and reply direction on PRE_ROUTING. */ - dir = CTINFO2DIR(ctinfo); - if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) - || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) { - DEBUGP("nat_ftp: Not touching dir %s at hook %s\n", - dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", - hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" - : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" - : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "???"); - return NF_ACCEPT; + if (port == 0) { + ip_conntrack_expect_free(exp); + return NF_DROP; } - /* We passed tcp tracking, plus ftp helper: this must succeed. */ - tcph = skb_header_pointer(*pskb, iph->ihl * 4, sizeof(_tcph), &_tcph); - BUG_ON(!tcph); - - datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4; - /* If it's in the right range... */ - if (between(exp->seq + exp_ftp_info->len, - ntohl(tcph->seq), - ntohl(tcph->seq) + datalen)) { - if (!ftp_data_fixup(exp_ftp_info, ct, pskb, ntohl(tcph->seq), - ctinfo, exp)) - return NF_DROP; - } else { - /* Half a match? This means a partial retransmisison. - It's a cracker being funky. 
*/ - if (net_ratelimit()) { - printk("FTP_NAT: partial packet %u/%u in %u/%u\n", - exp->seq, exp_ftp_info->len, - ntohl(tcph->seq), - ntohl(tcph->seq) + datalen); - } + if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo, + seq)) { + ip_conntrack_unexpect_related(exp); return NF_DROP; } return NF_ACCEPT; } -static struct ip_nat_helper ftp[MAX_PORTS]; -static char ftp_names[MAX_PORTS][10]; - -/* Not __exit: called from init() */ -static void fini(void) +static void __exit fini(void) { - int i; - - for (i = 0; i < ports_c; i++) { - DEBUGP("ip_nat_ftp: unregistering port %d\n", ports[i]); - ip_nat_helper_unregister(&ftp[i]); - } + ip_nat_ftp_hook = NULL; + /* Make sure noone calls it, meanwhile. */ + synchronize_net(); } static int __init init(void) { - int i, ret = 0; - char *tmpname; - - if (ports_c == 0) - ports[ports_c++] = FTP_PORT; - - for (i = 0; i < ports_c; i++) { - ftp[i].tuple.dst.protonum = IPPROTO_TCP; - ftp[i].tuple.src.u.tcp.port = htons(ports[i]); - ftp[i].mask.dst.protonum = 0xFFFF; - ftp[i].mask.src.u.tcp.port = 0xFFFF; - ftp[i].help = help; - ftp[i].me = THIS_MODULE; - ftp[i].flags = 0; - ftp[i].expect = ftp_nat_expected; - - tmpname = &ftp_names[i][0]; - if (ports[i] == FTP_PORT) - sprintf(tmpname, "ftp"); - else - sprintf(tmpname, "ftp-%d", i); - ftp[i].name = tmpname; - - DEBUGP("ip_nat_ftp: Trying to register for port %d\n", - ports[i]); - ret = ip_nat_helper_register(&ftp[i]); - - if (ret) { - printk("ip_nat_ftp: error registering " - "helper for port %d\n", ports[i]); - fini(); - return ret; - } - } - - return ret; + BUG_ON(ip_nat_ftp_hook); + ip_nat_ftp_hook = ip_nat_ftp; + return 0; } -NEEDS_CONNTRACK(ftp); - module_init(init); module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c index 69b759fe99c7..1637b96d8c01 100644 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ b/net/ipv4/netfilter/ip_nat_helper.c @@ -47,8 +47,7 @@ #define DUMP_OFFSET(x) #endif -static LIST_HEAD(helpers); 
-DECLARE_LOCK(ip_nat_seqofs_lock); +static DECLARE_LOCK(ip_nat_seqofs_lock); /* Setup TCP sequence correction given this change at this sequence */ static inline void @@ -193,9 +192,14 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, csum_partial((char *)tcph, datalen, 0)); - adjust_tcp_sequence(ntohl(tcph->seq), - (int)rep_len - (int)match_len, - ct, ctinfo); + if (rep_len != match_len) { + set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); + adjust_tcp_sequence(ntohl(tcph->seq), + (int)rep_len - (int)match_len, + ct, ctinfo); + /* Tell TCP window tracking about seq change */ + ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo)); + } return 1; } @@ -362,11 +366,6 @@ ip_nat_seq_adjust(struct sk_buff **pskb, this_way = &ct->nat.info.seq[dir]; other_way = &ct->nat.info.seq[!dir]; - /* No adjustments to make? Very common case. */ - if (!this_way->offset_before && !this_way->offset_after - && !other_way->offset_before && !other_way->offset_after) - return 1; - if (!skb_ip_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) return 0; @@ -404,75 +403,28 @@ ip_nat_seq_adjust(struct sk_buff **pskb, return 1; } -static inline int -helper_cmp(const struct ip_nat_helper *helper, - const struct ip_conntrack_tuple *tuple) +/* Setup NAT on this expected conntrack so it follows master. 
*/ +/* If we fail to get a free NAT slot, we'll get dropped on confirm */ +void ip_nat_follow_master(struct ip_conntrack *ct, + struct ip_conntrack_expect *exp) { - return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask); -} - -int ip_nat_helper_register(struct ip_nat_helper *me) -{ - int ret = 0; - - WRITE_LOCK(&ip_nat_lock); - if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) - ret = -EBUSY; - else - list_prepend(&helpers, me); - WRITE_UNLOCK(&ip_nat_lock); - - return ret; -} - -struct ip_nat_helper * -__ip_nat_find_helper(const struct ip_conntrack_tuple *tuple) -{ - return LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, tuple); -} - -struct ip_nat_helper * -ip_nat_find_helper(const struct ip_conntrack_tuple *tuple) -{ - struct ip_nat_helper *h; - - READ_LOCK(&ip_nat_lock); - h = __ip_nat_find_helper(tuple); - READ_UNLOCK(&ip_nat_lock); - - return h; -} - -static int -kill_helper(struct ip_conntrack *i, void *helper) -{ - int ret; - - READ_LOCK(&ip_nat_lock); - ret = (i->nat.info.helper == helper); - READ_UNLOCK(&ip_nat_lock); - - return ret; -} - -void ip_nat_helper_unregister(struct ip_nat_helper *me) -{ - WRITE_LOCK(&ip_nat_lock); - /* Autoloading conntrack helper might have failed */ - if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,&me->tuple)) { - LIST_DELETE(&helpers, me); - } - WRITE_UNLOCK(&ip_nat_lock); - - /* Someone could be still looking at the helper in a bh. */ - synchronize_net(); - - /* Find anything using it, and umm, kill them. We can't turn - them into normal connections: if we've adjusted SYNs, then - they'll ackstorm. So we just drop it. We used to just - bump module count when a connection existed, but that - forces admins to gen fake RSTs or bounce box, either of - which is just a long-winded way of making things - worse. --RR */ - ip_ct_iterate_cleanup(kill_helper, me); + struct ip_nat_range range; + + /* This must be a fresh one. 
*/ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = IP_NAT_RANGE_MAP_IPS; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.dst.ip; + /* hook doesn't matter, but it has to do source manip */ + ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.min = range.max = exp->saved_proto; + range.min_ip = range.max_ip + = ct->master->tuplehash[!exp->dir].tuple.src.ip; + /* hook doesn't matter, but it has to do destination manip */ + ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); } diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c index fa884374b5dc..7c2c3762888e 100644 --- a/net/ipv4/netfilter/ip_nat_irc.c +++ b/net/ipv4/netfilter/ip_nat_irc.c @@ -1,5 +1,6 @@ /* IRC extension for TCP NAT alteration. * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org> + * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation * based on a copy of RR's ip_nat_ftp.c * * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp @@ -8,12 +9,6 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - * - * Module load syntax: - * insmod ip_nat_irc.o ports=port1,port2,...port<MAX_PORTS> - * - * please give the ports of all IRC servers You wish to connect to. - * If You don't specify ports, the default will be port 6667 */ #include <linux/module.h> @@ -35,66 +30,18 @@ #define DEBUGP(format, args...) 
#endif -#define MAX_PORTS 8 -static int ports[MAX_PORTS]; -static int ports_c; - MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); MODULE_DESCRIPTION("IRC (DCC) NAT helper"); MODULE_LICENSE("GPL"); -module_param_array(ports, int, &ports_c, 0400); -MODULE_PARM_DESC(ports, "port numbers of IRC servers"); - -/* FIXME: Time out? --RR */ - -static unsigned int -irc_nat_expected(struct sk_buff **pskb, - unsigned int hooknum, - struct ip_conntrack *ct, - struct ip_nat_info *info) -{ - struct ip_nat_range range; - u_int32_t newdstip, newsrcip, newip; - - struct ip_conntrack *master = master_ct(ct); - - IP_NF_ASSERT(info); - IP_NF_ASSERT(master); - - IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); - - DEBUGP("nat_expected: We have a connection!\n"); - - newdstip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - newsrcip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - DEBUGP("nat_expected: DCC cmd. %u.%u.%u.%u->%u.%u.%u.%u\n", - NIPQUAD(newsrcip), NIPQUAD(newdstip)); - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) - newip = newsrcip; - else - newip = newdstip; - - DEBUGP("nat_expected: IP to %u.%u.%u.%u\n", NIPQUAD(newip)); - - /* We don't want to manip the per-protocol, just the IPs. 
*/ - range.flags = IP_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip = newip; - - return ip_nat_setup_info(ct, &range, hooknum); -} - -static int irc_data_fixup(const struct ip_ct_irc_expect *exp_irc_info, - struct ip_conntrack *ct, - struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - struct ip_conntrack_expect *expect) +static unsigned int help(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + unsigned int matchoff, + unsigned int matchlen, + struct ip_conntrack_expect *exp) { - u_int32_t newip; - struct ip_conntrack_tuple t; - struct iphdr *iph = (*pskb)->nh.iph; - struct tcphdr *tcph = (void *) iph + iph->ihl * 4; u_int16_t port; + unsigned int ret; /* "4294967296 65635 " */ char buffer[18]; @@ -103,21 +50,25 @@ static int irc_data_fixup(const struct ip_ct_irc_expect *exp_irc_info, expect->seq, exp_irc_info->len, ntohl(tcph->seq)); - newip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + /* Reply comes from server. */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->dir = IP_CT_DIR_REPLY; + + /* When you see the packet, we need to NAT it the same as the + * this one. */ + exp->expectfn = ip_nat_follow_master; - /* Alter conntrack's expectations. */ - t = expect->tuple; - t.dst.ip = newip; - for (port = exp_irc_info->port; port != 0; port++) { - t.dst.u.tcp.port = htons(port); - if (ip_conntrack_change_expect(expect, &t) == 0) { - DEBUGP("using port %d", port); + /* Try to get same port: if not, try to change it. 
*/ + for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { + exp->tuple.dst.u.tcp.port = htons(port); + if (ip_conntrack_expect_related(exp) == 0) break; - } + } + if (port == 0) { + ip_conntrack_expect_free(exp); + return NF_DROP; } - if (port == 0) - return 0; /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27 * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28 @@ -132,131 +83,31 @@ static int irc_data_fixup(const struct ip_ct_irc_expect *exp_irc_info, * 0x01, \n: terminators */ - sprintf(buffer, "%u %u", ntohl(newip), port); + sprintf(buffer, "%u %u", ntohl(exp->tuple.src.ip), port); DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n", - buffer, NIPQUAD(newip), port); - - return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - expect->seq - ntohl(tcph->seq), - exp_irc_info->len, buffer, - strlen(buffer)); -} - -static unsigned int help(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp, - struct ip_nat_info *info, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) -{ - struct iphdr *iph = (*pskb)->nh.iph; - struct tcphdr *tcph = (void *) iph + iph->ihl * 4; - unsigned int datalen; - int dir; - struct ip_ct_irc_expect *exp_irc_info; - - if (!exp) - DEBUGP("ip_nat_irc: no exp!!"); - - exp_irc_info = &exp->help.exp_irc_info; + buffer, NIPQUAD(exp->tuple.src.ip), port); - /* Only mangle things once: original direction in POST_ROUTING - and reply direction on PRE_ROUTING. */ - dir = CTINFO2DIR(ctinfo); - if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) - || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) { - DEBUGP("nat_irc: Not touching dir %s at hook %s\n", - dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", - hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" - : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" - : hooknum == NF_IP_LOCAL_OUT ? 
"OUTPUT" : "???"); - return NF_ACCEPT; - } - DEBUGP("got beyond not touching\n"); - - datalen = (*pskb)->len - iph->ihl * 4 - tcph->doff * 4; - /* Check whether the whole IP/address pattern is carried in the payload */ - if (between(exp->seq + exp_irc_info->len, - ntohl(tcph->seq), - ntohl(tcph->seq) + datalen)) { - if (!irc_data_fixup(exp_irc_info, ct, pskb, ctinfo, exp)) - return NF_DROP; - } else { - /* Half a match? This means a partial retransmisison. - It's a cracker being funky. */ - if (net_ratelimit()) { - printk - ("IRC_NAT: partial packet %u/%u in %u/%u\n", - exp->seq, exp_irc_info->len, - ntohl(tcph->seq), - ntohl(tcph->seq) + datalen); - } - return NF_DROP; - } - return NF_ACCEPT; + ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, + matchoff, matchlen, buffer, + strlen(buffer)); + if (ret != NF_ACCEPT) + ip_conntrack_unexpect_related(exp); + return ret; } -static struct ip_nat_helper ip_nat_irc_helpers[MAX_PORTS]; -static char irc_names[MAX_PORTS][10]; - -/* This function is intentionally _NOT_ defined as __exit, because - * it is needed by init() */ -static void fini(void) +static void __exit fini(void) { - int i; - - for (i = 0; i < ports_c; i++) { - DEBUGP("ip_nat_irc: unregistering helper for port %d\n", - ports[i]); - ip_nat_helper_unregister(&ip_nat_irc_helpers[i]); - } + ip_nat_irc_hook = NULL; + /* Make sure noone calls it, meanwhile. 
*/ + synchronize_net(); } static int __init init(void) { - int ret = 0; - int i; - struct ip_nat_helper *hlpr; - char *tmpname; - - if (ports_c == 0) - ports[ports_c++] = IRC_PORT; - - for (i = 0; i < ports_c; i++) { - hlpr = &ip_nat_irc_helpers[i]; - hlpr->tuple.dst.protonum = IPPROTO_TCP; - hlpr->tuple.src.u.tcp.port = htons(ports[i]); - hlpr->mask.src.u.tcp.port = 0xFFFF; - hlpr->mask.dst.protonum = 0xFFFF; - hlpr->help = help; - hlpr->flags = 0; - hlpr->me = THIS_MODULE; - hlpr->expect = irc_nat_expected; - - tmpname = &irc_names[i][0]; - if (ports[i] == IRC_PORT) - sprintf(tmpname, "irc"); - else - sprintf(tmpname, "irc-%d", i); - hlpr->name = tmpname; - - DEBUGP - ("ip_nat_irc: Trying to register helper for port %d: name %s\n", - ports[i], hlpr->name); - ret = ip_nat_helper_register(hlpr); - - if (ret) { - printk - ("ip_nat_irc: error registering helper for port %d\n", - ports[i]); - fini(); - return 1; - } - } - return ret; + BUG_ON(ip_nat_irc_hook); + ip_nat_irc_hook = help; + return 0; } -NEEDS_CONNTRACK(irc); - module_init(init); module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c index 7cbe08819b0e..a558cf0eee8a 100644 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -54,7 +54,7 @@ icmp_unique_tuple(struct ip_conntrack_tuple *tuple, static int icmp_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, - const struct ip_conntrack_manip *manip, + const struct ip_conntrack_tuple *tuple, enum ip_nat_manip_type maniptype) { struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); @@ -64,12 +64,12 @@ icmp_manip_pkt(struct sk_buff **pskb, if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; - hdr = (void *)(*pskb)->data + hdroff; + hdr = (struct icmphdr *)((*pskb)->data + hdroff); hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, - manip->u.icmp.id, + tuple->src.u.icmp.id, hdr->checksum); - hdr->un.echo.id = manip->u.icmp.id; 
+ hdr->un.echo.id = tuple->src.u.icmp.id; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c index fb21a0875fa4..694838c0acd0 100644 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -85,14 +85,14 @@ tcp_unique_tuple(struct ip_conntrack_tuple *tuple, static int tcp_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, - const struct ip_conntrack_manip *manip, + const struct ip_conntrack_tuple *tuple, enum ip_nat_manip_type maniptype) { struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct tcphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, oldsrc = iph->saddr, olddst = iph->daddr; - u16 *portptr, oldport; + u32 oldip, newip; + u16 *portptr, newport, oldport; int hdrsize = 8; /* TCP connection tracking guarantees this much */ /* this could be a inner header returned in icmp packet; in such @@ -104,27 +104,32 @@ tcp_manip_pkt(struct sk_buff **pskb, if (!skb_ip_make_writable(pskb, hdroff + hdrsize)) return 0; - hdr = (void *)(*pskb)->data + hdroff; + iph = (struct iphdr *)((*pskb)->data + iphdroff); + hdr = (struct tcphdr *)((*pskb)->data + iph->ihl*4); if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ - oldip = oldsrc; + oldip = iph->saddr; + newip = tuple->src.ip; + newport = tuple->src.u.tcp.port; portptr = &hdr->source; } else { /* Get rid of dst ip and dst pt */ - oldip = olddst; + oldip = iph->daddr; + newip = tuple->dst.ip; + newport = tuple->dst.u.tcp.port; portptr = &hdr->dest; } oldport = *portptr; - *portptr = manip->u.tcp.port; + *portptr = newport; if (hdrsize < sizeof(*hdr)) return 1; - hdr->check = ip_nat_cheat_check(~oldip, manip->ip, + hdr->check = ip_nat_cheat_check(~oldip, newip, ip_nat_cheat_check(oldport ^ 0xFFFF, - manip->u.tcp.port, + newport, hdr->check)); return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c index 3c492530863c..c669e3b5f5d0 
100644 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -84,34 +84,40 @@ udp_unique_tuple(struct ip_conntrack_tuple *tuple, static int udp_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, - const struct ip_conntrack_manip *manip, + const struct ip_conntrack_tuple *tuple, enum ip_nat_manip_type maniptype) { struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); struct udphdr *hdr; unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, oldsrc = iph->saddr, olddst = iph->daddr; - u16 *portptr; + u32 oldip, newip; + u16 *portptr, newport; if (!skb_ip_make_writable(pskb, hdroff + sizeof(*hdr))) return 0; - hdr = (void *)(*pskb)->data + hdroff; + iph = (struct iphdr *)((*pskb)->data + iphdroff); + hdr = (struct udphdr *)((*pskb)->data + hdroff); + if (maniptype == IP_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ - oldip = oldsrc; + oldip = iph->saddr; + newip = tuple->src.ip; + newport = tuple->src.u.udp.port; portptr = &hdr->source; } else { /* Get rid of dst ip and dst pt */ - oldip = olddst; + oldip = iph->daddr; + newip = tuple->dst.ip; + newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } if (hdr->check) /* 0 is a special case meaning no checksum */ - hdr->check = ip_nat_cheat_check(~oldip, manip->ip, + hdr->check = ip_nat_cheat_check(~oldip, newip, ip_nat_cheat_check(*portptr ^ 0xFFFF, - manip->u.udp.port, + newport, hdr->check)); - *portptr = manip->u.udp.port; + *portptr = newport; return 1; } diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c index 8f2e7ddbbdc8..f5525bd58d16 100644 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ b/net/ipv4/netfilter/ip_nat_proto_unknown.c @@ -40,7 +40,7 @@ static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple, static int unknown_manip_pkt(struct sk_buff **pskb, unsigned int iphdroff, - const struct ip_conntrack_manip *manip, + const struct ip_conntrack_tuple *tuple, enum ip_nat_manip_type 
maniptype) { return 1; diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c index 80773588d8ad..4c204714a3a4 100644 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ b/net/ipv4/netfilter/ip_nat_rule.c @@ -16,6 +16,7 @@ #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <net/checksum.h> +#include <net/route.h> #include <linux/bitops.h> #define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) @@ -120,6 +121,25 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, return ip_nat_setup_info(ct, &mr->range[0], hooknum); } +/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ +static void warn_if_extra_mangle(u32 dstip, u32 srcip) +{ + static int warned = 0; + struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; + struct rtable *rt; + + if (ip_route_output_key(&rt, &fl) != 0) + return; + + if (rt->rt_src != srcip && !warned) { + printk("NAT: no longer support implicit source local NAT\n"); + printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", + NIPQUAD(srcip), NIPQUAD(dstip)); + warned = 1; + } + ip_rt_put(rt); +} + static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, @@ -139,6 +159,11 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, /* Connection must be valid and new. 
*/ IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + if (hooknum == NF_IP_LOCAL_OUT + && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) + warn_if_extra_mangle((*pskb)->nh.iph->daddr, + mr->range[0].min_ip); + return ip_nat_setup_info(ct, &mr->range[0], hooknum); } @@ -242,7 +267,7 @@ int ip_nat_rule_find(struct sk_buff **pskb, ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); if (ret == NF_ACCEPT) { - if (!(info->initialized & (1 << HOOK2MANIP(hooknum)))) + if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) /* NUL mapping */ ret = alloc_null_binding(ct, info, hooknum); } diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c index b18e79332169..2a48b6e635ae 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c @@ -50,6 +50,7 @@ #include <linux/moduleparam.h> #include <linux/netfilter_ipv4.h> #include <linux/netfilter_ipv4/ip_nat.h> +#include <linux/netfilter_ipv4/ip_conntrack_helper.h> #include <linux/netfilter_ipv4/ip_nat_helper.h> #include <linux/ip.h> #include <net/checksum.h> @@ -1203,9 +1204,7 @@ static int snmp_parse_mangle(unsigned char *msg, * SNMP translation routine. */ static int snmp_translate(struct ip_conntrack *ct, - struct ip_nat_info *info, enum ip_conntrack_info ctinfo, - unsigned int hooknum, struct sk_buff **pskb) { struct iphdr *iph = (*pskb)->nh.iph; @@ -1234,101 +1233,86 @@ static int snmp_translate(struct ip_conntrack *ct, if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), paylen, &map, &udph->check)) { - printk(KERN_WARNING "bsalg: parser failed\n"); + if (net_ratelimit()) + printk(KERN_WARNING "bsalg: parser failed\n"); return NF_DROP; } return NF_ACCEPT; } -/* - * NAT helper function, packets arrive here from NAT code. 
- */ -static unsigned int nat_help(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp, - struct ip_nat_info *info, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +/* We don't actually set up expectations, just adjust internal IP + * addresses if this is being NATted */ +static int help(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) { int dir = CTINFO2DIR(ctinfo); + unsigned int ret; struct iphdr *iph = (*pskb)->nh.iph; struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); - - if (!skb_ip_make_writable(pskb, (*pskb)->len)) - return NF_DROP; - spin_lock_bh(&snmp_lock); - - /* - * Translate snmp replies on pre-routing (DNAT) and snmp traps - * on post routing (SNAT). - */ - if (!((dir == IP_CT_DIR_REPLY && hooknum == NF_IP_PRE_ROUTING && - udph->source == ntohs(SNMP_PORT)) || - (dir == IP_CT_DIR_ORIGINAL && hooknum == NF_IP_POST_ROUTING && - udph->dest == ntohs(SNMP_TRAP_PORT)))) { - spin_unlock_bh(&snmp_lock); + /* SNMP replies and originating SNMP traps get mangled */ + if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + return NF_ACCEPT; + if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + /* No NAT? */ + if (!(ct->status & IPS_NAT_MASK)) return NF_ACCEPT; - } - if (debug > 1) { - printk(KERN_DEBUG "bsalg: dir=%s hook=%d manip=%s len=%d " - "src=%u.%u.%u.%u:%u dst=%u.%u.%u.%u:%u " - "osrc=%u.%u.%u.%u odst=%u.%u.%u.%u " - "rsrc=%u.%u.%u.%u rdst=%u.%u.%u.%u " - "\n", - dir == IP_CT_DIR_REPLY ? "reply" : "orig", hooknum, - HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? 
"snat" : - "dnat", (*pskb)->len, - NIPQUAD(iph->saddr), ntohs(udph->source), - NIPQUAD(iph->daddr), ntohs(udph->dest), - NIPQUAD(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), - NIPQUAD(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip), - NIPQUAD(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip), - NIPQUAD(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)); - } - /* * Make sure the packet length is ok. So far, we were only guaranteed * to have a valid length IP header plus 8 bytes, which means we have * enough room for a UDP header. Just verify the UDP length field so we * can mess around with the payload. */ - if (ntohs(udph->len) == (*pskb)->len - (iph->ihl << 2)) { - int ret = snmp_translate(ct, info, ctinfo, hooknum, pskb); - spin_unlock_bh(&snmp_lock); - return ret; + if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) { + if (net_ratelimit()) + printk(KERN_WARNING "SNMP: dropping malformed packet " + "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", + NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + return NF_DROP; } - - if (net_ratelimit()) - printk(KERN_WARNING "bsalg: dropping malformed packet " - "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + + if (!skb_ip_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + spin_lock_bh(&snmp_lock); + ret = snmp_translate(ct, ctinfo, pskb); spin_unlock_bh(&snmp_lock); - return NF_DROP; + return ret; } -static struct ip_nat_helper snmp = { - { NULL, NULL }, - "snmp", - 0, - THIS_MODULE, - { { 0, { .udp = { __constant_htons(SNMP_PORT) } } }, - { 0, { 0 }, IPPROTO_UDP } }, - { { 0, { .udp = { 0xFFFF } } }, - { 0, { 0 }, 0xFFFF } }, - nat_help, NULL }; - -static struct ip_nat_helper snmp_trap = { - { NULL, NULL }, - "snmp_trap", - 0, - THIS_MODULE, - { { 0, { .udp = { __constant_htons(SNMP_TRAP_PORT) } } }, - { 0, { 0 }, IPPROTO_UDP } }, - { { 0, { .udp = { 0xFFFF } } }, - { 0, { 0 }, 0xFFFF } }, - nat_help, NULL }; +static struct ip_conntrack_helper snmp_helper = { + .max_expected = 0, + .timeout = 180, + 
.me = THIS_MODULE, + .help = help, + .name = "snmp", + + .tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } }, + .dst = { .protonum = IPPROTO_UDP }, + }, + .mask = { .src = { .u = { 0xFFFF } }, + .dst = { .protonum = 0xFF }, + }, +}; + +static struct ip_conntrack_helper snmp_trap_helper = { + .max_expected = 0, + .timeout = 180, + .me = THIS_MODULE, + .help = help, + .name = "snmp_trap", + + .tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } }, + .dst = { .protonum = IPPROTO_UDP }, + }, + .mask = { .src = { .u = { 0xFFFF } }, + .dst = { .protonum = 0xFF }, + }, +}; /***************************************************************************** * @@ -1340,12 +1324,12 @@ static int __init init(void) { int ret = 0; - ret = ip_nat_helper_register(&snmp); + ret = ip_conntrack_helper_register(&snmp_helper); if (ret < 0) return ret; - ret = ip_nat_helper_register(&snmp_trap); + ret = ip_conntrack_helper_register(&snmp_trap_helper); if (ret < 0) { - ip_nat_helper_unregister(&snmp); + ip_conntrack_helper_unregister(&snmp_helper); return ret; } return ret; @@ -1353,9 +1337,8 @@ static int __init init(void) static void __exit fini(void) { - ip_nat_helper_unregister(&snmp); - ip_nat_helper_unregister(&snmp_trap); - synchronize_net(); + ip_conntrack_helper_unregister(&snmp_helper); + ip_conntrack_helper_unregister(&snmp_trap_helper); } module_init(init); diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c index 0ef8efffb91b..0efc4c8292d0 100644 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ b/net/ipv4/netfilter/ip_nat_standalone.c @@ -55,15 +55,6 @@ : ((hooknum) == NF_IP_LOCAL_IN ? 
"LOCAL_IN" \ : "*ERROR*"))) -static inline int call_expect(struct ip_conntrack *master, - struct sk_buff **pskb, - unsigned int hooknum, - struct ip_conntrack *ct, - struct ip_nat_info *info) -{ - return master->nat.info.helper->expect(pskb, hooknum, ct, info); -} - static unsigned int ip_nat_fn(unsigned int hooknum, struct sk_buff **pskb, @@ -115,7 +106,7 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!icmp_reply_translation(pskb, ct, hooknum, + if (!icmp_reply_translation(pskb, ct, maniptype, CTINFO2DIR(ctinfo))) return NF_DROP; else @@ -125,37 +116,26 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_NEW: info = &ct->nat.info; - WRITE_LOCK(&ip_nat_lock); /* Seen it before? This can happen for loopback, retrans, or local packets.. */ - if (!(info->initialized & (1 << maniptype))) { + if (!ip_nat_initialized(ct, maniptype)) { unsigned int ret; - if (ct->master - && master_ct(ct)->nat.info.helper - && master_ct(ct)->nat.info.helper->expect) { - ret = call_expect(master_ct(ct), pskb, - hooknum, ct, info); - } else { - /* LOCAL_IN hook doesn't have a chain! */ - if (hooknum == NF_IP_LOCAL_IN) - ret = alloc_null_binding(ct, info, - hooknum); - else - ret = ip_nat_rule_find(pskb, hooknum, - in, out, ct, - info); - } + /* LOCAL_IN hook doesn't have a chain! */ + if (hooknum == NF_IP_LOCAL_IN) + ret = alloc_null_binding(ct, info, hooknum); + else + ret = ip_nat_rule_find(pskb, hooknum, + in, out, ct, + info); if (ret != NF_ACCEPT) { - WRITE_UNLOCK(&ip_nat_lock); return ret; } } else DEBUGP("Already setup manip %s for ct %p\n", maniptype == IP_NAT_MANIP_SRC ? 
"SRC" : "DST", ct); - WRITE_UNLOCK(&ip_nat_lock); break; default: @@ -166,7 +146,7 @@ ip_nat_fn(unsigned int hooknum, } IP_NF_ASSERT(info); - return do_bindings(ct, ctinfo, info, hooknum, pskb); + return nat_packet(ct, ctinfo, hooknum, pskb); } static unsigned int @@ -288,33 +268,6 @@ static struct nf_hook_ops ip_nat_local_in_ops = { .priority = NF_IP_PRI_NAT_SRC, }; -/* Protocol registration. */ -int ip_nat_protocol_register(struct ip_nat_protocol *proto) -{ - int ret = 0; - - WRITE_LOCK(&ip_nat_lock); - if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { - ret = -EBUSY; - goto out; - } - ip_nat_protos[proto->protonum] = proto; - out: - WRITE_UNLOCK(&ip_nat_lock); - return ret; -} - -/* Noone stores the protocol anywhere; simply delete it. */ -void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) -{ - WRITE_LOCK(&ip_nat_lock); - ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; - WRITE_UNLOCK(&ip_nat_lock); - - /* Someone could be still looking at the proto in a bh. 
*/ - synchronize_net(); -} - static int init_or_cleanup(int init) { int ret = 0; @@ -388,12 +341,9 @@ module_exit(fini); EXPORT_SYMBOL(ip_nat_setup_info); EXPORT_SYMBOL(ip_nat_protocol_register); EXPORT_SYMBOL(ip_nat_protocol_unregister); -EXPORT_SYMBOL(ip_nat_helper_register); -EXPORT_SYMBOL(ip_nat_helper_unregister); EXPORT_SYMBOL(ip_nat_cheat_check); EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); EXPORT_SYMBOL(ip_nat_mangle_udp_packet); EXPORT_SYMBOL(ip_nat_used_tuple); -EXPORT_SYMBOL(ip_nat_find_helper); -EXPORT_SYMBOL(__ip_nat_find_helper); +EXPORT_SYMBOL(ip_nat_follow_master); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c index e173191031c0..0343e0d64674 100644 --- a/net/ipv4/netfilter/ip_nat_tftp.c +++ b/net/ipv4/netfilter/ip_nat_tftp.c @@ -38,168 +38,32 @@ MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); MODULE_DESCRIPTION("tftp NAT helper"); MODULE_LICENSE("GPL"); -#define MAX_PORTS 8 - -static int ports[MAX_PORTS]; -static int ports_c = 0; -module_param_array(ports, int, &ports_c, 0400); -MODULE_PARM_DESC(ports, "port numbers of tftp servers"); - -#if 0 -#define DEBUGP(format, args...) printk("%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) 
-#endif -static unsigned int -tftp_nat_help(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp, - struct ip_nat_info *info, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +static unsigned int help(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + struct ip_conntrack_expect *exp) { - int dir = CTINFO2DIR(ctinfo); - struct tftphdr _tftph, *tfh; - struct ip_conntrack_tuple repl; - - if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) - || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) - return NF_ACCEPT; - - if (!exp) { - DEBUGP("no conntrack expectation to modify\n"); - return NF_ACCEPT; - } - - tfh = skb_header_pointer(*pskb, - (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr), - sizeof(_tftph), &_tftph); - if (tfh == NULL) + exp->saved_proto.udp.port = exp->tuple.dst.u.tcp.port; + exp->dir = IP_CT_DIR_REPLY; + exp->expectfn = ip_nat_follow_master; + if (ip_conntrack_expect_related(exp) != 0) { + ip_conntrack_expect_free(exp); return NF_DROP; - - switch (ntohs(tfh->opcode)) { - /* RRQ and WRQ works the same way */ - case TFTP_OPCODE_READ: - case TFTP_OPCODE_WRITE: - repl = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - DEBUGP(""); - DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - DEBUGP("expecting: "); - DUMP_TUPLE(&repl); - DUMP_TUPLE(&exp->mask); - ip_conntrack_change_expect(exp, &repl); - break; - default: - DEBUGP("Unknown opcode\n"); - } - - return NF_ACCEPT; -} - -static unsigned int -tftp_nat_expected(struct sk_buff **pskb, - unsigned int hooknum, - struct ip_conntrack *ct, - struct ip_nat_info *info) -{ - const struct ip_conntrack *master = ct->master->expectant; - const struct ip_conntrack_tuple *orig = - &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - struct ip_nat_range range; -#if 0 - const struct ip_conntrack_tuple *repl = - &master->tuplehash[IP_CT_DIR_REPLY].tuple; - struct udphdr _udph, *uh; - - uh = skb_header_pointer(*pskb, 
- (*pskb)->nh.iph->ihl*4, - sizeof(_udph), &_udph); - if (uh == NULL) - return NF_DROP; -#endif - - IP_NF_ASSERT(info); - IP_NF_ASSERT(master); - IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); - - range.flags = IP_NAT_RANGE_MAP_IPS; - - if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) { - range.min_ip = range.max_ip = orig->dst.ip; - DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " - "newsrc: %u.%u.%u.%u\n", - NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source), - NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest), - NIPQUAD(orig->dst.ip)); - } else { - range.min_ip = range.max_ip = orig->src.ip; - range.min.udp.port = range.max.udp.port = orig->src.u.udp.port; - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; - - DEBUGP("orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " - "newdst: %u.%u.%u.%u:%u\n", - NIPQUAD((*pskb)->nh.iph->saddr), ntohs(uh->source), - NIPQUAD((*pskb)->nh.iph->daddr), ntohs(uh->dest), - NIPQUAD(orig->src.ip), ntohs(orig->src.u.udp.port)); } - - return ip_nat_setup_info(ct, &range, hooknum); + return NF_ACCEPT; } -static struct ip_nat_helper tftp[MAX_PORTS]; -static char tftp_names[MAX_PORTS][10]; - -static void fini(void) +static void __exit fini(void) { - int i; - - for (i = 0 ; i < ports_c; i++) { - DEBUGP("unregistering helper for port %d\n", ports[i]); - ip_nat_helper_unregister(&tftp[i]); - } + ip_nat_tftp_hook = NULL; + /* Make sure noone calls it, meanwhile. 
*/ + synchronize_net(); } static int __init init(void) { - int i, ret = 0; - char *tmpname; - - if (ports_c == 0) - ports[ports_c++] = TFTP_PORT; - - for (i = 0; i < ports_c; i++) { - memset(&tftp[i], 0, sizeof(struct ip_nat_helper)); - - tftp[i].tuple.dst.protonum = IPPROTO_UDP; - tftp[i].tuple.src.u.udp.port = htons(ports[i]); - tftp[i].mask.dst.protonum = 0xFFFF; - tftp[i].mask.src.u.udp.port = 0xFFFF; - tftp[i].help = tftp_nat_help; - tftp[i].flags = 0; - tftp[i].me = THIS_MODULE; - tftp[i].expect = tftp_nat_expected; - - tmpname = &tftp_names[i][0]; - if (ports[i] == TFTP_PORT) - sprintf(tmpname, "tftp"); - else - sprintf(tmpname, "tftp-%d", i); - tftp[i].name = tmpname; - - DEBUGP("ip_nat_tftp: registering for port %d: name %s\n", - ports[i], tftp[i].name); - ret = ip_nat_helper_register(&tftp[i]); - - if (ret) { - printk("ip_nat_tftp: unable to register for port %d\n", - ports[i]); - fini(); - return ret; - } - } - return ret; + BUG_ON(ip_nat_tftp_hook); + ip_nat_tftp_hook = help; + return 0; } module_init(init); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 68002ff25454..b3dda712f1f7 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -66,7 +66,7 @@ static LIST_HEAD(clusterip_configs); /* clusterip_lock protects the clusterip_configs list _AND_ the configurable * data within all structurses (num_local_nodes, local_nodes[]) */ -DECLARE_RWLOCK(clusterip_lock); +static DECLARE_RWLOCK(clusterip_lock); #ifdef CONFIG_PROC_FS static struct file_operations clusterip_proc_fops; diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 56d018940954..6f2cefbe16cd 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -100,7 +100,7 @@ typedef struct { static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ static struct sock *nflognl; /* our socket */ -DECLARE_LOCK(ulog_lock); /* spinlock */ +static 
DECLARE_LOCK(ulog_lock); /* spinlock */ /* send one ulog_buff_t to userspace */ static void ulog_send(unsigned int nlgroupnum) @@ -140,7 +140,7 @@ static void ulog_timer(unsigned long data) UNLOCK_BH(&ulog_lock); } -struct sk_buff *ulog_alloc_skb(unsigned int size) +static struct sk_buff *ulog_alloc_skb(unsigned int size) { struct sk_buff *skb; diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c index 04c95d874886..50d76aa4cb99 100644 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ b/net/ipv4/netfilter/ipt_hashlimit.c @@ -97,7 +97,7 @@ struct ipt_hashlimit_htable { struct list_head hash[0]; /* hashtable itself */ }; -DECLARE_RWLOCK(hashlimit_lock); /* protects htables list */ +static DECLARE_RWLOCK(hashlimit_lock); /* protects htables list */ static LIST_HEAD(hashlimit_htables); static kmem_cache_t *hashlimit_cachep; @@ -668,11 +668,9 @@ static int init_or_fini(int fini) goto cleanup_nothing; } - /* FIXME: do we really want HWCACHE_ALIGN since our objects are - * quite small ? 
*/ hashlimit_cachep = kmem_cache_create("ipt_hashlimit", sizeof(struct dsthash_ent), 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + 0, NULL, NULL); if (!hashlimit_cachep) { printk(KERN_ERR "Unable to create ipt_hashlimit slab cache\n"); ret = -ENOMEM; diff --git a/net/ipv4/netfilter/ipt_helper.c b/net/ipv4/netfilter/ipt_helper.c index 1ea5c1e46ba2..33fdf364d3d3 100644 --- a/net/ipv4/netfilter/ipt_helper.c +++ b/net/ipv4/netfilter/ipt_helper.c @@ -38,7 +38,6 @@ match(const struct sk_buff *skb, int *hotdrop) { const struct ipt_helper_info *info = matchinfo; - struct ip_conntrack_expect *exp; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; int ret = info->invert; @@ -54,28 +53,21 @@ match(const struct sk_buff *skb, return ret; } - exp = ct->master; READ_LOCK(&ip_conntrack_lock); - if (!exp->expectant) { - DEBUGP("ipt_helper: expectation %p without expectant !?!\n", - exp); - goto out_unlock; - } - - if (!exp->expectant->helper) { + if (!ct->master->helper) { DEBUGP("ipt_helper: master ct %p has no helper\n", exp->expectant); goto out_unlock; } DEBUGP("master's name = %s , info->name = %s\n", - exp->expectant->helper->name, info->name); + ct->master->helper->name, info->name); if (info->name[0] == '\0') ret ^= 1; else - ret ^= !strncmp(exp->expectant->helper->name, info->name, - strlen(exp->expectant->helper->name)); + ret ^= !strncmp(ct->master->helper->name, info->name, + strlen(ct->master->helper->name)); out_unlock: READ_UNLOCK(&ip_conntrack_lock); return ret; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c495e7ccd0c8..d34035d63c75 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1822,7 +1822,7 @@ int tcp_disconnect(struct sock *sk, int flags) tp->backoff = 0; tp->snd_cwnd = 2; tp->probes_out = 0; - tcp_set_pcount(&tp->packets_out, 0); + tp->packets_out = 0; tp->snd_ssthresh = 0x7fffffff; tp->snd_cwnd_cnt = 0; tcp_set_ca_state(tp, TCP_CA_Open); @@ -2137,11 +2137,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = 
tp->mss_cache_std; info->tcpi_rcv_mss = tp->ack.rcv_mss; - info->tcpi_unacked = tcp_get_pcount(&tp->packets_out); - info->tcpi_sacked = tcp_get_pcount(&tp->sacked_out); - info->tcpi_lost = tcp_get_pcount(&tp->lost_out); - info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out); - info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out); + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + info->tcpi_lost = tp->lost_out; + info->tcpi_retrans = tp->retrans_out; + info->tcpi_fackets = tp->fackets_out; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 5a8085e92302..bfcd43832cb0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -902,8 +902,8 @@ static void tcp_update_reordering(struct tcp_sock *tp, int metric, int ts) printk(KERN_DEBUG "Disorder%d %d %u f%u s%u rr%d\n", tp->sack_ok, tp->ca_state, tp->reordering, - tcp_get_pcount(&tp->fackets_out), - tcp_get_pcount(&tp->sacked_out), + tp->fackets_out, + tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif /* Disable FACK yet. 
*/ @@ -966,7 +966,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ unsigned char *ptr = ack_skb->h.raw + TCP_SKB_CB(ack_skb)->sacked; struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2); int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3; - int reord = tcp_get_pcount(&tp->packets_out); + int reord = tp->packets_out; int prior_fackets; u32 lost_retrans = 0; int flag = 0; @@ -980,9 +980,9 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ tp->mss_cache = tp->mss_cache_std; } - if (!tcp_get_pcount(&tp->sacked_out)) - tcp_set_pcount(&tp->fackets_out, 0); - prior_fackets = tcp_get_pcount(&tp->fackets_out); + if (!tp->sacked_out) + tp->fackets_out = 0; + prior_fackets = tp->fackets_out; for (i=0; i<num_sacks; i++, sp++) { struct sk_buff *skb; @@ -1080,8 +1080,8 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ */ if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS); - tcp_dec_pcount(&tp->lost_out, skb); - tcp_dec_pcount(&tp->retrans_out, skb); + tp->lost_out -= tcp_skb_pcount(skb); + tp->retrans_out -= tcp_skb_pcount(skb); } } else { /* New sack for not retransmitted frame, @@ -1093,16 +1093,16 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if (sacked & TCPCB_LOST) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; - tcp_dec_pcount(&tp->lost_out, skb); + tp->lost_out -= tcp_skb_pcount(skb); } } TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED; flag |= FLAG_DATA_SACKED; - tcp_inc_pcount(&tp->sacked_out, skb); + tp->sacked_out += tcp_skb_pcount(skb); - if (fack_count > tcp_get_pcount(&tp->fackets_out)) - tcp_set_pcount(&tp->fackets_out, fack_count); + if (fack_count > tp->fackets_out) + tp->fackets_out = fack_count; } else { if (dup_sack && (sacked&TCPCB_RETRANS)) reord = min(fack_count, reord); @@ -1116,7 +1116,7 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ if 
(dup_sack && (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tcp_dec_pcount(&tp->retrans_out, skb); + tp->retrans_out -= tcp_skb_pcount(skb); } } } @@ -1142,10 +1142,10 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ TCP_SKB_CB(skb)->ack_seq + tp->reordering * tp->mss_cache_std))) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tcp_dec_pcount(&tp->retrans_out, skb); + tp->retrans_out -= tcp_skb_pcount(skb); if (!(TCP_SKB_CB(skb)->sacked&(TCPCB_LOST|TCPCB_SACKED_ACKED))) { - tcp_inc_pcount(&tp->lost_out, skb); + tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; flag |= FLAG_DATA_SACKED; NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT); @@ -1154,20 +1154,15 @@ tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_ } } - tcp_set_pcount(&tp->left_out, - (tcp_get_pcount(&tp->sacked_out) + - tcp_get_pcount(&tp->lost_out))); + tp->left_out = tp->sacked_out + tp->lost_out; - if ((reord < tcp_get_pcount(&tp->fackets_out)) && - tp->ca_state != TCP_CA_Loss) - tcp_update_reordering(tp, - ((tcp_get_pcount(&tp->fackets_out) + 1) - - reord), 0); + if ((reord < tp->fackets_out) && tp->ca_state != TCP_CA_Loss) + tcp_update_reordering(tp, ((tp->fackets_out + 1) - reord), 0); #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0); - BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0); - BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0); + BUG_TRAP((int)tp->sacked_out >= 0); + BUG_TRAP((int)tp->lost_out >= 0); + BUG_TRAP((int)tp->retrans_out >= 0); BUG_TRAP((int)tcp_packets_in_flight(tp) >= 0); #endif return flag; @@ -1197,7 +1192,7 @@ void tcp_enter_frto(struct sock *sk) * If something was really lost, it is eventually caught up * in tcp_enter_frto_loss. 
*/ - tcp_set_pcount(&tp->retrans_out, 0); + tp->retrans_out = 0; tp->undo_marker = tp->snd_una; tp->undo_retrans = 0; @@ -1220,9 +1215,9 @@ static void tcp_enter_frto_loss(struct sock *sk) struct sk_buff *skb; int cnt = 0; - tcp_set_pcount(&tp->sacked_out, 0); - tcp_set_pcount(&tp->lost_out, 0); - tcp_set_pcount(&tp->fackets_out, 0); + tp->sacked_out = 0; + tp->lost_out = 0; + tp->fackets_out = 0; sk_stream_for_retrans_queue(skb, sk) { cnt += tcp_skb_pcount(skb); @@ -1235,11 +1230,11 @@ static void tcp_enter_frto_loss(struct sock *sk) if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tcp_inc_pcount(&tp->lost_out, skb); + tp->lost_out += tcp_skb_pcount(skb); } } else { - tcp_inc_pcount(&tp->sacked_out, skb); - tcp_set_pcount(&tp->fackets_out, cnt); + tp->sacked_out += tcp_skb_pcount(skb); + tp->fackets_out = cnt; } } tcp_sync_left_out(tp); @@ -1261,12 +1256,12 @@ static void tcp_enter_frto_loss(struct sock *sk) void tcp_clear_retrans(struct tcp_sock *tp) { - tcp_set_pcount(&tp->left_out, 0); - tcp_set_pcount(&tp->retrans_out, 0); + tp->left_out = 0; + tp->retrans_out = 0; - tcp_set_pcount(&tp->fackets_out, 0); - tcp_set_pcount(&tp->sacked_out, 0); - tcp_set_pcount(&tp->lost_out, 0); + tp->fackets_out = 0; + tp->sacked_out = 0; + tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = 0; @@ -1307,10 +1302,10 @@ void tcp_enter_loss(struct sock *sk, int how) if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tcp_inc_pcount(&tp->lost_out, skb); + tp->lost_out += tcp_skb_pcount(skb); } else { - tcp_inc_pcount(&tp->sacked_out, skb); - tcp_set_pcount(&tp->fackets_out, cnt); + tp->sacked_out += tcp_skb_pcount(skb); + tp->fackets_out = cnt; } } tcp_sync_left_out(tp); @@ -1347,8 +1342,7 @@ static int tcp_check_sack_reneging(struct sock *sk, struct tcp_sock *tp) static inline int tcp_fackets_out(struct tcp_sock *tp) { - return 
IsReno(tp) ? tcp_get_pcount(&tp->sacked_out)+1 : - tcp_get_pcount(&tp->fackets_out); + return IsReno(tp) ? tp->sacked_out+1 : tp->fackets_out; } static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb) @@ -1358,7 +1352,7 @@ static inline int tcp_skb_timedout(struct tcp_sock *tp, struct sk_buff *skb) static inline int tcp_head_timedout(struct sock *sk, struct tcp_sock *tp) { - return tcp_get_pcount(&tp->packets_out) && + return tp->packets_out && tcp_skb_timedout(tp, skb_peek(&sk->sk_write_queue)); } @@ -1460,7 +1454,7 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) __u32 packets_out; /* Trick#1: The loss is proven. */ - if (tcp_get_pcount(&tp->lost_out)) + if (tp->lost_out) return 1; /* Not-A-Trick#2 : Classic rule... */ @@ -1476,9 +1470,9 @@ static int tcp_time_to_recover(struct sock *sk, struct tcp_sock *tp) /* Trick#4: It is still not OK... But will it be useful to delay * recovery more? */ - packets_out = tcp_get_pcount(&tp->packets_out); + packets_out = tp->packets_out; if (packets_out <= tp->reordering && - tcp_get_pcount(&tp->sacked_out) >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && + tp->sacked_out >= max_t(__u32, packets_out/2, sysctl_tcp_reordering) && !tcp_may_send_now(sk, tp)) { /* We have nothing to send. This connection is limited * either by receiver window or by application. 
@@ -1497,16 +1491,12 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) { u32 holes; - holes = max(tcp_get_pcount(&tp->lost_out), 1U); - holes = min(holes, tcp_get_pcount(&tp->packets_out)); + holes = max(tp->lost_out, 1U); + holes = min(holes, tp->packets_out); - if ((tcp_get_pcount(&tp->sacked_out) + holes) > - tcp_get_pcount(&tp->packets_out)) { - tcp_set_pcount(&tp->sacked_out, - (tcp_get_pcount(&tp->packets_out) - holes)); - tcp_update_reordering(tp, - tcp_get_pcount(&tp->packets_out)+addend, - 0); + if ((tp->sacked_out + holes) > tp->packets_out) { + tp->sacked_out = tp->packets_out - holes; + tcp_update_reordering(tp, tp->packets_out+addend, 0); } } @@ -1514,7 +1504,7 @@ static void tcp_check_reno_reordering(struct tcp_sock *tp, int addend) static void tcp_add_reno_sack(struct tcp_sock *tp) { - tcp_inc_pcount_explicit(&tp->sacked_out, 1); + tp->sacked_out++; tcp_check_reno_reordering(tp, 0); tcp_sync_left_out(tp); } @@ -1525,10 +1515,10 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke { if (acked > 0) { /* One ACK acked hole. The rest eat duplicate ACKs. */ - if (acked-1 >= tcp_get_pcount(&tp->sacked_out)) - tcp_set_pcount(&tp->sacked_out, 0); + if (acked-1 >= tp->sacked_out) + tp->sacked_out = 0; else - tcp_dec_pcount_explicit(&tp->sacked_out, acked-1); + tp->sacked_out -= acked-1; } tcp_check_reno_reordering(tp, acked); tcp_sync_left_out(tp); @@ -1536,8 +1526,8 @@ static void tcp_remove_reno_sacks(struct sock *sk, struct tcp_sock *tp, int acke static inline void tcp_reset_reno_sack(struct tcp_sock *tp) { - tcp_set_pcount(&tp->sacked_out, 0); - tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->lost_out)); + tp->sacked_out = 0; + tp->left_out = tp->lost_out; } /* Mark head of queue up as lost. 
*/ @@ -1547,7 +1537,7 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb; int cnt = packets; - BUG_TRAP(cnt <= tcp_get_pcount(&tp->packets_out)); + BUG_TRAP(cnt <= tp->packets_out); sk_stream_for_retrans_queue(skb, sk) { cnt -= tcp_skb_pcount(skb); @@ -1555,7 +1545,7 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, break; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tcp_inc_pcount(&tp->lost_out, skb); + tp->lost_out += tcp_skb_pcount(skb); } } tcp_sync_left_out(tp); @@ -1566,7 +1556,7 @@ static void tcp_mark_head_lost(struct sock *sk, struct tcp_sock *tp, static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) { if (IsFack(tp)) { - int lost = tcp_get_pcount(&tp->fackets_out) - tp->reordering; + int lost = tp->fackets_out - tp->reordering; if (lost <= 0) lost = 1; tcp_mark_head_lost(sk, tp, lost, tp->high_seq); @@ -1586,7 +1576,7 @@ static void tcp_update_scoreboard(struct sock *sk, struct tcp_sock *tp) if (tcp_skb_timedout(tp, skb) && !(TCP_SKB_CB(skb)->sacked&TCPCB_TAGBITS)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tcp_inc_pcount(&tp->lost_out, skb); + tp->lost_out += tcp_skb_pcount(skb); } } tcp_sync_left_out(tp); @@ -1651,9 +1641,9 @@ static void DBGUNDO(struct sock *sk, struct tcp_sock *tp, const char *msg) printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n", msg, NIPQUAD(inet->daddr), ntohs(inet->dport), - tp->snd_cwnd, tcp_get_pcount(&tp->left_out), + tp->snd_cwnd, tp->left_out, tp->snd_ssthresh, tp->prior_ssthresh, - tcp_get_pcount(&tp->packets_out)); + tp->packets_out); } #else #define DBGUNDO(x...) do { } while (0) @@ -1724,13 +1714,13 @@ static int tcp_try_undo_partial(struct sock *sk, struct tcp_sock *tp, int acked) { /* Partial ACK arrived. Force Hoe's retransmit. 
*/ - int failed = IsReno(tp) || tcp_get_pcount(&tp->fackets_out)>tp->reordering; + int failed = IsReno(tp) || tp->fackets_out>tp->reordering; if (tcp_may_undo(tp)) { /* Plain luck! Hole if filled with delayed * packet, rather than with a retransmit. */ - if (tcp_get_pcount(&tp->retrans_out) == 0) + if (tp->retrans_out == 0) tp->retrans_stamp = 0; tcp_update_reordering(tp, tcp_fackets_out(tp)+acked, 1); @@ -1757,8 +1747,8 @@ static int tcp_try_undo_loss(struct sock *sk, struct tcp_sock *tp) TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } DBGUNDO(sk, tp, "partial loss"); - tcp_set_pcount(&tp->lost_out, 0); - tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out)); + tp->lost_out = 0; + tp->left_out = tp->sacked_out; tcp_undo_cwr(tp, 1); NET_INC_STATS_BH(LINUX_MIB_TCPLOSSUNDO); tp->retransmits = 0; @@ -1781,9 +1771,9 @@ static inline void tcp_complete_cwr(struct tcp_sock *tp) static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) { - tcp_set_pcount(&tp->left_out, tcp_get_pcount(&tp->sacked_out)); + tp->left_out = tp->sacked_out; - if (tcp_get_pcount(&tp->retrans_out) == 0) + if (tp->retrans_out == 0) tp->retrans_stamp = 0; if (flag&FLAG_ECE) @@ -1792,9 +1782,7 @@ static void tcp_try_to_open(struct sock *sk, struct tcp_sock *tp, int flag) if (tp->ca_state != TCP_CA_CWR) { int state = TCP_CA_Open; - if (tcp_get_pcount(&tp->left_out) || - tcp_get_pcount(&tp->retrans_out) || - tp->undo_marker) + if (tp->left_out || tp->retrans_out || tp->undo_marker) state = TCP_CA_Disorder; if (tp->ca_state != state) { @@ -1827,11 +1815,11 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, /* Some technical things: * 1. Reno does not count dupacks (sacked_out) automatically. */ - if (!tcp_get_pcount(&tp->packets_out)) - tcp_set_pcount(&tp->sacked_out, 0); + if (!tp->packets_out) + tp->sacked_out = 0; /* 2. SACK counts snd_fack in packets inaccurately. 
*/ - if (tcp_get_pcount(&tp->sacked_out) == 0) - tcp_set_pcount(&tp->fackets_out, 0); + if (tp->sacked_out == 0) + tp->fackets_out = 0; /* Now state machine starts. * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ @@ -1839,15 +1827,15 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->prior_ssthresh = 0; /* B. In all the states check for reneging SACKs. */ - if (tcp_get_pcount(&tp->sacked_out) && tcp_check_sack_reneging(sk, tp)) + if (tp->sacked_out && tcp_check_sack_reneging(sk, tp)) return; /* C. Process data loss notification, provided it is valid. */ if ((flag&FLAG_DATA_LOST) && before(tp->snd_una, tp->high_seq) && tp->ca_state != TCP_CA_Open && - tcp_get_pcount(&tp->fackets_out) > tp->reordering) { - tcp_mark_head_lost(sk, tp, tcp_get_pcount(&tp->fackets_out)-tp->reordering, tp->high_seq); + tp->fackets_out > tp->reordering) { + tcp_mark_head_lost(sk, tp, tp->fackets_out-tp->reordering, tp->high_seq); NET_INC_STATS_BH(LINUX_MIB_TCPLOSS); } @@ -1858,7 +1846,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, * when high_seq is ACKed. 
*/ if (tp->ca_state == TCP_CA_Open) { if (!sysctl_tcp_frto) - BUG_TRAP(tcp_get_pcount(&tp->retrans_out) == 0); + BUG_TRAP(tp->retrans_out == 0); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (tp->ca_state) { @@ -1905,8 +1893,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, if (IsReno(tp) && is_dupack) tcp_add_reno_sack(tp); } else { - int acked = prior_packets - - tcp_get_pcount(&tp->packets_out); + int acked = prior_packets - tp->packets_out; if (IsReno(tp)) tcp_remove_reno_sacks(sk, tp, acked); is_dupack = tcp_try_undo_partial(sk, tp, acked); @@ -1949,7 +1936,7 @@ tcp_fastretrans_alert(struct sock *sk, u32 prior_snd_una, tp->high_seq = tp->snd_nxt; tp->prior_ssthresh = 0; tp->undo_marker = tp->snd_una; - tp->undo_retrans = tcp_get_pcount(&tp->retrans_out); + tp->undo_retrans = tp->retrans_out; if (tp->ca_state < TCP_CA_CWR) { if (!(flag&FLAG_ECE)) @@ -2349,7 +2336,7 @@ static inline void tcp_cong_avoid(struct tcp_sock *tp, u32 ack, u32 seq_rtt) static inline void tcp_ack_packets_out(struct sock *sk, struct tcp_sock *tp) { - if (!tcp_get_pcount(&tp->packets_out)) { + if (!tp->packets_out) { tcp_clear_xmit_timer(sk, TCP_TIME_RETRANS); } else { tcp_reset_xmit_timer(sk, TCP_TIME_RETRANS, tp->rto); @@ -2391,18 +2378,15 @@ static int tcp_tso_acked(struct sock *sk, struct sk_buff *skb, if (sacked) { if (sacked & TCPCB_RETRANS) { if (sacked & TCPCB_SACKED_RETRANS) - tcp_dec_pcount_explicit(&tp->retrans_out, - packets_acked); + tp->retrans_out -= packets_acked; acked |= FLAG_RETRANS_DATA_ACKED; *seq_rtt = -1; } else if (*seq_rtt < 0) *seq_rtt = now - scb->when; if (sacked & TCPCB_SACKED_ACKED) - tcp_dec_pcount_explicit(&tp->sacked_out, - packets_acked); + tp->sacked_out -= packets_acked; if (sacked & TCPCB_LOST) - tcp_dec_pcount_explicit(&tp->lost_out, - packets_acked); + tp->lost_out -= packets_acked; if (sacked & TCPCB_URG) { if (tp->urg_mode && !before(seq, tp->snd_up)) @@ -2411,12 +2395,11 @@ static int 
tcp_tso_acked(struct sock *sk, struct sk_buff *skb, } else if (*seq_rtt < 0) *seq_rtt = now - scb->when; - if (tcp_get_pcount(&tp->fackets_out)) { - __u32 dval = min(tcp_get_pcount(&tp->fackets_out), - packets_acked); - tcp_dec_pcount_explicit(&tp->fackets_out, dval); + if (tp->fackets_out) { + __u32 dval = min(tp->fackets_out, packets_acked); + tp->fackets_out -= dval; } - tcp_dec_pcount_explicit(&tp->packets_out, packets_acked); + tp->packets_out -= packets_acked; BUG_ON(tcp_skb_pcount(skb) == 0); BUG_ON(!before(scb->seq, scb->end_seq)); @@ -2468,15 +2451,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) if (sacked) { if (sacked & TCPCB_RETRANS) { if(sacked & TCPCB_SACKED_RETRANS) - tcp_dec_pcount(&tp->retrans_out, skb); + tp->retrans_out -= tcp_skb_pcount(skb); acked |= FLAG_RETRANS_DATA_ACKED; seq_rtt = -1; } else if (seq_rtt < 0) seq_rtt = now - scb->when; if (sacked & TCPCB_SACKED_ACKED) - tcp_dec_pcount(&tp->sacked_out, skb); + tp->sacked_out -= tcp_skb_pcount(skb); if (sacked & TCPCB_LOST) - tcp_dec_pcount(&tp->lost_out, skb); + tp->lost_out -= tcp_skb_pcount(skb); if (sacked & TCPCB_URG) { if (tp->urg_mode && !before(scb->end_seq, tp->snd_up)) @@ -2496,27 +2479,24 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p) } #if FASTRETRANS_DEBUG > 0 - BUG_TRAP((int)tcp_get_pcount(&tp->sacked_out) >= 0); - BUG_TRAP((int)tcp_get_pcount(&tp->lost_out) >= 0); - BUG_TRAP((int)tcp_get_pcount(&tp->retrans_out) >= 0); - if (!tcp_get_pcount(&tp->packets_out) && tp->sack_ok) { - if (tcp_get_pcount(&tp->lost_out)) { + BUG_TRAP((int)tp->sacked_out >= 0); + BUG_TRAP((int)tp->lost_out >= 0); + BUG_TRAP((int)tp->retrans_out >= 0); + if (!tp->packets_out && tp->sack_ok) { + if (tp->lost_out) { printk(KERN_DEBUG "Leak l=%u %d\n", - tcp_get_pcount(&tp->lost_out), - tp->ca_state); - tcp_set_pcount(&tp->lost_out, 0); + tp->lost_out, tp->ca_state); + tp->lost_out = 0; } - if (tcp_get_pcount(&tp->sacked_out)) { + if (tp->sacked_out) { 
printk(KERN_DEBUG "Leak s=%u %d\n", - tcp_get_pcount(&tp->sacked_out), - tp->ca_state); - tcp_set_pcount(&tp->sacked_out, 0); + tp->sacked_out, tp->ca_state); + tp->sacked_out = 0; } - if (tcp_get_pcount(&tp->retrans_out)) { + if (tp->retrans_out) { printk(KERN_DEBUG "Leak r=%u %d\n", - tcp_get_pcount(&tp->retrans_out), - tp->ca_state); - tcp_set_pcount(&tp->retrans_out, 0); + tp->retrans_out, tp->ca_state); + tp->retrans_out = 0; } } #endif @@ -2943,7 +2923,7 @@ static int tcp_ack(struct sock *sk, struct sk_buff *skb, int flag) */ sk->sk_err_soft = 0; tp->rcv_tstamp = tcp_time_stamp; - prior_packets = tcp_get_pcount(&tp->packets_out); + prior_packets = tp->packets_out; if (!prior_packets) goto no_queue; @@ -3964,7 +3944,7 @@ static void tcp_new_space(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_get_pcount(&tp->packets_out) < tp->snd_cwnd && + if (tp->packets_out < tp->snd_cwnd && !(sk->sk_userlocks & SOCK_SNDBUF_LOCK) && !tcp_memory_pressure && atomic_read(&tcp_memory_allocated) < sysctl_tcp_mem[0]) { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 748224b44399..2876f505674d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -754,11 +754,11 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, newtp->mdev = TCP_TIMEOUT_INIT; newtp->rto = TCP_TIMEOUT_INIT; - tcp_set_pcount(&newtp->packets_out, 0); - tcp_set_pcount(&newtp->left_out, 0); - tcp_set_pcount(&newtp->retrans_out, 0); - tcp_set_pcount(&newtp->sacked_out, 0); - tcp_set_pcount(&newtp->fackets_out, 0); + newtp->packets_out = 0; + newtp->left_out = 0; + newtp->retrans_out = 0; + newtp->sacked_out = 0; + newtp->fackets_out = 0; newtp->snd_ssthresh = 0x7fffffff; /* So many TCP implementations out there (incorrectly) count the diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7443293b862d..56947f62a198 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -129,8 +129,7 @@ static inline void 
tcp_event_data_sent(struct tcp_sock *tp, { u32 now = tcp_time_stamp; - if (!tcp_get_pcount(&tp->packets_out) && - (s32)(now - tp->lsndtime) > tp->rto) + if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto) tcp_cwnd_restart(tp, __sk_dst_get(sk)); tp->lsndtime = now; @@ -509,8 +508,8 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) TCP_SKB_CB(buff)->when = TCP_SKB_CB(skb)->when; if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { - tcp_dec_pcount(&tp->lost_out, skb); - tcp_dec_pcount(&tp->left_out, skb); + tp->lost_out -= tcp_skb_pcount(skb); + tp->left_out -= tcp_skb_pcount(skb); } /* Fix up tso_factor for both original and new SKB. */ @@ -518,13 +517,13 @@ static int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len) tcp_set_skb_tso_segs(buff, tp->mss_cache_std); if (TCP_SKB_CB(skb)->sacked & TCPCB_LOST) { - tcp_inc_pcount(&tp->lost_out, skb); - tcp_inc_pcount(&tp->left_out, skb); + tp->lost_out += tcp_skb_pcount(skb); + tp->left_out += tcp_skb_pcount(skb); } if (TCP_SKB_CB(buff)->sacked&TCPCB_LOST) { - tcp_inc_pcount(&tp->lost_out, buff); - tcp_inc_pcount(&tp->left_out, buff); + tp->lost_out += tcp_skb_pcount(buff); + tp->left_out += tcp_skb_pcount(buff); } /* Link BUFF into the send queue. 
*/ @@ -773,7 +772,7 @@ int tcp_write_xmit(struct sock *sk, int nonagle) return 0; } - return !tcp_get_pcount(&tp->packets_out) && sk->sk_send_head; + return !tp->packets_out && sk->sk_send_head; } return 0; } @@ -945,15 +944,15 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m */ TCP_SKB_CB(skb)->sacked |= TCP_SKB_CB(next_skb)->sacked&(TCPCB_EVER_RETRANS|TCPCB_AT_TAIL); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_SACKED_RETRANS) - tcp_dec_pcount(&tp->retrans_out, next_skb); + tp->retrans_out -= tcp_skb_pcount(next_skb); if (TCP_SKB_CB(next_skb)->sacked&TCPCB_LOST) { - tcp_dec_pcount(&tp->lost_out, next_skb); - tcp_dec_pcount(&tp->left_out, next_skb); + tp->lost_out -= tcp_skb_pcount(next_skb); + tp->left_out -= tcp_skb_pcount(next_skb); } /* Reno case is special. Sigh... */ - if (!tp->sack_ok && tcp_get_pcount(&tp->sacked_out)) { + if (!tp->sack_ok && tp->sacked_out) { tcp_dec_pcount_approx(&tp->sacked_out, next_skb); - tcp_dec_pcount(&tp->left_out, next_skb); + tp->left_out -= tcp_skb_pcount(next_skb); } /* Not quite right: it can be > snd.fack, but @@ -981,11 +980,11 @@ void tcp_simple_retransmit(struct sock *sk) !(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS; - tcp_dec_pcount(&tp->retrans_out, skb); + tp->retrans_out -= tcp_skb_pcount(skb); } if (!(TCP_SKB_CB(skb)->sacked&TCPCB_LOST)) { TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; - tcp_inc_pcount(&tp->lost_out, skb); + tp->lost_out += tcp_skb_pcount(skb); lost = 1; } } @@ -1060,9 +1059,8 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) /* New SKB created, account for it. 
*/ new_factor = tcp_skb_pcount(skb); - tcp_dec_pcount_explicit(&tp->packets_out, - old_factor - new_factor); - tcp_inc_pcount(&tp->packets_out, skb->next); + tp->packets_out -= old_factor - new_factor; + tp->packets_out += tcp_skb_pcount(skb->next); } /* Collapse two adjacent packets if worthwhile and we can. */ @@ -1071,6 +1069,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) (skb->next != sk->sk_send_head) && (skb->next != (struct sk_buff *)&sk->sk_write_queue) && (skb_shinfo(skb)->nr_frags == 0 && skb_shinfo(skb->next)->nr_frags == 0) && + (tcp_skb_pcount(skb) == 1 && tcp_skb_pcount(skb->next) == 1) && (sysctl_tcp_retrans_collapse != 0)) tcp_retrans_try_collapse(sk, skb, cur_mss); @@ -1115,7 +1114,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb) } #endif TCP_SKB_CB(skb)->sacked |= TCPCB_RETRANS; - tcp_inc_pcount(&tp->retrans_out, skb); + tp->retrans_out += tcp_skb_pcount(skb); /* Save stamp of the first retransmit. */ if (!tp->retrans_stamp) @@ -1143,7 +1142,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int packet_cnt = tcp_get_pcount(&tp->lost_out); + int packet_cnt = tp->lost_out; /* First pass: retransmit lost packets. */ if (packet_cnt) { @@ -1210,7 +1209,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) * real MSS sized packet because tcp_retransmit_skb() * will fragment it if necessary. 
*/ - if (++packet_cnt > tcp_get_pcount(&tp->fackets_out)) + if (++packet_cnt > tp->fackets_out) break; if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) @@ -1496,7 +1495,7 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = TCP_SKB_CB(buff)->when; __skb_queue_tail(&sk->sk_write_queue, buff); sk_charge_skb(sk, buff); - tcp_inc_pcount(&tp->packets_out, buff); + tp->packets_out += tcp_skb_pcount(buff); tcp_transmit_skb(sk, skb_clone(buff, GFP_KERNEL)); TCP_INC_STATS(TCP_MIB_ACTIVEOPENS); @@ -1694,7 +1693,7 @@ void tcp_send_probe0(struct sock *sk) err = tcp_write_wakeup(sk); - if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) { + if (tp->packets_out || !sk->sk_send_head) { /* Cancel probe timer, if it is not required. */ tp->probes_out = 0; tp->backoff = 0; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index a2799d1678af..c3751508ed24 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -123,7 +123,7 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) * 1. Last segment was sent recently. */ if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || /* 2. Window is closed. 
*/ - (!tp->snd_wnd && !tcp_get_pcount(&tp->packets_out))) + (!tp->snd_wnd && !tp->packets_out)) do_reset = 1; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC); @@ -271,7 +271,7 @@ static void tcp_probe_timer(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); int max_probes; - if (tcp_get_pcount(&tp->packets_out) || !sk->sk_send_head) { + if (tp->packets_out || !sk->sk_send_head) { tp->probes_out = 0; return; } @@ -318,7 +318,7 @@ static void tcp_retransmit_timer(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (!tcp_get_pcount(&tp->packets_out)) + if (!tp->packets_out) goto out; BUG_TRAP(!skb_queue_empty(&sk->sk_write_queue)); @@ -608,7 +608,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ - if (tcp_get_pcount(&tp->packets_out) || sk->sk_send_head) + if (tp->packets_out || sk->sk_send_head) goto resched; elapsed = tcp_time_stamp - tp->rcv_tstamp; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index fecf022809ae..f4eeb8629a0e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1079,10 +1079,29 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) if (dev->addr_len != ETH_ALEN) return -1; memcpy(eui, dev->dev_addr, 3); - memcpy(eui + 5, dev->dev_addr+3, 3); - eui[3] = 0xFF; - eui[4] = 0xFE; - eui[0] ^= 2; + memcpy(eui + 5, dev->dev_addr + 3, 3); + + /* + * The zSeries OSA network cards can be shared among various + * OS instances, but the OSA cards have only one MAC address. + * This leads to duplicate address conflicts in conjunction + * with IPv6 if more than one instance uses the same card. + * + * The driver for these cards can deliver a unique 16-bit + * identifier for each instance sharing the same card. It is + * placed instead of 0xFFFE in the interface identifier. The + * "u" bit of the interface identifier is not inverted in this + * case. Hence the resulting interface identifier has local + * scope according to RFC2373. 
+ */ + if (dev->dev_id) { + eui[3] = (dev->dev_id >> 8) & 0xFF; + eui[4] = dev->dev_id & 0xFF; + } else { + eui[3] = 0xFF; + eui[4] = 0xFE; + eui[0] ^= 2; + } return 0; case ARPHRD_ARCNET: /* XXX: inherit EUI-64 from other interface -- yoshfuji */ diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index e9eca00e0e41..18637221a750 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -287,7 +287,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, int iif = 0; int addr_type = 0; int len; - int hlimit = -1; + int hlimit; int err = 0; if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail) @@ -375,14 +375,12 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out_dst_release; - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - } + if (ipv6_addr_is_multicast(&fl.fl6_dst)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = dst_metric(dst, RTAX_HOPLIMIT); msg.skb = skb; msg.offset = skb->nh.raw - skb->data; @@ -433,7 +431,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct icmpv6_msg msg; struct dst_entry *dst; int err = 0; - int hlimit = -1; + int hlimit; saddr = &skb->nh.ipv6h->daddr; @@ -463,14 +461,12 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) goto out_dst_release; - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl.fl6_dst)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = dst_metric(dst, RTAX_HOPLIMIT); - } + if (ipv6_addr_is_multicast(&fl.fl6_dst)) + hlimit = np->mcast_hops; + else + hlimit = np->hop_limit; + if (hlimit < 0) + hlimit = dst_metric(dst, RTAX_HOPLIMIT); idev = in6_dev_get(skb->dev); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 55110651a3f2..0b45f8da2950 100644 --- 
a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -169,12 +169,33 @@ struct ndisc_options { #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7) -static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len) +/* + * Return the padding between the option length and the start of the + * link addr. Currently only IP-over-InfiniBand needs this, although + * if RFC 3831 IPv6-over-Fibre Channel is ever implemented it may + * also need a pad of 2. + */ +static int ndisc_addr_option_pad(unsigned short type) +{ + switch (type) { + case ARPHRD_INFINIBAND: return 2; + default: return 0; + } +} + +static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len, + unsigned short addr_type) { int space = NDISC_OPT_SPACE(data_len); + int pad = ndisc_addr_option_pad(addr_type); opt[0] = type; opt[1] = space>>3; + + memset(opt + 2, 0, pad); + opt += pad; + space -= pad; + memcpy(opt+2, data, data_len); data_len += 2; opt += data_len; @@ -453,7 +474,8 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, ipv6_addr_copy(&msg->target, solicited_addr); if (inc_opt) - ndisc_fill_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, dev->addr_len); + ndisc_fill_addr_option(msg->opt, ND_OPT_TARGET_LL_ADDR, dev->dev_addr, + dev->addr_len, dev->type); /* checksum */ msg->icmph.icmp6_cksum = csum_ipv6_magic(src_addr, daddr, len, @@ -536,7 +558,8 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, ipv6_addr_copy(&msg->target, solicit); if (send_llinfo) - ndisc_fill_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len); + ndisc_fill_addr_option(msg->opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, + dev->addr_len, dev->type); /* checksum */ msg->icmph.icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, @@ -610,7 +633,8 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, opt = (u8*) (hdr + 1); if (dev->addr_len) - ndisc_fill_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, dev->addr_len); + 
ndisc_fill_addr_option(opt, ND_OPT_SOURCE_LL_ADDR, dev->dev_addr, + dev->addr_len, dev->type); /* checksum */ hdr->icmp6_cksum = csum_ipv6_magic(&skb->nh.ipv6h->saddr, daddr, len, @@ -717,7 +741,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) } if (ndopts.nd_opts_src_lladdr) { - lladdr = (u8*)(ndopts.nd_opts_src_lladdr + 1); + lladdr = (u8*)(ndopts.nd_opts_src_lladdr + 1) + + ndisc_addr_option_pad(dev->type); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len)) { ND_PRINTK2(KERN_WARNING @@ -874,7 +899,8 @@ static void ndisc_recv_na(struct sk_buff *skb) return; } if (ndopts.nd_opts_tgt_lladdr) { - lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1); + lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1) + + ndisc_addr_option_pad(dev->type); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len)) { ND_PRINTK2(KERN_WARNING @@ -903,6 +929,9 @@ static void ndisc_recv_na(struct sk_buff *skb) if (neigh) { u8 old_flags = neigh->flags; + if (neigh->nud_state & NUD_FAILED) + goto out; + neigh_update(neigh, lladdr, msg->icmph.icmp6_solicited ? 
NUD_REACHABLE : NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| @@ -920,6 +949,7 @@ static void ndisc_recv_na(struct sk_buff *skb) ip6_del_rt(rt, NULL, NULL); } +out: neigh_release(neigh); } } @@ -964,7 +994,8 @@ static void ndisc_recv_rs(struct sk_buff *skb) } if (ndopts.nd_opts_src_lladdr) { - lladdr = (u8 *)(ndopts.nd_opts_src_lladdr + 1); + lladdr = (u8 *)(ndopts.nd_opts_src_lladdr + 1) + + ndisc_addr_option_pad(skb->dev->type); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; if (lladdrlen != NDISC_OPT_SPACE(skb->dev->addr_len)) goto out; @@ -985,7 +1016,7 @@ out: static void ndisc_router_discovery(struct sk_buff *skb) { struct ra_msg *ra_msg = (struct ra_msg *) skb->h.raw; - struct neighbour *neigh; + struct neighbour *neigh = NULL; struct inet6_dev *in6_dev; struct rt6_info *rt; int lifetime; @@ -1053,7 +1084,11 @@ static void ndisc_router_discovery(struct sk_buff *skb) rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev); + if (rt) + neigh = rt->rt6i_nexthop; + if (rt && lifetime == 0) { + neigh_clone(neigh); ip6_del_rt(rt, NULL, NULL); rt = NULL; } @@ -1126,11 +1161,15 @@ static void ndisc_router_discovery(struct sk_buff *skb) * Process options. 
*/ - if (rt && (neigh = rt->rt6i_nexthop) != NULL) { + if (!neigh) + neigh = __neigh_lookup(&nd_tbl, &skb->nh.ipv6h->saddr, + skb->dev, 1); + if (neigh) { u8 *lladdr = NULL; int lladdrlen; if (ndopts.nd_opts_src_lladdr) { - lladdr = (u8*)((ndopts.nd_opts_src_lladdr)+1); + lladdr = (u8*)((ndopts.nd_opts_src_lladdr)+1) + + ndisc_addr_option_pad(skb->dev->type); lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3; if (lladdrlen != NDISC_OPT_SPACE(skb->dev->addr_len)) { ND_PRINTK2(KERN_WARNING @@ -1181,6 +1220,8 @@ static void ndisc_router_discovery(struct sk_buff *skb) out: if (rt) dst_release(&rt->u.dst); + else if (neigh) + neigh_release(neigh); in6_dev_put(in6_dev); } @@ -1250,7 +1291,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) return; } if (ndopts.nd_opts_tgt_lladdr) { - lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1); + lladdr = (u8*)(ndopts.nd_opts_tgt_lladdr + 1) + + ndisc_addr_option_pad(skb->dev->type); lladdrlen = ndopts.nd_opts_tgt_lladdr->nd_opt_len << 3; if (lladdrlen != NDISC_OPT_SPACE(skb->dev->addr_len)) { ND_PRINTK2(KERN_WARNING @@ -1379,7 +1421,8 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, */ if (dev->addr_len) - opt = ndisc_fill_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha, dev->addr_len); + opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, neigh->ha, + dev->addr_len, dev->type); /* * build redirect option and copy skb over to the new packet. 
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 9e0a2e169f7b..be23939b8f8e 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -115,10 +115,10 @@ found: static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) { struct icmp6hdr *icmph; - struct raw6_opt *opt = raw6_sk(sk); + struct raw6_sock *rp = raw6_sk(sk); if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &opt->filter.data[0]; + __u32 *data = &rp->filter.data[0]; int bit_nr; icmph = (struct icmp6hdr *) skb->data; @@ -315,14 +315,14 @@ static inline int rawv6_rcv_skb(struct sock * sk, struct sk_buff * skb) int rawv6_rcv(struct sock *sk, struct sk_buff *skb) { struct inet_sock *inet = inet_sk(sk); - struct raw6_opt *raw_opt = raw6_sk(sk); + struct raw6_sock *rp = raw6_sk(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { kfree_skb(skb); return NET_RX_DROP; } - if (!raw_opt->checksum) + if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; if (skb->ip_summed != CHECKSUM_UNNECESSARY) { @@ -451,21 +451,22 @@ csum_copy_err: goto out_free; } -static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_opt *opt, int len) +static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, + struct raw6_sock *rp, int len) { struct sk_buff *skb; int err = 0; u16 *csum; u32 tmp_csum; - if (!opt->checksum) + if (!rp->checksum) goto send; if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) goto out; - if (opt->offset + 1 < len) - csum = (u16 *)(skb->h.raw + opt->offset); + if (rp->offset + 1 < len) + csum = (u16 *)(skb->h.raw + rp->offset); else { err = -EINVAL; goto out; @@ -609,7 +610,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct in6_addr *daddr, *final_p = NULL, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct raw6_opt *raw_opt = raw6_sk(sk); + struct raw6_sock *rp = raw6_sk(sk); struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; struct dst_entry *dst = 
NULL; @@ -771,7 +772,7 @@ back_from_confirm: if (err) ip6_flush_pending_frames(sk); else if (!(msg->msg_flags & MSG_MORE)) - err = rawv6_push_pending_frames(sk, &fl, raw_opt, len); + err = rawv6_push_pending_frames(sk, &fl, rp, len); } done: ip6_dst_store(sk, dst, @@ -838,7 +839,7 @@ static int rawv6_geticmpfilter(struct sock *sk, int level, int optname, static int rawv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - struct raw6_opt *opt = raw6_sk(sk); + struct raw6_sock *rp = raw6_sk(sk); int val; switch(level) { @@ -868,10 +869,10 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, if (val > 0 && (val&1)) return(-EINVAL); if (val < 0) { - opt->checksum = 0; + rp->checksum = 0; } else { - opt->checksum = 1; - opt->offset = val; + rp->checksum = 1; + rp->offset = val; } return 0; @@ -885,7 +886,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, static int rawv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - struct raw6_opt *opt = raw6_sk(sk); + struct raw6_sock *rp = raw6_sk(sk); int val, len; switch(level) { @@ -910,10 +911,10 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_CHECKSUM: - if (opt->checksum == 0) + if (rp->checksum == 0) val = -1; else - val = opt->offset; + val = rp->offset; break; default: @@ -966,9 +967,9 @@ static void rawv6_close(struct sock *sk, long timeout) static int rawv6_init_sk(struct sock *sk) { if (inet_sk(sk)->num == IPPROTO_ICMPV6) { - struct raw6_opt *opt = raw6_sk(sk); - opt->checksum = 1; - opt->offset = 2; + struct raw6_sock *rp = raw6_sk(sk); + rp->checksum = 1; + rp->offset = 2; } return(0); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 316644b92cb3..06ba75fcfff4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -167,6 +167,12 @@ static void ip6_dst_ifdown(struct dst_entry *dst, int how) } } +static __inline__ int 
rt6_check_expired(const struct rt6_info *rt) +{ + return (rt->rt6i_flags & RTF_EXPIRES && + time_after(jiffies, rt->rt6i_expires)); +} + /* * Route lookup. Any rt6_lock is implied. */ @@ -237,8 +243,7 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) sprt->rt6i_dev->ifindex == oif)) m += 8; - if ((sprt->rt6i_flags & RTF_EXPIRES) && - time_after(jiffies, sprt->rt6i_expires)) + if (rt6_check_expired(sprt)) continue; if (sprt == rt6_dflt_pointer) @@ -296,7 +301,8 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) for (sprt = rt6_dflt_pointer->u.next; sprt; sprt = sprt->u.next) { if (sprt->u.dst.obsolete <= 0 && - sprt->u.dst.error == 0) { + sprt->u.dst.error == 0 && + !rt6_check_expired(sprt)) { match = sprt; break; } @@ -305,7 +311,8 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) !match && sprt; sprt = sprt->u.next) { if (sprt->u.dst.obsolete <= 0 && - sprt->u.dst.error == 0) { + sprt->u.dst.error == 0 && + !rt6_check_expired(sprt)) { match = sprt; break; } @@ -331,7 +338,8 @@ static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif) */ for (sprt = ip6_routing_table.leaf; sprt; sprt = sprt->u.next) { - if ((sprt->rt6i_flags & RTF_DEFAULT) && + if (!rt6_check_expired(sprt) && + (sprt->rt6i_flags & RTF_DEFAULT) && (!oif || (sprt->rt6i_dev && sprt->rt6i_dev->ifindex == oif))) { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index 5fbd19f82c11..344be7e9ea40 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -80,6 +80,8 @@ static struct proto_ops ipx_dgram_ops; LIST_HEAD(ipx_interfaces); DEFINE_SPINLOCK(ipx_interfaces_lock); +static kmem_cache_t *ipx_sk_slab; + struct ipx_interface *ipx_primary_net; struct ipx_interface *ipx_internal_net; @@ -277,7 +279,7 @@ static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc, spin_lock_bh(&intrfc->if_sklist_lock); sk_for_each(s, node, &intrfc->if_sklist) { - struct ipx_opt *ipxs = ipx_sk(s); + struct ipx_sock *ipxs = ipx_sk(s); if 
(ipxs->port == port && !memcmp(ipx_node, ipxs->node, IPX_NODE_LEN)) @@ -302,7 +304,7 @@ static void __ipxitf_down(struct ipx_interface *intrfc) spin_lock_bh(&intrfc->if_sklist_lock); /* error sockets */ sk_for_each_safe(s, node, t, &intrfc->if_sklist) { - struct ipx_opt *ipxs = ipx_sk(s); + struct ipx_sock *ipxs = ipx_sk(s); s->sk_err = ENOLINK; s->sk_error_report(s); @@ -400,7 +402,7 @@ static int ipxitf_demux_socket(struct ipx_interface *intrfc, spin_lock_bh(&intrfc->if_sklist_lock); sk_for_each(s, node, &intrfc->if_sklist) { - struct ipx_opt *ipxs = ipx_sk(s); + struct ipx_sock *ipxs = ipx_sk(s); if (ipxs->port == ipx->ipx_dest.sock && (is_broadcast || !memcmp(ipx->ipx_dest.node, @@ -1348,32 +1350,21 @@ out: static int ipx_create(struct socket *sock, int protocol) { int rc = -ESOCKTNOSUPPORT; - struct ipx_opt *ipx = NULL; struct sock *sk; - switch (sock->type) { - case SOCK_DGRAM: - sk = sk_alloc(PF_IPX, GFP_KERNEL, 1, NULL); - rc = -ENOMEM; - if (!sk) - goto out; - ipx = sk->sk_protinfo = kmalloc(sizeof(*ipx), GFP_KERNEL); - if (!ipx) - goto outsk; - memset(ipx, 0, sizeof(*ipx)); - sock->ops = &ipx_dgram_ops; - break; - case SOCK_SEQPACKET: - /* - * SPX support is not anymore in the kernel sources. If - * you want to ressurrect it, completing it and making - * it understand shared skbs, be fully multithreaded, - * etc, grab the sources in an early 2.5 kernel tree. - */ - case SOCK_STREAM: /* Allow higher levels to piggyback */ - default: + /* + * SPX support is not anymore in the kernel sources. If you want to + * ressurrect it, completing it and making it understand shared skbs, + * be fully multithreaded, etc, grab the sources in an early 2.5 kernel + * tree. 
+ */ + if (sock->type != SOCK_DGRAM) + goto out; + + sk = sk_alloc(PF_IPX, GFP_KERNEL, sizeof(struct ipx_sock), ipx_sk_slab); + rc = -ENOMEM; + if (!sk) goto out; - } #ifdef IPX_REFCNT_DEBUG atomic_inc(&ipx_sock_nr); printk(KERN_DEBUG "IPX socket %p created, now we have %d alive\n", sk, @@ -1382,12 +1373,10 @@ static int ipx_create(struct socket *sock, int protocol) sock_init_data(sock, sk); sk_set_owner(sk, THIS_MODULE); sk->sk_no_check = 1; /* Checksum off by default */ + sock->ops = &ipx_dgram_ops; rc = 0; out: return rc; -outsk: - sk_free(sk); - goto out; } static int ipx_release(struct socket *sock) @@ -1433,7 +1422,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc) static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { struct sock *sk = sock->sk; - struct ipx_opt *ipxs = ipx_sk(sk); + struct ipx_sock *ipxs = ipx_sk(sk); struct ipx_interface *intrfc; struct sockaddr_ipx *addr = (struct sockaddr_ipx *)uaddr; int rc = -EINVAL; @@ -1529,7 +1518,7 @@ static int ipx_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; - struct ipx_opt *ipxs = ipx_sk(sk); + struct ipx_sock *ipxs = ipx_sk(sk); struct sockaddr_ipx *addr; int rc = -EINVAL; struct ipx_route *rt; @@ -1593,7 +1582,7 @@ static int ipx_getname(struct socket *sock, struct sockaddr *uaddr, struct ipx_address *addr; struct sockaddr_ipx sipx; struct sock *sk = sock->sk; - struct ipx_opt *ipxs = ipx_sk(sk); + struct ipx_sock *ipxs = ipx_sk(sk); int rc; *uaddr_len = sizeof(struct sockaddr_ipx); @@ -1693,7 +1682,7 @@ static int ipx_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len) { struct sock *sk = sock->sk; - struct ipx_opt *ipxs = ipx_sk(sk); + struct ipx_sock *ipxs = ipx_sk(sk); struct sockaddr_ipx *usipx = (struct sockaddr_ipx *)msg->msg_name; struct sockaddr_ipx local_sipx; int rc = -EINVAL; @@ -1758,7 +1747,7 @@ static int ipx_recvmsg(struct kiocb *iocb, struct 
socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - struct ipx_opt *ipxs = ipx_sk(sk); + struct ipx_sock *ipxs = ipx_sk(sk); struct sockaddr_ipx *sipx = (struct sockaddr_ipx *)msg->msg_name; struct ipxhdr *ipx = NULL; struct sk_buff *skb; @@ -1965,6 +1954,13 @@ static char ipx_snap_err_msg[] __initdata = static int __init ipx_init(void) { + ipx_sk_slab = kmem_cache_create("ipx_sock", + sizeof(struct ipx_sock), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + + if (ipx_sk_slab == NULL) + return -ENOMEM; + sock_register(&ipx_family_ops); pEII_datalink = make_EII_client(); @@ -2016,6 +2012,11 @@ static void __exit ipx_proto_finito(void) destroy_EII_client(pEII_datalink); pEII_datalink = NULL; + if (ipx_sk_slab != NULL) { + kmem_cache_destroy(ipx_sk_slab); + ipx_sk_slab = NULL; + } + sock_unregister(ipx_family_ops.family); } diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 6b3cb469fc96..b6761913445a 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -202,7 +202,7 @@ static void *ipx_seq_socket_next(struct seq_file *seq, void *v, loff_t *pos) { struct sock* sk, *next; struct ipx_interface *i; - struct ipx_opt *ipxs; + struct ipx_sock *ipxs; ++*pos; if (v == SEQ_START_TOKEN) { @@ -243,7 +243,7 @@ out: static int ipx_seq_socket_show(struct seq_file *seq, void *v) { struct sock *s; - struct ipx_opt *ipxs; + struct ipx_sock *ipxs; if (v == SEQ_START_TOKEN) { #ifdef CONFIG_IPX_INTERN diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index c85e682f7f66..67774448efd9 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -172,7 +172,7 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, struct iovec *iov, size_t len, int noblock) { struct sk_buff *skb; - struct ipx_opt *ipxs = ipx_sk(sk); + struct ipx_sock *ipxs = ipx_sk(sk); struct ipx_interface *intrfc; struct ipxhdr *ipx; size_t size; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 74764e224c85..2dab5095ea89 100644 --- 
a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -629,7 +629,6 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb, int nonblock, long t } return 1; } - skb_orphan(skb); skb_set_owner_r(skb, sk); return 0; } @@ -661,21 +660,28 @@ void netlink_detachskb(struct sock *sk, struct sk_buff *skb) sock_put(sk); } -static inline void netlink_trim(struct sk_buff *skb, int allocation) +static inline struct sk_buff *netlink_trim(struct sk_buff *skb, int allocation) { - int delta = skb->end - skb->tail; + int delta; - /* If the packet is charged to a socket, the modification - * of truesize below is illegal and will corrupt socket - * buffer accounting state. - */ - BUG_ON(skb->list != NULL); + skb_orphan(skb); + delta = skb->end - skb->tail; if (delta * 2 < skb->truesize) - return; - if (pskb_expand_head(skb, 0, -delta, allocation)) - return; - skb->truesize -= delta; + return skb; + + if (skb_shared(skb)) { + struct sk_buff *nskb = skb_clone(skb, allocation); + if (!nskb) + return skb; + kfree_skb(skb); + skb = nskb; + } + + if (!pskb_expand_head(skb, 0, -delta, allocation)) + skb->truesize -= delta; + + return skb; } int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock) @@ -684,7 +690,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, u32 pid, int nonblock int err; long timeo; - netlink_trim(skb, gfp_any()); + skb = netlink_trim(skb, gfp_any()); timeo = sock_sndtimeo(ssk, nonblock); retry: @@ -707,14 +713,12 @@ static __inline__ int netlink_broadcast_deliver(struct sock *sk, struct sk_buff struct netlink_opt *nlk = nlk_sk(sk); #ifdef NL_EMULATE_DEV if (nlk->handler) { - skb_orphan(skb); nlk->handler(sk->sk_protocol, skb); return 0; } else #endif if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !test_bit(0, &nlk->state)) { - skb_orphan(skb); skb_set_owner_r(skb, sk); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); @@ -784,6 +788,8 @@ int netlink_broadcast(struct sock *ssk, 
struct sk_buff *skb, u32 pid, struct hlist_node *node; struct sock *sk; + skb = netlink_trim(skb, allocation); + info.exclude_sk = ssk; info.pid = pid; info.group = group; @@ -794,8 +800,6 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, info.skb = skb; info.skb2 = NULL; - netlink_trim(skb, allocation); - /* While we sleep in clone, do not allow to change socket list */ netlink_lock_table(); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a780c51defea..fe85d5588b46 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -239,8 +239,10 @@ replay: * replay the request. We indicate this using * -EAGAIN. */ - if (tp_ops != NULL) + if (tp_ops != NULL) { + module_put(tp_ops->owner); err = -EAGAIN; + } } #endif kfree(tp); @@ -486,24 +488,26 @@ tcf_exts_validate(struct tcf_proto *tp, struct rtattr **tb, memset(exts, 0, sizeof(*exts)); #ifdef CONFIG_NET_CLS_ACT - int err; - struct tc_action *act; + { + int err; + struct tc_action *act; - if (map->police && tb[map->police-1]) { - act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police", - TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); - if (act == NULL) - return err; - - act->type = TCA_OLD_COMPAT; - exts->action = act; - } else if (map->action && tb[map->action-1]) { - act = tcf_action_init(tb[map->action-1], rate_tlv, NULL, - TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); - if (act == NULL) - return err; - - exts->action = act; + if (map->police && tb[map->police-1]) { + act = tcf_action_init_1(tb[map->police-1], rate_tlv, "police", + TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); + if (act == NULL) + return err; + + act->type = TCA_OLD_COMPAT; + exts->action = act; + } else if (map->action && tb[map->action-1]) { + act = tcf_action_init(tb[map->action-1], rate_tlv, NULL, + TCA_ACT_NOREPLACE, TCA_ACT_BIND, &err); + if (act == NULL) + return err; + + exts->action = act; + } } #elif defined CONFIG_NET_CLS_POLICE if (map->police && tb[map->police-1]) { diff --git a/net/sched/cls_route.c 
b/net/sched/cls_route.c index d057789645ce..02996ac05c75 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -125,20 +125,20 @@ static __inline__ int route4_hash_wild(void) return 32; } -#define ROUTE4_APPLY_RESULT() \ - do { \ - *res = f->res; \ - if (tcf_exts_is_available(&f->exts)) { \ - int r = tcf_exts_exec(skb, &f->exts, res); \ - if (r < 0) { \ - dont_cache = 1; \ - continue; \ - } \ - return r; \ - } else if (!dont_cache) \ - route4_set_fastmap(head, id, iif, f); \ - return 0; \ - } while(0) +#define ROUTE4_APPLY_RESULT() \ +{ \ + *res = f->res; \ + if (tcf_exts_is_available(&f->exts)) { \ + int r = tcf_exts_exec(skb, &f->exts, res); \ + if (r < 0) { \ + dont_cache = 1; \ + continue; \ + } \ + return r; \ + } else if (!dont_cache) \ + route4_set_fastmap(head, id, iif, f); \ + return 0; \ +} static int route4_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) @@ -384,9 +384,9 @@ static int route4_set_parms(struct tcf_proto *tp, unsigned long base, id = *(u32*)RTA_DATA(tb[TCA_ROUTE4_IIF-1]); if (id > 0x7FFF) goto errout; - nhandle = (id | 0x8000) << 16; + nhandle |= (id | 0x8000) << 16; } else - nhandle = 0xFFFF << 16; + nhandle |= 0xFFFF << 16; if (handle && new) { nhandle |= handle & 0x7F00; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 7b74ad5ccd08..232fb9196810 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -123,14 +123,14 @@ static struct tcf_ext_map rsvp_ext_map = { .action = TCA_RSVP_ACT }; -#define RSVP_APPLY_RESULT() \ - do { \ - int r = tcf_exts_exec(skb, &f->exts, res); \ - if (r < 0) \ - continue; \ - else if (r > 0) \ - return r; \ - } while(0) +#define RSVP_APPLY_RESULT() \ +{ \ + int r = tcf_exts_exec(skb, &f->exts, res); \ + if (r < 0) \ + continue; \ + else if (r > 0) \ + return r; \ +} static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index b4c1c1f15dc2..d43e3b8cbf6a 100644 
--- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -241,7 +241,7 @@ cbq_reclassify(struct sk_buff *skb, struct cbq_class *this) */ static struct cbq_class * -cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres) +cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct cbq_sched_data *q = qdisc_priv(sch); struct cbq_class *head = &q->link; @@ -255,13 +255,11 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres) */ if (TC_H_MAJ(prio^sch->handle) == 0 && (cl = cbq_class_lookup(q, prio)) != NULL) - return cl; + return cl; + *qerr = NET_XMIT_DROP; for (;;) { int result = 0; -#ifdef CONFIG_NET_CLS_ACT - int terminal = 0; -#endif defmap = head->defaults; /* @@ -282,27 +280,13 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres) #ifdef CONFIG_NET_CLS_ACT switch (result) { - case TC_ACT_SHOT: /* Stop and kfree */ - *qres = NET_XMIT_DROP; - terminal = 1; - break; case TC_ACT_QUEUED: case TC_ACT_STOLEN: - terminal = 1; - break; - case TC_ACT_RECLASSIFY: /* Things look good */ - case TC_ACT_OK: - case TC_ACT_UNSPEC: - default: - break; - } - - if (terminal) { - kfree_skb(skb); + *qerr = NET_XMIT_SUCCESS; + case TC_ACT_SHOT: return NULL; } -#else -#ifdef CONFIG_NET_CLS_POLICE +#elif defined(CONFIG_NET_CLS_POLICE) switch (result) { case TC_POLICE_RECLASSIFY: return cbq_reclassify(skb, cl); @@ -312,7 +296,6 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres) break; } #endif -#endif if (cl->level == 0) return cl; @@ -423,45 +406,35 @@ cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct cbq_sched_data *q = qdisc_priv(sch); int len = skb->len; - int ret = NET_XMIT_SUCCESS; - struct cbq_class *cl = cbq_classify(skb, sch,&ret); + int ret; + struct cbq_class *cl = cbq_classify(skb, sch, &ret); #ifdef CONFIG_NET_CLS_POLICE q->rx_class = cl; #endif - if (cl) { -#ifdef CONFIG_NET_CLS_POLICE - cl->q->__parent = sch; -#endif - if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { - sch->q.qlen++; - 
sch->bstats.packets++; - sch->bstats.bytes+=len; - cbq_mark_toplevel(q, cl); - if (!cl->next_alive) - cbq_activate_class(cl); - return ret; - } - } - -#ifndef CONFIG_NET_CLS_ACT - sch->qstats.drops++; - if (cl == NULL) + if (cl == NULL) { + if (ret == NET_XMIT_DROP) + sch->qstats.drops++; kfree_skb(skb); - else { - cbq_mark_toplevel(q, cl); - cl->qstats.drops++; - } -#else - if ( NET_XMIT_DROP == ret) { - sch->qstats.drops++; + return ret; } - if (cl != NULL) { +#ifdef CONFIG_NET_CLS_POLICE + cl->q->__parent = sch; +#endif + if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) { + sch->q.qlen++; + sch->bstats.packets++; + sch->bstats.bytes+=len; cbq_mark_toplevel(q, cl); - cl->qstats.drops++; + if (!cl->next_alive) + cbq_activate_class(cl); + return ret; } -#endif + + sch->qstats.drops++; + cbq_mark_toplevel(q, cl); + cl->qstats.drops++; return ret; } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 8c01e023f02e..e9f7dba9a5a4 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -99,17 +99,11 @@ int qdisc_restart(struct net_device *dev) if ((skb = q->dequeue(q)) != NULL) { unsigned nolock = (dev->features & NETIF_F_LLTX); /* - * When the driver has LLTX set it does its own locking - * in start_xmit. No need to add additional overhead by - * locking again. These checks are worth it because - * even uncongested locks can be quite expensive. - * The driver can do trylock like here too, in case - * of lock congestion it should return -1 and the packet - * will be requeued. + * When the driver has LLTX set it does not require any + * locking in start_xmit. */ if (!nolock) { - if (!spin_trylock(&dev->xmit_lock)) { - collision: + if (!spin_trylock_irq(&dev->xmit_lock)) { /* So, someone grabbed the driver. 
*/ /* It may be transient configuration error, @@ -143,22 +137,18 @@ int qdisc_restart(struct net_device *dev) if (ret == NETDEV_TX_OK) { if (!nolock) { dev->xmit_lock_owner = -1; - spin_unlock(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); } spin_lock(&dev->queue_lock); return -1; } - if (ret == NETDEV_TX_LOCKED && nolock) { - spin_lock(&dev->queue_lock); - goto collision; - } } /* NETDEV_TX_BUSY - we need to requeue */ /* Release the driver */ if (!nolock) { dev->xmit_lock_owner = -1; - spin_unlock(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); } spin_lock(&dev->queue_lock); q = dev->qdisc; @@ -186,7 +176,7 @@ static void dev_watchdog(unsigned long arg) { struct net_device *dev = (struct net_device *)arg; - spin_lock(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); if (dev->qdisc != &noop_qdisc) { if (netif_device_present(dev) && netif_running(dev) && @@ -200,7 +190,7 @@ static void dev_watchdog(unsigned long arg) dev_hold(dev); } } - spin_unlock(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); dev_put(dev); } @@ -224,17 +214,17 @@ void __netdev_watchdog_up(struct net_device *dev) static void dev_watchdog_up(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); __netdev_watchdog_up(dev); - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); } static void dev_watchdog_down(struct net_device *dev) { - spin_lock_bh(&dev->xmit_lock); + spin_lock_irq(&dev->xmit_lock); if (del_timer(&dev->watchdog_timer)) __dev_put(dev); - spin_unlock_bh(&dev->xmit_lock); + spin_unlock_irq(&dev->xmit_lock); } /* "NOOP" scheduler: the best scheduler, recommended for all interfaces diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d09e0b0cb5f9..c26764bc4103 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1214,7 +1214,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) } static struct hfsc_class * -hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres) 
+hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct hfsc_sched *q = qdisc_priv(sch); struct hfsc_class *cl; @@ -1227,36 +1227,21 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres) if (cl->level == 0) return cl; + *qerr = NET_XMIT_DROP; tcf = q->root.filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT - int terminal = 0; switch (result) { - case TC_ACT_SHOT: - *qres = NET_XMIT_DROP; - terminal = 1; - break; case TC_ACT_QUEUED: case TC_ACT_STOLEN: - terminal = 1; - break; - case TC_ACT_RECLASSIFY: - case TC_ACT_OK: - case TC_ACT_UNSPEC: - default: - break; - } - - if (terminal) { - kfree_skb(skb); + *qerr = NET_XMIT_SUCCESS; + case TC_ACT_SHOT: return NULL; } -#else -#ifdef CONFIG_NET_CLS_POLICE +#elif defined(CONFIG_NET_CLS_POLICE) if (result == TC_POLICE_SHOT) return NULL; #endif -#endif if ((cl = (struct hfsc_class *)res.class) == NULL) { if ((cl = hfsc_find_class(res.classid, sch)) == NULL) break; /* filter selected invalid classid */ @@ -1652,27 +1637,19 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) static int hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret = NET_XMIT_SUCCESS; - struct hfsc_class *cl = hfsc_classify(skb, sch, &ret); - unsigned int len = skb->len; + struct hfsc_class *cl; + unsigned int len; int err; - -#ifdef CONFIG_NET_CLS_ACT + cl = hfsc_classify(skb, sch, &err); if (cl == NULL) { - if (NET_XMIT_DROP == ret) { + if (err == NET_XMIT_DROP) sch->qstats.drops++; - } - return ret; - } -#else - if (cl == NULL) { kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_DROP; + return err; } -#endif + len = skb->len; err = cl->qdisc->enqueue(skb, cl->qdisc); if (unlikely(err != NET_XMIT_SUCCESS)) { cl->qstats.drops++; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 116028554ae4..a85935e7d53d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -305,7 +305,7 @@ static inline u32 htb_classid(struct htb_class *cl) 
return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC; } -static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qres) +static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; @@ -321,35 +321,20 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, in if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) return cl; + *qerr = NET_XMIT_DROP; tcf = q->filter_list; while (tcf && (result = tc_classify(skb, tcf, &res)) >= 0) { #ifdef CONFIG_NET_CLS_ACT - int terminal = 0; switch (result) { - case TC_ACT_SHOT: /* Stop and kfree */ - *qres = NET_XMIT_DROP; - terminal = 1; - break; case TC_ACT_QUEUED: case TC_ACT_STOLEN: - terminal = 1; - break; - case TC_ACT_RECLASSIFY: /* Things look good */ - case TC_ACT_OK: - case TC_ACT_UNSPEC: - default: - break; - } - - if (terminal) { - kfree_skb(skb); + *qerr = NET_XMIT_SUCCESS; + case TC_ACT_SHOT: return NULL; } -#else -#ifdef CONFIG_NET_CLS_POLICE +#elif defined(CONFIG_NET_CLS_POLICE) if (result == TC_POLICE_SHOT) - return NULL; -#endif + return HTB_DIRECT; #endif if ((cl = (void*)res.class) == NULL) { if (res.classid == sch->handle) @@ -723,37 +708,24 @@ htb_deactivate(struct htb_sched *q,struct htb_class *cl) static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret = NET_XMIT_SUCCESS; + int ret; struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = htb_classify(skb,sch,&ret); - -#ifdef CONFIG_NET_CLS_ACT - if (cl == HTB_DIRECT ) { - if (q->direct_queue.qlen < q->direct_qlen ) { - __skb_queue_tail(&q->direct_queue, skb); - q->direct_pkts++; - } - } else if (!cl) { - if (NET_XMIT_DROP == ret) { - sch->qstats.drops++; - } - return ret; - } -#else - if (cl == HTB_DIRECT || !cl) { + if (cl == HTB_DIRECT) { /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen && cl) { + if (q->direct_queue.qlen < q->direct_qlen) { 
__skb_queue_tail(&q->direct_queue, skb); q->direct_pkts++; - } else { - kfree_skb (skb); - sch->qstats.drops++; - return NET_XMIT_DROP; } - } +#ifdef CONFIG_NET_CLS_ACT + } else if (!cl) { + if (ret == NET_XMIT_DROP) + sch->qstats.drops++; + kfree_skb (skb); + return ret; #endif - else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { + } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { sch->qstats.drops++; cl->qstats.drops++; return NET_XMIT_DROP; diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 2f06270cad4b..3ac0f495bad0 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -47,37 +47,23 @@ struct prio_sched_data }; -static struct Qdisc *prio_classify(struct sk_buff *skb, - struct Qdisc *sch, int *r) +static struct Qdisc * +prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct prio_sched_data *q = qdisc_priv(sch); u32 band = skb->priority; struct tcf_result res; + *qerr = NET_XMIT_DROP; if (TC_H_MAJ(skb->priority) != sch->handle) { #ifdef CONFIG_NET_CLS_ACT - int result = 0, terminal = 0; - result = tc_classify(skb, q->filter_list, &res); - - switch (result) { - case TC_ACT_SHOT: - *r = NET_XMIT_DROP; - terminal = 1; - break; - case TC_ACT_STOLEN: - case TC_ACT_QUEUED: - terminal = 1; - break; - case TC_ACT_RECLASSIFY: - case TC_ACT_OK: - case TC_ACT_UNSPEC: - default: - break; - }; - if (terminal) { - kfree_skb(skb); + switch (tc_classify(skb, q->filter_list, &res)) { + case TC_ACT_STOLEN: + case TC_ACT_QUEUED: + *qerr = NET_XMIT_SUCCESS; + case TC_ACT_SHOT: return NULL; - } + }; if (!q->filter_list ) { #else @@ -97,15 +83,20 @@ static struct Qdisc *prio_classify(struct sk_buff *skb, } static int -prio_enqueue(struct sk_buff *skb, struct Qdisc* sch) +prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) { struct Qdisc *qdisc; - int ret = NET_XMIT_SUCCESS; + int ret; qdisc = prio_classify(skb, sch, &ret); - - if (NULL == qdisc) - goto dropped; +#ifdef CONFIG_NET_CLS_ACT + if 
(qdisc == NULL) { + if (ret == NET_XMIT_DROP) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif if ((ret = qdisc->enqueue(skb, qdisc)) == NET_XMIT_SUCCESS) { sch->bstats.bytes += skb->len; @@ -113,17 +104,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc* sch) sch->q.qlen++; return NET_XMIT_SUCCESS; } - -dropped: -#ifdef CONFIG_NET_CLS_ACT - if (NET_XMIT_DROP == ret) { -#endif - sch->qstats.drops++; -#ifdef CONFIG_NET_CLS_ACT - } else { - sch->qstats.overlimits++; /* abuse, but noone uses it */ - } -#endif + sch->qstats.drops++; return ret; } @@ -132,18 +113,23 @@ static int prio_requeue(struct sk_buff *skb, struct Qdisc* sch) { struct Qdisc *qdisc; - int ret = NET_XMIT_DROP; + int ret; qdisc = prio_classify(skb, sch, &ret); - if (qdisc == NULL) - goto dropped; +#ifdef CONFIG_NET_CLS_ACT + if (qdisc == NULL) { + if (ret == NET_XMIT_DROP) + sch->qstats.drops++; + kfree_skb(skb); + return ret; + } +#endif - if ((ret = qdisc->ops->requeue(skb, qdisc)) == 0) { + if ((ret = qdisc->ops->requeue(skb, qdisc)) == NET_XMIT_SUCCESS) { sch->q.qlen++; sch->qstats.requeues++; return 0; } -dropped: sch->qstats.drops++; return NET_XMIT_DROP; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 6cf0342706b5..b88cbf028397 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -301,12 +301,12 @@ restart: switch (teql_resolve(skb, skb_res, slave)) { case 0: - if (spin_trylock(&slave->xmit_lock)) { + if (spin_trylock_irq(&slave->xmit_lock)) { slave->xmit_lock_owner = smp_processor_id(); if (!netif_queue_stopped(slave) && slave->hard_start_xmit(skb, slave) == 0) { slave->xmit_lock_owner = -1; - spin_unlock(&slave->xmit_lock); + spin_unlock_irq(&slave->xmit_lock); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); master->stats.tx_packets++; @@ -314,7 +314,7 @@ restart: return 0; } slave->xmit_lock_owner = -1; - spin_unlock(&slave->xmit_lock); + spin_unlock_irq(&slave->xmit_lock); } if (netif_queue_stopped(dev)) busy = 1; diff --git 
a/net/sctp/associola.c b/net/sctp/associola.c index fda3bc435c7c..663843d97a92 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -73,7 +73,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a sctp_scope_t scope, int gfp) { - struct sctp_opt *sp; + struct sctp_sock *sp; int i; /* Retrieve the SCTP per socket area. */ @@ -434,7 +434,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, int gfp) { struct sctp_transport *peer; - struct sctp_opt *sp; + struct sctp_sock *sp; unsigned short port; sp = sctp_sk(asoc->base.sk); @@ -886,7 +886,7 @@ static void sctp_assoc_bh_rcv(struct sctp_association *asoc) /* This routine moves an association from its old sk to a new sk. */ void sctp_assoc_migrate(struct sctp_association *assoc, struct sock *newsk) { - struct sctp_opt *newsp = sctp_sk(newsk); + struct sctp_sock *newsp = sctp_sk(newsk); struct sock *oldsk = assoc->base.sk; /* Delete the association from the old endpoint's list of @@ -1059,7 +1059,7 @@ void sctp_assoc_sync_pmtu(struct sctp_association *asoc) } if (pmtu) { - struct sctp_opt *sp = sctp_sk(asoc->base.sk); + struct sctp_sock *sp = sctp_sk(asoc->base.sk); asoc->pmtu = pmtu; asoc->frag_point = sctp_frag_point(sp, pmtu); } diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index ec2d7450fb18..f90eadfb60a2 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -293,7 +293,7 @@ int sctp_raw_to_bind_addrs(struct sctp_bind_addr *bp, __u8 *raw_addr_list, /* Does this contain a specified address? Allow wildcarding. 
*/ int sctp_bind_addr_match(struct sctp_bind_addr *bp, const union sctp_addr *addr, - struct sctp_opt *opt) + struct sctp_sock *opt) { struct sctp_sockaddr_entry *laddr; struct list_head *pos; @@ -313,7 +313,7 @@ int sctp_bind_addr_match(struct sctp_bind_addr *bp, union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, const union sctp_addr *addrs, int addrcnt, - struct sctp_opt *opt) + struct sctp_sock *opt) { struct sctp_sockaddr_entry *laddr; union sctp_addr *addr; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 6e500f8ca47f..0c2ab7885058 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -77,7 +77,7 @@ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) { struct list_head *pos, *temp; struct sctp_chunk *chunk; - struct sctp_opt *sp; + struct sctp_sock *sp; struct sctp_ulpevent *ev; struct sctp_association *asoc = NULL; int error = 0, notify; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index cfeb033841dc..b4631b3001a3 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -69,7 +69,7 @@ static void sctp_endpoint_bh_rcv(struct sctp_endpoint *ep); static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, struct sock *sk, int gfp) { - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); memset(ep, 0, sizeof(struct sctp_endpoint)); /* Initialize the base structure. */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index f23de6bd5b52..efe44d19d2c4 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -502,7 +502,7 @@ static int sctp_v6_is_any(const union sctp_addr *addr) } /* Should this be available for binding? 
*/ -static int sctp_v6_available(union sctp_addr *addr, struct sctp_opt *sp) +static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp) { int type; struct in6_addr *in6 = (struct in6_addr *)&addr->v6.sin6_addr; @@ -531,14 +531,14 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_opt *sp) * Return 0 - If the address is a non-unicast or an illegal address. * Return 1 - If the address is a unicast. */ -static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_opt *sp) +static int sctp_v6_addr_valid(union sctp_addr *addr, struct sctp_sock *sp) { int ret = ipv6_addr_type(&addr->v6.sin6_addr); /* Support v4-mapped-v6 address. */ if (ret == IPV6_ADDR_MAPPED) { /* Note: This routine is used in input, so v4-mapped-v6 - * are disallowed here when there is no sctp_opt. + * are disallowed here when there is no sctp_sock. */ if (!sp || !sp->v4mapped) return 0; @@ -616,7 +616,7 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk, newsk->sk_shutdown = sk->sk_shutdown; newsctp6sk = (struct sctp6_sock *)newsk; - newsctp6sk->inet.pinet6 = &newsctp6sk->inet6; + inet_sk(newsk)->pinet6 = &newsctp6sk->inet6; newinet = inet_sk(newsk); newnp = inet6_sk(newsk); @@ -661,7 +661,7 @@ out: } /* Map v4 address to mapped v6 address */ -static void sctp_v6_addr_v4map(struct sctp_opt *sp, union sctp_addr *addr) +static void sctp_v6_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr) { if (sp->v4mapped && AF_INET == addr->sa.sa_family) sctp_v4_map_v6(addr); @@ -766,7 +766,7 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, } /* Do we support this AF? 
*/ -static int sctp_inet6_af_supported(sa_family_t family, struct sctp_opt *sp) +static int sctp_inet6_af_supported(sa_family_t family, struct sctp_sock *sp) { switch (family) { case AF_INET6: @@ -786,7 +786,7 @@ static int sctp_inet6_af_supported(sa_family_t family, struct sctp_opt *sp) */ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2, - struct sctp_opt *opt) + struct sctp_sock *opt) { struct sctp_af *af1, *af2; @@ -808,7 +808,7 @@ static int sctp_inet6_cmp_addr(const union sctp_addr *addr1, /* Verify that the provided sockaddr looks bindable. Common verification, * has already been taken care of. */ -static int sctp_inet6_bind_verify(struct sctp_opt *opt, union sctp_addr *addr) +static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) { struct sctp_af *af; @@ -838,7 +838,7 @@ static int sctp_inet6_bind_verify(struct sctp_opt *opt, union sctp_addr *addr) /* Verify that the provided sockaddr looks bindable. Common verification, * has already been taken care of. */ -static int sctp_inet6_send_verify(struct sctp_opt *opt, union sctp_addr *addr) +static int sctp_inet6_send_verify(struct sctp_sock *opt, union sctp_addr *addr) { struct sctp_af *af = NULL; @@ -872,7 +872,7 @@ static int sctp_inet6_send_verify(struct sctp_opt *opt, union sctp_addr *addr) * addresses. * Returns number of addresses supported. 
*/ -static int sctp_inet6_supported_addrs(const struct sctp_opt *opt, +static int sctp_inet6_supported_addrs(const struct sctp_sock *opt, __u16 *types) { types[0] = SCTP_PARAM_IPV4_ADDRESS; diff --git a/net/sctp/output.c b/net/sctp/output.c index 14009b01c433..9013f64f5219 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -110,7 +110,7 @@ struct sctp_packet *sctp_packet_init(struct sctp_packet *packet, packet->destination_port = dport; skb_queue_head_init(&packet->chunks); if (asoc) { - struct sctp_opt *sp = sctp_sk(asoc->base.sk); + struct sctp_sock *sp = sctp_sk(asoc->base.sk); overhead = sp->pf->af->net_header_len; } else { overhead = sizeof(struct ipv6hdr); @@ -534,7 +534,7 @@ static sctp_xmit_t sctp_packet_append_data(struct sctp_packet *packet, struct sctp_transport *transport = packet->transport; __u32 max_burst_bytes; struct sctp_association *asoc = transport->asoc; - struct sctp_opt *sp = sctp_sk(asoc->base.sk); + struct sctp_sock *sp = sctp_sk(asoc->base.sk); struct sctp_outq *q = &asoc->outqueue; /* RFC 2960 6.1 Transmission of DATA Chunks diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 817f14f97a4f..c1ee92a662b7 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -364,7 +364,7 @@ static int sctp_v4_is_any(const union sctp_addr *addr) * Return 0 - If the address is a non-unicast or an illegal address. * Return 1 - If the address is a unicast. */ -static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_opt *sp) +static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_sock *sp) { /* Is this a non-unicast address or a unusable SCTP address? */ if (IS_IPV4_UNUSABLE_ADDRESS(&addr->v4.sin_addr.s_addr)) @@ -374,7 +374,7 @@ static int sctp_v4_addr_valid(union sctp_addr *addr, struct sctp_opt *sp) } /* Should this be available for binding? 
*/ -static int sctp_v4_available(union sctp_addr *addr, struct sctp_opt *sp) +static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) { int ret = inet_addr_type(addr->v4.sin_addr.s_addr); @@ -608,7 +608,7 @@ out: } /* Map address, empty for v4 family */ -static void sctp_v4_addr_v4map(struct sctp_opt *sp, union sctp_addr *addr) +static void sctp_v4_addr_v4map(struct sctp_sock *sp, union sctp_addr *addr) { /* Empty */ } @@ -745,7 +745,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len) } /* Do we support this AF? */ -static int sctp_inet_af_supported(sa_family_t family, struct sctp_opt *sp) +static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp) { /* PF_INET only supports AF_INET addresses. */ return (AF_INET == family); @@ -754,7 +754,7 @@ static int sctp_inet_af_supported(sa_family_t family, struct sctp_opt *sp) /* Address matching with wildcards allowed. */ static int sctp_inet_cmp_addr(const union sctp_addr *addr1, const union sctp_addr *addr2, - struct sctp_opt *opt) + struct sctp_sock *opt) { /* PF_INET only supports AF_INET addresses. */ if (addr1->sa.sa_family != addr2->sa.sa_family) @@ -771,7 +771,7 @@ static int sctp_inet_cmp_addr(const union sctp_addr *addr1, /* Verify that provided sockaddr looks bindable. Common verification has * already been taken care of. */ -static int sctp_inet_bind_verify(struct sctp_opt *opt, union sctp_addr *addr) +static int sctp_inet_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) { return sctp_v4_available(addr, opt); } @@ -779,7 +779,7 @@ static int sctp_inet_bind_verify(struct sctp_opt *opt, union sctp_addr *addr) /* Verify that sockaddr looks sendable. Common verification has already * been taken care of. 
*/ -static int sctp_inet_send_verify(struct sctp_opt *opt, union sctp_addr *addr) +static int sctp_inet_send_verify(struct sctp_sock *opt, union sctp_addr *addr) { return 1; } @@ -787,7 +787,7 @@ static int sctp_inet_send_verify(struct sctp_opt *opt, union sctp_addr *addr) /* Fill in Supported Address Type information for INIT and INIT-ACK * chunks. Returns number of addresses supported. */ -static int sctp_inet_supported_addrs(const struct sctp_opt *opt, +static int sctp_inet_supported_addrs(const struct sctp_sock *opt, __u16 *types) { types[0] = SCTP_PARAM_IPV4_ADDRESS; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 7a0c6fa4c33c..1db12cc18cf7 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -181,7 +181,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, size_t chunksize; struct sctp_chunk *retval = NULL; int num_types, addrs_len = 0; - struct sctp_opt *sp; + struct sctp_sock *sp; sctp_supported_addrs_param_t sat; __u16 types[2]; sctp_adaption_ind_param_t aiparam; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 30b921b90d00..6f66ee490784 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -93,7 +93,7 @@ static int sctp_wait_for_packet(struct sock * sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); static void sctp_wait_for_close(struct sock *sk, long timeo); -static struct sctp_af *sctp_sockaddr_af(struct sctp_opt *opt, +static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, union sctp_addr *addr, int len); static int sctp_bindx_add(struct sock *, struct sockaddr *, int); static int sctp_bindx_rem(struct sock *, struct sockaddr *, int); @@ -269,7 +269,7 @@ SCTP_STATIC int sctp_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) static long sctp_get_port_local(struct sock *, union sctp_addr *); /* Verify this is a valid sockaddr. 
*/ -static struct sctp_af *sctp_sockaddr_af(struct sctp_opt *opt, +static struct sctp_af *sctp_sockaddr_af(struct sctp_sock *opt, union sctp_addr *addr, int len) { struct sctp_af *af; @@ -294,7 +294,7 @@ static struct sctp_af *sctp_sockaddr_af(struct sctp_opt *opt, /* Bind a local address either to an endpoint or to an association. */ SCTP_STATIC int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len) { - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; struct sctp_bind_addr *bp = &ep->base.bind_addr; struct sctp_af *af; @@ -467,7 +467,7 @@ static int sctp_send_asconf_add_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { - struct sctp_opt *sp; + struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; struct sctp_bind_addr *bp; @@ -572,7 +572,7 @@ out: */ int sctp_bindx_rem(struct sock *sk, struct sockaddr *addrs, int addrcnt) { - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; int cnt; struct sctp_bind_addr *bp = &ep->base.bind_addr; @@ -656,7 +656,7 @@ static int sctp_send_asconf_del_ip(struct sock *sk, struct sockaddr *addrs, int addrcnt) { - struct sctp_opt *sp; + struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; struct sctp_bind_addr *bp; @@ -1051,7 +1051,7 @@ SCTP_STATIC int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); SCTP_STATIC int sctp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t msg_len) { - struct sctp_opt *sp; + struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *new_asoc=NULL, *asoc=NULL; struct sctp_transport *transport, *chunk_tp; @@ -1492,7 +1492,7 @@ SCTP_STATIC int sctp_recvmsg(struct kiocb *iocb, struct sock *sk, int flags, int *addr_len) { struct sctp_ulpevent *event = NULL; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); struct sk_buff *skb; int copied; int err = 0; @@ 
-1637,7 +1637,7 @@ static int sctp_setsockopt_events(struct sock *sk, char __user *optval, static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, int optlen) { - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); /* Applicable to UDP-style socket only */ if (sctp_style(sk, TCP)) @@ -1779,7 +1779,7 @@ static int sctp_setsockopt_peer_addr_params(struct sock *sk, static int sctp_setsockopt_initmsg(struct sock *sk, char __user *optval, int optlen) { struct sctp_initmsg sinit; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof(struct sctp_initmsg)) return -EINVAL; @@ -1817,7 +1817,7 @@ static int sctp_setsockopt_default_send_param(struct sock *sk, { struct sctp_sndrcvinfo info; struct sctp_association *asoc; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (optlen != sizeof(struct sctp_sndrcvinfo)) return -EINVAL; @@ -1934,7 +1934,7 @@ static int sctp_setsockopt_rtoinfo(struct sock *sk, char __user *optval, int opt /* If there is no association or the association-id = 0 * set the values to the endpoint. 
*/ - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (rtoinfo.srto_initial != 0) sp->rtoinfo.srto_initial = rtoinfo.srto_initial; @@ -1987,7 +1987,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o } } else { /* Set the values to the endpoint */ - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (assocparams.sasoc_asocmaxrxt != 0) sp->assocparams.sasoc_asocmaxrxt = @@ -2012,7 +2012,7 @@ static int sctp_setsockopt_associnfo(struct sock *sk, char __user *optval, int o static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, int optlen) { int val; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (optlen < sizeof(int)) return -EINVAL; @@ -2040,7 +2040,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl { struct sctp_association *asoc; struct list_head *pos; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); int val; if (optlen < sizeof(int)) @@ -2074,7 +2074,7 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, int optl static int sctp_setsockopt_peer_primary_addr(struct sock *sk, char __user *optval, int optlen) { - struct sctp_opt *sp; + struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc = NULL; struct sctp_setpeerprim prim; @@ -2269,7 +2269,7 @@ out_nounlock: SCTP_STATIC int sctp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { - struct sctp_opt *sp; + struct sctp_sock *sp; struct sctp_endpoint *ep; struct sctp_association *asoc; struct sctp_transport *transport; @@ -2390,7 +2390,7 @@ SCTP_STATIC int sctp_disconnect(struct sock *sk, int flags) */ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) { - struct sctp_opt *sp; + struct sctp_sock *sp; struct sctp_endpoint *ep; struct sock *newsk = NULL; struct sctp_association *asoc; @@ -2453,7 +2453,7 @@ SCTP_STATIC int sctp_ioctl(struct sock 
*sk, int cmd, unsigned long arg) SCTP_STATIC int sctp_init_sock(struct sock *sk) { struct sctp_endpoint *ep; - struct sctp_opt *sp; + struct sctp_sock *sp; SCTP_DEBUG_PRINTK("sctp_init_sock(sk: %p)\n", sk); @@ -3007,7 +3007,7 @@ static int sctp_getsockopt_peer_addrs(struct sock *sk, int len, struct sctp_transport *from; void __user *to; union sctp_addr temp; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); int addrlen; if (len != sizeof(struct sctp_getaddrs)) @@ -3164,7 +3164,7 @@ static int sctp_getsockopt_local_addrs(struct sock *sk, int len, struct sctp_sockaddr_entry *addr; void __user *to; union sctp_addr temp; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); int addrlen; rwlock_t *addr_lock; int err = 0; @@ -3250,7 +3250,7 @@ static int sctp_getsockopt_primary_addr(struct sock *sk, int len, { struct sctp_prim prim; struct sctp_association *asoc; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (len != sizeof(struct sctp_prim)) return -EINVAL; @@ -3329,7 +3329,7 @@ static int sctp_getsockopt_default_send_param(struct sock *sk, { struct sctp_sndrcvinfo info; struct sctp_association *asoc; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (len != sizeof(struct sctp_sndrcvinfo)) return -EINVAL; @@ -3423,7 +3423,7 @@ static int sctp_getsockopt_rtoinfo(struct sock *sk, int len, rtoinfo.srto_min = jiffies_to_msecs(asoc->rto_min); } else { /* Values corresponding to the endpoint. 
*/ - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); rtoinfo.srto_initial = sp->rtoinfo.srto_initial; rtoinfo.srto_max = sp->rtoinfo.srto_max; @@ -3489,7 +3489,7 @@ static int sctp_getsockopt_associnfo(struct sock *sk, int len, assocparams.sasoc_number_peer_destinations = cnt; } else { /* Values corresponding to the endpoint */ - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); assocparams.sasoc_asocmaxrxt = sp->assocparams.sasoc_asocmaxrxt; assocparams.sasoc_peer_rwnd = sp->assocparams.sasoc_peer_rwnd; @@ -3524,7 +3524,7 @@ static int sctp_getsockopt_mappedv4(struct sock *sk, int len, char __user *optval, int __user *optlen) { int val; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); if (len < sizeof(int)) return -EINVAL; @@ -3876,7 +3876,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) */ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) { - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; /* Only UDP style sockets that are not peeled off are allowed to @@ -3925,7 +3925,7 @@ SCTP_STATIC int sctp_seqpacket_listen(struct sock *sk, int backlog) */ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) { - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); struct sctp_endpoint *ep = sp->ep; /* If backlog is zero, disable listening. 
*/ @@ -4026,7 +4026,7 @@ cleanup: unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait) { struct sock *sk = sock->sk; - struct sctp_opt *sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); unsigned int mask; poll_wait(file, sk->sk_sleep, wait); @@ -4654,8 +4654,8 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sctp_association *assoc, sctp_socket_type_t type) { - struct sctp_opt *oldsp = sctp_sk(oldsk); - struct sctp_opt *newsp = sctp_sk(newsk); + struct sctp_sock *oldsp = sctp_sk(oldsk); + struct sctp_sock *newsp = sctp_sk(newsk); struct sctp_bind_bucket *pp; /* hash list port iterator */ struct sctp_endpoint *newep = newsp->ep; struct sk_buff *skb, *tmp; @@ -4667,7 +4667,7 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, newsk->sk_sndbuf = oldsk->sk_sndbuf; newsk->sk_rcvbuf = oldsk->sk_rcvbuf; /* Brute force copy old sctp opt. */ - memcpy(newsp, oldsp, sizeof(struct sctp_opt)); + inet_sk_copy_descendant(newsk, oldsk); /* Restore the ep value that was overwritten with the above structure * copy. diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 3fcca5ec314b..0e0c0f8f1911 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -237,7 +237,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport) * address. 
*/ void sctp_transport_route(struct sctp_transport *transport, - union sctp_addr *saddr, struct sctp_opt *opt) + union sctp_addr *saddr, struct sctp_sock *opt) { struct sctp_association *asoc = transport->asoc; struct sctp_af *af = transport->af_specific; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 47a43580f05b..d5dd2cf7ac4a 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -138,8 +138,7 @@ int sctp_ulpq_tail_data(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk, */ int sctp_clear_pd(struct sock *sk) { - struct sctp_opt *sp; - sp = sctp_sk(sk); + struct sctp_sock *sp = sctp_sk(sk); sp->pd_mode = 0; if (!skb_queue_empty(&sp->pd_lobby)) { diff --git a/net/socket.c b/net/socket.c index 4223cbad8224..88145eb4adc9 100644 --- a/net/socket.c +++ b/net/socket.c @@ -104,7 +104,7 @@ static int sock_mmap(struct file *file, struct vm_area_struct * vma); static int sock_close(struct inode *inode, struct file *file); static unsigned int sock_poll(struct file *file, struct poll_table_struct *wait); -static int sock_ioctl(struct inode *inode, struct file *file, +static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_readv(struct file *file, const struct iovec *vector, @@ -126,7 +126,7 @@ static struct file_operations socket_file_ops = { .aio_read = sock_aio_read, .aio_write = sock_aio_write, .poll = sock_poll, - .ioctl = sock_ioctl, + .unlocked_ioctl = sock_ioctl, .mmap = sock_mmap, .open = sock_no_open, /* special open code to disallow open via /proc */ .release = sock_close, @@ -829,15 +829,13 @@ EXPORT_SYMBOL(dlci_ioctl_set); * what to do with it - that's up to the protocol still. 
*/ -static int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, - unsigned long arg) +static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) { struct socket *sock; void __user *argp = (void __user *)arg; int pid, err; - unlock_kernel(); - sock = SOCKET_I(inode); + sock = SOCKET_I(file->f_dentry->d_inode); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) { err = dev_ioctl(cmd, argp); } else @@ -903,8 +901,6 @@ static int sock_ioctl(struct inode *inode, struct file *file, unsigned int cmd, err = sock->ops->ioctl(sock, cmd, arg); break; } - lock_kernel(); - return err; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4851b46b046e..3ec936879f38 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -549,8 +549,6 @@ void xfrm_policy_delete(struct xfrm_policy *pol, int dir) } } -EXPORT_SYMBOL(xfrm_policy_delete); - int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; |
