diff options
| author | David S. Miller <davem@nuts.davemloft.net> | 2004-09-26 22:16:02 -0700 |
|---|---|---|
| committer | David S. Miller <davem@nuts.davemloft.net> | 2004-09-26 22:16:02 -0700 |
| commit | 9c80ee75ffa8e176ca6e39738e8c580e2f71819c (patch) | |
| tree | 7a8c4a943e3fdd33c8d58298c3e5a6ae70883555 | |
| parent | 3c417db9f490d67074de52e3239f27402e8056e7 (diff) | |
| parent | e3ede754864448c81e0d3c0fb9a75d6706daa3d6 (diff) | |
Merge bk://212.42.230.204/netfilter-2.6
into nuts.davemloft.net:/disk1/BK/net-2.6
36 files changed, 917 insertions, 778 deletions
diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 687b64a73d8c..dbcd7bc75135 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -78,6 +78,7 @@ config PPC_PMAC bool " Apple G5 based machines" default y select ADB_PMU + select U3_DART config PPC bool @@ -109,16 +110,10 @@ config PPC_SPLPAR processors, that is, which share physical processors between two or more partitions. -config PMAC_DART - bool "Enable DART/IOMMU on PowerMac (allow >2G of RAM)" - depends on PPC_PMAC - depends on EXPERIMENTAL +config U3_DART + bool + depends on PPC_MULTIPLATFORM default n - help - Enabling DART makes it possible to boot a PowerMac G5 with more - than 2GB of memory. Note that the code is very new and untested - at this time, so it has to be considered experimental. Enabling - this might result in data loss. config PPC_PMAC64 bool diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index 95635ac4e9d8..acb62477981e 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_HVCS) += hvcserver.o obj-$(CONFIG_PPC_PMAC) += pmac_setup.o pmac_feature.o pmac_pci.o \ pmac_time.o pmac_nvram.o pmac_low_i2c.o \ open_pic_u3.o -obj-$(CONFIG_PMAC_DART) += pmac_iommu.o +obj-$(CONFIG_U3_DART) += u3_iommu.o ifdef CONFIG_SMP obj-$(CONFIG_PPC_PMAC) += pmac_smp.o smp-tbsync.o diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index bfb8b22cb129..c153e266f484 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -687,7 +687,7 @@ _GLOBAL(kernel_thread) ld r30,-16(r1) blr -#ifndef CONFIG_PPC_PSERIE /* hack hack hack */ +#ifndef CONFIG_PPC_PSERIES /* hack hack hack */ #define ppc_rtas sys_ni_syscall #endif diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 7062971ea8bc..1328cd246154 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -439,7 +439,7 @@ void hpte_init_lpar(void) ppc_md.hpte_insert = pSeries_lpar_hpte_insert; ppc_md.hpte_remove = pSeries_lpar_hpte_remove; ppc_md.flush_hash_range = pSeries_lpar_flush_hash_range; - ppc_md.htpe_clear_all = pSeries_lpar_hptab_clear; + ppc_md.hpte_clear_all = pSeries_lpar_hptab_clear; htab_finish_init(); } diff --git a/arch/ppc64/kernel/pmac.h b/arch/ppc64/kernel/pmac.h index 9aef0e89bc6b..4d69b3abc9fe 100644 --- a/arch/ppc64/kernel/pmac.h +++ b/arch/ppc64/kernel/pmac.h @@ -29,6 +29,4 @@ extern void pmac_ide_init_hwif_ports(hw_regs_t *hw, extern void pmac_nvram_init(void); -extern void pmac_iommu_alloc(void); - #endif /* __PMAC_H__ */ diff --git a/arch/ppc64/kernel/pmac_pci.c b/arch/ppc64/kernel/pmac_pci.c index 2ddb10da29a7..e66a299037ba 100644 --- a/arch/ppc64/kernel/pmac_pci.c +++ b/arch/ppc64/kernel/pmac_pci.c @@ -664,9 +664,7 @@ void __init pmac_pcibios_fixup(void) pci_fix_bus_sysdata(); -#ifdef CONFIG_PMAC_DART - iommu_setup_pmac(); -#endif /* CONFIG_PMAC_DART */ + iommu_setup_u3(); } diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index ed7e4615b387..36617b773f95 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -447,16 +447,6 @@ static int __init pmac_probe(int platform) if (platform != PLATFORM_POWERMAC) return 0; -#ifdef CONFIG_PMAC_DART - /* - * On U3, the DART (iommu) must be allocated now since it - * has an impact on htab_initialize (due to the large page it - * occupies having to be broken up so the DART itself is not - * part of the cacheable linar mapping - */ - pmac_iommu_alloc(); -#endif /* CONFIG_PMAC_DART */ - return 1; } diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index 6d3b72c5fe73..f7b44772cba2 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -423,13 +423,6 @@ static void __init early_cmdline_parse(void) else if (!strncmp(opt, RELOC("force"), 5)) RELOC(iommu_force_on) = 1; } - -#ifndef CONFIG_PMAC_DART - if (RELOC(of_platform) == PLATFORM_POWERMAC) { - RELOC(ppc64_iommu_off) = 1; - prom_printf("DART disabled on PowerMac !\n"); - } -#endif } /* diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index d21b7b74c60d..e895274e4a41 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -50,6 +50,7 @@ #include <asm/setup.h> #include <asm/system.h> #include <asm/rtas.h> +#include <asm/iommu.h> #ifdef DEBUG #define DBG(fmt...) udbg_printf(fmt) @@ -405,6 +406,16 @@ void __init early_setup(unsigned long dt_ptr) DBG("Found, Initializing memory management...\n"); +#ifdef CONFIG_U3_DART + /* + * On U3, the DART (iommu) must be allocated now since it + * has an impact on htab_initialize (due to the large page it + * occupies having to be broken up so the DART itself is not + * part of the cacheable linar mapping + */ + alloc_u3_dart_table(); +#endif /* CONFIG_U3_DART */ + /* * Initialize stab / SLB management */ diff --git a/arch/ppc64/kernel/pmac_iommu.c b/arch/ppc64/kernel/u3_iommu.c index 4c3b3a691c55..a35324afa1c6 100644 --- a/arch/ppc64/kernel/pmac_iommu.c +++ b/arch/ppc64/kernel/u3_iommu.c @@ -1,5 +1,5 @@ /* - * arch/ppc64/kernel/pmac_iommu.c + * arch/ppc64/kernel/u3_iommu.c * * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation * @@ -7,7 +7,7 @@ * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation * Copyright (C) 2004 Olof Johansson <olof@austin.ibm.com>, IBM Corporation * - * Dynamic DMA mapping support, PowerMac G5 (DART)-specific parts. + * Dynamic DMA mapping support, Apple U3 & IBM CPC925 "DART" iommu. * * * This program is free software; you can redistribute it and/or modify @@ -89,7 +89,7 @@ static unsigned int *dart; /* Dummy val that entries are set to when unused */ static unsigned int dart_emptyval; -static struct iommu_table iommu_table_pmac; +static struct iommu_table iommu_table_u3; static int dart_dirty; #define DBG(...) @@ -141,9 +141,9 @@ static void dart_flush(struct iommu_table *tbl) dart_dirty = 0; } -static void dart_build_pmac(struct iommu_table *tbl, long index, - long npages, unsigned long uaddr, - enum dma_data_direction direction) +static void dart_build(struct iommu_table *tbl, long index, + long npages, unsigned long uaddr, + enum dma_data_direction direction) { unsigned int *dp; unsigned int rpn; @@ -152,7 +152,7 @@ static void dart_build_pmac(struct iommu_table *tbl, long index, dp = ((unsigned int*)tbl->it_base) + index; - /* On pmac, all memory is contigous, so we can move this + /* On U3, all memory is contigous, so we can move this * out of the loop. */ while (npages--) { @@ -168,7 +168,7 @@ static void dart_build_pmac(struct iommu_table *tbl, long index, } -static void dart_free_pmac(struct iommu_table *tbl, long index, long npages) +static void dart_free(struct iommu_table *tbl, long index, long npages) { unsigned int *dp; @@ -239,32 +239,32 @@ static int dart_init(struct device_node *dart_node) /* Invalidate DART to get rid of possible stale TLBs */ dart_tlb_invalidate_all(); - iommu_table_pmac.it_busno = 0; + iommu_table_u3.it_busno = 0; /* Units of tce entries */ - iommu_table_pmac.it_offset = 0; + iommu_table_u3.it_offset = 0; /* Set the tce table size - measured in pages */ - iommu_table_pmac.it_size = dart_tablesize >> PAGE_SHIFT; + iommu_table_u3.it_size = dart_tablesize >> PAGE_SHIFT; /* Initialize the common IOMMU code */ - iommu_table_pmac.it_base = (unsigned long)dart_vbase; - iommu_table_pmac.it_index = 0; - iommu_table_pmac.it_blocksize = 1; - iommu_table_pmac.it_entrysize = sizeof(u32); - iommu_init_table(&iommu_table_pmac); + iommu_table_u3.it_base = (unsigned long)dart_vbase; + iommu_table_u3.it_index = 0; + iommu_table_u3.it_blocksize = 1; + iommu_table_u3.it_entrysize = sizeof(u32); + iommu_init_table(&iommu_table_u3); /* Reserve the last page of the DART to avoid possible prefetch * past the DART mapped area */ - set_bit(iommu_table_pmac.it_mapsize - 1, iommu_table_pmac.it_map); + set_bit(iommu_table_u3.it_mapsize - 1, iommu_table_u3.it_map); - printk(KERN_INFO "U3-DART IOMMU initialized\n"); + printk(KERN_INFO "U3/CPC925 DART IOMMU initialized\n"); return 0; } -void iommu_setup_pmac(void) +void iommu_setup_u3(void) { struct pci_dev *dev = NULL; struct device_node *dn; @@ -275,8 +275,8 @@ void iommu_setup_pmac(void) return; /* Setup low level TCE operations for the core IOMMU code */ - ppc_md.tce_build = dart_build_pmac; - ppc_md.tce_free = dart_free_pmac; + ppc_md.tce_build = dart_build; + ppc_md.tce_free = dart_free; ppc_md.tce_flush = dart_flush; /* Initialize the DART HW */ @@ -296,11 +296,11 @@ void iommu_setup_pmac(void) */ struct device_node *dn = pci_device_to_OF_node(dev); if (dn) - dn->iommu_table = &iommu_table_pmac; + dn->iommu_table = &iommu_table_u3; } } -void __init pmac_iommu_alloc(void) +void __init alloc_u3_dart_table(void) { /* Only reserve DART space if machine has more than 2GB of RAM * or if requested with iommu=on on cmdline. diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index 76a3572b8065..3a8a61380542 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -71,9 +71,9 @@ * */ -#ifdef CONFIG_PMAC_DART +#ifdef CONFIG_U3_DART extern unsigned long dart_tablebase; -#endif /* CONFIG_PMAC_DART */ +#endif /* CONFIG_U3_DART */ HTAB htab_data = {NULL, 0, 0, 0, 0}; @@ -203,7 +203,7 @@ void __init htab_initialize(void) DBG("creating mapping for region: %lx : %lx\n", base, size); -#ifdef CONFIG_PMAC_DART +#ifdef CONFIG_U3_DART /* Do not map the DART space. Fortunately, it will be aligned * in such a way that it will not cross two lmb regions and will * fit within a single 16Mb page. @@ -223,7 +223,7 @@ void __init htab_initialize(void) mode_rw, use_largepages); continue; } -#endif /* CONFIG_PMAC_DART */ +#endif /* CONFIG_U3_DART */ create_pte_mapping(base, base + size, mode_rw, use_largepages); } DBG(" <- htab_initialize()\n"); diff --git a/drivers/video/Makefile b/drivers/video/Makefile index 4eb645540042..b88f69ae154b 100644 --- a/drivers/video/Makefile +++ b/drivers/video/Makefile @@ -88,7 +88,7 @@ obj-$(CONFIG_FB_68328) += 68328fb.o cfbfillrect.o cfbcopyarea.o cfbim obj-$(CONFIG_FB_GBE) += gbefb.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o obj-$(CONFIG_FB_CIRRUS) += cirrusfb.o cfbfillrect.o cfbimgblt.o cfbcopyarea.o obj-$(CONFIG_FB_ASILIANT) += asiliantfb.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o -obj-$(CONFIG_FB_PXA) += pxafb.o cfbimgblt.o cfbcopyarea.o cfbfillrect0.o +obj-$(CONFIG_FB_PXA) += pxafb.o cfbimgblt.o cfbcopyarea.o cfbfillrect.o # Platform or fallback drivers go here obj-$(CONFIG_FB_VESA) += vesafb.o cfbfillrect.o cfbcopyarea.o cfbimgblt.o diff --git a/fs/partitions/ldm.c b/fs/partitions/ldm.c index 4d51456360cf..5ad7ff8d9afc 100644 --- a/fs/partitions/ldm.c +++ b/fs/partitions/ldm.c @@ -2,7 +2,7 @@ * ldm - Support for Windows Logical Disk Manager (Dynamic Disks) * * Copyright (C) 2001,2002 Richard Russon <ldm@flatcap.org> - * Copyright (C) 2001 Anton Altaparmakov <aia21@cantab.net> + * Copyright (c) 2001-2004 Anton Altaparmakov * Copyright (C) 2001,2002 Jakob Kemi <jakob.kemi@telia.com> * * Documentation is available at http://linux-ntfs.sf.net/ldm @@ -517,9 +517,15 @@ static BOOL ldm_validate_vmdb (struct block_device *bdev, unsigned long base, if (vm->vblk_offset != 512) ldm_info ("VBLKs start at offset 0x%04x.", vm->vblk_offset); - /* FIXME: How should we handle this situation? */ - if ((vm->vblk_size * vm->last_vblk_seq) != (toc->bitmap1_size << 9)) - ldm_info ("VMDB and TOCBLOCK don't agree on the database size."); + /* + * The last_vblkd_seq can be before the end of the vmdb, just make sure + * it is not out of bounds. + */ + if ((vm->vblk_size * vm->last_vblk_seq) > (toc->bitmap1_size << 9)) { + ldm_crit ("VMDB exceeds allowed size specified by TOCBLOCK. " + "Database is corrupt. Aborting."); + goto out; + } result = TRUE; out: diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h index 2985dcf9bfea..e67575457fb2 100644 --- a/include/asm-ppc64/iommu.h +++ b/include/asm-ppc64/iommu.h @@ -108,7 +108,7 @@ struct scatterlist; /* Walks all buses and creates iommu tables */ extern void iommu_setup_pSeries(void); -extern void iommu_setup_pmac(void); +extern void iommu_setup_u3(void); /* Creates table for an individual device node */ extern void iommu_devnode_init(struct device_node *dn); @@ -155,6 +155,8 @@ extern void tce_init_iSeries(void); extern void pci_iommu_init(void); extern void pci_dma_init_direct(void); +extern void alloc_u3_dart_table(void); + extern int ppc64_iommu_off; #endif /* _ASM_IOMMU_H */ diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h index e0fc14a61891..e3b860769cd8 100644 --- a/include/asm-ppc64/machdep.h +++ b/include/asm-ppc64/machdep.h @@ -58,7 +58,7 @@ struct machdep_calls { int local); /* special for kexec, to be called in real mode, linar mapping is * destroyed as well */ - void (*htpe_clear_all)(void); + void (*hpte_clear_all)(void); void (*tce_build)(struct iommu_table * tbl, long index, diff --git a/include/asm-ppc64/systemcfg.h b/include/asm-ppc64/systemcfg.h index 7cefeef2baa2..b7d6e0f340fe 100644 --- a/include/asm-ppc64/systemcfg.h +++ b/include/asm-ppc64/systemcfg.h @@ -59,54 +59,7 @@ struct systemcfg { #ifdef __KERNEL__ extern struct systemcfg *systemcfg; -#else - -/* Processor Version Register (PVR) field extraction */ -#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */ -#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */ - -/* Processor Version Numbers */ -#define PV_NORTHSTAR 0x0033 -#define PV_PULSAR 0x0034 -#define PV_POWER4 0x0035 -#define PV_ICESTAR 0x0036 -#define PV_SSTAR 0x0037 -#define PV_POWER4p 0x0038 -#define PV_GPUL 0x0039 -#define PV_POWER5 0x003a -#define PV_970FX 0x003c -#define PV_630 0x0040 -#define PV_630p 0x0041 - -/* Platforms supported by PPC64 */ -#define PLATFORM_PSERIES 0x0100 -#define PLATFORM_PSERIES_LPAR 0x0101 -#define PLATFORM_ISERIES_LPAR 0x0201 -#define PLATFORM_POWERMAC 0x0400 - -/* Compatibility with drivers coming from PPC32 world */ -#define _machine (systemcfg->platform) -#define _MACH_Pmac PLATFORM_POWERMAC - - -static inline volatile struct systemcfg *systemcfg_init(void) -{ - int fd = open("/proc/ppc64/systemcfg", O_RDONLY); - volatile struct systemcfg *ret; - - if (fd == -1) - return 0; - ret = mmap(0, sizeof(struct systemcfg), PROT_READ, MAP_SHARED, fd, 0); - close(fd); - if (!ret) - return 0; - if (ret->version.major != SYSTEMCFG_MAJOR || ret->version.minor < SYSTEMCFG_MINOR) { - munmap((void *)ret, sizeof(struct systemcfg)); - return 0; - } - return ret; -} -#endif /* __KERNEL__ */ +#endif #endif /* __ASSEMBLY__ */ diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c03df4894379..f374df7a823f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -205,6 +205,13 @@ typedef struct tcp_pcount { __u32 val; } tcp_pcount_t; +enum tcp_congestion_algo { + TCP_RENO=0, + TCP_VEGAS, + TCP_WESTWOOD, + TCP_BIC, +}; + struct tcp_opt { int tcp_header_len; /* Bytes of tcp header to send */ @@ -265,7 +272,7 @@ struct tcp_opt { __u8 frto_counter; /* Number of new acks after RTO */ __u32 frto_highmark; /* snd_nxt when RTO occurred */ - __u8 unused_pad; + __u8 adv_cong; /* Using Vegas, Westwood, or BIC */ __u8 defer_accept; /* User waits for some data after accept() */ /* one byte hole, try to pack */ @@ -412,7 +419,6 @@ struct tcp_opt { __u32 beg_snd_nxt; /* right edge during last RTT */ __u32 beg_snd_una; /* left edge during last RTT */ __u32 beg_snd_cwnd; /* saves the size of the cwnd */ - __u8 do_vegas; /* do vegas for this connection */ __u8 doing_vegas_now;/* if true, do vegas for this RTT */ __u16 cntRTT; /* # of RTTs measured within last RTT */ __u32 minRTT; /* min of RTTs measured within last RTT (in usec) */ diff --git a/include/linux/wait.h b/include/linux/wait.h index 4417f800a639..21cd4df67b24 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -156,6 +156,29 @@ do { \ __wait_event(wq, condition); \ } while (0) +#define __wait_event_timeout(wq, condition, ret) \ +do { \ + DEFINE_WAIT(__wait); \ + \ + for (;;) { \ + prepare_to_wait(&wq, &__wait, TASK_UNINTERRUPTIBLE); \ + if (condition) \ + break; \ + ret = schedule_timeout(ret); \ + if (!ret) \ + break; \ + } \ + finish_wait(&wq, &__wait); \ +} while (0) + +#define wait_event_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + if (!(condition)) \ + __wait_event_timeout(wq, condition, __ret); \ + __ret; \ +}) + #define __wait_event_interruptible(wq, condition, ret) \ do { \ DEFINE_WAIT(__wait); \ diff --git a/include/net/addrconf.h b/include/net/addrconf.h index fc2b279cd148..50f4c2c4df86 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -74,7 +74,7 @@ extern int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2); extern void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr); -extern void addrconf_leave_solict(struct net_device *dev, +extern void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr); /* @@ -89,6 +89,7 @@ extern int inet6_mc_check(struct sock *sk, struct in6_addr *mc_addr, struct in6_addr *src_addr); extern int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr); +extern int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr); extern int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr); extern void ipv6_mc_up(struct inet6_dev *idev); extern void ipv6_mc_down(struct inet6_dev *idev); @@ -111,6 +112,7 @@ extern void ipv6_sock_ac_close(struct sock *sk); extern int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex); extern int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr); +extern int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr); extern int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr); extern int ipv6_chk_acast_addr(struct net_device *dev, struct in6_addr *addr); diff --git a/include/net/dn_neigh.h b/include/net/dn_neigh.h index b0c2802a5216..4b1eb038d637 100644 --- a/include/net/dn_neigh.h +++ b/include/net/dn_neigh.h @@ -18,7 +18,6 @@ struct dn_neigh { extern void dn_neigh_init(void); extern void dn_neigh_cleanup(void); -extern struct neighbour *dn_neigh_lookup(struct neigh_table *tbl, const void *ptr); extern int dn_neigh_router_hello(struct sk_buff *skb); extern int dn_neigh_endnode_hello(struct sk_buff *skb); extern void dn_neigh_pointopoint_hello(struct sk_buff *skb); diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 88c2d3ec20cf..e66c71da2357 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -47,6 +47,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/rcupdate.h> +#include <linux/seq_file.h> #include <linux/err.h> #include <linux/sysctl.h> @@ -139,9 +140,6 @@ struct pneigh_entry u8 key[0]; }; -#define NEIGH_HASHMASK 0x1F -#define PNEIGH_HASHMASK 0xF - /* * neighbour table manipulation */ @@ -175,8 +173,11 @@ struct neigh_table struct neigh_parms *parms_list; kmem_cache_t *kmem_cachep; struct neigh_statistics stats; - struct neighbour *hash_buckets[NEIGH_HASHMASK+1]; - struct pneigh_entry *phash_buckets[PNEIGH_HASHMASK+1]; + struct neighbour **hash_buckets; + unsigned int hash_mask; + __u32 hash_rnd; + unsigned int hash_chain_gc; + struct pneigh_entry **phash_buckets; }; /* flags for neigh_update() */ @@ -191,6 +192,8 @@ extern int neigh_table_clear(struct neigh_table *tbl); extern struct neighbour * neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev); +extern struct neighbour * neigh_lookup_nodev(struct neigh_table *tbl, + const void *pkey); extern struct neighbour * neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev); @@ -224,6 +227,24 @@ extern int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg); extern void neigh_app_ns(struct neighbour *n); +extern void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie); +extern void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)); +extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_entry *)); + +struct neigh_seq_state { + struct neigh_table *tbl; + void *(*neigh_sub_iter)(struct neigh_seq_state *state, + struct neighbour *n, loff_t *pos); + unsigned int bucket; + unsigned int flags; +#define NEIGH_SEQ_NEIGH_ONLY 0x00000001 +#define NEIGH_SEQ_IS_PNEIGH 0x00000002 +#define NEIGH_SEQ_SKIP_NOARP 0x00000004 +}; +extern void *neigh_seq_start(struct seq_file *, loff_t *, struct neigh_table *, unsigned int); +extern void *neigh_seq_next(struct seq_file *, void *, loff_t *); +extern void neigh_seq_stop(struct seq_file *, void *); + extern int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p, int p_id, int pdev_id, diff --git a/include/net/tcp.h b/include/net/tcp.h index 63ed0f745a8d..eb0f1970a40f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1271,6 +1271,13 @@ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) tcp_get_pcount(&tp->retrans_out)); } +/* + * Which congestion algorithim is in use on the connection. + */ +#define tcp_is_vegas(__tp) ((__tp)->adv_cong == TCP_VEGAS) +#define tcp_is_westwood(__tp) ((__tp)->adv_cong == TCP_WESTWOOD) +#define tcp_is_bic(__tp) ((__tp)->adv_cong == TCP_BIC) + /* Recalculate snd_ssthresh, we want to set it to: * * Reno: @@ -1283,7 +1290,7 @@ static __inline__ unsigned int tcp_packets_in_flight(struct tcp_opt *tp) */ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) { - if (sysctl_tcp_bic) { + if (tcp_is_bic(tp)) { if (sysctl_tcp_bic_fast_convergence && tp->snd_cwnd < tp->bictcp.last_max_cwnd) tp->bictcp.last_max_cwnd @@ -1302,11 +1309,6 @@ static inline __u32 tcp_recalc_ssthresh(struct tcp_opt *tp) /* Stop taking Vegas samples for now. */ #define tcp_vegas_disable(__tp) ((__tp)->vegas.doing_vegas_now = 0) - -/* Is this TCP connection using Vegas (regardless of whether it is taking - * Vegas measurements at the current time)? - */ -#define tcp_is_vegas(__tp) ((__tp)->vegas.do_vegas) static inline void tcp_vegas_enable(struct tcp_opt *tp) { @@ -1340,7 +1342,7 @@ static inline void tcp_vegas_enable(struct tcp_opt *tp) /* Should we be taking Vegas samples right now? */ #define tcp_vegas_enabled(__tp) ((__tp)->vegas.doing_vegas_now) -extern void tcp_vegas_init(struct tcp_opt *tp); +extern void tcp_ca_init(struct tcp_opt *tp); static inline void tcp_set_ca_state(struct tcp_opt *tp, u8 ca_state) { @@ -2024,7 +2026,7 @@ extern void tcp_proc_unregister(struct tcp_seq_afinfo *afinfo); static inline void tcp_westwood_update_rtt(struct tcp_opt *tp, __u32 rtt_seq) { - if (sysctl_tcp_westwood) + if (tcp_is_westwood(tp)) tp->westwood.rtt = rtt_seq; } @@ -2033,13 +2035,13 @@ void __tcp_westwood_slow_bw(struct sock *, struct sk_buff *); static inline void tcp_westwood_fast_bw(struct sock *sk, struct sk_buff *skb) { - if (sysctl_tcp_westwood) + if (tcp_is_westwood(tcp_sk(sk))) __tcp_westwood_fast_bw(sk, skb); } static inline void tcp_westwood_slow_bw(struct sock *sk, struct sk_buff *skb) { - if (sysctl_tcp_westwood) + if (tcp_is_westwood(tcp_sk(sk))) __tcp_westwood_slow_bw(sk, skb); } @@ -2052,14 +2054,14 @@ static inline __u32 __tcp_westwood_bw_rttmin(const struct tcp_opt *tp) static inline __u32 tcp_westwood_bw_rttmin(const struct tcp_opt *tp) { - return sysctl_tcp_westwood ? __tcp_westwood_bw_rttmin(tp) : 0; + return tcp_is_westwood(tp) ? __tcp_westwood_bw_rttmin(tp) : 0; } static inline int tcp_westwood_ssthresh(struct tcp_opt *tp) { __u32 ssthresh = 0; - if (sysctl_tcp_westwood) { + if (tcp_is_westwood(tp)) { ssthresh = __tcp_westwood_bw_rttmin(tp); if (ssthresh) tp->snd_ssthresh = ssthresh; @@ -2072,7 +2074,7 @@ static inline int tcp_westwood_cwnd(struct tcp_opt *tp) { __u32 cwnd = 0; - if (sysctl_tcp_westwood) { + if (tcp_is_westwood(tp)) { cwnd = __tcp_westwood_bw_rttmin(tp); if (cwnd) tp->snd_cwnd = cwnd; diff --git a/net/atm/clip.c b/net/atm/clip.c index f447aa949ca7..49cd61b9a937 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -27,6 +27,7 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> +#include <linux/jhash.h> #include <net/route.h> /* for struct rtable and routing */ #include <net/icmp.h> /* icmp_send */ #include <asm/param.h> /* for HZ */ @@ -123,64 +124,49 @@ out: spin_unlock_bh(&entry->neigh->dev->xmit_lock); } - -static void idle_timer_check(unsigned long dummy) +/* The neighbour entry n->lock is held. */ +static int neigh_check_cb(struct neighbour *n) { - int i; + struct atmarp_entry *entry = NEIGH2ENTRY(n); + struct clip_vcc *cv; - /*DPRINTK("idle_timer_check\n");*/ - write_lock(&clip_tbl.lock); - for (i = 0; i <= NEIGH_HASHMASK; i++) { - struct neighbour **np; - - for (np = &clip_tbl.hash_buckets[i]; *np;) { - struct neighbour *n = *np; - struct atmarp_entry *entry = NEIGH2ENTRY(n); - struct clip_vcc *clip_vcc; - - write_lock(&n->lock); - - for (clip_vcc = entry->vccs; clip_vcc; - clip_vcc = clip_vcc->next) - if (clip_vcc->idle_timeout && - time_after(jiffies, clip_vcc->last_use+ - clip_vcc->idle_timeout)) { - DPRINTK("releasing vcc %p->%p of " - "entry %p\n",clip_vcc,clip_vcc->vcc, - entry); - vcc_release_async(clip_vcc->vcc, - -ETIMEDOUT); - } - if (entry->vccs || - time_before(jiffies, entry->expires)) { - np = &n->next; - write_unlock(&n->lock); - continue; - } - if (atomic_read(&n->refcnt) > 1) { - struct sk_buff *skb; - - DPRINTK("destruction postponed with ref %d\n", - atomic_read(&n->refcnt)); - while ((skb = skb_dequeue(&n->arp_queue)) != - NULL) - dev_kfree_skb(skb); - np = &n->next; - write_unlock(&n->lock); - continue; - } - *np = n->next; - DPRINTK("expired neigh %p\n",n); - n->dead = 1; - write_unlock(&n->lock); - neigh_release(n); + for (cv = entry->vccs; cv; cv = cv->next) { + unsigned long exp = cv->last_use + cv->idle_timeout; + + if (cv->idle_timeout && time_after(jiffies, exp)) { + DPRINTK("releasing vcc %p->%p of entry %p\n", + cv, cv->vcc, entry); + vcc_release_async(cv->vcc, -ETIMEDOUT); } } + + if (entry->vccs || time_before(jiffies, entry->expires)) + return 0; + + if (atomic_read(&n->refcnt) > 1) { + struct sk_buff *skb; + + DPRINTK("destruction postponed with ref %d\n", + atomic_read(&n->refcnt)); + + while ((skb = skb_dequeue(&n->arp_queue)) != NULL) + dev_kfree_skb(skb); + + return 0; + } + + DPRINTK("expired neigh %p\n",n); + return 1; +} + +static void idle_timer_check(unsigned long dummy) +{ + write_lock(&clip_tbl.lock); + __neigh_for_each_release(&clip_tbl, neigh_check_cb); mod_timer(&idle_timer, jiffies+CLIP_CHECK_INTERVAL*HZ); write_unlock(&clip_tbl.lock); } - static int clip_arp_rcv(struct sk_buff *skb) { struct atm_vcc *vcc; @@ -343,15 +329,7 @@ static int clip_constructor(struct neighbour *neigh) static u32 clip_hash(const void *pkey, const struct net_device *dev) { - u32 hash_val; - - hash_val = *(u32*)pkey; - hash_val ^= (hash_val>>16); - hash_val ^= hash_val>>8; - hash_val ^= hash_val>>3; - hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; - - return hash_val; + return jhash_2words(*(u32 *)pkey, dev->ifindex, clip_tbl.hash_rnd); } static struct neigh_table clip_tbl = { @@ -833,120 +811,126 @@ static void svc_addr(struct seq_file *seq, struct sockaddr_atmsvc *addr) } } +/* This means the neighbour entry has no attached VCC objects. */ +#define SEQ_NO_VCC_TOKEN ((void *) 2) + static void atmarp_info(struct seq_file *seq, struct net_device *dev, struct atmarp_entry *entry, struct clip_vcc *clip_vcc) { + unsigned long exp; char buf[17]; - int svc, off; + int svc, llc, off; + + svc = ((clip_vcc == SEQ_NO_VCC_TOKEN) || + (clip_vcc->vcc->sk->sk_family == AF_ATMSVC)); + + llc = ((clip_vcc == SEQ_NO_VCC_TOKEN) || + clip_vcc->encap); - svc = !clip_vcc || clip_vcc->vcc->sk->sk_family == AF_ATMSVC; - seq_printf(seq, "%-6s%-4s%-4s%5ld ", dev->name, svc ? "SVC" : "PVC", - !clip_vcc || clip_vcc->encap ? "LLC" : "NULL", - (jiffies-(clip_vcc ? clip_vcc->last_use : entry->neigh->used))/HZ); + if (clip_vcc == SEQ_NO_VCC_TOKEN) + exp = entry->neigh->used; + else + exp = clip_vcc->last_use; - off = scnprintf(buf, sizeof(buf) - 1, "%d.%d.%d.%d", NIPQUAD(entry->ip)); + exp = (jiffies - exp) / HZ; + + seq_printf(seq, "%-6s%-4s%-4s%5ld ", + dev->name, + svc ? "SVC" : "PVC", + llc ? "LLC" : "NULL", + exp); + + off = scnprintf(buf, sizeof(buf) - 1, "%d.%d.%d.%d", + NIPQUAD(entry->ip)); while (off < 16) buf[off++] = ' '; buf[off] = '\0'; seq_printf(seq, "%s", buf); - if (!clip_vcc) { + if (clip_vcc == SEQ_NO_VCC_TOKEN) { if (time_before(jiffies, entry->expires)) seq_printf(seq, "(resolving)\n"); else seq_printf(seq, "(expired, ref %d)\n", atomic_read(&entry->neigh->refcnt)); } else if (!svc) { - seq_printf(seq, "%d.%d.%d\n", clip_vcc->vcc->dev->number, - clip_vcc->vcc->vpi, clip_vcc->vcc->vci); + seq_printf(seq, "%d.%d.%d\n", + clip_vcc->vcc->dev->number, + clip_vcc->vcc->vpi, + clip_vcc->vcc->vci); } else { svc_addr(seq, &clip_vcc->vcc->remote); seq_putc(seq, '\n'); } } -struct arp_state { - int bucket; - struct neighbour *n; +struct clip_seq_state { + /* This member must be first. */ + struct neigh_seq_state ns; + + /* Local to clip specific iteration. */ struct clip_vcc *vcc; }; - -static void *arp_vcc_walk(struct arp_state *state, - struct atmarp_entry *e, loff_t *l) -{ - struct clip_vcc *vcc = state->vcc; - if (!vcc) - vcc = e->vccs; - if (vcc == (void *)1) { - vcc = e->vccs; - --*l; - } - for (; vcc; vcc = vcc->next) { - if (--*l < 0) - break; - } - state->vcc = vcc; - return (*l < 0) ? state : NULL; -} - -static void *arp_get_idx(struct arp_state *state, loff_t l) +static struct clip_vcc *clip_seq_next_vcc(struct atmarp_entry *e, + struct clip_vcc *curr) { - void *v = NULL; - - for (; state->bucket <= NEIGH_HASHMASK; state->bucket++) { - for (; state->n; state->n = state->n->next) { - v = arp_vcc_walk(state, NEIGH2ENTRY(state->n), &l); - if (v) - goto done; - } - state->n = clip_tbl.hash_buckets[state->bucket + 1]; + if (!curr) { + curr = e->vccs; + if (!curr) + return SEQ_NO_VCC_TOKEN; + return curr; } -done: - return v; + if (curr == SEQ_NO_VCC_TOKEN) + return NULL; + + curr = curr->next; + + return curr; } -static void *arp_seq_start(struct seq_file *seq, loff_t *pos) +static void *clip_seq_vcc_walk(struct clip_seq_state *state, + struct atmarp_entry *e, loff_t *pos) { - struct arp_state *state = seq->private; - void *ret = (void *)1; - - read_lock_bh(&clip_tbl.lock); - state->bucket = 0; - state->n = clip_tbl.hash_buckets[0]; - state->vcc = (void *)1; - if (*pos) - ret = arp_get_idx(state, *pos); - return ret; -} + struct clip_vcc *vcc = state->vcc; -static void arp_seq_stop(struct seq_file *seq, void *v) + vcc = clip_seq_next_vcc(e, vcc); + if (vcc && pos != NULL) { + while (*pos) { + vcc = clip_seq_next_vcc(e, vcc); + if (!vcc) + break; + --(*pos); + } + } + state->vcc = vcc; + + return vcc; +} + +static void *clip_seq_sub_iter(struct neigh_seq_state *_state, + struct neighbour *n, loff_t *pos) { - struct arp_state *state = seq->private; + struct clip_seq_state *state = (struct clip_seq_state *) _state; - if (state->bucket != -1) - read_unlock_bh(&clip_tbl.lock); + return clip_seq_vcc_walk(state, NEIGH2ENTRY(n), pos); } -static void *arp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +static void *clip_seq_start(struct seq_file *seq, loff_t *pos) { - struct arp_state *state = seq->private; - - v = arp_get_idx(state, 1); - *pos += !!PTR_ERR(v); - return v; + return neigh_seq_start(seq, pos, &clip_tbl, NEIGH_SEQ_NEIGH_ONLY); } -static int arp_seq_show(struct seq_file *seq, void *v) +static int clip_seq_show(struct seq_file *seq, void *v) { static char atm_arp_banner[] = "IPitf TypeEncp Idle IP address ATM address\n"; - if (v == (void *)1) + if (v == SEQ_START_TOKEN) { seq_puts(seq, atm_arp_banner); - else { - struct arp_state *state = seq->private; - struct neighbour *n = state->n; + } else { + struct clip_seq_state *state = seq->private; + struct neighbour *n = v; struct clip_vcc *vcc = state->vcc; atmarp_info(seq, n->dev, NEIGH2ENTRY(n), vcc); @@ -955,15 +939,15 @@ static int arp_seq_show(struct seq_file *seq, void *v) } static struct seq_operations arp_seq_ops = { - .start = arp_seq_start, - .next = arp_seq_next, - .stop = arp_seq_stop, - .show = arp_seq_show, + .start = clip_seq_start, + .next = neigh_seq_next, + .stop = neigh_seq_stop, + .show = clip_seq_show, }; static int arp_seq_open(struct inode *inode, struct file *file) { - struct arp_state *state; + struct clip_seq_state *state; struct seq_file *seq; int rc = -EAGAIN; @@ -972,6 +956,8 @@ static int arp_seq_open(struct inode *inode, struct file *file) rc = -ENOMEM; goto out_kfree; } + memset(state, 0, sizeof(*state)); + state->ns.neigh_sub_iter = clip_seq_sub_iter; rc = seq_open(file, &arp_seq_ops); if (rc) @@ -987,16 +973,11 @@ out_kfree: goto out; } -static int arp_seq_release(struct inode *inode, struct file *file) -{ - return seq_release_private(inode, file); -} - static struct file_operations arp_seq_fops = { .open = arp_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = arp_seq_release, + .release = seq_release_private, .owner = THIS_MODULE }; #endif diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 232998d40767..34bf8dee8cf9 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -29,6 +29,7 @@ #include <net/dst.h> #include <net/sock.h> #include <linux/rtnetlink.h> +#include <linux/random.h> #define NEIGH_DEBUG 1 @@ -47,6 +48,8 @@ #define NEIGH_PRINTK2 NEIGH_PRINTK #endif +#define PNEIGH_HASHMASK 0xF + static void neigh_timer_handler(unsigned long arg); #ifdef CONFIG_ARPD static void neigh_app_notify(struct neighbour *n); @@ -113,27 +116,19 @@ static int neigh_forced_gc(struct neigh_table *tbl) int shrunk = 0; int i; - for (i = 0; i <= NEIGH_HASHMASK; i++) { + write_lock_bh(&tbl->lock); + for (i = 0; i <= tbl->hash_mask; i++) { struct neighbour *n, **np; np = &tbl->hash_buckets[i]; - write_lock_bh(&tbl->lock); while ((n = *np) != NULL) { /* Neighbour record may be discarded if: - - nobody refers to it. - - it is not permanent - - (NEW and probably wrong) - INCOMPLETE entries are kept at least for - n->parms->retrans_time, otherwise we could - flood network with resolution requests. - It is not clear, what is better table overflow - or flooding. + * - nobody refers to it. + * - it is not permanent */ write_lock(&n->lock); if (atomic_read(&n->refcnt) == 1 && - !(n->nud_state & NUD_PERMANENT) && - (n->nud_state != NUD_INCOMPLETE || - time_after(jiffies, n->used + n->parms->retrans_time))) { + !(n->nud_state & NUD_PERMANENT)) { *np = n->next; n->dead = 1; shrunk = 1; @@ -144,10 +139,12 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_unlock(&n->lock); np = &n->next; } - write_unlock_bh(&tbl->lock); } tbl->last_flush = jiffies; + + write_unlock_bh(&tbl->lock); + return shrunk; } @@ -177,7 +174,7 @@ void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev) write_lock_bh(&tbl->lock); - for (i=0; i <= NEIGH_HASHMASK; i++) { + for (i=0; i <= tbl->hash_mask; i++) { struct neighbour *n, **np; np = &tbl->hash_buckets[i]; @@ -204,7 +201,7 @@ int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev) write_lock_bh(&tbl->lock); - for (i = 0; i <= NEIGH_HASHMASK; i++) { + for (i = 0; i <= tbl->hash_mask; i++) { struct neighbour *n, **np = &tbl->hash_buckets[i]; while ((n = *np) != NULL) { @@ -286,12 +283,73 @@ out: return n; } +static struct neighbour **neigh_hash_alloc(unsigned int entries) +{ + unsigned long size = entries * sizeof(struct neighbour *); + struct neighbour **ret; + + if (size <= PAGE_SIZE) { + ret = kmalloc(size, GFP_ATOMIC); + } else { + ret = (struct neighbour **) + __get_free_pages(GFP_ATOMIC, get_order(size)); + } + if (ret) + memset(ret, 0, size); + + return ret; +} + +static void neigh_hash_free(struct neighbour **hash, unsigned int entries) +{ + unsigned long size = entries * sizeof(struct neighbour *); + + if (size <= PAGE_SIZE) + kfree(hash); + else + free_pages((unsigned long)hash, get_order(size)); +} + +static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) +{ + struct neighbour **new_hash, **old_hash; + unsigned int i, new_hash_mask, old_entries; + + BUG_ON(new_entries & (new_entries - 1)); + new_hash = neigh_hash_alloc(new_entries); + if (!new_hash) + return; + + old_entries = tbl->hash_mask + 1; + new_hash_mask = new_entries - 1; + old_hash = tbl->hash_buckets; + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + for (i = 0; i < old_entries; i++) { + struct neighbour *n, *next; + + for (n = old_hash[i]; n; n = next) { + unsigned int hash_val = tbl->hash(n->primary_key, n->dev); + + hash_val &= new_hash_mask; + next = n->next; + + n->next = new_hash[hash_val]; + new_hash[hash_val] = n; + } + } + tbl->hash_buckets = new_hash; + tbl->hash_mask = new_hash_mask; + + neigh_hash_free(old_hash, old_entries); +} + struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { struct neighbour *n; int key_len = tbl->key_len; - u32 hash_val = tbl->hash(pkey, dev); + u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; read_lock_bh(&tbl->lock); for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { @@ -304,6 +362,23 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, return n; } +struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey) +{ + struct neighbour *n; + int key_len = tbl->key_len; + u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask; + + read_lock_bh(&tbl->lock); + for (n = tbl->hash_buckets[hash_val]; n; n = n->next) { + if (!memcmp(n->primary_key, pkey, key_len)) { + neigh_hold(n); + break; + } + } + read_unlock_bh(&tbl->lock); + return n; +} + struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev) { @@ -317,6 +392,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, goto out; } + if (tbl->entries > (tbl->hash_mask + 1)) { + write_lock_bh(&tbl->lock); + neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); + write_unlock_bh(&tbl->lock); + } + memcpy(n->primary_key, pkey, key_len); n->dev = dev; dev_hold(dev); @@ -336,9 +417,10 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, n->confirmed = jiffies - (n->parms->base_reachable_time << 1); - hash_val = tbl->hash(pkey, dev); - write_lock_bh(&tbl->lock); + + hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + if (n->parms->dead) { rc = ERR_PTR(-EINVAL); goto out_tbl_unlock; @@ -428,10 +510,10 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, hash_val ^= hash_val >> 4; hash_val &= PNEIGH_HASHMASK; + write_lock_bh(&tbl->lock); for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL; np = &n->next) { if (!memcmp(n->key, pkey, key_len) && n->dev == dev) { - write_lock_bh(&tbl->lock); *np = n->next; write_unlock_bh(&tbl->lock); if (tbl->pdestructor) @@ -440,6 +522,7 @@ int pneigh_delete(struct neigh_table *tbl, const void *pkey, return 0; } } + write_unlock_bh(&tbl->lock); return -ENOENT; } @@ -545,9 +628,8 @@ static void neigh_connect(struct neighbour *neigh) static void neigh_periodic_timer(unsigned long arg) { struct neigh_table *tbl = (struct neigh_table *)arg; - unsigned long now = jiffies; - int i; - + struct neighbour *n, **np; + unsigned long expire, now = jiffies; write_lock(&tbl->lock); @@ -563,41 +645,49 @@ static void neigh_periodic_timer(unsigned long arg) neigh_rand_reach_time(p->base_reachable_time); } - for (i = 0; i <= NEIGH_HASHMASK; i++) { - struct neighbour *n, **np; + np = &tbl->hash_buckets[tbl->hash_chain_gc]; + tbl->hash_chain_gc = ((tbl->hash_chain_gc + 1) & tbl->hash_mask); - np = &tbl->hash_buckets[i]; - while ((n = *np) != NULL) { - unsigned state; + while ((n = *np) != NULL) { + unsigned int state; - write_lock(&n->lock); + write_lock(&n->lock); - state = n->nud_state; - if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { - write_unlock(&n->lock); - goto next_elt; - } + state = n->nud_state; + if (state & (NUD_PERMANENT | NUD_IN_TIMER)) { + write_unlock(&n->lock); + goto next_elt; + } - if (time_before(n->used, n->confirmed)) - n->used = n->confirmed; + if (time_before(n->used, n->confirmed)) + n->used = n->confirmed; - if (atomic_read(&n->refcnt) == 1 && - (state == NUD_FAILED || - time_after(now, n->used + n->parms->gc_staletime))) { - *np = n->next; - n->dead = 1; - write_unlock(&n->lock); - neigh_release(n); - continue; - } + if (atomic_read(&n->refcnt) == 1 && + (state == NUD_FAILED || + time_after(now, n->used + n->parms->gc_staletime))) { + *np = n->next; + n->dead = 1; write_unlock(&n->lock); + neigh_release(n); + continue; + } + write_unlock(&n->lock); next_elt: - np = &n->next; - } + np = &n->next; } - mod_timer(&tbl->gc_timer, now + tbl->gc_interval); + /* Cycle through all hash buckets every base_reachable_time/2 ticks. + * ARP entry timeouts range from 1/2 base_reachable_time to 3/2 + * base_reachable_time. + */ + expire = tbl->parms.base_reachable_time >> 1; + expire /= (tbl->hash_mask + 1); + if (!expire) + expire = 1; + + mod_timer(&tbl->gc_timer, now + expire); + write_unlock(&tbl->lock); } @@ -1205,6 +1295,7 @@ void neigh_parms_destroy(struct neigh_parms *parms) void neigh_table_init(struct neigh_table *tbl) { unsigned long now = jiffies; + unsigned long phsize; atomic_set(&tbl->parms.refcnt, 1); INIT_RCU_HEAD(&tbl->parms.rcu_head); @@ -1220,12 +1311,24 @@ void neigh_table_init(struct neigh_table *tbl) if (!tbl->kmem_cachep) panic("cannot create neighbour cache"); + tbl->hash_mask = 1; + tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); + + phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); + tbl->phash_buckets = kmalloc(phsize, GFP_KERNEL); + + if (!tbl->hash_buckets || !tbl->phash_buckets) + panic("cannot allocate neighbour cache hashes"); + + memset(tbl->phash_buckets, 0, phsize); + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + tbl->lock = RW_LOCK_UNLOCKED; init_timer(&tbl->gc_timer); tbl->gc_timer.data = (unsigned long)tbl; tbl->gc_timer.function = neigh_periodic_timer; - tbl->gc_timer.expires = now + tbl->gc_interval + - tbl->parms.reachable_time; + tbl->gc_timer.expires = now + 1; add_timer(&tbl->gc_timer); init_timer(&tbl->proxy_timer); @@ -1260,6 +1363,13 @@ int neigh_table_clear(struct neigh_table *tbl) } } write_unlock(&neigh_tbl_lock); + + neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); + tbl->hash_buckets = NULL; + + kfree(tbl->phash_buckets); + tbl->phash_buckets = NULL; + return 0; } @@ -1439,7 +1549,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; - for (h = 0; h <= NEIGH_HASHMASK; h++) { + for (h = 0; h <= tbl->hash_mask; h++) { if (h < s_h) continue; if (h > s_h) @@ -1489,6 +1599,266 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) +{ + int chain; + + read_lock_bh(&tbl->lock); + for (chain = 0; chain <= tbl->hash_mask; chain++) { + struct neighbour *n; + + for (n = tbl->hash_buckets[chain]; n; n = n->next) + cb(n, cookie); + } + read_unlock_bh(&tbl->lock); +} +EXPORT_SYMBOL(neigh_for_each); + +/* The tbl->lock must be held as a writer and BH disabled. */ +void __neigh_for_each_release(struct neigh_table *tbl, + int (*cb)(struct neighbour *)) +{ + int chain; + + for (chain = 0; chain <= tbl->hash_mask; chain++) { + struct neighbour *n, **np; + + np = &tbl->hash_buckets[chain]; + while ((n = *np) != NULL) { + int release; + + write_lock(&n->lock); + release = cb(n); + if (release) { + *np = n->next; + n->dead = 1; + } else + np = &n->next; + write_unlock(&n->lock); + if (release) + neigh_release(n); + } + } +} +EXPORT_SYMBOL(__neigh_for_each_release); + +#ifdef CONFIG_PROC_FS + +static struct neighbour *neigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + struct neighbour *n = NULL; + int bucket = state->bucket; + + state->flags &= ~NEIGH_SEQ_IS_PNEIGH; + for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { + n = tbl->hash_buckets[bucket]; + + while (n) { + if (state->neigh_sub_iter) { + loff_t fakep = 0; + void *v; + + v = state->neigh_sub_iter(state, n, &fakep); + if (!v) + goto next; + } + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + break; + if (n->nud_state & ~NUD_NOARP) + break; + next: + n = n->next; + } + + if (n) + break; + } + state->bucket = bucket; + + return n; +} + +static struct neighbour *neigh_get_next(struct seq_file *seq, + struct neighbour *n, + loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + if (state->neigh_sub_iter) { + void *v = state->neigh_sub_iter(state, n, pos); + if (v) + return n; + } + n = n->next; + + while (1) { + while (n) { + if (state->neigh_sub_iter) { + void *v = state->neigh_sub_iter(state, n, pos); + if (v) + return n; + goto next; + } + if (!(state->flags & NEIGH_SEQ_SKIP_NOARP)) + break; + + if (n->nud_state & ~NUD_NOARP) + break; + next: + n = n->next; + } + + if (n) + break; + + if (++state->bucket > tbl->hash_mask) + break; + + n = tbl->hash_buckets[state->bucket]; + } + + if (n && pos) + --(*pos); + return n; +} + +static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) +{ + struct neighbour *n = neigh_get_first(seq); + + if (n) { + while (*pos) { + n = neigh_get_next(seq, n, pos); + if (!n) + break; + } + } + return *pos ? NULL : n; +} + +static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + struct pneigh_entry *pn = NULL; + int bucket = state->bucket; + + state->flags |= NEIGH_SEQ_IS_PNEIGH; + for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) { + pn = tbl->phash_buckets[bucket]; + if (pn) + break; + } + state->bucket = bucket; + + return pn; +} + +static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, + struct pneigh_entry *pn, + loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + pn = pn->next; + while (!pn) { + if (++state->bucket > PNEIGH_HASHMASK) + break; + pn = tbl->phash_buckets[state->bucket]; + if (pn) + break; + } + + if (pn && pos) + --(*pos); + + return pn; +} + +static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos) +{ + struct pneigh_entry *pn = pneigh_get_first(seq); + + if (pn) { + while (*pos) { + pn = pneigh_get_next(seq, pn, pos); + if (!pn) + break; + } + } + return *pos ? NULL : pn; +} + +static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) +{ + struct neigh_seq_state *state = seq->private; + void *rc; + + rc = neigh_get_idx(seq, pos); + if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY)) + rc = pneigh_get_idx(seq, pos); + + return rc; +} + +void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) +{ + struct neigh_seq_state *state = seq->private; + loff_t pos_minus_one; + + state->tbl = tbl; + state->bucket = 0; + state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); + + read_lock_bh(&tbl->lock); + + pos_minus_one = *pos - 1; + return *pos ? neigh_get_idx_any(seq, &pos_minus_one) : SEQ_START_TOKEN; +} +EXPORT_SYMBOL(neigh_seq_start); + +void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct neigh_seq_state *state; + void *rc; + + if (v == SEQ_START_TOKEN) { + rc = neigh_get_idx(seq, pos); + goto out; + } + + state = seq->private; + if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) { + rc = neigh_get_next(seq, v, NULL); + if (rc) + goto out; + if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY)) + rc = pneigh_get_first(seq); + } else { + BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY); + rc = pneigh_get_next(seq, v, NULL); + } +out: + ++(*pos); + return rc; +} +EXPORT_SYMBOL(neigh_seq_next); + +void neigh_seq_stop(struct seq_file *seq, void *v) +{ + struct neigh_seq_state *state = seq->private; + struct neigh_table *tbl = state->tbl; + + read_unlock_bh(&tbl->lock); +} +EXPORT_SYMBOL(neigh_seq_stop); + +#endif /* CONFIG_PROC_FS */ + #ifdef CONFIG_ARPD void neigh_app_ns(struct neighbour *n) { @@ -1785,6 +2155,7 @@ EXPORT_SYMBOL(neigh_dump_info); EXPORT_SYMBOL(neigh_event_ns); EXPORT_SYMBOL(neigh_ifdown); EXPORT_SYMBOL(neigh_lookup); +EXPORT_SYMBOL(neigh_lookup_nodev); EXPORT_SYMBOL(neigh_parms_alloc); EXPORT_SYMBOL(neigh_parms_release); EXPORT_SYMBOL(neigh_rand_reach_time); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index d3d6c592a5cb..0691b11ccf2a 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -36,6 +36,7 @@ #include <linux/spinlock.h> #include <linux/seq_file.h> #include <linux/rcupdate.h> +#include <linux/jhash.h> #include <asm/atomic.h> #include <net/neighbour.h> #include <net/dst.h> @@ -122,13 +123,7 @@ struct neigh_table dn_neigh_table = { static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev) { - u32 hash_val; - - hash_val = *(dn_address *)pkey; - hash_val ^= (hash_val >> 10); - hash_val ^= (hash_val >> 3); - - return hash_val & NEIGH_HASHMASK; + return jhash_2words(*(dn_address *)pkey, 0, dn_neigh_table.hash_rnd); } static int dn_neigh_construct(struct neighbour *neigh) @@ -359,27 +354,6 @@ static int dn_phase3_output(struct sk_buff *skb) * basically does a neigh_lookup(), but without comparing the device * field. This is required for the On-Ethernet cache */ -struct neighbour *dn_neigh_lookup(struct neigh_table *tbl, const void *ptr) -{ - struct neighbour *neigh; - u32 hash_val; - - hash_val = tbl->hash(ptr, NULL); - - read_lock_bh(&tbl->lock); - for(neigh = tbl->hash_buckets[hash_val]; neigh != NULL; neigh = neigh->next) { - if (memcmp(neigh->primary_key, ptr, tbl->key_len) == 0) { - atomic_inc(&neigh->refcnt); - read_unlock_bh(&tbl->lock); - return neigh; - } - } - read_unlock_bh(&tbl->lock); - - return NULL; -} - - /* * Any traffic on a pointopoint link causes the timer to be reset * for the entry in the neighbour table. @@ -514,141 +488,66 @@ static char *dn_find_slot(char *base, int max, int priority) return (*min < priority) ? (min - 6) : NULL; } -int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n) -{ - int t = 0; - int i; - struct neighbour *neigh; - struct dn_neigh *dn; - struct neigh_table *tbl = &dn_neigh_table; - unsigned char *rs = ptr; - struct dn_dev *dn_db = (struct dn_dev *)dev->dn_ptr; - - read_lock_bh(&tbl->lock); - - for(i = 0; i < NEIGH_HASHMASK; i++) { - for(neigh = tbl->hash_buckets[i]; neigh != NULL; neigh = neigh->next) { - if (neigh->dev != dev) - continue; - dn = (struct dn_neigh *)neigh; - if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2))) - continue; - if (dn_db->parms.forwarding == 1 && (dn->flags & DN_NDFLAG_R2)) - continue; - if (t == n) - rs = dn_find_slot(ptr, n, dn->priority); - else - t++; - if (rs == NULL) - continue; - dn_dn2eth(rs, dn->addr); - rs += 6; - *rs = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0; - *rs |= dn->priority; - rs++; - } - } - - read_unlock_bh(&tbl->lock); - - return t; -} - - -#ifdef CONFIG_PROC_FS - -struct dn_neigh_iter_state { - int bucket; +struct elist_cb_state { + struct net_device *dev; + unsigned char *ptr; + unsigned char *rs; + int t, n; }; -static struct neighbour *neigh_get_first(struct seq_file *seq) -{ - struct dn_neigh_iter_state *state = seq->private; - struct neighbour *n = NULL; - - for(state->bucket = 0; - state->bucket <= NEIGH_HASHMASK; - ++state->bucket) { - n = dn_neigh_table.hash_buckets[state->bucket]; - if (n) - break; - } - - return n; -} - -static struct neighbour *neigh_get_next(struct seq_file *seq, - struct neighbour *n) +static void neigh_elist_cb(struct neighbour *neigh, void *_info) { - struct dn_neigh_iter_state *state = seq->private; - - n = n->next; -try_again: - if (n) - goto out; - if (++state->bucket > NEIGH_HASHMASK) - goto out; - n = dn_neigh_table.hash_buckets[state->bucket]; - goto try_again; -out: - return n; -} + struct elist_cb_state *s = _info; + struct dn_dev *dn_db; + struct dn_neigh *dn; -static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) -{ - struct neighbour *n = neigh_get_first(seq); + if (neigh->dev != s->dev) + return; - if (n) - while(*pos && (n = neigh_get_next(seq, n))) - --*pos; - return *pos ? NULL : n; -} + dn = (struct dn_neigh *) neigh; + if (!(dn->flags & (DN_NDFLAG_R1|DN_NDFLAG_R2))) + return; -static void *dn_neigh_get_idx(struct seq_file *seq, loff_t pos) -{ - void *rc; - read_lock_bh(&dn_neigh_table.lock); - rc = neigh_get_idx(seq, &pos); - if (!rc) { - read_unlock_bh(&dn_neigh_table.lock); - } - return rc; -} + dn_db = (struct dn_dev *) s->dev->dn_ptr; + if (dn_db->parms.forwarding == 1 && (dn->flags & DN_NDFLAG_R2)) + return; -static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos) -{ - return *pos ? dn_neigh_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; + if (s->t == s->n) + s->rs = dn_find_slot(s->ptr, s->n, dn->priority); + else + s->t++; + if (s->rs == NULL) + return; + + dn_dn2eth(s->rs, dn->addr); + s->rs += 6; + *(s->rs) = neigh->nud_state & NUD_CONNECTED ? 0x80 : 0x0; + *(s->rs) |= dn->priority; + s->rs++; } -static void *dn_neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos) +int dn_neigh_elist(struct net_device *dev, unsigned char *ptr, int n) { - void *rc; + struct elist_cb_state state; + state.dev = dev; + state.t = 0; + state.n = n; + state.ptr = ptr; + state.rs = ptr; - if (v == SEQ_START_TOKEN) { - rc = dn_neigh_get_idx(seq, 0); - goto out; - } + neigh_for_each(&dn_neigh_table, neigh_elist_cb, &state); - rc = neigh_get_next(seq, v); - if (rc) - goto out; - read_unlock_bh(&dn_neigh_table.lock); -out: - ++*pos; - return rc; + return state.t; } -static void dn_neigh_seq_stop(struct seq_file *seq, void *v) -{ - if (v && v != SEQ_START_TOKEN) - read_unlock_bh(&dn_neigh_table.lock); -} + +#ifdef CONFIG_PROC_FS static inline void dn_neigh_format_entry(struct seq_file *seq, struct neighbour *n) { - struct dn_neigh *dn = (struct dn_neigh *)n; + struct dn_neigh *dn = (struct dn_neigh *) n; char buf[DN_ASCBUF_LEN]; read_lock(&n->lock); @@ -675,10 +574,16 @@ static int dn_neigh_seq_show(struct seq_file *seq, void *v) return 0; } +static void *dn_neigh_seq_start(struct seq_file *seq, loff_t *pos) +{ + return neigh_seq_start(seq, pos, &dn_neigh_table, + NEIGH_SEQ_NEIGH_ONLY); +} + static struct seq_operations dn_neigh_seq_ops = { .start = dn_neigh_seq_start, - .next = dn_neigh_seq_next, - .stop = dn_neigh_seq_stop, + .next = neigh_seq_next, + .stop = neigh_seq_stop, .show = dn_neigh_seq_show, }; @@ -686,11 +591,12 @@ static int dn_neigh_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct dn_neigh_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; + memset(s, 0, sizeof(*s)); rc = seq_open(file, &dn_neigh_seq_ops); if (rc) goto out_kfree; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5b1626bb45ff..64bdf10b75b7 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -996,7 +996,7 @@ source_ok: * here */ if (!try_hard) { - neigh = dn_neigh_lookup(&dn_neigh_table, &fl.fld_dst); + neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst); if (neigh) { if ((oldflp->oif && (neigh->dev->ifindex != oldflp->oif)) || diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 35e5038bcdc4..528b3966cf5f 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -71,6 +71,7 @@ * arp_xmit so intermediate drivers like * bonding can change the skb before * sending (e.g. insert 8021q tag). + * Harald Welte : convert to make use of jenkins hash */ #include <linux/module.h> @@ -97,6 +98,7 @@ #include <linux/init.h> #include <linux/net.h> #include <linux/rcupdate.h> +#include <linux/jhash.h> #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> #endif @@ -223,15 +225,7 @@ int arp_mc_map(u32 addr, u8 *haddr, struct net_device *dev, int dir) static u32 arp_hash(const void *pkey, const struct net_device *dev) { - u32 hash_val; - - hash_val = *(u32*)pkey; - hash_val ^= (hash_val>>16); - hash_val ^= hash_val>>8; - hash_val ^= hash_val>>3; - hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; - - return hash_val; + return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd); } static int arp_constructor(struct neighbour *neigh) @@ -1269,162 +1263,10 @@ static char *ax2asc2(ax25_address *a, char *buf) } #endif /* CONFIG_AX25 */ -struct arp_iter_state { - int is_pneigh, bucket; -}; - -static struct neighbour *neigh_get_first(struct seq_file *seq) -{ - struct arp_iter_state* state = seq->private; - struct neighbour *n = NULL; - - state->is_pneigh = 0; - - for (state->bucket = 0; - state->bucket <= NEIGH_HASHMASK; - ++state->bucket) { - n = arp_tbl.hash_buckets[state->bucket]; - while (n && !(n->nud_state & ~NUD_NOARP)) - n = n->next; - if (n) - break; - } - - return n; -} - -static struct neighbour *neigh_get_next(struct seq_file *seq, - struct neighbour *n) -{ - struct arp_iter_state* state = seq->private; - - do { - n = n->next; - /* Don't confuse "arp -a" w/ magic entries */ -try_again: - ; - } while (n && !(n->nud_state & ~NUD_NOARP)); - - if (n) - goto out; - if (++state->bucket > NEIGH_HASHMASK) - goto out; - n = arp_tbl.hash_buckets[state->bucket]; - goto try_again; -out: - return n; -} - -static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos) -{ - struct neighbour *n = neigh_get_first(seq); - - if (n) - while (*pos && (n = neigh_get_next(seq, n))) - --*pos; - return *pos ? NULL : n; -} - -static struct pneigh_entry *pneigh_get_first(struct seq_file *seq) -{ - struct arp_iter_state* state = seq->private; - struct pneigh_entry *pn; - - state->is_pneigh = 1; - - for (state->bucket = 0; - state->bucket <= PNEIGH_HASHMASK; - ++state->bucket) { - pn = arp_tbl.phash_buckets[state->bucket]; - if (pn) - break; - } - return pn; -} - -static struct pneigh_entry *pneigh_get_next(struct seq_file *seq, - struct pneigh_entry *pn) -{ - struct arp_iter_state* state = seq->private; - - pn = pn->next; - while (!pn) { - if (++state->bucket > PNEIGH_HASHMASK) - break; - pn = arp_tbl.phash_buckets[state->bucket]; - } - return pn; -} - -static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t pos) -{ - struct pneigh_entry *pn = pneigh_get_first(seq); - - if (pn) - while (pos && (pn = pneigh_get_next(seq, pn))) - --pos; - return pos ? NULL : pn; -} - -static void *arp_get_idx(struct seq_file *seq, loff_t pos) -{ - void *rc; - - read_lock_bh(&arp_tbl.lock); - rc = neigh_get_idx(seq, &pos); - - if (!rc) { - read_unlock_bh(&arp_tbl.lock); - rc = pneigh_get_idx(seq, pos); - } - return rc; -} - -static void *arp_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct arp_iter_state* state = seq->private; - - state->is_pneigh = 0; - state->bucket = 0; - return *pos ? arp_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; -} - -static void *arp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - void *rc; - struct arp_iter_state* state; - - if (v == SEQ_START_TOKEN) { - rc = arp_get_idx(seq, 0); - goto out; - } - - state = seq->private; - if (!state->is_pneigh) { - rc = neigh_get_next(seq, v); - if (rc) - goto out; - read_unlock_bh(&arp_tbl.lock); - rc = pneigh_get_first(seq); - } else - rc = pneigh_get_next(seq, v); -out: - ++*pos; - return rc; -} - -static void arp_seq_stop(struct seq_file *seq, void *v) -{ - struct arp_iter_state* state = seq->private; - - if (!state->is_pneigh && v != SEQ_START_TOKEN) - read_unlock_bh(&arp_tbl.lock); -} - #define HBUFFERLEN 30 -static __inline__ void arp_format_neigh_entry(struct seq_file *seq, - struct neighbour *n) +static void arp_format_neigh_entry(struct seq_file *seq, + struct neighbour *n) { char hbuffer[HBUFFERLEN]; const char hexbuf[] = "0123456789ABCDEF"; @@ -1455,8 +1297,8 @@ static __inline__ void arp_format_neigh_entry(struct seq_file *seq, read_unlock(&n->lock); } -static __inline__ void arp_format_pneigh_entry(struct seq_file *seq, - struct pneigh_entry *n) +static void arp_format_pneigh_entry(struct seq_file *seq, + struct pneigh_entry *n) { struct net_device *dev = n->dev; int hatype = dev ? dev->type : 0; @@ -1470,13 +1312,13 @@ static __inline__ void arp_format_pneigh_entry(struct seq_file *seq, static int arp_seq_show(struct seq_file *seq, void *v) { - if (v == SEQ_START_TOKEN) + if (v == SEQ_START_TOKEN) { seq_puts(seq, "IP address HW type Flags " "HW address Mask Device\n"); - else { - struct arp_iter_state* state = seq->private; + } else { + struct neigh_seq_state *state = seq->private; - if (state->is_pneigh) + if (state->flags & NEIGH_SEQ_IS_PNEIGH) arp_format_pneigh_entry(seq, v); else arp_format_neigh_entry(seq, v); @@ -1485,12 +1327,20 @@ static int arp_seq_show(struct seq_file *seq, void *v) return 0; } +static void *arp_seq_start(struct seq_file *seq, loff_t *pos) +{ + /* Don't want to confuse "arp -a" w/ magic entries, + * so we tell the generic iterator to skip NUD_NOARP. + */ + return neigh_seq_start(seq, pos, &arp_tbl, NEIGH_SEQ_SKIP_NOARP); +} + /* ------------------------------------------------------------------------ */ static struct seq_operations arp_seq_ops = { .start = arp_seq_start, - .next = arp_seq_next, - .stop = arp_seq_stop, + .next = neigh_seq_next, + .stop = neigh_seq_stop, .show = arp_seq_show, }; @@ -1498,11 +1348,12 @@ static int arp_seq_open(struct inode *inode, struct file *file) { struct seq_file *seq; int rc = -ENOMEM; - struct arp_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); + struct neigh_seq_state *s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) goto out; + memset(s, 0, sizeof(*s)); rc = seq_open(file, &arp_seq_ops); if (rc) goto out_kfree; diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 15cd8e55b796..723ed400d165 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -438,17 +438,15 @@ static struct fib_alias *fib_find_alias(struct fib_node *fn, u8 tos, u32 prio) { if (fn) { struct list_head *head = &fn->fn_alias; - struct fib_alias *fa, *prev_fa; + struct fib_alias *fa; - prev_fa = NULL; list_for_each_entry(fa, head, fa_list) { - if (fa->fa_tos != tos) + if (fa->fa_tos > tos) continue; - prev_fa = fa; - if (prio <= fa->fa_info->fib_priority) - break; + if (fa->fa_info->fib_priority >= prio || + fa->fa_tos < tos) + return fa; } - return prev_fa; } return NULL; } @@ -505,7 +503,7 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, * and we need to allocate a new one of those as well. */ - if (fa && + if (fa && fa->fa_tos == tos && fa->fa_info->fib_priority == fi->fib_priority) { struct fib_alias *fa_orig; @@ -537,7 +535,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, * information. */ fa_orig = fa; - list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) { + fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); + list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { if (fa->fa_tos != tos) break; if (fa->fa_info->fib_priority != fi->fib_priority) @@ -585,7 +584,7 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, write_lock_bh(&fib_hash_lock); if (new_f) fib_insert_node(fz, new_f); - list_add(&new_fa->fa_list, + list_add_tail(&new_fa->fa_list, (fa ? &fa->fa_list : &f->fn_alias)); write_unlock_bh(&fib_hash_lock); @@ -611,7 +610,6 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fn_hash *table = (struct fn_hash*)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; - struct list_head *fa_head; int z = r->rtm_dst_len; struct fn_zone *fz; u32 key; @@ -637,8 +635,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, return -ESRCH; fa_to_delete = NULL; - fa_head = fa->fa_list.prev; - list_for_each_entry(fa, fa_head, fa_list) { + fa = list_entry(fa->fa_list.prev, struct fib_alias, fa_list); + list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { struct fib_info *fi = fa->fa_info; if (fa->fa_tos != tos) diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index e0f8a7664f7e..4f7cde805c1c 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -41,6 +41,12 @@ static struct sock *tcpnl; rta->rta_len = rtalen; \ RTA_DATA(rta); }) +static inline unsigned int jiffies_to_usecs(const unsigned long j) +{ + return 1000*jiffies_to_msecs(j); +} + + /* Return information about state of tcp endpoint in API format. */ void tcp_get_info(struct sock *sk, struct tcp_info *info) { @@ -68,8 +74,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) if (tp->ecn_flags&TCP_ECN_OK) info->tcpi_options |= TCPI_OPT_ECN; - info->tcpi_rto = (1000000*tp->rto)/HZ; - info->tcpi_ato = (1000000*tp->ack.ato)/HZ; + info->tcpi_rto = jiffies_to_usecs(tp->rto); + info->tcpi_ato = jiffies_to_usecs(tp->ack.ato); info->tcpi_snd_mss = tp->mss_cache_std; info->tcpi_rcv_mss = tp->ack.rcv_mss; @@ -79,20 +85,20 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_retrans = tcp_get_pcount(&tp->retrans_out); info->tcpi_fackets = tcp_get_pcount(&tp->fackets_out); - info->tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ; - info->tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ; - info->tcpi_last_ack_recv = ((now - tp->rcv_tstamp)*1000)/HZ; + info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); + info->tcpi_last_data_recv = jiffies_to_msecs(now - tp->ack.lrcvtime); + info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp); info->tcpi_pmtu = tp->pmtu_cookie; info->tcpi_rcv_ssthresh = tp->rcv_ssthresh; - info->tcpi_rtt = ((1000000*tp->srtt)/HZ)>>3; - info->tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>2; + info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3; + info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; info->tcpi_reordering = tp->reordering; - info->tcpi_rcv_rtt = ((1000000*tp->rcv_rtt_est.rtt)/HZ)>>3; + info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; info->tcpi_rcv_space = tp->rcvq_space.space; } @@ -116,7 +122,8 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, if (ext & (1<<(TCPDIAG_INFO-1))) info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info)); - if (tcp_is_vegas(tp) && (ext & (1<<(TCPDIAG_VEGASINFO-1)))) + if ((tcp_is_westwood(tp) || tcp_is_vegas(tp)) + && (ext & (1<<(TCPDIAG_VEGASINFO-1)))) vinfo = TCPDIAG_PUT(skb, TCPDIAG_VEGASINFO, sizeof(*vinfo)); } r->tcpdiag_family = sk->sk_family; @@ -209,10 +216,17 @@ static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk, tcp_get_info(sk, info); if (vinfo) { - vinfo->tcpv_enabled = tp->vegas.doing_vegas_now; - vinfo->tcpv_rttcnt = tp->vegas.cntRTT; - vinfo->tcpv_rtt = tp->vegas.baseRTT; - vinfo->tcpv_minrtt = tp->vegas.minRTT; + if (tcp_is_vegas(tp)) { + vinfo->tcpv_enabled = tp->vegas.doing_vegas_now; + vinfo->tcpv_rttcnt = tp->vegas.cntRTT; + vinfo->tcpv_rtt = jiffies_to_usecs(tp->vegas.baseRTT); + vinfo->tcpv_minrtt = jiffies_to_usecs(tp->vegas.minRTT); + } else { + vinfo->tcpv_enabled = 0; + vinfo->tcpv_rttcnt = 0; + vinfo->tcpv_rtt = jiffies_to_usecs(tp->westwood.rtt); + vinfo->tcpv_minrtt = jiffies_to_usecs(tp->westwood.rtt_min); + } } nlh->nlmsg_len = skb->tail - b; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1c53442639ae..a7c1f4a5277f 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -555,17 +555,20 @@ static void tcp_event_data_recv(struct sock *sk, struct tcp_opt *tp, struct sk_b tcp_grow_window(sk, tp, skb); } -/* Set up a new TCP connection, depending on whether it should be - * using Vegas or not. - */ -void tcp_vegas_init(struct tcp_opt *tp) +/* When starting a new connection, pin down the current choice of + * congestion algorithm. + */ +void tcp_ca_init(struct tcp_opt *tp) { - if (sysctl_tcp_vegas_cong_avoid) { - tp->vegas.do_vegas = 1; + if (sysctl_tcp_westwood) + tp->adv_cong = TCP_WESTWOOD; + else if (sysctl_tcp_bic) + tp->adv_cong = TCP_BIC; + else if (sysctl_tcp_vegas_cong_avoid) { + tp->adv_cong = TCP_VEGAS; tp->vegas.baseRTT = 0x7fffffff; tcp_vegas_enable(tp); - } else - tcp_vegas_disable(tp); + } } /* Do RTT sampling needed for Vegas. @@ -2039,7 +2042,7 @@ tcp_ack_update_rtt(struct tcp_opt *tp, int flag, s32 seq_rtt) static inline __u32 bictcp_cwnd(struct tcp_opt *tp) { /* orignal Reno behaviour */ - if (!sysctl_tcp_bic) + if (!tcp_is_bic(tp)) return tp->snd_cwnd; if (tp->bictcp.last_cwnd == tp->snd_cwnd && @@ -2617,18 +2620,16 @@ static void westwood_filter(struct sock *sk, __u32 delta) * WESTWOOD_RTT_MIN minimum bound since we could be on a LAN! */ -static inline __u32 westwood_update_rttmin(struct sock *sk) +static inline __u32 westwood_update_rttmin(const struct sock *sk) { - struct tcp_opt *tp = tcp_sk(sk); + const struct tcp_opt *tp = tcp_sk(sk); __u32 rttmin = tp->westwood.rtt_min; - if (tp->westwood.rtt == 0) - return(rttmin); - - if (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin) + if (tp->westwood.rtt != 0 && + (tp->westwood.rtt < tp->westwood.rtt_min || !rttmin)) rttmin = tp->westwood.rtt; - return(rttmin); + return rttmin; } /* @@ -2636,11 +2637,11 @@ static inline __u32 westwood_update_rttmin(struct sock *sk) * Evaluate increases for dk. */ -static inline __u32 westwood_acked(struct sock *sk) +static inline __u32 westwood_acked(const struct sock *sk) { - struct tcp_opt *tp = tcp_sk(sk); + const struct tcp_opt *tp = tcp_sk(sk); - return ((tp->snd_una) - (tp->westwood.snd_una)); + return tp->snd_una - tp->westwood.snd_una; } /* @@ -2652,9 +2653,9 @@ static inline __u32 westwood_acked(struct sock *sk) * window, 1 if the sample has to be considered in the next window. */ -static int westwood_new_window(struct sock *sk) +static int westwood_new_window(const struct sock *sk) { - struct tcp_opt *tp = tcp_sk(sk); + const struct tcp_opt *tp = tcp_sk(sk); __u32 left_bound; __u32 rtt; int ret = 0; @@ -2688,14 +2689,13 @@ static void __westwood_update_window(struct sock *sk, __u32 now) struct tcp_opt *tp = tcp_sk(sk); __u32 delta = now - tp->westwood.rtt_win_sx; - if (!delta) - return; - - if (tp->westwood.rtt) - westwood_filter(sk, delta); + if (delta) { + if (tp->westwood.rtt) + westwood_filter(sk, delta); - tp->westwood.bk = 0; - tp->westwood.rtt_win_sx = tcp_time_stamp; + tp->westwood.bk = 0; + tp->westwood.rtt_win_sx = tcp_time_stamp; + } } @@ -2739,7 +2739,7 @@ static void westwood_dupack_update(struct sock *sk) static inline int westwood_may_change_cumul(struct tcp_opt *tp) { - return ((tp->westwood.cumul_ack) > tp->mss_cache_std); + return (tp->westwood.cumul_ack > tp->mss_cache_std); } static inline void westwood_partial_update(struct tcp_opt *tp) @@ -2760,7 +2760,7 @@ static inline void westwood_complete_update(struct tcp_opt *tp) * delayed or partial acks. */ -static __u32 westwood_acked_count(struct sock *sk) +static inline __u32 westwood_acked_count(struct sock *sk) { struct tcp_opt *tp = tcp_sk(sk); @@ -2774,7 +2774,7 @@ static __u32 westwood_acked_count(struct sock *sk) if (westwood_may_change_cumul(tp)) { /* Partial or delayed ack */ - if ((tp->westwood.accounted) >= (tp->westwood.cumul_ack)) + if (tp->westwood.accounted >= tp->westwood.cumul_ack) westwood_partial_update(tp); else westwood_complete_update(tp); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index c526f9dd97a7..2bdc1975c319 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -841,7 +841,8 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct open_request *req, if (newtp->ecn_flags&TCP_ECN_OK) newsk->sk_no_largesend = 1; - tcp_vegas_init(newtp); + tcp_ca_init(newtp); + TCP_INC_STATS_BH(TCP_MIB_PASSIVEOPENS); } return newsk; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index e37d16e019e0..cb3d52a67c26 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1359,7 +1359,7 @@ static inline void tcp_connect_init(struct sock *sk) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric(dst, RTAX_ADVMSS); tcp_initialize_rcv_mss(sk); - tcp_vegas_init(tp); + tcp_ca_init(tp); tcp_select_initial_window(tcp_full_space(sk), tp->advmss - (tp->ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), @@ -1411,7 +1411,7 @@ int tcp_connect(struct sock *sk) TCP_SKB_CB(buff)->end_seq = tp->write_seq; tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; - tcp_vegas_init(tp); + tcp_ca_init(tp); /* Send it off. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 999071accc71..737937e4471b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -128,6 +128,9 @@ static struct timer_list addr_chk_timer = TIMER_INITIALIZER(addrconf_verify, 0, 0); static spinlock_t addrconf_verify_lock = SPIN_LOCK_UNLOCKED; +static void addrconf_join_anycast(struct inet6_ifaddr *ifp); +static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); + static int addrconf_ifdown(struct net_device *dev, int how); static void addrconf_dad_start(struct inet6_ifaddr *ifp, int flags); @@ -419,33 +422,28 @@ static void dev_forward_change(struct inet6_dev *idev) ipv6_dev_mc_dec(dev, &addr); } for (ifa=idev->addr_list; ifa; ifa=ifa->if_next) { - ipv6_addr_prefix(&addr, &ifa->addr, ifa->prefix_len); - if (ipv6_addr_any(&addr)) - continue; if (idev->cnf.forwarding) - ipv6_dev_ac_inc(idev->dev, &addr); + addrconf_join_anycast(ifa); else - ipv6_dev_ac_dec(idev->dev, &addr); + addrconf_leave_anycast(ifa); } } -static void addrconf_forward_change(struct inet6_dev *idev) +static void addrconf_forward_change(void) { struct net_device *dev; - - if (idev) { - dev_forward_change(idev); - return; - } + struct inet6_dev *idev; read_lock(&dev_base_lock); for (dev=dev_base; dev; dev=dev->next) { read_lock(&addrconf_lock); idev = __in6_dev_get(dev); if (idev) { + int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); idev->cnf.forwarding = ipv6_devconf.forwarding; - dev_forward_change(idev); + if (changed) + dev_forward_change(idev); } read_unlock(&addrconf_lock); } @@ -1062,17 +1060,34 @@ void addrconf_join_solict(struct net_device *dev, struct in6_addr *addr) ipv6_dev_mc_inc(dev, &maddr); } -void addrconf_leave_solict(struct net_device *dev, struct in6_addr *addr) +void addrconf_leave_solict(struct inet6_dev *idev, struct in6_addr *addr) { struct in6_addr maddr; - if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; addrconf_addr_solict_mult(addr, &maddr); - ipv6_dev_mc_dec(dev, &maddr); + __ipv6_dev_mc_dec(idev, &maddr); +} + +void addrconf_join_anycast(struct inet6_ifaddr *ifp) +{ + struct in6_addr addr; + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); + if (ipv6_addr_any(&addr)) + return; + ipv6_dev_ac_inc(ifp->idev->dev, &addr); } +void addrconf_leave_anycast(struct inet6_ifaddr *ifp) +{ + struct in6_addr addr; + ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); + if (ipv6_addr_any(&addr)) + return; + __ipv6_dev_ac_dec(ifp->idev, &addr); +} static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) { @@ -2225,14 +2240,6 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) addrconf_mod_timer(ifp, AC_RS, ifp->idev->cnf.rtr_solicit_interval); spin_unlock_bh(&ifp->lock); } - - if (ifp->idev->cnf.forwarding) { - struct in6_addr addr; - - ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); - if (!ipv6_addr_any(&addr)) - ipv6_dev_ac_inc(ifp->idev->dev, &addr); - } } #ifdef CONFIG_PROC_FS @@ -2994,16 +3001,13 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) dst_hold(&ifp->rt->u.dst); if (ip6_ins_rt(ifp->rt, NULL, NULL)) dst_release(&ifp->rt->u.dst); + if (ifp->idev->cnf.forwarding) + addrconf_join_anycast(ifp); break; case RTM_DELADDR: - addrconf_leave_solict(ifp->idev->dev, &ifp->addr); - if (ifp->idev->cnf.forwarding) { - struct in6_addr addr; - - ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); - if (!ipv6_addr_any(&addr)) - ipv6_dev_ac_dec(ifp->idev->dev, &addr); - } + if (ifp->idev->cnf.forwarding) + addrconf_leave_anycast(ifp); + addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); if (ip6_del_rt(ifp->rt, NULL, NULL)) dst_free(&ifp->rt->u.dst); @@ -3025,18 +3029,18 @@ int addrconf_sysctl_forward(ctl_table *ctl, int write, struct file * filp, ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos); - if (write && *valp != val && valp != &ipv6_devconf_dflt.forwarding) { - struct inet6_dev *idev = NULL; - + if (write && valp != &ipv6_devconf_dflt.forwarding) { if (valp != &ipv6_devconf.forwarding) { - idev = (struct inet6_dev *)ctl->extra1; - if (idev == NULL) - return ret; - } else + if ((!*valp) ^ (!val)) { + struct inet6_dev *idev = (struct inet6_dev *)ctl->extra1; + if (idev == NULL) + return ret; + dev_forward_change(idev); + } + } else { ipv6_devconf_dflt.forwarding = ipv6_devconf.forwarding; - - addrconf_forward_change(idev); - + addrconf_forward_change(); + } if (*valp) rt6_purge_dflt_routers(0); } @@ -3077,15 +3081,19 @@ static int addrconf_sysctl_forward_strategy(ctl_table *table, } if (valp != &ipv6_devconf_dflt.forwarding) { - struct inet6_dev *idev; if (valp != &ipv6_devconf.forwarding) { - idev = (struct inet6_dev *)table->extra1; + struct inet6_dev *idev = (struct inet6_dev *)table->extra1; + int changed; if (unlikely(idev == NULL)) return -ENODEV; - } else - idev = NULL; - *valp = new; - addrconf_forward_change(idev); + changed = (!*valp) ^ (!new); + *valp = new; + if (changed) + dev_forward_change(idev); + } else { + *valp = new; + addrconf_forward_change(); + } if (*valp) rt6_purge_dflt_routers(0); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 537dc37be239..a0de548c56ca 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -377,15 +377,10 @@ out: /* * device anycast group decrement */ -int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) +int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) { - struct inet6_dev *idev; struct ifacaddr6 *aca, *prev_aca; - idev = in6_dev_get(dev); - if (idev == NULL) - return -ENODEV; - write_lock_bh(&idev->lock); prev_aca = NULL; for (aca = idev->ac_list; aca; aca = aca->aca_next) { @@ -395,12 +390,10 @@ int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) } if (!aca) { write_unlock_bh(&idev->lock); - in6_dev_put(idev); return -ENOENT; } if (--aca->aca_users > 0) { write_unlock_bh(&idev->lock); - in6_dev_put(idev); return 0; } if (prev_aca) @@ -408,7 +401,7 @@ int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) else idev->ac_list = aca->aca_next; write_unlock_bh(&idev->lock); - addrconf_leave_solict(dev, &aca->aca_addr); + addrconf_leave_solict(idev, &aca->aca_addr); dst_hold(&aca->aca_rt->u.dst); if (ip6_del_rt(aca->aca_rt, NULL, NULL)) @@ -417,10 +410,20 @@ int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) dst_release(&aca->aca_rt->u.dst); aca_put(aca); - in6_dev_put(idev); return 0; } +int ipv6_dev_ac_dec(struct net_device *dev, struct in6_addr *addr) +{ + int ret; + struct inet6_dev *idev = in6_dev_get(dev); + if (idev == NULL) + return -ENODEV; + ret = __ipv6_dev_ac_dec(idev, addr); + in6_dev_put(idev); + return ret; +} + /* * check if the interface has this anycast address */ diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 893fe40817ec..51193e0f09a1 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -128,6 +128,8 @@ static rwlock_t ipv6_sk_mc_lock = RW_LOCK_UNLOCKED; static struct socket *igmp6_socket; +int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr); + static void igmp6_join_group(struct ifmcaddr6 *ma); static void igmp6_leave_group(struct ifmcaddr6 *ma); static void igmp6_timer_handler(unsigned long data); @@ -256,9 +258,9 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, struct in6_addr *addr) if (idev) { (void) ip6_mc_leave_src(sk,mc_lst,idev); + __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } - ipv6_dev_mc_dec(dev, &mc_lst->addr); dev_put(dev); } sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); @@ -322,9 +324,9 @@ void ipv6_sock_mc_close(struct sock *sk) if (idev) { (void) ip6_mc_leave_src(sk, mc_lst, idev); + __ipv6_dev_mc_dec(idev, &mc_lst->addr); in6_dev_put(idev); } - ipv6_dev_mc_dec(dev, &mc_lst->addr); dev_put(dev); } @@ -870,7 +872,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, struct in6_addr *addr) /* * device multicast group del */ -static int __ipv6_dev_mc_dec(struct net_device *dev, struct inet6_dev *idev, struct in6_addr *addr) +int __ipv6_dev_mc_dec(struct inet6_dev *idev, struct in6_addr *addr) { struct ifmcaddr6 *ma, **map; @@ -903,7 +905,7 @@ int ipv6_dev_mc_dec(struct net_device *dev, struct in6_addr *addr) if (!idev) return -ENODEV; - err = __ipv6_dev_mc_dec(dev, idev, addr); + err = __ipv6_dev_mc_dec(idev, addr); in6_dev_put(idev); @@ -2108,7 +2110,12 @@ void ipv6_mc_destroy_dev(struct inet6_dev *idev) * addrconf.c has NULL'd out dev->ip6_ptr so in6_dev_get() will * fail. */ - __ipv6_dev_mc_dec(idev->dev, idev, &maddr); + __ipv6_dev_mc_dec(idev, &maddr); + + if (idev->cnf.forwarding) { + ipv6_addr_all_routers(&maddr); + __ipv6_dev_mc_dec(idev, &maddr); + } write_lock_bh(&idev->lock); while ((i = idev->mc_list) != NULL) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 06a838145a4d..3ed60fb4f11e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -66,6 +66,7 @@ #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> +#include <linux/jhash.h> #include <net/sock.h> #include <net/snmp.h> @@ -270,15 +271,14 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d static u32 ndisc_hash(const void *pkey, const struct net_device *dev) { - u32 hash_val; + const u32 *p32 = pkey; + u32 addr_hash, i; - hash_val = *(u32*)(pkey + sizeof(struct in6_addr) - 4); - hash_val ^= (hash_val>>16); - hash_val ^= hash_val>>8; - hash_val ^= hash_val>>3; - hash_val = (hash_val^dev->ifindex)&NEIGH_HASHMASK; + addr_hash = 0; + for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++) + addr_hash ^= *p32++; - return hash_val; + return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd); } static int ndisc_constructor(struct neighbour *neigh) |
