author     Linus Torvalds <torvalds@athlon.transmeta.com>  2002-02-04 18:13:58 -0800
committer  Linus Torvalds <torvalds@athlon.transmeta.com>  2002-02-04 18:13:58 -0800
commit     7216d3e927c3b6c5d28e5ffaa54afbb34649debb (patch)
tree       2b81b74fda9084131cd90731b0ec9e93e8edb853
parent     4095b99c09e3db837b17f031da096a0213cdd527 (diff)
v2.4.3.8 -> v2.4.4
- Andrea Arcangeli: raw-io fixes
- Johannes Erdfelt: USB updates
- reiserfs update
- Al Viro: fsync/umount race fix
- Rusty Russell: netfilter sync
-rw-r--r--  Makefile  2
-rw-r--r--  arch/arm/kernel/semaphore.c  91
-rw-r--r--  arch/arm/kernel/setup.c  2
-rw-r--r--  arch/cris/kernel/setup.c  6
-rw-r--r--  arch/i386/defconfig  4
-rw-r--r--  arch/i386/kernel/setup.c  6
-rw-r--r--  arch/ppc/kernel/chrp_setup.c  6
-rw-r--r--  arch/ppc/kernel/m8260_setup.c  6
-rw-r--r--  arch/ppc/kernel/m8xx_setup.c  6
-rw-r--r--  arch/ppc/kernel/prep_setup.c  6
-rw-r--r--  arch/sh/kernel/setup.c  6
-rw-r--r--  drivers/char/mem.c  1
-rw-r--r--  drivers/char/raw.c  152
-rw-r--r--  drivers/ide/alim15x3.c  2
-rw-r--r--  drivers/md/lvm-snap.c  20
-rw-r--r--  drivers/sbus/char/Makefile  1
-rw-r--r--  drivers/scsi/3w-xxxx.c  1
-rw-r--r--  drivers/scsi/53c7,8xx.c  10
-rw-r--r--  drivers/scsi/AM53C974.c  1
-rw-r--r--  drivers/scsi/advansys.c  3
-rw-r--r--  drivers/scsi/aic7xxx/aic7xxx_linux.c  1
-rw-r--r--  drivers/scsi/aic7xxx_old.c  1
-rw-r--r--  drivers/scsi/atp870u.c  1
-rw-r--r--  drivers/scsi/cpqfcTSinit.c  2
-rw-r--r--  drivers/scsi/dmx3191d.c  1
-rw-r--r--  drivers/scsi/fdomain.c  7
-rw-r--r--  drivers/scsi/gdth.c  1
-rw-r--r--  drivers/scsi/hosts.h  14
-rw-r--r--  drivers/scsi/ini9100u.h  1
-rw-r--r--  drivers/scsi/ips.c  1
-rw-r--r--  drivers/scsi/megaraid.c  1
-rw-r--r--  drivers/scsi/ncr53c8xx.c  1
-rw-r--r--  drivers/scsi/pci2000.c  1
-rw-r--r--  drivers/scsi/pci2220i.c  2
-rw-r--r--  drivers/scsi/qla1280.c  1
-rw-r--r--  drivers/scsi/qlogicfc.c  1
-rw-r--r--  drivers/scsi/qlogicisp.c  1
-rw-r--r--  drivers/scsi/scsi_ioctl.c  23
-rw-r--r--  drivers/scsi/sym53c8xx.c  1
-rw-r--r--  drivers/scsi/tmscsim.c  1
-rw-r--r--  drivers/usb/usb-uhci.c  36
-rw-r--r--  fs/buffer.c  77
-rw-r--r--  fs/coda/inode.c  2
-rw-r--r--  fs/dcache.c  14
-rw-r--r--  fs/inode.c  161
-rw-r--r--  fs/iobuf.c  57
-rw-r--r--  fs/reiserfs/bitmap.c  51
-rw-r--r--  fs/reiserfs/inode.c  8
-rw-r--r--  fs/reiserfs/journal.c  10
-rw-r--r--  fs/reiserfs/namei.c  44
-rw-r--r--  fs/reiserfs/objectid.c  7
-rw-r--r--  fs/reiserfs/super.c  1
-rw-r--r--  fs/smbfs/getopt.c  6
-rw-r--r--  fs/super.c  6
-rw-r--r--  include/linux/fs.h  5
-rw-r--r--  include/linux/iobuf.h  12
-rw-r--r--  include/linux/netfilter.h  2
-rw-r--r--  include/linux/netfilter_ipv4/ip_conntrack.h  18
-rw-r--r--  include/linux/netfilter_ipv4/ip_conntrack_core.h  13
-rw-r--r--  include/linux/netfilter_ipv4/ip_conntrack_icmp.h  11
-rw-r--r--  include/linux/netfilter_ipv4/ip_conntrack_protocol.h  6
-rw-r--r--  include/linux/raw.h  7
-rw-r--r--  include/linux/reiserfs_fs.h  3
-rw-r--r--  include/linux/reiserfs_fs_i.h  5
-rw-r--r--  include/linux/reiserfs_fs_sb.h  1
-rw-r--r--  include/linux/usb.h  1
-rw-r--r--  include/scsi/scsi.h  3
-rw-r--r--  init/main.c  1
-rw-r--r--  kernel/ksyms.c  2
-rw-r--r--  lib/rwsem.c  6
-rw-r--r--  lib/string.c  27
-rw-r--r--  mm/highmem.c  8
-rw-r--r--  mm/memory.c  89
-rw-r--r--  mm/page_alloc.c  3
-rw-r--r--  net/core/netfilter.c  6
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_core.c  317
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_generic.c  4
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_icmp.c  22
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_tcp.c  6
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_proto_udp.c  4
-rw-r--r--  net/ipv4/netfilter/ip_conntrack_standalone.c  24
-rw-r--r--  net/ipv4/netfilter/ip_fw_compat.c  23
-rw-r--r--  net/ipv4/netfilter/ip_nat_core.c  53
-rw-r--r--  net/ipv4/netfilter/ip_nat_helper.c  1
-rw-r--r--  net/ipv4/netfilter/ip_nat_rule.c  12
-rw-r--r--  net/ipv4/netfilter/ip_nat_standalone.c  24
-rw-r--r--  net/ipv4/netfilter/ipt_MASQUERADE.c  37
-rw-r--r--  net/ipv4/netfilter/ipt_REJECT.c  182
-rw-r--r--  net/ipv6/addrconf.c  6
-rw-r--r--  net/netsyms.c  4
-rw-r--r--  net/sunrpc/svcsock.c  12
91 files changed, 1220 insertions, 615 deletions
diff --git a/Makefile b/Makefile
index c089b08cc53a..08c430ede067 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 4
SUBLEVEL = 4
-EXTRAVERSION =-pre8
+EXTRAVERSION =
KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION)
diff --git a/arch/arm/kernel/semaphore.c b/arch/arm/kernel/semaphore.c
index 49b0bb5463d9..969f09546ff9 100644
--- a/arch/arm/kernel/semaphore.c
+++ b/arch/arm/kernel/semaphore.c
@@ -165,3 +165,94 @@ int __down_trylock(struct semaphore * sem)
spin_unlock_irqrestore(&semaphore_lock, flags);
return 1;
}
+
+/*
+ * The semaphore operations have a special calling sequence that
+ * allow us to do a simpler in-line version of them. These routines
+ * need to convert that sequence back into the C sequence when
+ * there is contention on the semaphore.
+ *
+ * ip contains the semaphore pointer on entry. Save the C-clobbered
+ * registers (r0 to r3 and lr), but not ip, as we use it as a return
+ * value in some cases..
+ */
+#ifdef CONFIG_CPU_26
+asm(" .section .text.lock, \"ax\"
+ .align 5
+ .globl __down_failed
+__down_failed:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __down
+ ldmfd sp!, {r0 - r3, pc}^
+
+ .align 5
+ .globl __down_interruptible_failed
+__down_interruptible_failed:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __down_interruptible
+ mov ip, r0
+ ldmfd sp!, {r0 - r3, pc}^
+
+ .align 5
+ .globl __down_trylock_failed
+__down_trylock_failed:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __down_trylock
+ mov ip, r0
+ ldmfd sp!, {r0 - r3, pc}^
+
+ .align 5
+ .globl __up_wakeup
+__up_wakeup:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __up
+ ldmfd sp!, {r0 - r3, pc}^
+
+ .previous
+ ");
+
+#else
+/* 32 bit version */
+asm(" .section .text.lock, \"ax\"
+ .align 5
+ .globl __down_failed
+__down_failed:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __down
+ ldmfd sp!, {r0 - r3, pc}
+
+ .align 5
+ .globl __down_interruptible_failed
+__down_interruptible_failed:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __down_interruptible
+ mov ip, r0
+ ldmfd sp!, {r0 - r3, pc}
+
+ .align 5
+ .globl __down_trylock_failed
+__down_trylock_failed:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __down_trylock
+ mov ip, r0
+ ldmfd sp!, {r0 - r3, pc}
+
+ .align 5
+ .globl __up_wakeup
+__up_wakeup:
+ stmfd sp!, {r0 - r3, lr}
+ mov r0, ip
+ bl __up
+ ldmfd sp!, {r0 - r3, pc}
+
+ .previous
+ ");
+
+#endif
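
The comment at the top of this hunk spells out the contract: the inline fast path leaves the semaphore pointer in ip and clobbers only r0-r3 and lr, so the stubs need to save just those registers before falling back to the C slow path. A rough C-level sketch of that fast-path/slow-path split (a hedged illustration only, not the actual asm/semaphore.h inline assembly) would look like:

    /* Hedged sketch: the real ARM down() is inline assembly that keeps the
     * uncontended path branch-light and reaches __down_failed with the
     * semaphore pointer in ip rather than in r0. */
    static inline void down(struct semaphore *sem)
    {
            if (atomic_dec_return(&sem->count) < 0)
                    /* contended: the out-of-line stub above saves r0-r3/lr,
                     * copies ip into r0 and calls the C __down() */
                    __down_failed(sem);
    }
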
diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index 279f01f99e6d..85a2caa89124 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -241,7 +241,7 @@ void __init
setup_ramdisk(int doload, int prompt, int image_start, unsigned int rd_sz)
{
#ifdef CONFIG_BLK_DEV_RAM
- extern int rd_doload, rd_prompt, rd_image_start, rd_size;
+ extern int rd_size;
rd_image_start = image_start;
rd_prompt = prompt;
diff --git a/arch/cris/kernel/setup.c b/arch/cris/kernel/setup.c
index a082393de565..85af7212b06a 100644
--- a/arch/cris/kernel/setup.c
+++ b/arch/cris/kernel/setup.c
@@ -41,12 +41,6 @@ struct screen_info screen_info;
unsigned char aux_device_present;
-#ifdef CONFIG_BLK_DEV_RAM
-extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
-extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_image_start; /* starting block # of image */
-#endif
-
extern int root_mountflags;
extern char _etext, _edata, _end;
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 8a8e09d14217..250cf3eec0ca 100644
--- a/arch/i386/defconfig
+++ b/arch/i386/defconfig
@@ -5,8 +5,6 @@ CONFIG_X86=y
CONFIG_ISA=y
# CONFIG_SBUS is not set
CONFIG_UID16=y
-# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
-CONFIG_RWSEM_XCHGADD_ALGORITHM=y
#
# Code maturity level options
@@ -44,6 +42,8 @@ CONFIG_X86_CMPXCHG=y
CONFIG_X86_XADD=y
CONFIG_X86_BSWAP=y
CONFIG_X86_POPAD_OK=y
+# CONFIG_RWSEM_GENERIC_SPINLOCK is not set
+CONFIG_RWSEM_XCHGADD_ALGORITHM=y
CONFIG_X86_L1_CACHE_SHIFT=5
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index c2d87bf73d3d..05080042d2b1 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -142,12 +142,6 @@ struct e820map e820;
unsigned char aux_device_present;
-#ifdef CONFIG_BLK_DEV_RAM
-extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
-extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_image_start; /* starting block # of image */
-#endif
-
extern int root_mountflags;
extern char _text, _etext, _edata, _end;
extern unsigned long cpu_khz;
diff --git a/arch/ppc/kernel/chrp_setup.c b/arch/ppc/kernel/chrp_setup.c
index b7136d00179c..0ba0c6c68873 100644
--- a/arch/ppc/kernel/chrp_setup.c
+++ b/arch/ppc/kernel/chrp_setup.c
@@ -95,12 +95,6 @@ static int max_width;
unsigned long empty_zero_page[1024];
-#ifdef CONFIG_BLK_DEV_RAM
-extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
-extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_image_start; /* starting block # of image */
-#endif
-
static const char *gg2_memtypes[4] = {
"FPM", "SDRAM", "EDO", "BEDO"
};
diff --git a/arch/ppc/kernel/m8260_setup.c b/arch/ppc/kernel/m8260_setup.c
index 4185e5b44d16..5b4f8e85a386 100644
--- a/arch/ppc/kernel/m8260_setup.c
+++ b/arch/ppc/kernel/m8260_setup.c
@@ -67,12 +67,6 @@ extern unsigned long loops_per_jiffy;
unsigned char __res[sizeof(bd_t)];
unsigned long empty_zero_page[1024];
-#ifdef CONFIG_BLK_DEV_RAM
-extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
-extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_image_start; /* starting block # of image */
-#endif
-
extern char saved_command_line[256];
extern unsigned long find_available_memory(void);
diff --git a/arch/ppc/kernel/m8xx_setup.c b/arch/ppc/kernel/m8xx_setup.c
index 1d3260d784f4..9cbce65ac609 100644
--- a/arch/ppc/kernel/m8xx_setup.c
+++ b/arch/ppc/kernel/m8xx_setup.c
@@ -112,12 +112,6 @@ ide_pio_timings_t ide_pio_clocks[6];
#endif /* CONFIG_BLK_DEV_MPC8xx_IDE */
#endif /* CONFIG_BLK_DEV_IDE || CONFIG_BLK_DEV_IDE_MODULE */
-#ifdef CONFIG_BLK_DEV_RAM
-extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
-extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_image_start; /* starting block # of image */
-#endif
-
extern char saved_command_line[256];
extern unsigned long find_available_memory(void);
diff --git a/arch/ppc/kernel/prep_setup.c b/arch/ppc/kernel/prep_setup.c
index 772cceb61586..cf6edf78e556 100644
--- a/arch/ppc/kernel/prep_setup.c
+++ b/arch/ppc/kernel/prep_setup.c
@@ -103,12 +103,6 @@ extern unsigned long Hash_size, Hash_mask;
extern int probingmem;
extern unsigned long loops_per_jiffy;
-#ifdef CONFIG_BLK_DEV_RAM
-extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
-extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_image_start; /* starting block # of image */
-#endif
-
int __prep
prep_get_cpuinfo(char *buffer)
{
diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c
index 20b0bf018643..ba1337bec8dc 100644
--- a/arch/sh/kernel/setup.c
+++ b/arch/sh/kernel/setup.c
@@ -52,12 +52,6 @@ struct sh_cpuinfo boot_cpu_data = { CPU_SH_NONE, 0, 0, 0, };
struct screen_info screen_info;
unsigned char aux_device_present = 0xaa;
-#ifdef CONFIG_BLK_DEV_RAM
-extern int rd_doload; /* 1 = load ramdisk, 0 = don't load */
-extern int rd_prompt; /* 1 = prompt for ramdisk, 0 = don't prompt */
-extern int rd_image_start; /* starting block # of image */
-#endif
-
#if defined(CONFIG_SH_GENERIC) || defined(CONFIG_SH_UNKNOWN)
struct sh_machine_vector sh_mv;
#endif
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 25f97d211463..a0bfe6c1231e 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -616,7 +616,6 @@ int __init chr_dev_init(void)
printk("unable to get major %d for memory devs\n", MEM_MAJOR);
memory_devfs_register();
rand_initialize();
- raw_init();
#ifdef CONFIG_I2C
i2c_init_all();
#endif
diff --git a/drivers/char/raw.c b/drivers/char/raw.c
index ae4f0bbd3ec1..0b382aa4fceb 100644
--- a/drivers/char/raw.c
+++ b/drivers/char/raw.c
@@ -19,10 +19,15 @@
#define dprintk(x...)
-static struct block_device *raw_device_bindings[256];
-static int raw_device_inuse[256];
-static int raw_device_sector_size[256];
-static int raw_device_sector_bits[256];
+typedef struct raw_device_data_s {
+ struct kiobuf * iobuf;
+ long iobuf_lock;
+ struct block_device *binding;
+ int inuse, sector_size, sector_bits;
+ struct semaphore mutex;
+} raw_device_data_t;
+
+static raw_device_data_t raw_devices[256];
static ssize_t rw_raw_dev(int rw, struct file *, char *, size_t, loff_t *);
@@ -45,11 +50,19 @@ static struct file_operations raw_ctl_fops = {
open: raw_open,
};
-void __init raw_init(void)
+static int __init raw_init(void)
{
+ int i;
register_chrdev(RAW_MAJOR, "raw", &raw_fops);
+
+ for (i = 0; i < 256; i++)
+ init_MUTEX(&raw_devices[i].mutex);
+
+ return 0;
}
+__initcall(raw_init);
+
/*
* Open/close code for raw IO.
*/
@@ -74,28 +87,43 @@ int raw_open(struct inode *inode, struct file *filp)
return 0;
}
+ down(&raw_devices[minor].mutex);
/*
* No, it is a normal raw device. All we need to do on open is
* to check that the device is bound, and force the underlying
* block device to a sector-size blocksize.
*/
- bdev = raw_device_bindings[minor];
+ bdev = raw_devices[minor].binding;
+ err = -ENODEV;
if (!bdev)
- return -ENODEV;
+ goto out;
rdev = to_kdev_t(bdev->bd_dev);
err = blkdev_get(bdev, filp->f_mode, 0, BDEV_RAW);
if (err)
- return err;
+ goto out;
/*
* Don't change the blocksize if we already have users using
* this device
*/
- if (raw_device_inuse[minor]++)
- return 0;
+ if (raw_devices[minor].inuse++)
+ goto out;
+
+ /*
+ * We'll just use one kiobuf
+ */
+
+ err = alloc_kiovec(1, &raw_devices[minor].iobuf);
+ if (err) {
+ raw_devices[minor].inuse--;
+ up(&raw_devices[minor].mutex);
+ blkdev_put(bdev, BDEV_RAW);
+ return err;
+ }
+
/*
* Don't interfere with mounted devices: we cannot safely set
@@ -112,13 +140,16 @@ int raw_open(struct inode *inode, struct file *filp)
}
set_blocksize(rdev, sector_size);
- raw_device_sector_size[minor] = sector_size;
+ raw_devices[minor].sector_size = sector_size;
for (sector_bits = 0; !(sector_size & 1); )
sector_size>>=1, sector_bits++;
- raw_device_sector_bits[minor] = sector_bits;
+ raw_devices[minor].sector_bits = sector_bits;
+
+ out:
+ up(&raw_devices[minor].mutex);
- return 0;
+ return err;
}
int raw_release(struct inode *inode, struct file *filp)
@@ -127,11 +158,12 @@ int raw_release(struct inode *inode, struct file *filp)
struct block_device *bdev;
minor = MINOR(inode->i_rdev);
- lock_kernel();
- bdev = raw_device_bindings[minor];
+ down(&raw_devices[minor].mutex);
+ bdev = raw_devices[minor].binding;
+ if (!--raw_devices[minor].inuse)
+ free_kiovec(1, &raw_devices[minor].iobuf);
+ up(&raw_devices[minor].mutex);
blkdev_put(bdev, BDEV_RAW);
- raw_device_inuse[minor]--;
- unlock_kernel();
return 0;
}
@@ -184,26 +216,30 @@ int raw_ctl_ioctl(struct inode *inode,
* major/minor numbers make sense.
*/
- if (rq.block_major == NODEV ||
+ if ((rq.block_major == NODEV &&
+ rq.block_minor != NODEV) ||
rq.block_major > MAX_BLKDEV ||
rq.block_minor > MINORMASK) {
err = -EINVAL;
break;
}
- if (raw_device_inuse[minor]) {
+ down(&raw_devices[minor].mutex);
+ if (raw_devices[minor].inuse) {
+ up(&raw_devices[minor].mutex);
err = -EBUSY;
break;
}
- if (raw_device_bindings[minor])
- bdput(raw_device_bindings[minor]);
- raw_device_bindings[minor] =
+ if (raw_devices[minor].binding)
+ bdput(raw_devices[minor].binding);
+ raw_devices[minor].binding =
bdget(kdev_t_to_nr(MKDEV(rq.block_major, rq.block_minor)));
+ up(&raw_devices[minor].mutex);
} else {
struct block_device *bdev;
kdev_t dev;
- bdev = raw_device_bindings[minor];
+ bdev = raw_devices[minor].binding;
if (bdev) {
dev = to_kdev_t(bdev->bd_dev);
rq.block_major = MAJOR(dev);
@@ -244,9 +280,9 @@ ssize_t rw_raw_dev(int rw, struct file *filp, char *buf,
size_t size, loff_t *offp)
{
struct kiobuf * iobuf;
- int err;
+ int new_iobuf;
+ int err = 0;
unsigned long blocknr, blocks;
- unsigned long b[KIO_MAX_SECTORS];
size_t transferred;
int iosize;
int i;
@@ -262,9 +298,23 @@ ssize_t rw_raw_dev(int rw, struct file *filp, char *buf,
*/
minor = MINOR(filp->f_dentry->d_inode->i_rdev);
- dev = to_kdev_t(raw_device_bindings[minor]->bd_dev);
- sector_size = raw_device_sector_size[minor];
- sector_bits = raw_device_sector_bits[minor];
+
+ new_iobuf = 0;
+ iobuf = raw_devices[minor].iobuf;
+ if (test_and_set_bit(0, &raw_devices[minor].iobuf_lock)) {
+ /*
+ * A parallel read/write is using the preallocated iobuf
+ * so just run slow and allocate a new one.
+ */
+ err = alloc_kiovec(1, &iobuf);
+ if (err)
+ goto out;
+ new_iobuf = 1;
+ }
+
+ dev = to_kdev_t(raw_devices[minor].binding->bd_dev);
+ sector_size = raw_devices[minor].sector_size;
+ sector_bits = raw_devices[minor].sector_bits;
sector_mask = sector_size- 1;
max_sectors = KIO_MAX_SECTORS >> (sector_bits - 9);
@@ -275,18 +325,14 @@ ssize_t rw_raw_dev(int rw, struct file *filp, char *buf,
dprintk ("rw_raw_dev: dev %d:%d (+%d)\n",
MAJOR(dev), MINOR(dev), limit);
+ err = -EINVAL;
if ((*offp & sector_mask) || (size & sector_mask))
- return -EINVAL;
- if ((*offp >> sector_bits) > limit)
- return 0;
-
- /*
- * We'll just use one kiobuf
- */
-
- err = alloc_kiovec(1, &iobuf);
- if (err)
- return err;
+ goto out_free;
+ err = 0;
+ if (size)
+ err = -ENXIO;
+ if ((*offp >> sector_bits) >= limit)
+ goto out_free;
/*
* Split the IO into KIO_MAX_SECTORS chunks, mapping and
@@ -310,35 +356,37 @@ ssize_t rw_raw_dev(int rw, struct file *filp, char *buf,
err = map_user_kiobuf(rw, iobuf, (unsigned long) buf, iosize);
if (err)
break;
-#if 0
- err = lock_kiovec(1, &iobuf, 1);
- if (err)
- break;
-#endif
-
+
for (i=0; i < blocks; i++)
- b[i] = blocknr++;
+ iobuf->blocks[i] = blocknr++;
- err = brw_kiovec(rw, 1, &iobuf, dev, b, sector_size);
+ err = brw_kiovec(rw, 1, &iobuf, dev, iobuf->blocks, sector_size);
+ if (rw == READ && err > 0)
+ mark_dirty_kiobuf(iobuf, err);
+
if (err >= 0) {
transferred += err;
size -= err;
buf += err;
}
- unmap_kiobuf(iobuf); /* The unlock_kiobuf is implicit here */
+ unmap_kiobuf(iobuf);
if (err != iosize)
break;
}
- free_kiovec(1, &iobuf);
-
if (transferred) {
*offp += transferred;
- return transferred;
+ err = transferred;
}
-
+
+ out_free:
+ if (!new_iobuf)
+ clear_bit(0, &raw_devices[minor].iobuf_lock);
+ else
+ free_kiovec(1, &iobuf);
+ out:
return err;
}
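
The rewritten rw_raw_dev() above avoids a per-request kiovec allocation: each minor now owns a preallocated iobuf, claimed with an atomic bit, and only a second concurrent request on the same minor pays for a fresh allocation. The core of that pattern, as a stand-alone hedged sketch (field names follow the raw_device_data_t introduced above):

    struct kiobuf *iobuf = raw_devices[minor].iobuf;
    int new_iobuf = 0;

    if (test_and_set_bit(0, &raw_devices[minor].iobuf_lock)) {
            /* a parallel read/write owns the preallocated iobuf:
             * run slow and allocate a private one */
            err = alloc_kiovec(1, &iobuf);
            if (err)
                    return err;
            new_iobuf = 1;
    }

    /* ... map_user_kiobuf() / brw_kiovec() / unmap_kiobuf() loop ... */

    if (new_iobuf)
            free_kiovec(1, &iobuf);                        /* private copy: free it */
    else
            clear_bit(0, &raw_devices[minor].iobuf_lock);  /* release the shared one */
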
diff --git a/drivers/ide/alim15x3.c b/drivers/ide/alim15x3.c
index dbea97bd7d05..f4c069d78aea 100644
--- a/drivers/ide/alim15x3.c
+++ b/drivers/ide/alim15x3.c
@@ -640,11 +640,11 @@ unsigned int __init ata66_ali15x3 (ide_hwif_t *hwif)
void __init ide_init_ali15x3 (ide_hwif_t *hwif)
{
+#ifndef CONFIG_SPARC64
byte ideic, inmir;
byte irq_routing_table[] = { -1, 9, 3, 10, 4, 5, 7, 6,
1, 11, 0, 12, 0, 14, 0, 15 };
-#ifndef CONFIG_SPARC64
hwif->irq = hwif->channel ? 15 : 14;
if (isa_dev) {
diff --git a/drivers/md/lvm-snap.c b/drivers/md/lvm-snap.c
index e28ffdbe9449..20e40c022e90 100644
--- a/drivers/md/lvm-snap.c
+++ b/drivers/md/lvm-snap.c
@@ -246,7 +246,6 @@ int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap)
int length_tmp;
ulong snap_pe_start, COW_table_sector_offset,
COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block;
- ulong blocks[1];
const char * reason;
kdev_t snap_phys_dev;
struct kiobuf * iobuf = lv_snap->lv_iobuf;
@@ -274,7 +273,7 @@ int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap)
COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t));
/* COW table block to write next */
- blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
+ iobuf->blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10);
/* store new COW_table entry */
lv_COW_table[idx_COW_table].pv_org_number = cpu_to_le64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org));
@@ -290,7 +289,7 @@ int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap)
iobuf->nr_pages = 1;
if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
- blocks, blksize_snap) != blksize_snap)
+ iobuf->blocks, blksize_snap) != blksize_snap)
goto fail_raw_write;
@@ -309,11 +308,11 @@ int lvm_write_COW_table_block(vg_t * vg, lv_t * lv_snap)
snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new;
snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size;
blksize_snap = lvm_get_blksize(snap_phys_dev);
- blocks[0] = snap_pe_start >> (blksize_snap >> 10);
- } else blocks[0]++;
+ iobuf->blocks[0] = snap_pe_start >> (blksize_snap >> 10);
+ } else iobuf->blocks[0]++;
if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
- blocks, blksize_snap) != blksize_snap)
+ iobuf->blocks, blksize_snap) != blksize_snap)
goto fail_raw_write;
}
@@ -352,7 +351,6 @@ int lvm_snapshot_COW(kdev_t org_phys_dev,
unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off;
int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size;
struct kiobuf * iobuf;
- unsigned long blocks[KIO_MAX_SECTORS];
int blksize_snap, blksize_org, min_blksize, max_blksize;
int max_sectors, nr_sectors;
@@ -402,16 +400,16 @@ int lvm_snapshot_COW(kdev_t org_phys_dev,
iobuf->length = nr_sectors << 9;
- lvm_snapshot_prepare_blocks(blocks, org_start,
+ lvm_snapshot_prepare_blocks(iobuf->blocks, org_start,
nr_sectors, blksize_org);
if (brw_kiovec(READ, 1, &iobuf, org_phys_dev,
- blocks, blksize_org) != (nr_sectors<<9))
+ iobuf->blocks, blksize_org) != (nr_sectors<<9))
goto fail_raw_read;
- lvm_snapshot_prepare_blocks(blocks, snap_start,
+ lvm_snapshot_prepare_blocks(iobuf->blocks, snap_start,
nr_sectors, blksize_snap);
if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev,
- blocks, blksize_snap) != (nr_sectors<<9))
+ iobuf->blocks, blksize_snap) != (nr_sectors<<9))
goto fail_raw_write;
}
diff --git a/drivers/sbus/char/Makefile b/drivers/sbus/char/Makefile
index dd0a8c59884f..12ee27686e6c 100644
--- a/drivers/sbus/char/Makefile
+++ b/drivers/sbus/char/Makefile
@@ -12,6 +12,7 @@ O_TARGET := sunchar.o
export-objs := su.o bbc_i2c.o
obj-y := sunkbd.o sunkbdmap.o sunmouse.o sunserial.o zs.o
+list-multi := vfc.o bbc.o
vfc-objs := vfc_dev.o vfc_i2c.o
bbc-objs := bbc_i2c.o bbc_envctrl.o
diff --git a/drivers/scsi/3w-xxxx.c b/drivers/scsi/3w-xxxx.c
index 9126933566a0..c4673d9ae094 100644
--- a/drivers/scsi/3w-xxxx.c
+++ b/drivers/scsi/3w-xxxx.c
@@ -717,6 +717,7 @@ int tw_findcards(Scsi_Host_Template *tw_host)
continue;
}
+ scsi_set_pci_device(host, tw_pci_dev);
status_reg_value = inl(tw_dev->registers.status_reg_addr);
dprintk(KERN_NOTICE "scsi%d : Found a 3ware Storage Controller at 0x%x, IRQ: %d P-chip: %d.%d\n", host->host_no,
diff --git a/drivers/scsi/53c7,8xx.c b/drivers/scsi/53c7,8xx.c
index 518352ad1426..e4ef976fa547 100644
--- a/drivers/scsi/53c7,8xx.c
+++ b/drivers/scsi/53c7,8xx.c
@@ -1155,7 +1155,7 @@ NCR53c7x0_init (struct Scsi_Host *host) {
* Function : static int normal_init(Scsi_Host_Template *tpnt, int board,
* int chip, u32 base, int io_port, int irq, int dma, int pcivalid,
* unsigned char pci_bus, unsigned char pci_device_fn,
- * long long options);
+ * struct pci_dev *pci_dev, long long options);
*
* Purpose : initializes a NCR53c7,8x0 based on base addresses,
* IRQ, and DMA channel.
@@ -1175,7 +1175,9 @@ NCR53c7x0_init (struct Scsi_Host *host) {
static int __init
normal_init (Scsi_Host_Template *tpnt, int board, int chip,
u32 base, int io_port, int irq, int dma, int pci_valid,
- unsigned char pci_bus, unsigned char pci_device_fn, long long options){
+ unsigned char pci_bus, unsigned char pci_device_fn,
+ struct pci_dev *pci_dev, long long options)
+{
struct Scsi_Host *instance;
struct NCR53c7x0_hostdata *hostdata;
char chip_str[80];
@@ -1319,6 +1321,7 @@ normal_init (Scsi_Host_Template *tpnt, int board, int chip,
}
instance->irq = irq;
instance->dma_channel = dma;
+ scsi_set_pci_device(instance, pci_dev);
hostdata->options = options;
hostdata->dsa_len = dsa_len;
@@ -1509,7 +1512,7 @@ ncr_pci_init (Scsi_Host_Template *tpnt, int board, int chip,
}
return normal_init (tpnt, board, chip, (int) base, io_port,
- (int) irq, DMA_NONE, 1, bus, device_fn, options);
+ (int) irq, DMA_NONE, 1, bus, device_fn, pdev, options);
}
@@ -1553,6 +1556,7 @@ NCR53c7xx_detect(Scsi_Host_Template *tpnt){
overrides[current_override].data.normal.dma,
0 /* PCI data invalid */, 0 /* PCI bus place holder */,
0 /* PCI device_function place holder */,
+ NULL /* PCI pci_dev place holder */,
overrides[current_override].options)) {
++count;
}
diff --git a/drivers/scsi/AM53C974.c b/drivers/scsi/AM53C974.c
index f540892984f7..80b807bf723c 100644
--- a/drivers/scsi/AM53C974.c
+++ b/drivers/scsi/AM53C974.c
@@ -680,6 +680,7 @@ static int __init AM53C974_init(Scsi_Host_Template * tpnt, struct pci_dev *pdev
printk(KERN_WARNING "AM53C974: Unable to register host, aborting.\n");
return 0;
}
+ scsi_set_pci_device(instance, pdev);
hostdata = (struct AM53C974_hostdata *) instance->hostdata;
instance->base = 0;
instance->io_port = pci_resource_start(pdev, 0);
diff --git a/drivers/scsi/advansys.c b/drivers/scsi/advansys.c
index c357279d14bd..00b3eba88525 100644
--- a/drivers/scsi/advansys.c
+++ b/drivers/scsi/advansys.c
@@ -4827,6 +4827,9 @@ advansys_detect(Scsi_Host_Template *tpnt)
if (shp == NULL) {
continue;
}
+#ifdef CONFIG_PCI
+ scsi_set_pci_device(shp, pci_devp);
+#endif
/* Save a pointer to the Scsi_host of each board found. */
asc_host[asc_board_count++] = shp;
diff --git a/drivers/scsi/aic7xxx/aic7xxx_linux.c b/drivers/scsi/aic7xxx/aic7xxx_linux.c
index edaa0b3b2957..c02fc1d2f858 100644
--- a/drivers/scsi/aic7xxx/aic7xxx_linux.c
+++ b/drivers/scsi/aic7xxx/aic7xxx_linux.c
@@ -1094,6 +1094,7 @@ aic7xxx_register_host(struct ahc_softc *ahc, Scsi_Host_Template *template)
ahc_set_name(ahc, new_name);
}
host->unique_id = ahc->unit;
+ scsi_set_pci_device(host, ahc->dev_softc);
aic7xxx_initialize_scsi_bus(ahc);
ahc_unlock(ahc, &s);
return (0);
diff --git a/drivers/scsi/aic7xxx_old.c b/drivers/scsi/aic7xxx_old.c
index e21138a20b7d..0ae0dfa94306 100644
--- a/drivers/scsi/aic7xxx_old.c
+++ b/drivers/scsi/aic7xxx_old.c
@@ -8867,6 +8867,7 @@ aic7xxx_alloc(Scsi_Host_Template *sht, struct aic7xxx_host *temp)
}
DRIVER_LOCK_INIT
}
+ scsi_set_pci_device(host, p->pdev);
return (p);
}
diff --git a/drivers/scsi/atp870u.c b/drivers/scsi/atp870u.c
index 890d44426f7d..7f9d35696739 100644
--- a/drivers/scsi/atp870u.c
+++ b/drivers/scsi/atp870u.c
@@ -1787,6 +1787,7 @@ int atp870u_detect(Scsi_Host_Template * tpnt)
shpnt->io_port = base_io;
shpnt->n_io_port = 0x40; /* Number of bytes of I/O space used */
shpnt->irq = irq;
+ scsi_set_pci_device(shpnt, pdev[h]);
restore_flags(flags);
request_region(base_io, 0x40, "atp870u"); /* Register the IO ports that we use */
count++;
diff --git a/drivers/scsi/cpqfcTSinit.c b/drivers/scsi/cpqfcTSinit.c
index 434119ee1db4..83661d94d791 100644
--- a/drivers/scsi/cpqfcTSinit.c
+++ b/drivers/scsi/cpqfcTSinit.c
@@ -300,7 +300,7 @@ int cpqfcTS_detect(Scsi_Host_Template *ScsiHostTemplate)
DEBUG_PCI(printk(" PciDev->baseaddress[]= %lx\n", PciDev->base_address[2]));
DEBUG_PCI(printk(" PciDev->baseaddress[]= %lx\n", PciDev->base_address[3]));
-
+ scsi_set_pci_device(HostAdapter, PciDev);
HostAdapter->irq = PciDev->irq; // copy for Scsi layers
// HP Tachlite uses two (255-byte) ranges of Port I/O (lower & upper),
diff --git a/drivers/scsi/dmx3191d.c b/drivers/scsi/dmx3191d.c
index 0d30d1f31574..543d380a25c8 100644
--- a/drivers/scsi/dmx3191d.c
+++ b/drivers/scsi/dmx3191d.c
@@ -86,6 +86,7 @@ int __init dmx3191d_detect(Scsi_Host_Template *tmpl) {
release_region(port, DMX3191D_REGION);
continue;
}
+ scsi_set_pci_device(instance, pdev);
instance->io_port = port;
instance->irq = pdev->irq;
NCR5380_init(instance, FLAG_NO_PSEUDO_DMA | FLAG_DTC3181E);
diff --git a/drivers/scsi/fdomain.c b/drivers/scsi/fdomain.c
index 884a2ea25a75..a2ff2bc74b85 100644
--- a/drivers/scsi/fdomain.c
+++ b/drivers/scsi/fdomain.c
@@ -805,7 +805,7 @@ static int fdomain_isa_detect( int *irq, int *iobase )
the PCI configuration registers. */
#ifdef CONFIG_PCI
-static int fdomain_pci_bios_detect( int *irq, int *iobase )
+static int fdomain_pci_bios_detect( int *irq, int *iobase, struct pci_dev **ret_pdev )
{
unsigned int pci_irq; /* PCI interrupt line */
unsigned long pci_base; /* PCI I/O base address */
@@ -849,6 +849,7 @@ static int fdomain_pci_bios_detect( int *irq, int *iobase )
*irq = pci_irq;
*iobase = pci_base;
+ *ret_pdev = pdev;
#if DEBUG_DETECT
printk( "scsi: <fdomain> TMC-3260 detect:"
@@ -875,6 +876,7 @@ int fdomain_16x0_detect( Scsi_Host_Template *tpnt )
{
int retcode;
struct Scsi_Host *shpnt;
+ struct pci_dev *pdev = NULL;
#if DO_DETECT
int i = 0;
int j = 0;
@@ -910,7 +912,7 @@ int fdomain_16x0_detect( Scsi_Host_Template *tpnt )
#ifdef CONFIG_PCI
/* Try PCI detection first */
- flag = fdomain_pci_bios_detect( &interrupt_level, &port_base );
+ flag = fdomain_pci_bios_detect( &interrupt_level, &port_base, &pdev );
#endif
if (!flag) {
/* Then try ISA bus detection */
@@ -969,6 +971,7 @@ int fdomain_16x0_detect( Scsi_Host_Template *tpnt )
return 0;
shpnt->irq = interrupt_level;
shpnt->io_port = port_base;
+ scsi_set_pci_device(shpnt->pci_dev, pdev);
shpnt->n_io_port = 0x10;
print_banner( shpnt );
diff --git a/drivers/scsi/gdth.c b/drivers/scsi/gdth.c
index ec5814c7fce3..eddbe8059578 100644
--- a/drivers/scsi/gdth.c
+++ b/drivers/scsi/gdth.c
@@ -3290,6 +3290,7 @@ int __init gdth_detect(Scsi_Host_Template *shtp)
scsi_unregister(shp);
continue;
}
+ scsi_set_pci_device(shp, pcistr[ctr].pdev);
shp->unchecked_isa_dma = 0;
shp->irq = ha->irq;
shp->dma_channel = 0xff;
diff --git a/drivers/scsi/hosts.h b/drivers/scsi/hosts.h
index 7d107541e8ec..b0f8412cac27 100644
--- a/drivers/scsi/hosts.h
+++ b/drivers/scsi/hosts.h
@@ -27,6 +27,7 @@
#include <linux/config.h>
#include <linux/proc_fs.h>
+#include <linux/pci.h>
/* It is senseless to set SG_ALL any higher than this - the performance
* does not get any better, and it wastes memory
@@ -414,6 +415,12 @@ struct Scsi_Host
void (*select_queue_depths)(struct Scsi_Host *, Scsi_Device *);
/*
+ * For SCSI hosts which are PCI devices, set pci_dev so that
+ * we can do BIOS EDD 3.0 mappings
+ */
+ struct pci_dev *pci_dev;
+
+ /*
* We should ensure that this is aligned, both for better performance
* and also because some compilers (m68k) don't automatically force
* alignment to a long boundary.
@@ -466,6 +473,13 @@ extern void scsi_unregister(struct Scsi_Host * i);
extern void scsi_register_blocked_host(struct Scsi_Host * SHpnt);
extern void scsi_deregister_blocked_host(struct Scsi_Host * SHpnt);
+static inline void scsi_set_pci_device(struct Scsi_Host *SHpnt,
+ struct pci_dev *pdev)
+{
+ SHpnt->pci_dev = pdev;
+}
+
+
/*
* Prototypes for functions/data in scsi_scan.c
*/
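
Most of the driver hunks in this commit are one-line calls to this new helper, made right after the host is registered. The usual shape of such a call site, sketched here with a hypothetical driver's detect routine (names like my_hostdata are illustrative only):

    struct Scsi_Host *host = scsi_register(tpnt, sizeof(struct my_hostdata));
    if (!host)
            return 0;
    scsi_set_pci_device(host, pdev);        /* record the PCI device for SCSI_IOCTL_GET_PCI */
    host->irq = pdev->irq;
    host->io_port = pci_resource_start(pdev, 0);
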
diff --git a/drivers/scsi/ini9100u.h b/drivers/scsi/ini9100u.h
index f3f11920eecc..6b907ed6de1f 100644
--- a/drivers/scsi/ini9100u.h
+++ b/drivers/scsi/ini9100u.h
@@ -276,6 +276,7 @@ typedef struct Ha_Ctrl_Struc {
spinlock_t HCS_AvailLock;
spinlock_t HCS_SemaphLock;
spinlock_t pSRB_lock;
+ struct pci_dev *pci_dev;
} HCS;
/* Bit Definition for HCB_Flags */
diff --git a/drivers/scsi/ips.c b/drivers/scsi/ips.c
index 604c1c308f6a..d4d14f97e54e 100644
--- a/drivers/scsi/ips.c
+++ b/drivers/scsi/ips.c
@@ -879,6 +879,7 @@ ips_detect(Scsi_Host_Template *SHT) {
sh->cmd_per_lun = sh->hostt->cmd_per_lun;
sh->unchecked_isa_dma = sh->hostt->unchecked_isa_dma;
sh->use_clustering = sh->hostt->use_clustering;
+ scsi_set_pci_device(sh, dev[i]);
#if LINUX_VERSION_CODE < LinuxVersionCode(2,3,32)
sh->wish_block = FALSE;
diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
index 7989cbd0f6ec..13671dc607ec 100644
--- a/drivers/scsi/megaraid.c
+++ b/drivers/scsi/megaraid.c
@@ -2510,6 +2510,7 @@ static int mega_findCard (Scsi_Host_Template * pHostTmpl,
if (!host)
goto err_unmap;
+ scsi_set_pci_device(host, pdev);
megaCfg = (mega_host_config *) host->hostdata;
memset (megaCfg, 0, sizeof (mega_host_config));
diff --git a/drivers/scsi/ncr53c8xx.c b/drivers/scsi/ncr53c8xx.c
index 524d8a7b6913..4df173a780b7 100644
--- a/drivers/scsi/ncr53c8xx.c
+++ b/drivers/scsi/ncr53c8xx.c
@@ -3715,6 +3715,7 @@ ncr_attach (Scsi_Host_Template *tpnt, int unit, ncr_device *device)
instance->cmd_per_lun = MAX_TAGS;
instance->can_queue = (MAX_START-4);
instance->select_queue_depths = ncr53c8xx_select_queue_depths;
+ scsi_set_pci_device(instance, device->pdev);
#ifdef SCSI_NCR_INTEGRITY_CHECKING
np->check_integrity = 0;
diff --git a/drivers/scsi/pci2000.c b/drivers/scsi/pci2000.c
index 8b91bb2bf11f..38fa634e5919 100644
--- a/drivers/scsi/pci2000.c
+++ b/drivers/scsi/pci2000.c
@@ -711,6 +711,7 @@ int Pci2000_Detect (Scsi_Host_Template *tpnt)
goto unregister;
}
+ scsi_set_pci_device(pshost, pdev);
pshost->irq = pdev->irq;
setirq = 1;
padapter->irqOwned = 0;
diff --git a/drivers/scsi/pci2220i.c b/drivers/scsi/pci2220i.c
index b217520e3700..b490a1225e82 100644
--- a/drivers/scsi/pci2220i.c
+++ b/drivers/scsi/pci2220i.c
@@ -2553,6 +2553,7 @@ int Pci2220i_Detect (Scsi_Host_Template *tpnt)
if ( GetRegs (pshost, FALSE, pcidev) )
goto unregister;
+ scsi_set_pci_device(pshost, pcidev);
pshost->max_id = padapter->numberOfDrives;
for ( z = 0; z < padapter->numberOfDrives; z++ )
{
@@ -2656,6 +2657,7 @@ unregister:;
for ( z = 0; z < BIGD_MAXDRIVES; z++ )
DiskMirror[z].status = inb_p (padapter->regScratchPad + BIGD_RAID_0_STATUS + z);
+ scsi_set_pci_info(pshost, pcidev);
pshost->max_id = padapter->numberOfDrives;
padapter->failRegister = inb_p (padapter->regScratchPad + BIGD_ALARM_IMAGE);
for ( z = 0; z < padapter->numberOfDrives; z++ )
diff --git a/drivers/scsi/qla1280.c b/drivers/scsi/qla1280.c
index 4656c964538e..9724f2091efd 100644
--- a/drivers/scsi/qla1280.c
+++ b/drivers/scsi/qla1280.c
@@ -814,6 +814,7 @@ qla1280_detect(Scsi_Host_Template *template)
printk(KERN_WARNING "qla1280: Failed to register host, aborting.\n");
return 0;
}
+ scsi_set_pci_device(host, pdev);
ha = (scsi_qla_host_t *) host->hostdata;
/* Clear our data area */
for( j =0, cp = (char *)ha; j < sizeof(scsi_qla_host_t); j++)
diff --git a/drivers/scsi/qlogicfc.c b/drivers/scsi/qlogicfc.c
index 4f5984953a31..3bf203c8dada 100644
--- a/drivers/scsi/qlogicfc.c
+++ b/drivers/scsi/qlogicfc.c
@@ -761,6 +761,7 @@ int isp2x00_detect(Scsi_Host_Template * tmpt)
printk("qlogicfc%d : could not register host.\n", hosts);
continue;
}
+ scsi_set_pci_device(host, pdev);
host->max_id = QLOGICFC_MAX_ID + 1;
host->max_lun = QLOGICFC_MAX_LUN;
host->hostt->use_new_eh_code = 1;
diff --git a/drivers/scsi/qlogicisp.c b/drivers/scsi/qlogicisp.c
index babf17608e2d..5e65cf8123ff 100644
--- a/drivers/scsi/qlogicisp.c
+++ b/drivers/scsi/qlogicisp.c
@@ -690,6 +690,7 @@ int isp1020_detect(Scsi_Host_Template *tmpt)
memset(hostdata, 0, sizeof(struct isp1020_hostdata));
hostdata->pci_dev = pdev;
+ scsi_set_pci_device(host, pdev);
if (isp1020_init(host))
goto fail_and_unregister;
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
index ebf9a364a862..6e479d04e203 100644
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -352,6 +352,26 @@ int scsi_ioctl_send_command(Scsi_Device * dev, Scsi_Ioctl_Command * sic)
}
/*
+ * The scsi_ioctl_get_pci() function places into arg the value
+ * pci_dev::slot_name (8 characters) for the PCI device (if any).
+ * Returns: 0 on success
+ * -ENXIO if there isn't a PCI device pointer
+ * (could be because the SCSI driver hasn't been
+ * updated yet, or because it isn't a SCSI
+ * device)
+ * any copy_to_user() error on failure there
+ */
+static int
+scsi_ioctl_get_pci(Scsi_Device * dev, void *arg)
+{
+
+ if (!dev->host->pci_dev) return -ENXIO;
+ return copy_to_user(arg, dev->host->pci_dev->slot_name,
+ sizeof(dev->host->pci_dev->slot_name));
+}
+
+
+/*
* the scsi_ioctl() function differs from most ioctls in that it does
* not take a major/minor number as the dev field. Rather, it takes
* a pointer to a scsi_devices[] element, a structure.
@@ -453,6 +473,9 @@ int scsi_ioctl(Scsi_Device * dev, int cmd, void *arg)
return ioctl_internal_command((Scsi_Device *) dev, scsi_cmd,
START_STOP_TIMEOUT, NORMAL_RETRIES);
break;
+ case SCSI_IOCTL_GET_PCI:
+ return scsi_ioctl_get_pci(dev, arg);
+ break;
default:
if (dev->host->hostt->ioctl)
return dev->host->hostt->ioctl(dev, cmd, arg);
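
From user space, the new ioctl simply copies the 8-byte pci_dev::slot_name into a caller-supplied buffer. A hedged user-space sketch (the ioctl number is an assumption here for illustration; in the kernel it is defined by the include/scsi/scsi.h hunk of this commit, and error handling is minimal):

    #include <sys/ioctl.h>
    #include <fcntl.h>
    #include <stdio.h>

    #ifndef SCSI_IOCTL_GET_PCI
    #define SCSI_IOCTL_GET_PCI 0x5387       /* assumed value, see include/scsi/scsi.h */
    #endif

    int main(void)
    {
            char slot[8];                   /* pci_dev::slot_name is 8 bytes, maybe unterminated */
            int fd = open("/dev/sda", O_RDONLY);

            if (fd < 0 || ioctl(fd, SCSI_IOCTL_GET_PCI, slot) != 0)
                    return 1;
            printf("PCI slot: %.8s\n", slot);
            return 0;
    }
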
diff --git a/drivers/scsi/sym53c8xx.c b/drivers/scsi/sym53c8xx.c
index 9e80eeee10fd..734ada28ce69 100644
--- a/drivers/scsi/sym53c8xx.c
+++ b/drivers/scsi/sym53c8xx.c
@@ -5905,6 +5905,7 @@ ncr_attach (Scsi_Host_Template *tpnt, int unit, ncr_device *device)
instance->dma_channel = 0;
instance->cmd_per_lun = MAX_TAGS;
instance->can_queue = (MAX_START-4);
+ scsi_set_pci_device(instance, device->pdev);
np->check_integrity = 0;
diff --git a/drivers/scsi/tmscsim.c b/drivers/scsi/tmscsim.c
index d5937962f26f..7bcbaf62960c 100644
--- a/drivers/scsi/tmscsim.c
+++ b/drivers/scsi/tmscsim.c
@@ -2205,6 +2205,7 @@ static int __init DC390_init (PSHT psht, ULONG io_port, UCHAR Irq, PDEVDECL, UCH
psh = scsi_register( psht, sizeof(DC390_ACB) );
if( !psh ) return( -1 );
+ scsi_set_pci_device(psh, pdev);
pACB = (PACB) psh->hostdata;
DC390_LOCKA_INIT;
DC390_LOCK_ACB;
diff --git a/drivers/usb/usb-uhci.c b/drivers/usb/usb-uhci.c
index b2750a6fe2e8..8f1e806332c9 100644
--- a/drivers/usb/usb-uhci.c
+++ b/drivers/usb/usb-uhci.c
@@ -16,7 +16,7 @@
* (C) Copyright 1999 Randy Dunlap
* (C) Copyright 1999 Gregory P. Smith
*
- * $Id: usb-uhci.c,v 1.251 2000/11/30 09:47:54 acher Exp $
+ * $Id: usb-uhci.c,v 1.259 2001/03/30 14:51:59 acher Exp $
*/
#include <linux/config.h>
@@ -52,7 +52,7 @@
/* This enables an extra UHCI slab for memory debugging */
#define DEBUG_SLAB
-#define VERSTR "$Revision: 1.251 $ time " __TIME__ " " __DATE__
+#define VERSTR "$Revision: 1.259 $ time " __TIME__ " " __DATE__
#include <linux/usb.h>
#include "usb-uhci.h"
@@ -803,7 +803,7 @@ _static int uhci_submit_bulk_urb (urb_t *urb, urb_t *bulk_urb)
{
uhci_t *s = (uhci_t*) urb->dev->bus->hcpriv;
urb_priv_t *urb_priv = urb->hcpriv;
- uhci_desc_t *qh, *td, *nqh, *bqh, *first_td=NULL;
+ uhci_desc_t *qh, *td, *nqh=NULL, *bqh=NULL, *first_td=NULL;
unsigned long destination, status;
char *data;
unsigned int pipe = urb->pipe;
@@ -900,8 +900,8 @@ _static int uhci_submit_bulk_urb (urb_t *urb, urb_t *bulk_urb)
data += pktsze;
len -= pktsze;
-
- last = (len == 0 && (usb_pipein(pipe) || pktsze < maxsze || !(urb->transfer_flags & USB_DISABLE_SPD)));
+ // Use USB_ZERO_PACKET to finish bulk OUTs always with a zero length packet
+ last = (len == 0 && (usb_pipein(pipe) || pktsze < maxsze || !(urb->transfer_flags & USB_ZERO_PACKET)));
if (last)
td->hw.td.status |= TD_CTRL_IOC; // last one generates INT
@@ -1178,6 +1178,9 @@ _static void uhci_cleanup_unlink(uhci_t *s, int force)
urb_priv = (urb_priv_t*)urb->hcpriv;
q = urb->urb_list.next;
+ if (!urb_priv) // avoid crash when URB is corrupted
+ break;
+
if (force ||
((urb_priv->started != 0xffffffff) && (urb_priv->started != now))) {
async_dbg("async cleanup %p",urb);
@@ -1205,7 +1208,8 @@ _static void uhci_cleanup_unlink(uhci_t *s, int force)
pipe = urb->pipe; // completion may destroy all...
dev = urb->dev;
urb_priv = urb->hcpriv;
-
+ list_del (&urb->urb_list);
+
if (urb->complete) {
spin_unlock(&s->urb_list_lock);
urb->dev = NULL;
@@ -1229,7 +1233,6 @@ _static void uhci_cleanup_unlink(uhci_t *s, int force)
kfree (urb_priv);
#endif
- list_del (&urb->urb_list);
}
}
}
@@ -2282,8 +2285,11 @@ _static int process_transfer (uhci_t *s, urb_t *urb, int mode)
for (; p != &qh->vertical; p = p->next) {
desc = list_entry (p, uhci_desc_t, vertical);
- if (desc->hw.td.status & TD_CTRL_ACTIVE) // do not process active TDs
+ if (desc->hw.td.status & TD_CTRL_ACTIVE) { // do not process active TDs
+ if (mode==2) // if called from async_unlink
+ uhci_clean_transfer(s, urb, qh, mode);
return ret;
+ }
actual_length = (desc->hw.td.status + 1) & 0x7ff; // extract transfer parameters from TD
maxlength = (((desc->hw.td.info >> 21) & 0x7ff) + 1) & 0x7ff;
@@ -2625,19 +2631,22 @@ _static int process_urb (uhci_t *s, struct list_head *p)
// Completion
if (urb->complete) {
+ int was_unlinked = (urb->status == -ENOENT);
urb->dev = NULL;
spin_unlock(&s->urb_list_lock);
urb->complete ((struct urb *) urb);
// Re-submit the URB if ring-linked
- if (is_ring && (urb->status != -ENOENT) && !contains_killed) {
+ if (is_ring && !was_unlinked && !contains_killed) {
urb->dev=usb_dev;
uhci_submit_urb (urb);
- }
+ } else
+ urb = 0;
spin_lock(&s->urb_list_lock);
}
usb_dec_dev_use (usb_dev);
- spin_unlock(&urb->lock);
+ if (urb)
+ spin_unlock(&urb->lock);
}
}
@@ -2942,6 +2951,8 @@ uhci_pci_probe (struct pci_dev *dev, const struct pci_device_id *id)
if (pci_enable_device(dev) < 0)
return -ENODEV;
+ pci_set_master(dev);
+
/* Search for the IO base address.. */
for (i = 0; i < 6; i++) {
@@ -2955,8 +2966,7 @@ uhci_pci_probe (struct pci_dev *dev, const struct pci_device_id *id)
break;
/* disable legacy emulation */
pci_write_config_word (dev, USBLEGSUP, 0);
-
- pci_set_master(dev);
+
return alloc_uhci(dev, dev->irq, io_addr, io_size);
}
return -ENODEV;
diff --git a/fs/buffer.c b/fs/buffer.c
index 07528fb47f43..3ad092069461 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -301,6 +301,23 @@ void sync_dev(kdev_t dev)
*/
}
+int fsync_super(struct super_block *sb)
+{
+ kdev_t dev = sb->s_dev;
+ sync_buffers(dev, 0);
+
+ lock_kernel();
+ sync_inodes_sb(sb);
+ lock_super(sb);
+ if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ unlock_super(sb);
+ DQUOT_SYNC(dev);
+ unlock_kernel();
+
+ return sync_buffers(dev, 1);
+}
+
int fsync_dev(kdev_t dev)
{
sync_buffers(dev, 0);
@@ -1181,10 +1198,10 @@ static __inline__ void __put_unused_buffer_head(struct buffer_head * bh)
kmem_cache_free(bh_cachep, bh);
} else {
bh->b_blocknr = -1;
- init_waitqueue_head(&bh->b_wait);
+ bh->b_this_page = NULL;
+
nr_unused_buffer_heads++;
bh->b_next_free = unused_list;
- bh->b_this_page = NULL;
unused_list = bh;
}
}
@@ -1213,8 +1230,8 @@ static struct buffer_head * get_unused_buffer_head(int async)
* more buffer-heads itself. Thus SLAB_BUFFER.
*/
if((bh = kmem_cache_alloc(bh_cachep, SLAB_BUFFER)) != NULL) {
- memset(bh, 0, sizeof(*bh));
- init_waitqueue_head(&bh->b_wait);
+ bh->b_blocknr = -1;
+ bh->b_this_page = NULL;
return bh;
}
@@ -1976,7 +1993,6 @@ static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate)
end_kio_request(kiobuf, uptodate);
}
-
/*
* For brw_kiovec: submit a set of buffer_head temporary IOs and wait
* for them to complete. Clean up the buffer_heads afterwards.
@@ -1984,21 +2000,18 @@ static void end_buffer_io_kiobuf(struct buffer_head *bh, int uptodate)
static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size)
{
- int iosize;
+ int iosize, err;
int i;
struct buffer_head *tmp;
-
iosize = 0;
- spin_lock(&unused_list_lock);
+ err = 0;
for (i = nr; --i >= 0; ) {
iosize += size;
tmp = bh[i];
if (buffer_locked(tmp)) {
- spin_unlock(&unused_list_lock);
wait_on_buffer(tmp);
- spin_lock(&unused_list_lock);
}
if (!buffer_uptodate(tmp)) {
@@ -2006,13 +2019,13 @@ static int wait_kio(int rw, int nr, struct buffer_head *bh[], int size)
clearing iosize on error calculates the
amount of IO before the first error. */
iosize = 0;
+ err = -EIO;
}
- __put_unused_buffer_head(tmp);
}
- spin_unlock(&unused_list_lock);
-
- return iosize;
+ if (iosize)
+ return iosize;
+ return err;
}
/*
@@ -2041,7 +2054,7 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
unsigned long blocknr;
struct kiobuf * iobuf = NULL;
struct page * map;
- struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];
+ struct buffer_head *tmp, **bhs = NULL;
if (!nr)
return 0;
@@ -2067,22 +2080,20 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
offset = iobuf->offset;
length = iobuf->length;
iobuf->errno = 0;
+ if (!bhs)
+ bhs = iobuf->bh;
for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
map = iobuf->maplist[pageind];
if (!map) {
err = -EFAULT;
- goto error;
+ goto finished;
}
while (length > 0) {
blocknr = b[bufind++];
- tmp = get_unused_buffer_head(0);
- if (!tmp) {
- err = -ENOMEM;
- goto error;
- }
-
+ tmp = bhs[bhind++];
+
tmp->b_dev = B_FREE;
tmp->b_size = size;
set_bh_page(tmp, map, offset);
@@ -2096,9 +2107,9 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
if (rw == WRITE) {
set_bit(BH_Uptodate, &tmp->b_state);
clear_bit(BH_Dirty, &tmp->b_state);
- }
+ } else
+ set_bit(BH_Uptodate, &tmp->b_state);
- bh[bhind++] = tmp;
length -= size;
offset += size;
@@ -2109,7 +2120,8 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
* Wait for IO if we have got too much
*/
if (bhind >= KIO_MAX_SECTORS) {
- err = wait_kio(rw, bhind, bh, size);
+ kiobuf_wait_for_io(iobuf); /* wake-one */
+ err = wait_kio(rw, bhind, bhs, size);
if (err >= 0)
transferred += err;
else
@@ -2127,7 +2139,8 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
/* Is there any IO still left to submit? */
if (bhind) {
- err = wait_kio(rw, bhind, bh, size);
+ kiobuf_wait_for_io(iobuf); /* wake-one */
+ err = wait_kio(rw, bhind, bhs, size);
if (err >= 0)
transferred += err;
else
@@ -2138,16 +2151,6 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
if (transferred)
return transferred;
return err;
-
- error:
- /* We got an error allocating the bh'es. Just free the current
- buffer_heads and exit. */
- spin_lock(&unused_list_lock);
- for (i = bhind; --i >= 0; ) {
- __put_unused_buffer_head(bh[i]);
- }
- spin_unlock(&unused_list_lock);
- goto finished;
}
/*
@@ -2599,7 +2602,7 @@ static int sync_old_buffers(void)
{
lock_kernel();
sync_supers(0);
- sync_inodes(0);
+ sync_unlocked_inodes();
unlock_kernel();
flush_dirty_buffers(1);
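
fsync_super() is the superblock-granular half of the fsync/umount race fix from the changelog: unlike fsync_dev(), it syncs exactly one superblock's dirty inodes through sync_inodes_sb(), which (per the comment added in fs/inode.c below) expects the caller to hold sb->s_umount exclusively. A hedged sketch of the intended calling pattern; the fs/super.c side of the commit lies outside this excerpt:

    down_write(&sb->s_umount);      /* exclude a concurrent umount */
    fsync_super(sb);                /* sync_buffers + sync_inodes_sb + write_super */
    up_write(&sb->s_umount);
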
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 5a640dda718c..fad286d0e326 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -97,7 +97,6 @@ static struct super_block * coda_read_super(struct super_block *sb,
struct coda_sb_info *sbi = NULL;
struct venus_comm *vc = NULL;
ViceFid fid;
- kdev_t dev = sb->s_dev;
int error;
int idx;
ENTRY;
@@ -139,7 +138,6 @@ static struct super_block * coda_read_super(struct super_block *sb,
sb->s_blocksize = 1024; /* XXXXX what do we put here?? */
sb->s_blocksize_bits = 10;
sb->s_magic = CODA_SUPER_MAGIC;
- sb->s_dev = dev;
sb->s_op = &coda_super_operations;
/* get root fid from Venus: this needs the root inode */
diff --git a/fs/dcache.c b/fs/dcache.c
index 4b7ab57f4d9f..8c0c05187726 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1225,6 +1225,18 @@ static void __init dcache_init(unsigned long mempages)
} while (i);
}
+static void init_buffer_head(void * foo, kmem_cache_t * cachep, unsigned long flags)
+{
+ if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ {
+ struct buffer_head * bh = (struct buffer_head *) foo;
+
+ memset(bh, 0, sizeof(*bh));
+ init_waitqueue_head(&bh->b_wait);
+ }
+}
+
/* SLAB cache for __getname() consumers */
kmem_cache_t *names_cachep;
@@ -1242,7 +1254,7 @@ void __init vfs_caches_init(unsigned long mempages)
{
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
+ SLAB_HWCACHE_ALIGN, init_buffer_head, NULL);
if(!bh_cachep)
panic("Cannot create buffer head SLAB cache");
diff --git a/fs/inode.c b/fs/inode.c
index d32edf37ce00..a40c75c73dcd 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -133,28 +133,26 @@ void __mark_inode_dirty(struct inode *inode, int flags)
{
struct super_block * sb = inode->i_sb;
- if (sb) {
- /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
- if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
- if (sb->s_op && sb->s_op->dirty_inode)
- sb->s_op->dirty_inode(inode);
- }
+ /* Don't do this for I_DIRTY_PAGES - that doesn't actually dirty the inode itself */
+ if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
+ if (sb->s_op && sb->s_op->dirty_inode)
+ sb->s_op->dirty_inode(inode);
+ }
- /* avoid the locking if we can */
- if ((inode->i_state & flags) == flags)
- return;
+ /* avoid the locking if we can */
+ if ((inode->i_state & flags) == flags)
+ return;
- spin_lock(&inode_lock);
- if ((inode->i_state & flags) != flags) {
- inode->i_state |= flags;
- /* Only add valid (ie hashed) inodes to the dirty list */
- if (!list_empty(&inode->i_hash)) {
- list_del(&inode->i_list);
- list_add(&inode->i_list, &sb->s_dirty);
- }
+ spin_lock(&inode_lock);
+ if ((inode->i_state & flags) != flags) {
+ inode->i_state |= flags;
+ /* Only add valid (ie hashed) inodes to the dirty list */
+ if (!(inode->i_state & I_LOCK) && !list_empty(&inode->i_hash)) {
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, &sb->s_dirty);
}
- spin_unlock(&inode_lock);
}
+ spin_unlock(&inode_lock);
}
static void __wait_on_inode(struct inode * inode)
@@ -192,7 +190,7 @@ static inline void __iget(struct inode * inode)
return;
}
atomic_inc(&inode->i_count);
- if (!(inode->i_state & I_DIRTY)) {
+ if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
list_del(&inode->i_list);
list_add(&inode->i_list, &inode_in_use);
}
@@ -204,9 +202,10 @@ static inline void __sync_one(struct inode *inode, int sync)
unsigned dirty;
list_del(&inode->i_list);
- list_add(&inode->i_list, atomic_read(&inode->i_count)
- ? &inode_in_use
- : &inode_unused);
+ list_add(&inode->i_list, &inode->i_sb->s_locked_inodes);
+
+ if (inode->i_state & I_LOCK)
+ BUG();
/* Set I_LOCK, reset I_DIRTY */
dirty = inode->i_state & I_DIRTY;
@@ -224,6 +223,17 @@ static inline void __sync_one(struct inode *inode, int sync)
spin_lock(&inode_lock);
inode->i_state &= ~I_LOCK;
+ if (!(inode->i_state & I_FREEING)) {
+ struct list_head *to;
+ if (inode->i_state & I_DIRTY)
+ to = &inode->i_sb->s_dirty;
+ else if (atomic_read(&inode->i_count))
+ to = &inode_in_use;
+ else
+ to = &inode_unused;
+ list_del(&inode->i_list);
+ list_add(&inode->i_list, to);
+ }
wake_up(&inode->i_wait);
}
@@ -245,7 +255,37 @@ static inline void sync_list(struct list_head *head)
struct list_head * tmp;
while ((tmp = head->prev) != head)
- sync_one(list_entry(tmp, struct inode, i_list), 0);
+ __sync_one(list_entry(tmp, struct inode, i_list), 0);
+}
+
+static inline int wait_on_dirty(struct list_head *head)
+{
+ struct list_head * tmp;
+ list_for_each(tmp, head) {
+ struct inode *inode = list_entry(tmp, struct inode, i_list);
+ if (!inode->i_state & I_DIRTY)
+ continue;
+ __iget(inode);
+ spin_unlock(&inode_lock);
+ __wait_on_inode(inode);
+ iput(inode);
+ spin_lock(&inode_lock);
+ return 1;
+ }
+ return 0;
+}
+
+static inline void wait_on_locked(struct list_head *head)
+{
+ struct list_head * tmp;
+ while ((tmp = head->prev) != head) {
+ struct inode *inode = list_entry(tmp, struct inode, i_list);
+ __iget(inode);
+ spin_unlock(&inode_lock);
+ __wait_on_inode(inode);
+ iput(inode);
+ spin_lock(&inode_lock);
+ }
}
static inline int try_to_sync_unused_list(struct list_head *head)
@@ -256,8 +296,7 @@ static inline int try_to_sync_unused_list(struct list_head *head)
while ((tmp = tmp->prev) != head) {
inode = list_entry(tmp, struct inode, i_list);
- if (!(inode->i_state & I_LOCK)
- && !atomic_read(&inode->i_count)) {
+ if (!atomic_read(&inode->i_count)) {
/*
* We're under PF_MEMALLOC here, and syncing the
* inode may have to allocate memory. To avoid
@@ -287,7 +326,31 @@ static inline int try_to_sync_unused_list(struct list_head *head)
* sync_inodes goes through the super block's dirty list,
* writes them out, and puts them back on the normal list.
*/
+
+/*
+ * caller holds exclusive lock on sb->s_umount
+ */
+void sync_inodes_sb(struct super_block *sb)
+{
+ spin_lock(&inode_lock);
+ sync_list(&sb->s_dirty);
+ wait_on_locked(&sb->s_locked_inodes);
+ spin_unlock(&inode_lock);
+}
+
+void sync_unlocked_inodes(void)
+{
+ struct super_block * sb = sb_entry(super_blocks.next);
+ for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
+ if (!list_empty(&sb->s_dirty)) {
+ spin_lock(&inode_lock);
+ sync_list(&sb->s_dirty);
+ spin_unlock(&inode_lock);
+ }
+ }
+}
+
void sync_inodes(kdev_t dev)
{
struct super_block * sb = sb_entry(super_blocks.next);
@@ -295,19 +358,23 @@ void sync_inodes(kdev_t dev)
/*
* Search the super_blocks array for the device(s) to sync.
*/
- spin_lock(&inode_lock);
for (; sb != sb_entry(&super_blocks); sb = sb_entry(sb->s_list.next)) {
if (!sb->s_dev)
continue;
if (dev && sb->s_dev != dev)
continue;
-
- sync_list(&sb->s_dirty);
-
+ down_read(&sb->s_umount);
+ if (sb->s_dev && (sb->s_dev == dev || !dev)) {
+ spin_lock(&inode_lock);
+ do {
+ sync_list(&sb->s_dirty);
+ } while (wait_on_dirty(&sb->s_locked_inodes));
+ spin_unlock(&inode_lock);
+ }
+ up_read(&sb->s_umount);
if (dev)
break;
}
- spin_unlock(&inode_lock);
}
/*
@@ -517,6 +584,7 @@ int invalidate_inodes(struct super_block * sb)
busy = invalidate_list(&inode_in_use, sb, &throw_away);
busy |= invalidate_list(&inode_unused, sb, &throw_away);
busy |= invalidate_list(&sb->s_dirty, sb, &throw_away);
+ busy |= invalidate_list(&sb->s_locked_inodes, sb, &throw_away);
spin_unlock(&inode_lock);
dispose_list(&throw_away);
@@ -557,7 +625,7 @@ free_unused:
entry = entry->prev;
inode = INODE(tmp);
- if (inode->i_state & (I_FREEING|I_CLEAR))
+ if (inode->i_state & (I_FREEING|I_CLEAR|I_LOCK))
BUG();
if (!CAN_UNUSE(inode))
continue;
@@ -940,10 +1008,9 @@ void iput(struct inode *inode)
BUG();
} else {
if (!list_empty(&inode->i_hash)) {
- if (!(inode->i_state & I_DIRTY)) {
+ if (!(inode->i_state & (I_DIRTY|I_LOCK))) {
list_del(&inode->i_list);
- list_add(&inode->i_list,
- &inode_unused);
+ list_add(&inode->i_list, &inode_unused);
}
inodes_stat.nr_unused++;
spin_unlock(&inode_lock);
@@ -1086,23 +1153,25 @@ void remove_dquot_ref(kdev_t dev, short type)
/* We have to be protected against other CPUs */
spin_lock(&inode_lock);
- for (act_head = inode_in_use.next; act_head != &inode_in_use; act_head = act_head->next) {
+ list_for_each(act_head, &inode_in_use) {
inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
- continue;
- remove_inode_dquot_ref(inode, type, &tofree_head);
+ if (inode->i_sb == sb && IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
}
- for (act_head = inode_unused.next; act_head != &inode_unused; act_head = act_head->next) {
+ list_for_each(act_head, &inode_unused) {
inode = list_entry(act_head, struct inode, i_list);
- if (inode->i_sb != sb || !IS_QUOTAINIT(inode))
- continue;
- remove_inode_dquot_ref(inode, type, &tofree_head);
+ if (inode->i_sb == sb && IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
}
- for (act_head = sb->s_dirty.next; act_head != &sb->s_dirty; act_head = act_head->next) {
+ list_for_each(act_head, &sb->s_dirty) {
inode = list_entry(act_head, struct inode, i_list);
- if (!IS_QUOTAINIT(inode))
- continue;
- remove_inode_dquot_ref(inode, type, &tofree_head);
+ if (IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
+ }
+ list_for_each(act_head, &sb->s_locked_inodes) {
+ inode = list_entry(act_head, struct inode, i_list);
+ if (IS_QUOTAINIT(inode))
+ remove_inode_dquot_ref(inode, type, &tofree_head);
}
spin_unlock(&inode_lock);
diff --git a/fs/iobuf.c b/fs/iobuf.c
index 5401243557b4..9cfd01eaf9a4 100644
--- a/fs/iobuf.c
+++ b/fs/iobuf.c
@@ -8,9 +8,7 @@
#include <linux/iobuf.h>
#include <linux/slab.h>
-
-static kmem_cache_t *kiobuf_cachep;
-
+#include <linux/vmalloc.h>
void end_kio_request(struct kiobuf *kiobuf, int uptodate)
{
@@ -24,18 +22,7 @@ void end_kio_request(struct kiobuf *kiobuf, int uptodate)
}
}
-
-void __init kiobuf_setup(void)
-{
- kiobuf_cachep = kmem_cache_create("kiobuf",
- sizeof(struct kiobuf),
- 0,
- SLAB_HWCACHE_ALIGN, NULL, NULL);
- if(!kiobuf_cachep)
- panic("Cannot create kernel iobuf cache\n");
-}
-
-void kiobuf_init(struct kiobuf *iobuf)
+static void kiobuf_init(struct kiobuf *iobuf)
{
memset(iobuf, 0, sizeof(*iobuf));
init_waitqueue_head(&iobuf->wait_queue);
@@ -43,18 +30,48 @@ void kiobuf_init(struct kiobuf *iobuf)
iobuf->maplist = iobuf->map_array;
}
+int alloc_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+ int i;
+
+ for (i = 0; i < KIO_MAX_SECTORS; i++)
+ if (!(kiobuf->bh[i] = kmem_cache_alloc(bh_cachep, SLAB_KERNEL))) {
+ while (i--) {
+ kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+ kiobuf->bh[i] = NULL;
+ }
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+void free_kiobuf_bhs(struct kiobuf * kiobuf)
+{
+ int i;
+
+ for (i = 0; i < KIO_MAX_SECTORS; i++) {
+ kmem_cache_free(bh_cachep, kiobuf->bh[i]);
+ kiobuf->bh[i] = NULL;
+ }
+}
+
int alloc_kiovec(int nr, struct kiobuf **bufp)
{
int i;
struct kiobuf *iobuf;
for (i = 0; i < nr; i++) {
- iobuf = kmem_cache_alloc(kiobuf_cachep, SLAB_KERNEL);
+ iobuf = vmalloc(sizeof(struct kiobuf));
if (!iobuf) {
free_kiovec(i, bufp);
return -ENOMEM;
}
kiobuf_init(iobuf);
+ if (alloc_kiobuf_bhs(iobuf)) {
+ vfree(iobuf);
+ free_kiovec(i, bufp);
+ return -ENOMEM;
+ }
bufp[i] = iobuf;
}
@@ -72,7 +89,8 @@ void free_kiovec(int nr, struct kiobuf **bufp)
unlock_kiovec(1, &iobuf);
if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
- kmem_cache_free(kiobuf_cachep, bufp[i]);
+ free_kiobuf_bhs(iobuf);
+ vfree(bufp[i]);
}
}
@@ -115,11 +133,12 @@ void kiobuf_wait_for_io(struct kiobuf *kiobuf)
add_wait_queue(&kiobuf->wait_queue, &wait);
repeat:
- run_task_queue(&tq_disk);
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (atomic_read(&kiobuf->io_count) != 0) {
+ run_task_queue(&tq_disk);
schedule();
- goto repeat;
+ if (atomic_read(&kiobuf->io_count) != 0)
+ goto repeat;
}
tsk->state = TASK_RUNNING;
remove_wait_queue(&kiobuf->wait_queue, &wait);
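Caller-side effect of the iobuf rework above, sketched here purely for illustration (not part of the patch) and assuming only the prototypes visible in this diff: alloc_kiovec() now vmalloc()s the kiobuf and preallocates its KIO_MAX_SECTORS buffer heads, so there is no longer any kiobuf_setup()/kiobuf_init() for callers to do.
/* Illustrative sketch only; assumes the 2.4.4 prototypes shown above. */
#include <linux/iobuf.h>

static int kiovec_example(void)
{
	struct kiobuf *iobuf;
	int err;

	err = alloc_kiovec(1, &iobuf);	/* kiobuf + bh[] in one call */
	if (err)
		return err;

	/* map_user_kiobuf()/brw_kiovec()/kiobuf_wait_for_io() would go here */

	free_kiovec(1, &iobuf);		/* also releases the buffer heads */
	return 0;
}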
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index ea83fd5496af..eb9eb64b994f 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -8,6 +8,7 @@
#include <linux/reiserfs_fs.h>
#include <linux/locks.h>
#include <asm/bitops.h>
+#include <linux/list.h>
#else
@@ -580,6 +581,12 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th,
if (p_s_inode->u.reiserfs_i.i_prealloc_count > 0) {
p_s_inode->u.reiserfs_i.i_prealloc_count--;
*free_blocknrs = p_s_inode->u.reiserfs_i.i_prealloc_block++;
+
+ /* if no more preallocated blocks, remove inode from list */
+ if (! p_s_inode->u.reiserfs_i.i_prealloc_count) {
+ list_del(&p_s_inode->u.reiserfs_i.i_prealloc_list);
+ }
+
return ret;
}
@@ -633,6 +640,11 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th,
*free_blocknrs = p_s_inode->u.reiserfs_i.i_prealloc_block;
p_s_inode->u.reiserfs_i.i_prealloc_block++;
+ /* if the inode has preallocated blocks, link it to the list */
+ if (p_s_inode->u.reiserfs_i.i_prealloc_count) {
+ list_add(&p_s_inode->u.reiserfs_i.i_prealloc_list,
+ &SB_JOURNAL(th->t_super)->j_prealloc_list);
+ }
/* we did actually manage to get 1 block */
if (ret != CARRY_ON && allocated[0] > 0) {
return CARRY_ON ;
@@ -664,16 +676,43 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th,
// a portion of this function, was derived from minix or ext2's
// analog. You should be able to tell which portion by looking at the
// ext2 code and comparing.
+static void __discard_prealloc (struct reiserfs_transaction_handle * th,
+ struct inode * inode)
+{
+ while (inode->u.reiserfs_i.i_prealloc_count > 0) {
+ reiserfs_free_block(th,inode->u.reiserfs_i.i_prealloc_block);
+ inode->u.reiserfs_i.i_prealloc_block++;
+ inode->u.reiserfs_i.i_prealloc_count --;
+ }
+ list_del (&(inode->u.reiserfs_i.i_prealloc_list));
+}
+
void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th,
struct inode * inode)
{
- if (inode->u.reiserfs_i.i_prealloc_count > 0) {
- while (inode->u.reiserfs_i.i_prealloc_count--) {
- reiserfs_free_block(th,inode->u.reiserfs_i.i_prealloc_block);
- inode->u.reiserfs_i.i_prealloc_block++;
- }
+#ifdef CONFIG_REISERFS_CHECK
+ if (inode->u.reiserfs_i.i_prealloc_count < 0)
+ reiserfs_warning("zam-4001:" __FUNCTION__ ": inode has negative prealloc blocks count.\n");
+#endif
+ if (inode->u.reiserfs_i.i_prealloc_count > 0) {
+ __discard_prealloc(th, inode);
+ }
+}
+
+void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th)
+{
+ struct list_head * plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
+ struct inode * inode;
+
+ while (!list_empty(plist)) {
+ inode = list_entry(plist->next, struct inode, u.reiserfs_i.i_prealloc_list);
+#ifdef CONFIG_REISERFS_CHECK
+ if (!inode->u.reiserfs_i.i_prealloc_count) {
+ reiserfs_warning("zam-4001:" __FUNCTION__ ": inode is in prealloc list but has no preallocated blocks.\n");
}
- inode->u.reiserfs_i.i_prealloc_count = 0;
+#endif
+ __discard_prealloc(th, inode);
+ }
}
#endif
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 52237eb283f1..c2c3222791ba 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1185,13 +1185,19 @@ struct inode * reiserfs_iget (struct super_block * s, struct cpu_key * key)
if (!inode)
return inode ;
- // if (comp_short_keys (INODE_PKEY (inode), key)) {
if (is_bad_inode (inode)) {
reiserfs_warning ("vs-13048: reiserfs_iget: "
"bad_inode. Stat data of (%lu %lu) not found\n",
key->on_disk_key.k_dir_id, key->on_disk_key.k_objectid);
iput (inode);
inode = 0;
+ } else if (comp_short_keys (INODE_PKEY (inode), key)) {
+ reiserfs_warning ("vs-13049: reiserfs_iget: "
+ "Looking for (%lu %lu), found inode of (%lu %lu)\n",
+ key->on_disk_key.k_dir_id, key->on_disk_key.k_objectid,
+ INODE_PKEY (inode)->k_dir_id, INODE_PKEY (inode)->k_objectid);
+ iput (inode);
+ inode = 0;
}
return inode;
}
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index d5513d59ffbb..bf77724ef03c 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -1925,8 +1925,11 @@ int journal_init(struct super_block *p_s_sb) {
free_journal_ram(p_s_sb) ;
return 1 ;
}
- SB_JOURNAL_LIST_INDEX(p_s_sb) = 0 ; /* once the read is done, we can set this where it belongs */
+ SB_JOURNAL_LIST_INDEX(p_s_sb) = 0 ; /* once the read is done, we can set this
+ where it belongs */
+ INIT_LIST_HEAD (&SB_JOURNAL(p_s_sb)->j_prealloc_list);
+
if (reiserfs_dont_log (p_s_sb))
return 0;
@@ -2983,6 +2986,11 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, struct super_b
flush = 1 ;
}
+#ifdef REISERFS_PREALLOCATE
+ reiserfs_discard_all_prealloc(th); /* it should not involve new blocks into
+ * the transaction */
+#endif
+
rs = SB_DISK_SUPER_BLOCK(p_s_sb) ;
/* setup description block */
d_bh = getblk(p_s_sb->s_dev, reiserfs_get_journal_block(p_s_sb) + SB_JOURNAL(p_s_sb)->j_start, p_s_sb->s_blocksize) ;
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index 27305305eced..ed782d909382 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -466,40 +466,42 @@ static int reiserfs_add_entry (struct reiserfs_transaction_handle *th, struct in
/* find the proper place for the new entry */
memset (bit_string, 0, sizeof (bit_string));
de.de_gen_number_bit_string = (char *)bit_string;
- if (reiserfs_find_entry (dir, name, namelen, &path, &de) == NAME_FOUND) {
+ retval = reiserfs_find_entry (dir, name, namelen, &path, &de);
+ if (retval != NAME_NOT_FOUND) {
if (buffer != small_buf)
reiserfs_kfree (buffer, buflen, dir->i_sb);
pathrelse (&path);
+
+ if (retval != NAME_FOUND) {
+ reiserfs_warning ("zam-7002:" __FUNCTION__ ": \"reiserfs_find_entry\" has returned"
+ " unexpected value (%d)\n", retval);
+ }
+
return -EEXIST;
}
- if (find_first_nonzero_bit (bit_string, MAX_GENERATION_NUMBER + 1) < MAX_GENERATION_NUMBER + 1) {
- /* there are few names with given hash value */
- gen_number = find_first_zero_bit (bit_string, MAX_GENERATION_NUMBER + 1);
- if (gen_number > MAX_GENERATION_NUMBER) {
- /* there is no free generation number */
- reiserfs_warning ("reiserfs_add_entry: Congratulations! we have got hash function screwed up\n");
- if (buffer != small_buf)
- reiserfs_kfree (buffer, buflen, dir->i_sb);
- pathrelse (&path);
- return -EBUSY; //HASHCOLLISION;//EBADSLT
- }
- /* adjust offset of directory enrty */
- deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (deh_offset (deh), gen_number));
- set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset));
+ gen_number = find_first_zero_bit (bit_string, MAX_GENERATION_NUMBER + 1);
+ if (gen_number > MAX_GENERATION_NUMBER) {
+ /* there is no free generation number */
+ reiserfs_warning ("reiserfs_add_entry: Congratulations! we have got hash function screwed up\n");
+ if (buffer != small_buf)
+ reiserfs_kfree (buffer, buflen, dir->i_sb);
+ pathrelse (&path);
+ return -EBUSY;
+ }
+ /* adjust offset of directory entry */
+ deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (deh_offset (deh), gen_number));
+ set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset));
- /* find place for new entry */
- if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) == NAME_FOUND) {
+ if (gen_number != 0) { /* we need to re-search for the insertion point */
+ if (search_by_entry_key (dir->i_sb, &entry_key, &path, &de) != NAME_NOT_FOUND) {
reiserfs_warning ("vs-7032: reiserfs_add_entry: "
- "entry with this key (%k) already exists", &entry_key);
+ "entry with this key (%k) already exists\n", &entry_key);
if (buffer != small_buf)
reiserfs_kfree (buffer, buflen, dir->i_sb);
pathrelse (&path);
return -EBUSY;
}
- } else {
- deh->deh_offset = cpu_to_le32 (SET_GENERATION_NUMBER (le32_to_cpu (deh->deh_offset), 0));
- set_cpu_key_k_offset (&entry_key, le32_to_cpu (deh->deh_offset));
}
/* perform the insertion of the entry that we have prepared */
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 0bcbd4559c65..26c47f2cc7c0 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -18,8 +18,8 @@
// find where objectid map starts
#define objectid_map(s,rs) (old_format_only (s) ? \
- (__u32 *)((struct reiserfs_super_block_v1 *)rs + 1) :\
- (__u32 *)(rs + 1))
+ (__u32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
+ (__u32 *)((rs) + 1))
#ifdef CONFIG_REISERFS_CHECK
@@ -27,7 +27,8 @@
static void check_objectid_map (struct super_block * s, __u32 * map)
{
if (le32_to_cpu (map[0]) != 1)
- reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted");
+ reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx",
+ le32_to_cpu (map[0]));
// FIXME: add something else here
}
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index f7ccb4206566..a9f2c6d962e7 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -77,6 +77,7 @@ void reiserfs_write_super_lockfs (struct super_block * s)
lock_kernel() ;
if (!(s->s_flags & MS_RDONLY)) {
journal_begin(&th, s, 1) ;
+ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1);
journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s));
reiserfs_block_writes(&th) ;
journal_end(&th, s, 1) ;
diff --git a/fs/smbfs/getopt.c b/fs/smbfs/getopt.c
index 0c5d111b3fa8..20276da095bf 100644
--- a/fs/smbfs/getopt.c
+++ b/fs/smbfs/getopt.c
@@ -30,8 +30,10 @@ int smb_getopt(char *caller, char **options, struct option *opts,
char *val;
int i;
- if ( (token = strsep(options, ",")) == NULL)
- return 0;
+ do {
+ if ((token = strsep(options, ",")) == NULL)
+ return 0;
+ } while (*token == '\0');
*optopt = token;
*optarg = NULL;
diff --git a/fs/super.c b/fs/super.c
index 20b50923196c..861a46872d54 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -712,10 +712,12 @@ static struct super_block *get_empty_super(void)
nr_super_blocks++;
memset(s, 0, sizeof(struct super_block));
INIT_LIST_HEAD(&s->s_dirty);
+ INIT_LIST_HEAD(&s->s_locked_inodes);
list_add (&s->s_list, super_blocks.prev);
init_waitqueue_head(&s->s_wait);
INIT_LIST_HEAD(&s->s_files);
INIT_LIST_HEAD(&s->s_mounts);
+ init_rwsem(&s->s_umount);
}
return s;
}
@@ -895,13 +897,14 @@ static void kill_super(struct super_block *sb, int umount_root)
struct file_system_type *fs = sb->s_type;
struct super_operations *sop = sb->s_op;
+ down_write(&sb->s_umount);
sb->s_root = NULL;
/* Need to clean after the sucker */
if (fs->fs_flags & FS_LITTER)
d_genocide(root);
shrink_dcache_parent(root);
dput(root);
- fsync_dev(sb->s_dev);
+ fsync_super(sb);
lock_super(sb);
if (sop) {
if (sop->write_super && sb->s_dirt)
@@ -923,6 +926,7 @@ static void kill_super(struct super_block *sb, int umount_root)
put_filesystem(fs);
sb->s_type = NULL;
unlock_super(sb);
+ up_write(&sb->s_umount);
if (umount_root) {
/* special: the old device driver is going to be
a ramdisk and the point of this call is to free its
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 39dc8ac3aa36..b1fa3cc77bff 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -655,9 +655,11 @@ struct super_block {
unsigned long s_flags;
unsigned long s_magic;
struct dentry *s_root;
+ struct rw_semaphore s_umount;
wait_queue_head_t s_wait;
struct list_head s_dirty; /* dirty inodes */
+ struct list_head s_locked_inodes;/* inodes being synced */
struct list_head s_files;
struct block_device *s_bdev;
@@ -1090,9 +1092,12 @@ extern void invalidate_inode_buffers(struct inode *);
#define destroy_buffers(dev) __invalidate_buffers((dev), 1)
extern void __invalidate_buffers(kdev_t dev, int);
extern void sync_inodes(kdev_t);
+extern void sync_unlocked_inodes(void);
extern void write_inode_now(struct inode *, int);
extern void sync_dev(kdev_t);
extern int fsync_dev(kdev_t);
+extern int fsync_super(struct super_block *);
+extern void sync_inodes_sb(struct super_block *);
extern int fsync_inode_buffers(struct inode *);
extern int osync_inode_buffers(struct inode *);
extern int inode_has_buffers(struct inode *);
diff --git a/include/linux/iobuf.h b/include/linux/iobuf.h
index 3de43c924039..619187efec8d 100644
--- a/include/linux/iobuf.h
+++ b/include/linux/iobuf.h
@@ -24,8 +24,7 @@
* entire iovec.
*/
-#define KIO_MAX_ATOMIC_IO 64 /* in kb */
-#define KIO_MAX_ATOMIC_BYTES (64 * 1024)
+#define KIO_MAX_ATOMIC_IO 512 /* in kb */
#define KIO_STATIC_PAGES (KIO_MAX_ATOMIC_IO / (PAGE_SIZE >> 10) + 1)
#define KIO_MAX_SECTORS (KIO_MAX_ATOMIC_IO * 2)
@@ -47,8 +46,10 @@ struct kiobuf
unsigned int locked : 1; /* If set, pages has been locked */
- /* Always embed enough struct pages for 64k of IO */
+ /* Always embed enough struct pages for atomic IO */
struct page * map_array[KIO_STATIC_PAGES];
+ struct buffer_head * bh[KIO_MAX_SECTORS];
+ unsigned long blocks[KIO_MAX_SECTORS];
/* Dynamic state for IO completion: */
atomic_t io_count; /* IOs still in progress */
@@ -64,17 +65,18 @@ int map_user_kiobuf(int rw, struct kiobuf *, unsigned long va, size_t len);
void unmap_kiobuf(struct kiobuf *iobuf);
int lock_kiovec(int nr, struct kiobuf *iovec[], int wait);
int unlock_kiovec(int nr, struct kiobuf *iovec[]);
+void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes);
/* fs/iobuf.c */
-void __init kiobuf_setup(void);
-void kiobuf_init(struct kiobuf *);
void end_kio_request(struct kiobuf *, int);
void simple_wakeup_kiobuf(struct kiobuf *);
int alloc_kiovec(int nr, struct kiobuf **);
void free_kiovec(int nr, struct kiobuf **);
int expand_kiobuf(struct kiobuf *, int);
void kiobuf_wait_for_io(struct kiobuf *);
+extern int alloc_kiobuf_bhs(struct kiobuf *);
+extern void free_kiobuf_bhs(struct kiobuf *);
/* fs/buffer.c */
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index c18810e526c4..46c2b72e33f3 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -145,6 +145,8 @@ extern void nf_reinject(struct sk_buff *skb,
struct nf_info *info,
unsigned int verdict);
+extern void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
+
#ifdef CONFIG_NETFILTER_DEBUG
extern void nf_dump_skb(int pf, struct sk_buff *skb);
#endif
diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h
index 35caff35f1f5..9d8a18a06048 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack.h
@@ -32,6 +32,7 @@ enum ip_conntrack_info
#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
+#include <linux/netfilter_ipv4/ip_conntrack_icmp.h>
#ifdef CONFIG_NF_DEBUG
#define IP_NF_ASSERT(x) \
@@ -56,12 +57,8 @@ enum ip_conntrack_status {
IPS_SEEN_REPLY_BIT = 1,
IPS_SEEN_REPLY = (1 << IPS_SEEN_REPLY_BIT),
- /* Packet seen leaving box: bit 2 set. Can be set, not unset. */
- IPS_CONFIRMED_BIT = 2,
- IPS_CONFIRMED = (1 << IPS_CONFIRMED_BIT),
-
/* Conntrack should never be early-expired. */
- IPS_ASSURED_BIT = 4,
+ IPS_ASSURED_BIT = 2,
IPS_ASSURED = (1 << IPS_ASSURED_BIT),
};
@@ -88,7 +85,7 @@ struct ip_conntrack_expect
struct ip_conntrack
{
- /* Usage count in here is 1 for destruct timer, 1 per skb,
+ /* Usage count in here is 1 for hash table/destruct timer, 1 per skb,
plus 1 for any connection(s) we are `master' for */
struct nf_conntrack ct_general;
@@ -119,6 +116,7 @@ struct ip_conntrack
union {
struct ip_ct_tcp tcp;
+ struct ip_ct_icmp icmp;
} proto;
union {
@@ -177,5 +175,13 @@ ip_ct_gather_frags(struct sk_buff *skb);
extern void
ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
void *data);
+
+/* It's confirmed if it is, or has been in the hash table. */
+static inline int is_confirmed(struct ip_conntrack *ct)
+{
+ return ct->tuplehash[IP_CT_DIR_ORIGINAL].list.next != NULL;
+}
+
+extern unsigned int ip_conntrack_htable_size;
#endif /* __KERNEL__ */
#endif /* _IP_CONNTRACK_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_core.h b/include/linux/netfilter_ipv4/ip_conntrack_core.h
index bcf300c51d45..6ed40793af6a 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_core.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_core.h
@@ -33,10 +33,17 @@ struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack);
-/* Confirm a connection */
-void ip_conntrack_confirm(struct ip_conntrack *ct);
+extern int __ip_conntrack_confirm(struct nf_ct_info *nfct);
+
+/* Confirm a connection: returns NF_DROP if packet must be dropped. */
+static inline int ip_conntrack_confirm(struct sk_buff *skb)
+{
+ if (skb->nfct
+ && !is_confirmed((struct ip_conntrack *)skb->nfct->master))
+ return __ip_conntrack_confirm(skb->nfct);
+ return NF_ACCEPT;
+}
-extern unsigned int ip_conntrack_htable_size;
extern struct list_head *ip_conntrack_hash;
extern struct list_head expect_list;
DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_icmp.h b/include/linux/netfilter_ipv4/ip_conntrack_icmp.h
new file mode 100644
index 000000000000..f1664abbe392
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ip_conntrack_icmp.h
@@ -0,0 +1,11 @@
+#ifndef _IP_CONNTRACK_ICMP_H
+#define _IP_CONNTRACK_ICMP_H
+/* ICMP tracking. */
+#include <asm/atomic.h>
+
+struct ip_ct_icmp
+{
+ /* Optimization: when number in == number out, forget immediately. */
+ atomic_t count;
+};
+#endif /* _IP_CONNTRACK_ICMP_H */
diff --git a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
index f5fd96690f46..83076c3c5f25 100644
--- a/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
+++ b/include/linux/netfilter_ipv4/ip_conntrack_protocol.h
@@ -38,9 +38,9 @@ struct ip_conntrack_protocol
enum ip_conntrack_info ctinfo);
/* Called when a new connection for this protocol found;
- * returns timeout. If so, packet() called next. */
- unsigned long (*new)(struct ip_conntrack *conntrack,
- struct iphdr *iph, size_t len);
+ * returns TRUE if it's OK. If so, packet() called next. */
+ int (*new)(struct ip_conntrack *conntrack, struct iphdr *iph,
+ size_t len);
/* Module (if any) which this is connected to. */
struct module *me;
diff --git a/include/linux/raw.h b/include/linux/raw.h
index a2d9b14cd302..4736390a5db5 100644
--- a/include/linux/raw.h
+++ b/include/linux/raw.h
@@ -13,11 +13,4 @@ struct raw_config_request
__u64 block_minor;
};
-#ifdef __KERNEL__
-
-/* drivers/char/raw.c */
-extern void raw_init(void);
-
-#endif /* __KERNEL__ */
-
#endif /* __LINUX_RAW_H */
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 97dbc003473b..d02476de1485 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -1572,7 +1572,7 @@ extern wait_queue_head_t reiserfs_commit_thread_wait ;
#define JOURNAL_MAX_BATCH 900 /* max blocks to batch into one transaction, don't make this any bigger than 900 */
#define JOURNAL_MAX_COMMIT_AGE 30
#define JOURNAL_MAX_TRANS_AGE 30
-#define JOURNAL_PER_BALANCE_CNT 12 /* must be >= (5 + 2 * (MAX_HEIGHT-2) + 1) */
+#define JOURNAL_PER_BALANCE_CNT (3 * (MAX_HEIGHT-2) + 9)
/* both of these can be as low as 1, or as high as you want. The min is the
** number of 4k bitmap nodes preallocated on mount. New nodes are allocated
@@ -1950,6 +1950,7 @@ int reiserfs_new_unf_blocknrs2 (struct reiserfs_transaction_handle *th,
void reiserfs_discard_prealloc (struct reiserfs_transaction_handle *th,
struct inode * inode);
+void reiserfs_discard_all_prealloc (struct reiserfs_transaction_handle *th);
#endif
/* hashes.c */
diff --git a/include/linux/reiserfs_fs_i.h b/include/linux/reiserfs_fs_i.h
index 3a60b86675c5..39c37e2045c6 100644
--- a/include/linux/reiserfs_fs_i.h
+++ b/include/linux/reiserfs_fs_i.h
@@ -1,6 +1,8 @@
#ifndef _REISER_FS_I
#define _REISER_FS_I
+#include <linux/list.h>
+
/* these are used to keep track of the pages that need
** flushing before the current transaction can commit
*/
@@ -52,7 +54,8 @@ struct reiserfs_inode_info {
//For preallocation
int i_prealloc_block;
int i_prealloc_count;
-
+ struct list_head i_prealloc_list; /* per-transaction list of inodes which
+ * have preallocated blocks */
/* I regret that you think the below
is a comment you should make.... -Hans */
//nopack-attribute
diff --git a/include/linux/reiserfs_fs_sb.h b/include/linux/reiserfs_fs_sb.h
index a6921dad0365..68d2e003b724 100644
--- a/include/linux/reiserfs_fs_sb.h
+++ b/include/linux/reiserfs_fs_sb.h
@@ -254,6 +254,7 @@ struct reiserfs_journal {
struct reiserfs_journal_cnode *j_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for real buffer heads in current trans */
struct reiserfs_journal_cnode *j_list_hash_table[JOURNAL_HASH_SIZE] ; /* hash table for all the real buffer heads in all
the transactions */
+ struct list_head j_prealloc_list; /* list of inodes which have preallocated blocks */
};
#define JOURNAL_DESC_MAGIC "ReIsErLB" /* ick. magic string to find desc blocks in the journal */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 5068f1a80656..2e1217ad1b27 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -424,6 +424,7 @@ struct usb_driver {
#define USB_ASYNC_UNLINK 0x0008
#define USB_QUEUE_BULK 0x0010
#define USB_NO_FSBR 0x0020
+#define USB_ZERO_PACKET 0x0040 // Finish bulk OUTs always with zero length packet
#define USB_TIMEOUT_KILLED 0x1000 // only set by HCD!
typedef struct
diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h
index ded8f2ea4637..1bcce4c08dcd 100644
--- a/include/scsi/scsi.h
+++ b/include/scsi/scsi.h
@@ -211,6 +211,9 @@ struct ccs_modesel_head
/* Used to get the bus number for a device */
#define SCSI_IOCTL_GET_BUS_NUMBER 0x5386
+/* Used to get the PCI location of a device */
+#define SCSI_IOCTL_GET_PCI 0x5387
+
/*
* Overrides for Emacs so that we follow Linus's tabbing style.
* Emacs will notice this stuff at the end of the file and automatically
diff --git a/init/main.c b/init/main.c
index e06a6d80354b..53c4be5c68ce 100644
--- a/init/main.c
+++ b/init/main.c
@@ -574,7 +574,6 @@ asmlinkage void __init start_kernel(void)
#if defined(CONFIG_ARCH_S390)
ccwcache_init();
#endif
- kiobuf_setup();
signals_init();
bdev_init();
inode_init(mempages);
diff --git a/kernel/ksyms.c b/kernel/ksyms.c
index e7dd1f35e682..5fcbe2d34109 100644
--- a/kernel/ksyms.c
+++ b/kernel/ksyms.c
@@ -386,8 +386,6 @@ EXPORT_SYMBOL(__br_write_unlock);
#endif
/* Kiobufs */
-EXPORT_SYMBOL(kiobuf_init);
-
EXPORT_SYMBOL(alloc_kiovec);
EXPORT_SYMBOL(free_kiovec);
EXPORT_SYMBOL(expand_kiobuf);
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 4672fc3c342d..8d480e7e7f11 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -202,9 +202,9 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
return sem;
}
-EXPORT_SYMBOL(rwsem_down_read_failed);
-EXPORT_SYMBOL(rwsem_down_write_failed);
-EXPORT_SYMBOL(rwsem_wake);
+EXPORT_SYMBOL_NOVERS(rwsem_down_read_failed);
+EXPORT_SYMBOL_NOVERS(rwsem_down_write_failed);
+EXPORT_SYMBOL_NOVERS(rwsem_wake);
#if RWSEM_DEBUG
EXPORT_SYMBOL(rwsemtrace);
#endif
diff --git a/lib/string.c b/lib/string.c
index 0bdf942391c0..41a90d37e35a 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -326,21 +326,24 @@ char * strtok(char * s,const char * ct)
* @ct: The characters to search for
*
* strsep() updates @s to point after the token, ready for the next call.
+ *
+ * It returns empty tokens, too, behaving exactly like the libc function
+ * of that name. In fact, it was stolen from glibc2 and de-fancy-fied.
+ * Same semantics, slimmer shape. ;)
*/
-char * strsep(char **s, const char * ct)
+char * strsep(char **s, const char *ct)
{
- char *sbegin=*s;
- if (!sbegin)
- return NULL;
-
- sbegin += strspn(sbegin,ct);
- if (*sbegin == '\0')
+ char *sbegin = *s, *end;
+
+ if (sbegin == NULL)
return NULL;
-
- *s = strpbrk( sbegin, ct);
- if (*s && **s != '\0')
- *(*s)++ = '\0';
- return (sbegin);
+
+ end = strpbrk(sbegin, ct);
+ if (end)
+ *end++ = '\0';
+ *s = end;
+
+ return sbegin;
}
#endif
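The strsep() rewrite above is what motivates the fs/smbfs/getopt.c loop earlier in this patch: empty fields now come back as "" tokens instead of being skipped. A minimal userspace sketch of the new semantics (glibc's strsep behaves the same way), not part of the patch itself:
#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[] = "uid=500,,gid=100";
	char *opts = buf, *tok;

	while ((tok = strsep(&opts, ",")) != NULL)
		printf("token: '%s'\n", tok);
	/* prints: 'uid=500', '', 'gid=100' */
	return 0;
}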
diff --git a/mm/highmem.c b/mm/highmem.c
index 7935d1280d1c..f093fb67e2c5 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -207,6 +207,10 @@ static inline void bounce_end_io (struct buffer_head *bh, int uptodate)
bh_orig->b_end_io(bh_orig, uptodate);
__free_page(bh->b_page);
+#ifdef HIGHMEM_DEBUG
+ /* Don't clobber the constructed slab cache */
+ init_waitqueue_head(&bh->b_wait);
+#endif
kmem_cache_free(bh_cachep, bh);
}
@@ -260,12 +264,14 @@ repeat_page:
bh->b_count = bh_orig->b_count;
bh->b_rdev = bh_orig->b_rdev;
bh->b_state = bh_orig->b_state;
+#ifdef HIGHMEM_DEBUG
bh->b_flushtime = jiffies;
bh->b_next_free = NULL;
bh->b_prev_free = NULL;
/* bh->b_this_page */
bh->b_reqnext = NULL;
bh->b_pprev = NULL;
+#endif
/* bh->b_page */
if (rw == WRITE) {
bh->b_end_io = bounce_end_io_write;
@@ -274,7 +280,9 @@ repeat_page:
bh->b_end_io = bounce_end_io_read;
bh->b_private = (void *)bh_orig;
bh->b_rsector = bh_orig->b_rsector;
+#ifdef HIGHMEM_DEBUG
memset(&bh->b_wait, -1, sizeof(bh->b_wait));
+#endif
return bh;
}
diff --git a/mm/memory.c b/mm/memory.c
index 07aa8d2475ad..4b6e70995b0b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -389,20 +389,33 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s
/*
* Do a quick page-table lookup for a single page.
*/
-static struct page * follow_page(unsigned long address)
+static struct page * follow_page(unsigned long address, int write)
{
pgd_t *pgd;
pmd_t *pmd;
+ pte_t *ptep, pte;
pgd = pgd_offset(current->mm, address);
+ if (pgd_none(*pgd) || pgd_bad(*pgd))
+ goto out;
+
pmd = pmd_offset(pgd, address);
- if (pmd) {
- pte_t * pte = pte_offset(pmd, address);
- if (pte && pte_present(*pte))
- return pte_page(*pte);
+ if (pmd_none(*pmd) || pmd_bad(*pmd))
+ goto out;
+
+ ptep = pte_offset(pmd, address);
+ if (!ptep)
+ goto out;
+
+ pte = *ptep;
+ if (pte_present(pte)) {
+ if (!write ||
+ (pte_write(pte) && pte_dirty(pte)))
+ return pte_page(pte);
}
-
- return NULL;
+
+out:
+ return 0;
}
/*
@@ -476,15 +489,22 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
goto out_unlock;
}
}
- if (handle_mm_fault(current->mm, vma, ptr, datain) <= 0)
- goto out_unlock;
spin_lock(&mm->page_table_lock);
- map = follow_page(ptr);
- if (!map) {
+ while (!(map = follow_page(ptr, datain))) {
+ int ret;
+
spin_unlock(&mm->page_table_lock);
- dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
- goto out_unlock;
- }
+ ret = handle_mm_fault(current->mm, vma, ptr, datain);
+ if (ret <= 0) {
+ if (!ret)
+ goto out_unlock;
+ else {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+ }
+ spin_lock(&mm->page_table_lock);
+ }
map = get_page_map(map);
if (map) {
flush_dcache_page(map);
@@ -509,6 +529,37 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
return err;
}
+/*
+ * Mark all of the pages in a kiobuf as dirty
+ *
+ * We need to be able to deal with short reads from disk: if an IO error
+ * occurs, the number of bytes read into memory may be less than the
+ * size of the kiobuf, so we have to stop marking pages dirty once the
+ * requested byte count has been reached.
+ */
+
+void mark_dirty_kiobuf(struct kiobuf *iobuf, int bytes)
+{
+ int index, offset, remaining;
+ struct page *page;
+
+ index = iobuf->offset >> PAGE_SHIFT;
+ offset = iobuf->offset & ~PAGE_MASK;
+ remaining = bytes;
+ if (remaining > iobuf->length)
+ remaining = iobuf->length;
+
+ while (remaining > 0 && index < iobuf->nr_pages) {
+ page = iobuf->maplist[index];
+
+ if (!PageReserved(page))
+ SetPageDirty(page);
+
+ remaining -= (PAGE_SIZE - offset);
+ offset = 0;
+ index++;
+ }
+}
/*
* Unmap all of the pages referenced by a kiobuf. We release the pages,
@@ -559,7 +610,6 @@ int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
if (iobuf->locked)
continue;
- iobuf->locked = 1;
ppage = iobuf->maplist;
for (j = 0; j < iobuf->nr_pages; ppage++, j++) {
@@ -567,9 +617,16 @@ int lock_kiovec(int nr, struct kiobuf *iovec[], int wait)
if (!page)
continue;
- if (TryLockPage(page))
+ if (TryLockPage(page)) {
+ while (j--) {
+ page = *(--ppage);
+ if (page)
+ UnlockPage(page);
+ }
goto retry;
+ }
}
+ iobuf->locked = 1;
}
return 0;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 91e1d3643293..822f5fc3f7e6 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -295,8 +295,7 @@ struct page * __alloc_pages(zonelist_t *zonelist, unsigned long order)
* Can we take pages directly from the inactive_clean
* list?
*/
- if (order == 0 && (gfp_mask & __GFP_WAIT) &&
- !(current->flags & PF_MEMALLOC))
+ if (order == 0 && (gfp_mask & __GFP_WAIT))
direct_reclaim = 1;
/*
diff --git a/net/core/netfilter.c b/net/core/netfilter.c
index 31f9a46e92c3..ae0e27833976 100644
--- a/net/core/netfilter.c
+++ b/net/core/netfilter.c
@@ -553,6 +553,12 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info,
return;
}
+/* This does not belong here, but ipt_REJECT needs it if connection
+ tracking is in use: without this, the connection may not be in the hash table,
+ and hence manufactured ICMP or RST packets will not be associated
+ with it. */
+void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
+
void __init netfilter_init(void)
{
int i, h;
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
index 5dd141dca269..91eb091f5484 100644
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ b/net/ipv4/netfilter/ip_conntrack_core.c
@@ -23,6 +23,8 @@
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
+/* For ERR_PTR(). Yeah, I know... --RR */
+#include <linux/fs.h>
/* This rwlock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
@@ -152,7 +154,9 @@ static void
clean_from_lists(struct ip_conntrack *ct)
{
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
- /* Remove from both hash lists */
+ /* Remove from both hash lists: must not NULL out next ptrs,
+ otherwise we'll look unconfirmed. Fortunately, LIST_DELETE
+ doesn't do this. --RR */
LIST_DELETE(&ip_conntrack_hash
[hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple)],
&ct->tuplehash[IP_CT_DIR_ORIGINAL]);
@@ -172,24 +176,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
{
struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
- /* Unconfirmed connections haven't been cleaned up by the
- timer: hence they cannot be simply deleted here. */
- if (!(ct->status & IPS_CONFIRMED)) {
- WRITE_LOCK(&ip_conntrack_lock);
- /* Race check: they can't get a reference if noone has
- one and we have the write lock. */
- if (atomic_read(&ct->ct_general.use) == 0) {
- clean_from_lists(ct);
- WRITE_UNLOCK(&ip_conntrack_lock);
- } else {
- /* Either a last-minute confirmation (ie. ct
- now has timer attached), or a last-minute
- new skb has reference (still unconfirmed). */
- WRITE_UNLOCK(&ip_conntrack_lock);
- return;
- }
- }
-
IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
IP_NF_ASSERT(!timer_pending(&ct->timeout));
@@ -207,7 +193,6 @@ static void death_by_timeout(unsigned long ul_conntrack)
struct ip_conntrack *ct = (void *)ul_conntrack;
WRITE_LOCK(&ip_conntrack_lock);
- IP_NF_ASSERT(ct->status & IPS_CONFIRMED);
clean_from_lists(ct);
WRITE_UNLOCK(&ip_conntrack_lock);
ip_conntrack_put(ct);
@@ -253,24 +238,85 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
return h;
}
-/* Confirm a connection */
-void
-ip_conntrack_confirm(struct ip_conntrack *ct)
+static inline struct ip_conntrack *
+__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
{
+ struct ip_conntrack *ct
+ = (struct ip_conntrack *)nfct->master;
+
+ /* ctinfo is the index of the nfct inside the conntrack */
+ *ctinfo = nfct - ct->infos;
+ IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
+ return ct;
+}
+
+/* Return conntrack and conntrack_info given skb->nfct->master */
+struct ip_conntrack *
+ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
+{
+ if (skb->nfct)
+ return __ip_conntrack_get(skb->nfct, ctinfo);
+ return NULL;
+}
+
+/* Confirm a connection given skb->nfct; places it in hash table */
+int
+__ip_conntrack_confirm(struct nf_ct_info *nfct)
+{
+ unsigned int hash, repl_hash;
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = __ip_conntrack_get(nfct, &ctinfo);
+
+ /* ipt_REJECT uses ip_conntrack_attach to attach related
+ ICMP/TCP RST packets in other direction. Actual packet
+ which created connection will be IP_CT_NEW or for an
+ expected connection, IP_CT_RELATED. */
+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ return NF_ACCEPT;
+
+ hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+ /* We're not in hash table, and we refuse to set up related
+ connections for unconfirmed conns. But packet copies and
+ REJECT will give spurious warnings here. */
+ /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
+
+ /* No external references means no one else could have
+ confirmed us. */
+ IP_NF_ASSERT(!is_confirmed(ct));
DEBUGP("Confirming conntrack %p\n", ct);
+
WRITE_LOCK(&ip_conntrack_lock);
- /* Race check */
- if (!(ct->status & IPS_CONFIRMED)) {
- IP_NF_ASSERT(!timer_pending(&ct->timeout));
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
+ /* See if there's one in the list already, including reverse:
+ NAT could have grabbed it without realizing, since we're
+ not in the hash. If there is, we lost race. */
+ if (!LIST_FIND(&ip_conntrack_hash[hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
+ && !LIST_FIND(&ip_conntrack_hash[repl_hash],
+ conntrack_tuple_cmp,
+ struct ip_conntrack_tuple_hash *,
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
+ list_prepend(&ip_conntrack_hash[hash],
+ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+ list_prepend(&ip_conntrack_hash[repl_hash],
+ &ct->tuplehash[IP_CT_DIR_REPLY]);
/* Timer relative to confirmation time, not original
setting time, otherwise we'd get timer wrap in
wierd delay cases. */
ct->timeout.expires += jiffies;
add_timer(&ct->timeout);
atomic_inc(&ct->ct_general.use);
+ WRITE_UNLOCK(&ip_conntrack_lock);
+ return NF_ACCEPT;
}
+
WRITE_UNLOCK(&ip_conntrack_lock);
+ return NF_DROP;
}
/* Returns true if a connection correspondings to the tuple (required
@@ -374,30 +420,16 @@ icmp_error_track(struct sk_buff *skb,
*ctinfo += IP_CT_IS_REPLY;
}
- /* REJECT target does this commonly, so allow locally
- generated ICMP errors --RR */
- if (!(h->ctrack->status & IPS_CONFIRMED)
- && hooknum != NF_IP_LOCAL_OUT) {
- DEBUGP("icmp_error_track: unconfirmed\n");
- ip_conntrack_put(h->ctrack);
- return NULL;
- }
-
/* Update skb to refer to this connection */
skb->nfct = &h->ctrack->infos[*ctinfo];
return h->ctrack;
}
-/* There's a small race here where we may free a just-replied to
+/* There's a small race here where we may free a just-assured
connection. Too bad: we're in trouble anyway. */
static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
{
- /* Unconfirmed connections either really fresh or transitory
- anyway */
- if (!(i->ctrack->status & IPS_ASSURED)
- && (i->ctrack->status & IPS_CONFIRMED))
- return 1;
- return 0;
+ return !(i->ctrack->status & IPS_ASSURED);
}
static int early_drop(struct list_head *chain)
@@ -436,10 +468,9 @@ static inline int expect_cmp(const struct ip_conntrack_expect *i,
return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
}
-/* Allocate a new conntrack; we set everything up, then grab write
- lock and see if we lost a race. If we lost it we return 0,
- indicating the controlling code should look again. */
-static int
+/* Allocate a new conntrack: we return -ENOMEM if classification
+ failed due to stress. Otherwise it really is unclassifiable. */
+static struct ip_conntrack_tuple_hash *
init_conntrack(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_protocol *protocol,
struct sk_buff *skb)
@@ -448,8 +479,6 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_tuple repl_tuple;
size_t hash, repl_hash;
struct ip_conntrack_expect *expected;
- enum ip_conntrack_info ctinfo;
- unsigned long extra_jiffies;
int i;
static unsigned int drop_next = 0;
@@ -457,30 +486,31 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
if (ip_conntrack_max &&
atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
- if (net_ratelimit())
- printk(KERN_WARNING "ip_conntrack: maximum limit of"
- " %d entries exceeded\n", ip_conntrack_max);
-
/* Try dropping from random chain, or else from the
chain about to put into (in case they're trying to
bomb one hash chain). */
if (drop_next >= ip_conntrack_htable_size)
drop_next = 0;
if (!early_drop(&ip_conntrack_hash[drop_next++])
- && !early_drop(&ip_conntrack_hash[hash]))
- return 1;
+ && !early_drop(&ip_conntrack_hash[hash])) {
+ if (net_ratelimit())
+ printk(KERN_WARNING
+ "ip_conntrack: table full, dropping"
+ " packet.\n");
+ return ERR_PTR(-ENOMEM);
+ }
}
if (!invert_tuple(&repl_tuple, tuple, protocol)) {
DEBUGP("Can't invert tuple.\n");
- return 1;
+ return NULL;
}
repl_hash = hash_conntrack(&repl_tuple);
conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
if (!conntrack) {
DEBUGP("Can't allocate conntrack.\n");
- return 1;
+ return ERR_PTR(-ENOMEM);
}
memset(conntrack, 0, sizeof(struct ip_conntrack));
@@ -493,32 +523,33 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
for (i=0; i < IP_CT_NUMBER; i++)
conntrack->infos[i].master = &conntrack->ct_general;
- extra_jiffies = protocol->new(conntrack, skb->nh.iph, skb->len);
- if (!extra_jiffies) {
+ if (!protocol->new(conntrack, skb->nh.iph, skb->len)) {
kmem_cache_free(ip_conntrack_cachep, conntrack);
- return 1;
+ return NULL;
}
/* Don't set timer yet: wait for confirmation */
init_timer(&conntrack->timeout);
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
- conntrack->timeout.expires = extra_jiffies;
- /* Sew in at head of hash list. */
+ /* Mark clearly that it's not in the hash table. */
+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list.next = NULL;
+
+ /* Write lock required for deletion of expected. Without
+ this, a read-lock would do. */
WRITE_LOCK(&ip_conntrack_lock);
- /* Check noone else beat us in the race... */
- if (__ip_conntrack_find(tuple, NULL)) {
- WRITE_UNLOCK(&ip_conntrack_lock);
- kmem_cache_free(ip_conntrack_cachep, conntrack);
- return 0;
- }
conntrack->helper = LIST_FIND(&helpers, helper_cmp,
struct ip_conntrack_helper *,
&repl_tuple);
/* Need finding and deleting of expected ONLY if we win race */
expected = LIST_FIND(&expect_list, expect_cmp,
struct ip_conntrack_expect *, tuple);
- if (expected) {
+ /* If master is not in hash table yet (ie. packet hasn't left
+ this machine yet), how can other end know about expected?
+ Hence these are not the droids you are looking for (if
+ master ct never got confirmed, we'd hold a reference to it
+ and weird things would happen to future packets). */
+ if (expected && is_confirmed(expected->expectant)) {
/* Welcome, Mr. Bond. We've been expecting you... */
conntrack->status = IPS_EXPECTED;
conntrack->master.master = &expected->expectant->ct_general;
@@ -526,23 +557,13 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
LIST_DELETE(&expect_list, expected);
expected->expectant = NULL;
nf_conntrack_get(&conntrack->master);
- ctinfo = IP_CT_RELATED;
- } else {
- ctinfo = IP_CT_NEW;
}
- list_prepend(&ip_conntrack_hash[hash],
- &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]);
- list_prepend(&ip_conntrack_hash[repl_hash],
- &conntrack->tuplehash[IP_CT_DIR_REPLY]);
atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
- /* Update skb to refer to this connection */
- skb->nfct = &conntrack->infos[ctinfo];
if (expected && expected->expectfn)
expected->expectfn(conntrack);
-
- return 1;
+ return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
@@ -561,38 +582,18 @@ resolve_normal_ct(struct sk_buff *skb,
if (!get_tuple(skb->nh.iph, skb->len, &tuple, proto))
return NULL;
- /* Loop around search/insert race */
- do {
- /* look for tuple match */
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h && init_conntrack(&tuple, proto, skb))
+ /* look for tuple match */
+ h = ip_conntrack_find_get(&tuple, NULL);
+ if (!h) {
+ h = init_conntrack(&tuple, proto, skb);
+ if (!h)
return NULL;
- } while (!h);
+ if (IS_ERR(h))
+ return (void *)h;
+ }
/* It exists; we have (non-exclusive) reference. */
if (DIRECTION(h) == IP_CT_DIR_REPLY) {
- /* Reply on unconfirmed connection => unclassifiable */
- if (!(h->ctrack->status & IPS_CONFIRMED)) {
- /* Exception: local TCP RSTs (generated by
- REJECT target). */
- if (hooknum == NF_IP_LOCAL_OUT
- && h->tuple.dst.protonum == IPPROTO_TCP) {
- const struct tcphdr *tcph
- = (const struct tcphdr *)
- ((u_int32_t *)skb->nh.iph
- + skb->nh.iph->ihl);
- if (tcph->rst) {
- *ctinfo = IP_CT_ESTABLISHED
- + IP_CT_IS_REPLY;
- *set_reply = 0;
- goto set_skb;
- }
- }
- DEBUGP("Reply on unconfirmed connection\n");
- ip_conntrack_put(h->ctrack);
- return NULL;
- }
-
*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
/* Please set reply bit if this packet OK */
*set_reply = 1;
@@ -613,28 +614,10 @@ resolve_normal_ct(struct sk_buff *skb,
}
*set_reply = 0;
}
- set_skb:
skb->nfct = &h->ctrack->infos[*ctinfo];
return h->ctrack;
}
-/* Return conntrack and conntrack_info a given skb */
-inline struct ip_conntrack *
-ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
-{
- if (skb->nfct) {
- struct ip_conntrack *ct
- = (struct ip_conntrack *)skb->nfct->master;
-
- /* ctinfo is the index of the nfct inside the conntrack */
- *ctinfo = skb->nfct - ct->infos;
- IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
- return ct;
- }
- return NULL;
-}
-
-
/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
struct sk_buff **pskb,
@@ -689,6 +672,10 @@ unsigned int ip_conntrack_in(unsigned int hooknum,
/* Not valid part of a connection */
return NF_ACCEPT;
+ if (IS_ERR(ct))
+ /* Too stressed to deal. */
+ return NF_DROP;
+
IP_NF_ASSERT((*pskb)->nfct);
ret = proto->packet(ct, (*pskb)->nh.iph, (*pskb)->len, ctinfo);
@@ -783,23 +770,18 @@ void ip_conntrack_unexpect_related(struct ip_conntrack *related_to)
int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
const struct ip_conntrack_tuple *newreply)
{
- unsigned int newindex = hash_conntrack(newreply);
-
WRITE_LOCK(&ip_conntrack_lock);
if (__ip_conntrack_find(newreply, conntrack)) {
WRITE_UNLOCK(&ip_conntrack_lock);
return 0;
}
+ /* Should be unconfirmed, so not in hash table yet */
+ IP_NF_ASSERT(!is_confirmed(conntrack));
+
DEBUGP("Altering reply tuple of %p to ", conntrack);
DUMP_TUPLE(newreply);
- LIST_DELETE(&ip_conntrack_hash
- [hash_conntrack(&conntrack->tuplehash[IP_CT_DIR_REPLY]
- .tuple)],
- &conntrack->tuplehash[IP_CT_DIR_REPLY]);
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- list_prepend(&ip_conntrack_hash[newindex],
- &conntrack->tuplehash[IP_CT_DIR_REPLY]);
conntrack->helper = LIST_FIND(&helpers, helper_cmp,
struct ip_conntrack_helper *,
newreply);
@@ -861,8 +843,8 @@ void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
WRITE_LOCK(&ip_conntrack_lock);
- /* Timer may not be active yet */
- if (!(ct->status & IPS_CONFIRMED))
+ /* If not in hash table, timer will not be active yet */
+ if (!is_confirmed(ct))
ct->timeout.expires = extra_jiffies;
else {
/* Need del_timer for race avoidance (may already be dying). */
@@ -914,6 +896,26 @@ ip_ct_gather_frags(struct sk_buff *skb)
return skb;
}
+/* Used by ipt_REJECT. */
+static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
+{
+ struct ip_conntrack *ct;
+ enum ip_conntrack_info ctinfo;
+
+ ct = __ip_conntrack_get(nfct, &ctinfo);
+
+ /* This ICMP is in reverse direction to the packet which
+ caused it */
+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
+ ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
+ else
+ ctinfo = IP_CT_RELATED;
+
+ /* Attach new skbuff, and increment count */
+ nskb->nfct = &ct->infos[ctinfo];
+ atomic_inc(&ct->ct_general.use);
+}
+
static inline int
do_kill(const struct ip_conntrack_tuple_hash *i,
int (*kill)(const struct ip_conntrack *i, void *data),
@@ -953,20 +955,6 @@ ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
/* Time to push up daises... */
if (del_timer(&h->ctrack->timeout))
death_by_timeout((unsigned long)h->ctrack);
- else if (!(h->ctrack->status & IPS_CONFIRMED)) {
- /* Unconfirmed connection. Clean from lists,
- mark confirmed so it gets cleaned as soon
- as skb freed. */
- WRITE_LOCK(&ip_conntrack_lock);
- /* Lock protects race against another setting
- of confirmed bit. set_bit isolates this
- bit from the others. */
- if (!(h->ctrack->status & IPS_CONFIRMED)) {
- clean_from_lists(h->ctrack);
- set_bit(IPS_CONFIRMED_BIT, &h->ctrack->status);
- }
- WRITE_UNLOCK(&ip_conntrack_lock);
- }
/* ... else the timer will get him soon. */
ip_conntrack_put(h->ctrack);
@@ -1062,6 +1050,12 @@ void ip_conntrack_cleanup(void)
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(ip_conntrack_sysctl_header);
#endif
+ ip_ct_attach = NULL;
+ /* This makes sure all current packets have passed through
+ netfilter framework. Roll on, two-stage module
+ delete... */
+ br_write_lock_bh(BR_NETPROTO_LOCK);
+ br_write_unlock_bh(BR_NETPROTO_LOCK);
i_see_dead_people:
ip_ct_selective_cleanup(kill_all, NULL);
@@ -1075,6 +1069,9 @@ void ip_conntrack_cleanup(void)
nf_unregister_sockopt(&so_getorigdst);
}
+static int hashsize = 0;
+MODULE_PARM(hashsize, "i");
+
int __init ip_conntrack_init(void)
{
unsigned int i;
@@ -1082,13 +1079,17 @@ int __init ip_conntrack_init(void)
/* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
* machine has 256 buckets. >= 1GB machines have 8192 buckets. */
- ip_conntrack_htable_size
- = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct list_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
- ip_conntrack_htable_size = 8192;
- if (ip_conntrack_htable_size < 16)
- ip_conntrack_htable_size = 16;
+ if (hashsize) {
+ ip_conntrack_htable_size = hashsize;
+ } else {
+ ip_conntrack_htable_size
+ = (((num_physpages << PAGE_SHIFT) / 16384)
+ / sizeof(struct list_head));
+ if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
+ ip_conntrack_htable_size = 8192;
+ if (ip_conntrack_htable_size < 16)
+ ip_conntrack_htable_size = 16;
+ }
ip_conntrack_max = 8 * ip_conntrack_htable_size;
printk("ip_conntrack (%u buckets, %d max)\n",
@@ -1140,5 +1141,7 @@ int __init ip_conntrack_init(void)
}
#endif /*CONFIG_SYSCTL*/
+ /* For use by ipt_REJECT */
+ ip_ct_attach = ip_conntrack_attach;
return ret;
}
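The hashsize module parameter introduced above overrides the memory-based default; loading the module with hashsize=4096 on the insmod/modprobe command line forces that many buckets. A hypothetical worked example of the default sizing (not part of the patch), assuming 4 kB pages and an 8-byte struct list_head:
#include <stdio.h>

int main(void)
{
	unsigned long num_physpages = 32768;	/* 128 MB of 4 kB pages */
	unsigned long buckets;

	buckets = ((num_physpages << 12) / 16384) / 8;
	printf("%lu buckets, %lu max entries\n", buckets, 8 * buckets);
	/* prints: 1024 buckets, 8192 max entries */
	return 0;
}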
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
index bd566db53eee..fcc0eed71a0f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
@@ -48,10 +48,10 @@ static int established(struct ip_conntrack *conntrack,
}
/* Called when a new connection for this protocol found. */
-static unsigned long
+static int
new(struct ip_conntrack *conntrack, struct iphdr *iph, size_t len)
{
- return GENERIC_TIMEOUT;
+ return 1;
}
struct ip_conntrack_protocol ip_conntrack_generic_protocol
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
index 17e126119b9b..b0eb65891d5f 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
@@ -72,22 +72,25 @@ static int icmp_packet(struct ip_conntrack *ct,
struct iphdr *iph, size_t len,
enum ip_conntrack_info ctinfo)
{
- /* FIXME: Should keep count of orig - reply packets: if == 0,
- destroy --RR */
- /* Delete connection immediately on reply: won't actually
- vanish as we still have skb */
+ /* Try to delete connection immediately after all replies:
+ won't actually vanish as we still have skb, and del_timer
+ means this will only run once even if count hits zero twice
+ (theoretically possible with SMP) */
if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (del_timer(&ct->timeout))
+ if (atomic_dec_and_test(&ct->proto.icmp.count)
+ && del_timer(&ct->timeout))
ct->timeout.function((unsigned long)ct);
- } else
+ } else {
+ atomic_inc(&ct->proto.icmp.count);
ip_ct_refresh(ct, ICMP_TIMEOUT);
+ }
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
-static unsigned long icmp_new(struct ip_conntrack *conntrack,
- struct iphdr *iph, size_t len)
+static int icmp_new(struct ip_conntrack *conntrack,
+ struct iphdr *iph, size_t len)
{
static u_int8_t valid_new[]
= { [ICMP_ECHO] = 1,
@@ -103,7 +106,8 @@ static unsigned long icmp_new(struct ip_conntrack *conntrack,
DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
return 0;
}
- return ICMP_TIMEOUT;
+ atomic_set(&conntrack->proto.icmp.count, 0);
+ return 1;
}
struct ip_conntrack_protocol ip_conntrack_protocol_icmp
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
index 0aa8426de3a7..4f52a027fb3c 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
@@ -206,8 +206,8 @@ static int tcp_packet(struct ip_conntrack *conntrack,
}
/* Called when a new connection for this protocol found. */
-static unsigned long tcp_new(struct ip_conntrack *conntrack,
- struct iphdr *iph, size_t len)
+static int tcp_new(struct ip_conntrack *conntrack,
+ struct iphdr *iph, size_t len)
{
enum tcp_conntrack newconntrack;
struct tcphdr *tcph = (struct tcphdr *)((u_int32_t *)iph + iph->ihl);
@@ -224,7 +224,7 @@ static unsigned long tcp_new(struct ip_conntrack *conntrack,
}
conntrack->proto.tcp.state = newconntrack;
- return tcp_timeouts[conntrack->proto.tcp.state];
+ return 1;
}
struct ip_conntrack_protocol ip_conntrack_protocol_tcp
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index 644a86a1397a..86544b03d2ce 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -62,10 +62,10 @@ static int udp_packet(struct ip_conntrack *conntrack,
}
/* Called when a new connection for this protocol found. */
-static unsigned long udp_new(struct ip_conntrack *conntrack,
+static int udp_new(struct ip_conntrack *conntrack,
struct iphdr *iph, size_t len)
{
- return UDP_TIMEOUT;
+ return 1;
}
struct ip_conntrack_protocol ip_conntrack_protocol_udp
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index f1faab1be187..ba94a4d6c030 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -88,8 +88,6 @@ print_conntrack(char *buffer, const struct ip_conntrack *conntrack)
proto);
if (conntrack->status & IPS_ASSURED)
len += sprintf(buffer + len, "[ASSURED] ");
- if (!(conntrack->status & IPS_CONFIRMED))
- len += sprintf(buffer + len, "[UNCONFIRMED] ");
len += sprintf(buffer + len, "use=%u ",
atomic_read(&conntrack->ct_general.use));
len += sprintf(buffer + len, "\n");
@@ -169,22 +167,8 @@ static unsigned int ip_confirm(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- /* We've seen it coming out the other side: confirm. Beware
- REJECT generating TCP RESET response (IP_CT_REPLY), or ICMP
- errors (IP_CT_REPLY + IP_CT_RELATED). But new expected
- connections must be confirmed as well (eg. ftp data,
- IP_CT_RELATED). */
- if ((*pskb)->nfct) {
- struct ip_conntrack *ct
- = (struct ip_conntrack *)(*pskb)->nfct->master;
- /* ctinfo is the index of the nfct inside the conntrack */
- enum ip_conntrack_info ctinfo = (*pskb)->nfct - ct->infos;
-
- if ((ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)
- && !(ct->status & IPS_CONFIRMED))
- ip_conntrack_confirm(ct);
- }
- return NF_ACCEPT;
+ /* We've seen it coming out the other side: confirm it */
+ return ip_conntrack_confirm(*pskb);
}
static unsigned int ip_refrag(unsigned int hooknum,
@@ -196,7 +180,8 @@ static unsigned int ip_refrag(unsigned int hooknum,
struct rtable *rt = (struct rtable *)(*pskb)->dst;
/* We've seen it coming out the other side: confirm */
- ip_confirm(hooknum, pskb, in, out, okfn);
+ if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
+ return NF_DROP;
/* Local packets are never produced too large for their
interface. We degfragment them at LOCAL_OUT, however,
@@ -345,3 +330,4 @@ EXPORT_SYMBOL(ip_ct_refresh);
EXPORT_SYMBOL(ip_conntrack_expect_related);
EXPORT_SYMBOL(ip_conntrack_tuple_taken);
EXPORT_SYMBOL(ip_ct_gather_frags);
+EXPORT_SYMBOL(ip_conntrack_htable_size);
diff --git a/net/ipv4/netfilter/ip_fw_compat.c b/net/ipv4/netfilter/ip_fw_compat.c
index 240f3b47fcc9..6489ad787110 100644
--- a/net/ipv4/netfilter/ip_fw_compat.c
+++ b/net/ipv4/netfilter/ip_fw_compat.c
@@ -69,21 +69,6 @@ int unregister_firewall(int pf, struct firewall_ops *fw)
return 0;
}
-static inline void
-confirm_connection(struct sk_buff *skb)
-{
- if (skb->nfct) {
- struct ip_conntrack *ct
- = (struct ip_conntrack *)skb->nfct->master;
- /* ctinfo is the index of the nfct inside the conntrack */
- enum ip_conntrack_info ctinfo = skb->nfct - ct->infos;
-
- if ((ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)
- && !(ct->status & IPS_CONFIRMED))
- ip_conntrack_confirm(ct);
- }
-}
-
static unsigned int
fw_in(unsigned int hooknum,
struct sk_buff **pskb,
@@ -137,7 +122,10 @@ fw_in(unsigned int hooknum,
(struct net_device *)out,
(*pskb)->nh.raw, &redirpt,
pskb);
- confirm_connection(*pskb);
+
+ /* ip_conntrack_confirm returns NF_DROP or NF_ACCEPT */
+ if (ip_conntrack_confirm(*pskb) == NF_DROP)
+ ret = FW_BLOCK;
}
break;
}
@@ -195,8 +183,7 @@ static unsigned int fw_confirm(unsigned int hooknum,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- confirm_connection(*pskb);
- return NF_ACCEPT;
+ return ip_conntrack_confirm(*pskb);
}
extern int ip_fw_ctl(int optval, void *m, unsigned int len);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
index 4583332d7a37..daece0d6286c 100644
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ b/net/ipv4/netfilter/ip_nat_core.c
@@ -12,6 +12,7 @@
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4.h>
#include <linux/brlock.h>
+#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/ip.h>
@@ -34,11 +35,13 @@
DECLARE_RWLOCK(ip_nat_lock);
-#define IP_NAT_HTABLE_SIZE 64
+/* Calculated at init based on memory size */
+static unsigned int ip_nat_htable_size;
-static struct list_head bysource[IP_NAT_HTABLE_SIZE];
-static struct list_head byipsproto[IP_NAT_HTABLE_SIZE];
+static struct list_head *bysource;
+static struct list_head *byipsproto;
LIST_HEAD(protos);
+LIST_HEAD(helpers);
extern struct ip_nat_protocol unknown_nat_protocol;
@@ -48,14 +51,14 @@ hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto)
{
/* Modified src and dst, to ensure we don't create two
identical streams. */
- return (src + dst + proto) % IP_NAT_HTABLE_SIZE;
+ return (src + dst + proto) % ip_nat_htable_size;
}
static inline size_t
hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
{
/* Original src, to ensure we map it consistently if poss. */
- return (manip->ip + manip->u.all + proto) % IP_NAT_HTABLE_SIZE;
+ return (manip->ip + manip->u.all + proto) % ip_nat_htable_size;
}
/* Noone using conntrack by the time this called. */
@@ -269,6 +272,7 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
struct ip_conntrack_tuple tuple;
} best = { NULL, 0xFFFFFFFF };
u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip;
+ static unsigned int randomness = 0;
if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
var_ipp = &tuple->src.ip;
@@ -285,7 +289,8 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
IP_NF_ASSERT(mr->rangesize >= 1);
for (i = 0; i < mr->rangesize; i++) {
- u_int32_t minip, maxip;
+ /* Host order */
+ u_int32_t minip, maxip, j;
/* Don't do ranges which are already eliminated. */
if (mr->range[i].flags & IP_NAT_RANGE_FULL) {
@@ -293,16 +298,18 @@ find_best_ips_proto(struct ip_conntrack_tuple *tuple,
}
if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
- minip = mr->range[i].min_ip;
- maxip = mr->range[i].max_ip;
+ minip = ntohl(mr->range[i].min_ip);
+ maxip = ntohl(mr->range[i].max_ip);
} else
- minip = maxip = *var_ipp;
+ minip = maxip = ntohl(*var_ipp);
- for (*var_ipp = minip;
- ntohl(*var_ipp) <= ntohl(maxip);
- *var_ipp = htonl(ntohl(*var_ipp) + 1)) {
+ randomness++;
+ for (j = 0; j < maxip - minip + 1; j++) {
unsigned int score;
+ *var_ipp = htonl(minip + (randomness + j)
+ % (maxip - minip + 1));
+
/* Reset the other ip in case it was mangled by
* do_extra_mangle last time. */
*other_ipp = saved_ip;
@@ -853,6 +860,16 @@ int __init ip_nat_init(void)
{
size_t i;
+ /* Leave them the same for the moment. */
+ ip_nat_htable_size = ip_conntrack_htable_size;
+
+ /* One vmalloc for both hash tables */
+ bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
+ if (!bysource) {
+ return -ENOMEM;
+ }
+ byipsproto = bysource + ip_nat_htable_size;
+
/* Sew in builtin protocols. */
WRITE_LOCK(&ip_nat_lock);
list_append(&protos, &ip_nat_protocol_tcp);
@@ -860,7 +877,7 @@ int __init ip_nat_init(void)
list_append(&protos, &ip_nat_protocol_icmp);
WRITE_UNLOCK(&ip_nat_lock);
- for (i = 0; i < IP_NAT_HTABLE_SIZE; i++) {
+ for (i = 0; i < ip_nat_htable_size; i++) {
INIT_LIST_HEAD(&bysource[i]);
INIT_LIST_HEAD(&byipsproto[i]);
}
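
The init path above sizes the NAT tables from ip_conntrack_htable_size and carves both hash tables out of one allocation, with byipsproto starting right after bysource. A minimal userspace sketch of that layout, with malloc standing in for vmalloc and an assumed table size:

#include <stdio.h>
#include <stdlib.h>

struct list_head { struct list_head *next, *prev; };

static void INIT_LIST_HEAD(struct list_head *h)
{
	h->next = h->prev = h;
}

int main(void)
{
	unsigned int ip_nat_htable_size = 8192;	/* assumed; the patch copies
						   ip_conntrack_htable_size */
	struct list_head *bysource, *byipsproto;
	unsigned int i;

	/* One allocation holds both hash tables back to back. */
	bysource = malloc(sizeof(struct list_head) * ip_nat_htable_size * 2);
	if (!bysource)
		return 1;
	byipsproto = bysource + ip_nat_htable_size;

	for (i = 0; i < ip_nat_htable_size; i++) {
		INIT_LIST_HEAD(&bysource[i]);
		INIT_LIST_HEAD(&byipsproto[i]);
	}

	printf("byipsproto starts %lu bytes after bysource\n",
	       (unsigned long)((char *)byipsproto - (char *)bysource));
	free(bysource);
	return 0;
}
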
@@ -872,7 +889,15 @@ int __init ip_nat_init(void)
return 0;
}
-void ip_nat_cleanup(void)
+/* Clear NAT section of all conntracks, in case we're loaded again. */
+static int __exit clean_nat(const struct ip_conntrack *i, void *data)
+{
+ memset((void *)&i->nat, 0, sizeof(i->nat));
+ return 0;
+}
+
+void __exit ip_nat_cleanup(void)
{
+ ip_ct_selective_cleanup(&clean_nat, NULL);
ip_conntrack_destroyed = NULL;
}
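
The rewritten loop in find_best_ips_proto above no longer walks the range from min_ip upward; it starts at a moving offset yet still visits every address in [minip, maxip] exactly once. A standalone sketch of that arithmetic (addresses in host order, values made up for illustration):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* Example range, host byte order (192.168.0.1 - 192.168.0.4). */
	uint32_t minip = 0xc0a80001, maxip = 0xc0a80004;
	static unsigned int randomness;	/* persists across calls in the patch */
	uint32_t j;

	randomness++;
	for (j = 0; j < maxip - minip + 1; j++) {
		/* Same formula as the patch: offset the start, wrap within
		   the range, so each address is tried exactly once. */
		uint32_t ip = minip + (randomness + j) % (maxip - minip + 1);

		printf("candidate %u.%u.%u.%u\n",
		       (unsigned)(ip >> 24), (unsigned)((ip >> 16) & 0xff),
		       (unsigned)((ip >> 8) & 0xff), (unsigned)(ip & 0xff));
	}
	return 0;
}
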
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
index e35362350c68..23d1a5ed9e58 100644
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ b/net/ipv4/netfilter/ip_nat_helper.c
@@ -34,7 +34,6 @@
#endif
DECLARE_LOCK(ip_nat_seqofs_lock);
-LIST_HEAD(helpers);
static inline int
ip_nat_resize_packet(struct sk_buff **skb,
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
index a22858cb3fa3..d83562c292a1 100644
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ b/net/ipv4/netfilter/ip_nat_rule.c
@@ -173,6 +173,12 @@ static int ipt_snat_checkentry(const char *tablename,
return 0;
}
+ /* Only allow these for NAT. */
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP("SNAT: wrong table %s\n", tablename);
+ return 0;
+ }
+
if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
DEBUGP("SNAT: hook mask 0x%x bad\n", hook_mask);
return 0;
@@ -202,6 +208,12 @@ static int ipt_dnat_checkentry(const char *tablename,
return 0;
}
+ /* Only allow these for NAT. */
+ if (strcmp(tablename, "nat") != 0) {
+ DEBUGP("DNAT: wrong table %s\n", tablename);
+ return 0;
+ }
+
if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) {
DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask);
return 0;
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
index fd04ad40cfa4..20982c479db5 100644
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ b/net/ipv4/netfilter/ip_nat_standalone.c
@@ -68,17 +68,21 @@ ip_nat_fn(unsigned int hooknum,
(*pskb)->ip_summed = CHECKSUM_NONE;
ct = ip_conntrack_get(*pskb, &ctinfo);
- /* Can't track? Maybe out of memory: this would make NAT
- unreliable. */
+ /* Can't track? It's not due to stress, or conntrack would
+ have dropped it. Hence it's the user's responsibility to
+ packet filter it out, or implement conntrack/NAT for that
+ protocol. 8) --RR */
if (!ct) {
- if (net_ratelimit())
- printk(KERN_DEBUG "NAT: %u dropping untracked packet %p %u %u.%u.%u.%u -> %u.%u.%u.%u\n",
- hooknum,
- *pskb,
- (*pskb)->nh.iph->protocol,
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr));
- return NF_DROP;
+ /* Exception: ICMP redirect to new connection (not in
+ hash table yet). We must not let this through, in
+ case we're doing NAT to the same network. */
+ struct iphdr *iph = (*pskb)->nh.iph;
+ struct icmphdr *hdr = (struct icmphdr *)
+ ((u_int32_t *)iph + iph->ihl);
+ if (iph->protocol == IPPROTO_ICMP
+ && hdr->type == ICMP_REDIRECT)
+ return NF_DROP;
+ return NF_ACCEPT;
}
switch (ctinfo) {
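
The untracked-packet path above now accepts by default and only drops ICMP redirects, which it locates by stepping iph->ihl 32-bit words past the start of the IP header. A userspace sketch of that check against a fabricated packet (glibc's netinet headers stand in for the kernel ones):

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>

int main(void)
{
	/* Word-aligned buffer big enough for a 20-byte IP header plus ICMP. */
	uint32_t pkt[(sizeof(struct iphdr) + sizeof(struct icmphdr)) / 4];
	struct iphdr *iph = (struct iphdr *)pkt;
	struct icmphdr *hdr;

	memset(pkt, 0, sizeof(pkt));
	iph->ihl = 5;			/* no IP options */
	iph->protocol = IPPROTO_ICMP;

	/* Same pointer arithmetic as the patch: skip ihl 32-bit words. */
	hdr = (struct icmphdr *)((uint32_t *)iph + iph->ihl);
	hdr->type = ICMP_REDIRECT;

	if (iph->protocol == IPPROTO_ICMP && hdr->type == ICMP_REDIRECT)
		printf("untracked ICMP redirect -> NF_DROP\n");
	else
		printf("untracked packet -> NF_ACCEPT\n");
	return 0;
}
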
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index f2a19702d729..38d619f387bc 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -127,14 +127,32 @@ device_cmp(const struct ip_conntrack *i, void *ifindex)
return ret;
}
-int masq_device_event(struct notifier_block *this,
- unsigned long event,
- void *ptr)
+static int masq_device_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
{
struct net_device *dev = ptr;
- if (event == NETDEV_DOWN || event == NETDEV_CHANGEADDR) {
- /* Device was downed/changed (diald) Search entire table for
+ if (event == NETDEV_DOWN) {
+ /* Device was downed. Search entire table for
+ conntracks which were associated with that device,
+ and forget them. */
+ IP_NF_ASSERT(dev->ifindex != 0);
+
+ ip_ct_selective_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int masq_inet_event(struct notifier_block *this,
+ unsigned long event,
+ void *ptr)
+{
+ struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+
+ if (event == NETDEV_DOWN) {
+ /* IP address was deleted. Search entire table for
conntracks which were associated with that device,
and forget them. */
IP_NF_ASSERT(dev->ifindex != 0);
@@ -151,6 +169,12 @@ static struct notifier_block masq_dev_notifier = {
0
};
+static struct notifier_block masq_inet_notifier = {
+ masq_inet_event,
+ NULL,
+ 0
+};
+
static struct ipt_target masquerade
= { { NULL, NULL }, "MASQUERADE", masquerade_target, masquerade_check, NULL,
THIS_MODULE };
@@ -164,6 +188,8 @@ static int __init init(void)
if (ret == 0) {
/* Register for device down reports */
register_netdevice_notifier(&masq_dev_notifier);
+ /* Register IP address change reports */
+ register_inetaddr_notifier(&masq_inet_notifier);
}
return ret;
@@ -173,6 +199,7 @@ static void __exit fini(void)
{
ipt_unregister_target(&masquerade);
unregister_netdevice_notifier(&masq_dev_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
}
module_init(init);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index cc5ffbc4a093..30a52697904d 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -20,6 +20,18 @@ struct in_device;
#define DEBUGP(format, args...)
#endif
+/* If the original packet is part of a connection, but the connection
+ is not confirmed, our manufactured reply will not be associated
+ with it, so we need to do this manually. */
+static void connection_attach(struct sk_buff *new_skb, struct nf_ct_info *nfct)
+{
+ void (*attach)(struct sk_buff *, struct nf_ct_info *);
+
+ /* Avoid module unload race with ip_ct_attach being NULLed out */
+ if (nfct && (attach = ip_ct_attach) != NULL)
+ attach(new_skb, nfct);
+}
+
/* Send RST reply */
static void send_reset(struct sk_buff *oldskb, int local)
{
@@ -128,6 +140,8 @@ static void send_reset(struct sk_buff *oldskb, int local)
if (nskb->len > nskb->dst->pmtu)
goto free_nskb;
+ connection_attach(nskb, oldskb->nfct);
+
NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
ip_finish_output);
return;
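
connection_attach() above deliberately copies ip_ct_attach into a local before testing and calling it, so the call cannot race with conntrack being unloaded and NULLing the pointer between the test and the call. A small userspace sketch of that snapshot pattern (names and the callback are illustrative):

#include <stdio.h>

/* A function pointer that another context may install or reset to NULL. */
static void (*ip_ct_attach)(int);

static void attach_cb(int n)
{
	printf("attached %d\n", n);
}

static void connection_attach(int arg)
{
	void (*attach)(int);

	/* Read the pointer once; test and call the same snapshot. */
	if ((attach = ip_ct_attach) != NULL)
		attach(arg);
}

int main(void)
{
	connection_attach(1);		/* pointer still NULL: no call */
	ip_ct_attach = attach_cb;	/* "module" installs its hook */
	connection_attach(2);		/* calls the snapshotted pointer */
	return 0;
}
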
@@ -136,6 +150,127 @@ static void send_reset(struct sk_buff *oldskb, int local)
kfree_skb(nskb);
}
+static void send_unreach(struct sk_buff *skb_in, int code)
+{
+ struct iphdr *iph;
+ struct icmphdr *icmph;
+ struct sk_buff *nskb;
+ u32 saddr;
+ u8 tos;
+ int hh_len, length;
+ struct rtable *rt = (struct rtable*)skb_in->dst;
+ unsigned char *data;
+
+ if (!rt)
+ return;
+
+ /* FIXME: Use sysctl number. --RR */
+ if (!xrlim_allow(&rt->u.dst, 1*HZ))
+ return;
+
+ iph = skb_in->nh.iph;
+
+ /* No replies to physical multicast/broadcast */
+ if (skb_in->pkt_type!=PACKET_HOST)
+ return;
+
+ /* Now check at the protocol level */
+ if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST))
+ return;
+
+ /* Only reply to fragment 0. */
+ if (iph->frag_off&htons(IP_OFFSET))
+ return;
+
+ /* If we send an ICMP error to an ICMP error, a mess would result. */
+ if (iph->protocol == IPPROTO_ICMP
+ && skb_in->tail-(u8*)iph >= sizeof(struct icmphdr)) {
+ icmph = (struct icmphdr *)((char *)iph + (iph->ihl<<2));
+ /* Between echo-reply (0) and timestamp (13),
+ everything except echo-request (8) is an error.
+ Also, anything greater than NR_ICMP_TYPES is
+ unknown, and hence should be treated as an error... */
+ if ((icmph->type < ICMP_TIMESTAMP
+ && icmph->type != ICMP_ECHOREPLY
+ && icmph->type != ICMP_ECHO)
+ || icmph->type > NR_ICMP_TYPES)
+ return;
+ }
+
+ saddr = iph->daddr;
+ if (!(rt->rt_flags & RTCF_LOCAL))
+ saddr = 0;
+
+ tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL;
+
+ if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
+ return;
+
+ /* RFC says return as much as we can without exceeding 576 bytes. */
+ length = skb_in->len + sizeof(struct iphdr) + sizeof(struct icmphdr);
+
+ if (length > rt->u.dst.pmtu)
+ length = rt->u.dst.pmtu;
+ if (length > 576)
+ length = 576;
+
+ hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
+
+ nskb = alloc_skb(hh_len+15+length, GFP_ATOMIC);
+ if (!nskb) {
+ ip_rt_put(rt);
+ return;
+ }
+
+ nskb->priority = 0;
+ nskb->dst = &rt->u.dst;
+ skb_reserve(nskb, hh_len);
+
+ /* Set up IP header */
+ iph = nskb->nh.iph
+ = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
+ iph->version=4;
+ iph->ihl=5;
+ iph->tos=tos;
+ iph->tot_len = htons(length);
+
+ /* This abbreviates icmp->send->ip_build_xmit->ip_dont_fragment */
+ if (!ipv4_config.no_pmtu_disc
+ && !(rt->u.dst.mxlock&(1<<RTAX_MTU)))
+ iph->frag_off = htons(IP_DF);
+ else iph->frag_off = 0;
+
+ iph->ttl = MAXTTL;
+ ip_select_ident(iph, &rt->u.dst, NULL);
+ iph->protocol=IPPROTO_ICMP;
+ iph->saddr=rt->rt_src;
+ iph->daddr=rt->rt_dst;
+ iph->check=0;
+ iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+
+ /* Set up ICMP header. */
+ icmph = nskb->h.icmph
+ = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr));
+ icmph->type = ICMP_DEST_UNREACH;
+ icmph->code = code;
+ icmph->un.gateway = 0;
+ icmph->checksum = 0;
+
+ /* Copy as much of original packet as will fit */
+ data = skb_put(nskb,
+ length - sizeof(struct iphdr) - sizeof(struct icmphdr));
+ /* FIXME: won't work with nonlinear skbs --RR */
+ memcpy(data, skb_in->nh.iph,
+ length - sizeof(struct iphdr) - sizeof(struct icmphdr));
+ icmph->checksum = ip_compute_csum((unsigned char *)icmph,
+ length - sizeof(struct iphdr));
+
+ connection_attach(nskb, skb_in->nfct);
+
+ NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
+ ip_finish_output);
+}
+
static unsigned int reject(struct sk_buff **pskb,
unsigned int hooknum,
const struct net_device *in,
@@ -145,51 +280,43 @@ static unsigned int reject(struct sk_buff **pskb,
{
const struct ipt_reject_info *reject = targinfo;
+ /* Our naive response construction doesn't deal with IP
+ options, and probably shouldn't try. */
+ if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
+ return NF_DROP;
+
/* WARNING: This code causes reentry within iptables.
This means that the iptables jump stack is now crap. We
must return an absolute verdict. --RR */
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
- icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_NET_UNREACH, 0);
+ send_unreach(*pskb, ICMP_NET_UNREACH);
break;
case IPT_ICMP_HOST_UNREACHABLE:
- icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
+ send_unreach(*pskb, ICMP_HOST_UNREACH);
break;
case IPT_ICMP_PROT_UNREACHABLE:
- icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+ send_unreach(*pskb, ICMP_PROT_UNREACH);
break;
case IPT_ICMP_PORT_UNREACHABLE:
- icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+ send_unreach(*pskb, ICMP_PORT_UNREACH);
break;
case IPT_ICMP_NET_PROHIBITED:
- icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+ send_unreach(*pskb, ICMP_NET_ANO);
break;
case IPT_ICMP_HOST_PROHIBITED:
- icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+ send_unreach(*pskb, ICMP_HOST_ANO);
break;
- case IPT_ICMP_ECHOREPLY:
- printk("REJECT: ECHOREPLY no longer supported.\n");
- break;
case IPT_TCP_RESET:
send_reset(*pskb, hooknum == NF_IP_LOCAL_IN);
break;
+ case IPT_ICMP_ECHOREPLY:
+ /* Doesn't happen. */
}
return NF_DROP;
}
-static inline int find_ping_match(const struct ipt_entry_match *m)
-{
- const struct ipt_icmp *icmpinfo = (const struct ipt_icmp *)m->data;
-
- if (strcmp(m->u.kernel.match->name, "icmp") == 0
- && icmpinfo->type == ICMP_ECHO
- && !(icmpinfo->invflags & IPT_ICMP_INV))
- return 1;
-
- return 0;
-}
-
static int check(const char *tablename,
const struct ipt_entry *e,
void *targinfo,
@@ -216,17 +343,8 @@ static int check(const char *tablename,
}
if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
- /* Must specify that it's an ICMP ping packet. */
- if (e->ip.proto != IPPROTO_ICMP
- || (e->ip.invflags & IPT_INV_PROTO)) {
- DEBUGP("REJECT: ECHOREPLY illegal for non-icmp\n");
- return 0;
- }
- /* Must contain ICMP match. */
- if (IPT_MATCH_ITERATE(e, find_ping_match) == 0) {
- DEBUGP("REJECT: ECHOREPLY illegal for non-ping\n");
- return 0;
- }
+ printk("REJECT: ECHOREPLY no longer supported.\n");
+ return 0;
} else if (rejinfo->with == IPT_TCP_RESET) {
/* Must specify that it's a TCP packet */
if (e->ip.proto != IPPROTO_TCP
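
send_unreach() above copies as much of the offending packet as fits under both the outgoing path MTU and the 576-byte ceiling. A tiny worked example of that clamp (all sizes below are assumptions, not taken from the patch):

#include <stdio.h>

int main(void)
{
	unsigned int orig_len = 1400;	/* packet being rejected */
	unsigned int pmtu = 1500;	/* path MTU back to the sender */
	unsigned int iphdr_len = 20, icmphdr_len = 8;
	unsigned int length = orig_len + iphdr_len + icmphdr_len;

	if (length > pmtu)
		length = pmtu;
	if (length > 576)
		length = 576;

	printf("reply: %u bytes total, %u bytes of the original copied\n",
	       length, length - iphdr_len - icmphdr_len);
	return 0;
}
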
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3dc1895756cb..f41e47b55c77 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6,7 +6,7 @@
* Pedro Roque <roque@di.fc.ul.pt>
* Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
*
- * $Id: addrconf.c,v 1.61 2001/04/25 20:46:34 davem Exp $
+ * $Id: addrconf.c,v 1.62 2001/04/26 19:11:59 davem Exp $
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -285,9 +285,9 @@ static struct inet6_dev * ipv6_find_idev(struct net_device *dev)
if ((idev = __in6_dev_get(dev)) == NULL) {
if ((idev = ipv6_add_dev(dev)) == NULL)
return NULL;
- if (dev->flags&IFF_UP)
- ipv6_mc_up(idev);
}
+ if (dev->flags&IFF_UP)
+ ipv6_mc_up(idev);
return idev;
}
diff --git a/net/netsyms.c b/net/netsyms.c
index bb254f2aaad1..1949d89287ef 100644
--- a/net/netsyms.c
+++ b/net/netsyms.c
@@ -374,8 +374,6 @@ EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(tcp_cwnd_application_limited);
EXPORT_SYMBOL(tcp_sendpage);
-EXPORT_SYMBOL(xrlim_allow);
-
EXPORT_SYMBOL(tcp_write_xmit);
EXPORT_SYMBOL(tcp_v4_remember_stamp);
@@ -434,6 +432,7 @@ EXPORT_SYMBOL(dev_open);
/* Used by other modules */
EXPORT_SYMBOL(in_ntoa);
+EXPORT_SYMBOL(xrlim_allow);
EXPORT_SYMBOL(ip_rcv);
EXPORT_SYMBOL(arp_rcv);
@@ -561,6 +560,7 @@ EXPORT_SYMBOL(nf_hook_slow);
EXPORT_SYMBOL(nf_hooks);
EXPORT_SYMBOL(nf_setsockopt);
EXPORT_SYMBOL(nf_getsockopt);
+EXPORT_SYMBOL(ip_ct_attach);
#endif
EXPORT_SYMBOL(register_gifconf);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 39b2989c6331..1ee0ddc0cbf8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -462,11 +462,11 @@ svc_udp_init(struct svc_sock *svsk)
}
/*
- * A state change on a listening socket means there's a connection
- * pending.
+ * A data_ready event on a listening socket means there's a connection
+ * pending. Do not use state_change as a substitute for it.
*/
static void
-svc_tcp_state_change1(struct sock *sk)
+svc_tcp_listen_data_ready(struct sock *sk, int count_unused)
{
struct svc_sock *svsk;
@@ -494,7 +494,7 @@ svc_tcp_state_change1(struct sock *sk)
* A state change on a connected socket means it's dying or dead.
*/
static void
-svc_tcp_state_change2(struct sock *sk)
+svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk;
@@ -777,10 +777,10 @@ svc_tcp_init(struct svc_sock *svsk)
if (sk->state == TCP_LISTEN) {
dprintk("setting up TCP socket for listening\n");
- sk->state_change = svc_tcp_state_change1;
+ sk->data_ready = svc_tcp_listen_data_ready;
} else {
dprintk("setting up TCP socket for reading\n");
- sk->state_change = svc_tcp_state_change2;
+ sk->state_change = svc_tcp_state_change;
sk->data_ready = svc_tcp_data_ready;
svsk->sk_reclen = 0;