From 704126ad81b8cb7d3d70adb9ecb143f4d3fb38af Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 4 Jan 2009 16:28:52 +0800 Subject: VT-d: handle Invalidation Queue Error to avoid system hang When hardware detects any error with a descriptor from the invalidation queue, it stops fetching new descriptors from the queue until software clears the Invalidation Queue Error bit in the Fault Status register. Following fix handles the IQE so the kernel won't be trapped in an infinite loop. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c4f6c101dbcd..d2e3cbfba14f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -194,6 +194,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* FSTS_REG */ #define DMA_FSTS_PPF ((u32)2) #define DMA_FSTS_PFO ((u32)1) +#define DMA_FSTS_IQE (1 << 4) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ @@ -328,7 +329,7 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); -extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); +extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); -- cgit v1.2.3 From 92a0acce186cde8ead56c6915d9479773673ea1a Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 17 Feb 2009 21:24:05 -0800 Subject: net: Kill skb_truesize_check(), it only catches false-positives. A long time ago we had bugs, primarily in TCP, where we would modify skb->truesize (for TSO queue collapsing) in ways which would corrupt the socket memory accounting. skb_truesize_check() was added in order to try and catch this error more systematically. However this debugging check has morphed into a Frankenstein of sorts and these days it does nothing other than catch false-positives. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 --------- include/net/sock.h | 1 - net/core/skbuff.c | 8 -------- net/core/sock.c | 1 - 4 files changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index cf2cb50f77d1..9dcf956ad18a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -416,15 +416,6 @@ extern void skb_over_panic(struct sk_buff *skb, int len, void *here); extern void skb_under_panic(struct sk_buff *skb, int len, void *here); -extern void skb_truesize_bug(struct sk_buff *skb); - -static inline void skb_truesize_check(struct sk_buff *skb) -{ - int len = sizeof(struct sk_buff) + skb->len; - - if (unlikely((int)skb->truesize < len)) - skb_truesize_bug(skb); -} extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, int getfrag(void *from, char *to, int offset, diff --git a/include/net/sock.h b/include/net/sock.h index ce3b5b622683..eefeeaf7fc46 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -860,7 +860,6 @@ static inline void sk_mem_uncharge(struct sock *sk, int size) static inline void sk_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { - skb_truesize_check(skb); sock_set_flag(sk, SOCK_QUEUE_SHRUNK); sk->sk_wmem_queued -= skb->truesize; sk_mem_uncharge(sk, skb->truesize); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index da74b844f4ea..c6a6b166f8d6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -143,14 +143,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here) BUG(); } -void skb_truesize_bug(struct sk_buff *skb) -{ - WARN(net_ratelimit(), KERN_ERR "SKB BUG: Invalid truesize (%u) " - "len=%u, sizeof(sk_buff)=%Zd\n", - skb->truesize, skb->len, sizeof(struct sk_buff)); -} -EXPORT_SYMBOL(skb_truesize_bug); - /* Allocate a new skbuff. We do this ourselves so we can fill in a few * 'private' fields and also do memory statistics to find all the * [BEEP] leaks. diff --git a/net/core/sock.c b/net/core/sock.c index 6f2e1337975d..6e4f14d1ef81 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1137,7 +1137,6 @@ void sock_rfree(struct sk_buff *skb) { struct sock *sk = skb->sk; - skb_truesize_check(skb); atomic_sub(skb->truesize, &sk->sk_rmem_alloc); sk_mem_uncharge(skb->sk, skb->truesize); } -- cgit v1.2.3 From 5ca431f9ae8db8c6edb9c64bebe6d6521077afd6 Mon Sep 17 00:00:00 2001 From: Eric Leblond Date: Wed, 18 Feb 2009 15:29:23 +0100 Subject: netfilter: nfnetlink_log: fix per-rule qthreshold override In NFLOG the per-rule qthreshold should overrides per-instance only it is set. With current code, the per-rule qthreshold is 1 if not set and it overrides the per-instance qthreshold. This patch modifies the default xt_NFLOG threshold from 1 to 0. Thus a value of 0 means there is no per-rule setting and the instance parameter has to apply. Signed-off-by: Eric Leblond Signed-off-by: Patrick McHardy --- include/linux/netfilter/xt_NFLOG.h | 2 +- net/netfilter/nfnetlink_log.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/xt_NFLOG.h b/include/linux/netfilter/xt_NFLOG.h index cdcd0ed58f7a..4b36aeb46a10 100644 --- a/include/linux/netfilter/xt_NFLOG.h +++ b/include/linux/netfilter/xt_NFLOG.h @@ -2,7 +2,7 @@ #define _XT_NFLOG_TARGET #define XT_NFLOG_DEFAULT_GROUP 0x1 -#define XT_NFLOG_DEFAULT_THRESHOLD 1 +#define XT_NFLOG_DEFAULT_THRESHOLD 0 #define XT_NFLOG_MASK 0x0 diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index fa49dc7fe100..580b837e44dd 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -590,8 +590,10 @@ nfulnl_log_packet(u_int8_t pf, qthreshold = inst->qthreshold; /* per-rule qthreshold overrides per-instance */ - if (qthreshold > li->u.ulog.qthreshold) - qthreshold = li->u.ulog.qthreshold; + if (li->u.ulog.qthreshold) + if (qthreshold > li->u.ulog.qthreshold) + qthreshold = li->u.ulog.qthreshold; + switch (inst->copy_mode) { case NFULNL_COPY_META: -- cgit v1.2.3 From e4dd61882e2cfe47ea72ecd825671e8e5ae29038 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Wed, 18 Feb 2009 23:31:11 -0800 Subject: vlan: Update skb->mac_header in __vlan_put_tag(). After moving mac addresses in __vlan_put_tag() skb->mac_header needs to be updated. Reported-by: Karl Hiramoto Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index f8ff918c208f..e1ff5b14310e 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -210,6 +210,7 @@ static inline struct sk_buff *__vlan_put_tag(struct sk_buff *skb, u16 vlan_tci) /* Move the mac addresses to the beginning of the new header. */ memmove(skb->data, skb->data + VLAN_HLEN, 2 * VLAN_ETH_ALEN); + skb->mac_header -= VLAN_HLEN; /* first, the ethernet type */ veth->h_vlan_proto = htons(ETH_P_8021Q); -- cgit v1.2.3 From cd97f39b7cdf1c8a9c9f52865eec795b7f0c811d Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 24 Feb 2009 19:19:49 +0100 Subject: i2c-dev: Clarify the unit of ioctl I2C_TIMEOUT The unit in which user-space can set the bus timeout value is jiffies for historical reasons (back when HZ was always 100.) This is however not good because user-space doesn't know how long a jiffy lasts. The timeout value should instead be set in a fixed time unit. Given the original value of HZ, this unit should be 10 ms, for compatibility. Signed-off-by: Jean Delvare Acked-by: Wolfram Sang --- drivers/i2c/i2c-dev.c | 6 +++++- include/linux/i2c-dev.h | 2 +- include/linux/i2c.h | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c index c171988a9f51..7e13d2df9af3 100644 --- a/drivers/i2c/i2c-dev.c +++ b/drivers/i2c/i2c-dev.c @@ -35,6 +35,7 @@ #include #include #include +#include #include static struct i2c_driver i2cdev_driver; @@ -422,7 +423,10 @@ static long i2cdev_ioctl(struct file *file, unsigned int cmd, unsigned long arg) client->adapter->retries = arg; break; case I2C_TIMEOUT: - client->adapter->timeout = arg; + /* For historical reasons, user-space sets the timeout + * value in units of 10 ms. + */ + client->adapter->timeout = msecs_to_jiffies(arg * 10); break; default: /* NOTE: returning a fault code here could cause trouble diff --git a/include/linux/i2c-dev.h b/include/linux/i2c-dev.h index 311315b56b61..fd53bfd26470 100644 --- a/include/linux/i2c-dev.h +++ b/include/linux/i2c-dev.h @@ -33,7 +33,7 @@ */ #define I2C_RETRIES 0x0701 /* number of times a device address should be polled when not acknowledging */ -#define I2C_TIMEOUT 0x0702 /* set timeout in jiffies - call with int */ +#define I2C_TIMEOUT 0x0702 /* set timeout in units of 10 ms */ /* NOTE: Slave address is 7 or 10 bits, but 10-bit addresses * are NOT supported! (due to code brokenness) diff --git a/include/linux/i2c.h b/include/linux/i2c.h index fcfbfea3af72..c86c3b07604c 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -361,7 +361,7 @@ struct i2c_adapter { struct mutex bus_lock; struct mutex clist_lock; - int timeout; + int timeout; /* in jiffies */ int retries; struct device dev; /* the adapter device */ -- cgit v1.2.3 From 4ab0d47d0ab311eb181532c1ecb6d02905685071 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi Date: Tue, 24 Feb 2009 17:35:12 -0800 Subject: gpu/drm, x86, PAT: io_mapping_create_wc and resource_size_t io_mapping_create_wc should take a resource_size_t parameter in place of unsigned long. With unsigned long, there will be no way to map greater than 4GB address in i386/32 bit. On x86, greater than 4GB addresses cannot be mapped on i386 without PAE. Return error for such a case. Patch also adds a structure for io_mapping, that saves the base, size and type on HAVE_ATOMIC_IOMAP archs, that can be used to verify the offset on io_mapping_map calls. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Cc: Dave Airlie Cc: Jesse Barnes Cc: Eric Anholt Cc: Keith Packard Signed-off-by: Ingo Molnar --- arch/x86/include/asm/iomap.h | 3 +++ arch/x86/mm/iomap_32.c | 18 +++++++++++++++++ include/linux/io-mapping.h | 46 +++++++++++++++++++++++++++++++++----------- 3 files changed, 56 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/iomap.h b/arch/x86/include/asm/iomap.h index c1f06289b14b..86af26091d6c 100644 --- a/arch/x86/include/asm/iomap.h +++ b/arch/x86/include/asm/iomap.h @@ -23,6 +23,9 @@ #include #include +int +is_io_mapping_possible(resource_size_t base, unsigned long size); + void * iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot); diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c index ca53224fc56c..6c2b1af16926 100644 --- a/arch/x86/mm/iomap_32.c +++ b/arch/x86/mm/iomap_32.c @@ -20,6 +20,24 @@ #include #include +#ifdef CONFIG_X86_PAE +int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + return 1; +} +#else +int +is_io_mapping_possible(resource_size_t base, unsigned long size) +{ + /* There is no way to map greater than 1 << 32 address without PAE */ + if (base + size > 0x100000000ULL) + return 0; + + return 1; +} +#endif + /* Map 'pfn' using fixed map 'type' and protections 'prot' */ void * diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 82df31726a54..cbc2f0cd631b 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -30,11 +30,14 @@ * See Documentation/io_mapping.txt */ -/* this struct isn't actually defined anywhere */ -struct io_mapping; - #ifdef CONFIG_HAVE_ATOMIC_IOMAP +struct io_mapping { + resource_size_t base; + unsigned long size; + pgprot_t prot; +}; + /* * For small address space machines, mapping large objects * into the kernel virtual space isn't practical. Where @@ -43,23 +46,40 @@ struct io_mapping; */ static inline struct io_mapping * -io_mapping_create_wc(unsigned long base, unsigned long size) +io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) base; + struct io_mapping *iomap; + + if (!is_io_mapping_possible(base, size)) + return NULL; + + iomap = kmalloc(sizeof(*iomap), GFP_KERNEL); + if (!iomap) + return NULL; + + iomap->base = base; + iomap->size = size; + iomap->prot = pgprot_writecombine(__pgprot(__PAGE_KERNEL)); + return iomap; } static inline void io_mapping_free(struct io_mapping *mapping) { + kfree(mapping); } /* Atomic map/unmap */ static inline void * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return iomap_atomic_prot_pfn(offset >> PAGE_SHIFT, KM_USER0, - __pgprot(__PAGE_KERNEL_WC)); + resource_size_t phys_addr; + unsigned long pfn; + + BUG_ON(offset >= mapping->size); + phys_addr = mapping->base + offset; + pfn = (unsigned long) (phys_addr >> PAGE_SHIFT); + return iomap_atomic_prot_pfn(pfn, KM_USER0, mapping->prot); } static inline void @@ -71,8 +91,9 @@ io_mapping_unmap_atomic(void *vaddr) static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - offset += (unsigned long) mapping; - return ioremap_wc(offset, PAGE_SIZE); + BUG_ON(offset >= mapping->size); + resource_size_t phys_addr = mapping->base + offset; + return ioremap_wc(phys_addr, PAGE_SIZE); } static inline void @@ -83,9 +104,12 @@ io_mapping_unmap(void *vaddr) #else +/* this struct isn't actually defined anywhere */ +struct io_mapping; + /* Create the io_mapping object*/ static inline struct io_mapping * -io_mapping_create_wc(unsigned long base, unsigned long size) +io_mapping_create_wc(resource_size_t base, unsigned long size) { return (struct io_mapping *) ioremap_wc(base, size); } -- cgit v1.2.3 From 8fed43684174b68f04d01d1210fd00536af790df Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Wed, 25 Feb 2009 20:28:24 +0100 Subject: ide: fix refcounting in device drivers During host driver module removal del_gendisk() results in a final put on drive->gendev and freeing the drive by drive_release_dev(). Convert device drivers from using struct kref to use struct device so device driver's object holds reference on ->gendev and prevents drive from prematurely going away. Also fix ->remove methods to not erroneously drop reference on a host driver by using only put_device() instead of ide*_put(). Reported-by: Stanislaw Gruszka Tested-by: Stanislaw Gruszka Signed-off-by: Bartlomiej Zolnierkiewicz --- drivers/ide/ide-cd.c | 27 ++++++++++++++++++--------- drivers/ide/ide-cd.h | 2 +- drivers/ide/ide-gd.c | 26 +++++++++++++++++--------- drivers/ide/ide-gd.h | 2 +- drivers/ide/ide-tape.c | 29 +++++++++++++++++++---------- include/linux/ide.h | 2 +- 6 files changed, 57 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 690475b834de..ddfbea41d296 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -55,7 +55,7 @@ static DEFINE_MUTEX(idecd_ref_mutex); -static void ide_cd_release(struct kref *); +static void ide_cd_release(struct device *); static struct cdrom_info *ide_cd_get(struct gendisk *disk) { @@ -67,7 +67,7 @@ static struct cdrom_info *ide_cd_get(struct gendisk *disk) if (ide_device_get(cd->drive)) cd = NULL; else - kref_get(&cd->kref); + get_device(&cd->dev); } mutex_unlock(&idecd_ref_mutex); @@ -79,7 +79,7 @@ static void ide_cd_put(struct cdrom_info *cd) ide_drive_t *drive = cd->drive; mutex_lock(&idecd_ref_mutex); - kref_put(&cd->kref, ide_cd_release); + put_device(&cd->dev); ide_device_put(drive); mutex_unlock(&idecd_ref_mutex); } @@ -1798,15 +1798,17 @@ static void ide_cd_remove(ide_drive_t *drive) ide_debug_log(IDE_DBG_FUNC, "Call %s\n", __func__); ide_proc_unregister_driver(drive, info->driver); - + device_del(&info->dev); del_gendisk(info->disk); - ide_cd_put(info); + mutex_lock(&idecd_ref_mutex); + put_device(&info->dev); + mutex_unlock(&idecd_ref_mutex); } -static void ide_cd_release(struct kref *kref) +static void ide_cd_release(struct device *dev) { - struct cdrom_info *info = to_ide_drv(kref, cdrom_info); + struct cdrom_info *info = to_ide_drv(dev, cdrom_info); struct cdrom_device_info *devinfo = &info->devinfo; ide_drive_t *drive = info->drive; struct gendisk *g = info->disk; @@ -2005,7 +2007,12 @@ static int ide_cd_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&info->kref); + info->dev.parent = &drive->gendev; + info->dev.release = ide_cd_release; + dev_set_name(&info->dev, dev_name(&drive->gendev)); + + if (device_register(&info->dev)) + goto out_free_disk; info->drive = drive; info->driver = &ide_cdrom_driver; @@ -2019,7 +2026,7 @@ static int ide_cd_probe(ide_drive_t *drive) g->driverfs_dev = &drive->gendev; g->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE; if (ide_cdrom_setup(drive)) { - ide_cd_release(&info->kref); + put_device(&info->dev); goto failed; } @@ -2029,6 +2036,8 @@ static int ide_cd_probe(ide_drive_t *drive) add_disk(g); return 0; +out_free_disk: + put_disk(g); out_free_cd: kfree(info); failed: diff --git a/drivers/ide/ide-cd.h b/drivers/ide/ide-cd.h index ac40d6cb90a2..c878bfcf1116 100644 --- a/drivers/ide/ide-cd.h +++ b/drivers/ide/ide-cd.h @@ -80,7 +80,7 @@ struct cdrom_info { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; /* Buffer for table of contents. NULL if we haven't allocated a TOC buffer for this device yet. */ diff --git a/drivers/ide/ide-gd.c b/drivers/ide/ide-gd.c index 7857b209c6df..047109419902 100644 --- a/drivers/ide/ide-gd.c +++ b/drivers/ide/ide-gd.c @@ -25,7 +25,7 @@ module_param(debug_mask, ulong, 0644); static DEFINE_MUTEX(ide_disk_ref_mutex); -static void ide_disk_release(struct kref *); +static void ide_disk_release(struct device *); static struct ide_disk_obj *ide_disk_get(struct gendisk *disk) { @@ -37,7 +37,7 @@ static struct ide_disk_obj *ide_disk_get(struct gendisk *disk) if (ide_device_get(idkp->drive)) idkp = NULL; else - kref_get(&idkp->kref); + get_device(&idkp->dev); } mutex_unlock(&ide_disk_ref_mutex); return idkp; @@ -48,7 +48,7 @@ static void ide_disk_put(struct ide_disk_obj *idkp) ide_drive_t *drive = idkp->drive; mutex_lock(&ide_disk_ref_mutex); - kref_put(&idkp->kref, ide_disk_release); + put_device(&idkp->dev); ide_device_put(drive); mutex_unlock(&ide_disk_ref_mutex); } @@ -66,17 +66,18 @@ static void ide_gd_remove(ide_drive_t *drive) struct gendisk *g = idkp->disk; ide_proc_unregister_driver(drive, idkp->driver); - + device_del(&idkp->dev); del_gendisk(g); - drive->disk_ops->flush(drive); - ide_disk_put(idkp); + mutex_lock(&ide_disk_ref_mutex); + put_device(&idkp->dev); + mutex_unlock(&ide_disk_ref_mutex); } -static void ide_disk_release(struct kref *kref) +static void ide_disk_release(struct device *dev) { - struct ide_disk_obj *idkp = to_ide_drv(kref, ide_disk_obj); + struct ide_disk_obj *idkp = to_ide_drv(dev, ide_disk_obj); ide_drive_t *drive = idkp->drive; struct gendisk *g = idkp->disk; @@ -348,7 +349,12 @@ static int ide_gd_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&idkp->kref); + idkp->dev.parent = &drive->gendev; + idkp->dev.release = ide_disk_release; + dev_set_name(&idkp->dev, dev_name(&drive->gendev)); + + if (device_register(&idkp->dev)) + goto out_free_disk; idkp->drive = drive; idkp->driver = &ide_gd_driver; @@ -373,6 +379,8 @@ static int ide_gd_probe(ide_drive_t *drive) add_disk(g); return 0; +out_free_disk: + put_disk(g); out_free_idkp: kfree(idkp); failed: diff --git a/drivers/ide/ide-gd.h b/drivers/ide/ide-gd.h index a86779f0756b..b604bdd318a1 100644 --- a/drivers/ide/ide-gd.h +++ b/drivers/ide/ide-gd.h @@ -17,7 +17,7 @@ struct ide_disk_obj { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; unsigned int openers; /* protected by BKL for now */ /* Last failed packet command */ diff --git a/drivers/ide/ide-tape.c b/drivers/ide/ide-tape.c index d7ecd3c79757..bb450a7608c2 100644 --- a/drivers/ide/ide-tape.c +++ b/drivers/ide/ide-tape.c @@ -169,7 +169,7 @@ typedef struct ide_tape_obj { ide_drive_t *drive; struct ide_driver *driver; struct gendisk *disk; - struct kref kref; + struct device dev; /* * failed_pc points to the last failed packet command, or contains @@ -267,7 +267,7 @@ static DEFINE_MUTEX(idetape_ref_mutex); static struct class *idetape_sysfs_class; -static void ide_tape_release(struct kref *); +static void ide_tape_release(struct device *); static struct ide_tape_obj *ide_tape_get(struct gendisk *disk) { @@ -279,7 +279,7 @@ static struct ide_tape_obj *ide_tape_get(struct gendisk *disk) if (ide_device_get(tape->drive)) tape = NULL; else - kref_get(&tape->kref); + get_device(&tape->dev); } mutex_unlock(&idetape_ref_mutex); return tape; @@ -290,7 +290,7 @@ static void ide_tape_put(struct ide_tape_obj *tape) ide_drive_t *drive = tape->drive; mutex_lock(&idetape_ref_mutex); - kref_put(&tape->kref, ide_tape_release); + put_device(&tape->dev); ide_device_put(drive); mutex_unlock(&idetape_ref_mutex); } @@ -308,7 +308,7 @@ static struct ide_tape_obj *ide_tape_chrdev_get(unsigned int i) mutex_lock(&idetape_ref_mutex); tape = idetape_devs[i]; if (tape) - kref_get(&tape->kref); + get_device(&tape->dev); mutex_unlock(&idetape_ref_mutex); return tape; } @@ -2256,15 +2256,17 @@ static void ide_tape_remove(ide_drive_t *drive) idetape_tape_t *tape = drive->driver_data; ide_proc_unregister_driver(drive, tape->driver); - + device_del(&tape->dev); ide_unregister_region(tape->disk); - ide_tape_put(tape); + mutex_lock(&idetape_ref_mutex); + put_device(&tape->dev); + mutex_unlock(&idetape_ref_mutex); } -static void ide_tape_release(struct kref *kref) +static void ide_tape_release(struct device *dev) { - struct ide_tape_obj *tape = to_ide_drv(kref, ide_tape_obj); + struct ide_tape_obj *tape = to_ide_drv(dev, ide_tape_obj); ide_drive_t *drive = tape->drive; struct gendisk *g = tape->disk; @@ -2407,7 +2409,12 @@ static int ide_tape_probe(ide_drive_t *drive) ide_init_disk(g, drive); - kref_init(&tape->kref); + tape->dev.parent = &drive->gendev; + tape->dev.release = ide_tape_release; + dev_set_name(&tape->dev, dev_name(&drive->gendev)); + + if (device_register(&tape->dev)) + goto out_free_disk; tape->drive = drive; tape->driver = &idetape_driver; @@ -2436,6 +2443,8 @@ static int ide_tape_probe(ide_drive_t *drive) return 0; +out_free_disk: + put_disk(g); out_free_tape: kfree(tape); failed: diff --git a/include/linux/ide.h b/include/linux/ide.h index 194da5a4b0d6..fe235b65207e 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -663,7 +663,7 @@ typedef struct ide_drive_s ide_drive_t; #define to_ide_device(dev) container_of(dev, ide_drive_t, gendev) #define to_ide_drv(obj, cont_type) \ - container_of(obj, struct cont_type, kref) + container_of(obj, struct cont_type, dev) #define ide_drv_g(disk, cont_type) \ container_of((disk)->private_data, struct cont_type, driver) -- cgit v1.2.3 From a682604838763981613e42015cd0e39f2989d6bb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 25 Feb 2009 18:03:42 -0800 Subject: rcu: Teach RCU that idle task is not quiscent state at boot This patch fixes a bug located by Vegard Nossum with the aid of kmemcheck, updated based on review comments from Nick Piggin, Ingo Molnar, and Andrew Morton. And cleans up the variable-name and function-name language. ;-) The boot CPU runs in the context of its idle thread during boot-up. During this time, idle_cpu(0) will always return nonzero, which will fool Classic and Hierarchical RCU into deciding that a large chunk of the boot-up sequence is a big long quiescent state. This in turn causes RCU to prematurely end grace periods during this time. This patch changes the rcutree.c and rcuclassic.c rcu_check_callbacks() function to ignore the idle task as a quiescent state until the system has started up the scheduler in rest_init(), introducing a new non-API function rcu_idle_now_means_idle() to inform RCU of this transition. RCU maintains an internal rcu_idle_cpu_truthful variable to track this state, which is then used by rcu_check_callback() to determine if it should believe idle_cpu(). Because this patch has the effect of disallowing RCU grace periods during long stretches of the boot-up sequence, this patch also introduces Josh Triplett's UP-only optimization that makes synchronize_rcu() be a no-op if num_online_cpus() returns 1. This allows boot-time code that calls synchronize_rcu() to proceed normally. Note, however, that RCU callbacks registered by call_rcu() will likely queue up until later in the boot sequence. Although rcuclassic and rcutree can also use this same optimization after boot completes, rcupreempt must restrict its use of this optimization to the portion of the boot sequence before the scheduler starts up, given that an rcupreempt RCU read-side critical section may be preeempted. In addition, this patch takes Nick Piggin's suggestion to make the system_state global variable be __read_mostly. Changes since v4: o Changes the name of the introduced function and variable to be less emotional. ;-) Changes since v3: o WARN_ON(nr_context_switches() > 0) to verify that RCU switches out of boot-time mode before the first context switch, as suggested by Nick Piggin. Changes since v2: o Created rcu_blocking_is_gp() internal-to-RCU API that determines whether a call to synchronize_rcu() is itself a grace period. o The definition of rcu_blocking_is_gp() for rcuclassic and rcutree checks to see if but a single CPU is online. o The definition of rcu_blocking_is_gp() for rcupreempt checks to see both if but a single CPU is online and if the system is still in early boot. This allows rcupreempt to again work correctly if running on a single CPU after booting is complete. o Added check to rcupreempt's synchronize_sched() for there being but one online CPU. Tested all three variants both SMP and !SMP, booted fine, passed a short rcutorture test on both x86 and Power. Located-by: Vegard Nossum Tested-by: Vegard Nossum Tested-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/rcuclassic.h | 6 ++++++ include/linux/rcupdate.h | 4 ++++ include/linux/rcupreempt.h | 15 +++++++++++++++ include/linux/rcutree.h | 6 ++++++ init/main.c | 3 ++- kernel/rcuclassic.c | 4 ++-- kernel/rcupdate.c | 12 ++++++++++++ kernel/rcupreempt.c | 3 +++ kernel/rcutree.c | 4 ++-- 9 files changed, 52 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcuclassic.h b/include/linux/rcuclassic.h index f3f697df1d71..80044a4f3ab9 100644 --- a/include/linux/rcuclassic.h +++ b/include/linux/rcuclassic.h @@ -181,4 +181,10 @@ extern long rcu_batches_completed_bh(void); #define rcu_enter_nohz() do { } while (0) #define rcu_exit_nohz() do { } while (0) +/* A context switch is a grace period for rcuclassic. */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1; +} + #endif /* __LINUX_RCUCLASSIC_H */ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 921340a7b71c..528343e6da51 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -52,6 +52,9 @@ struct rcu_head { void (*func)(struct rcu_head *head); }; +/* Internal to kernel, but needed by rcupreempt.h. */ +extern int rcu_scheduler_active; + #if defined(CONFIG_CLASSIC_RCU) #include #elif defined(CONFIG_TREE_RCU) @@ -265,6 +268,7 @@ extern void rcu_barrier_sched(void); /* Internal to kernel */ extern void rcu_init(void); +extern void rcu_scheduler_starting(void); extern int rcu_needs_cpu(int cpu); #endif /* __LINUX_RCUPDATE_H */ diff --git a/include/linux/rcupreempt.h b/include/linux/rcupreempt.h index 3e05c09b54a2..74304b4538d8 100644 --- a/include/linux/rcupreempt.h +++ b/include/linux/rcupreempt.h @@ -142,4 +142,19 @@ static inline void rcu_exit_nohz(void) #define rcu_exit_nohz() do { } while (0) #endif /* CONFIG_NO_HZ */ +/* + * A context switch is a grace period for rcupreempt synchronize_rcu() + * only during early boot, before the scheduler has been initialized. + * So, how the heck do we get a context switch? Well, if the caller + * invokes synchronize_rcu(), they are willing to accept a context + * switch, so we simply pretend that one happened. + * + * After boot, there might be a blocked or preempted task in an RCU + * read-side critical section, so we cannot then take the fastpath. + */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1 && !rcu_scheduler_active; +} + #endif /* __LINUX_RCUPREEMPT_H */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index d4368b7975c3..a722fb67bb2d 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -326,4 +326,10 @@ static inline void rcu_exit_nohz(void) } #endif /* CONFIG_NO_HZ */ +/* A context switch is a grace period for rcutree. */ +static inline int rcu_blocking_is_gp(void) +{ + return num_online_cpus() == 1; +} + #endif /* __LINUX_RCUTREE_H */ diff --git a/init/main.c b/init/main.c index 844209453c02..83697e160b3a 100644 --- a/init/main.c +++ b/init/main.c @@ -97,7 +97,7 @@ static inline void mark_rodata_ro(void) { } extern void tc_init(void); #endif -enum system_states system_state; +enum system_states system_state __read_mostly; EXPORT_SYMBOL(system_state); /* @@ -463,6 +463,7 @@ static noinline void __init_refok rest_init(void) * at least once to get things moving: */ init_idle_bootup_task(current); + rcu_scheduler_starting(); preempt_enable_no_resched(); schedule(); preempt_disable(); diff --git a/kernel/rcuclassic.c b/kernel/rcuclassic.c index bd5a9003497c..654c640a6b9c 100644 --- a/kernel/rcuclassic.c +++ b/kernel/rcuclassic.c @@ -679,8 +679,8 @@ int rcu_needs_cpu(int cpu) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && rcu_scheduler_active && + !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c index d92a76a881aa..cae8a059cf47 100644 --- a/kernel/rcupdate.c +++ b/kernel/rcupdate.c @@ -44,6 +44,7 @@ #include #include #include +#include enum rcu_barrier { RCU_BARRIER_STD, @@ -55,6 +56,7 @@ static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL}; static atomic_t rcu_barrier_cpu_count; static DEFINE_MUTEX(rcu_barrier_mutex); static struct completion rcu_barrier_completion; +int rcu_scheduler_active __read_mostly; /* * Awaken the corresponding synchronize_rcu() instance now that a @@ -80,6 +82,10 @@ void wakeme_after_rcu(struct rcu_head *head) void synchronize_rcu(void) { struct rcu_synchronize rcu; + + if (rcu_blocking_is_gp()) + return; + init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu(&rcu.head, wakeme_after_rcu); @@ -175,3 +181,9 @@ void __init rcu_init(void) __rcu_init(); } +void rcu_scheduler_starting(void) +{ + WARN_ON(num_online_cpus() != 1); + WARN_ON(nr_context_switches() > 0); + rcu_scheduler_active = 1; +} diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 33cfc50781f9..5d59e850fb71 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c @@ -1181,6 +1181,9 @@ void __synchronize_sched(void) { struct rcu_synchronize rcu; + if (num_online_cpus() == 1) + return; /* blocking is gp if only one CPU! */ + init_completion(&rcu.completion); /* Will wake me after RCU finished. */ call_rcu_sched(&rcu.head, wakeme_after_rcu); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index b2fd602a6f6f..97ce31579ec0 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -948,8 +948,8 @@ static void rcu_do_batch(struct rcu_data *rdp) void rcu_check_callbacks(int cpu, int user) { if (user || - (idle_cpu(cpu) && !in_softirq() && - hardirq_count() <= (1 << HARDIRQ_SHIFT))) { + (idle_cpu(cpu) && rcu_scheduler_active && + !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) { /* * Get here if this CPU took its interrupt from user -- cgit v1.2.3 From 1e42807918d17e8c93bf14fbb74be84b141334c1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Feb 2009 09:03:10 +0100 Subject: block: reduce stack footprint of blk_recount_segments() blk_recalc_rq_segments() requires a request structure passed in, which we don't have from blk_recount_segments(). So the latter allocates one on the stack, using > 400 bytes of stack for that. This can cause us to spill over one page of stack from ext4 at least: 0) 4560 400 blk_recount_segments+0x43/0x62 1) 4160 32 bio_phys_segments+0x1c/0x24 2) 4128 32 blk_rq_bio_prep+0x2a/0xf9 3) 4096 32 init_request_from_bio+0xf9/0xfe 4) 4064 112 __make_request+0x33c/0x3f6 5) 3952 144 generic_make_request+0x2d1/0x321 6) 3808 64 submit_bio+0xb9/0xc3 7) 3744 48 submit_bh+0xea/0x10e 8) 3696 368 ext4_mb_init_cache+0x257/0xa6a [ext4] 9) 3328 288 ext4_mb_regular_allocator+0x421/0xcd9 [ext4] 10) 3040 160 ext4_mb_new_blocks+0x211/0x4b4 [ext4] 11) 2880 336 ext4_ext_get_blocks+0xb61/0xd45 [ext4] 12) 2544 96 ext4_get_blocks_wrap+0xf2/0x200 [ext4] 13) 2448 80 ext4_da_get_block_write+0x6e/0x16b [ext4] 14) 2368 352 mpage_da_map_blocks+0x7e/0x4b3 [ext4] 15) 2016 352 ext4_da_writepages+0x2ce/0x43c [ext4] 16) 1664 32 do_writepages+0x2d/0x3c 17) 1632 144 __writeback_single_inode+0x162/0x2cd 18) 1488 96 generic_sync_sb_inodes+0x1e3/0x32b 19) 1392 16 sync_sb_inodes+0xe/0x10 20) 1376 48 writeback_inodes+0x69/0xb3 21) 1328 208 balance_dirty_pages_ratelimited_nr+0x187/0x2f9 22) 1120 224 generic_file_buffered_write+0x1d4/0x2c4 23) 896 176 __generic_file_aio_write_nolock+0x35f/0x393 24) 720 80 generic_file_aio_write+0x6c/0xc8 25) 640 80 ext4_file_write+0xa9/0x137 [ext4] 26) 560 320 do_sync_write+0xf0/0x137 27) 240 48 vfs_write+0xb3/0x13c 28) 192 64 sys_write+0x4c/0x74 29) 128 128 system_call_fastpath+0x16/0x1b Split the segment counting out into a __blk_recalc_rq_segments() helper to avoid allocating an onstack request just for checking the physical segment count. Signed-off-by: Jens Axboe --- block/blk-merge.c | 94 ++++++++++++++++++++++++++++---------------------- include/linux/blkdev.h | 2 ++ 2 files changed, 55 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/block/blk-merge.c b/block/blk-merge.c index b92f5b0866b0..a104593e70c3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -38,72 +38,84 @@ void blk_recalc_rq_sectors(struct request *rq, int nsect) } } -void blk_recalc_rq_segments(struct request *rq) +static unsigned int __blk_recalc_rq_segments(struct request_queue *q, + struct bio *bio, + unsigned int *seg_size_ptr) { - int nr_phys_segs; unsigned int phys_size; struct bio_vec *bv, *bvprv = NULL; - int seg_size; - int cluster; - struct req_iterator iter; - int high, highprv = 1; - struct request_queue *q = rq->q; + int cluster, i, high, highprv = 1; + unsigned int seg_size, nr_phys_segs; + struct bio *fbio; - if (!rq->bio) - return; + if (!bio) + return 0; + fbio = bio; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); seg_size = 0; phys_size = nr_phys_segs = 0; - rq_for_each_segment(bv, rq, iter) { - /* - * the trick here is making sure that a high page is never - * considered part of another segment, since that might - * change with the bounce page. - */ - high = page_to_pfn(bv->bv_page) > q->bounce_pfn; - if (high || highprv) - goto new_segment; - if (cluster) { - if (seg_size + bv->bv_len > q->max_segment_size) - goto new_segment; - if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) - goto new_segment; - if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + for_each_bio(bio) { + bio_for_each_segment(bv, bio, i) { + /* + * the trick here is making sure that a high page is + * never considered part of another segment, since that + * might change with the bounce page. + */ + high = page_to_pfn(bv->bv_page) > q->bounce_pfn; + if (high || highprv) goto new_segment; + if (cluster) { + if (seg_size + bv->bv_len > q->max_segment_size) + goto new_segment; + if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv)) + goto new_segment; + if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv)) + goto new_segment; + + seg_size += bv->bv_len; + bvprv = bv; + continue; + } +new_segment: + if (nr_phys_segs == 1 && seg_size > + fbio->bi_seg_front_size) + fbio->bi_seg_front_size = seg_size; - seg_size += bv->bv_len; + nr_phys_segs++; bvprv = bv; - continue; + seg_size = bv->bv_len; + highprv = high; } -new_segment: - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) - rq->bio->bi_seg_front_size = seg_size; - - nr_phys_segs++; - bvprv = bv; - seg_size = bv->bv_len; - highprv = high; } - if (nr_phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) + if (seg_size_ptr) + *seg_size_ptr = seg_size; + + return nr_phys_segs; +} + +void blk_recalc_rq_segments(struct request *rq) +{ + unsigned int seg_size = 0, phys_segs; + + phys_segs = __blk_recalc_rq_segments(rq->q, rq->bio, &seg_size); + + if (phys_segs == 1 && seg_size > rq->bio->bi_seg_front_size) rq->bio->bi_seg_front_size = seg_size; if (seg_size > rq->biotail->bi_seg_back_size) rq->biotail->bi_seg_back_size = seg_size; - rq->nr_phys_segments = nr_phys_segs; + rq->nr_phys_segments = phys_segs; } void blk_recount_segments(struct request_queue *q, struct bio *bio) { - struct request rq; struct bio *nxt = bio->bi_next; - rq.q = q; - rq.bio = rq.biotail = bio; + bio->bi_next = NULL; - blk_recalc_rq_segments(&rq); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, NULL); bio->bi_next = nxt; - bio->bi_phys_segments = rq.nr_phys_segments; bio->bi_flags |= (1 << BIO_SEG_VALID); } EXPORT_SYMBOL(blk_recount_segments); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dcaa0fd84b02..465d6babc847 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -708,6 +708,8 @@ struct req_iterator { }; /* This should not be used directly - use rq_for_each_segment */ +#define for_each_bio(_bio) \ + for (; _bio; _bio = _bio->bi_next) #define __rq_for_each_bio(_bio, rq) \ if ((rq->bio)) \ for (_bio = (rq)->bio; _bio; _bio = _bio->bi_next) -- cgit v1.2.3 From 54e991242850edc8c53f71fa5aa3ba7a93ce38f5 Mon Sep 17 00:00:00 2001 From: Dhaval Giani Date: Fri, 27 Feb 2009 15:13:54 +0530 Subject: sched: don't allow setuid to succeed if the user does not have rt bandwidth Impact: fix hung task with certain (non-default) rt-limit settings Corey Hickey reported that on using setuid to change the uid of a rt process, the process would be unkillable and not be running. This is because there was no rt runtime for that user group. Add in a check to see if a user can attach an rt task to its task group. On failure, return EINVAL, which is also returned in CONFIG_CGROUP_SCHED. Reported-by: Corey Hickey Signed-off-by: Dhaval Giani Acked-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++++ kernel/sched.c | 13 +++++++++++-- kernel/sys.c | 31 ++++++++++++++++++++----------- kernel/user.c | 18 ++++++++++++++++++ 4 files changed, 53 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 8981e52c714f..8c216e057c94 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2291,9 +2291,13 @@ extern long sched_group_rt_runtime(struct task_group *tg); extern int sched_group_set_rt_period(struct task_group *tg, long rt_period_us); extern long sched_group_rt_period(struct task_group *tg); +extern int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk); #endif #endif +extern int task_can_switch_user(struct user_struct *up, + struct task_struct *tsk); + #ifdef CONFIG_TASK_XACCT static inline void add_rchar(struct task_struct *tsk, ssize_t amt) { diff --git a/kernel/sched.c b/kernel/sched.c index c3baa9653d1d..8e2558c2ba67 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -9224,6 +9224,16 @@ static int sched_rt_global_constraints(void) return ret; } + +int sched_rt_can_attach(struct task_group *tg, struct task_struct *tsk) +{ + /* Don't accept realtime tasks when there is no way for them to run */ + if (rt_task(tsk) && tg->rt_bandwidth.rt_runtime == 0) + return 0; + + return 1; +} + #else /* !CONFIG_RT_GROUP_SCHED */ static int sched_rt_global_constraints(void) { @@ -9317,8 +9327,7 @@ cpu_cgroup_can_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, struct task_struct *tsk) { #ifdef CONFIG_RT_GROUP_SCHED - /* Don't accept realtime tasks when there is no way for them to run */ - if (rt_task(tsk) && cgroup_tg(cgrp)->rt_bandwidth.rt_runtime == 0) + if (!sched_rt_can_attach(cgroup_tg(cgrp), tsk)) return -EINVAL; #else /* We don't support RT-tasks being in separate groups */ diff --git a/kernel/sys.c b/kernel/sys.c index f145c415bc16..37f458e6882a 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -559,7 +559,7 @@ error: abort_creds(new); return retval; } - + /* * change the user struct in a credentials set to match the new UID */ @@ -571,6 +571,11 @@ static int set_user(struct cred *new) if (!new_user) return -EAGAIN; + if (!task_can_switch_user(new_user, current)) { + free_uid(new_user); + return -EINVAL; + } + if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && new_user != INIT_USER) { @@ -631,10 +636,11 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid) goto error; } - retval = -EAGAIN; - if (new->uid != old->uid && set_user(new) < 0) - goto error; - + if (new->uid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; + } if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old->uid)) new->suid = new->euid; @@ -680,9 +686,10 @@ SYSCALL_DEFINE1(setuid, uid_t, uid) retval = -EPERM; if (capable(CAP_SETUID)) { new->suid = new->uid = uid; - if (uid != old->uid && set_user(new) < 0) { - retval = -EAGAIN; - goto error; + if (uid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; } } else if (uid != old->uid && uid != new->suid) { goto error; @@ -734,11 +741,13 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid) goto error; } - retval = -EAGAIN; if (ruid != (uid_t) -1) { new->uid = ruid; - if (ruid != old->uid && set_user(new) < 0) - goto error; + if (ruid != old->uid) { + retval = set_user(new); + if (retval < 0) + goto error; + } } if (euid != (uid_t) -1) new->euid = euid; diff --git a/kernel/user.c b/kernel/user.c index 3551ac742395..6a9b696128c8 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -362,6 +362,24 @@ static void free_user(struct user_struct *up, unsigned long flags) #endif +#if defined(CONFIG_RT_GROUP_SCHED) && defined(CONFIG_USER_SCHED) +/* + * We need to check if a setuid can take place. This function should be called + * before successfully completing the setuid. + */ +int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) +{ + + return sched_rt_can_attach(up->tg, tsk); + +} +#else +int task_can_switch_user(struct user_struct *up, struct task_struct *tsk) +{ + return 1; +} +#endif + /* * Locate the user_struct for the passed UID. If found, take a ref on it. The * caller must undo that ref with free_uid(). -- cgit v1.2.3 From 5170836679185357dc1b7660bad13287b39e1e33 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 27 Feb 2009 14:03:03 -0800 Subject: Fix recursive lock in free_uid()/free_user_ns() free_uid() and free_user_ns() are corecursive when CONFIG_USER_SCHED=n, but free_user_ns() is called from free_uid() by way of uid_hash_remove(), which requires uidhash_lock to be held. free_user_ns() then calls free_uid() to complete the destruction. Fix this by deferring the destruction of the user_namespace. Signed-off-by: David Howells Acked-by: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/user_namespace.h | 1 + kernel/user_namespace.c | 21 +++++++++++++++++---- 2 files changed, 18 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 315bcd375224..cc4f45361dbb 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -13,6 +13,7 @@ struct user_namespace { struct kref kref; struct hlist_head uidhash_table[UIDHASH_SZ]; struct user_struct *creator; + struct work_struct destroyer; }; extern struct user_namespace init_user_ns; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 79084311ee57..076c7c8215b0 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -60,12 +60,25 @@ int create_user_ns(struct cred *new) return 0; } -void free_user_ns(struct kref *kref) +/* + * Deferred destructor for a user namespace. This is required because + * free_user_ns() may be called with uidhash_lock held, but we need to call + * back to free_uid() which will want to take the lock again. + */ +static void free_user_ns_work(struct work_struct *work) { - struct user_namespace *ns; - - ns = container_of(kref, struct user_namespace, kref); + struct user_namespace *ns = + container_of(work, struct user_namespace, destroyer); free_uid(ns->creator); kfree(ns); } + +void free_user_ns(struct kref *kref) +{ + struct user_namespace *ns = + container_of(kref, struct user_namespace, kref); + + INIT_WORK(&ns->destroyer, free_user_ns_work); + schedule_work(&ns->destroyer); +} EXPORT_SYMBOL(free_user_ns); -- cgit v1.2.3 From 5c2522218059ca1f4174a568923b988aad3ddfda Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Fri, 27 Feb 2009 10:01:36 +0000 Subject: net headers: cleanup dcbnl.h 1) add an include for 2) change dcbmsg.dcb_family from unsigned char to __u8 to be more consistent with use of kernel types Signed-off-by: Chris Leech Acked-by: Sam Ravnborg Signed-off-by: David S. Miller --- include/linux/dcbnl.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h index b0ef274e0031..7d2e10006188 100644 --- a/include/linux/dcbnl.h +++ b/include/linux/dcbnl.h @@ -20,10 +20,12 @@ #ifndef __LINUX_DCBNL_H__ #define __LINUX_DCBNL_H__ +#include + #define DCB_PROTO_VERSION 1 struct dcbmsg { - unsigned char dcb_family; + __u8 dcb_family; __u8 cmd; __u16 dcb_pad; }; -- cgit v1.2.3 From 709ab3261e3ed789c0bb31c6ab53c9eccb276522 Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Fri, 27 Feb 2009 10:01:42 +0000 Subject: net headers: export dcbnl.h The DCB netlink interface is required for building the userspace tools available at e1000.sourceforge.net Signed-off-by: Chris Leech Signed-off-by: David S. Miller --- include/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/Kbuild b/include/linux/Kbuild index b97cdc516a8f..106c3ba50844 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -52,6 +52,7 @@ header-y += const.h header-y += cgroupstats.h header-y += cramfs_fs.h header-y += cycx_cfm.h +header-y += dcbnl.h header-y += dlmconstants.h header-y += dlm_device.h header-y += dlm_netlink.h -- cgit v1.2.3 From 5ce04e3de8c36ba37c56e94e3c4dc7973c7f546c Mon Sep 17 00:00:00 2001 From: "Pallipadi, Venkatesh" Date: Sun, 1 Mar 2009 08:53:27 -0800 Subject: fix warning in io_mapping_map_wc() Signed-off-by: Venkatesh Pallipadi Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index cbc2f0cd631b..0adb0f91568c 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -91,8 +91,11 @@ io_mapping_unmap_atomic(void *vaddr) static inline void * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { + resource_size_t phys_addr; + BUG_ON(offset >= mapping->size); - resource_size_t phys_addr = mapping->base + offset; + phys_addr = mapping->base + offset; + return ioremap_wc(phys_addr, PAGE_SIZE); } -- cgit v1.2.3