From 5dd531a03ad721b41911ddb32e6e0481404e7aaf Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 23 Aug 2010 13:52:19 +0200 Subject: block: add function call to switch the IO scheduler from a driver Currently drivers must do an elevator_exit() + elevator_init() to switch IO schedulers. There are a few problems with this: - Since commit 1abec4fdbb142e3ccb6ce99832fae42129134a96, elevator_init() requires a zeroed out q->elevator pointer. The two existing in-kernel users don't do that. - It will only work at initialization time, since using the above two-staged construct does not properly quiesce the queue. So add elevator_change() which takes care of this, and convert the elv_iosched_store() sysfs interface to use this helper as well. Reported-by: Peter Oberparleiter Reported-by: Kevin Vigor Signed-off-by: Jens Axboe --- include/linux/elevator.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 2c958f4fce1e..926b50322a46 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -136,6 +136,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); +extern int elevator_change(struct request_queue *, const char *); extern int elv_rq_merge_ok(struct request *, struct bio *); /* -- cgit v1.2.3 From 4e4438b86527e8bf1f49503a30d487e401e64f9c Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 1 Sep 2010 08:55:24 -0600 Subject: gpiolib: Add 'struct gpio_chip' forward declaration for !GPIOLIB case With CONFIG_GPIOLIB=n, the 'struct gpio_chip' is not declared, so the following pops up on PowerPC: cc1: warnings being treated as errors In file included from arch/powerpc/platforms/52xx/mpc52xx_common.c:19: include/linux/of_gpio.h:74: warning: 'struct gpio_chip' declared inside parameter list include/linux/of_gpio.h:74: warning: its scope is only 
this definition or declaration, which is probably not what you want include/linux/of_gpio.h:75: warning: 'struct gpio_chip' declared inside parameter list make[2]: *** [arch/powerpc/platforms/52xx/mpc52xx_common.o] Error 1 This patch fixes the issue by providing the proper forward declaration. Signed-off-by: Anton Vorontsov Signed-off-by: Grant Likely --- include/linux/gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/gpio.h b/include/linux/gpio.h index 03f616b78cfa..e41f7dd1ae67 100644 --- a/include/linux/gpio.h +++ b/include/linux/gpio.h @@ -13,6 +13,7 @@ #include struct device; +struct gpio_chip; /* * Some platforms don't support the GPIO programming interface. -- cgit v1.2.3 From ef5dc121d5a0bb1fa477c5395277259f07d318a3 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 2 Sep 2010 15:48:16 -0700 Subject: mutex: Fix annotations to include it in kernel-locking docbook Fix kernel-doc notation in linux/mutex.h and kernel/mutex.c, then add these 2 files to the kernel-locking docbook as the Mutex API reference chapter. Add one API function to mutex-design.txt and correct a typo in that file. Signed-off-by: Randy Dunlap Cc: Rusty Russell LKML-Reference: <20100902154816.6cc2f9ad.randy.dunlap@oracle.com> Signed-off-by: Ingo Molnar --- Documentation/DocBook/kernel-locking.tmpl | 6 ++++++ Documentation/mutex-design.txt | 3 ++- include/linux/mutex.h | 8 ++++++++ kernel/mutex.c | 23 +++++++---------------- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 0b1a3f97f285..a0d479d1e1dd 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1961,6 +1961,12 @@ machines due to caching. 
+ + Mutex API reference +!Iinclude/linux/mutex.h +!Ekernel/mutex.c + + Further reading diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt index c91ccc0720fa..38c10fd7f411 100644 --- a/Documentation/mutex-design.txt +++ b/Documentation/mutex-design.txt @@ -9,7 +9,7 @@ firstly, there's nothing wrong with semaphores. But if the simpler mutex semantics are sufficient for your code, then there are a couple of advantages of mutexes: - - 'struct mutex' is smaller on most architectures: .e.g on x86, + - 'struct mutex' is smaller on most architectures: E.g. on x86, 'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes. A smaller structure size means less RAM footprint, and better CPU-cache utilization. @@ -136,3 +136,4 @@ the APIs of 'struct mutex' have been streamlined: void mutex_lock_nested(struct mutex *lock, unsigned int subclass); int mutex_lock_interruptible_nested(struct mutex *lock, unsigned int subclass); + int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 878cab4f5fcc..f363bc8fdc74 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -78,6 +78,14 @@ struct mutex_waiter { # include #else # define __DEBUG_MUTEX_INITIALIZER(lockname) +/** + * mutex_init - initialize the mutex + * @mutex: the mutex to be initialized + * + * Initialize the mutex to unlocked state. + * + * It is not allowed to initialize an already locked mutex. + */ # define mutex_init(mutex) \ do { \ static struct lock_class_key __key; \ diff --git a/kernel/mutex.c b/kernel/mutex.c index 4c0b7b3e6d2e..200407c1502f 100644 --- a/kernel/mutex.c +++ b/kernel/mutex.c @@ -36,15 +36,6 @@ # include #endif -/*** - * mutex_init - initialize the mutex - * @lock: the mutex to be initialized - * @key: the lock_class_key for the class; used by mutex lock debugging - * - * Initialize the mutex to unlocked state. - * - * It is not allowed to initialize an already locked mutex. 
- */ void __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) { @@ -68,7 +59,7 @@ EXPORT_SYMBOL(__mutex_init); static __used noinline void __sched __mutex_lock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_lock - acquire the mutex * @lock: the mutex to be acquired * @@ -105,7 +96,7 @@ EXPORT_SYMBOL(mutex_lock); static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); -/*** +/** * mutex_unlock - release the mutex * @lock: the mutex to be released * @@ -364,8 +355,8 @@ __mutex_lock_killable_slowpath(atomic_t *lock_count); static noinline int __sched __mutex_lock_interruptible_slowpath(atomic_t *lock_count); -/*** - * mutex_lock_interruptible - acquire the mutex, interruptable +/** + * mutex_lock_interruptible - acquire the mutex, interruptible * @lock: the mutex to be acquired * * Lock the mutex like mutex_lock(), and return 0 if the mutex has @@ -456,15 +447,15 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count) return prev == 1; } -/*** - * mutex_trylock - try acquire the mutex, without waiting +/** + * mutex_trylock - try to acquire the mutex, without waiting * @lock: the mutex to be acquired * * Try to acquire the mutex atomically. Returns 1 if the mutex * has been acquired successfully, and 0 on contention. * * NOTE: this function follows the spin_trylock() convention, so - * it is negated to the down_trylock() return values! Be careful + * it is negated from the down_trylock() return values! Be careful * about this when converting semaphore users to mutexes. * * This function must not be used in interrupt context. The -- cgit v1.2.3 From 29bc17ecb856ffb2b47c7009a71971c6f9334205 Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Sat, 4 Sep 2010 22:56:44 +0200 Subject: io-mapping: Fix the address space annotations Fixes a bunch of sparse warnings in io-mapping.h because of the inconsistent __iomem usage. 
Signed-off-by: Francisco Jerez LKML-Reference: <1283633804-11749-2-git-send-email-currojerez@riseup.net> Cc: Andrew Morton Signed-off-by: Ingo Molnar --- include/linux/io-mapping.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h index 0a6b3d5c490c..7fb592793738 100644 --- a/include/linux/io-mapping.h +++ b/include/linux/io-mapping.h @@ -79,7 +79,7 @@ io_mapping_free(struct io_mapping *mapping) } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) @@ -94,12 +94,12 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping, } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) +io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { iounmap_atomic(vaddr, slot); } -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { resource_size_t phys_addr; @@ -111,7 +111,7 @@ io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { iounmap(vaddr); } @@ -125,38 +125,38 @@ struct io_mapping; static inline struct io_mapping * io_mapping_create_wc(resource_size_t base, unsigned long size) { - return (struct io_mapping *) ioremap_wc(base, size); + return (struct io_mapping __force *) ioremap_wc(base, size); } static inline void io_mapping_free(struct io_mapping *mapping) { - iounmap(mapping); + iounmap((void __force __iomem *) mapping); } /* Atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_atomic_wc(struct io_mapping *mapping, unsigned long offset, int slot) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap_atomic(void *vaddr, int slot) 
+io_mapping_unmap_atomic(void __iomem *vaddr, int slot) { } /* Non-atomic map/unmap */ -static inline void * +static inline void __iomem * io_mapping_map_wc(struct io_mapping *mapping, unsigned long offset) { - return ((char *) mapping) + offset; + return ((char __force __iomem *) mapping) + offset; } static inline void -io_mapping_unmap(void *vaddr) +io_mapping_unmap(void __iomem *vaddr) { } -- cgit v1.2.3 From febc88c5948f81114f64c3412011d695aecae233 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 7 Sep 2010 14:46:37 +0200 Subject: semaphore: Add DEFINE_SEMAPHORE The full cleanup of init_MUTEX[_LOCKED] and DECLARE_MUTEX has not been done. Some of the users are real semaphores and we should name them as such instead of confusing everyone with "MUTEX". Provide the infrastructure to get finally rid of init_MUTEX[_LOCKED] and DECLARE_MUTEX. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Christoph Hellwig LKML-Reference: <20100907125054.795929962@linutronix.de> --- include/linux/semaphore.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 7415839ac890..5310d27abd2a 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -26,6 +26,9 @@ struct semaphore { .wait_list = LIST_HEAD_INIT((name).wait_list), \ } +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + #define DECLARE_MUTEX(name) \ struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) -- cgit v1.2.3 From e3e55ff5854655d8723ad8b307f02515aecc3df5 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Tue, 7 Sep 2010 15:52:06 +0800 Subject: spi/dw_spi: clean the cs_control code commit 052dc7c45i "spi/dw_spi: conditional transfer mode change" introduced cs_control code, which has a bug by using bit offset for spi mode to set transfer mode in control register. 
Also it forces devices that don't need cs_control to re-configure the control registers for each spi transfer. This patch fixes both problems. Signed-off-by: Feng Tang Signed-off-by: Grant Likely --- drivers/spi/dw_spi.c | 17 +++++------------ include/linux/spi/dw_spi.h | 2 ++ 2 files changed, 7 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/spi/dw_spi.c b/drivers/spi/dw_spi.c index 11fbbf6fb07b..56247853c298 100644 --- a/drivers/spi/dw_spi.c +++ b/drivers/spi/dw_spi.c @@ -181,10 +181,6 @@ static void flush(struct dw_spi *dws) wait_till_not_busy(dws); } -static void null_cs_control(u32 command) -{ -} - static int null_writer(struct dw_spi *dws) { u8 n_bytes = dws->n_bytes; @@ -322,7 +318,7 @@ static void giveback(struct dw_spi *dws) struct spi_transfer, transfer_list); - if (!last_transfer->cs_change) + if (!last_transfer->cs_change && dws->cs_control) dws->cs_control(MRST_SPI_DEASSERT); msg->state = NULL; @@ -549,13 +545,13 @@ static void pump_transfers(unsigned long data) */ if (dws->cs_control) { if (dws->rx && dws->tx) - chip->tmode = 0x00; + chip->tmode = SPI_TMOD_TR; else if (dws->rx) - chip->tmode = 0x02; + chip->tmode = SPI_TMOD_RO; else - chip->tmode = 0x01; + chip->tmode = SPI_TMOD_TO; - cr0 &= ~(0x3 << SPI_MODE_OFFSET); + cr0 &= ~SPI_TMOD_MASK; cr0 |= (chip->tmode << SPI_TMOD_OFFSET); } @@ -704,9 +700,6 @@ static int dw_spi_setup(struct spi_device *spi) chip = kzalloc(sizeof(struct chip_data), GFP_KERNEL); if (!chip) return -ENOMEM; - - chip->cs_control = null_cs_control; - chip->enable_dma = 0; } /* diff --git a/include/linux/spi/dw_spi.h b/include/linux/spi/dw_spi.h index cc813f95a2f2..c91302f3a257 100644 --- a/include/linux/spi/dw_spi.h +++ b/include/linux/spi/dw_spi.h @@ -14,7 +14,9 @@ #define SPI_MODE_OFFSET 6 #define SPI_SCPH_OFFSET 6 #define SPI_SCOL_OFFSET 7 + #define SPI_TMOD_OFFSET 8 +#define SPI_TMOD_MASK (0x3 << SPI_TMOD_OFFSET) #define SPI_TMOD_TR 0x0 /* xmit & recv */ #define SPI_TMOD_TO 0x1 /* xmit only */ #define 
SPI_TMOD_RO 0x2 /* recv only */ -- cgit v1.2.3 From d530148ae8bffe1b33f50d1776d185a6e85dc774 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 20 Aug 2010 16:49:43 +0800 Subject: dquot: do full inode dirty in allocating space Alex Shi found a regression when doing ffsb test. The test has several threads, and each thread creates a small file, write to it and then delete it. ffsb reports about 20% regression and Alex bisected it to 43d2932d88e4. The test will call __mark_inode_dirty 3 times. without this commit, we only take inode_lock one time, while with it, we take the lock 3 times with flags ( I_DIRTY_SYNC,I_DIRTY_PAGES,I_DIRTY). Perf shows the lock contention increased too much. Below proposed patch fixes it. fs is allocating blocks, which usually means file writes and the inode will be dirtied soon. We fully dirty the inode to reduce some inode_lock contention in several calls of __mark_inode_dirty. Jan Kara: Added comment. Signed-off-by: Shaohua Li Signed-off-by: Alex Shi Signed-off-by: Jan Kara --- include/linux/quotaops.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index d50ba858cfe0..d1a9193960f1 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -274,8 +274,14 @@ static inline int dquot_alloc_space(struct inode *inode, qsize_t nr) int ret; ret = dquot_alloc_space_nodirty(inode, nr); - if (!ret) - mark_inode_dirty_sync(inode); + if (!ret) { + /* + * Mark inode fully dirty. Since we are allocating blocks, inode + * would become fully dirty soon anyway and it reportedly + * reduces inode_lock contention. + */ + mark_inode_dirty(inode); + } return ret; } -- cgit v1.2.3 From 39aa3cb3e8250db9188a6f1e3fb62ffa1a717678 Mon Sep 17 00:00:00 2001 From: Stefan Bader Date: Tue, 31 Aug 2010 15:52:27 +0200 Subject: mm: Move vma_stack_continue into mm.h So it can be used by all that need to check for that. 
Signed-off-by: Stefan Bader Signed-off-by: Linus Torvalds --- fs/proc/task_mmu.c | 3 ++- include/linux/mm.h | 6 ++++++ mm/mlock.c | 6 ------ 3 files changed, 8 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 439fc1f1c1c4..271afc48b9a5 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -224,7 +224,8 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma) /* We don't show the stack guard page in /proc/maps */ start = vma->vm_start; if (vma->vm_flags & VM_GROWSDOWN) - start += PAGE_SIZE; + if (!vma_stack_continue(vma->vm_prev, vma->vm_start)) + start += PAGE_SIZE; seq_printf(m, "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", start, diff --git a/include/linux/mm.h b/include/linux/mm.h index e6b1210772ce..74949fbef8c6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -864,6 +864,12 @@ int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +/* Is the vma a continuation of the stack vma above it? */ +static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) +{ + return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); +} + extern unsigned long move_page_tables(struct vm_area_struct *vma, unsigned long old_addr, struct vm_area_struct *new_vma, unsigned long new_addr, unsigned long len); diff --git a/mm/mlock.c b/mm/mlock.c index cbae7c5b9568..b70919ce4f72 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -135,12 +135,6 @@ void munlock_vma_page(struct page *page) } } -/* Is the vma a continuation of the stack vma above it? 
*/ -static inline int vma_stack_continue(struct vm_area_struct *vma, unsigned long addr) -{ - return vma && (vma->vm_end == addr) && (vma->vm_flags & VM_GROWSDOWN); -} - static inline int stack_guard_page(struct vm_area_struct *vma, unsigned long addr) { return (vma->vm_flags & VM_GROWSDOWN) && -- cgit v1.2.3 From a73f8844e1fc54c3762555c1cf1f71774142ca91 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Wed, 8 Sep 2010 16:54:54 -0600 Subject: lglock: make lg_lock_global() actually lock globally lg_lock_global() currently only acquires spinlocks for online CPUs, but it's meant to lock all possible CPUs. Lglock-protected resources may be associated with removed CPUs - and, indeed, that could happen with the per-superblock open files lists. At Nick's suggestion, change for_each_online_cpu() to for_each_possible_cpu() to protect accesses to those resources. Cc: Al Viro Acked-by: Nick Piggin Signed-off-by: Jonathan Corbet Signed-off-by: Linus Torvalds --- include/linux/lglock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index b288cb713b90..f549056fb20b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -150,7 +150,7 @@ int i; \ preempt_disable(); \ rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_lock(lock); \ @@ -161,7 +161,7 @@ void name##_global_unlock(void) { \ int i; \ rwlock_release(&name##_lock_dep_map, 1, _RET_IP_); \ - for_each_online_cpu(i) { \ + for_each_possible_cpu(i) { \ arch_spinlock_t *lock; \ lock = &per_cpu(name##_lock, i); \ arch_spin_unlock(lock); \ -- cgit v1.2.3 From f3c65b2870f2481f3646bc410a58a12989ecc704 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 9 Sep 2010 16:37:24 -0700 Subject: mmc: avoid getting CID on SDIO-only cards The introduction of support for SD combo cards breaks the 
initialization of all CSR SDIO chips. The GO_IDLE (CMD0) in mmc_sd_get_cid() causes CSR chips to be reset (this is non-standard behavior). When initializing an SDIO card check for a combo card by using the memory present bit in the R4 response to IO_SEND_OP_COND (CMD5). This avoids the call to mmc_sd_get_cid() on an SDIO-only card. Signed-off-by: David Vrabel Acked-by: Michal Mirolaw Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/mmc/core/sdio.c | 5 ++--- include/linux/mmc/sdio.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c index bd2755e8d9a3..f332c52968b7 100644 --- a/drivers/mmc/core/sdio.c +++ b/drivers/mmc/core/sdio.c @@ -362,9 +362,8 @@ static int mmc_sdio_init_card(struct mmc_host *host, u32 ocr, goto err; } - err = mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid); - - if (!err) { + if (ocr & R4_MEMORY_PRESENT + && mmc_sd_get_cid(host, host->ocr & ocr, card->raw_cid) == 0) { card->type = MMC_TYPE_SD_COMBO; if (oldcard && (oldcard->type != MMC_TYPE_SD_COMBO || diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h index 329a8faa6e37..245cdacee544 100644 --- a/include/linux/mmc/sdio.h +++ b/include/linux/mmc/sdio.h @@ -38,6 +38,8 @@ * [8:0] Byte/block count */ +#define R4_MEMORY_PRESENT (1 << 27) + /* SDIO status in R5 Type -- cgit v1.2.3 From e0bf1024b36be90da241af3c2767311e055b612c Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Thu, 9 Sep 2010 16:37:26 -0700 Subject: kfifo: add parenthesis for macro parameter reference Some macro parameter references inside typeof() operator are not enclosed with parenthesis. It should be safer to add them. 
Signed-off-by: Huang Ying Acked-by: Stefani Seibold Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 58 +++++++++++++++++++++++++-------------------------- 1 file changed, 29 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 4aa95f203f3e..62dbee554f60 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -214,7 +214,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.in = __tmp->kfifo.out = 0; \ }) @@ -228,7 +228,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_reset_out(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ __tmp->kfifo.out = __tmp->kfifo.in; \ }) @@ -238,7 +238,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_len(fifo) \ ({ \ - typeof(fifo + 1) __tmpl = (fifo); \ + typeof((fifo) + 1) __tmpl = (fifo); \ __tmpl->kfifo.in - __tmpl->kfifo.out; \ }) @@ -248,7 +248,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_empty(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ __tmpq->kfifo.in == __tmpq->kfifo.out; \ }) @@ -258,7 +258,7 @@ __kfifo_must_check_helper(unsigned int val) */ #define kfifo_is_full(fifo) \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ kfifo_len(__tmpq) > __tmpq->kfifo.mask; \ }) @@ -269,7 +269,7 @@ __kfifo_must_check_helper(unsigned int val) #define kfifo_avail(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmpq = (fifo); \ + typeof((fifo) + 1) __tmpq = (fifo); \ const size_t __recsize = sizeof(*__tmpq->rectype); \ unsigned int __avail = kfifo_size(__tmpq) - kfifo_len(__tmpq); \ (__recsize) ? ((__avail <= __recsize) ? 
0 : \ @@ -284,7 +284,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_skip(fifo) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__recsize) \ @@ -302,7 +302,7 @@ __kfifo_must_check_helper( \ #define kfifo_peek_len(fifo) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ (!__recsize) ? kfifo_len(__tmp) * sizeof(*__tmp->type) : \ @@ -325,7 +325,7 @@ __kfifo_must_check_helper( \ #define kfifo_alloc(fifo, size, gfp_mask) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? \ __kfifo_alloc(__kfifo, size, sizeof(*__tmp->type), gfp_mask) : \ @@ -339,7 +339,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_free(fifo) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ if (__is_kfifo_ptr(__tmp)) \ __kfifo_free(__kfifo); \ @@ -358,7 +358,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_init(fifo, buffer, size) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ __is_kfifo_ptr(__tmp) ? 
\ __kfifo_init(__kfifo, buffer, size, sizeof(*__tmp->type)) : \ @@ -379,8 +379,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_put(fifo, val) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -421,8 +421,8 @@ __kfifo_must_check_helper( \ #define kfifo_get(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -462,8 +462,8 @@ __kfifo_must_check_helper( \ #define kfifo_peek(fifo, val) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(val + 1) __val = (val); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((val) + 1) __val = (val); \ unsigned int __ret; \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -501,8 +501,8 @@ __kfifo_must_check_helper( \ */ #define kfifo_in(fifo, buf, n) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -554,8 +554,8 @@ __kfifo_must_check_helper( \ #define kfifo_out(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -611,7 +611,7 @@ __kfifo_must_check_helper( \ #define 
kfifo_from_user(fifo, from, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ const void __user *__from = (from); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -639,7 +639,7 @@ __kfifo_must_check_helper( \ #define kfifo_to_user(fifo, to, len, copied) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ void __user *__to = (to); \ unsigned int __len = (len); \ unsigned int *__copied = (copied); \ @@ -666,7 +666,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -690,7 +690,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_in_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -717,7 +717,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_prepare(fifo, sgl, nents, len) \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ struct scatterlist *__sgl = (sgl); \ int __nents = (nents); \ unsigned int __len = (len); \ @@ -741,7 +741,7 @@ __kfifo_must_check_helper( \ */ #define kfifo_dma_out_finish(fifo, len) \ (void)({ \ - typeof(fifo + 1) __tmp = (fifo); \ + typeof((fifo) + 1) __tmp = (fifo); \ unsigned int __len = (len); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ @@ -766,8 +766,8 @@ __kfifo_must_check_helper( \ #define kfifo_out_peek(fifo, buf, n) \ __kfifo_must_check_helper( \ ({ \ - typeof(fifo + 1) __tmp = (fifo); \ - typeof(buf + 1) __buf = (buf); \ + typeof((fifo) + 1) __tmp = (fifo); \ + typeof((buf) + 1) __buf = (buf); \ 
unsigned long __n = (n); \ const size_t __recsize = sizeof(*__tmp->rectype); \ struct __kfifo *__kfifo = &__tmp->kfifo; \ -- cgit v1.2.3 From 31583bb0cf6cc40f2a468a4d2f3b9cbefd24f891 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 9 Sep 2010 16:37:37 -0700 Subject: cgroups: fix API thinko Add cgroup_attach_task_all() The existing cgroup_attach_task_current_cg() API is called by a thread to attach another thread to all of its cgroups; this is unsuitable for cases where a privileged task wants to attach itself to the cgroups of a less privileged one, since the call must be made from the context of the target task. This patch adds a more generic cgroup_attach_task_all() API that allows both the source task and to-be-moved task to be specified. cgroup_attach_task_current_cg() becomes a specialization of the more generic new function. [menage@google.com: rewrote changelog] [akpm@linux-foundation.org: address reviewer comments] Signed-off-by: Michael S. Tsirkin Tested-by: Alex Williamson Acked-by: Paul Menage Cc: Li Zefan Cc: Ben Blum Cc: Sridhar Samudrala Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cgroup.h | 12 +++++++++++- kernel/cgroup.c | 13 +++++++------ 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index ed3e92e41c6e..0c991023ee47 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -578,7 +578,12 @@ struct task_struct *cgroup_iter_next(struct cgroup *cgrp, void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it); int cgroup_scan_tasks(struct cgroup_scanner *scan); int cgroup_attach_task(struct cgroup *, struct task_struct *); -int cgroup_attach_task_current_cg(struct task_struct *); +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *); + +static inline int cgroup_attach_task_current_cg(struct task_struct *tsk) +{ + return cgroup_attach_task_all(current, tsk); +} /* * CSS ID is ID for 
cgroup_subsys_state structs under subsys. This only works @@ -636,6 +641,11 @@ static inline int cgroupstats_build(struct cgroupstats *stats, } /* No cgroups - nothing to do */ +static inline int cgroup_attach_task_all(struct task_struct *from, + struct task_struct *t) +{ + return 0; +} static inline int cgroup_attach_task_current_cg(struct task_struct *t) { return 0; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 192f88c5b0f9..c9483d8f6140 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1791,19 +1791,20 @@ out: } /** - * cgroup_attach_task_current_cg - attach task 'tsk' to current task's cgroup + * cgroup_attach_task_all - attach task 'tsk' to all cgroups of task 'from' + * @from: attach to all cgroups of a given task * @tsk: the task to be attached */ -int cgroup_attach_task_current_cg(struct task_struct *tsk) +int cgroup_attach_task_all(struct task_struct *from, struct task_struct *tsk) { struct cgroupfs_root *root; - struct cgroup *cur_cg; int retval = 0; cgroup_lock(); for_each_active_root(root) { - cur_cg = task_cgroup_from_root(current, root); - retval = cgroup_attach_task(cur_cg, tsk); + struct cgroup *from_cg = task_cgroup_from_root(from, root); + + retval = cgroup_attach_task(from_cg, tsk); if (retval) break; } @@ -1811,7 +1812,7 @@ int cgroup_attach_task_current_cg(struct task_struct *tsk) return retval; } -EXPORT_SYMBOL_GPL(cgroup_attach_task_current_cg); +EXPORT_SYMBOL_GPL(cgroup_attach_task_all); /* * Attach task with pid 'pid' to cgroup 'cgrp'. Call with cgroup_mutex -- cgit v1.2.3 From 4969c1192d15afa3389e7ae3302096ff684ba655 Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 9 Sep 2010 16:37:52 -0700 Subject: mm: fix swapin race condition The pte_same check is reliable only if the swap entry remains pinned (by the page lock on swapcache). We've also to ensure the swapcache isn't removed before we take the lock as try_to_free_swap won't care about the page pin. 
One of the possible impacts of this bug is that a KSM-shared page can point to the anon_vma of another process, which could exit before the page is freed. This can leave a page with a pointer to a recycled anon_vma object, or worse, a pointer to something that is no longer an anon_vma. [riel@redhat.com: changelog help] Signed-off-by: Andrea Arcangeli Acked-by: Hugh Dickins Reviewed-by: Rik van Riel Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 20 +++++++++----------- mm/ksm.c | 3 --- mm/memory.c | 39 ++++++++++++++++++++++++++++++++++----- 3 files changed, 43 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index 74d691ee9121..3319a6967626 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -16,6 +16,9 @@ struct stable_node; struct mem_cgroup; +struct page *ksm_does_need_to_copy(struct page *page, + struct vm_area_struct *vma, unsigned long address); + #ifdef CONFIG_KSM int ksm_madvise(struct vm_area_struct *vma, unsigned long start, unsigned long end, int advice, unsigned long *vm_flags); @@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page, * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE, * but what if the vma was unmerged while the page was swapped out?
*/ -struct page *ksm_does_need_to_copy(struct page *page, - struct vm_area_struct *vma, unsigned long address); -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { struct anon_vma *anon_vma = page_anon_vma(page); - if (!anon_vma || - (anon_vma->root == vma->anon_vma->root && - page->index == linear_page_index(vma, address))) - return page; - - return ksm_does_need_to_copy(page, vma, address); + return anon_vma && + (anon_vma->root != vma->anon_vma->root || + page->index != linear_page_index(vma, address)); } int page_referenced_ksm(struct page *page, @@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start, return 0; } -static inline struct page *ksm_might_need_to_copy(struct page *page, +static inline int ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address) { - return page; + return 0; } static inline int page_referenced_ksm(struct page *page, diff --git a/mm/ksm.c b/mm/ksm.c index e2ae00458320..b1873cf03ed9 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page, { struct page *new_page; - unlock_page(page); /* any racers will COW it, not modify it */ - new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address); if (new_page) { copy_user_highpage(new_page, page, address, vma); @@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page, add_page_to_unevictable_list(new_page); } - page_cache_release(page); return new_page; } diff --git a/mm/memory.c b/mm/memory.c index 6b2ab1051851..71b161b73bb5 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned int flags, pte_t orig_pte) { spinlock_t *ptl; - struct page *page; + struct page *page, *swapcache = NULL; swp_entry_t entry; pte_t pte; 
struct mem_cgroup *ptr = NULL; @@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, lock_page(page); delayacct_clear_flag(DELAYACCT_PF_SWAPIN); - page = ksm_might_need_to_copy(page, vma, address); - if (!page) { - ret = VM_FAULT_OOM; - goto out; + /* + * Make sure try_to_free_swap didn't release the swapcache + * from under us. The page pin isn't enough to prevent that. + */ + if (unlikely(!PageSwapCache(page))) + goto out_page; + + if (ksm_might_need_to_copy(page, vma, address)) { + swapcache = page; + page = ksm_does_need_to_copy(page, vma, address); + + if (unlikely(!page)) { + ret = VM_FAULT_OOM; + page = swapcache; + swapcache = NULL; + goto out_page; + } } if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) { @@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) try_to_free_swap(page); unlock_page(page); + if (swapcache) { + /* + * Hold the lock to avoid the swap entry to be reused + * until we take the PT lock for the pte_same() check + * (to avoid false positives from pte_same). For + * further safety release the lock after the swap_free + * so that the swap count won't change under a + * parallel locked swapcache. + */ + unlock_page(swapcache); + page_cache_release(swapcache); + } if (flags & FAULT_FLAG_WRITE) { ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte); @@ -2756,6 +2781,10 @@ out_page: unlock_page(page); out_release: page_cache_release(page); + if (swapcache) { + unlock_page(swapcache); + page_cache_release(swapcache); + } return ret; } -- cgit v1.2.3 From 5affb607720d734ca572b8a77c5c7d62d3042b6f Mon Sep 17 00:00:00 2001 From: Gregory Bean Date: Thu, 9 Sep 2010 16:38:02 -0700 Subject: gpio: sx150x: correct and refine reset-on-probe behavior Replace the arbitrary software-reset call from the device-probe method, because: - It is defective. 
To work correctly, it should be two byte writes, not a single word write. As it stands, it does nothing. - Some devices with sx150x expanders installed have their NRESET pins ganged on the same line, so resetting one causes the others to reset - not a nice thing to do arbitrarily! - The probe, usually taking place at boot, implies a recent hard-reset, so a software reset at this point is just a waste of energy anyway. Therefore, make it optional, defaulting to off, as this will match the common case of probing at powerup and also matches the current broken no-op behavior. Signed-off-by: Gregory Bean Reviewed-by: Jean Delvare Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/sx150x.c | 26 +++++++++++++++++++++----- include/linux/i2c/sx150x.h | 4 ++++ 2 files changed, 25 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/gpio/sx150x.c b/drivers/gpio/sx150x.c index b42f42ca70c3..823559ab0e24 100644 --- a/drivers/gpio/sx150x.c +++ b/drivers/gpio/sx150x.c @@ -459,17 +459,33 @@ static int sx150x_init_io(struct sx150x_chip *chip, u8 base, u16 cfg) return err; } -static int sx150x_init_hw(struct sx150x_chip *chip, - struct sx150x_platform_data *pdata) +static int sx150x_reset(struct sx150x_chip *chip) { - int err = 0; + int err; - err = i2c_smbus_write_word_data(chip->client, + err = i2c_smbus_write_byte_data(chip->client, chip->dev_cfg->reg_reset, - 0x3412); + 0x12); if (err < 0) return err; + err = i2c_smbus_write_byte_data(chip->client, + chip->dev_cfg->reg_reset, + 0x34); + return err; +} + +static int sx150x_init_hw(struct sx150x_chip *chip, + struct sx150x_platform_data *pdata) +{ + int err = 0; + + if (pdata->reset_during_probe) { + err = sx150x_reset(chip); + if (err < 0) + return err; + } + err = sx150x_i2c_write(chip->client, chip->dev_cfg->reg_misc, 0x01); diff --git a/include/linux/i2c/sx150x.h b/include/linux/i2c/sx150x.h index ee3049cb9ba5..52baa79d69a7 100644 --- a/include/linux/i2c/sx150x.h +++ 
b/include/linux/i2c/sx150x.h @@ -63,6 +63,9 @@ * IRQ lines will appear. Similarly to gpio_base, the expander * will create a block of irqs beginning at this number. * This value is ignored if irq_summary is < 0. + * @reset_during_probe: If set to true, the driver will trigger a full + * reset of the chip at the beginning of the probe + * in order to place it in a known state. */ struct sx150x_platform_data { unsigned gpio_base; @@ -73,6 +76,7 @@ struct sx150x_platform_data { u16 io_polarity; int irq_summary; unsigned irq_base; + bool reset_during_probe; }; #endif /* __LINUX_I2C_SX150X_H */ -- cgit v1.2.3 From c956126c137d97acb6f4d56fa9572d0bcc84e4ed Mon Sep 17 00:00:00 2001 From: David Brownell Date: Thu, 9 Sep 2010 16:38:03 -0700 Subject: gpio: doc updates There's been some recent confusion about error checking GPIO numbers. Briefly, it should be handled mostly during setup, when gpio_request() is called, and NEVER by expecting gpio_is_valid() to report more than never-usable GPIO numbers. [akpm@linux-foundation.org: terminate unterminated comment] Signed-off-by: David Brownell Cc: Eric Miao" Cc: "Ryan Mallon" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/gpio.txt | 22 ++++++++++++++-------- include/asm-generic/gpio.h | 14 +++++++++++++- 2 files changed, 27 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/Documentation/gpio.txt b/Documentation/gpio.txt index d96a6dba5748..9633da01ff46 100644 --- a/Documentation/gpio.txt +++ b/Documentation/gpio.txt @@ -109,17 +109,19 @@ use numbers 2000-2063 to identify GPIOs in a bank of I2C GPIO expanders. If you want to initialize a structure with an invalid GPIO number, use some negative number (perhaps "-EINVAL"); that will never be valid.
To -test if a number could reference a GPIO, you may use this predicate: +test if such number from such a structure could reference a GPIO, you +may use this predicate: int gpio_is_valid(int number); A number that's not valid will be rejected by calls which may request or free GPIOs (see below). Other numbers may also be rejected; for -example, a number might be valid but unused on a given board. - -Whether a platform supports multiple GPIO controllers is currently a -platform-specific implementation issue. +example, a number might be valid but temporarily unused on a given board. +Whether a platform supports multiple GPIO controllers is a platform-specific +implementation issue, as are whether that support can leave "holes" in the space +of GPIO numbers, and whether new controllers can be added at runtime. Such issues +can affect things including whether adjacent GPIO numbers are both valid. Using GPIOs ----------- @@ -480,12 +482,16 @@ To support this framework, a platform's Kconfig will "select" either ARCH_REQUIRE_GPIOLIB or ARCH_WANT_OPTIONAL_GPIOLIB and arrange that its includes and defines three functions: gpio_get_value(), gpio_set_value(), and gpio_cansleep(). -They may also want to provide a custom value for ARCH_NR_GPIOS. -ARCH_REQUIRE_GPIOLIB means that the gpio-lib code will always get compiled +It may also provide a custom value for ARCH_NR_GPIOS, so that it better +reflects the number of GPIOs in actual use on that platform, without +wasting static table space. (It should count both built-in/SoC GPIOs and +also ones on GPIO expanders. + +ARCH_REQUIRE_GPIOLIB means that the gpiolib code will always get compiled into the kernel on that architecture. -ARCH_WANT_OPTIONAL_GPIOLIB means the gpio-lib code defaults to off and the user +ARCH_WANT_OPTIONAL_GPIOLIB means the gpiolib code defaults to off and the user can enable it and build it into the kernel optionally. 
If neither of these options are selected, the platform does not support diff --git a/include/asm-generic/gpio.h b/include/asm-generic/gpio.h index c7376bf80b06..8ca18e26d7e3 100644 --- a/include/asm-generic/gpio.h +++ b/include/asm-generic/gpio.h @@ -16,15 +16,27 @@ * While the GPIO programming interface defines valid GPIO numbers * to be in the range 0..MAX_INT, this library restricts them to the * smaller range 0..ARCH_NR_GPIOS-1. + * + * ARCH_NR_GPIOS is somewhat arbitrary; it usually reflects the sum of + * builtin/SoC GPIOs plus a number of GPIOs on expanders; the latter is + * actually an estimate of a board-specific value. */ #ifndef ARCH_NR_GPIOS #define ARCH_NR_GPIOS 256 #endif +/* + * "valid" GPIO numbers are nonnegative and may be passed to + * setup routines like gpio_request(). only some valid numbers + * can successfully be requested and used. + * + * Invalid GPIO numbers are useful for indicating no-such-GPIO in + * platform data and other tables. + */ + static inline int gpio_is_valid(int number) { - /* only some non-negative numbers are valid */ return ((unsigned)number) < ARCH_NR_GPIOS; } -- cgit v1.2.3 From 910321ea817a202ff70fac666e37e2c8e2f88823 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:07 -0700 Subject: swap: revert special hibernation allocation Please revert 2.6.36-rc commit d2997b1042ec150616c1963b5e5e919ffd0b0ebf "hibernation: freeze swap at hibernation". It complicated matters by adding a second swap allocation path, just for hibernation; without in any way fixing the issue that it was intended to address - page reclaim after fixing the hibernation image might free swap from a page already imaged as swapcache, letting its swap be reallocated to store a different page of the image: resulting in data corruption if the imaged page were freed as clean then swapped back in. Pages freed to si->swap_map were still in danger of being reallocated by the alternative allocation path. 
I guess it inadvertently fixed slow SSD swap allocation for hibernation, as reported by Nigel Cunningham: by missing out the discards that occur on the usual swap allocation path; but that was unintentional, and needs a separate fix. Signed-off-by: Hugh Dickins Cc: KAMEZAWA Hiroyuki Cc: KOSAKI Motohiro Cc: "Rafael J. Wysocki" Cc: Ondrej Zary Cc: Andrea Gelmini Cc: Balbir Singh Cc: Andrea Arcangeli Cc: Nigel Cunningham Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 8 +---- kernel/power/hibernate.c | 1 - kernel/power/snapshot.c | 1 - kernel/power/swap.c | 6 ++-- mm/swapfile.c | 94 ++++++++++++------------------------------------ 5 files changed, 26 insertions(+), 84 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2fee51a11b73..bf4eb62506db 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -315,6 +315,7 @@ extern long nr_swap_pages; extern long total_swap_pages; extern void si_swapinfo(struct sysinfo *); extern swp_entry_t get_swap_page(void); +extern swp_entry_t get_swap_page_of_type(int); extern int valid_swaphandles(swp_entry_t, unsigned long *); extern int add_swap_count_continuation(swp_entry_t, gfp_t); extern void swap_shmem_alloc(swp_entry_t); @@ -331,13 +332,6 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_HIBERNATION -void hibernation_freeze_swap(void); -void hibernation_thaw_swap(void); -swp_entry_t get_swap_for_hibernation(int type); -void swap_free_for_hibernation(swp_entry_t val); -#endif - /* linux/mm/thrash.c */ extern struct mm_struct *swap_token_mm; extern void grab_swap_token(struct mm_struct *); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index c77963938bca..8dc31e02ae12 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -338,7 +338,6 @@ int hibernation_snapshot(int platform_mode) goto Close; suspend_console(); - 
hibernation_freeze_swap(); saved_mask = clear_gfp_allowed_mask(GFP_IOFS); error = dpm_suspend_start(PMSG_FREEZE); if (error) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 5e7edfb05e66..f6cd6faf84fd 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -1086,7 +1086,6 @@ void swsusp_free(void) buffer = NULL; alloc_normal = 0; alloc_highmem = 0; - hibernation_thaw_swap(); } /* Helper functions used for the shrinking of memory. */ diff --git a/kernel/power/swap.c b/kernel/power/swap.c index 5d0059eed3e4..e6a5bdf61a37 100644 --- a/kernel/power/swap.c +++ b/kernel/power/swap.c @@ -136,10 +136,10 @@ sector_t alloc_swapdev_block(int swap) { unsigned long offset; - offset = swp_offset(get_swap_for_hibernation(swap)); + offset = swp_offset(get_swap_page_of_type(swap)); if (offset) { if (swsusp_extents_insert(offset)) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); else return swapdev_block(swap, offset); } @@ -163,7 +163,7 @@ void free_all_swap_pages(int swap) ext = container_of(node, struct swsusp_extent, node); rb_erase(node, &swsusp_extents); for (offset = ext->start; offset <= ext->end; offset++) - swap_free_for_hibernation(swp_entry(swap, offset)); + swap_free(swp_entry(swap, offset)); kfree(ext); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 1f3f9c59a73a..f08d165871b3 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -47,8 +47,6 @@ long nr_swap_pages; long total_swap_pages; static int least_priority; -static bool swap_for_hibernation; - static const char Bad_file[] = "Bad swap file entry "; static const char Unused_file[] = "Unused swap file entry "; static const char Bad_offset[] = "Bad swap offset entry "; @@ -453,8 +451,6 @@ swp_entry_t get_swap_page(void) spin_lock(&swap_lock); if (nr_swap_pages <= 0) goto noswap; - if (swap_for_hibernation) - goto noswap; nr_swap_pages--; for (type = swap_list.next; type >= 0 && wrapped < 2; type = next) { @@ -487,6 +483,28 @@ noswap: return 
(swp_entry_t) {0}; } +/* The only caller of this function is now susupend routine */ +swp_entry_t get_swap_page_of_type(int type) +{ + struct swap_info_struct *si; + pgoff_t offset; + + spin_lock(&swap_lock); + si = swap_info[type]; + if (si && (si->flags & SWP_WRITEOK)) { + nr_swap_pages--; + /* This is called for allocating swap entry, not cache */ + offset = scan_swap_map(si, 1); + if (offset) { + spin_unlock(&swap_lock); + return swp_entry(type, offset); + } + nr_swap_pages++; + } + spin_unlock(&swap_lock); + return (swp_entry_t) {0}; +} + static struct swap_info_struct *swap_info_get(swp_entry_t entry) { struct swap_info_struct *p; @@ -746,74 +764,6 @@ int mem_cgroup_count_swap_user(swp_entry_t ent, struct page **pagep) #endif #ifdef CONFIG_HIBERNATION - -static pgoff_t hibernation_offset[MAX_SWAPFILES]; -/* - * Once hibernation starts to use swap, we freeze swap_map[]. Otherwise, - * saved swap_map[] image to the disk will be an incomplete because it's - * changing without synchronization with hibernation snap shot. - * At resume, we just make swap_for_hibernation=false. We can forget - * used maps easily. - */ -void hibernation_freeze_swap(void) -{ - int i; - - spin_lock(&swap_lock); - - printk(KERN_INFO "PM: Freeze Swap\n"); - swap_for_hibernation = true; - for (i = 0; i < MAX_SWAPFILES; i++) - hibernation_offset[i] = 1; - spin_unlock(&swap_lock); -} - -void hibernation_thaw_swap(void) -{ - spin_lock(&swap_lock); - if (swap_for_hibernation) { - printk(KERN_INFO "PM: Thaw Swap\n"); - swap_for_hibernation = false; - } - spin_unlock(&swap_lock); -} - -/* - * Because updateing swap_map[] can make not-saved-status-change, - * we use our own easy allocator. - * Please see kernel/power/swap.c, Used swaps are recorded into - * RB-tree. 
- */ -swp_entry_t get_swap_for_hibernation(int type) -{ - pgoff_t off; - swp_entry_t val = {0}; - struct swap_info_struct *si; - - spin_lock(&swap_lock); - - si = swap_info[type]; - if (!si || !(si->flags & SWP_WRITEOK)) - goto done; - - for (off = hibernation_offset[type]; off < si->max; ++off) { - if (!si->swap_map[off]) - break; - } - if (off < si->max) { - val = swp_entry(type, off); - hibernation_offset[type] = off + 1; - } -done: - spin_unlock(&swap_lock); - return val; -} - -void swap_free_for_hibernation(swp_entry_t ent) -{ - /* Nothing to do */ -} - /* * Find the swap type that corresponds to given device (if any). * -- cgit v1.2.3 From 3399446632739fcd05fd8b272b476a69c6e6d14a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 9 Sep 2010 16:38:11 -0700 Subject: swap: discard while swapping only if SWAP_FLAG_DISCARD Tests with recent firmware on Intel X25-M 80GB and OCZ Vertex 60GB SSDs show a shift since I last tested in December: in part because of firmware updates, in part because of the necessary move from barriers to awaiting completion at the block layer. While discard at swapon still shows as slightly beneficial on both, discarding a 1MB swap cluster when allocating is now disadvantageous: adds 25% overhead on Intel, adds 230% on OCZ (YMMV). Surrender: discard as presently implemented is more hindrance than help for swap; but might prove useful on other devices, or with improvements. So continue to do the discard at swapon, but make discard while swapping conditional on a SWAP_FLAG_DISCARD to sys_swapon() (which has been using only the lower 16 bits of int flags). We can add a --discard or -d to swapon(8), and a "discard" to swap in /etc/fstab: matching the mount option for btrfs, ext4, fat, gfs2, nilfs2. Signed-off-by: Hugh Dickins Cc: Christoph Hellwig Cc: Nigel Cunningham Cc: Tejun Heo Cc: Jens Axboe Cc: James Bottomley Cc: "Martin K.
Petersen" Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- mm/swapfile.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index bf4eb62506db..7cdd63366f88 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -19,6 +19,7 @@ struct bio; #define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */ #define SWAP_FLAG_PRIO_MASK 0x7fff #define SWAP_FLAG_PRIO_SHIFT 0 +#define SWAP_FLAG_DISCARD 0x10000 /* discard swap cluster after use */ static inline int current_is_kswapd(void) { @@ -142,7 +143,7 @@ struct swap_extent { enum { SWP_USED = (1 << 0), /* is slot in swap_info[] used? */ SWP_WRITEOK = (1 << 1), /* ok to write to this swap? */ - SWP_DISCARDABLE = (1 << 2), /* blkdev supports discard */ + SWP_DISCARDABLE = (1 << 2), /* swapon+blkdev support discard */ SWP_DISCARDING = (1 << 3), /* now discarding a free cluster */ SWP_SOLIDSTATE = (1 << 4), /* blkdev seeks are cheap */ SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ diff --git a/mm/swapfile.c b/mm/swapfile.c index 1894dead0b58..7c703ff2f36f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2047,7 +2047,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) p->flags |= SWP_SOLIDSTATE; p->cluster_next = 1 + (random32() % p->highest_bit); } - if (discard_swap(p) == 0) + if (discard_swap(p) == 0 && (swap_flags & SWAP_FLAG_DISCARD)) p->flags |= SWP_DISCARDABLE; } -- cgit v1.2.3 From aa45484031ddee09b06350ab8528bfe5b2c76d1c Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Thu, 9 Sep 2010 16:38:17 -0700 Subject: mm: page allocator: calculate a better estimate of NR_FREE_PAGES when memory is low and kswapd is awake Ordinarily watermark checks are based on the vmstat NR_FREE_PAGES as it is cheaper than scanning a number of lists. 
To avoid synchronization overhead, counter deltas are maintained on a per-cpu basis and drained both periodically and when the delta is above a threshold. On large CPU systems, the difference between the estimated and real value of NR_FREE_PAGES can be very high. If NR_FREE_PAGES is much higher than the number of really free pages in the buddy allocator, the VM can allocate pages below the min watermark, at worst reducing the real number of free pages to zero. Even if the OOM killer kills some victim to free memory, it may not free memory if the exit path requires a new page, resulting in livelock. This patch introduces a zone_page_state_snapshot() function (courtesy of Christoph) that takes a slightly more accurate view of an arbitrary vmstat counter. It is used to read NR_FREE_PAGES while kswapd is awake to avoid the watermark being accidentally broken. The estimate is not perfect and may result in cache line bounces but is expected to be lighter than the IPI calls necessary to continually drain the per-cpu counters while kswapd is awake.
Signed-off-by: Christoph Lameter Signed-off-by: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++++++++ include/linux/vmstat.h | 22 ++++++++++++++++++++++ mm/mmzone.c | 21 +++++++++++++++++++++ mm/page_alloc.c | 4 ++-- mm/vmstat.c | 15 ++++++++++++++- 5 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6e6e62648a4d..3984c4eb41fd 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -283,6 +283,13 @@ struct zone { /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; + /* + * When free pages are below this point, additional steps are taken + * when reading the number of free pages to avoid per-cpu counter + * drift allowing watermarks to be breached + */ + unsigned long percpu_drift_mark; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several @@ -441,6 +448,12 @@ static inline int zone_is_oom_locked(const struct zone *zone) return test_bit(ZONE_OOM_LOCKED, &zone->flags); } +#ifdef CONFIG_SMP +unsigned long zone_nr_free_pages(struct zone *zone); +#else +#define zone_nr_free_pages(zone) zone_page_state(zone, NR_FREE_PAGES) +#endif /* CONFIG_SMP */ + /* * The "priority" of VM scanning is how much of the queues we will scan in one * go. A value of 12 for DEF_PRIORITY implies that we will scan 1/4096th of the diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 7f43ccdc1d38..eaaea37b3b75 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -170,6 +170,28 @@ static inline unsigned long zone_page_state(struct zone *zone, return x; } +/* + * More accurate version that also considers the currently pending + * deltas. For that we need to loop over all cpus to find the current + * deltas. 
There is no synchronization so the result cannot be + * exactly accurate either. + */ +static inline unsigned long zone_page_state_snapshot(struct zone *zone, + enum zone_stat_item item) +{ + long x = atomic_long_read(&zone->vm_stat[item]); + +#ifdef CONFIG_SMP + int cpu; + for_each_online_cpu(cpu) + x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item]; + + if (x < 0) + x = 0; +#endif + return x; +} + extern unsigned long global_reclaimable_pages(void); extern unsigned long zone_reclaimable_pages(struct zone *zone); diff --git a/mm/mmzone.c b/mm/mmzone.c index f5b7d1760213..e35bfb82c855 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -87,3 +87,24 @@ int memmap_valid_within(unsigned long pfn, return 1; } #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */ + +#ifdef CONFIG_SMP +/* Called when a more accurate view of NR_FREE_PAGES is needed */ +unsigned long zone_nr_free_pages(struct zone *zone) +{ + unsigned long nr_free_pages = zone_page_state(zone, NR_FREE_PAGES); + + /* + * While kswapd is awake, it is considered the zone is under some + * memory pressure. Under pressure, there is a risk that + * per-cpu-counter-drift will allow the min watermark to be breached + * potentially causing a live-lock. 
While kswapd is awake and + * free pages are low, get a better estimate for free pages + */ + if (nr_free_pages < zone->percpu_drift_mark && + !waitqueue_active(&zone->zone_pgdat->kswapd_wait)) + return zone_page_state_snapshot(zone, NR_FREE_PAGES); + + return nr_free_pages; +} +#endif /* CONFIG_SMP */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 452e2ba06c7c..b2d21e06d45d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1462,7 +1462,7 @@ int zone_watermark_ok(struct zone *z, int order, unsigned long mark, { /* free_pages my go negative - that's OK */ long min = mark; - long free_pages = zone_page_state(z, NR_FREE_PAGES) - (1 << order) + 1; + long free_pages = zone_nr_free_pages(z) - (1 << order) + 1; int o; if (alloc_flags & ALLOC_HIGH) @@ -2424,7 +2424,7 @@ void show_free_areas(void) " all_unreclaimable? %s" "\n", zone->name, - K(zone_page_state(zone, NR_FREE_PAGES)), + K(zone_nr_free_pages(zone)), K(min_wmark_pages(zone)), K(low_wmark_pages(zone)), K(high_wmark_pages(zone)), diff --git a/mm/vmstat.c b/mm/vmstat.c index a8d6b59e609a..355a9e669aaa 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -138,11 +138,24 @@ static void refresh_zone_stat_thresholds(void) int threshold; for_each_populated_zone(zone) { + unsigned long max_drift, tolerate_drift; + threshold = calculate_threshold(zone); for_each_online_cpu(cpu) per_cpu_ptr(zone->pageset, cpu)->stat_threshold = threshold; + + /* + * Only set percpu_drift_mark if there is a danger that + * NR_FREE_PAGES reports the low watermark is ok when in fact + * the min watermark could be breached by an allocation + */ + tolerate_drift = low_wmark_pages(zone) - min_wmark_pages(zone); + max_drift = num_online_cpus() * threshold; + if (max_drift > tolerate_drift) + zone->percpu_drift_mark = high_wmark_pages(zone) + + max_drift; } } @@ -813,7 +826,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, "\n scanned %lu" "\n spanned %lu" "\n present %lu", - zone_page_state(zone, NR_FREE_PAGES), + 
zone_nr_free_pages(zone), min_wmark_pages(zone), low_wmark_pages(zone), high_wmark_pages(zone), -- cgit v1.2.3 From e2f3d75fc0e4a0d03c61872bad39ffa2e74a04ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 7 Sep 2010 14:05:31 +0200 Subject: libata: skip EH autopsy and recovery during suspend For some mysterious reason, certain hardware reacts badly to usual EH actions while the system is going for suspend. As the devices won't be needed until the system is resumed, ask EH to skip usual autopsy and recovery and proceed directly to suspend. Signed-off-by: Tejun Heo Tested-by: Stephan Diestelhorst Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 14 +++++++++++++- drivers/ata/libata-eh.c | 4 ++++ include/linux/libata.h | 1 + 3 files changed, 18 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index c035b3d041ee..932eaee50245 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -5418,6 +5418,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg, */ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) { + unsigned int ehi_flags = ATA_EHI_QUIET; int rc; /* @@ -5426,7 +5427,18 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg) */ ata_lpm_enable(host); - rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1); + /* + * On some hardware, device fails to respond after spun down + * for suspend. As the device won't be used before being + * resumed, we don't need to touch the device. Ask EH to skip + * the usual stuff and proceed directly to suspend. 
+ * + * http://thread.gmane.org/gmane.linux.ide/46764 + */ + if (mesg.event == PM_EVENT_SUSPEND) + ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY; + + rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1); if (rc == 0) host->dev->power.power_state = mesg; return rc; diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c index c9ae299b8342..e48302eae55f 100644 --- a/drivers/ata/libata-eh.c +++ b/drivers/ata/libata-eh.c @@ -3235,6 +3235,10 @@ static int ata_eh_skip_recovery(struct ata_link *link) if (link->flags & ATA_LFLAG_DISABLED) return 1; + /* skip if explicitly requested */ + if (ehc->i.flags & ATA_EHI_NO_RECOVERY) + return 1; + /* thaw frozen port and recover failed devices */ if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) return 0; diff --git a/include/linux/libata.h b/include/linux/libata.h index f010f18a0f86..7de282d8bedf 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -335,6 +335,7 @@ enum { ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */ ATA_EHI_QUIET = (1 << 3), /* be quiet */ + ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */ -- cgit v1.2.3 From ea3c64506ea7965f86f030155e6fdef381de10e2 Mon Sep 17 00:00:00 2001 From: Gwendal Grignou Date: Tue, 31 Aug 2010 16:20:36 -0700 Subject: libata-sff: Reenable Port Multiplier after libata-sff remodeling. Keep track of the link on which the current request is in progress. It allows support of links behind port multiplier. Not all libata-sff is PMP compliant. Code for native BMDMA controller does not take PMP into account. Tested on Marvell 7042 and Sil7526.
Signed-off-by: Gwendal Grignou Signed-off-by: Jeff Garzik --- drivers/ata/libata-sff.c | 38 ++++++++++++++++++++++++++++---------- drivers/ata/sata_mv.c | 2 +- include/linux/libata.h | 3 ++- 3 files changed, 31 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c index dee3c2c52562..e30c537cce32 100644 --- a/drivers/ata/libata-sff.c +++ b/drivers/ata/libata-sff.c @@ -1045,7 +1045,8 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq) int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq) { - struct ata_eh_info *ehi = &ap->link.eh_info; + struct ata_link *link = qc->dev->link; + struct ata_eh_info *ehi = &link->eh_info; unsigned long flags = 0; int poll_next; @@ -1301,8 +1302,14 @@ fsm_start: } EXPORT_SYMBOL_GPL(ata_sff_hsm_move); -void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay) +void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay) { + struct ata_port *ap = link->ap; + + WARN_ON((ap->sff_pio_task_link != NULL) && + (ap->sff_pio_task_link != link)); + ap->sff_pio_task_link = link; + /* may fail if ata_sff_flush_pio_task() in progress */ queue_delayed_work(ata_sff_wq, &ap->sff_pio_task, msecs_to_jiffies(delay)); @@ -1324,14 +1331,18 @@ static void ata_sff_pio_task(struct work_struct *work) { struct ata_port *ap = container_of(work, struct ata_port, sff_pio_task.work); + struct ata_link *link = ap->sff_pio_task_link; struct ata_queued_cmd *qc; u8 status; int poll_next; + BUG_ON(ap->sff_pio_task_link == NULL); /* qc can be NULL if timeout occurred */ - qc = ata_qc_from_tag(ap, ap->link.active_tag); - if (!qc) + qc = ata_qc_from_tag(ap, link->active_tag); + if (!qc) { + ap->sff_pio_task_link = NULL; return; + } fsm_start: WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE); @@ -1348,11 +1359,16 @@ fsm_start: msleep(2); status = ata_sff_busy_wait(ap, ATA_BUSY, 10); if (status & ATA_BUSY) { - 
ata_sff_queue_pio_task(ap, ATA_SHORT_PAUSE); + ata_sff_queue_pio_task(link, ATA_SHORT_PAUSE); return; } } + /* + * hsm_move() may trigger another command to be processed. + * clean the link beforehand. + */ + ap->sff_pio_task_link = NULL; /* move the HSM */ poll_next = ata_sff_hsm_move(ap, qc, status, 1); @@ -1379,6 +1395,7 @@ fsm_start: unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* Use polling pio if the LLD doesn't handle * interrupt driven pio and atapi CDB interrupt. @@ -1399,7 +1416,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST_LAST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; @@ -1412,7 +1429,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) if (qc->tf.flags & ATA_TFLAG_WRITE) { /* PIO data out protocol */ ap->hsm_task_state = HSM_ST_FIRST; - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* always send first data block using the * ata_sff_pio_task() codepath. @@ -1422,7 +1439,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) ap->hsm_task_state = HSM_ST; if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); /* if polling, ata_sff_pio_task() handles the * rest. 
otherwise, interrupt handler takes @@ -1444,7 +1461,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if ((!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) || (qc->tf.flags & ATA_TFLAG_POLLING)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: @@ -2737,6 +2754,7 @@ EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep); unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) { struct ata_port *ap = qc->ap; + struct ata_link *link = qc->dev->link; /* defer PIO handling to sff_qc_issue */ if (!ata_is_dma(qc->tf.protocol)) @@ -2765,7 +2783,7 @@ unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc) /* send cdb by polling if no cdb interrupt */ if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); break; default: diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c index 81982594a014..a9fd9709c262 100644 --- a/drivers/ata/sata_mv.c +++ b/drivers/ata/sata_mv.c @@ -2284,7 +2284,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc) } if (qc->tf.flags & ATA_TFLAG_POLLING) - ata_sff_queue_pio_task(ap, 0); + ata_sff_queue_pio_task(link, 0); return 0; } diff --git a/include/linux/libata.h b/include/linux/libata.h index 7de282d8bedf..45fb2967b66d 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -724,6 +724,7 @@ struct ata_port { struct ata_ioports ioaddr; /* ATA cmd/ctl/dma register blocks */ u8 ctl; /* cache of ATA control register */ u8 last_ctl; /* Cache last written value */ + struct ata_link* sff_pio_task_link; /* link currently used */ struct delayed_work sff_pio_task; #ifdef CONFIG_ATA_BMDMA struct ata_bmdma_prd *bmdma_prd; /* BMDMA SG list */ @@ -1595,7 +1596,7 @@ extern void ata_sff_irq_on(struct ata_port *ap); extern void ata_sff_irq_clear(struct ata_port *ap); extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc, u8 status, int in_wq); -extern void 
ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay); +extern void ata_sff_queue_pio_task(struct ata_link *link, unsigned long delay); extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc); extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc); extern unsigned int ata_sff_port_intr(struct ata_port *ap, -- cgit v1.2.3 From 006abe887c5e637d059c44310de6c92f36aded3b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 12 Sep 2010 19:55:25 -0400 Subject: SUNRPC: Fix a race in rpc_info_open There is a race between rpc_info_open and rpc_release_client() in that nothing stops a process from opening the file after the clnt->cl_kref goes to zero. Fix this by using atomic_inc_not_zero()... Reported-by: J. Bruce Fields Signed-off-by: Trond Myklebust Cc: stable@kernel.org --- include/linux/sunrpc/clnt.h | 2 +- net/sunrpc/clnt.c | 26 ++++++++++++-------------- net/sunrpc/rpc_pipe.c | 14 ++++++++------ 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 569dc722a600..85f38a63f098 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -30,7 +30,7 @@ struct rpc_inode; * The high-level client handle */ struct rpc_clnt { - struct kref cl_kref; /* Number of references */ + atomic_t cl_count; /* Number of references */ struct list_head cl_clients; /* Global list of clients */ struct list_head cl_tasks; /* List of tasks */ spinlock_t cl_lock; /* spinlock */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 657aac630fc9..3a8f53e7ba07 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -226,7 +226,7 @@ static struct rpc_clnt * rpc_new_client(const struct rpc_create_args *args, stru goto out_no_principal; } - kref_init(&clnt->cl_kref); + atomic_set(&clnt->cl_count, 1); err = rpc_setup_pipedir(clnt, program->pipe_dir_name); if (err < 0) @@ -390,14 +390,14 @@ rpc_clone_client(struct rpc_clnt *clnt) if (new->cl_principal ==
NULL) goto out_no_principal; } - kref_init(&new->cl_kref); + atomic_set(&new->cl_count, 1); err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); if (err != 0) goto out_no_path; if (new->cl_auth) atomic_inc(&new->cl_auth->au_count); xprt_get(clnt->cl_xprt); - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); rpc_register_client(new); rpciod_up(); return new; @@ -465,10 +465,8 @@ EXPORT_SYMBOL_GPL(rpc_shutdown_client); * Free an RPC client */ static void -rpc_free_client(struct kref *kref) +rpc_free_client(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - dprintk("RPC: destroying %s client for %s\n", clnt->cl_protname, clnt->cl_server); if (!IS_ERR(clnt->cl_path.dentry)) { @@ -495,12 +493,10 @@ out_free: * Free an RPC client */ static void -rpc_free_auth(struct kref *kref) +rpc_free_auth(struct rpc_clnt *clnt) { - struct rpc_clnt *clnt = container_of(kref, struct rpc_clnt, cl_kref); - if (clnt->cl_auth == NULL) { - rpc_free_client(kref); + rpc_free_client(clnt); return; } @@ -509,10 +505,11 @@ rpc_free_auth(struct kref *kref) * release remaining GSS contexts. This mechanism ensures * that it can do so safely. 
*/ - kref_init(kref); + atomic_inc(&clnt->cl_count); rpcauth_release(clnt->cl_auth); clnt->cl_auth = NULL; - kref_put(kref, rpc_free_client); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_client(clnt); } /* @@ -525,7 +522,8 @@ rpc_release_client(struct rpc_clnt *clnt) if (list_empty(&clnt->cl_tasks)) wake_up(&destroy_wait); - kref_put(&clnt->cl_kref, rpc_free_auth); + if (atomic_dec_and_test(&clnt->cl_count)) + rpc_free_auth(clnt); } /** @@ -588,7 +586,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) if (clnt != NULL) { rpc_task_release_client(task); task->tk_client = clnt; - kref_get(&clnt->cl_kref); + atomic_inc(&clnt->cl_count); if (clnt->cl_softrtry) task->tk_flags |= RPC_TASK_SOFT; /* Add to the client's list of all tasks */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 41a762f82630..8c8eef2b8f26 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -371,21 +371,23 @@ rpc_show_info(struct seq_file *m, void *v) static int rpc_info_open(struct inode *inode, struct file *file) { - struct rpc_clnt *clnt; + struct rpc_clnt *clnt = NULL; int ret = single_open(file, rpc_show_info, NULL); if (!ret) { struct seq_file *m = file->private_data; - mutex_lock(&inode->i_mutex); - clnt = RPC_I(inode)->private; - if (clnt) { - kref_get(&clnt->cl_kref); + + spin_lock(&file->f_path.dentry->d_lock); + if (!d_unhashed(file->f_path.dentry)) + clnt = RPC_I(inode)->private; + if (clnt != NULL && atomic_inc_not_zero(&clnt->cl_count)) { + spin_unlock(&file->f_path.dentry->d_lock); m->private = clnt; } else { + spin_unlock(&file->f_path.dentry->d_lock); single_release(inode, file); ret = -EINVAL; } - mutex_unlock(&inode->i_mutex); } return ret; } -- cgit v1.2.3 From c54fce6eff197d9c57c97afbf6c9722ce434fc8f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 10 Sep 2010 16:51:36 +0200 Subject: workqueue: add documentation Update copyright notice and add Documentation/workqueue.txt. Randy Dunlap, Dave Chinner: misc fixes. 
Signed-off-by: Tejun Heo Reviewed-By: Florian Mickler Cc: Ingo Molnar Cc: Christoph Lameter Cc: Randy Dunlap Cc: Dave Chinner --- Documentation/workqueue.txt | 380 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/workqueue.h | 4 + kernel/workqueue.c | 27 ++-- 3 files changed, 401 insertions(+), 10 deletions(-) create mode 100644 Documentation/workqueue.txt (limited to 'include') diff --git a/Documentation/workqueue.txt b/Documentation/workqueue.txt new file mode 100644 index 000000000000..e4498a2872c3 --- /dev/null +++ b/Documentation/workqueue.txt @@ -0,0 +1,380 @@ + +Concurrency Managed Workqueue (cmwq) + +September, 2010 Tejun Heo + Florian Mickler + +CONTENTS + +1. Introduction +2. Why cmwq? +3. The Design +4. Application Programming Interface (API) +5. Example Execution Scenarios +6. Guidelines + + +1. Introduction + +There are many cases where an asynchronous process execution context +is needed and the workqueue (wq) API is the most commonly used +mechanism for such cases. + +When such an asynchronous execution context is needed, a work item +describing which function to execute is put on a queue. An +independent thread serves as the asynchronous execution context. The +queue is called workqueue and the thread is called worker. + +While there are work items on the workqueue the worker executes the +functions associated with the work items one after the other. When +there is no work item left on the workqueue the worker becomes idle. +When a new work item gets queued, the worker begins executing again. + + +2. Why cmwq? + +In the original wq implementation, a multi threaded (MT) wq had one +worker thread per CPU and a single threaded (ST) wq had one worker +thread system-wide. A single MT wq needed to keep around the same +number of workers as the number of CPUs. The kernel grew a lot of MT +wq users over the years and with the number of CPU cores continuously +rising, some systems saturated the default 32k PID space just booting +up. 
+ +Although MT wq wasted a lot of resource, the level of concurrency +provided was unsatisfactory. The limitation was common to both ST and +MT wq albeit less severe on MT. Each wq maintained its own separate +worker pool. A MT wq could provide only one execution context per CPU +while a ST wq one for the whole system. Work items had to compete for +those very limited execution contexts leading to various problems +including proneness to deadlocks around the single execution context. + +The tension between the provided level of concurrency and resource +usage also forced its users to make unnecessary tradeoffs like libata +choosing to use ST wq for polling PIOs and accepting an unnecessary +limitation that no two polling PIOs can progress at the same time. As +MT wq don't provide much better concurrency, users which require +higher level of concurrency, like async or fscache, had to implement +their own thread pool. + +Concurrency Managed Workqueue (cmwq) is a reimplementation of wq with +focus on the following goals. + +* Maintain compatibility with the original workqueue API. + +* Use per-CPU unified worker pools shared by all wq to provide + flexible level of concurrency on demand without wasting a lot of + resource. + +* Automatically regulate worker pool and level of concurrency so that + the API users don't need to worry about such details. + + +3. The Design + +In order to ease the asynchronous execution of functions a new +abstraction, the work item, is introduced. + +A work item is a simple struct that holds a pointer to the function +that is to be executed asynchronously. Whenever a driver or subsystem +wants a function to be executed asynchronously it has to set up a work +item pointing to that function and queue that work item on a +workqueue. + +Special purpose threads, called worker threads, execute the functions +off of the queue, one after the other. If no work is queued, the +worker threads become idle. 
These worker threads are managed in so +called thread-pools. + +The cmwq design differentiates between the user-facing workqueues that +subsystems and drivers queue work items on and the backend mechanism +which manages thread-pool and processes the queued work items. + +The backend is called gcwq. There is one gcwq for each possible CPU +and one gcwq to serve work items queued on unbound workqueues. + +Subsystems and drivers can create and queue work items through special +workqueue API functions as they see fit. They can influence some +aspects of the way the work items are executed by setting flags on the +workqueue they are putting the work item on. These flags include +things like CPU locality, reentrancy, concurrency limits and more. To +get a detailed overview refer to the API description of +alloc_workqueue() below. + +When a work item is queued to a workqueue, the target gcwq is +determined according to the queue parameters and workqueue attributes +and appended on the shared worklist of the gcwq. For example, unless +specifically overridden, a work item of a bound workqueue will be +queued on the worklist of exactly that gcwq that is associated to the +CPU the issuer is running on. + +For any worker pool implementation, managing the concurrency level +(how many execution contexts are active) is an important issue. cmwq +tries to keep the concurrency at a minimal but sufficient level. +Minimal to save resources and sufficient in that the system is used at +its full capacity. + +Each gcwq bound to an actual CPU implements concurrency management by +hooking into the scheduler. The gcwq is notified whenever an active +worker wakes up or sleeps and keeps track of the number of the +currently runnable workers. Generally, work items are not expected to +hog a CPU and consume many cycles. That means maintaining just enough +concurrency to prevent work processing from stalling should be +optimal. 
As long as there are one or more runnable workers on the +CPU, the gcwq doesn't start execution of a new work, but, when the +last running worker goes to sleep, it immediately schedules a new +worker so that the CPU doesn't sit idle while there are pending work +items. This allows using a minimal number of workers without losing +execution bandwidth. + +Keeping idle workers around doesn't cost anything other than the memory space +for kthreads, so cmwq holds onto idle ones for a while before killing +them. + +For an unbound wq, the above concurrency management doesn't apply and +the gcwq for the pseudo unbound CPU tries to start executing all work +items as soon as possible. The responsibility of regulating +concurrency level is on the users. There is also a flag to mark a +bound wq to ignore the concurrency management. Please refer to the +API section for details. + +The forward progress guarantee relies on the ability to create workers when +more execution contexts are necessary, which in turn is guaranteed +through the use of rescue workers. All work items which might be used +on code paths that handle memory reclaim are required to be queued on +wq's that have a rescue-worker reserved for execution under memory +pressure. Otherwise it is possible that the thread-pool deadlocks waiting +for execution contexts to free up. + + +4. Application Programming Interface (API) + +alloc_workqueue() allocates a wq. The original create_*workqueue() +functions are deprecated and scheduled for removal. alloc_workqueue() +takes three arguments - @name, @flags and @max_active. @name is the +name of the wq and also used as the name of the rescuer thread if +there is one. + +A wq no longer manages execution resources but serves as a domain for +forward progress guarantee, flush and work item attributes. @flags +and @max_active control how work items are assigned execution +resources, scheduled and executed.
+ +@flags: + + WQ_NON_REENTRANT + + By default, a wq guarantees non-reentrance only on the same + CPU. A work item may not be executed concurrently on the same + CPU by multiple workers but is allowed to be executed + concurrently on multiple CPUs. This flag makes sure + non-reentrance is enforced across all CPUs. Work items queued + to a non-reentrant wq are guaranteed to be executed by at most + one worker system-wide at any given time. + + WQ_UNBOUND + + Work items queued to an unbound wq are served by a special + gcwq which hosts workers which are not bound to any specific + CPU. This makes the wq behave as a simple execution context + provider without concurrency management. The unbound gcwq + tries to start execution of work items as soon as possible. + Unbound wq sacrifices locality but is useful for the following + cases. + + * Wide fluctuation in the concurrency level requirement is + expected and using bound wq may end up creating large number + of mostly unused workers across different CPUs as the issuer + hops through different CPUs. + + * Long running CPU intensive workloads which can be better + managed by the system scheduler. + + WQ_FREEZEABLE + + A freezeable wq participates in the freeze phase of the system + suspend operations. Work items on the wq are drained and no + new work item starts execution until thawed. + + WQ_RESCUER + + All wq which might be used in the memory reclaim paths _MUST_ + have this flag set. This reserves one worker exclusively for + the execution of this wq under memory pressure. + + WQ_HIGHPRI + + Work items of a highpri wq are queued at the head of the + worklist of the target gcwq and start execution regardless of + the current concurrency level. In other words, highpri work + items will always start execution as soon as execution + resource is available. 
+ + Ordering among highpri work items is preserved - a highpri + work item queued after another highpri work item will start + execution after the earlier highpri work item starts. + + Although highpri work items are not held back by other + runnable work items, they still contribute to the concurrency + level. Highpri work items in runnable state will prevent + non-highpri work items from starting execution. + + This flag is meaningless for unbound wq. + + WQ_CPU_INTENSIVE + + Work items of a CPU intensive wq do not contribute to the + concurrency level. In other words, runnable CPU intensive + work items will not prevent other work items from starting + execution. This is useful for bound work items which are + expected to hog CPU cycles so that their execution is + regulated by the system scheduler. + + Although CPU intensive work items don't contribute to the + concurrency level, start of their executions is still + regulated by the concurrency management and runnable + non-CPU-intensive work items can delay execution of CPU + intensive work items. + + This flag is meaningless for unbound wq. + + WQ_HIGHPRI | WQ_CPU_INTENSIVE + + This combination makes the wq avoid interaction with + concurrency management completely and behave as a simple + per-CPU execution context provider. Work items queued on a + highpri CPU-intensive wq start execution as soon as resources + are available and don't affect execution of other work items. + +@max_active: + +@max_active determines the maximum number of execution contexts per +CPU which can be assigned to the work items of a wq. For example, +with @max_active of 16, at most 16 work items of the wq can be +executing at the same time per CPU. + +Currently, for a bound wq, the maximum limit for @max_active is 512 +and the default value used when 0 is specified is 256. For an unbound +wq, the limit is the higher of 512 and 4 * num_possible_cpus().
These +values are chosen sufficiently high such that they are not the +limiting factor while providing protection in runaway cases. + +The number of active work items of a wq is usually regulated by the +users of the wq, more specifically, by how many work items the users +may queue at the same time. Unless there is a specific need for +throttling the number of active work items, specifying '0' is +recommended. + +Some users depend on the strict execution ordering of ST wq. The +combination of @max_active of 1 and WQ_UNBOUND is used to achieve this +behavior. Work items on such wq are always queued to the unbound gcwq +and only one work item can be active at any given time thus achieving +the same ordering property as ST wq. + + +5. Example Execution Scenarios + +The following example execution scenarios try to illustrate how cmwq +behaves under different configurations. + + Work items w0, w1, w2 are queued to a bound wq q0 on the same CPU. + w0 burns CPU for 5ms then sleeps for 10ms then burns CPU for 5ms + again before finishing. w1 and w2 burn CPU for 5ms then sleep for + 10ms. + +Ignoring all other tasks, works and processing overhead, and assuming +simple FIFO scheduling, the following is one highly simplified version +of possible sequences of events with the original wq.
+ + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 starts and burns CPU + 25 w1 sleeps + 35 w1 wakes up and finishes + 35 w2 starts and burns CPU + 40 w2 sleeps + 50 w2 wakes up and finishes + +And with cmwq with @max_active >= 3, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 10 w2 starts and burns CPU + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + +If @max_active == 2, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 starts and burns CPU + 10 w1 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 20 w2 starts and burns CPU + 25 w2 sleeps + 35 w2 wakes up and finishes + +Now, let's assume w1 and w2 are queued to a different wq q1 which has +WQ_HIGHPRI set, + + TIME IN MSECS EVENT + 0 w1 and w2 start and burn CPU + 5 w1 sleeps + 10 w2 sleeps + 10 w0 starts and burns CPU + 15 w0 sleeps + 15 w1 wakes up and finishes + 20 w2 wakes up and finishes + 25 w0 wakes up and burns CPU + 30 w0 finishes + +If q1 has WQ_CPU_INTENSIVE set, + + TIME IN MSECS EVENT + 0 w0 starts and burns CPU + 5 w0 sleeps + 5 w1 and w2 start and burn CPU + 10 w1 sleeps + 15 w2 sleeps + 15 w0 wakes up and burns CPU + 20 w0 finishes + 20 w1 wakes up and finishes + 25 w2 wakes up and finishes + + +6. Guidelines + +* Do not forget to use WQ_RESCUER if a wq may process work items which + are used during memory reclaim. Each wq with WQ_RESCUER set has one + rescuer thread reserved for it. If there is dependency among + multiple work items used during memory reclaim, they should be + queued to separate wq each with WQ_RESCUER. + +* Unless strict ordering is required, there is no need to use ST wq. + +* Unless there is a specific need, using 0 for @max_active is + recommended. 
In most use cases, concurrency level usually stays + well under the default limit. + +* A wq serves as a domain for forward progress guarantee (WQ_RESCUER), + flush and work item attributes. Work items which are not involved + in memory reclaim and don't need to be flushed as a part of a group + of work items, and don't require any special attribute, can use one + of the system wq. There is no difference in execution + characteristics between using a dedicated wq and a system wq. + +* Unless work items are expected to consume a huge amount of CPU + cycles, using a bound wq is usually beneficial due to the increased + level of locality in wq operations and work item execution. diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f11100f96482..25e02c941bac 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -235,6 +235,10 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define work_clear_pending(work) \ clear_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)) +/* + * Workqueue flags and constants. For details, please refer to + * Documentation/workqueue.txt. + */ enum { WQ_NON_REENTRANT = 1 << 0, /* guarantee non-reentrance */ WQ_UNBOUND = 1 << 1, /* not bound to any cpu */ diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 727f24e563ae..f77afd939229 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1,19 +1,26 @@ /* - * linux/kernel/workqueue.c + * kernel/workqueue.c - generic async execution with shared worker pool * - * Generic mechanism for defining kernel helper threads for running - * arbitrary tasks in process context. + * Copyright (C) 2002 Ingo Molnar * - * Started by Ingo Molnar, Copyright (C) 2002 + * Derived from the taskqueue/keventd code by: + * David Woodhouse + * Andrew Morton + * Kai Petzke + * Theodore Ts'o * - * Derived from the taskqueue/keventd code by: + * Made to use alloc_percpu by Christoph Lameter. 
* - * David Woodhouse - * Andrew Morton - * Kai Petzke - * Theodore Ts'o + * Copyright (C) 2010 SUSE Linux Products GmbH + * Copyright (C) 2010 Tejun Heo * - * Made to use alloc_percpu by Christoph Lameter. + * This is the generic async execution mechanism. Work items are + * executed in process context. The worker pool is shared and + * automatically managed. There is one worker pool for each CPU and + * one extra for works which are better served by workers which are + * not bound to any specific CPU. + * + * Please read Documentation/workqueue.txt for details. */ #include -- cgit v1.2.3 From 7b334fcb45b757ffb093696ca3de1b0c8b4a33f1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 9 Sep 2010 23:51:02 +0100 Subject: drm: Use a nondestructive mode for output detect when polling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Destructive load-detection is very expensive and due to failings elsewhere can trigger system wide stalls of up to 600ms. A simple first step to correcting this is not to invoke such an expensive and destructive load-detection operation automatically.
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=29536 Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16265 Reported-by: Bruno Prémont Tested-by: Sitsofe Wheeler Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 4 ++-- drivers/gpu/drm/drm_sysfs.c | 2 +- drivers/gpu/drm/i915/intel_crt.c | 7 ++++++- drivers/gpu/drm/i915/intel_dp.c | 3 ++- drivers/gpu/drm/i915/intel_dvo.c | 4 +++- drivers/gpu/drm/i915/intel_hdmi.c | 3 ++- drivers/gpu/drm/i915/intel_lvds.c | 8 ++++++-- drivers/gpu/drm/i915/intel_sdvo.c | 6 ++++-- drivers/gpu/drm/i915/intel_tv.c | 12 ++++++------ drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++++--- drivers/gpu/drm/radeon/radeon_connectors.c | 20 +++++++++++++++----- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 3 ++- include/drm/drm_crtc.h | 3 ++- 13 files changed, 56 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index de152a58967d..fb6b70fc6572 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, if (connector->funcs->force) connector->funcs->force(connector); } else { - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, false); drm_kms_helper_poll_enable(dev); } @@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work) !(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); if (old_status != status) changed = true; } diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c index 86118a742231..85da4c40694c 100644 --- a/drivers/gpu/drm/drm_sysfs.c +++ b/drivers/gpu/drm/drm_sysfs.c @@ -159,7 +159,7 @@ static ssize_t status_show(struct device *device, struct 
drm_connector *connector = to_drm_connector(device); enum drm_connector_status status; - status = connector->funcs->detect(connector); + status = connector->funcs->detect(connector, true); return snprintf(buf, PAGE_SIZE, "%s\n", drm_get_connector_status_name(status)); } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 4b7735196cd5..0350e5d711f8 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -400,7 +400,9 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder return status; } -static enum drm_connector_status intel_crt_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_crt_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -419,6 +421,9 @@ static enum drm_connector_status intel_crt_detect(struct drm_connector *connecto if (intel_crt_detect_ddc(encoder)) return connector_status_connected; + if (nondestructive) + return connector->status; + /* for pre-945g platforms use load detect */ if (encoder->crtc && encoder->crtc->enabled) { status = intel_crt_load_detect(encoder->crtc, intel_encoder); diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index 51d142939a26..e1a2a05fb838 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1386,7 +1386,8 @@ ironlake_dp_detect(struct drm_connector *connector) * \return false if DP port is disconnected. 
*/ static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector) +intel_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index a399f4b2c1c5..f0de1addf8a4 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -221,7 +221,9 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder, * * Unimplemented. */ -static enum drm_connector_status intel_dvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_dvo_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index ccd4c97e6524..2ea123d8d22b 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -139,7 +139,8 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector) +intel_hdmi_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index 4fbb0165b26f..fb1bed8f4071 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -445,7 +445,9 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * connected and closed means disconnected. We also send hotplug events as * needed, using lid status notification from the input layer. 
*/ -static enum drm_connector_status intel_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; enum drm_connector_status status = connector_status_connected; @@ -540,7 +542,9 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, * the LID nofication event. */ if (connector) - connector->status = connector->funcs->detect(connector); + connector->status = connector->funcs->detect(connector, + true); + /* Don't force modeset on machines where it causes a GPU lockup */ if (dmi_check_system(intel_no_modeset_on_lid)) return NOTIFY_OK; diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index e3b7a7ee39cb..db6b6d4b8fae 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev) if (!analog_connector) return false; - if (analog_connector->funcs->detect(analog_connector) == + if (analog_connector->funcs->detect(analog_connector, true) == connector_status_disconnected) return false; @@ -1486,7 +1486,9 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) return status; } -static enum drm_connector_status intel_sdvo_detect(struct drm_connector *connector) +static enum drm_connector_status +intel_sdvo_detect(struct drm_connector *connector, + bool nondestructive) { uint16_t response; struct drm_encoder *encoder = intel_attached_encoder(connector); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index c671f60ce80b..d20b550c0f55 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1341,7 +1341,8 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * we have a pipe programmed in order to probe the TV. 
*/ static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector) +intel_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_display_mode mode; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1353,7 +1354,7 @@ intel_tv_detect(struct drm_connector *connector) if (encoder->crtc && encoder->crtc->enabled) { type = intel_tv_detect_type(intel_tv); - } else { + } else if (nondestructive) { struct drm_crtc *crtc; int dpms_mode; @@ -1364,10 +1365,9 @@ intel_tv_detect(struct drm_connector *connector) intel_release_load_detect_pipe(&intel_tv->base, connector, dpms_mode); } else - type = -1; - } - - intel_tv->type = type; + return connector_status_unknown; + } else + return connector->status; if (type < 0) return connector_status_disconnected; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index a1473fff06ac..67d515cb67e0 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -168,7 +168,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector, } static enum drm_connector_status -nouveau_connector_detect(struct drm_connector *connector) +nouveau_connector_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); @@ -246,7 +247,8 @@ detect_analog: } static enum drm_connector_status -nouveau_connector_detect_lvds(struct drm_connector *connector) +nouveau_connector_detect_lvds(struct drm_connector *connector, + bool nondestructive) { struct drm_device *dev = connector->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -267,7 +269,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector) /* Try retrieving EDID via DDC */ if (!dev_priv->vbios.fp_no_ddc) { - status = nouveau_connector_detect(connector); + status = nouveau_connector_detect(connector, 
nondestructive); if (status == connector_status_connected) goto out; } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index a9dd7847d96e..31d309a8e75b 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -481,7 +481,9 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_lvds_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_lvds_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -594,7 +596,9 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_vga_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_vga_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; @@ -691,7 +695,9 @@ static int radeon_tv_mode_valid(struct drm_connector *connector, return MODE_OK; } -static enum drm_connector_status radeon_tv_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_tv_detect(struct drm_connector *connector, + bool nondestructive) { struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -748,7 +754,9 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) * we have to check if this analog encoder is shared with anyone else (TV) * if its shared we have to set the other connector to disconnected. 
*/ -static enum drm_connector_status radeon_dvi_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dvi_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; @@ -972,7 +980,9 @@ static int radeon_dp_get_modes(struct drm_connector *connector) return ret; } -static enum drm_connector_status radeon_dp_detect(struct drm_connector *connector) +static enum drm_connector_status +radeon_dp_detect(struct drm_connector *connector, + bool nondestructive) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index 2ff5cf78235f..a527c91c0ba6 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -335,7 +335,8 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector) } static enum drm_connector_status - vmw_ldu_connector_detect(struct drm_connector *connector) + vmw_ldu_connector_detect(struct drm_connector *connector, + bool nondestructive) { if (vmw_connector_to_ldu(connector)->pref_active) return connector_status_connected; diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index c9f3cc5949a8..5536223fbac8 100644 --- a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -386,7 +386,8 @@ struct drm_connector_funcs { void (*dpms)(struct drm_connector *connector, int mode); void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); - enum drm_connector_status (*detect)(struct drm_connector *connector); + enum drm_connector_status (*detect)(struct drm_connector *connector, + bool nondestructive); int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct 
drm_property *property, uint64_t val); -- cgit v1.2.3 From 930a9e283516a3a3595c0c515113f1b78d07f695 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 14 Sep 2010 11:07:23 +0100 Subject: drm: Use a nondestructive mode for output detect when polling (v2) v2: Julien Cristau pointed out that @nondestructive results in double-negatives and confusion when trying to interpret the parameter, so use @force instead. Much easier to type as well. ;-) And fix the miscompilation of vmgfx reported by Sedat Dilek. Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc_helper.c | 4 ++-- drivers/gpu/drm/i915/intel_crt.c | 5 ++--- drivers/gpu/drm/i915/intel_dp.c | 3 +-- drivers/gpu/drm/i915/intel_dvo.c | 3 +-- drivers/gpu/drm/i915/intel_hdmi.c | 3 +-- drivers/gpu/drm/i915/intel_lvds.c | 5 ++--- drivers/gpu/drm/i915/intel_sdvo.c | 5 ++--- drivers/gpu/drm/i915/intel_tv.c | 5 ++--- drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++----- drivers/gpu/drm/radeon/radeon_connectors.c | 15 +++++---------- drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c | 6 +++--- include/drm/drm_crtc.h | 9 ++++++++- 12 files changed, 32 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c index fb6b70fc6572..dcbeb98f195a 100644 --- a/drivers/gpu/drm/drm_crtc_helper.c +++ b/drivers/gpu/drm/drm_crtc_helper.c @@ -103,7 +103,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector, if (connector->funcs->force) connector->funcs->force(connector); } else { - connector->status = connector->funcs->detect(connector, false); + connector->status = connector->funcs->detect(connector, true); drm_kms_helper_poll_enable(dev); } @@ -866,7 +866,7 @@ static void output_poll_execute(struct work_struct *work) !(connector->polled & DRM_CONNECTOR_POLL_HPD)) continue; - status = connector->funcs->detect(connector, true); + status = connector->funcs->detect(connector, false); if 
(old_status != status) changed = true; } diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c index 0350e5d711f8..a02a8df73727 100644 --- a/drivers/gpu/drm/i915/intel_crt.c +++ b/drivers/gpu/drm/i915/intel_crt.c @@ -401,8 +401,7 @@ intel_crt_load_detect(struct drm_crtc *crtc, struct intel_encoder *intel_encoder } static enum drm_connector_status -intel_crt_detect(struct drm_connector *connector, - bool nondestructive) +intel_crt_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -421,7 +420,7 @@ intel_crt_detect(struct drm_connector *connector, if (intel_crt_detect_ddc(encoder)) return connector_status_connected; - if (nondestructive) + if (!force) return connector->status; /* for pre-945g platforms use load detect */ diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c index e1a2a05fb838..1a51ee07de3e 100644 --- a/drivers/gpu/drm/i915/intel_dp.c +++ b/drivers/gpu/drm/i915/intel_dp.c @@ -1386,8 +1386,7 @@ ironlake_dp_detect(struct drm_connector *connector) * \return false if DP port is disconnected. */ static enum drm_connector_status -intel_dp_detect(struct drm_connector *connector, - bool nondestructive) +intel_dp_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dp *intel_dp = enc_to_intel_dp(encoder); diff --git a/drivers/gpu/drm/i915/intel_dvo.c b/drivers/gpu/drm/i915/intel_dvo.c index f0de1addf8a4..7c9ec1472d46 100644 --- a/drivers/gpu/drm/i915/intel_dvo.c +++ b/drivers/gpu/drm/i915/intel_dvo.c @@ -222,8 +222,7 @@ static void intel_dvo_mode_set(struct drm_encoder *encoder, * Unimplemented. 
*/ static enum drm_connector_status -intel_dvo_detect(struct drm_connector *connector, - bool nondestructive) +intel_dvo_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_dvo *intel_dvo = enc_to_intel_dvo(encoder); diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index 2ea123d8d22b..926934a482ec 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -139,8 +139,7 @@ static bool intel_hdmi_mode_fixup(struct drm_encoder *encoder, } static enum drm_connector_status -intel_hdmi_detect(struct drm_connector *connector, - bool nondestructive) +intel_hdmi_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder = intel_attached_encoder(connector); struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(encoder); diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c index fb1bed8f4071..6ec39a86ed06 100644 --- a/drivers/gpu/drm/i915/intel_lvds.c +++ b/drivers/gpu/drm/i915/intel_lvds.c @@ -446,8 +446,7 @@ static void intel_lvds_mode_set(struct drm_encoder *encoder, * needed, using lid status notification from the input layer. 
*/ static enum drm_connector_status -intel_lvds_detect(struct drm_connector *connector, - bool nondestructive) +intel_lvds_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; enum drm_connector_status status = connector_status_connected; @@ -543,7 +542,7 @@ static int intel_lid_notify(struct notifier_block *nb, unsigned long val, */ if (connector) connector->status = connector->funcs->detect(connector, - true); + false); /* Don't force modeset on machines where it causes a GPU lockup */ if (dmi_check_system(intel_no_modeset_on_lid)) diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c index db6b6d4b8fae..e8e902d614ed 100644 --- a/drivers/gpu/drm/i915/intel_sdvo.c +++ b/drivers/gpu/drm/i915/intel_sdvo.c @@ -1417,7 +1417,7 @@ intel_analog_is_connected(struct drm_device *dev) if (!analog_connector) return false; - if (analog_connector->funcs->detect(analog_connector, true) == + if (analog_connector->funcs->detect(analog_connector, false) == connector_status_disconnected) return false; @@ -1487,8 +1487,7 @@ intel_sdvo_hdmi_sink_detect(struct drm_connector *connector) } static enum drm_connector_status -intel_sdvo_detect(struct drm_connector *connector, - bool nondestructive) +intel_sdvo_detect(struct drm_connector *connector, bool force) { uint16_t response; struct drm_encoder *encoder = intel_attached_encoder(connector); diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c index d20b550c0f55..4a117e318a73 100644 --- a/drivers/gpu/drm/i915/intel_tv.c +++ b/drivers/gpu/drm/i915/intel_tv.c @@ -1341,8 +1341,7 @@ static void intel_tv_find_better_format(struct drm_connector *connector) * we have a pipe programmed in order to probe the TV. 
*/ static enum drm_connector_status -intel_tv_detect(struct drm_connector *connector, - bool nondestructive) +intel_tv_detect(struct drm_connector *connector, bool force) { struct drm_display_mode mode; struct drm_encoder *encoder = intel_attached_encoder(connector); @@ -1354,7 +1353,7 @@ intel_tv_detect(struct drm_connector *connector, if (encoder->crtc && encoder->crtc->enabled) { type = intel_tv_detect_type(intel_tv); - } else if (nondestructive) { + } else if (force) { struct drm_crtc *crtc; int dpms_mode; diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c index 67d515cb67e0..87186a4bbf03 100644 --- a/drivers/gpu/drm/nouveau/nouveau_connector.c +++ b/drivers/gpu/drm/nouveau/nouveau_connector.c @@ -168,8 +168,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector, } static enum drm_connector_status -nouveau_connector_detect(struct drm_connector *connector, - bool nondestructive) +nouveau_connector_detect(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct nouveau_connector *nv_connector = nouveau_connector(connector); @@ -247,8 +246,7 @@ detect_analog: } static enum drm_connector_status -nouveau_connector_detect_lvds(struct drm_connector *connector, - bool nondestructive) +nouveau_connector_detect_lvds(struct drm_connector *connector, bool force) { struct drm_device *dev = connector->dev; struct drm_nouveau_private *dev_priv = dev->dev_private; @@ -269,7 +267,7 @@ nouveau_connector_detect_lvds(struct drm_connector *connector, /* Try retrieving EDID via DDC */ if (!dev_priv->vbios.fp_no_ddc) { - status = nouveau_connector_detect(connector, nondestructive); + status = nouveau_connector_detect(connector, force); if (status == connector_status_connected) goto out; } diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c index 31d309a8e75b..ecc1a8fafbfd 100644 --- a/drivers/gpu/drm/radeon/radeon_connectors.c +++ 
b/drivers/gpu/drm/radeon/radeon_connectors.c @@ -482,8 +482,7 @@ static int radeon_lvds_mode_valid(struct drm_connector *connector, } static enum drm_connector_status -radeon_lvds_detect(struct drm_connector *connector, - bool nondestructive) +radeon_lvds_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = radeon_best_single_encoder(connector); @@ -597,8 +596,7 @@ static int radeon_vga_mode_valid(struct drm_connector *connector, } static enum drm_connector_status -radeon_vga_detect(struct drm_connector *connector, - bool nondestructive) +radeon_vga_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder; @@ -696,8 +694,7 @@ static int radeon_tv_mode_valid(struct drm_connector *connector, } static enum drm_connector_status -radeon_tv_detect(struct drm_connector *connector, - bool nondestructive) +radeon_tv_detect(struct drm_connector *connector, bool force) { struct drm_encoder *encoder; struct drm_encoder_helper_funcs *encoder_funcs; @@ -755,8 +752,7 @@ static int radeon_dvi_get_modes(struct drm_connector *connector) * if its shared we have to set the other connector to disconnected. 
*/ static enum drm_connector_status -radeon_dvi_detect(struct drm_connector *connector, - bool nondestructive) +radeon_dvi_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); struct drm_encoder *encoder = NULL; @@ -981,8 +977,7 @@ static int radeon_dp_get_modes(struct drm_connector *connector) } static enum drm_connector_status -radeon_dp_detect(struct drm_connector *connector, - bool nondestructive) +radeon_dp_detect(struct drm_connector *connector, bool force) { struct radeon_connector *radeon_connector = to_radeon_connector(connector); enum drm_connector_status ret = connector_status_disconnected; diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c index a527c91c0ba6..7083b1a24df3 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ldu.c @@ -336,7 +336,7 @@ static void vmw_ldu_connector_restore(struct drm_connector *connector) static enum drm_connector_status vmw_ldu_connector_detect(struct drm_connector *connector, - bool nondestructive) + bool force) { if (vmw_connector_to_ldu(connector)->pref_active) return connector_status_connected; @@ -517,7 +517,7 @@ static int vmw_ldu_init(struct vmw_private *dev_priv, unsigned unit) drm_connector_init(dev, connector, &vmw_legacy_connector_funcs, DRM_MODE_CONNECTOR_LVDS); - connector->status = vmw_ldu_connector_detect(connector); + connector->status = vmw_ldu_connector_detect(connector, true); drm_encoder_init(dev, encoder, &vmw_legacy_encoder_funcs, DRM_MODE_ENCODER_LVDS); @@ -611,7 +611,7 @@ int vmw_kms_ldu_update_layout(struct vmw_private *dev_priv, unsigned num, ldu->pref_height = 600; ldu->pref_active = false; } - con->status = vmw_ldu_connector_detect(con); + con->status = vmw_ldu_connector_detect(con, true); } mutex_unlock(&dev->mode_config.mutex); diff --git a/include/drm/drm_crtc.h b/include/drm/drm_crtc.h index 5536223fbac8..3e5a51af757c 100644 --- 
a/include/drm/drm_crtc.h +++ b/include/drm/drm_crtc.h @@ -386,8 +386,15 @@ struct drm_connector_funcs { void (*dpms)(struct drm_connector *connector, int mode); void (*save)(struct drm_connector *connector); void (*restore)(struct drm_connector *connector); + + /* Check to see if anything is attached to the connector. + * @force is set to false whilst polling, true when checking the + * connector due to user request. @force can be used by the driver + * to avoid expensive, destructive operations during automated + * probing. + */ enum drm_connector_status (*detect)(struct drm_connector *connector, - bool nondestructive); + bool force); int (*fill_modes)(struct drm_connector *connector, uint32_t max_width, uint32_t max_height); int (*set_property)(struct drm_connector *connector, struct drm_property *property, uint64_t val); -- cgit v1.2.3 From c41d68a513c71e35a14f66d71782d27a79a81ea6 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 7 Sep 2010 16:16:18 -0700 Subject: compat: Make compat_alloc_user_space() incorporate the access_ok() compat_alloc_user_space() expects the caller to independently call access_ok() to verify the returned area. A missing call could introduce problems on some architectures. This patch incorporates the access_ok() check into compat_alloc_user_space() and also adds a sanity check on the length. The existing compat_alloc_user_space() implementations are renamed arch_compat_alloc_user_space() and are used as part of the implementation of the new global function. This patch assumes NULL will cause __get_user()/__put_user() to either fail or access userspace on all architectures. This should be followed by checking the return value of compat_alloc_user_space() for NULL in the callers, at which time the access_ok() in the callers can also be removed. Reported-by: Ben Hawkes Signed-off-by: H. Peter Anvin Acked-by: Benjamin Herrenschmidt Acked-by: Chris Metcalf Acked-by: David S. 
Miller Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Acked-by: Tony Luck Cc: Andrew Morton Cc: Arnd Bergmann Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Helge Deller Cc: James Bottomley Cc: Kyle McMartin Cc: Martin Schwidefsky Cc: Paul Mackerras Cc: Ralf Baechle Cc: --- arch/ia64/include/asm/compat.h | 2 +- arch/mips/include/asm/compat.h | 2 +- arch/parisc/include/asm/compat.h | 2 +- arch/powerpc/include/asm/compat.h | 2 +- arch/s390/include/asm/compat.h | 2 +- arch/sparc/include/asm/compat.h | 2 +- arch/tile/include/asm/compat.h | 2 +- arch/x86/include/asm/compat.h | 2 +- include/linux/compat.h | 3 +++ kernel/compat.c | 21 +++++++++++++++++++++ 10 files changed, 32 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h index f90edc85b509..9301a2821615 100644 --- a/arch/ia64/include/asm/compat.h +++ b/arch/ia64/include/asm/compat.h @@ -199,7 +199,7 @@ ptr_to_compat(void __user *uptr) } static __inline__ void __user * -compat_alloc_user_space (long len) +arch_compat_alloc_user_space (long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len); diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 613f6912dfc1..dbc51065df5b 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -145,7 +145,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = (struct pt_regs *) ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1; diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 02b77baa5da6..efa0b60c63fe 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -147,7 +147,7 @@ static inline 
compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static __inline__ void __user *compat_alloc_user_space(long len) +static __inline__ void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = &current->thread.regs; return (void __user *)regs->gr[30]; diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 396d21a80058..a11d4eac4f97 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -134,7 +134,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current->thread.regs; unsigned long usp = regs->gpr[1]; diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 104f2007f097..a875c2f542e1 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -181,7 +181,7 @@ static inline int is_compat_task(void) #endif -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { unsigned long stack; diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 5016f76ea98a..6f57325bb883 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -167,7 +167,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current_thread_info()->kregs; unsigned long usp = regs->u_regs[UREG_I6]; diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h index 5a34da6cdd79..345d81ce44bb 100644 --- a/arch/tile/include/asm/compat.h +++ b/arch/tile/include/asm/compat.h @@ -195,7 +195,7 @@ static inline 
unsigned long ptr_to_compat_reg(void __user *uptr) return (long)(int)(long __force)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *)regs->sp - len; diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 306160e58b48..1d9cd27c2920 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -205,7 +205,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static inline void __user *compat_alloc_user_space(long len) +static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = task_pt_regs(current); return (void __user *)regs->sp - len; diff --git a/include/linux/compat.h b/include/linux/compat.h index 9ddc8780e8db..5778b559d59c 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -360,5 +360,8 @@ extern ssize_t compat_rw_copy_check_uvector(int type, const struct compat_iovec __user *uvector, unsigned long nr_segs, unsigned long fast_segs, struct iovec *fast_pointer, struct iovec **ret_pointer); + +extern void __user *compat_alloc_user_space(unsigned long len); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/kernel/compat.c b/kernel/compat.c index e167efce8423..c9e2ec0b34a8 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -1126,3 +1126,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info) return 0; } + +/* + * Allocate user-space memory for the duration of a single system call, + * in order to marshall parameters inside a compat thunk. + */ +void __user *compat_alloc_user_space(unsigned long len) +{ + void __user *ptr; + + /* If len would occupy more than half of the entire compat space... 
*/ + if (unlikely(len > (((compat_uptr_t)~0) >> 1))) + return NULL; + + ptr = arch_compat_alloc_user_space(len); + + if (unlikely(!access_ok(VERIFY_WRITE, ptr, len))) + return NULL; + + return ptr; +} +EXPORT_SYMBOL_GPL(compat_alloc_user_space); -- cgit v1.2.3 From 8b15575cae7a93a784c3005c42b069edd9ba64dd Mon Sep 17 00:00:00 2001 From: Sage Weil Date: Tue, 21 Sep 2010 14:35:37 -0700 Subject: fs: {lock,unlock}_flocks() stubs to prepare for BKL removal The lock structs are currently protected by the BKL, but are accessed by code in fs/locks.c and misc file system and DLM code. These stubs will allow all users to switch to the new interface before the implementation is changed to a spinlock. Acked-by: Arnd Bergmann Signed-off-by: Sage Weil Signed-off-by: Linus Torvalds --- include/linux/fs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..63d069bd80b7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1093,6 +1093,10 @@ struct file_lock { #include +/* temporary stubs for BKL removal */ +#define lock_flocks() lock_kernel() +#define unlock_flocks() unlock_kernel() + extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING -- cgit v1.2.3 From d3f3cf859db17cc5f8156c5bfcd032413e44483b Mon Sep 17 00:00:00 2001 From: Mathieu Lacage Date: Sat, 14 Aug 2010 15:02:44 +0200 Subject: missing inline keyword for static function in linux/dmaengine.h Add a missing inline keyword for static function in linux/dmaengine.h to avoid duplicate symbol definitions. 
Signed-off-by: Mathieu Lacage Signed-off-by: Dan Williams --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c61d4ca27bcc..e2106495cc11 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -548,7 +548,7 @@ static inline bool dma_dev_has_pq_continue(struct dma_device *dma) return (dma->max_pq & DMA_HAS_PQ_CONTINUE) == DMA_HAS_PQ_CONTINUE; } -static unsigned short dma_dev_to_maxpq(struct dma_device *dma) +static inline unsigned short dma_dev_to_maxpq(struct dma_device *dma) { return dma->max_pq & ~DMA_HAS_PQ_CONTINUE; } -- cgit v1.2.3 From 710224fa2750cf449c02dd115548acebfdd2c86a Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Wed, 22 Sep 2010 13:04:55 -0700 Subject: arm: fix "arm: fix pci_set_consistent_dma_mask for dmabounce devices" This fixes the regression caused by the commit 6fee48cd330c68 ("dma-mapping: arm: use generic pci_set_dma_mask and pci_set_consistent_dma_mask"). ARM needs to clip the dma coherent mask for dmabounce devices. This restores the old trick. Note that strictly speaking, the DMA API doesn't allow architectures to do such but I'm not sure it's worth adding the new API to set the dma mask that allows architectures to clip it. 
Reported-by: Krzysztof Halasa Signed-off-by: FUJITA Tomonori Acked-by: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/common/it8152.c | 8 ++++++++ arch/arm/mach-ixp4xx/common-pci.c | 8 ++++++++ arch/arm/mach-ixp4xx/include/mach/hardware.h | 2 ++ arch/arm/mach-pxa/include/mach/hardware.h | 2 +- arch/arm/mach-pxa/include/mach/io.h | 2 ++ include/linux/dma-mapping.h | 4 ++++ 6 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/arm/common/it8152.c b/arch/arm/common/it8152.c index 7974baacafce..1bec96e85196 100644 --- a/arch/arm/common/it8152.c +++ b/arch/arm/common/it8152.c @@ -271,6 +271,14 @@ int dma_needs_bounce(struct device *dev, dma_addr_t dma_addr, size_t size) ((dma_addr + size - PHYS_OFFSET) >= SZ_64M); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= PHYS_OFFSET + SZ_64M - 1) + return 0; + + return -EIO; +} + int __init it8152_pci_setup(int nr, struct pci_sys_data *sys) { it8152_io.start = IT8152_IO_BASE + 0x12000; diff --git a/arch/arm/mach-ixp4xx/common-pci.c b/arch/arm/mach-ixp4xx/common-pci.c index 61cd4d64b985..24498a932ba6 100644 --- a/arch/arm/mach-ixp4xx/common-pci.c +++ b/arch/arm/mach-ixp4xx/common-pci.c @@ -503,6 +503,14 @@ struct pci_bus * __devinit ixp4xx_scan_bus(int nr, struct pci_sys_data *sys) return pci_scan_bus(sys->busnr, &ixp4xx_ops, sys); } +int dma_set_coherent_mask(struct device *dev, u64 mask) +{ + if (mask >= SZ_64M - 1) + return 0; + + return -EIO; +} + EXPORT_SYMBOL(ixp4xx_pci_read); EXPORT_SYMBOL(ixp4xx_pci_write); diff --git a/arch/arm/mach-ixp4xx/include/mach/hardware.h b/arch/arm/mach-ixp4xx/include/mach/hardware.h index f91ca6d4fbe8..8138371c406e 100644 --- a/arch/arm/mach-ixp4xx/include/mach/hardware.h +++ b/arch/arm/mach-ixp4xx/include/mach/hardware.h @@ -26,6 +26,8 @@ #define PCIBIOS_MAX_MEM 0x4BFFFFFF #endif +#define ARCH_HAS_DMA_SET_COHERENT_MASK + #define pcibios_assign_all_busses() 1 /* Register locations and bits */ 
diff --git a/arch/arm/mach-pxa/include/mach/hardware.h b/arch/arm/mach-pxa/include/mach/hardware.h index 7f64d24cd564..428cc7bda9a4 100644 --- a/arch/arm/mach-pxa/include/mach/hardware.h +++ b/arch/arm/mach-pxa/include/mach/hardware.h @@ -309,7 +309,7 @@ extern unsigned long get_clock_tick_rate(void); #define PCIBIOS_MIN_IO 0 #define PCIBIOS_MIN_MEM 0 #define pcibios_assign_all_busses() 1 +#define ARCH_HAS_DMA_SET_COHERENT_MASK #endif - #endif /* _ASM_ARCH_HARDWARE_H */ diff --git a/arch/arm/mach-pxa/include/mach/io.h b/arch/arm/mach-pxa/include/mach/io.h index 262691fb97d8..fdca3be47d9b 100644 --- a/arch/arm/mach-pxa/include/mach/io.h +++ b/arch/arm/mach-pxa/include/mach/io.h @@ -6,6 +6,8 @@ #ifndef __ASM_ARM_ARCH_IO_H #define __ASM_ARM_ARCH_IO_H +#include + #define IO_SPACE_LIMIT 0xffffffff /* diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ce29b8151198..ba8319ae5fcc 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -102,6 +102,9 @@ static inline u64 dma_get_mask(struct device *dev) return DMA_BIT_MASK(32); } +#ifdef ARCH_HAS_DMA_SET_COHERENT_MASK +int dma_set_coherent_mask(struct device *dev, u64 mask); +#else static inline int dma_set_coherent_mask(struct device *dev, u64 mask) { if (!dma_supported(dev, mask)) @@ -109,6 +112,7 @@ static inline int dma_set_coherent_mask(struct device *dev, u64 mask) dev->coherent_dma_mask = mask; return 0; } +#endif extern u64 dma_get_required_mask(struct device *dev); -- cgit v1.2.3 From 4c894f47bb49284008073d351c0ddaac8860864e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 23 Sep 2010 15:15:19 +0200 Subject: x86/amd-iommu: Work around S3 BIOS bug This patch adds a workaround for an IOMMU BIOS problem to the AMD IOMMU driver. The result of the bug is that the IOMMU does not execute commands anymore when the system comes out of the S3 state resulting in system failure. The bug in the BIOS is that is does not restore certain hardware specific registers correctly. 
This workaround reads out the contents of these registers at boot time and restores them on resume from S3. The workaround is limited to the specific IOMMU chipset where this problem occurs. Cc: stable@kernel.org Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu_proto.h | 6 ++++++ arch/x86/include/asm/amd_iommu_types.h | 9 +++++++++ arch/x86/kernel/amd_iommu_init.c | 18 ++++++++++++++++++ include/linux/pci_ids.h | 3 +++ 4 files changed, 36 insertions(+) (limited to 'include') diff --git a/arch/x86/include/asm/amd_iommu_proto.h b/arch/x86/include/asm/amd_iommu_proto.h index d2544f1d705d..cb030374b90a 100644 --- a/arch/x86/include/asm/amd_iommu_proto.h +++ b/arch/x86/include/asm/amd_iommu_proto.h @@ -38,4 +38,10 @@ static inline void amd_iommu_stats_init(void) { } #endif /* !CONFIG_AMD_IOMMU_STATS */ +static inline bool is_rd890_iommu(struct pci_dev *pdev) +{ + return (pdev->vendor == PCI_VENDOR_ID_ATI) && + (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); +} + #endif /* _ASM_X86_AMD_IOMMU_PROTO_H */ diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index ef2d5cd7d7e7..08616180deaf 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -414,6 +414,15 @@ struct amd_iommu { /* default dma_ops domain for that IOMMU */ struct dma_ops_domain *default_dom; + + /* + * This array is required to work around a potential BIOS bug. + * The BIOS may miss to restore parts of the PCI configuration + * space when the system resumes from S3. The result is that the + * IOMMU does not execute commands anymore which leads to system + * failure. 
+ */ + u32 cache_cfg[4]; }; /* diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index 85e9817ead43..5a170cbbbed8 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -632,6 +632,13 @@ static void __init init_iommu_from_pci(struct amd_iommu *iommu) iommu->last_device = calc_devid(MMIO_GET_BUS(range), MMIO_GET_LD(range)); iommu->evt_msi_num = MMIO_MSI_NUM(misc); + + if (is_rd890_iommu(iommu->dev)) { + pci_read_config_dword(iommu->dev, 0xf0, &iommu->cache_cfg[0]); + pci_read_config_dword(iommu->dev, 0xf4, &iommu->cache_cfg[1]); + pci_read_config_dword(iommu->dev, 0xf8, &iommu->cache_cfg[2]); + pci_read_config_dword(iommu->dev, 0xfc, &iommu->cache_cfg[3]); + } } /* @@ -1120,6 +1127,16 @@ static void iommu_init_flags(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_COHERENT_EN); } +static void iommu_apply_quirks(struct amd_iommu *iommu) +{ + if (is_rd890_iommu(iommu->dev)) { + pci_write_config_dword(iommu->dev, 0xf0, iommu->cache_cfg[0]); + pci_write_config_dword(iommu->dev, 0xf4, iommu->cache_cfg[1]); + pci_write_config_dword(iommu->dev, 0xf8, iommu->cache_cfg[2]); + pci_write_config_dword(iommu->dev, 0xfc, iommu->cache_cfg[3]); + } +} + /* * This function finally enables all IOMMUs found in the system after * they have been initialized @@ -1130,6 +1147,7 @@ static void enable_iommus(void) for_each_iommu(iommu) { iommu_disable(iommu); + iommu_apply_quirks(iommu); iommu_init_flags(iommu); iommu_set_device_table(iommu); iommu_enable_command_buffer(iommu); diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 10d33309e9a6..570fddeb0388 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -393,6 +393,9 @@ #define PCI_DEVICE_ID_VLSI_82C147 0x0105 #define PCI_DEVICE_ID_VLSI_VAS96011 0x0702 +/* AMD RD890 Chipset */ +#define PCI_DEVICE_ID_RD890_IOMMU 0x5a23 + #define PCI_VENDOR_ID_ADL 0x1005 #define PCI_DEVICE_ID_ADL_2301 0x2301 -- cgit v1.2.3 From 
b3a084b9b684622b149e8dcf03855bf0d5fb588b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 08:38:44 +0200 Subject: rcu: rcu_read_lock_bh_held(): disabling irqs also disables bh rcu_dereference_bh() doesn't know yet about hard irq being disabled, so lockdep can trigger in netpoll_rx() after commit f0f9deae9e7c4 (netpoll: Disable IRQ around RCU dereference in netpoll_rx) Reported-by: Miles Lane Signed-off-by: Eric Dumazet Tested-by: Miles Lane Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 9fbc54a2585d..83af1f8d8b74 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -454,7 +454,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) * Makes rcu_dereference_check() do the dirty work. */ #define rcu_dereference_bh(p) \ - rcu_dereference_check(p, rcu_read_lock_bh_held()) + rcu_dereference_check(p, rcu_read_lock_bh_held() || irqs_disabled()) /** * rcu_dereference_sched - fetch RCU-protected pointer, checking for RCU-sched -- cgit v1.2.3 From f459ffbdfd04edb4a8ce6eea33170eb057a5e695 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Sat, 25 Sep 2010 17:45:50 +1000 Subject: drm/radeon: fix PCI ID 5657 to be an RV410 fixes https://bugzilla.kernel.org/show_bug.cgi?id=19012 cc: stable@kernel.org Signed-off-by: Dave Airlie --- include/drm/drm_pciids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_pciids.h b/include/drm/drm_pciids.h index 3a9940ef728b..883c1d439899 100644 --- a/include/drm/drm_pciids.h +++ b/include/drm/drm_pciids.h @@ -85,7 +85,6 @@ {0x1002, 0x5460, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5462, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ {0x1002, 0x5464, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_IS_MOBILITY}, \ - {0x1002, 0x5657,
PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV380|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5548, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5549, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ {0x1002, 0x554A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_R423|RADEON_NEW_MEMMAP}, \ @@ -103,6 +102,7 @@ {0x1002, 0x564F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5652, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5653, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_IS_MOBILITY|RADEON_NEW_MEMMAP}, \ + {0x1002, 0x5657, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RV410|RADEON_NEW_MEMMAP}, \ {0x1002, 0x5834, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP}, \ {0x1002, 0x5835, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS300|RADEON_IS_IGP|RADEON_IS_MOBILITY}, \ {0x1002, 0x5954, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RS480|RADEON_IS_IGP|RADEON_IS_MOBILITY|RADEON_IS_IGPGART}, \ -- cgit v1.2.3 From 31dfbc93923c0aaa0440b809f80ff2830c6a531a Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 27 Sep 2010 21:28:30 +0100 Subject: drm: Prune GEM vma entries Hook the GEM vm open/close ops into the generic drm vm open/close so that the private vma entries are created and destroy appropriately. Fixes the leak of the drm_vma_entries during the lifetime of the filp. 
Reported-by: Matt Mackall Cc: Jesse Barnes Signed-off-by: Chris Wilson Acked-by: Jesse Barnes Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 9 ++++++++- drivers/gpu/drm/drm_vm.c | 28 ++++++++++++++++++---------- include/drm/drmP.h | 1 + 3 files changed, 27 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index bf92d07510df..6fe2cd298c12 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -528,6 +528,10 @@ void drm_gem_vm_open(struct vm_area_struct *vma) struct drm_gem_object *obj = vma->vm_private_data; drm_gem_object_reference(obj); + + mutex_lock(&obj->dev->struct_mutex); + drm_vm_open_locked(vma); + mutex_unlock(&obj->dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_open); @@ -535,7 +539,10 @@ void drm_gem_vm_close(struct vm_area_struct *vma) { struct drm_gem_object *obj = vma->vm_private_data; - drm_gem_object_unreference_unlocked(obj); + mutex_lock(&obj->dev->struct_mutex); + drm_vm_close_locked(vma); + drm_gem_object_unreference(obj); + mutex_unlock(&obj->dev->struct_mutex); } EXPORT_SYMBOL(drm_gem_vm_close); diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c index fda67468e603..5df450683aab 100644 --- a/drivers/gpu/drm/drm_vm.c +++ b/drivers/gpu/drm/drm_vm.c @@ -433,15 +433,7 @@ static void drm_vm_open(struct vm_area_struct *vma) mutex_unlock(&dev->struct_mutex); } -/** - * \c close method for all virtual memory types. - * - * \param vma virtual memory area. - * - * Search the \p vma private data entry in drm_device::vmalist, unlink it, and - * free it. 
- */ -static void drm_vm_close(struct vm_area_struct *vma) +void drm_vm_close_locked(struct vm_area_struct *vma) { struct drm_file *priv = vma->vm_file->private_data; struct drm_device *dev = priv->minor->dev; @@ -451,7 +443,6 @@ static void drm_vm_close(struct vm_area_struct *vma) vma->vm_start, vma->vm_end - vma->vm_start); atomic_dec(&dev->vma_count); - mutex_lock(&dev->struct_mutex); list_for_each_entry_safe(pt, temp, &dev->vmalist, head) { if (pt->vma == vma) { list_del(&pt->head); @@ -459,6 +450,23 @@ static void drm_vm_close(struct vm_area_struct *vma) break; } } +} + +/** + * \c close method for all virtual memory types. + * + * \param vma virtual memory area. + * + * Search the \p vma private data entry in drm_device::vmalist, unlink it, and + * free it. + */ +static void drm_vm_close(struct vm_area_struct *vma) +{ + struct drm_file *priv = vma->vm_file->private_data; + struct drm_device *dev = priv->minor->dev; + + mutex_lock(&dev->struct_mutex); + drm_vm_close_locked(vma); mutex_unlock(&dev->struct_mutex); } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 7809d230adee..774e1d49509b 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1175,6 +1175,7 @@ extern int drm_release(struct inode *inode, struct file *filp); extern int drm_mmap(struct file *filp, struct vm_area_struct *vma); extern int drm_mmap_locked(struct file *filp, struct vm_area_struct *vma); extern void drm_vm_open_locked(struct vm_area_struct *vma); +extern void drm_vm_close_locked(struct vm_area_struct *vma); extern resource_size_t drm_core_get_map_ofs(struct drm_local_map * map); extern resource_size_t drm_core_get_reg_ofs(struct drm_device *dev); extern unsigned int drm_poll(struct file *filp, struct poll_table_struct *wait); -- cgit v1.2.3 From 58f87ed0d45141a90167f34c0959d607160a26df Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Tue, 7 Sep 2010 12:49:45 -0400 Subject: ACPI: Fix typos Signed-off-by: Len Brown --- drivers/acpi/acpica/exutils.c | 2 +- 
drivers/acpi/acpica/rsutils.c | 2 +- drivers/acpi/apei/Kconfig | 2 +- drivers/acpi/apei/erst-dbg.c | 2 +- drivers/acpi/apei/erst.c | 2 +- drivers/acpi/bus.c | 4 ++-- drivers/acpi/processor_perflib.c | 4 ++-- include/acpi/acpixf.h | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/acpi/acpica/exutils.c b/drivers/acpi/acpica/exutils.c index 74c24d517f81..4093522eed45 100644 --- a/drivers/acpi/acpica/exutils.c +++ b/drivers/acpi/acpica/exutils.c @@ -109,7 +109,7 @@ void acpi_ex_enter_interpreter(void) * * DESCRIPTION: Reacquire the interpreter execution region from within the * interpreter code. Failure to enter the interpreter region is a - * fatal system error. Used in conjuction with + * fatal system error. Used in conjunction with * relinquish_interpreter * ******************************************************************************/ diff --git a/drivers/acpi/acpica/rsutils.c b/drivers/acpi/acpica/rsutils.c index 22cfcfbd9fff..491191e6cf69 100644 --- a/drivers/acpi/acpica/rsutils.c +++ b/drivers/acpi/acpica/rsutils.c @@ -149,7 +149,7 @@ acpi_rs_move_data(void *destination, void *source, u16 item_count, u8 move_type) /* * 16-, 32-, and 64-bit cases must use the move macros that perform - * endian conversion and/or accomodate hardware that cannot perform + * endian conversion and/or accommodate hardware that cannot perform * misaligned memory transfers */ case ACPI_RSC_MOVE16: diff --git a/drivers/acpi/apei/Kconfig b/drivers/acpi/apei/Kconfig index 907e350f1c7d..fca34ccfd294 100644 --- a/drivers/acpi/apei/Kconfig +++ b/drivers/acpi/apei/Kconfig @@ -34,6 +34,6 @@ config ACPI_APEI_ERST_DEBUG depends on ACPI_APEI help ERST is a way provided by APEI to save and retrieve hardware - error infomation to and from a persistent store. Enable this + error information to and from a persistent store. Enable this if you want to debugging and testing the ERST kernel support and firmware implementation. 
diff --git a/drivers/acpi/apei/erst-dbg.c b/drivers/acpi/apei/erst-dbg.c index 5281ddda2777..98ffa2991ebc 100644 --- a/drivers/acpi/apei/erst-dbg.c +++ b/drivers/acpi/apei/erst-dbg.c @@ -2,7 +2,7 @@ * APEI Error Record Serialization Table debug support * * ERST is a way provided by APEI to save and retrieve hardware error - * infomation to and from a persistent store. This file provide the + * information to and from a persistent store. This file provide the * debugging/testing support for ERST kernel support and firmware * implementation. * diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 18645f4e83cd..a4904f1680cf 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -2,7 +2,7 @@ * APEI Error Record Serialization Table support * * ERST is a way provided by APEI to save and retrieve hardware error - * infomation to and from a persistent store. + * information to and from a persistent store. * * For more information about ERST, please refer to ACPI Specification * version 4.0, section 17.4. diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 5c221ab535d5..cc17b352d1c5 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -55,7 +55,7 @@ EXPORT_SYMBOL(acpi_root_dir); static int set_power_nocheck(const struct dmi_system_id *id) { printk(KERN_NOTICE PREFIX "%s detected - " - "disable power check in power transistion\n", id->ident); + "disable power check in power transition\n", id->ident); acpi_power_nocheck = 1; return 0; } @@ -1027,7 +1027,7 @@ static int __init acpi_init(void) /* * If the laptop falls into the DMI check table, the power state check - * will be disabled in the course of device power transistion. + * will be disabled in the course of device power transition. 
*/ dmi_check_system(power_nocheck_dmi_table); diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index ba1bd263d903..3a73a93596e8 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -447,8 +447,8 @@ int acpi_processor_notify_smm(struct module *calling_module) if (!try_module_get(calling_module)) return -EINVAL; - /* is_done is set to negative if an error occured, - * and to postitive if _no_ error occured, but SMM + /* is_done is set to negative if an error occurred, + * and to postitive if _no_ error occurred, but SMM * was already notified. This avoids double notification * which might lead to unexpected results... */ diff --git a/include/acpi/acpixf.h b/include/acpi/acpixf.h index c0786d446a00..984cdc62e30b 100644 --- a/include/acpi/acpixf.h +++ b/include/acpi/acpixf.h @@ -55,7 +55,7 @@ extern u8 acpi_gbl_permanent_mmap; /* - * Globals that are publically available, allowing for + * Globals that are publicly available, allowing for * run time configuration */ extern u32 acpi_dbg_level; -- cgit v1.2.3 From 29d08b3efddca628b0360411ab2b85f7b1723f48 Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 27 Sep 2010 16:17:17 +1000 Subject: drm/gem: handlecount isn't really a kref so don't make it one. There were lots of places being inconsistent since handle count looked like a kref but it really wasn't. Fix this by just making handle count an atomic on the object, and have it increase the normal object kref. Now i915/radeon/nouveau drivers can drop the normal reference on userspace object creation, and have the handle hold it. This patch fixes a memory leak or corruption on unload, because the driver had no way of knowing if a handle had been actually added for this object, and the fbcon object needed to know this to clean itself up properly.
Reviewed-by: Chris Wilson Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 8 ++------ drivers/gpu/drm/drm_info.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 6 ++---- drivers/gpu/drm/i915/intel_fb.c | 4 +++- drivers/gpu/drm/nouveau/nouveau_fbcon.c | 1 + drivers/gpu/drm/nouveau/nouveau_gem.c | 6 ++---- drivers/gpu/drm/nouveau/nouveau_notifier.c | 1 + drivers/gpu/drm/radeon/radeon_display.c | 3 ++- drivers/gpu/drm/radeon/radeon_fb.c | 14 ++++---------- drivers/gpu/drm/radeon/radeon_gem.c | 4 ++-- include/drm/drmP.h | 18 +++++++++++++----- 11 files changed, 33 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index 6fe2cd298c12..f7e61be8430a 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -148,7 +148,7 @@ int drm_gem_object_init(struct drm_device *dev, return -ENOMEM; kref_init(&obj->refcount); - kref_init(&obj->handlecount); + atomic_set(&obj->handle_count, 0); obj->size = size; atomic_inc(&dev->object_count); @@ -496,12 +496,8 @@ static void drm_gem_object_ref_bug(struct kref *list_kref) * called before drm_gem_object_free or we'll be touching * freed memory */ -void -drm_gem_object_handle_free(struct kref *kref) +void drm_gem_object_handle_free(struct drm_gem_object *obj) { - struct drm_gem_object *obj = container_of(kref, - struct drm_gem_object, - handlecount); struct drm_device *dev = obj->dev; /* Remove any name for this object */ diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c index 2ef2c7827243..974e970ce3f8 100644 --- a/drivers/gpu/drm/drm_info.c +++ b/drivers/gpu/drm/drm_info.c @@ -255,7 +255,7 @@ int drm_gem_one_name_info(int id, void *ptr, void *data) seq_printf(m, "%6d %8zd %7d %8d\n", obj->name, obj->size, - atomic_read(&obj->handlecount.refcount), + atomic_read(&obj->handle_count), atomic_read(&obj->refcount.refcount)); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 
cf4ffbee1c00..4cdf74264ee8 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -136,14 +136,12 @@ i915_gem_create_ioctl(struct drm_device *dev, void *data, return -ENOMEM; ret = drm_gem_handle_create(file_priv, obj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(obj); if (ret) { - drm_gem_object_unreference_unlocked(obj); return ret; } - /* Sink the floating reference from kref_init(handlecount) */ - drm_gem_object_handle_unreference_unlocked(obj); - args->handle = handle; return 0; } diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c index 7bdc96256bf5..56ad9df2ccb5 100644 --- a/drivers/gpu/drm/i915/intel_fb.c +++ b/drivers/gpu/drm/i915/intel_fb.c @@ -237,8 +237,10 @@ int intel_fbdev_destroy(struct drm_device *dev, drm_fb_helper_fini(&ifbdev->helper); drm_framebuffer_cleanup(&ifb->base); - if (ifb->obj) + if (ifb->obj) { + drm_gem_object_handle_unreference(ifb->obj); drm_gem_object_unreference(ifb->obj); + } return 0; } diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c index dbd30b2e43fd..d2047713dc59 100644 --- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c +++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c @@ -352,6 +352,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *nfbdev) if (nouveau_fb->nvbo) { nouveau_bo_unmap(nouveau_fb->nvbo); + drm_gem_object_handle_unreference_unlocked(nouveau_fb->nvbo->gem); drm_gem_object_unreference_unlocked(nouveau_fb->nvbo->gem); nouveau_fb->nvbo = NULL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c index ead7b8fc53fc..19620a6709f5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_gem.c +++ b/drivers/gpu/drm/nouveau/nouveau_gem.c @@ -167,11 +167,9 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data, goto out; ret = drm_gem_handle_create(file_priv, nvbo->gem, &req->info.handle); + /* drop reference from 
allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(nvbo->gem); out: - drm_gem_object_handle_unreference_unlocked(nvbo->gem); - - if (ret) - drm_gem_object_unreference_unlocked(nvbo->gem); return ret; } diff --git a/drivers/gpu/drm/nouveau/nouveau_notifier.c b/drivers/gpu/drm/nouveau/nouveau_notifier.c index 3ec181ff50ce..3c9964a8fbad 100644 --- a/drivers/gpu/drm/nouveau/nouveau_notifier.c +++ b/drivers/gpu/drm/nouveau/nouveau_notifier.c @@ -79,6 +79,7 @@ nouveau_notifier_takedown_channel(struct nouveau_channel *chan) mutex_lock(&dev->struct_mutex); nouveau_bo_unpin(chan->notifier_bo); mutex_unlock(&dev->struct_mutex); + drm_gem_object_handle_unreference_unlocked(chan->notifier_bo->gem); drm_gem_object_unreference_unlocked(chan->notifier_bo->gem); drm_mm_takedown(&chan->notifier_heap); } diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c index 7422f274615a..b92d2f2fcbed 100644 --- a/drivers/gpu/drm/radeon/radeon_display.c +++ b/drivers/gpu/drm/radeon/radeon_display.c @@ -843,8 +843,9 @@ static void radeon_user_framebuffer_destroy(struct drm_framebuffer *fb) { struct radeon_framebuffer *radeon_fb = to_radeon_framebuffer(fb); - if (radeon_fb->obj) + if (radeon_fb->obj) { drm_gem_object_unreference_unlocked(radeon_fb->obj); + } drm_framebuffer_cleanup(fb); kfree(radeon_fb); } diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c index c74a8b20d941..9cdf6a35bc2c 100644 --- a/drivers/gpu/drm/radeon/radeon_fb.c +++ b/drivers/gpu/drm/radeon/radeon_fb.c @@ -94,8 +94,10 @@ static void radeonfb_destroy_pinned_object(struct drm_gem_object *gobj) ret = radeon_bo_reserve(rbo, false); if (likely(ret == 0)) { radeon_bo_kunmap(rbo); + radeon_bo_unpin(rbo); radeon_bo_unreserve(rbo); } + drm_gem_object_handle_unreference(gobj); drm_gem_object_unreference_unlocked(gobj); } @@ -325,8 +327,6 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb { struct fb_info 
*info; struct radeon_framebuffer *rfb = &rfbdev->rfb; - struct radeon_bo *rbo; - int r; if (rfbdev->helper.fbdev) { info = rfbdev->helper.fbdev; @@ -338,14 +338,8 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb } if (rfb->obj) { - rbo = rfb->obj->driver_private; - r = radeon_bo_reserve(rbo, false); - if (likely(r == 0)) { - radeon_bo_kunmap(rbo); - radeon_bo_unpin(rbo); - radeon_bo_unreserve(rbo); - } - drm_gem_object_unreference_unlocked(rfb->obj); + radeonfb_destroy_pinned_object(rfb->obj); + rfb->obj = NULL; } drm_fb_helper_fini(&rfbdev->helper); drm_framebuffer_cleanup(&rfb->base); diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c index c578f265b24c..d1e595d91723 100644 --- a/drivers/gpu/drm/radeon/radeon_gem.c +++ b/drivers/gpu/drm/radeon/radeon_gem.c @@ -201,11 +201,11 @@ int radeon_gem_create_ioctl(struct drm_device *dev, void *data, return r; } r = drm_gem_handle_create(filp, gobj, &handle); + /* drop reference from allocate - handle holds it now */ + drm_gem_object_unreference_unlocked(gobj); if (r) { - drm_gem_object_unreference_unlocked(gobj); return r; } - drm_gem_object_handle_unreference_unlocked(gobj); args->handle = handle; return 0; } diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 774e1d49509b..07e4726a4ee0 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -612,7 +612,7 @@ struct drm_gem_object { struct kref refcount; /** Handle count of this object. 
Each handle also holds a reference */ - struct kref handlecount; + atomic_t handle_count; /* number of handles on this object */ /** Related drm device */ struct drm_device *dev; @@ -1461,7 +1461,7 @@ struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, struct drm_gem_object *obj, size_t size); -void drm_gem_object_handle_free(struct kref *kref); +void drm_gem_object_handle_free(struct drm_gem_object *obj); void drm_gem_vm_open(struct vm_area_struct *vma); void drm_gem_vm_close(struct vm_area_struct *vma); int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma); @@ -1496,7 +1496,7 @@ static inline void drm_gem_object_handle_reference(struct drm_gem_object *obj) { drm_gem_object_reference(obj); - kref_get(&obj->handlecount); + atomic_inc(&obj->handle_count); } static inline void @@ -1505,12 +1505,15 @@ drm_gem_object_handle_unreference(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference(obj); } @@ -1520,12 +1523,17 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj) if (obj == NULL) return; + if (atomic_read(&obj->handle_count) == 0) + return; + /* * Must bump handle count first as this may be the last * ref, in which case the object would disappear before we * checked for a name */ - kref_put(&obj->handlecount, drm_gem_object_handle_free); + + if (atomic_dec_and_test(&obj->handle_count)) + drm_gem_object_handle_free(obj); drm_gem_object_unreference_unlocked(obj); } -- cgit v1.2.3 From 6110a1f43c27b516e16d5ce8860fca50748c2a87 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Thu, 30 Sep 2010 21:19:07 
-0400 Subject: intel_idle: Voluntary leave_mm before entering deeper Avoid TLB flush IPIs for the cores in deeper c-states by voluntary leave_mm() before entering into that state. CPUs tend to flush TLB in those c-states anyways. acpi_idle does this with C3-type states, but it was not caried over when intel_idle was introduced. intel_idle can apply it to C-states in addition to those that ACPI might export as C3... Signed-off-by: Suresh Siddha Signed-off-by: Len Brown --- drivers/idle/intel_idle.c | 18 ++++++++++++++---- include/linux/cpuidle.h | 1 + 2 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c index 96bf38097996..0906fc5b69b9 100644 --- a/drivers/idle/intel_idle.c +++ b/drivers/idle/intel_idle.c @@ -108,7 +108,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "NHM-C3", .desc = "MWAIT 0x10", .driver_data = (void *) 0x10, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 20, .power_usage = 500, .target_residency = 80, @@ -117,7 +117,7 @@ static struct cpuidle_state nehalem_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "NHM-C6", .desc = "MWAIT 0x20", .driver_data = (void *) 0x20, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .power_usage = 350, .target_residency = 800, @@ -149,7 +149,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "ATM-C4", .desc = "MWAIT 0x30", .driver_data = (void *) 0x30, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 100, .power_usage = 250, .target_residency = 400, @@ -159,7 +159,7 @@ static struct cpuidle_state atom_cstates[MWAIT_MAX_NUM_CSTATES] = { .name = "ATM-C6", .desc = "MWAIT 0x40", .driver_data = (void *) 0x40, - .flags = CPUIDLE_FLAG_TIME_VALID, + .flags = 
CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED, .exit_latency = 200, .power_usage = 150, .target_residency = 800, @@ -185,6 +185,16 @@ static int intel_idle(struct cpuidle_device *dev, struct cpuidle_state *state) local_irq_disable(); + /* + * If the state flag indicates that the TLB will be flushed or if this + * is the deepest c-state supported, do a voluntary leave mm to avoid + * costly and mostly unnecessary wakeups for flushing the user TLB's + * associated with the active mm. + */ + if (state->flags & CPUIDLE_FLAG_TLB_FLUSHED || + (&dev->states[dev->state_count - 1] == state)) + leave_mm(cpu); + if (!(lapic_timer_reliable_states & (1 << (cstate)))) clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu); diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index 36ca9721a0c2..1be416bbbb82 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -53,6 +53,7 @@ struct cpuidle_state { #define CPUIDLE_FLAG_BALANCED (0x40) /* medium latency, moderate savings */ #define CPUIDLE_FLAG_DEEP (0x80) /* high latency, large savings */ #define CPUIDLE_FLAG_IGNORE (0x100) /* ignore during this idle period */ +#define CPUIDLE_FLAG_TLB_FLUSHED (0x200) /* tlb will be flushed */ #define CPUIDLE_DRIVER_FLAGS_MASK (0xFFFF0000) -- cgit v1.2.3 From 39b4d07aa3583ceefe73622841303a0a3e942ca1 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 30 Sep 2010 09:10:26 +0100 Subject: drm: Hold the mutex when dropping the last GEM reference (v2) In order to be fully threadsafe we need to check that the drm_gem_object refcount is still 0 after acquiring the mutex in order to call the free function. Otherwise, we may encounter scenarios like: Thread A: Thread B: drm_gem_close unreference_unlocked kref_put mutex_lock ... i915_gem_evict ... kref_get -> BUG ... i915_gem_unbind ... kref_put ... i915_gem_object_free ... 
mutex_unlock mutex_lock i915_gem_object_free -> BUG i915_gem_object_unbind kfree mutex_unlock Note that no driver is currently using the free_unlocked vfunc and it is scheduled for removal, hasten that process. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=30454 Reported-and-Tested-by: Magnus Kessler Signed-off-by: Chris Wilson Cc: stable@kernel.org Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_gem.c | 22 ---------------------- include/drm/drmP.h | 10 ++++++---- 2 files changed, 6 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_gem.c b/drivers/gpu/drm/drm_gem.c index f7e61be8430a..5663d2719063 100644 --- a/drivers/gpu/drm/drm_gem.c +++ b/drivers/gpu/drm/drm_gem.c @@ -462,28 +462,6 @@ drm_gem_object_free(struct kref *kref) } EXPORT_SYMBOL(drm_gem_object_free); -/** - * Called after the last reference to the object has been lost. - * Must be called without holding struct_mutex - * - * Frees the object - */ -void -drm_gem_object_free_unlocked(struct kref *kref) -{ - struct drm_gem_object *obj = (struct drm_gem_object *) kref; - struct drm_device *dev = obj->dev; - - if (dev->driver->gem_free_object_unlocked != NULL) - dev->driver->gem_free_object_unlocked(obj); - else if (dev->driver->gem_free_object != NULL) { - mutex_lock(&dev->struct_mutex); - dev->driver->gem_free_object(obj); - mutex_unlock(&dev->struct_mutex); - } -} -EXPORT_SYMBOL(drm_gem_object_free_unlocked); - static void drm_gem_object_ref_bug(struct kref *list_kref) { BUG(); diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 07e4726a4ee0..4c9461a4f9e6 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -808,7 +808,6 @@ struct drm_driver { */ int (*gem_init_object) (struct drm_gem_object *obj); void (*gem_free_object) (struct drm_gem_object *obj); - void (*gem_free_object_unlocked) (struct drm_gem_object *obj); /* vga arb irq handler */ void (*vgaarb_irq)(struct drm_device *dev, bool state); @@ -1456,7 +1455,6 @@ int drm_gem_init(struct 
drm_device *dev); void drm_gem_destroy(struct drm_device *dev); void drm_gem_object_release(struct drm_gem_object *obj); void drm_gem_object_free(struct kref *kref); -void drm_gem_object_free_unlocked(struct kref *kref); struct drm_gem_object *drm_gem_object_alloc(struct drm_device *dev, size_t size); int drm_gem_object_init(struct drm_device *dev, @@ -1484,8 +1482,12 @@ drm_gem_object_unreference(struct drm_gem_object *obj) static inline void drm_gem_object_unreference_unlocked(struct drm_gem_object *obj) { - if (obj != NULL) - kref_put(&obj->refcount, drm_gem_object_free_unlocked); + if (obj != NULL) { + struct drm_device *dev = obj->dev; + mutex_lock(&dev->struct_mutex); + kref_put(&obj->refcount, drm_gem_object_free); + mutex_unlock(&dev->struct_mutex); + } } int drm_gem_handle_create(struct drm_file *file_priv, -- cgit v1.2.3 From 5336377d6225959624146629ce3fc88ee8ecda3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 5 Oct 2010 11:29:27 -0700 Subject: modules: Fix module_bug_list list corruption race With all the recent module loading cleanups, we've minimized the code that sits under module_mutex, fixing various deadlocks and making it possible to do most of the module loading in parallel. However, that whole conversion totally missed the rather obscure code that adds a new module to the list for BUG() handling. That code was doubly obscure because (a) the code itself lives in lib/bugs.c (for dubious reasons) and (b) it gets called from the architecture-specific "module_finalize()" rather than from generic code. Calling it from arch-specific code makes no sense what-so-ever to begin with, and is now actively wrong since that code isn't protected by the module loading lock any more. So this commit moves the "module_bug_{finalize,cleanup}()" calls away from the arch-specific code, and into the generic code - and in the process protects it with the module_mutex so that the list operations are now safe. 
Future fixups: - move the module list handling code into kernel/module.c where it belongs. - get rid of 'module_bug_list' and just use the regular list of modules (called 'modules' - imagine that) that we already create and maintain for other reasons. Reported-and-tested-by: Thomas Gleixner Cc: Rusty Russell Cc: Adrian Bunk Cc: Andrew Morton Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- arch/avr32/kernel/module.c | 3 +-- arch/h8300/kernel/module.c | 3 +-- arch/mn10300/kernel/module.c | 3 +-- arch/parisc/kernel/module.c | 3 +-- arch/powerpc/kernel/module.c | 5 ----- arch/s390/kernel/module.c | 3 +-- arch/sh/kernel/module.c | 2 -- arch/x86/kernel/module.c | 3 +-- include/linux/module.h | 5 ++--- kernel/module.c | 4 ++++ lib/bug.c | 6 ++---- 11 files changed, 14 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/arch/avr32/kernel/module.c b/arch/avr32/kernel/module.c index 98f94d041d9c..a727f54d64d6 100644 --- a/arch/avr32/kernel/module.c +++ b/arch/avr32/kernel/module.c @@ -314,10 +314,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, vfree(module->arch.syminfo); module->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, module); + return 0; } void module_arch_cleanup(struct module *module) { - module_bug_cleanup(module); } diff --git a/arch/h8300/kernel/module.c b/arch/h8300/kernel/module.c index 0865e291c20d..db4953dc4e1b 100644 --- a/arch/h8300/kernel/module.c +++ b/arch/h8300/kernel/module.c @@ -112,10 +112,9 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/mn10300/kernel/module.c b/arch/mn10300/kernel/module.c index 6aea7fd76993..196a111e2e29 100644 --- a/arch/mn10300/kernel/module.c +++ b/arch/mn10300/kernel/module.c @@ -206,7 +206,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct 
module *me) { - return module_bug_finalize(hdr, sechdrs, me); + return 0; } /* @@ -214,5 +214,4 @@ int module_finalize(const Elf_Ehdr *hdr, */ void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 159a2b81e90c..6e81bb596e5b 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -941,11 +941,10 @@ int module_finalize(const Elf_Ehdr *hdr, nsyms = newptr - (Elf_Sym *)symhdr->sh_addr; DEBUGP("NEW num_symtab %lu\n", nsyms); symhdr->sh_size = nsyms * sizeof(Elf_Sym); - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { deregister_unwind_table(mod); - module_bug_cleanup(mod); } diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c index 477c663e0140..4ef93ae2235f 100644 --- a/arch/powerpc/kernel/module.c +++ b/arch/powerpc/kernel/module.c @@ -65,10 +65,6 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sect; int err; - err = module_bug_finalize(hdr, sechdrs, me); - if (err) - return err; - /* Apply feature fixups */ sect = find_section(hdr, sechdrs, "__ftr_fixup"); if (sect != NULL) @@ -101,5 +97,4 @@ int module_finalize(const Elf_Ehdr *hdr, void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 22cfd634c355..f7167ee4604c 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -407,10 +407,9 @@ int module_finalize(const Elf_Ehdr *hdr, { vfree(me->arch.syminfo); me->arch.syminfo = NULL; - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); } diff --git a/arch/sh/kernel/module.c b/arch/sh/kernel/module.c index 43adddfe4c04..ae0be697a89e 100644 --- a/arch/sh/kernel/module.c +++ b/arch/sh/kernel/module.c @@ -149,13 +149,11 @@ int module_finalize(const Elf_Ehdr *hdr, int ret = 0; 
ret |= module_dwarf_finalize(hdr, sechdrs, me); - ret |= module_bug_finalize(hdr, sechdrs, me); return ret; } void module_arch_cleanup(struct module *mod) { - module_bug_cleanup(mod); module_dwarf_cleanup(mod); } diff --git a/arch/x86/kernel/module.c b/arch/x86/kernel/module.c index e0bc186d7501..1c355c550960 100644 --- a/arch/x86/kernel/module.c +++ b/arch/x86/kernel/module.c @@ -239,11 +239,10 @@ int module_finalize(const Elf_Ehdr *hdr, apply_paravirt(pseg, pseg + para->sh_size); } - return module_bug_finalize(hdr, sechdrs, me); + return 0; } void module_arch_cleanup(struct module *mod) { alternatives_smp_module_del(mod); - module_bug_cleanup(mod); } diff --git a/include/linux/module.h b/include/linux/module.h index 8a6b9fdc7ffa..aace066bad8f 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -686,17 +686,16 @@ extern int module_sysfs_initialized; #ifdef CONFIG_GENERIC_BUG -int module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, +void module_bug_finalize(const Elf_Ehdr *, const Elf_Shdr *, struct module *); void module_bug_cleanup(struct module *); #else /* !CONFIG_GENERIC_BUG */ -static inline int module_bug_finalize(const Elf_Ehdr *hdr, +static inline void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { - return 0; } static inline void module_bug_cleanup(struct module *mod) {} #endif /* CONFIG_GENERIC_BUG */ diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b4..ccd641991842 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1537,6 +1537,7 @@ static int __unlink_module(void *_mod) { struct module *mod = _mod; list_del(&mod->list); + module_bug_cleanup(mod); return 0; } @@ -2625,6 +2626,7 @@ static struct module *load_module(void __user *umod, if (err < 0) goto ddebug; + module_bug_finalize(info.hdr, info.sechdrs, mod); list_add_rcu(&mod->list, &modules); mutex_unlock(&module_mutex); @@ -2650,6 +2652,8 @@ static struct module *load_module(void __user *umod, mutex_lock(&module_mutex); /* 
Unlink carefully: kallsyms could be walking list. */ list_del_rcu(&mod->list); + module_bug_cleanup(mod); + ddebug: if (!mod->taints) dynamic_debug_remove(info.debug); diff --git a/lib/bug.c b/lib/bug.c index 7cdfad88128f..19552096d16b 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -72,8 +72,8 @@ static const struct bug_entry *module_find_bug(unsigned long bugaddr) return NULL; } -int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *mod) +void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { char *secstrings; unsigned int i; @@ -97,8 +97,6 @@ int module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, * could potentially lead to deadlock and thus be counter-productive. */ list_add(&mod->bug_list, &module_bug_list); - - return 0; } void module_bug_cleanup(struct module *mod) -- cgit v1.2.3 From 231d0aefd88e94129cb8fb84794f9bb788c6366e Mon Sep 17 00:00:00 2001 From: Evgeny Kuznetsov Date: Tue, 5 Oct 2010 12:47:57 +0400 Subject: wait: using uninitialized member of wait queue The "flags" member of "struct wait_queue_t" is used in several places in the kernel code without beeing initialized by init_wait(). "flags" is used in bitwise operations. If "flags" not initialized then unexpected behaviour may take place. Incorrect flags might used later in code. Added initialization of "wait_queue_t.flags" with zero value into "init_wait". 
Signed-off-by: Evgeny Kuznetsov [ The bit we care about does end up being initialized by both prepare_to_wait() and add_to_wait_queue(), so this doesn't seem to cause actual bugs, but is definitely the right thing to do -Linus ] Signed-off-by: Linus Torvalds --- include/linux/wait.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 0836ccc57121..3efc9f3f43a0 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -614,6 +614,7 @@ int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); (wait)->private = current; \ (wait)->func = autoremove_wake_function; \ INIT_LIST_HEAD(&(wait)->task_list); \ + (wait)->flags = 0; \ } while (0) /** -- cgit v1.2.3