From 39e2e173fb1f900959d3a25c21c65fa88b06c6ee Mon Sep 17 00:00:00 2001 From: Alfredo Alvarez Fernandez Date: Wed, 30 Mar 2016 19:03:36 +0200 Subject: locking/lockdep: Print chain_key collision information A sequence of pairs [class_idx -> corresponding chain_key iteration] is printed for both the current held_lock chain and the cached chain. That exposes the two different class_idx sequences that led to that particular hash value. This helps with debugging hash chain collision reports. Signed-off-by: Alfredo Alvarez Fernandez Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-fsdevel@vger.kernel.org Cc: sedat.dilek@gmail.com Cc: tytso@mit.edu Link: http://lkml.kernel.org/r/1459357416-19190-1-git-send-email-alfredoalvarezernandez@gmail.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 77 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 53ab2f85d77e..2324ba5310db 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1999,6 +1999,77 @@ static inline int get_first_held_lock(struct task_struct *curr, return ++i; } +/* + * Returns the next chain_key iteration + */ +static u64 print_chain_key_iteration(int class_idx, u64 chain_key) +{ + u64 new_chain_key = iterate_chain_key(chain_key, class_idx); + + printk(" class_idx:%d -> chain_key:%016Lx", + class_idx, + (unsigned long long)new_chain_key); + return new_chain_key; +} + +static void +print_chain_keys_held_locks(struct task_struct *curr, struct held_lock *hlock_next) +{ + struct held_lock *hlock; + u64 chain_key = 0; + int depth = curr->lockdep_depth; + int i; + + printk("depth: %u\n", depth + 1); + for (i = get_first_held_lock(curr, hlock_next); i < depth; i++) { + hlock = curr->held_locks + i; + chain_key = print_chain_key_iteration(hlock->class_idx, chain_key); + + print_lock(hlock); + } + + print_chain_key_iteration(hlock_next->class_idx, chain_key); + print_lock(hlock_next); +} + +static void print_chain_keys_chain(struct lock_chain *chain) +{ + int i; + u64 chain_key = 0; + int class_id; + + printk("depth: %u\n", chain->depth); + for (i = 0; i < chain->depth; i++) { + class_id = chain_hlocks[chain->base + i]; + chain_key = print_chain_key_iteration(class_id + 1, chain_key); + + print_lock_name(lock_classes + class_id); + printk("\n"); + } +} + +static void print_collision(struct task_struct *curr, + struct held_lock *hlock_next, + struct lock_chain *chain) +{ + printk("\n"); + printk("======================\n"); + printk("[chain_key collision ]\n"); + print_kernel_ident(); + printk("----------------------\n"); + printk("%s/%d: ", current->comm, task_pid_nr(current)); + printk("Hash chain already cached but the contents don't match!\n"); + + printk("Held locks:"); + print_chain_keys_held_locks(curr, hlock_next); + + printk("Locks in cached chain:"); + print_chain_keys_chain(chain); + + printk("\nstack backtrace:\n"); + dump_stack(); +} + /* * Checks whether the chain and the current held locks are consistent * in depth and also in content. If they are not it most likely means @@ -2014,14 +2085,18 @@ static int check_no_collision(struct task_struct *curr, i = get_first_held_lock(curr, hlock); - if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) + if (DEBUG_LOCKS_WARN_ON(chain->depth != curr->lockdep_depth - (i - 1))) { + print_collision(curr, hlock, chain); return 0; + } for (j = 0; j < chain->depth - 1; j++, i++) { id = curr->held_locks[i].class_idx - 1; - if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) + if (DEBUG_LOCKS_WARN_ON(chain_hlocks[chain->base + j] != id)) { + print_collision(curr, hlock, chain); return 0; + } } #endif return 1; -- cgit v1.2.3 From 5c8a010c2411729a07cb1b90c09fa978ac0ac6c0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 4 Apr 2016 10:42:07 +0200 Subject: locking/lockdep: Fix print_collision() unused warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix this: kernel/locking/lockdep.c:2051:13: warning: ‘print_collision’ defined but not used [-Wunused-function] static void print_collision(struct task_struct *curr, ^ Signed-off-by: Borislav Petkov Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1459759327-2880-1-git-send-email-bp@alien8.de Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 2324ba5310db..ed9410936a22 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -1999,6 +1999,7 @@ static inline int get_first_held_lock(struct task_struct *curr, return ++i; } +#ifdef CONFIG_DEBUG_LOCKDEP /* * Returns the next chain_key iteration */ @@ -2069,6 +2070,7 @@ static void print_collision(struct task_struct *curr, printk("\nstack backtrace:\n"); dump_stack(); } +#endif /* * Checks whether the chain and the current held locks are consistent -- cgit v1.2.3 From 1f190931893a98ffd5d4cfdfbfc2452ad0ed3e1b Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 12 Apr 2016 08:47:17 -0700 Subject: locking/locktorture: Fix deboosting NULL pointer dereference For the case of rtmutex torturing we will randomly call into the boost() handler, including upon module exiting when the tasks are deboosted before stopping. In such cases the task may or may not have already been boosted, and therefore the NULL being explicitly passed can occur anywhere. Currently we only assume that the task will is at a higher prio, and in consequence, dereference a NULL pointer. This patch fixes the case of a rmmod locktorture exploding while pounding on the rtmutex lock (partial trace): task: ffff88081026cf80 ti: ffff880816120000 task.ti: ffff880816120000 RSP: 0018:ffff880816123eb0 EFLAGS: 00010206 RAX: ffff88081026cf80 RBX: ffff880816bfa630 RCX: 0000000000160d1b RDX: 0000000000000000 RSI: 0000000000000202 RDI: 0000000000000000 RBP: ffff88081026cf80 R08: 000000000000001f R09: ffff88017c20ca80 R10: 0000000000000000 R11: 000000000048c316 R12: ffffffffa05d1840 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88203f880000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000008 CR3: 0000000001c0a000 CR4: 00000000000406e0 Stack: ffffffffa05d141d ffff880816bfa630 ffffffffa05d1922 ffff88081e70c2c0 ffff880816bfa630 ffffffff81095fed 0000000000000000 ffffffff8107bf60 ffff880816bfa630 ffffffff00000000 ffff880800000000 ffff880816123f08 Call Trace: [] kthread+0xbd/0xe0 [] ret_from_fork+0x3f/0x70 This patch ensures that if the random state pointer is not NULL and current is not boosted, then do nothing. RIP: 0010:[] [] torture_random+0x5/0x60 [torture] [] torture_rtmutex_boost+0x1d/0x90 [locktorture] [] lock_torture_writer+0xe2/0x170 [locktorture] Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bobby.prani@gmail.com Cc: dhowells@redhat.com Cc: dipankar@in.ibm.com Cc: dvhart@linux.intel.com Cc: edumazet@google.com Cc: fweisbec@gmail.com Cc: jiangshanlai@gmail.com Cc: josh@joshtriplett.org Cc: mathieu.desnoyers@efficios.com Cc: oleg@redhat.com Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/1460476038-27060-1-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/locking/locktorture.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 8ef1919d63b2..9e9c5f454f5c 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -394,12 +394,12 @@ static void torture_rtmutex_boost(struct torture_random_state *trsp) if (!rt_task(current)) { /* - * (1) Boost priority once every ~50k operations. When the + * Boost priority once every ~50k operations. When the * task tries to take the lock, the rtmutex it will account * for the new priority, and do any corresponding pi-dance. */ - if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * factor))) { + if (trsp && !(torture_random(trsp) % + (cxt.nrealwriters_stress * factor))) { policy = SCHED_FIFO; param.sched_priority = MAX_RT_PRIO - 1; } else /* common case, do nothing */ -- cgit v1.2.3 From c1c33b92db4fb274dfbff778ccf2459e4bebd48e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 12 Apr 2016 08:47:18 -0700 Subject: locking/locktorture: Fix NULL pointer dereference for cleanup paths It has been found that paths that invoke cleanups through lock_torture_cleanup() can trigger NULL pointer dereferencing bugs during the statistics printing phase. This is mainly because we should not be calling into statistics before we are sure things have been set up correctly. Specifically, early checks (and the need for handling this in the cleanup call) only include parameter checks and basic statistics allocation. Once we start write/read kthreads we then consider the test as started. As such, update the function in question to check for cxt.lwsa writer stats, if not set, we either have a bogus parameter or -ENOMEM situation and therefore only need to deal with general torture calls. Reported-and-tested-by: Kefeng Wang Signed-off-by: Davidlohr Bueso Signed-off-by: Paul E. McKenney Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bobby.prani@gmail.com Cc: dhowells@redhat.com Cc: dipankar@in.ibm.com Cc: dvhart@linux.intel.com Cc: edumazet@google.com Cc: fweisbec@gmail.com Cc: jiangshanlai@gmail.com Cc: josh@joshtriplett.org Cc: mathieu.desnoyers@efficios.com Cc: oleg@redhat.com Cc: rostedt@goodmis.org Link: http://lkml.kernel.org/r/1460476038-27060-2-git-send-email-paulmck@linux.vnet.ibm.com [ Improved the changelog. ] Signed-off-by: Ingo Molnar --- kernel/locking/locktorture.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 9e9c5f454f5c..d066a50dc87e 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -748,6 +748,15 @@ static void lock_torture_cleanup(void) if (torture_cleanup_begin()) return; + /* + * Indicates early cleanup, meaning that the test has not run, + * such as when passing bogus args when loading the module. As + * such, only perform the underlying torture-specific cleanups, + * and avoid anything related to locktorture. + */ + if (!cxt.lwsa) + goto end; + if (writer_tasks) { for (i = 0; i < cxt.nrealwriters_stress; i++) torture_stop_kthread(lock_torture_writer, @@ -776,6 +785,7 @@ static void lock_torture_cleanup(void) else lock_torture_print_module_parms(cxt.cur_ops, "End of test: SUCCESS"); +end: torture_cleanup_end(); } @@ -870,6 +880,7 @@ static int __init lock_torture_init(void) VERBOSE_TOROUT_STRING("cxt.lrsa: Out of memory"); firsterr = -ENOMEM; kfree(cxt.lwsa); + cxt.lwsa = NULL; goto unwind; } @@ -878,6 +889,7 @@ static int __init lock_torture_init(void) cxt.lrsa[i].n_lock_acquired = 0; } } + lock_torture_print_module_parms(cxt.cur_ops, "Start of test"); /* Prepare torture context. */ -- cgit v1.2.3 From c003ed928962a55eb446e78c544b1d7c4f6cb88a Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Fri, 8 Apr 2016 20:58:46 +0200 Subject: locking/lockdep: Deinline register_lock_class(), save 2328 bytes This function compiles to 1328 bytes of machine code. Three callsites. Registering a new lock class is definitely not *that* time-critical to inline it. Signed-off-by: Denys Vlasenko Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1460141926-13069-5-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ed9410936a22..7cc43ef856c1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -708,7 +708,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) * yet. Otherwise we look it up. We cache the result in the lock object * itself, so actual lookup of the hash should be once per lock object. */ -static inline struct lock_class * +static struct lock_class * register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) { struct lockdep_subclass_key *key; -- cgit v1.2.3 From f8e04d854506ddfeba9cb41b601972b28521f104 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:21 +0200 Subject: locking/rwsem: Get rid of __down_write_nested() This is no longer used anywhere and all callers (__down_write()) use 0 as a subclass. Ditch __down_write_nested() to make the code easier to follow. This shouldn't introduce any functional change. Signed-off-by: Michal Hocko Acked-by: Davidlohr Bueso Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-2-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- arch/s390/include/asm/rwsem.h | 7 +------ arch/sh/include/asm/rwsem.h | 5 ----- arch/sparc/include/asm/rwsem.h | 7 +------ arch/x86/include/asm/rwsem.h | 7 +------ include/asm-generic/rwsem.h | 7 +------ include/linux/rwsem-spinlock.h | 1 - kernel/locking/rwsem-spinlock.c | 7 +------ 7 files changed, 5 insertions(+), 36 deletions(-) (limited to 'kernel/locking') diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index fead491dfc28..555d23b6b6d1 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -90,7 +90,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { signed long old, new, tmp; @@ -108,11 +108,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/arch/sh/include/asm/rwsem.h b/arch/sh/include/asm/rwsem.h index edab57265293..a5104bebd1eb 100644 --- a/arch/sh/include/asm/rwsem.h +++ b/arch/sh/include/asm/rwsem.h @@ -114,11 +114,6 @@ static inline void __downgrade_write(struct rw_semaphore *sem) rwsem_downgrade_wake(sem); } -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) -{ - __down_write(sem); -} - /* * implement exchange and add functionality */ diff --git a/arch/sparc/include/asm/rwsem.h b/arch/sparc/include/asm/rwsem.h index 069bf4d663a1..e5a0d575bc7f 100644 --- a/arch/sparc/include/asm/rwsem.h +++ b/arch/sparc/include/asm/rwsem.h @@ -45,7 +45,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { long tmp; @@ -55,11 +55,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index ceec86eb68e9..4a8292a0d6e1 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -99,7 +99,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { long tmp; asm volatile("# beginning down_write\n\t" @@ -116,11 +116,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) : "memory", "cc"); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - /* * trylock for writing -- returns 1 if successful, 0 if contention */ diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index d6d5dc98d7da..b8d8a6cf4ca8 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -53,7 +53,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) /* * lock for writing */ -static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) +static inline void __down_write(struct rw_semaphore *sem) { long tmp; @@ -63,11 +63,6 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass) rwsem_down_write_failed(sem); } -static inline void __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index 561e8615528d..a733a5467e6c 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -34,7 +34,6 @@ struct rw_semaphore { extern void __down_read(struct rw_semaphore *sem); extern int __down_read_trylock(struct rw_semaphore *sem); extern void __down_write(struct rw_semaphore *sem); -extern void __down_write_nested(struct rw_semaphore *sem, int subclass); extern int __down_write_trylock(struct rw_semaphore *sem); extern void __up_read(struct rw_semaphore *sem); extern void __up_write(struct rw_semaphore *sem); diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 3a5048572065..bab26104a5d0 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -191,7 +191,7 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore */ -void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) +void __sched __down_write(struct rw_semaphore *sem) { struct rwsem_waiter waiter; struct task_struct *tsk; @@ -227,11 +227,6 @@ void __sched __down_write_nested(struct rw_semaphore *sem, int subclass) raw_spin_unlock_irqrestore(&sem->wait_lock, flags); } -void __sched __down_write(struct rw_semaphore *sem) -{ - __down_write_nested(sem, 0); -} - /* * trylock for writing -- returns 1 if successful, 0 if contention */ -- cgit v1.2.3 From d47996082f52baa0ca8b48d26b3cbef5ede70a73 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:26 +0200 Subject: locking/rwsem: Introduce basis for down_write_killable() Introduce a generic implementation necessary for down_write_killable(). This is a trivial extension of the already existing down_write() call which can be interrupted by SIGKILL. This patch doesn't provide down_write_killable() yet because arches have to provide the necessary pieces before. rwsem_down_write_failed() which is a generic slow path for the write lock is extended to take a task state and renamed to __rwsem_down_write_failed_common(). The return value is either a valid semaphore pointer or ERR_PTR(-EINTR). rwsem_down_write_failed_killable() is exported as a new way to wait for the lock and be killable. For rwsem-spinlock implementation the current __down_write() it updated in a similar way as __rwsem_down_write_failed_common() except it doesn't need new exports just visible __down_write_killable(). Architectures which are not using the generic rwsem implementation are supposed to provide their __down_write_killable() implementation and use rwsem_down_write_failed_killable() for the slow path. Signed-off-by: Michal Hocko Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-7-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- include/asm-generic/rwsem.h | 12 ++++++++++++ include/linux/rwsem-spinlock.h | 1 + include/linux/rwsem.h | 2 ++ kernel/locking/rwsem-spinlock.c | 22 ++++++++++++++++++++-- kernel/locking/rwsem-xadd.c | 31 +++++++++++++++++++++++++------ 5 files changed, 60 insertions(+), 8 deletions(-) (limited to 'kernel/locking') diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h index b8d8a6cf4ca8..3fc94a046bf5 100644 --- a/include/asm-generic/rwsem.h +++ b/include/asm-generic/rwsem.h @@ -63,6 +63,18 @@ static inline void __down_write(struct rw_semaphore *sem) rwsem_down_write_failed(sem); } +static inline int __down_write_killable(struct rw_semaphore *sem) +{ + long tmp; + + tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, + (atomic_long_t *)&sem->count); + if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) + if (IS_ERR(rwsem_down_write_failed_killable(sem))) + return -EINTR; + return 0; +} + static inline int __down_write_trylock(struct rw_semaphore *sem) { long tmp; diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h index a733a5467e6c..ae0528b834cd 100644 --- a/include/linux/rwsem-spinlock.h +++ b/include/linux/rwsem-spinlock.h @@ -34,6 +34,7 @@ struct rw_semaphore { extern void __down_read(struct rw_semaphore *sem); extern int __down_read_trylock(struct rw_semaphore *sem); extern void __down_write(struct rw_semaphore *sem); +extern int __must_check __down_write_killable(struct rw_semaphore *sem); extern int __down_write_trylock(struct rw_semaphore *sem); extern void __up_read(struct rw_semaphore *sem); extern void __up_write(struct rw_semaphore *sem); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 8f498cdde280..7d7ae029dac5 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -14,6 +14,7 @@ #include #include #include +#include #ifdef CONFIG_RWSEM_SPIN_ON_OWNER #include #endif @@ -43,6 +44,7 @@ struct rw_semaphore { extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); +extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index bab26104a5d0..1591f6b3539f 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -191,11 +191,12 @@ int __down_read_trylock(struct rw_semaphore *sem) /* * get a write lock on the semaphore */ -void __sched __down_write(struct rw_semaphore *sem) +int __sched __down_write_common(struct rw_semaphore *sem, int state) { struct rwsem_waiter waiter; struct task_struct *tsk; unsigned long flags; + int ret = 0; raw_spin_lock_irqsave(&sem->wait_lock, flags); @@ -215,16 +216,33 @@ void __sched __down_write(struct rw_semaphore *sem) */ if (sem->count == 0) break; - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + if (signal_pending_state(state, current)) { + ret = -EINTR; + goto out; + } + set_task_state(tsk, state); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); schedule(); raw_spin_lock_irqsave(&sem->wait_lock, flags); } /* got the lock */ sem->count = -1; +out: list_del(&waiter.list); raw_spin_unlock_irqrestore(&sem->wait_lock, flags); + + return ret; +} + +void __sched __down_write(struct rw_semaphore *sem) +{ + __down_write_common(sem, TASK_UNINTERRUPTIBLE); +} + +int __sched __down_write_killable(struct rw_semaphore *sem) +{ + return __down_write_common(sem, TASK_KILLABLE); } /* diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index a4d4de05b2d1..df4dcb883b50 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -433,12 +433,13 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem) /* * Wait until we successfully acquire the write lock */ -__visible -struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) +static inline struct rw_semaphore * +__rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) { long count; bool waiting = true; /* any queued threads before us */ struct rwsem_waiter waiter; + struct rw_semaphore *ret = sem; /* undo write bias from down_write operation, stop active locking */ count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem); @@ -478,7 +479,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem); /* wait until we successfully acquire the lock */ - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(state); while (true) { if (rwsem_try_write_lock(count, sem)) break; @@ -486,21 +487,39 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) /* Block until there are no active lockers. */ do { + if (signal_pending_state(state, current)) { + raw_spin_lock_irq(&sem->wait_lock); + ret = ERR_PTR(-EINTR); + goto out; + } schedule(); - set_current_state(TASK_UNINTERRUPTIBLE); + set_current_state(state); } while ((count = sem->count) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } +out: __set_current_state(TASK_RUNNING); - list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); - return sem; + return ret; +} + +__visible struct rw_semaphore * __sched +rwsem_down_write_failed(struct rw_semaphore *sem) +{ + return __rwsem_down_write_failed_common(sem, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(rwsem_down_write_failed); +__visible struct rw_semaphore * __sched +rwsem_down_write_failed_killable(struct rw_semaphore *sem) +{ + return __rwsem_down_write_failed_common(sem, TASK_KILLABLE); +} +EXPORT_SYMBOL(rwsem_down_write_failed_killable); + /* * handle waking up a waiter on the semaphore * - up_read/up_write has decremented the active part of count if we come here -- cgit v1.2.3 From 6687659568e2ec5b3ac24b39c5d26ce8b9d90434 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 17 Apr 2016 23:31:41 -0700 Subject: locking/pvqspinlock: Fix division by zero in qstat_read() While playing with the qstat statistics (in /qlockstat/) I ran into the following splat on a VM when opening pv_hash_hops: divide error: 0000 [#1] SMP ... RIP: 0010:[] [] qstat_read+0x12e/0x1e0 ... Call Trace: [] ? mem_cgroup_commit_charge+0x6c/0xd0 [] ? page_add_new_anon_rmap+0x8c/0xd0 [] ? handle_mm_fault+0x1439/0x1b40 [] ? do_mmap+0x449/0x550 [] ? __vfs_read+0x23/0xd0 [] ? rw_verify_area+0x52/0xd0 [] ? vfs_read+0x81/0x120 [] ? SyS_read+0x42/0xa0 [] ? entry_SYSCALL_64_fastpath+0x1e/0xa8 Fix this by verifying that qstat_pv_kick_unlock is in fact non-zero, similarly to what the qstat_pv_latency_wake case does, as if nothing else, this can come from resetting the statistics, thus having 0 kicks should be quite valid in this context. Signed-off-by: Davidlohr Bueso Reviewed-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: waiman.long@hpe.com Link: http://lkml.kernel.org/r/1460961103-24953-1-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_stat.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index eb2a2c9bc3fc..d734b7502001 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -136,10 +136,12 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, } if (counter == qstat_pv_hash_hops) { - u64 frac; + u64 frac = 0; - frac = 100ULL * do_div(stat, kicks); - frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + if (kicks) { + frac = 100ULL * do_div(stat, kicks); + frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + } /* * Return a X.XX decimal number -- cgit v1.2.3 From 916633a403702549d37ea353e63a68e5b0dc27ad Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 7 Apr 2016 17:12:31 +0200 Subject: locking/rwsem: Provide down_write_killable() Now that all the architectures implement the necessary glue code we can introduce down_write_killable(). The only difference wrt. regular down_write() is that the slow path waits in TASK_KILLABLE state and the interruption by the fatal signal is reported as -EINTR to the caller. Signed-off-by: Michal Hocko Cc: Andrew Morton Cc: Chris Zankel Cc: David S. Miller Cc: Linus Torvalds Cc: Max Filippov Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Signed-off-by: Davidlohr Bueso Cc: Signed-off-by: Jason Low Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-alpha@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: linux-ia64@vger.kernel.org Cc: linux-s390@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: linux-xtensa@linux-xtensa.org Cc: sparclinux@vger.kernel.org Link: http://lkml.kernel.org/r/1460041951-22347-12-git-send-email-mhocko@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/rwsem.h | 6 +++--- include/linux/lockdep.h | 15 +++++++++++++++ include/linux/rwsem.h | 1 + kernel/locking/rwsem.c | 19 +++++++++++++++++++ 4 files changed, 38 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index d759c5f70f49..453744c1d347 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -102,9 +102,9 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) #define ____down_write(sem, slow_path) \ ({ \ long tmp; \ - struct rw_semaphore* ret = sem; \ + struct rw_semaphore* ret; \ asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %1,(%2)\n\t" \ + LOCK_PREFIX " xadd %1,(%3)\n\t" \ /* adds 0xffff0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? */\ @@ -112,7 +112,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "+a" (ret) \ + : "+m" (sem->count), "=d" (tmp), "=a" (ret) \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..accfe56d8c51 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -444,6 +444,18 @@ do { \ lock_acquired(&(_lock)->dep_map, _RET_IP_); \ } while (0) +#define LOCK_CONTENDED_RETURN(_lock, try, lock) \ +({ \ + int ____err = 0; \ + if (!try(_lock)) { \ + lock_contended(&(_lock)->dep_map, _RET_IP_); \ + ____err = lock(_lock); \ + } \ + if (!____err) \ + lock_acquired(&(_lock)->dep_map, _RET_IP_); \ + ____err; \ +}) + #else /* CONFIG_LOCK_STAT */ #define lock_contended(lockdep_map, ip) do {} while (0) @@ -452,6 +464,9 @@ do { \ #define LOCK_CONTENDED(_lock, try, lock) \ lock(_lock) +#define LOCK_CONTENDED_RETURN(_lock, try, lock) \ + lock(_lock) + #endif /* CONFIG_LOCK_STAT */ #ifdef CONFIG_LOCKDEP diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 7d7ae029dac5..d1c12d160ace 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -118,6 +118,7 @@ extern int down_read_trylock(struct rw_semaphore *sem); * lock for writing */ extern void down_write(struct rw_semaphore *sem); +extern int __must_check down_write_killable(struct rw_semaphore *sem); /* * trylock for writing -- returns 1 if successful, 0 if contention diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 205be0ce34de..c817216c1615 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -54,6 +54,25 @@ void __sched down_write(struct rw_semaphore *sem) EXPORT_SYMBOL(down_write); +/* + * lock for writing + */ +int __sched down_write_killable(struct rw_semaphore *sem) +{ + might_sleep(); + rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); + + if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) { + rwsem_release(&sem->dep_map, 1, _RET_IP_); + return -EINTR; + } + + rwsem_set_owner(sem); + return 0; +} + +EXPORT_SYMBOL(down_write_killable); + /* * trylock for writing -- returns 1 if successful, 0 if contention */ -- cgit v1.2.3 From c24697566298df04cac9913e0601501b5ee2b3f5 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Tue, 16 Feb 2016 13:57:40 +0800 Subject: locking/lockdep: Fix ->irq_context calculation task_irq_context() returns the encoded irq_context of the task, the return value is encoded in the same as ->irq_context of held_lock. Always return 0 if !(CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING) Signed-off-by: Boqun Feng Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Josh Triplett Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Mathieu Desnoyers Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Cc: sasha.levin@oracle.com Link: http://lkml.kernel.org/r/1455602265-16490-2-git-send-email-boqun.feng@gmail.com Signed-off-by: Ingo Molnar --- kernel/locking/lockdep.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ed9410936a22..beb06f604420 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2932,6 +2932,11 @@ static int mark_irqflags(struct task_struct *curr, struct held_lock *hlock) return 1; } +static inline unsigned int task_irq_context(struct task_struct *task) +{ + return 2 * !!task->hardirq_context + !!task->softirq_context; +} + static int separate_irq_context(struct task_struct *curr, struct held_lock *hlock) { @@ -2940,8 +2945,6 @@ static int separate_irq_context(struct task_struct *curr, /* * Keep track of points where we cross into an interrupt context: */ - hlock->irq_context = 2*(curr->hardirq_context ? 1 : 0) + - curr->softirq_context; if (depth) { struct held_lock *prev_hlock; @@ -2973,6 +2976,11 @@ static inline int mark_irqflags(struct task_struct *curr, return 1; } +static inline unsigned int task_irq_context(struct task_struct *task) +{ + return 0; +} + static inline int separate_irq_context(struct task_struct *curr, struct held_lock *hlock) { @@ -3241,6 +3249,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, hlock->acquire_ip = ip; hlock->instance = lock; hlock->nest_lock = nest_lock; + hlock->irq_context = task_irq_context(curr); hlock->trylock = trylock; hlock->read = read; hlock->check = check; -- cgit v1.2.3 From 75dd602a5198a6e5f75534db52b6e6fbaabb33d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Mar 2016 11:36:59 +0200 Subject: lockdep: Fix lock_chain::base size lock_chain::base is used to store an index into the chain_hlocks[] array, however that array contains more elements than can be indexed using the u16. Change the lock_chain structure to use a bitfield to encode the data it needs and add BUILD_BUG_ON() assertions to check the fields are wide enough. Also, for DEBUG_LOCKDEP, assert that we don't run out of elements of that array; as that would wreck the collision detectoring. Signed-off-by: Peter Zijlstra (Intel) Cc: Alfredo Alvarez Fernandez Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Sedat Dilek Cc: Theodore Ts'o Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160330093659.GS3408@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 +++++--- kernel/locking/lockdep.c | 24 +++++++++++++++++++++++- kernel/locking/lockdep_proc.c | 2 ++ 3 files changed, 30 insertions(+), 4 deletions(-) (limited to 'kernel/locking') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..d10ef06971b5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -196,9 +196,11 @@ struct lock_list { * We record lock dependency chains, so that we can cache them: */ struct lock_chain { - u8 irq_context; - u8 depth; - u16 base; + /* see BUILD_BUG_ON()s in lookup_chain_cache() */ + unsigned int irq_context : 2, + depth : 6, + base : 24; + /* 4 byte hole */ struct hlist_node entry; u64 chain_key; }; diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index beb06f604420..78c1c0ee6dc1 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -2176,15 +2176,37 @@ cache_hit: chain->irq_context = hlock->irq_context; i = get_first_held_lock(curr, hlock); chain->depth = curr->lockdep_depth + 1 - i; + + BUILD_BUG_ON((1UL << 24) <= ARRAY_SIZE(chain_hlocks)); + BUILD_BUG_ON((1UL << 6) <= ARRAY_SIZE(curr->held_locks)); + BUILD_BUG_ON((1UL << 8*sizeof(chain_hlocks[0])) <= ARRAY_SIZE(lock_classes)); + if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { chain->base = nr_chain_hlocks; - nr_chain_hlocks += chain->depth; for (j = 0; j < chain->depth - 1; j++, i++) { int lock_id = curr->held_locks[i].class_idx - 1; chain_hlocks[chain->base + j] = lock_id; } chain_hlocks[chain->base + j] = class - lock_classes; } + + if (nr_chain_hlocks < MAX_LOCKDEP_CHAIN_HLOCKS) + nr_chain_hlocks += chain->depth; + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * Important for check_no_collision(). + */ + if (unlikely(nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS)) { + if (debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); + dump_stack(); + return 0; + } +#endif + hlist_add_head_rcu(&chain->entry, hash_head); debug_atomic_inc(chain_lookup_misses); inc_chains(); diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index dbb61a302548..a0f61effad25 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -141,6 +141,8 @@ static int lc_show(struct seq_file *m, void *v) int i; if (v == SEQ_START_TOKEN) { + if (nr_chain_hlocks > MAX_LOCKDEP_CHAIN_HLOCKS) + seq_printf(m, "(buggered) "); seq_printf(m, "all lock chains:\n"); return 0; } -- cgit v1.2.3 From c8585c6fcaf2011de54c3592e80a634a2b9e1a7f Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Mon, 25 Apr 2016 23:22:35 -0400 Subject: ext4: fix races between changing inode journal mode and ext4_writepages In ext4, there is a race condition between changing inode journal mode and ext4_writepages(). While ext4_writepages() is executed on a non-journalled mode inode, the inode's journal mode could be enabled by ioctl() and then, some pages dirtied after switching the journal mode will be still exposed to ext4_writepages() in non-journaled mode. To resolve this problem, we use fs-wide per-cpu rw semaphore by Jan Kara's suggestion because we don't want to waste ext4_inode_info's space for this extra rare case. Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/ext4/ext4.h | 4 ++++ fs/ext4/inode.c | 15 ++++++++++++--- fs/ext4/super.c | 4 ++++ kernel/locking/percpu-rwsem.c | 1 + 4 files changed, 21 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index cb00b1119ec9..ba5aecc07fbc 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -33,6 +33,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include #endif @@ -1508,6 +1509,9 @@ struct ext4_sb_info { struct ratelimit_state s_err_ratelimit_state; struct ratelimit_state s_warning_ratelimit_state; struct ratelimit_state s_msg_ratelimit_state; + + /* Barrier between changing inodes' journal flags and writepages ops. */ + struct percpu_rw_semaphore s_journal_flag_rwsem; }; static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 779ef4c11bc1..4d8ebbe00456 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2612,11 +2612,14 @@ static int ext4_writepages(struct address_space *mapping, struct blk_plug plug; bool give_up_on_write = false; + percpu_down_read(&sbi->s_journal_flag_rwsem); trace_ext4_writepages(inode, wbc); - if (dax_mapping(mapping)) - return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, - wbc); + if (dax_mapping(mapping)) { + ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, + wbc); + goto out_writepages; + } /* * No pages to write? This is mainly a kludge to avoid starting @@ -2786,6 +2789,7 @@ retry: out_writepages: trace_ext4_writepages_result(inode, wbc, ret, nr_to_write - wbc->nr_to_write); + percpu_up_read(&sbi->s_journal_flag_rwsem); return ret; } @@ -5436,6 +5440,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) journal_t *journal; handle_t *handle; int err; + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); /* * We have to be very careful here: changing a data block's @@ -5475,6 +5480,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) } } + percpu_down_write(&sbi->s_journal_flag_rwsem); jbd2_journal_lock_updates(journal); /* @@ -5491,6 +5497,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) err = jbd2_journal_flush(journal); if (err < 0) { jbd2_journal_unlock_updates(journal); + percpu_up_write(&sbi->s_journal_flag_rwsem); ext4_inode_resume_unlocked_dio(inode); return err; } @@ -5499,6 +5506,8 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val) ext4_set_aops(inode); jbd2_journal_unlock_updates(journal); + percpu_up_write(&sbi->s_journal_flag_rwsem); + if (val) up_write(&EXT4_I(inode)->i_mmap_sem); ext4_inode_resume_unlocked_dio(inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 304c712dbe12..20c5d52253b4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -859,6 +859,7 @@ static void ext4_put_super(struct super_block *sb) percpu_counter_destroy(&sbi->s_freeinodes_counter); percpu_counter_destroy(&sbi->s_dirs_counter); percpu_counter_destroy(&sbi->s_dirtyclusters_counter); + percpu_free_rwsem(&sbi->s_journal_flag_rwsem); brelse(sbi->s_sbh); #ifdef CONFIG_QUOTA for (i = 0; i < EXT4_MAXQUOTAS; i++) @@ -3930,6 +3931,9 @@ no_journal: if (!err) err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, GFP_KERNEL); + if (!err) + err = percpu_init_rwsem(&sbi->s_journal_flag_rwsem); + if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount6; diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index f231e0bb311c..bec0b647f9cc 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -37,6 +37,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) free_percpu(brw->fast_read_ctr); brw->fast_read_ctr = NULL; /* catch use after free bugs */ } +EXPORT_SYMBOL_GPL(percpu_free_rwsem); /* * This is the fast-path for down_read/up_read. If it succeeds we rely -- cgit v1.2.3 From 5db4298133d99b3dfc60d6899ac9df169769c899 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 26 Apr 2016 10:22:08 -0700 Subject: lcoking/locktorture: Simplify the torture_runnable computation This commit replaces an #ifdef with IS_ENABLED(), saving five lines. Signed-off-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: corbet@lwn.net Cc: dave@stgolabs.net Cc: dhowells@redhat.com Cc: linux-doc@vger.kernel.org Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1461691328-5429-4-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/locking/locktorture.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index d066a50dc87e..f8c5af52a131 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -75,12 +75,7 @@ struct lock_stress_stats { long n_lock_acquired; }; -#if defined(MODULE) -#define LOCKTORTURE_RUNNABLE_INIT 1 -#else -#define LOCKTORTURE_RUNNABLE_INIT 0 -#endif -int torture_runnable = LOCKTORTURE_RUNNABLE_INIT; +int torture_runnable = IS_ENABLED(MODULE); module_param(torture_runnable, int, 0444); MODULE_PARM_DESC(torture_runnable, "Start locktorture at module init"); -- cgit v1.2.3 From e7904a28f5331c21d17af638cb477c83662e3cb6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 1 Aug 2015 19:25:08 +0200 Subject: locking/lockdep, sched/core: Implement a better lock pinning scheme The problem with the existing lock pinning is that each pin is of value 1; this mean you can simply unpin if you know its pinned, without having any extra information. This scheme generates a random (16 bit) cookie for each pin and requires this same cookie to unpin. This means you have to keep the cookie in context. No objsize difference for !LOCKDEP kernels. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 23 ++++++++++---- kernel/locking/lockdep.c | 71 +++++++++++++++++++++++++++++++++++++------ kernel/sched/core.c | 79 +++++++++++++++++++++++++----------------------- kernel/sched/deadline.c | 11 ++++--- kernel/sched/fair.c | 6 ++-- kernel/sched/idle_task.c | 2 +- kernel/sched/rt.c | 6 ++-- kernel/sched/sched.h | 8 +++-- kernel/sched/stop_task.c | 2 +- 9 files changed, 140 insertions(+), 68 deletions(-) (limited to 'kernel/locking') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..cf9bf9612702 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -354,8 +354,13 @@ extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); extern void lockdep_clear_current_reclaim_state(void); extern void lockdep_trace_alloc(gfp_t mask); -extern void lock_pin_lock(struct lockdep_map *lock); -extern void lock_unpin_lock(struct lockdep_map *lock); +struct pin_cookie { unsigned int val; }; + +#define NIL_COOKIE (struct pin_cookie){ .val = 0U, } + +extern struct pin_cookie lock_pin_lock(struct lockdep_map *lock); +extern void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie); +extern void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie); # define INIT_LOCKDEP .lockdep_recursion = 0, .lockdep_reclaim_gfp = 0, @@ -371,8 +376,9 @@ extern void lock_unpin_lock(struct lockdep_map *lock); #define lockdep_recursing(tsk) ((tsk)->lockdep_recursion) -#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) -#define lockdep_unpin_lock(l) lock_unpin_lock(&(l)->dep_map) +#define lockdep_pin_lock(l) lock_pin_lock(&(l)->dep_map) +#define lockdep_repin_lock(l,c) lock_repin_lock(&(l)->dep_map, (c)) +#define lockdep_unpin_lock(l,c) lock_unpin_lock(&(l)->dep_map, (c)) #else /* !CONFIG_LOCKDEP */ @@ -425,8 +431,13 @@ struct lock_class_key { }; #define lockdep_recursing(tsk) (0) -#define lockdep_pin_lock(l) do { (void)(l); } while (0) -#define lockdep_unpin_lock(l) do { (void)(l); } while (0) +struct pin_cookie { }; + +#define NIL_COOKIE (struct pin_cookie){ } + +#define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; }) +#define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) +#define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) #endif /* !LOCKDEP */ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index ed9410936a22..d7f94f4c811d 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -45,6 +45,7 @@ #include #include #include +#include #include @@ -3554,7 +3555,35 @@ static int __lock_is_held(struct lockdep_map *lock) return 0; } -static void __lock_pin_lock(struct lockdep_map *lock) +static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock) +{ + struct pin_cookie cookie = NIL_COOKIE; + struct task_struct *curr = current; + int i; + + if (unlikely(!debug_locks)) + return cookie; + + for (i = 0; i < curr->lockdep_depth; i++) { + struct held_lock *hlock = curr->held_locks + i; + + if (match_held_lock(hlock, lock)) { + /* + * Grab 16bits of randomness; this is sufficient to not + * be guessable and still allows some pin nesting in + * our u32 pin_count. + */ + cookie.val = 1 + (prandom_u32() >> 16); + hlock->pin_count += cookie.val; + return cookie; + } + } + + WARN(1, "pinning an unheld lock\n"); + return cookie; +} + +static void __lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie) { struct task_struct *curr = current; int i; @@ -3566,7 +3595,7 @@ static void __lock_pin_lock(struct lockdep_map *lock) struct held_lock *hlock = curr->held_locks + i; if (match_held_lock(hlock, lock)) { - hlock->pin_count++; + hlock->pin_count += cookie.val; return; } } @@ -3574,7 +3603,7 @@ static void __lock_pin_lock(struct lockdep_map *lock) WARN(1, "pinning an unheld lock\n"); } -static void __lock_unpin_lock(struct lockdep_map *lock) +static void __lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie) { struct task_struct *curr = current; int i; @@ -3589,7 +3618,11 @@ static void __lock_unpin_lock(struct lockdep_map *lock) if (WARN(!hlock->pin_count, "unpinning an unpinned lock\n")) return; - hlock->pin_count--; + hlock->pin_count -= cookie.val; + + if (WARN((int)hlock->pin_count < 0, "pin count corrupted\n")) + hlock->pin_count = 0; + return; } } @@ -3720,24 +3753,44 @@ int lock_is_held(struct lockdep_map *lock) } EXPORT_SYMBOL_GPL(lock_is_held); -void lock_pin_lock(struct lockdep_map *lock) +struct pin_cookie lock_pin_lock(struct lockdep_map *lock) { + struct pin_cookie cookie = NIL_COOKIE; unsigned long flags; if (unlikely(current->lockdep_recursion)) - return; + return cookie; raw_local_irq_save(flags); check_flags(flags); current->lockdep_recursion = 1; - __lock_pin_lock(lock); + cookie = __lock_pin_lock(lock); current->lockdep_recursion = 0; raw_local_irq_restore(flags); + + return cookie; } EXPORT_SYMBOL_GPL(lock_pin_lock); -void lock_unpin_lock(struct lockdep_map *lock) +void lock_repin_lock(struct lockdep_map *lock, struct pin_cookie cookie) +{ + unsigned long flags; + + if (unlikely(current->lockdep_recursion)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + + current->lockdep_recursion = 1; + __lock_repin_lock(lock, cookie); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_repin_lock); + +void lock_unpin_lock(struct lockdep_map *lock, struct pin_cookie cookie) { unsigned long flags; @@ -3748,7 +3801,7 @@ void lock_unpin_lock(struct lockdep_map *lock) check_flags(flags); current->lockdep_recursion = 1; - __lock_unpin_lock(lock); + __lock_unpin_lock(lock, cookie); current->lockdep_recursion = 0; raw_local_irq_restore(flags); } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 7c7db60115b4..71c5a753e6e9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -184,7 +184,7 @@ struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) rq = task_rq(p); raw_spin_lock(&rq->lock); if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - lockdep_pin_lock(&rq->lock); + rf->cookie = lockdep_pin_lock(&rq->lock); return rq; } raw_spin_unlock(&rq->lock); @@ -224,7 +224,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) * pair with the WMB to ensure we must then also see migrating. */ if (likely(rq == task_rq(p) && !task_on_rq_migrating(p))) { - lockdep_pin_lock(&rq->lock); + rf->cookie = lockdep_pin_lock(&rq->lock); return rq; } raw_spin_unlock(&rq->lock); @@ -1193,9 +1193,9 @@ static int __set_cpus_allowed_ptr(struct task_struct *p, * OK, since we're going to drop the lock immediately * afterwards anyway. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, rf.cookie); rq = move_queued_task(rq, p, dest_cpu); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, rf.cookie); } out: task_rq_unlock(rq, p, &rf); @@ -1669,8 +1669,8 @@ static inline void ttwu_activate(struct rq *rq, struct task_struct *p, int en_fl /* * Mark the task runnable and perform wakeup-preemption. */ -static void -ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) +static void ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags, + struct pin_cookie cookie) { check_preempt_curr(rq, p, wake_flags); p->state = TASK_RUNNING; @@ -1682,9 +1682,9 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) * Our task @p is fully woken up and running; so its safe to * drop the rq->lock, hereafter rq is only used for statistics. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); p->sched_class->task_woken(rq, p); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, cookie); } if (rq->idle_stamp) { @@ -1702,7 +1702,8 @@ ttwu_do_wakeup(struct rq *rq, struct task_struct *p, int wake_flags) } static void -ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) +ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags, + struct pin_cookie cookie) { lockdep_assert_held(&rq->lock); @@ -1712,7 +1713,7 @@ ttwu_do_activate(struct rq *rq, struct task_struct *p, int wake_flags) #endif ttwu_activate(rq, p, ENQUEUE_WAKEUP | ENQUEUE_WAKING); - ttwu_do_wakeup(rq, p, wake_flags); + ttwu_do_wakeup(rq, p, wake_flags, cookie); } /* @@ -1731,7 +1732,7 @@ static int ttwu_remote(struct task_struct *p, int wake_flags) if (task_on_rq_queued(p)) { /* check_preempt_curr() may use rq clock */ update_rq_clock(rq); - ttwu_do_wakeup(rq, p, wake_flags); + ttwu_do_wakeup(rq, p, wake_flags, rf.cookie); ret = 1; } __task_rq_unlock(rq, &rf); @@ -1744,6 +1745,7 @@ void sched_ttwu_pending(void) { struct rq *rq = this_rq(); struct llist_node *llist = llist_del_all(&rq->wake_list); + struct pin_cookie cookie; struct task_struct *p; unsigned long flags; @@ -1751,15 +1753,15 @@ void sched_ttwu_pending(void) return; raw_spin_lock_irqsave(&rq->lock, flags); - lockdep_pin_lock(&rq->lock); + cookie = lockdep_pin_lock(&rq->lock); while (llist) { p = llist_entry(llist, struct task_struct, wake_entry); llist = llist_next(llist); - ttwu_do_activate(rq, p, 0); + ttwu_do_activate(rq, p, 0, cookie); } - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); raw_spin_unlock_irqrestore(&rq->lock, flags); } @@ -1846,6 +1848,7 @@ bool cpus_share_cache(int this_cpu, int that_cpu) static void ttwu_queue(struct task_struct *p, int cpu) { struct rq *rq = cpu_rq(cpu); + struct pin_cookie cookie; #if defined(CONFIG_SMP) if (sched_feat(TTWU_QUEUE) && !cpus_share_cache(smp_processor_id(), cpu)) { @@ -1856,9 +1859,9 @@ static void ttwu_queue(struct task_struct *p, int cpu) #endif raw_spin_lock(&rq->lock); - lockdep_pin_lock(&rq->lock); - ttwu_do_activate(rq, p, 0); - lockdep_unpin_lock(&rq->lock); + cookie = lockdep_pin_lock(&rq->lock); + ttwu_do_activate(rq, p, 0, cookie); + lockdep_unpin_lock(&rq->lock, cookie); raw_spin_unlock(&rq->lock); } @@ -2055,7 +2058,7 @@ out: * ensure that this_rq() is locked, @p is bound to this_rq() and not * the current task. */ -static void try_to_wake_up_local(struct task_struct *p) +static void try_to_wake_up_local(struct task_struct *p, struct pin_cookie cookie) { struct rq *rq = task_rq(p); @@ -2072,11 +2075,11 @@ static void try_to_wake_up_local(struct task_struct *p) * disabled avoiding further scheduler activity on it and we've * not yet picked a replacement task. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); raw_spin_unlock(&rq->lock); raw_spin_lock(&p->pi_lock); raw_spin_lock(&rq->lock); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, cookie); } if (!(p->state & TASK_NORMAL)) @@ -2087,7 +2090,7 @@ static void try_to_wake_up_local(struct task_struct *p) if (!task_on_rq_queued(p)) ttwu_activate(rq, p, ENQUEUE_WAKEUP); - ttwu_do_wakeup(rq, p, 0); + ttwu_do_wakeup(rq, p, 0, cookie); if (schedstat_enabled()) ttwu_stat(p, smp_processor_id(), 0); out: @@ -2515,9 +2518,9 @@ void wake_up_new_task(struct task_struct *p) * Nothing relies on rq->lock after this, so its fine to * drop it. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, rf.cookie); p->sched_class->task_woken(rq, p); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, rf.cookie); } #endif task_rq_unlock(rq, p, &rf); @@ -2782,7 +2785,7 @@ asmlinkage __visible void schedule_tail(struct task_struct *prev) */ static __always_inline struct rq * context_switch(struct rq *rq, struct task_struct *prev, - struct task_struct *next) + struct task_struct *next, struct pin_cookie cookie) { struct mm_struct *mm, *oldmm; @@ -2814,7 +2817,7 @@ context_switch(struct rq *rq, struct task_struct *prev, * of the scheduler it's an obvious special-case), so we * do an early lockdep release here: */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); spin_release(&rq->lock.dep_map, 1, _THIS_IP_); /* Here we just switch the register state and the stack. */ @@ -3154,7 +3157,7 @@ static inline void schedule_debug(struct task_struct *prev) * Pick up the highest-prio task: */ static inline struct task_struct * -pick_next_task(struct rq *rq, struct task_struct *prev) +pick_next_task(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie) { const struct sched_class *class = &fair_sched_class; struct task_struct *p; @@ -3165,20 +3168,20 @@ pick_next_task(struct rq *rq, struct task_struct *prev) */ if (likely(prev->sched_class == class && rq->nr_running == rq->cfs.h_nr_running)) { - p = fair_sched_class.pick_next_task(rq, prev); + p = fair_sched_class.pick_next_task(rq, prev, cookie); if (unlikely(p == RETRY_TASK)) goto again; /* assumes fair_sched_class->next == idle_sched_class */ if (unlikely(!p)) - p = idle_sched_class.pick_next_task(rq, prev); + p = idle_sched_class.pick_next_task(rq, prev, cookie); return p; } again: for_each_class(class) { - p = class->pick_next_task(rq, prev); + p = class->pick_next_task(rq, prev, cookie); if (p) { if (unlikely(p == RETRY_TASK)) goto again; @@ -3232,6 +3235,7 @@ static void __sched notrace __schedule(bool preempt) { struct task_struct *prev, *next; unsigned long *switch_count; + struct pin_cookie cookie; struct rq *rq; int cpu; @@ -3265,7 +3269,7 @@ static void __sched notrace __schedule(bool preempt) */ smp_mb__before_spinlock(); raw_spin_lock(&rq->lock); - lockdep_pin_lock(&rq->lock); + cookie = lockdep_pin_lock(&rq->lock); rq->clock_skip_update <<= 1; /* promote REQ to ACT */ @@ -3287,7 +3291,7 @@ static void __sched notrace __schedule(bool preempt) to_wakeup = wq_worker_sleeping(prev); if (to_wakeup) - try_to_wake_up_local(to_wakeup); + try_to_wake_up_local(to_wakeup, cookie); } } switch_count = &prev->nvcsw; @@ -3296,7 +3300,7 @@ static void __sched notrace __schedule(bool preempt) if (task_on_rq_queued(prev)) update_rq_clock(rq); - next = pick_next_task(rq, prev); + next = pick_next_task(rq, prev, cookie); clear_tsk_need_resched(prev); clear_preempt_need_resched(); rq->clock_skip_update = 0; @@ -3307,9 +3311,9 @@ static void __sched notrace __schedule(bool preempt) ++*switch_count; trace_sched_switch(preempt, prev, next); - rq = context_switch(rq, prev, next); /* unlocks the rq */ + rq = context_switch(rq, prev, next, cookie); /* unlocks the rq */ } else { - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); raw_spin_unlock_irq(&rq->lock); } @@ -5392,6 +5396,7 @@ static void migrate_tasks(struct rq *dead_rq) { struct rq *rq = dead_rq; struct task_struct *next, *stop = rq->stop; + struct pin_cookie cookie; int dest_cpu; /* @@ -5423,8 +5428,8 @@ static void migrate_tasks(struct rq *dead_rq) /* * pick_next_task assumes pinned rq->lock. */ - lockdep_pin_lock(&rq->lock); - next = pick_next_task(rq, &fake_task); + cookie = lockdep_pin_lock(&rq->lock); + next = pick_next_task(rq, &fake_task, cookie); BUG_ON(!next); next->sched_class->put_prev_task(rq, next); @@ -5437,7 +5442,7 @@ static void migrate_tasks(struct rq *dead_rq) * because !cpu_active at this point, which means load-balance * will not interfere. Also, stop-machine. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); raw_spin_unlock(&rq->lock); raw_spin_lock(&next->pi_lock); raw_spin_lock(&rq->lock); diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 738e3c84dfe1..ba53a87bb978 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -670,9 +670,9 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer) * Nothing relies on rq->lock after this, so its safe to drop * rq->lock. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, rf.cookie); push_dl_task(rq); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, rf.cookie); } #endif @@ -1125,7 +1125,8 @@ static struct sched_dl_entity *pick_next_dl_entity(struct rq *rq, return rb_entry(left, struct sched_dl_entity, rb_node); } -struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) +struct task_struct * +pick_next_task_dl(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie) { struct sched_dl_entity *dl_se; struct task_struct *p; @@ -1140,9 +1141,9 @@ struct task_struct *pick_next_task_dl(struct rq *rq, struct task_struct *prev) * disabled avoiding further scheduler activity on it and we're * being very careful to re-start the picking loop. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); pull_dl_task(rq); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, cookie); /* * pull_rt_task() can drop (and re-acquire) rq->lock; this * means a stop task can slip in, in which case we need to diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b8a33abce650..91395e1552ae 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5542,7 +5542,7 @@ preempt: } static struct task_struct * -pick_next_task_fair(struct rq *rq, struct task_struct *prev) +pick_next_task_fair(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie) { struct cfs_rq *cfs_rq = &rq->cfs; struct sched_entity *se; @@ -5655,9 +5655,9 @@ idle: * further scheduler activity on it and we're being very careful to * re-start the picking loop. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); new_tasks = idle_balance(rq); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, cookie); /* * Because idle_balance() releases (and re-acquires) rq->lock, it is * possible for any higher priority task to appear. In that case we diff --git a/kernel/sched/idle_task.c b/kernel/sched/idle_task.c index 47ce94931f1b..2ce5458bbe1d 100644 --- a/kernel/sched/idle_task.c +++ b/kernel/sched/idle_task.c @@ -24,7 +24,7 @@ static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int fl } static struct task_struct * -pick_next_task_idle(struct rq *rq, struct task_struct *prev) +pick_next_task_idle(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie) { put_prev_task(rq, prev); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 19e13060fcd5..68deaf901a12 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1524,7 +1524,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq) } static struct task_struct * -pick_next_task_rt(struct rq *rq, struct task_struct *prev) +pick_next_task_rt(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie) { struct task_struct *p; struct rt_rq *rt_rq = &rq->rt; @@ -1536,9 +1536,9 @@ pick_next_task_rt(struct rq *rq, struct task_struct *prev) * disabled avoiding further scheduler activity on it and we're * being very careful to re-start the picking loop. */ - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, cookie); pull_rt_task(rq); - lockdep_pin_lock(&rq->lock); + lockdep_repin_lock(&rq->lock, cookie); /* * pull_rt_task() can drop (and re-acquire) rq->lock; this * means a dl or stop task can slip in, in which case we need diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index a5eecb1e5e4b..0b6a838e9e73 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1202,7 +1202,8 @@ struct sched_class { * tasks. */ struct task_struct * (*pick_next_task) (struct rq *rq, - struct task_struct *prev); + struct task_struct *prev, + struct pin_cookie cookie); void (*put_prev_task) (struct rq *rq, struct task_struct *p); #ifdef CONFIG_SMP @@ -1453,6 +1454,7 @@ static inline void sched_avg_update(struct rq *rq) { } struct rq_flags { unsigned long flags; + struct pin_cookie cookie; }; struct rq *__task_rq_lock(struct task_struct *p, struct rq_flags *rf) @@ -1464,7 +1466,7 @@ struct rq *task_rq_lock(struct task_struct *p, struct rq_flags *rf) static inline void __task_rq_unlock(struct rq *rq, struct rq_flags *rf) __releases(rq->lock) { - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, rf->cookie); raw_spin_unlock(&rq->lock); } @@ -1473,7 +1475,7 @@ task_rq_unlock(struct rq *rq, struct task_struct *p, struct rq_flags *rf) __releases(rq->lock) __releases(p->pi_lock) { - lockdep_unpin_lock(&rq->lock); + lockdep_unpin_lock(&rq->lock, rf->cookie); raw_spin_unlock(&rq->lock); raw_spin_unlock_irqrestore(&p->pi_lock, rf->flags); } diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c index cbc67da10954..604297a08b3a 100644 --- a/kernel/sched/stop_task.c +++ b/kernel/sched/stop_task.c @@ -24,7 +24,7 @@ check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags) } static struct task_struct * -pick_next_task_stop(struct rq *rq, struct task_struct *prev) +pick_next_task_stop(struct rq *rq, struct task_struct *prev, struct pin_cookie cookie) { struct task_struct *stop = rq->stop; -- cgit v1.2.3 From dc209a3fd73ec96d4491bcc128c3b50b0a8e8017 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 17 Apr 2016 23:31:42 -0700 Subject: locking/pvqspinlock: Avoid double resetting of stats ... remove the redundant second iteration, this is most likely a copy/past buglet. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: waiman.long@hpe.com Link: http://lkml.kernel.org/r/1460961103-24953-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_stat.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index d734b7502001..72722334237a 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -191,8 +191,6 @@ static ssize_t qstat_write(struct file *file, const char __user *user_buf, for (i = 0 ; i < qstat_num; i++) WRITE_ONCE(ptr[i], 0); - for (i = 0 ; i < qstat_num; i++) - WRITE_ONCE(ptr[i], 0); } return count; } -- cgit v1.2.3 From b96bbdde19cc56f288372d25fd5ea7af04fc1271 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Tue, 19 Apr 2016 21:17:25 -0700 Subject: locking/pvqspinlock: Robustify init_qspinlock_stat() Specifically around the debugfs file creation calls, I have no idea if they could ever possibly fail, but this is core code (debug aside) so lets at least check the return value and inform anything fishy. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160420041725.GC3472@linux-uzut.site Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_stat.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index 72722334237a..22e025309845 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -212,10 +212,8 @@ static int __init init_qspinlock_stat(void) struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL); int i; - if (!d_qstat) { - pr_warn("Could not create 'qlockstat' debugfs directory\n"); - return 0; - } + if (!d_qstat) + goto out; /* * Create the debugfs files @@ -225,12 +223,20 @@ static int __init init_qspinlock_stat(void) * performance. */ for (i = 0; i < qstat_num; i++) - debugfs_create_file(qstat_names[i], 0400, d_qstat, - (void *)(long)i, &fops_qstat); + if (!debugfs_create_file(qstat_names[i], 0400, d_qstat, + (void *)(long)i, &fops_qstat)) + goto fail_undo; + + if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat, + (void *)(long)qstat_reset_cnts, &fops_qstat)) + goto fail_undo; - debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat, - (void *)(long)qstat_reset_cnts, &fops_qstat); return 0; +fail_undo: + debugfs_remove_recursive(d_qstat); +out: + pr_warn("Could not create 'qlockstat' debugfs entries\n"); + return -ENOMEM; } fs_initcall(init_qspinlock_stat); -- cgit v1.2.3 From 04cafed7fc19a8010771c788708ac97c405fc3de Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 12 May 2016 13:57:45 +0200 Subject: locking/rwsem: Fix down_write_killable() The new signal_pending exit path in __rwsem_down_write_failed_common() was fingered as breaking his kernel by Tetsuo Handa. Upon inspection it was found that there are two things wrong with it; - it forgets to remove WAITING_BIAS if it leaves the list empty, or - it forgets to wake further waiters that were blocked on the now removed waiter. Especially the first issue causes new lock attempts to block and stall indefinitely, as the code assumes that pending waiters mean there is an owner that will wake when it releases the lock. Reported-by: Tetsuo Handa Tested-by: Tetsuo Handa Tested-by: Michal Hocko Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Andrew Morton Cc: Arnaldo Carvalho de Melo Cc: Chris Zankel Cc: David S. Miller Cc: Davidlohr Bueso Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Max Filippov Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Tony Luck Cc: Vince Weaver Cc: Waiman Long Link: http://lkml.kernel.org/r/20160512115745.GP3192@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/rwsem-xadd.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index df4dcb883b50..09e30c6225e5 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -487,23 +487,32 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) /* Block until there are no active lockers. */ do { - if (signal_pending_state(state, current)) { - raw_spin_lock_irq(&sem->wait_lock); - ret = ERR_PTR(-EINTR); - goto out; - } + if (signal_pending_state(state, current)) + goto out_nolock; + schedule(); set_current_state(state); } while ((count = sem->count) & RWSEM_ACTIVE_MASK); raw_spin_lock_irq(&sem->wait_lock); } -out: __set_current_state(TASK_RUNNING); list_del(&waiter.list); raw_spin_unlock_irq(&sem->wait_lock); return ret; + +out_nolock: + __set_current_state(TASK_RUNNING); + raw_spin_lock_irq(&sem->wait_lock); + list_del(&waiter.list); + if (list_empty(&sem->wait_list)) + rwsem_atomic_update(-RWSEM_WAITING_BIAS, sem); + else + __rwsem_do_wake(sem, RWSEM_WAKE_ANY); + raw_spin_unlock_irq(&sem->wait_lock); + + return ERR_PTR(-EINTR); } __visible struct rw_semaphore * __sched -- cgit v1.2.3 From 887bddfa90c79957d61067cd54a10087be0c8b23 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 26 May 2016 00:04:58 -0400 Subject: add down_write_killable_nested() Signed-off-by: Al Viro --- include/linux/rwsem.h | 2 ++ kernel/locking/rwsem.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'kernel/locking') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index d1c12d160ace..d37fbb34d06f 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -156,6 +156,7 @@ extern void downgrade_write(struct rw_semaphore *sem); */ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); +extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass); extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock); # define down_write_nest_lock(sem, nest_lock) \ @@ -176,6 +177,7 @@ extern void up_read_non_owner(struct rw_semaphore *sem); # define down_read_nested(sem, subclass) down_read(sem) # define down_write_nest_lock(sem, nest_lock) down_write(sem) # define down_write_nested(sem, subclass) down_write(sem) +# define down_write_killable_nested(sem, subclass) down_write_killable(sem) # define down_read_non_owner(sem) down_read(sem) # define up_read_non_owner(sem) up_read(sem) #endif diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index c817216c1615..2e853ad93a3a 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -173,6 +173,22 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) EXPORT_SYMBOL(down_write_nested); +int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) +{ + might_sleep(); + rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); + + if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock, __down_write_killable)) { + rwsem_release(&sem->dep_map, 1, _RET_IP_); + return -EINTR; + } + + rwsem_set_owner(sem); + return 0; +} + +EXPORT_SYMBOL(down_write_killable_nested); + void up_read_non_owner(struct rw_semaphore *sem) { __up_read(sem); -- cgit v1.2.3 From 0422e83d84ae24b933e4b0d4c1e0f0b4ae8a0a3b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 26 May 2016 21:08:17 +0100 Subject: locking/ww_mutex: Report recursive ww_mutex locking early Recursive locking for ww_mutexes was originally conceived as an exception. However, it is heavily used by the DRM atomic modesetting code. Currently, the recursive deadlock is checked after we have queued up for a busy-spin and as we never release the lock, we spin until kicked, whereupon the deadlock is discovered and reported. A simple solution for the now common problem is to move the recursive deadlock discovery to the first action when taking the ww_mutex. Suggested-by: Maarten Lankhorst Signed-off-by: Chris Wilson Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Maarten Lankhorst Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1464293297-19777-1-git-send-email-chris@chris-wilson.co.uk Signed-off-by: Ingo Molnar --- kernel/locking/mutex.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index e364b424b019..79d2d765a75f 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -486,9 +486,6 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct ww_acquire_ctx *ctx) if (!hold_ctx) return 0; - if (unlikely(ctx == hold_ctx)) - return -EALREADY; - if (ctx->stamp - hold_ctx->stamp <= LONG_MAX && (ctx->stamp != hold_ctx->stamp || ctx > hold_ctx)) { #ifdef CONFIG_DEBUG_MUTEXES @@ -514,6 +511,12 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, unsigned long flags; int ret; + if (use_ww_ctx) { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) + return -EALREADY; + } + preempt_disable(); mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip); -- cgit v1.2.3 From 2c610022711675ee908b903d242f0b90e1db661f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 8 Jun 2016 10:19:51 +0200 Subject: locking/qspinlock: Fix spin_unlock_wait() some more While this prior commit: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()") ... fixes spin_is_locked() and spin_unlock_wait() for the usage in ipc/sem and netfilter, it does not in fact work right for the usage in task_work and futex. So while the 2 locks crossed problem: spin_lock(A) spin_lock(B) if (!spin_is_locked(B)) spin_unlock_wait(A) foo() foo(); ... works with the smp_mb() injected by both spin_is_locked() and spin_unlock_wait(), this is not sufficient for: flag = 1; smp_mb(); spin_lock() spin_unlock_wait() if (!flag) // add to lockless list // iterate lockless list ... because in this scenario, the store from spin_lock() can be delayed past the load of flag, uncrossing the variables and loosing the guarantee. This patch reworks spin_is_locked() and spin_unlock_wait() to work in both cases by exploiting the observation that while the lock byte store can be delayed, the contender must have registered itself visibly in other state contained in the word. It also allows for architectures to override both functions, as PPC and ARM64 have an additional issue for which we currently have no generic solution. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Boqun Feng Cc: Davidlohr Bueso Cc: Giovanni Gherdovich Cc: Linus Torvalds Cc: Pan Xinhui Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Cc: Will Deacon Cc: stable@vger.kernel.org # v4.2 and later Fixes: 54cf809b9512 ("locking,qspinlock: Fix spin_is_locked() and spin_unlock_wait()") Signed-off-by: Ingo Molnar --- include/asm-generic/qspinlock.h | 53 ++++++++++++------------------------ kernel/locking/qspinlock.c | 60 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 36 deletions(-) (limited to 'kernel/locking') diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h index 6bd05700d8c9..05f05f17a7c2 100644 --- a/include/asm-generic/qspinlock.h +++ b/include/asm-generic/qspinlock.h @@ -21,38 +21,34 @@ #include +/** + * queued_spin_unlock_wait - wait until the _current_ lock holder releases the lock + * @lock : Pointer to queued spinlock structure + * + * There is a very slight possibility of live-lock if the lockers keep coming + * and the waiter is just unfortunate enough to not see any unlock state. + */ +#ifndef queued_spin_unlock_wait +extern void queued_spin_unlock_wait(struct qspinlock *lock); +#endif + /** * queued_spin_is_locked - is the spinlock locked? * @lock: Pointer to queued spinlock structure * Return: 1 if it is locked, 0 otherwise */ +#ifndef queued_spin_is_locked static __always_inline int queued_spin_is_locked(struct qspinlock *lock) { /* - * queued_spin_lock_slowpath() can ACQUIRE the lock before - * issuing the unordered store that sets _Q_LOCKED_VAL. - * - * See both smp_cond_acquire() sites for more detail. - * - * This however means that in code like: - * - * spin_lock(A) spin_lock(B) - * spin_unlock_wait(B) spin_is_locked(A) - * do_something() do_something() - * - * Both CPUs can end up running do_something() because the store - * setting _Q_LOCKED_VAL will pass through the loads in - * spin_unlock_wait() and/or spin_is_locked(). + * See queued_spin_unlock_wait(). * - * Avoid this by issuing a full memory barrier between the spin_lock() - * and the loads in spin_unlock_wait() and spin_is_locked(). - * - * Note that regular mutual exclusion doesn't care about this - * delayed store. + * Any !0 state indicates it is locked, even if _Q_LOCKED_VAL + * isn't immediately observable. */ - smp_mb(); - return atomic_read(&lock->val) & _Q_LOCKED_MASK; + return atomic_read(&lock->val); } +#endif /** * queued_spin_value_unlocked - is the spinlock structure unlocked? @@ -122,21 +118,6 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock) } #endif -/** - * queued_spin_unlock_wait - wait until current lock holder releases the lock - * @lock : Pointer to queued spinlock structure - * - * There is a very slight possibility of live-lock if the lockers keep coming - * and the waiter is just unfortunate enough to not see any unlock state. - */ -static inline void queued_spin_unlock_wait(struct qspinlock *lock) -{ - /* See queued_spin_is_locked() */ - smp_mb(); - while (atomic_read(&lock->val) & _Q_LOCKED_MASK) - cpu_relax(); -} - #ifndef virt_spin_lock static __always_inline bool virt_spin_lock(struct qspinlock *lock) { diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index ce2f75e32ae1..5fc8c311b8fe 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -267,6 +267,66 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, #define queued_spin_lock_slowpath native_queued_spin_lock_slowpath #endif +/* + * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before + * issuing an _unordered_ store to set _Q_LOCKED_VAL. + * + * This means that the store can be delayed, but no later than the + * store-release from the unlock. This means that simply observing + * _Q_LOCKED_VAL is not sufficient to determine if the lock is acquired. + * + * There are two paths that can issue the unordered store: + * + * (1) clear_pending_set_locked(): *,1,0 -> *,0,1 + * + * (2) set_locked(): t,0,0 -> t,0,1 ; t != 0 + * atomic_cmpxchg_relaxed(): t,0,0 -> 0,0,1 + * + * However, in both cases we have other !0 state we've set before to queue + * ourseves: + * + * For (1) we have the atomic_cmpxchg_acquire() that set _Q_PENDING_VAL, our + * load is constrained by that ACQUIRE to not pass before that, and thus must + * observe the store. + * + * For (2) we have a more intersting scenario. We enqueue ourselves using + * xchg_tail(), which ends up being a RELEASE. This in itself is not + * sufficient, however that is followed by an smp_cond_acquire() on the same + * word, giving a RELEASE->ACQUIRE ordering. This again constrains our load and + * guarantees we must observe that store. + * + * Therefore both cases have other !0 state that is observable before the + * unordered locked byte store comes through. This means we can use that to + * wait for the lock store, and then wait for an unlock. + */ +#ifndef queued_spin_unlock_wait +void queued_spin_unlock_wait(struct qspinlock *lock) +{ + u32 val; + + for (;;) { + val = atomic_read(&lock->val); + + if (!val) /* not locked, we're done */ + goto done; + + if (val & _Q_LOCKED_MASK) /* locked, go wait for unlock */ + break; + + /* not locked, but pending, wait until we observe the lock */ + cpu_relax(); + } + + /* any unlock is good */ + while (atomic_read(&lock->val) & _Q_LOCKED_MASK) + cpu_relax(); + +done: + smp_rmb(); /* CTRL + RMB -> ACQUIRE */ +} +EXPORT_SYMBOL(queued_spin_unlock_wait); +#endif + #endif /* _GEN_PV_LOCK_SLOWPATH */ /** -- cgit v1.2.3 From 6720a305df74ca30bcc10fc316881641b6ff0c80 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 23 Jun 2016 12:11:17 -0700 Subject: locking: avoid passing around 'thread_info' in mutex debugging code None of the code actually wants a thread_info, it all wants a task_struct, and it's just converting back and forth between the two ("ti->task" to get the task_struct from the thread_info, and "task_thread_info(task)" to go the other way). No semantic change. Acked-by: Peter Zijlstra Signed-off-by: Linus Torvalds --- kernel/locking/mutex-debug.c | 12 ++++++------ kernel/locking/mutex-debug.h | 4 ++-- kernel/locking/mutex.c | 6 +++--- kernel/locking/mutex.h | 2 +- 4 files changed, 12 insertions(+), 12 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 3ef3736002d8..9c951fade415 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -49,21 +49,21 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) } void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) + struct task_struct *task) { SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); /* Mark the current thread as blocked on the lock: */ - ti->task->blocked_on = waiter; + task->blocked_on = waiter; } void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti) + struct task_struct *task) { DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); - DEBUG_LOCKS_WARN_ON(waiter->task != ti->task); - DEBUG_LOCKS_WARN_ON(ti->task->blocked_on != waiter); - ti->task->blocked_on = NULL; + DEBUG_LOCKS_WARN_ON(waiter->task != task); + DEBUG_LOCKS_WARN_ON(task->blocked_on != waiter); + task->blocked_on = NULL; list_del_init(&waiter->list); waiter->task = NULL; diff --git a/kernel/locking/mutex-debug.h b/kernel/locking/mutex-debug.h index 0799fd3e4cfa..d06ae3bb46c5 100644 --- a/kernel/locking/mutex-debug.h +++ b/kernel/locking/mutex-debug.h @@ -20,9 +20,9 @@ extern void debug_mutex_wake_waiter(struct mutex *lock, extern void debug_mutex_free_waiter(struct mutex_waiter *waiter); extern void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti); + struct task_struct *task); extern void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, - struct thread_info *ti); + struct task_struct *task); extern void debug_mutex_unlock(struct mutex *lock); extern void debug_mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 79d2d765a75f..a70b90db3909 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -537,7 +537,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, goto skip_wait; debug_mutex_lock_common(lock, &waiter); - debug_mutex_add_waiter(lock, &waiter, task_thread_info(task)); + debug_mutex_add_waiter(lock, &waiter, task); /* add waiting tasks to the end of the waitqueue (FIFO): */ list_add_tail(&waiter.list, &lock->wait_list); @@ -584,7 +584,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, } __set_task_state(task, TASK_RUNNING); - mutex_remove_waiter(lock, &waiter, current_thread_info()); + mutex_remove_waiter(lock, &waiter, task); /* set it to 0 if there are no waiters left: */ if (likely(list_empty(&lock->wait_list))) atomic_set(&lock->count, 0); @@ -605,7 +605,7 @@ skip_wait: return 0; err: - mutex_remove_waiter(lock, &waiter, task_thread_info(task)); + mutex_remove_waiter(lock, &waiter, task); spin_unlock_mutex(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, 1, ip); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 5cda397607f2..a68bae5e852a 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -13,7 +13,7 @@ do { spin_lock(lock); (void)(flags); } while (0) #define spin_unlock_mutex(lock, flags) \ do { spin_unlock(lock); (void)(flags); } while (0) -#define mutex_remove_waiter(lock, waiter, ti) \ +#define mutex_remove_waiter(lock, waiter, task) \ __list_del((waiter)->list.prev, (waiter)->list.next) #ifdef CONFIG_MUTEX_SPIN_ON_OWNER -- cgit v1.2.3