From afc256e131bb0e1ecb5e2b1df310b20fa7bd714d Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 2 Oct 2024 17:03:55 +0200 Subject: locking/spinlocks: Make __raw_* lock ops static If CONFIG_GENERIC_LOCKBREAK=y and CONFIG_DEBUG_LOCK_ALLOC=n (e.g. sh/sdk7786_defconfig): kernel/locking/spinlock.c:68:17: warning: no previous prototype for '__raw_spin_lock' [-Wmissing-prototypes] kernel/locking/spinlock.c:80:26: warning: no previous prototype for '__raw_spin_lock_irqsave' [-Wmissing-prototypes] kernel/locking/spinlock.c:98:17: warning: no previous prototype for '__raw_spin_lock_irq' [-Wmissing-prototypes] kernel/locking/spinlock.c:103:17: warning: no previous prototype for '__raw_spin_lock_bh' [-Wmissing-prototypes] kernel/locking/spinlock.c:68:17: warning: no previous prototype for '__raw_read_lock' [-Wmissing-prototypes] kernel/locking/spinlock.c:80:26: warning: no previous prototype for '__raw_read_lock_irqsave' [-Wmissing-prototypes] kernel/locking/spinlock.c:98:17: warning: no previous prototype for '__raw_read_lock_irq' [-Wmissing-prototypes] kernel/locking/spinlock.c:103:17: warning: no previous prototype for '__raw_read_lock_bh' [-Wmissing-prototypes] kernel/locking/spinlock.c:68:17: warning: no previous prototype for '__raw_write_lock' [-Wmissing-prototypes] kernel/locking/spinlock.c:80:26: warning: no previous prototype for '__raw_write_lock_irqsave' [-Wmissing-prototypes] kernel/locking/spinlock.c:98:17: warning: no previous prototype for '__raw_write_lock_irq' [-Wmissing-prototypes] kernel/locking/spinlock.c:103:17: warning: no previous prototype for '__raw_write_lock_bh' [-Wmissing-prototypes] All __raw_* lock ops are internal functions without external callers. Hence fix this by making them static. Note that if CONFIG_GENERIC_LOCKBREAK=y, no lock ops are inlined, as all of CONFIG_INLINE_*_LOCK* depend on !GENERIC_LOCKBREAK. Signed-off-by: Geert Uytterhoeven Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lkml.kernel.org/r/7201d7fb408375c6c4df541270d787b1b4a32354.1727879348.git.geert+renesas@glider.be --- kernel/locking/spinlock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 438c6086d540..7685defd7c52 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -65,7 +65,7 @@ EXPORT_PER_CPU_SYMBOL(__mmiowb_state); * towards that other CPU that it should break the lock ASAP. 
*/ #define BUILD_LOCK_OPS(op, locktype) \ -void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ { \ for (;;) { \ preempt_disable(); \ @@ -77,7 +77,7 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \ } \ } \ \ -unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ +static unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ { \ unsigned long flags; \ \ @@ -95,12 +95,12 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \ return flags; \ } \ \ -void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock_irq(locktype##_t *lock) \ { \ _raw_##op##_lock_irqsave(lock); \ } \ \ -void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ +static void __lockfunc __raw_##op##_lock_bh(locktype##_t *lock) \ { \ unsigned long flags; \ \ -- cgit v1.2.3 From 823a566221a5639f6c69424897218e5d6431a970 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Wed, 9 Oct 2024 11:20:31 +0200 Subject: locking/ww_mutex: Adjust to lockdep nest_lock requirements MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When using mutex_acquire_nest() with a nest_lock, lockdep refcounts the number of acquired lockdep_maps of mutexes of the same class, and also keeps a pointer to the first acquired lockdep_map of a class. That pointer is then used for various comparison-, printing- and checking purposes, but there is no mechanism to actively ensure that the lockdep_map stays in memory. Instead, a warning is printed if the lockdep_map is freed and there are still held locks of the same lock class, even if the lockdep_map itself has been released. In the context of WW/WD transactions that means that if a user unlocks and frees a ww_mutex from within an ongoing ww transaction, and that mutex happens to be the first ww_mutex grabbed in the transaction, such a warning is printed and there might be a risk of a UAF. Note that this is only a problem when lockdep is enabled and affects only dereferences of struct lockdep_map. Adjust to this by adding a fake lockdep_map to the acquire context and making sure it is the first acquired lockdep map of the associated ww_mutex class. Then hold it for the duration of the WW/WD transaction. This has the side effect that when trying to lock a ww mutex *without* a ww_acquire_context while such a context has been acquired, we'd see a lockdep splat. The test-ww_mutex.c selftest attempts to do that, so modify that particular test to not acquire a ww_acquire_context if it is not going to be used. Signed-off-by: Thomas Hellström Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20241009092031.6356-1-thomas.hellstrom@linux.intel.com --- include/linux/ww_mutex.h | 14 ++++++++++++++ kernel/locking/test-ww_mutex.c | 8 +++++--- 2 files changed, 19 insertions(+), 3 deletions(-) (limited to 'kernel/locking') diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index bb763085479a..a401a2f31a77 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -65,6 +65,16 @@ struct ww_acquire_ctx { #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; + /** + * @first_lock_dep_map: fake lockdep_map for first locked ww_mutex. + * + * lockdep requires the lockdep_map for the first locked ww_mutex + * in a ww transaction to remain in memory until all ww_mutexes of + * the transaction have been unlocked.
Ensure this by keeping a + * fake locked ww_mutex lockdep map between ww_acquire_init() and + * ww_acquire_fini(). + */ + struct lockdep_map first_lock_dep_map; #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH unsigned int deadlock_inject_interval; @@ -146,7 +156,10 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, debug_check_no_locks_freed((void *)ctx, sizeof(*ctx)); lockdep_init_map(&ctx->dep_map, ww_class->acquire_name, &ww_class->acquire_key, 0); + lockdep_init_map(&ctx->first_lock_dep_map, ww_class->mutex_name, + &ww_class->mutex_key, 0); mutex_acquire(&ctx->dep_map, 0, 0, _RET_IP_); + mutex_acquire_nest(&ctx->first_lock_dep_map, 0, 0, &ctx->dep_map, _RET_IP_); #endif #ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH ctx->deadlock_inject_interval = 1; @@ -185,6 +198,7 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx) static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx) { #ifdef CONFIG_DEBUG_LOCK_ALLOC + mutex_release(&ctx->first_lock_dep_map, _THIS_IP_); mutex_release(&ctx->dep_map, _THIS_IP_); #endif #ifdef DEBUG_WW_MUTEXES diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 10a5736a21c2..5d58b2c0ef98 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -62,7 +62,8 @@ static int __test_mutex(unsigned int flags) int ret; ww_mutex_init(&mtx.mutex, &ww_class); - ww_acquire_init(&ctx, &ww_class); + if (flags & TEST_MTX_CTX) + ww_acquire_init(&ctx, &ww_class); INIT_WORK_ONSTACK(&mtx.work, test_mutex_work); init_completion(&mtx.ready); @@ -90,7 +91,8 @@ static int __test_mutex(unsigned int flags) ret = wait_for_completion_timeout(&mtx.done, TIMEOUT); } ww_mutex_unlock(&mtx.mutex); - ww_acquire_fini(&ctx); + if (flags & TEST_MTX_CTX) + ww_acquire_fini(&ctx); if (ret) { pr_err("%s(flags=%x): mutual exclusion failure\n", @@ -679,7 +681,7 @@ static int __init test_ww_mutex_init(void) if (ret) return ret; - ret = stress(2047, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); + ret = stress(2046, hweight32(STRESS_ALL)*ncpus, STRESS_ALL); if (ret) return ret; -- cgit v1.2.3 From 894d1b3db41cf7e6ae0304429a1747b3c3f390bc Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 9 Oct 2024 16:53:34 -0700 Subject: locking/mutex: Remove wakeups from under mutex::wait_lock In preparation for nesting mutex::wait_lock under rq::lock, we need to remove wakeups from under it. Do this by utilizing wake_qs to defer the wakeup until after the lock is dropped.
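As a rough sketch of the pattern this series applies (illustrative only, not code taken from the patch; wake_after_unlock() and its task argument are hypothetical stand-ins for the real slowpath and its waiter selection):

    #include <linux/sched.h>
    #include <linux/sched/wake_q.h>
    #include <linux/spinlock.h>

    /*
     * Hypothetical sketch of the wake_q pattern: queue the wakeup
     * while holding wait_lock, issue it only after the lock is
     * dropped.
     */
    static void wake_after_unlock(raw_spinlock_t *wait_lock,
                                  struct task_struct *task)
    {
            DEFINE_WAKE_Q(wake_q);          /* on-stack wakeup queue */

            raw_spin_lock(wait_lock);
            /* ... bookkeeping that used to call wake_up_process(task) ... */
            wake_q_add(&wake_q, task);      /* defer the wakeup */
            preempt_disable();              /* keep running until the wakeup */
            raw_spin_unlock(wait_lock);
            wake_up_q(&wake_q);             /* safe: wait_lock no longer held */
            preempt_enable();
    }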
[Heavily changed after 55f036ca7e74 ("locking: WW mutex cleanup") and 08295b3b5bee ("locking: Implement an algorithm choice for Wound-Wait mutexes")] [jstultz: rebased to mainline, added extra wake_up_q & init to avoid hangs, similar to Connor's rework of this patch] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Juri Lelli Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Metin Kaya Acked-by: Davidlohr Bueso Tested-by: K Prateek Nayak Tested-by: Metin Kaya Link: https://lore.kernel.org/r/20241009235352.1614323-2-jstultz@google.com --- kernel/futex/pi.c | 6 ++++- kernel/locking/mutex.c | 16 +++++++++---- kernel/locking/rtmutex.c | 51 ++++++++++++++++++++++++++++++----------- kernel/locking/rtmutex_api.c | 12 +++++++--- kernel/locking/rtmutex_common.h | 3 ++- kernel/locking/rwbase_rt.c | 8 ++++++- kernel/locking/rwsem.c | 4 ++-- kernel/locking/spinlock_rt.c | 5 ++-- kernel/locking/ww_mutex.h | 30 +++++++++++++++--------- 9 files changed, 96 insertions(+), 39 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/futex/pi.c b/kernel/futex/pi.c index 5722467f2737..d62cca5ed8f4 100644 --- a/kernel/futex/pi.c +++ b/kernel/futex/pi.c @@ -922,6 +922,7 @@ int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int tryl struct rt_mutex_waiter rt_waiter; struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; + DEFINE_WAKE_Q(wake_q); int res, ret; if (!IS_ENABLED(CONFIG_FUTEX_PI)) @@ -1018,8 +1019,11 @@ retry_private: * such that futex_unlock_pi() is guaranteed to observe the waiter when * it sees the futex_q::pi_state. */ - ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current); + ret = __rt_mutex_start_proxy_lock(&q.pi_state->pi_mutex, &rt_waiter, current, &wake_q); + preempt_disable(); raw_spin_unlock_irq(&q.pi_state->pi_mutex.wait_lock); + wake_up_q(&wake_q); + preempt_enable(); if (ret) { if (ret == 1) diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index cbae8c0b89ab..6c94da061ec2 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -575,6 +575,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas struct lockdep_map *nest_lock, unsigned long ip, struct ww_acquire_ctx *ww_ctx, const bool use_ww_ctx) { + DEFINE_WAKE_Q(wake_q); struct mutex_waiter waiter; struct ww_mutex *ww; int ret; @@ -625,7 +626,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas */ if (__mutex_trylock(lock)) { if (ww_ctx) - __ww_mutex_check_waiters(lock, ww_ctx); + __ww_mutex_check_waiters(lock, ww_ctx, &wake_q); goto skip_wait; } @@ -645,7 +646,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas * Add in stamp order, waking up waiters that must kill * themselves. 
*/ - ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx); + ret = __ww_mutex_add_waiter(&waiter, lock, ww_ctx, &wake_q); if (ret) goto err_early_kill; } @@ -681,6 +682,10 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas } raw_spin_unlock(&lock->wait_lock); + /* Make sure we do wakeups before calling schedule */ + wake_up_q(&wake_q); + wake_q_init(&wake_q); + schedule_preempt_disabled(); first = __mutex_waiter_is_first(lock, &waiter); @@ -714,7 +719,7 @@ acquired: */ if (!ww_ctx->is_wait_die && !__mutex_waiter_is_first(lock, &waiter)) - __ww_mutex_check_waiters(lock, ww_ctx); + __ww_mutex_check_waiters(lock, ww_ctx, &wake_q); } __mutex_remove_waiter(lock, &waiter); @@ -730,6 +735,7 @@ skip_wait: ww_mutex_lock_acquired(ww, ww_ctx); raw_spin_unlock(&lock->wait_lock); + wake_up_q(&wake_q); preempt_enable(); return 0; @@ -741,6 +747,7 @@ err_early_kill: raw_spin_unlock(&lock->wait_lock); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, ip); + wake_up_q(&wake_q); preempt_enable(); return ret; } @@ -951,9 +958,10 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne if (owner & MUTEX_FLAG_HANDOFF) __mutex_handoff(lock, next); + preempt_disable(); raw_spin_unlock(&lock->wait_lock); - wake_up_q(&wake_q); + preempt_enable(); } #ifndef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index ebebd0eec7f6..c7de80ee1f9d 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -34,13 +34,15 @@ static inline int __ww_mutex_add_waiter(struct rt_mutex_waiter *waiter, struct rt_mutex *lock, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { return 0; } static inline void __ww_mutex_check_waiters(struct rt_mutex *lock, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { } @@ -1201,7 +1203,8 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, struct task_struct *task, struct ww_acquire_ctx *ww_ctx, - enum rtmutex_chainwalk chwalk) + enum rtmutex_chainwalk chwalk, + struct wake_q_head *wake_q) { struct task_struct *owner = rt_mutex_owner(lock); struct rt_mutex_waiter *top_waiter = waiter; @@ -1245,7 +1248,10 @@ static int __sched task_blocks_on_rt_mutex(struct rt_mutex_base *lock, /* Check whether the waiter should back out immediately */ rtm = container_of(lock, struct rt_mutex, rtmutex); - res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx); + preempt_disable(); + res = __ww_mutex_add_waiter(waiter, rtm, ww_ctx, wake_q); + wake_up_q(wake_q); + preempt_enable(); if (res) { raw_spin_lock(&task->pi_lock); rt_mutex_dequeue(lock, waiter); @@ -1674,12 +1680,14 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, * @state: The task state for sleeping * @chwalk: Indicator whether full or partial chainwalk is requested * @waiter: Initializer waiter for blocking + * @wake_q: The wake_q to wake tasks after we release the wait_lock */ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, unsigned int state, enum rtmutex_chainwalk chwalk, - struct rt_mutex_waiter *waiter) + struct rt_mutex_waiter *waiter, + struct wake_q_head *wake_q) { struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); struct ww_mutex *ww = ww_container_of(rtm); @@ -1690,7 +1698,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, /* Try to acquire the lock again: */ if 
(try_to_take_rt_mutex(lock, current, NULL)) { if (build_ww_mutex() && ww_ctx) { - __ww_mutex_check_waiters(rtm, ww_ctx); + __ww_mutex_check_waiters(rtm, ww_ctx, wake_q); ww_mutex_lock_acquired(ww, ww_ctx); } return 0; @@ -1700,7 +1708,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, trace_contention_begin(lock, LCB_F_RT); - ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk); + ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk, wake_q); if (likely(!ret)) ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter); @@ -1708,7 +1716,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, /* acquired the lock */ if (build_ww_mutex() && ww_ctx) { if (!ww_ctx->is_wait_die) - __ww_mutex_check_waiters(rtm, ww_ctx); + __ww_mutex_check_waiters(rtm, ww_ctx, wake_q); ww_mutex_lock_acquired(ww, ww_ctx); } } else { @@ -1730,7 +1738,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, - unsigned int state) + unsigned int state, + struct wake_q_head *wake_q) { struct rt_mutex_waiter waiter; int ret; @@ -1739,7 +1748,7 @@ static inline int __rt_mutex_slowlock_locked(struct rt_mutex_base *lock, waiter.ww_ctx = ww_ctx; ret = __rt_mutex_slowlock(lock, ww_ctx, state, RT_MUTEX_MIN_CHAINWALK, - &waiter); + &waiter, wake_q); debug_rt_mutex_free_waiter(&waiter); return ret; @@ -1755,6 +1764,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, struct ww_acquire_ctx *ww_ctx, unsigned int state) { + DEFINE_WAKE_Q(wake_q); unsigned long flags; int ret; @@ -1776,8 +1786,11 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, * irqsave/restore variants. 
*/ raw_spin_lock_irqsave(&lock->wait_lock, flags); - ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); + ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state, &wake_q); + preempt_disable(); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + wake_up_q(&wake_q); + preempt_enable(); rt_mutex_post_schedule(); return ret; @@ -1803,8 +1816,10 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, /** * rtlock_slowlock_locked - Slow path lock acquisition for RT locks * @lock: The underlying RT mutex + * @wake_q: The wake_q to wake tasks after we release the wait_lock */ -static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) +static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock, + struct wake_q_head *wake_q) { struct rt_mutex_waiter waiter; struct task_struct *owner; @@ -1821,7 +1836,7 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) trace_contention_begin(lock, LCB_F_RT); - task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK); + task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK, wake_q); for (;;) { /* Try to acquire the lock again */ @@ -1832,7 +1847,11 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) owner = rt_mutex_owner(lock); else owner = NULL; + preempt_disable(); raw_spin_unlock_irq(&lock->wait_lock); + wake_up_q(wake_q); + wake_q_init(wake_q); + preempt_enable(); if (!owner || !rtmutex_spin_on_owner(lock, &waiter, owner)) schedule_rtlock(); @@ -1857,10 +1876,14 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock) { unsigned long flags; + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irqsave(&lock->wait_lock, flags); - rtlock_slowlock_locked(lock); + rtlock_slowlock_locked(lock, &wake_q); + preempt_disable(); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + wake_up_q(&wake_q); + preempt_enable(); } #endif /* RT_MUTEX_BUILD_SPINLOCKS */ diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index a6974d044593..2bc14c049a64 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -275,6 +275,7 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock) * @lock: the rt_mutex to take * @waiter: the pre-initialized rt_mutex_waiter * @task: the task to prepare + * @wake_q: the wake_q to wake tasks after we release the wait_lock * * Starts the rt_mutex acquire; it enqueues the @waiter and does deadlock * detection. It does not wait, see rt_mutex_wait_proxy_lock() for that. 
@@ -291,7 +292,8 @@ void __sched rt_mutex_proxy_unlock(struct rt_mutex_base *lock) */ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task) + struct task_struct *task, + struct wake_q_head *wake_q) { int ret; @@ -302,7 +304,7 @@ int __sched __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, /* We enforce deadlock detection for futexes */ ret = task_blocks_on_rt_mutex(lock, waiter, task, NULL, - RT_MUTEX_FULL_CHAINWALK); + RT_MUTEX_FULL_CHAINWALK, wake_q); if (ret && !rt_mutex_owner(lock)) { /* @@ -341,12 +343,16 @@ int __sched rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct task_struct *task) { int ret; + DEFINE_WAKE_Q(wake_q); raw_spin_lock_irq(&lock->wait_lock); - ret = __rt_mutex_start_proxy_lock(lock, waiter, task); + ret = __rt_mutex_start_proxy_lock(lock, waiter, task, &wake_q); if (unlikely(ret)) remove_waiter(lock, waiter); + preempt_disable(); raw_spin_unlock_irq(&lock->wait_lock); + wake_up_q(&wake_q); + preempt_enable(); return ret; } diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 1162e07cdaea..c38a2d2d4a7e 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -83,7 +83,8 @@ extern void rt_mutex_init_proxy_locked(struct rt_mutex_base *lock, extern void rt_mutex_proxy_unlock(struct rt_mutex_base *lock); extern int __rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, - struct task_struct *task); + struct task_struct *task, + struct wake_q_head *); extern int rt_mutex_start_proxy_lock(struct rt_mutex_base *lock, struct rt_mutex_waiter *waiter, struct task_struct *task); diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 34a59569db6b..9f4322c07486 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -69,6 +69,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, unsigned int state) { struct rt_mutex_base *rtm = &rwb->rtmutex; + DEFINE_WAKE_Q(wake_q); int ret; rwbase_pre_schedule(); @@ -110,7 +111,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, * For rwlocks this returns 0 unconditionally, so the below * !ret conditionals are optimized out. 
*/ - ret = rwbase_rtmutex_slowlock_locked(rtm, state); + ret = rwbase_rtmutex_slowlock_locked(rtm, state, &wake_q); /* * On success the rtmutex is held, so there can't be a writer @@ -121,7 +122,12 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, */ if (!ret) atomic_inc(&rwb->readers); + + preempt_disable(); raw_spin_unlock_irq(&rtm->wait_lock); + wake_up_q(&wake_q); + preempt_enable(); + if (!ret) rwbase_rtmutex_unlock(rtm); diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 2bbb6eca5144..2ddb827e3bea 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1413,8 +1413,8 @@ static inline void __downgrade_write(struct rw_semaphore *sem) #define rwbase_rtmutex_lock_state(rtm, state) \ __rt_mutex_lock(rtm, state) -#define rwbase_rtmutex_slowlock_locked(rtm, state) \ - __rt_mutex_slowlock_locked(rtm, NULL, state) +#define rwbase_rtmutex_slowlock_locked(rtm, state, wq) \ + __rt_mutex_slowlock_locked(rtm, NULL, state, wq) #define rwbase_rtmutex_unlock(rtm) \ __rt_mutex_unlock(rtm) diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 38e292454fcc..014143934e00 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -162,9 +162,10 @@ rwbase_rtmutex_lock_state(struct rt_mutex_base *rtm, unsigned int state) } static __always_inline int -rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state) +rwbase_rtmutex_slowlock_locked(struct rt_mutex_base *rtm, unsigned int state, + struct wake_q_head *wake_q) { - rtlock_slowlock_locked(rtm); + rtlock_slowlock_locked(rtm, wake_q); return 0; } diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index 76d204b7d29c..a54bd16d0f17 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -275,7 +275,7 @@ __ww_ctx_less(struct ww_acquire_ctx *a, struct ww_acquire_ctx *b) */ static bool __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, struct wake_q_head *wake_q) { if (!ww_ctx->is_wait_die) return false; @@ -284,7 +284,7 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, #ifndef WW_RT debug_mutex_wake_waiter(lock, waiter); #endif - wake_up_process(waiter->task); + wake_q_add(wake_q, waiter->task); } return true; @@ -299,7 +299,8 @@ __ww_mutex_die(struct MUTEX *lock, struct MUTEX_WAITER *waiter, */ static bool __ww_mutex_wound(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, - struct ww_acquire_ctx *hold_ctx) + struct ww_acquire_ctx *hold_ctx, + struct wake_q_head *wake_q) { struct task_struct *owner = __ww_mutex_owner(lock); @@ -331,7 +332,7 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * wakeup pending to re-read the wounded state. */ if (owner != current) - wake_up_process(owner); + wake_q_add(wake_q, owner); return true; } @@ -352,7 +353,8 @@ static bool __ww_mutex_wound(struct MUTEX *lock, * The current task must not be on the wait list. 
*/ static void -__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) +__ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { struct MUTEX_WAITER *cur; @@ -364,8 +366,8 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) if (!cur->ww_ctx) continue; - if (__ww_mutex_die(lock, cur, ww_ctx) || - __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) + if (__ww_mutex_die(lock, cur, ww_ctx, wake_q) || + __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx, wake_q)) break; } } @@ -377,6 +379,8 @@ __ww_mutex_check_waiters(struct MUTEX *lock, struct ww_acquire_ctx *ww_ctx) static __always_inline void ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { + DEFINE_WAKE_Q(wake_q); + ww_mutex_lock_acquired(lock, ctx); /* @@ -405,8 +409,11 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) * die or wound us. */ lock_wait_lock(&lock->base); - __ww_mutex_check_waiters(&lock->base, ctx); + __ww_mutex_check_waiters(&lock->base, ctx, &wake_q); + preempt_disable(); unlock_wait_lock(&lock->base); + wake_up_q(&wake_q); + preempt_enable(); } static __always_inline int @@ -488,7 +495,8 @@ __ww_mutex_check_kill(struct MUTEX *lock, struct MUTEX_WAITER *waiter, static inline int __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, struct MUTEX *lock, - struct ww_acquire_ctx *ww_ctx) + struct ww_acquire_ctx *ww_ctx, + struct wake_q_head *wake_q) { struct MUTEX_WAITER *cur, *pos = NULL; bool is_wait_die; @@ -532,7 +540,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, pos = cur; /* Wait-Die: ensure younger waiters die. */ - __ww_mutex_die(lock, cur, ww_ctx); + __ww_mutex_die(lock, cur, ww_ctx, wake_q); } __ww_waiter_add(lock, waiter, pos); @@ -550,7 +558,7 @@ __ww_mutex_add_waiter(struct MUTEX_WAITER *waiter, * such that either we or the fastpath will wound @ww->ctx. */ smp_mb(); - __ww_mutex_wound(lock, ww_ctx, ww->ctx); + __ww_mutex_wound(lock, ww_ctx, ww->ctx, wake_q); } return 0; -- cgit v1.2.3 From 5ec58525a1f1bd6ca8ea778e9df55cd82bc02e11 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 9 Oct 2024 16:53:35 -0700 Subject: locking/mutex: Make mutex::wait_lock irq safe With the proxy-execution series, we traverse the task->mutex->task blocked_on/owner chain in the scheduler core. We do this while holding the rq::lock to keep the structures in place while taking and releasing the alternating lock types. Since the mutex::wait_lock is one of the locks we will take in this way under the rq::lock in the scheduler core, we need to make sure that its usage elsewhere is irq safe. 
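Concretely, the conversion applied to each wait_lock critical section looks like this (an illustrative before/after excerpt, not a complete function):

    /* Before: wait_lock taken without touching the irq state. */
    raw_spin_lock(&lock->wait_lock);
    /* ... critical section ... */
    raw_spin_unlock(&lock->wait_lock);

    /*
     * After: irq state saved and restored, so the same section is
     * safe when reached from contexts that nest under rq::lock with
     * interrupts disabled.
     */
    unsigned long flags;

    raw_spin_lock_irqsave(&lock->wait_lock, flags);
    /* ... critical section ... */
    raw_spin_unlock_irqrestore(&lock->wait_lock, flags);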
[rebase & fix {un,}lock_wait_lock helpers in ww_mutex.h] Signed-off-by: Juri Lelli Signed-off-by: Connor O'Brien Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Metin Kaya Reviewed-by: Valentin Schneider Tested-by: K Prateek Nayak Tested-by: Metin Kaya Link: https://lore.kernel.org/r/20241009235352.1614323-3-jstultz@google.com --- kernel/locking/mutex.c | 18 ++++++++++-------- kernel/locking/ww_mutex.h | 21 +++++++++++---------- 2 files changed, 21 insertions(+), 18 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 6c94da061ec2..cd248d1767eb 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -578,6 +578,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas DEFINE_WAKE_Q(wake_q); struct mutex_waiter waiter; struct ww_mutex *ww; + unsigned long flags; int ret; if (!use_ww_ctx) @@ -620,7 +621,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas return 0; } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); /* * After waiting to acquire the wait_lock, try again. */ @@ -681,7 +682,7 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas goto err; } - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); /* Make sure we do wakeups before calling schedule */ wake_up_q(&wake_q); wake_q_init(&wake_q); @@ -706,9 +707,9 @@ __mutex_lock_common(struct mutex *lock, unsigned int state, unsigned int subclas trace_contention_begin(lock, LCB_F_MUTEX); } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); acquired: __set_current_state(TASK_RUNNING); @@ -734,7 +735,7 @@ skip_wait: if (ww_ctx) ww_mutex_lock_acquired(ww, ww_ctx); - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); wake_up_q(&wake_q); preempt_enable(); return 0; @@ -744,7 +745,7 @@ err: __mutex_remove_waiter(lock, &waiter); err_early_kill: trace_contention_end(lock, ret); - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); debug_mutex_free_waiter(&waiter); mutex_release(&lock->dep_map, ip); wake_up_q(&wake_q); @@ -915,6 +916,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne struct task_struct *next = NULL; DEFINE_WAKE_Q(wake_q); unsigned long owner; + unsigned long flags; mutex_release(&lock->dep_map, ip); @@ -941,7 +943,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne } } - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, flags); debug_mutex_unlock(lock); if (!list_empty(&lock->wait_list)) { /* get the first entry from the wait-list: */ @@ -959,7 +961,7 @@ static noinline void __sched __mutex_unlock_slowpath(struct mutex *lock, unsigne __mutex_handoff(lock, next); preempt_disable(); - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, flags); wake_up_q(&wake_q); preempt_enable(); } diff --git a/kernel/locking/ww_mutex.h b/kernel/locking/ww_mutex.h index a54bd16d0f17..37f025a096c9 100644 --- a/kernel/locking/ww_mutex.h +++ b/kernel/locking/ww_mutex.h @@ -70,14 +70,14 @@ __ww_mutex_has_waiters(struct mutex *lock) return atomic_long_read(&lock->owner) & MUTEX_FLAG_WAITERS; } -static inline void lock_wait_lock(struct mutex *lock) +static inline void 
lock_wait_lock(struct mutex *lock, unsigned long *flags) { - raw_spin_lock(&lock->wait_lock); + raw_spin_lock_irqsave(&lock->wait_lock, *flags); } -static inline void unlock_wait_lock(struct mutex *lock) +static inline void unlock_wait_lock(struct mutex *lock, unsigned long *flags) { - raw_spin_unlock(&lock->wait_lock); + raw_spin_unlock_irqrestore(&lock->wait_lock, *flags); } static inline void lockdep_assert_wait_lock_held(struct mutex *lock) @@ -144,14 +144,14 @@ __ww_mutex_has_waiters(struct rt_mutex *lock) return rt_mutex_has_waiters(&lock->rtmutex); } -static inline void lock_wait_lock(struct rt_mutex *lock) +static inline void lock_wait_lock(struct rt_mutex *lock, unsigned long *flags) { - raw_spin_lock(&lock->rtmutex.wait_lock); + raw_spin_lock_irqsave(&lock->rtmutex.wait_lock, *flags); } -static inline void unlock_wait_lock(struct rt_mutex *lock) +static inline void unlock_wait_lock(struct rt_mutex *lock, unsigned long *flags) { - raw_spin_unlock(&lock->rtmutex.wait_lock); + raw_spin_unlock_irqrestore(&lock->rtmutex.wait_lock, *flags); } static inline void lockdep_assert_wait_lock_held(struct rt_mutex *lock) @@ -380,6 +380,7 @@ static __always_inline void ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) { DEFINE_WAKE_Q(wake_q); + unsigned long flags; ww_mutex_lock_acquired(lock, ctx); @@ -408,10 +409,10 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) * Uh oh, we raced in fastpath, check if any of the waiters need to * die or wound us. */ - lock_wait_lock(&lock->base); + lock_wait_lock(&lock->base, &flags); __ww_mutex_check_waiters(&lock->base, ctx, &wake_q); preempt_disable(); - unlock_wait_lock(&lock->base); + unlock_wait_lock(&lock->base, &flags); wake_up_q(&wake_q); preempt_enable(); } -- cgit v1.2.3 From 3a9320ecb06c6c5ca5a8a595717e5186b5f20141 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 9 Oct 2024 16:53:36 -0700 Subject: locking/mutex: Expose __mutex_owner() Implementing proxy execution requires that scheduler code be able to identify the current owner of a mutex. Expose __mutex_owner() for this purpose (alone!). Includes a null mutex check, so that users of the function can be simplified. [Removed the EXPORT_SYMBOL] [jstultz: Reworked per Peter's suggestions] Signed-off-by: Juri Lelli Signed-off-by: Valentin Schneider Signed-off-by: Connor O'Brien Signed-off-by: John Stultz Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Metin Kaya Reviewed-by: Valentin Schneider Tested-by: K Prateek Nayak Tested-by: Metin Kaya Link: https://lore.kernel.org/r/20241009235352.1614323-4-jstultz@google.com --- kernel/locking/mutex.c | 25 ------------------------- kernel/locking/mutex.h | 27 +++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 25 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index cd248d1767eb..3302e52f0c96 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -56,31 +56,6 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) } EXPORT_SYMBOL(__mutex_init); -/* - * @owner: contains: 'struct task_struct *' to the current lock owner, - * NULL means not owned. Since task_struct pointers are aligned at - * at least L1_CACHE_BYTES, we have low bits to store extra state. - * - * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. - * Bit1 indicates unlock needs to hand the lock to the top-waiter - * Bit2 indicates handoff has been done and we're waiting for pickup. 
- */ -#define MUTEX_FLAG_WAITERS 0x01 -#define MUTEX_FLAG_HANDOFF 0x02 -#define MUTEX_FLAG_PICKUP 0x04 - -#define MUTEX_FLAGS 0x07 - -/* - * Internal helper function; C doesn't allow us to hide it :/ - * - * DO NOT USE (outside of mutex code). - */ -static inline struct task_struct *__mutex_owner(struct mutex *lock) -{ - return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); -} - static inline struct task_struct *__owner_task(unsigned long owner) { return (struct task_struct *)(owner & ~MUTEX_FLAGS); diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 0b2a79c4013b..cbff35b9b7ae 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -20,6 +20,33 @@ struct mutex_waiter { #endif }; +/* + * @owner: contains: 'struct task_struct *' to the current lock owner, + * NULL means not owned. Since task_struct pointers are aligned at + * at least L1_CACHE_BYTES, we have low bits to store extra state. + * + * Bit0 indicates a non-empty waiter list; unlock must issue a wakeup. + * Bit1 indicates unlock needs to hand the lock to the top-waiter + * Bit2 indicates handoff has been done and we're waiting for pickup. + */ +#define MUTEX_FLAG_WAITERS 0x01 +#define MUTEX_FLAG_HANDOFF 0x02 +#define MUTEX_FLAG_PICKUP 0x04 + +#define MUTEX_FLAGS 0x07 + +/* + * Internal helper function; C doesn't allow us to hide it :/ + * + * DO NOT USE (outside of mutex & scheduler code). + */ +static inline struct task_struct *__mutex_owner(struct mutex *lock) +{ + if (!lock) + return NULL; + return (struct task_struct *)(atomic_long_read(&lock->owner) & ~MUTEX_FLAGS); +} + #ifdef CONFIG_DEBUG_MUTEXES extern void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter); -- cgit v1.2.3 From 0784181b44af831a3fa52e1e5ff77c388d699dba Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 26 Sep 2024 16:17:37 +0100 Subject: lockdep: Add lockdep_cleanup_dead_cpu() Add a function to check that an offline CPU has left the tracing infrastructure in a sane state. Commit 9bb69ba4c177 ("ACPI: processor_idle: use raw_safe_halt() in acpi_idle_play_dead()") fixed an issue where the acpi_idle_play_dead() function called safe_halt() instead of raw_safe_halt(), which had the side-effect of setting the hardirqs_enabled flag for the offline CPU. On x86 this triggered warnings from lockdep_assert_irqs_disabled() when the CPU was brought back online again later. These warnings were too early for the exception to be handled correctly, leading to a triple-fault. Add lockdep_cleanup_dead_cpu() to check for this kind of failure mode, print the events leading up to it, and correct it so that the CPU can come online again correctly. Re-introducing the original bug now merely results in this warning instead: [ 61.556652] smpboot: CPU 1 is now offline [ 61.556769] CPU 1 left hardirqs enabled! 
[ 61.556915] irq event stamp: 128149 [ 61.556965] hardirqs last enabled at (128149): [] acpi_idle_play_dead+0x46/0x70 [ 61.557055] hardirqs last disabled at (128148): [] do_idle+0x90/0xe0 [ 61.557117] softirqs last enabled at (128078): [] __do_softirq+0x31c/0x423 [ 61.557199] softirqs last disabled at (128065): [] __irq_exit_rcu+0x91/0x100 [boqun: Capitalize the title and reword the message a bit] Signed-off-by: David Woodhouse Reviewed-by: Thomas Gleixner Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/f7bd2b3b999051bb3ef4be34526a9262008285f5.camel@infradead.org --- include/linux/irqflags.h | 6 ++++++ kernel/cpu.c | 1 + kernel/locking/lockdep.c | 24 ++++++++++++++++++++++++ 3 files changed, 31 insertions(+) (limited to 'kernel/locking') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 3f003d5fde53..57b074e0cfbb 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -18,6 +18,8 @@ #include #include +struct task_struct; + /* Currently lockdep_softirqs_on/off is used only by lockdep */ #ifdef CONFIG_PROVE_LOCKING extern void lockdep_softirqs_on(unsigned long ip); @@ -25,12 +27,16 @@ extern void lockdep_hardirqs_on_prepare(void); extern void lockdep_hardirqs_on(unsigned long ip); extern void lockdep_hardirqs_off(unsigned long ip); + extern void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle); #else static inline void lockdep_softirqs_on(unsigned long ip) { } static inline void lockdep_softirqs_off(unsigned long ip) { } static inline void lockdep_hardirqs_on_prepare(void) { } static inline void lockdep_hardirqs_on(unsigned long ip) { } static inline void lockdep_hardirqs_off(unsigned long ip) { } + static inline void lockdep_cleanup_dead_cpu(unsigned int cpu, + struct task_struct *idle) {} #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/cpu.c b/kernel/cpu.c index d293d52a3e00..c4aaf73dec9e 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1338,6 +1338,7 @@ static int takedown_cpu(unsigned int cpu) cpuhp_bp_sync_dead(cpu); + lockdep_cleanup_dead_cpu(cpu, idle_thread_get(cpu)); tick_cleanup_dead_cpu(cpu); /* diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 536bd471557f..6fd4af217e71 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4586,6 +4586,30 @@ void lockdep_softirqs_off(unsigned long ip) debug_atomic_inc(redundant_softirqs_off); } +/** + * lockdep_cleanup_dead_cpu - Ensure CPU lockdep state is cleanly stopped + * + * @cpu: index of offlined CPU + * @idle: task pointer for offlined CPU's idle thread + * + * Invoked after the CPU is dead. Ensures that the tracing infrastructure + * is left in a suitable state for the CPU to be subsequently brought + * online again. + */ +void lockdep_cleanup_dead_cpu(unsigned int cpu, struct task_struct *idle) +{ + if (unlikely(!debug_locks)) + return; + + if (unlikely(per_cpu(hardirqs_enabled, cpu))) { + pr_warn("CPU %u left hardirqs enabled!", cpu); + if (idle) + print_irqtrace_events(idle); + /* Clean it up for when the CPU comes online again. */ + per_cpu(hardirqs_enabled, cpu) = 0; + } +} + static int mark_usage(struct task_struct *curr, struct held_lock *hlock, int check) { -- cgit v1.2.3 From e48bf7ca6056297664eb260fa88cae8e50d9b698 Mon Sep 17 00:00:00 2001 From: "Jiri Slaby (SUSE)" Date: Mon, 7 Oct 2024 08:54:57 +0200 Subject: lockdep: Use info level for lockdep initial info messages All those: Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar ... MAX_LOCKDEP_SUBCLASSES: 8 ... MAX_LOCK_DEPTH: 48 ... 
MAX_LOCKDEP_KEYS: 8192 and so on are dumped with the KERN_WARNING level. It is due to missing KERN_* annotation. Use pr_info() instead of bare printk() to dump the info with the info level. Signed-off-by: Jiri Slaby (SUSE) Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Will Deacon Cc: Waiman Long Cc: Boqun Feng Reviewed-by: Waiman Long Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20241007065457.20128-1-jirislaby@kernel.org --- kernel/locking/lockdep.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 6fd4af217e71..2d8ec0351ef9 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -6600,17 +6600,17 @@ EXPORT_SYMBOL_GPL(lockdep_unregister_key); void __init lockdep_init(void) { - printk("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); + pr_info("Lock dependency validator: Copyright (c) 2006 Red Hat, Inc., Ingo Molnar\n"); - printk("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); - printk("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); - printk("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); - printk("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); - printk("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); - printk("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); - printk("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); + pr_info("... MAX_LOCKDEP_SUBCLASSES: %lu\n", MAX_LOCKDEP_SUBCLASSES); + pr_info("... MAX_LOCK_DEPTH: %lu\n", MAX_LOCK_DEPTH); + pr_info("... MAX_LOCKDEP_KEYS: %lu\n", MAX_LOCKDEP_KEYS); + pr_info("... CLASSHASH_SIZE: %lu\n", CLASSHASH_SIZE); + pr_info("... MAX_LOCKDEP_ENTRIES: %lu\n", MAX_LOCKDEP_ENTRIES); + pr_info("... MAX_LOCKDEP_CHAINS: %lu\n", MAX_LOCKDEP_CHAINS); + pr_info("... CHAINHASH_SIZE: %lu\n", CHAINHASH_SIZE); - printk(" memory used by lock dependency info: %zu kB\n", + pr_info(" memory used by lock dependency info: %zu kB\n", (sizeof(lock_classes) + sizeof(lock_classes_in_use) + sizeof(classhash_table) + @@ -6628,12 +6628,12 @@ void __init lockdep_init(void) ); #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) - printk(" memory used for stack traces: %zu kB\n", + pr_info(" memory used for stack traces: %zu kB\n", (sizeof(stack_trace) + sizeof(stack_trace_hash)) / 1024 ); #endif - printk(" per task-struct memory footprint: %zu bytes\n", + pr_info(" per task-struct memory footprint: %zu bytes\n", sizeof(((struct task_struct *)NULL)->held_locks)); } -- cgit v1.2.3 From 2628cbd03924b91a360f72117a9b9c78cfd050e7 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 9 Aug 2024 09:48:02 +0800 Subject: locking/pvqspinlock: Convert fields of 'enum vcpu_state' to uppercase Convert the fields of 'enum vcpu_state' to uppercase for better readability. No functional changes intended. Acked-by: Waiman Long Signed-off-by: Qiuxu Zhuo Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20240809014802.15320-1-qiuxu.zhuo@intel.com --- kernel/locking/qspinlock_paravirt.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index ac2e22502741..dc1cb90e3644 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -38,13 +38,13 @@ #define PV_PREV_CHECK_MASK 0xff /* - * Queue node uses: vcpu_running & vcpu_halted. - * Queue head uses: vcpu_running & vcpu_hashed. 
+ * Queue node uses: VCPU_RUNNING & VCPU_HALTED. + * Queue head uses: VCPU_RUNNING & VCPU_HASHED. */ enum vcpu_state { - vcpu_running = 0, - vcpu_halted, /* Used only in pv_wait_node */ - vcpu_hashed, /* = pv_hash'ed + vcpu_halted */ + VCPU_RUNNING = 0, + VCPU_HALTED, /* Used only in pv_wait_node */ + VCPU_HASHED, /* = pv_hash'ed + VCPU_HALTED */ }; struct pv_node { @@ -266,7 +266,7 @@ pv_wait_early(struct pv_node *prev, int loop) if ((loop & PV_PREV_CHECK_MASK) != 0) return false; - return READ_ONCE(prev->state) != vcpu_running; + return READ_ONCE(prev->state) != VCPU_RUNNING; } /* @@ -279,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node) BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode)); pn->cpu = smp_processor_id(); - pn->state = vcpu_running; + pn->state = VCPU_RUNNING; } /* @@ -308,26 +308,26 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) /* * Order pn->state vs pn->locked thusly: * - * [S] pn->state = vcpu_halted [S] next->locked = 1 + * [S] pn->state = VCPU_HALTED [S] next->locked = 1 * MB MB - * [L] pn->locked [RmW] pn->state = vcpu_hashed + * [L] pn->locked [RmW] pn->state = VCPU_HASHED * * Matches the cmpxchg() from pv_kick_node(). */ - smp_store_mb(pn->state, vcpu_halted); + smp_store_mb(pn->state, VCPU_HALTED); if (!READ_ONCE(node->locked)) { lockevent_inc(pv_wait_node); lockevent_cond_inc(pv_wait_early, wait_early); - pv_wait(&pn->state, vcpu_halted); + pv_wait(&pn->state, VCPU_HALTED); } /* - * If pv_kick_node() changed us to vcpu_hashed, retain that + * If pv_kick_node() changed us to VCPU_HASHED, retain that * value so that pv_wait_head_or_lock() knows to not also try * to hash this lock. */ - cmpxchg(&pn->state, vcpu_halted, vcpu_running); + cmpxchg(&pn->state, VCPU_HALTED, VCPU_RUNNING); /* * If the locked flag is still not set after wakeup, it is a @@ -357,7 +357,7 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) { struct pv_node *pn = (struct pv_node *)node; - u8 old = vcpu_halted; + u8 old = VCPU_HALTED; /* * If the vCPU is indeed halted, advance its state to match that of * pv_wait_node(). If OTOH this fails, the vCPU was running and will @@ -374,7 +374,7 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node) * subsequent writes. */ smp_mb__before_atomic(); - if (!try_cmpxchg_relaxed(&pn->state, &old, vcpu_hashed)) + if (!try_cmpxchg_relaxed(&pn->state, &old, VCPU_HASHED)) return; /* @@ -407,7 +407,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * If pv_kick_node() already advanced our state, we don't need to * insert ourselves into the hash table anymore. */ - if (READ_ONCE(pn->state) == vcpu_hashed) + if (READ_ONCE(pn->state) == VCPU_HASHED) lp = (struct qspinlock **)1; /* @@ -420,7 +420,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) * Set correct vCPU state to be used by queue node wait-early * mechanism. 
*/ - WRITE_ONCE(pn->state, vcpu_running); + WRITE_ONCE(pn->state, VCPU_RUNNING); /* * Set the pending bit in the active lock spinning loop to * @@ -460,7 +460,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) goto gotlock; } } - WRITE_ONCE(pn->state, vcpu_hashed); + WRITE_ONCE(pn->state, VCPU_HASHED); lockevent_inc(pv_wait_head); lockevent_cond_inc(pv_wait_again, waitcnt); pv_wait(&lock->locked, _Q_SLOW_VAL); -- cgit v1.2.3 From 168660b826a77fda28235e0b0b3027041d6a5240 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:04 +0200 Subject: locking/rt: Add sparse annotation for RCU. Every lock that becomes a sleeping lock on PREEMPT_RT starts an RCU read side critical section. There is no sparse annotation for this and sparse complains about unbalanced locking. Add __acquires()/__releases() annotations for the RCU lock. This covers all but the trylock functions. A __cond_acquires() annotation didn't work. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-4-bigeasy@linutronix.de --- kernel/locking/spinlock_rt.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 38e292454fcc..d1cf8b2b6dca 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -51,7 +51,7 @@ static __always_inline void __rt_spin_lock(spinlock_t *lock) migrate_disable(); } -void __sched rt_spin_lock(spinlock_t *lock) +void __sched rt_spin_lock(spinlock_t *lock) __acquires(RCU) { spin_acquire(&lock->dep_map, 0, 0, _RET_IP_); __rt_spin_lock(lock); @@ -75,7 +75,7 @@ void __sched rt_spin_lock_nest_lock(spinlock_t *lock, EXPORT_SYMBOL(rt_spin_lock_nest_lock); #endif -void __sched rt_spin_unlock(spinlock_t *lock) +void __sched rt_spin_unlock(spinlock_t *lock) __releases(RCU) { spin_release(&lock->dep_map, _RET_IP_); migrate_enable(); @@ -225,7 +225,7 @@ int __sched rt_write_trylock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_write_trylock); -void __sched rt_read_lock(rwlock_t *rwlock) +void __sched rt_read_lock(rwlock_t *rwlock) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); @@ -235,7 +235,7 @@ void __sched rt_read_lock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_read_lock); -void __sched rt_write_lock(rwlock_t *rwlock) +void __sched rt_write_lock(rwlock_t *rwlock) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); @@ -246,7 +246,7 @@ void __sched rt_write_lock(rwlock_t *rwlock) EXPORT_SYMBOL(rt_write_lock); #ifdef CONFIG_DEBUG_LOCK_ALLOC -void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) +void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(RCU) { rtlock_might_resched(); rwlock_acquire(&rwlock->dep_map, subclass, 0, _RET_IP_); @@ -257,7 +257,7 @@ void __sched rt_write_lock_nested(rwlock_t *rwlock, int subclass) EXPORT_SYMBOL(rt_write_lock_nested); #endif -void __sched rt_read_unlock(rwlock_t *rwlock) +void __sched rt_read_unlock(rwlock_t *rwlock) __releases(RCU) { rwlock_release(&rwlock->dep_map, _RET_IP_); migrate_enable(); @@ -266,7 +266,7 @@ void __sched rt_read_unlock(rwlock_t *rwlock) } EXPORT_SYMBOL(rt_read_unlock); -void __sched rt_write_unlock(rwlock_t *rwlock) +void __sched rt_write_unlock(rwlock_t *rwlock) __releases(RCU) { rwlock_release(&rwlock->dep_map, _RET_IP_); rcu_read_unlock(); -- cgit v1.2.3 From
77abd3b7d9bf384306872b6201b1dfeb1e899892 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Mon, 12 Aug 2024 12:39:05 +0200 Subject: locking/rt: Annotate unlock followed by lock for sparse. rt_mutex_slowlock_block() and rtlock_slowlock_locked() both unlock lock::wait_lock and then lock it later. This is unusual and sparse complains about it. Add __releases() + __acquires() annotation to mark that it is expected. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/all/20240812104200.2239232-5-bigeasy@linutronix.de --- kernel/locking/rtmutex.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel/locking') diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index ebebd0eec7f6..d3b72c2f983f 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1601,6 +1601,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, unsigned int state, struct hrtimer_sleeper *timeout, struct rt_mutex_waiter *waiter) + __releases(&lock->wait_lock) __acquires(&lock->wait_lock) { struct rt_mutex *rtm = container_of(lock, struct rt_mutex, rtmutex); struct task_struct *owner; @@ -1805,6 +1806,7 @@ static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, * @lock: The underlying RT mutex */ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock) + __releases(&lock->wait_lock) __acquires(&lock->wait_lock) { struct rt_mutex_waiter waiter; struct task_struct *owner; -- cgit v1.2.3 From d12b802f183667d4c28589314c99c380a458d57e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 8 Oct 2024 11:26:06 +0200 Subject: locking/rtmutex: Fix misleading comment Going through the RCU-boost and rtmutex code, I ran into this utterly confusing comment. Fix it to avoid confusing future readers. [ tglx: Wordsmithed the comment ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Reviewed-by: Sebastian Andrzej Siewior Link: https://lore.kernel.org/all/20241008092606.GJ33184@noisy.programming.kicks-ass.net --- kernel/locking/rtmutex_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/rtmutex_api.c b/kernel/locking/rtmutex_api.c index a6974d044593..7e79258feb27 100644 --- a/kernel/locking/rtmutex_api.c +++ b/kernel/locking/rtmutex_api.c @@ -175,10 +175,10 @@ bool __sched __rt_mutex_futex_unlock(struct rt_mutex_base *lock, } /* - * We've already deboosted, mark_wakeup_next_waiter() will - * retain preempt_disabled when we drop the wait_lock, to - * avoid inversion prior to the wakeup. preempt_disable() - * therein pairs with rt_mutex_postunlock(). + * mark_wakeup_next_waiter() deboosts and retains preemption + * disabled when dropping the wait_lock, to avoid inversion prior + * to the wakeup. preempt_disable() therein pairs with the + * preempt_enable() in rt_mutex_postunlock(). */ mark_wakeup_next_waiter(wqh, lock); -- cgit v1.2.3 From 0d75e0c420e52b4057a2de274054a5274209a2ae Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 1 Oct 2024 13:45:57 +0200 Subject: locking/osq_lock: Use atomic_try_cmpxchg_release() in osq_unlock() Replace this pattern in osq_unlock(): atomic_cmpxchg(*ptr, old, new) == old ... with the simpler and faster: atomic_try_cmpxchg(*ptr, &old, new) The x86 CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after the CMPXCHG. 
The code in the fast path of osq_unlock() improves from: 11b: 31 c9 xor %ecx,%ecx 11d: 8d 50 01 lea 0x1(%rax),%edx 120: 89 d0 mov %edx,%eax 122: f0 0f b1 0f lock cmpxchg %ecx,(%rdi) 126: 39 c2 cmp %eax,%edx 128: 75 05 jne 12f <...> to: 12b: 31 d2 xor %edx,%edx 12d: 83 c0 01 add $0x1,%eax 130: f0 0f b1 17 lock cmpxchg %edx,(%rdi) 134: 75 05 jne 13b <...> Signed-off-by: Uros Bizjak Signed-off-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Link: https://lore.kernel.org/r/20241001114606.820277-1-ubizjak@gmail.com --- kernel/locking/osq_lock.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/locking') diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index 75a6f6133866..b4233dc2c2b0 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -215,8 +215,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) /* * Fast path for the uncontended case. */ - if (likely(atomic_cmpxchg_release(&lock->tail, curr, - OSQ_UNLOCKED_VAL) == curr)) + if (atomic_try_cmpxchg_release(&lock->tail, &curr, OSQ_UNLOCKED_VAL)) return; /* -- cgit v1.2.3
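For reference, the try_cmpxchg() semantics can be sketched in C roughly as follows (illustrative only; the kernel's generic fallback behaves much like this, while x86 implements it directly on top of CMPXCHG's ZF result, and sketch_try_cmpxchg_release() is a made-up name):

    /*
     * Rough equivalent of atomic_try_cmpxchg_release(v, &old, new):
     * return success as a boolean and, on failure, update *old with
     * the value actually observed so the caller can retry without an
     * extra atomic_read().
     */
    static inline bool sketch_try_cmpxchg_release(atomic_t *v, int *old, int new)
    {
            int seen = atomic_cmpxchg_release(v, *old, new);

            if (seen == *old)
                    return true;
            *old = seen;    /* feed the observed value back to the caller */
            return false;
    }

Because success is reported as a boolean, the compiler can branch on the flag state CMPXCHG already produced, which is why the cmp instruction disappears from the fast path shown above.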