From ee9f8fce99640811b2b8e79d0d1dbe8bab69ba67 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Jul 2017 18:36:57 -0500 Subject: x86/unwind: Add the ORC unwinder Add the new ORC unwinder which is enabled by CONFIG_ORC_UNWINDER=y. It plugs into the existing x86 unwinder framework. It relies on objtool to generate the needed .orc_unwind and .orc_unwind_ip sections. For more details on why ORC is used instead of DWARF, see Documentation/x86/orc-unwinder.txt - but the short version is that it's a simplified, fundamentally more robust debugninfo data structure, which also allows up to two orders of magnitude faster lookups than the DWARF unwinder - which matters to profiling workloads like perf. Thanks to Andy Lutomirski for the performance improvement ideas: splitting the ORC unwind table into two parallel arrays and creating a fast lookup table to search a subset of the unwind table. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Slaby Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/0a6cbfb40f8da99b7a45a1a8302dc6aef16ec812.1500938583.git.jpoimboe@redhat.com [ Extended the changelog. ] Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 98fe715522e8..0f0d019ffb99 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -374,6 +374,9 @@ config STACK_VALIDATION pointers (if CONFIG_FRAME_POINTER is enabled). This helps ensure that runtime stack traces are more reliable. + This is also a prerequisite for generation of ORC unwind data, which + is needed for CONFIG_ORC_UNWINDER. + For more information, see tools/objtool/Documentation/stack-validation.txt. -- cgit v1.2.3 From a34a766ff96d9e88572e35a45066279e40a85d84 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Jul 2017 18:36:58 -0500 Subject: x86/kconfig: Make it easier to switch to the new ORC unwinder A couple of Kconfig changes which make it much easier to switch to the new CONFIG_ORC_UNWINDER: 1) Remove x86 dependencies on CONFIG_FRAME_POINTER for lockdep, latencytop, and fault injection. x86 has a 'guess' unwinder which just scans the stack for kernel text addresses. It's not 100% accurate but in many cases it's good enough. This allows those users who don't want the text overhead of the frame pointer or ORC unwinders to still use these features. More importantly, this also makes it much more straightforward to disable frame pointers. 2) Make CONFIG_ORC_UNWINDER depend on !CONFIG_FRAME_POINTER. While it would be possible to have both enabled, it doesn't really make sense to do so. So enforce a sane configuration to prevent the user from making a dumb mistake. With these changes, when you disable CONFIG_FRAME_POINTER, "make oldconfig" will ask if you want to enable CONFIG_ORC_UNWINDER. Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jiri Slaby Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: live-patching@vger.kernel.org Link: http://lkml.kernel.org/r/9985fb91ce5005fe33ea5cc2a20f14bd33c61d03.1500938583.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 7 +++---- lib/Kconfig.debug | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index dc10ec6ead6e..268a318f9439 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -357,7 +357,7 @@ config PUNIT_ATOM_DEBUG config ORC_UNWINDER bool "ORC unwinder" - depends on X86_64 + depends on X86_64 && !FRAME_POINTER select STACK_VALIDATION ---help--- This option enables the ORC (Oops Rewind Capability) unwinder for @@ -365,9 +365,8 @@ config ORC_UNWINDER a simplified version of the DWARF Call Frame Information standard. This unwinder is more accurate across interrupt entry frames than the - frame pointer unwinder. It can also enable a 5-10% performance - improvement across the entire kernel if CONFIG_FRAME_POINTER is - disabled. + frame pointer unwinder. It also enables a 5-10% performance + improvement across the entire kernel compared to frame pointers. Enabling this option will increase the kernel's runtime memory usage by roughly 2-4MB, depending on your kernel config. diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 0f0d019ffb99..32a48e739e26 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1124,7 +1124,7 @@ config LOCKDEP bool depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE + select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE && !X86 select KALLSYMS select KALLSYMS_ALL @@ -1543,7 +1543,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT depends on !X86_64 select STACKTRACE - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE && !X86 help Provide stacktrace filter for fault-injection capabilities @@ -1552,7 +1552,7 @@ config LATENCYTOP depends on DEBUG_KERNEL depends on STACKTRACE_SUPPORT depends on PROC_FS - select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC + select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86 select KALLSYMS select KALLSYMS_ALL select STACKTRACE -- cgit v1.2.3 From b09be676e0ff25bd6d2e7637e26d349f9109ad75 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:52 +0900 Subject: locking/lockdep: Implement the 'crossrelease' feature Lockdep is a runtime locking correctness validator that detects and reports a deadlock or its possibility by checking dependencies between locks. It's useful since it does not report just an actual deadlock but also the possibility of a deadlock that has not actually happened yet. That enables problems to be fixed before they affect real systems. However, this facility is only applicable to typical locks, such as spinlocks and mutexes, which are normally released within the context in which they were acquired. However, synchronization primitives like page locks or completions, which are allowed to be released in any context, also create dependencies and can cause a deadlock. So lockdep should track these locks to do a better job. The 'crossrelease' implementation makes these primitives also be tracked. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-6-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/irqflags.h | 24 ++- include/linux/lockdep.h | 110 +++++++++- include/linux/sched.h | 8 + kernel/exit.c | 1 + kernel/fork.c | 4 + kernel/locking/lockdep.c | 508 ++++++++++++++++++++++++++++++++++++++++++++--- kernel/workqueue.c | 2 + lib/Kconfig.debug | 12 ++ 8 files changed, 635 insertions(+), 34 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 5dd1272d1ab2..5fdd93bb9300 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -23,10 +23,26 @@ # define trace_softirq_context(p) ((p)->softirq_context) # define trace_hardirqs_enabled(p) ((p)->hardirqs_enabled) # define trace_softirqs_enabled(p) ((p)->softirqs_enabled) -# define trace_hardirq_enter() do { current->hardirq_context++; } while (0) -# define trace_hardirq_exit() do { current->hardirq_context--; } while (0) -# define lockdep_softirq_enter() do { current->softirq_context++; } while (0) -# define lockdep_softirq_exit() do { current->softirq_context--; } while (0) +# define trace_hardirq_enter() \ +do { \ + current->hardirq_context++; \ + crossrelease_hist_start(XHLOCK_HARD); \ +} while (0) +# define trace_hardirq_exit() \ +do { \ + current->hardirq_context--; \ + crossrelease_hist_end(XHLOCK_HARD); \ +} while (0) +# define lockdep_softirq_enter() \ +do { \ + current->softirq_context++; \ + crossrelease_hist_start(XHLOCK_SOFT); \ +} while (0) +# define lockdep_softirq_exit() \ +do { \ + current->softirq_context--; \ + crossrelease_hist_end(XHLOCK_SOFT); \ +} while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else # define trace_hardirqs_on() do { } while (0) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 0a4c02c2d7a2..e1e0fcd99613 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -155,6 +155,12 @@ struct lockdep_map { int cpu; unsigned long ip; #endif +#ifdef CONFIG_LOCKDEP_CROSSRELEASE + /* + * Whether it's a crosslock. + */ + int cross; +#endif }; static inline void lockdep_copy_map(struct lockdep_map *to, @@ -258,8 +264,62 @@ struct held_lock { unsigned int hardirqs_off:1; unsigned int references:12; /* 32 bits */ unsigned int pin_count; +#ifdef CONFIG_LOCKDEP_CROSSRELEASE + /* + * Generation id. + * + * A value of cross_gen_id will be stored when holding this, + * which is globally increased whenever each crosslock is held. + */ + unsigned int gen_id; +#endif +}; + +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +#define MAX_XHLOCK_TRACE_ENTRIES 5 + +/* + * This is for keeping locks waiting for commit so that true dependencies + * can be added at commit step. + */ +struct hist_lock { + /* + * Seperate stack_trace data. This will be used at commit step. + */ + struct stack_trace trace; + unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES]; + + /* + * Seperate hlock instance. This will be used at commit step. + * + * TODO: Use a smaller data structure containing only necessary + * data. However, we should make lockdep code able to handle the + * smaller one first. + */ + struct held_lock hlock; +}; + +/* + * To initialize a lock as crosslock, lockdep_init_map_crosslock() should + * be called instead of lockdep_init_map(). + */ +struct cross_lock { + /* + * Seperate hlock instance. This will be used at commit step. + * + * TODO: Use a smaller data structure containing only necessary + * data. However, we should make lockdep code able to handle the + * smaller one first. + */ + struct held_lock hlock; }; +struct lockdep_map_cross { + struct lockdep_map map; + struct cross_lock xlock; +}; +#endif + /* * Initialization, self-test and debugging-output methods: */ @@ -281,13 +341,6 @@ extern void lockdep_on(void); extern void lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass); -/* - * To initialize a lockdep_map statically use this macro. - * Note that _name must not be NULL. - */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), } - /* * Reinitialize a lock key - for cases where there is special locking or * special initialization of locks so that the validator gets the scope @@ -460,6 +513,49 @@ struct pin_cookie { }; #endif /* !LOCKDEP */ +enum xhlock_context_t { + XHLOCK_HARD, + XHLOCK_SOFT, + XHLOCK_PROC, + XHLOCK_CTX_NR, +}; + +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +extern void lockdep_init_map_crosslock(struct lockdep_map *lock, + const char *name, + struct lock_class_key *key, + int subclass); +extern void lock_commit_crosslock(struct lockdep_map *lock); + +#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \ + { .map.name = (_name), .map.key = (void *)(_key), \ + .map.cross = 1, } + +/* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. + */ +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), .cross = 0, } + +extern void crossrelease_hist_start(enum xhlock_context_t c); +extern void crossrelease_hist_end(enum xhlock_context_t c); +extern void lockdep_init_task(struct task_struct *task); +extern void lockdep_free_task(struct task_struct *task); +#else +/* + * To initialize a lockdep_map statically use this macro. + * Note that _name must not be NULL. + */ +#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ + { .name = (_name), .key = (void *)(_key), } + +static inline void crossrelease_hist_start(enum xhlock_context_t c) {} +static inline void crossrelease_hist_end(enum xhlock_context_t c) {} +static inline void lockdep_init_task(struct task_struct *task) {} +static inline void lockdep_free_task(struct task_struct *task) {} +#endif + #ifdef CONFIG_LOCK_STAT extern void lock_contended(struct lockdep_map *lock, unsigned long ip); diff --git a/include/linux/sched.h b/include/linux/sched.h index 57db70ec70d0..5235fba537fc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -848,6 +848,14 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +#define MAX_XHLOCKS_NR 64UL + struct hist_lock *xhlocks; /* Crossrelease history locks */ + unsigned int xhlock_idx; + /* For restoring at history boundaries */ + unsigned int xhlock_idx_hist[XHLOCK_CTX_NR]; +#endif + #ifdef CONFIG_UBSAN unsigned int in_ubsan; #endif diff --git a/kernel/exit.c b/kernel/exit.c index c5548faa9f37..fa72d57db747 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -920,6 +920,7 @@ void __noreturn do_exit(long code) exit_rcu(); TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); + lockdep_free_task(tsk); do_task_dead(); } EXPORT_SYMBOL_GPL(do_exit); diff --git a/kernel/fork.c b/kernel/fork.c index 17921b0390b4..cbf2221ee81a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -484,6 +484,8 @@ void __init fork_init(void) cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "fork:vm_stack_cache", NULL, free_vm_stack_cache); #endif + + lockdep_init_task(&init_task); } int __weak arch_dup_task_struct(struct task_struct *dst, @@ -1691,6 +1693,7 @@ static __latent_entropy struct task_struct *copy_process( p->lockdep_depth = 0; /* no locks held yet */ p->curr_chain_key = 0; p->lockdep_recursion = 0; + lockdep_init_task(p); #endif #ifdef CONFIG_DEBUG_MUTEXES @@ -1949,6 +1952,7 @@ bad_fork_cleanup_audit: bad_fork_cleanup_perf: perf_event_free_task(p); bad_fork_cleanup_policy: + lockdep_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); bad_fork_cleanup_threadgroup_lock: diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 841828ba35b9..56f69cc53ddc 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -58,6 +58,10 @@ #define CREATE_TRACE_POINTS #include +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +#include +#endif + #ifdef CONFIG_PROVE_LOCKING int prove_locking = 1; module_param(prove_locking, int, 0644); @@ -724,6 +728,18 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) return is_static || static_obj(lock->key) ? NULL : ERR_PTR(-EINVAL); } +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +static void cross_init(struct lockdep_map *lock, int cross); +static int cross_lock(struct lockdep_map *lock); +static int lock_acquire_crosslock(struct held_lock *hlock); +static int lock_release_crosslock(struct lockdep_map *lock); +#else +static inline void cross_init(struct lockdep_map *lock, int cross) {} +static inline int cross_lock(struct lockdep_map *lock) { return 0; } +static inline int lock_acquire_crosslock(struct held_lock *hlock) { return 2; } +static inline int lock_release_crosslock(struct lockdep_map *lock) { return 2; } +#endif + /* * Register a lock's class in the hash-table, if the class is not present * yet. Otherwise we look it up. We cache the result in the lock object @@ -1795,6 +1811,9 @@ check_deadlock(struct task_struct *curr, struct held_lock *next, if (nest) return 2; + if (cross_lock(prev->instance)) + continue; + return print_deadlock_bug(curr, prev, next); } return 1; @@ -1962,30 +1981,36 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) int distance = curr->lockdep_depth - depth + 1; hlock = curr->held_locks + depth - 1; /* - * Only non-recursive-read entries get new dependencies - * added: + * Only non-crosslock entries get new dependencies added. + * Crosslock entries will be added by commit later: */ - if (hlock->read != 2 && hlock->check) { - int ret = check_prev_add(curr, hlock, next, - distance, &trace, save); - if (!ret) - return 0; - + if (!cross_lock(hlock->instance)) { /* - * Stop saving stack_trace if save_trace() was - * called at least once: + * Only non-recursive-read entries get new dependencies + * added: */ - if (save && ret == 2) - save = NULL; + if (hlock->read != 2 && hlock->check) { + int ret = check_prev_add(curr, hlock, next, + distance, &trace, save); + if (!ret) + return 0; - /* - * Stop after the first non-trylock entry, - * as non-trylock entries have added their - * own direct dependencies already, so this - * lock is connected to them indirectly: - */ - if (!hlock->trylock) - break; + /* + * Stop saving stack_trace if save_trace() was + * called at least once: + */ + if (save && ret == 2) + save = NULL; + + /* + * Stop after the first non-trylock entry, + * as non-trylock entries have added their + * own direct dependencies already, so this + * lock is connected to them indirectly: + */ + if (!hlock->trylock) + break; + } } depth--; /* @@ -3176,7 +3201,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this, /* * Initialize a lock instance's lock-class mapping info: */ -void lockdep_init_map(struct lockdep_map *lock, const char *name, +static void __lockdep_init_map(struct lockdep_map *lock, const char *name, struct lock_class_key *key, int subclass) { int i; @@ -3234,8 +3259,25 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, raw_local_irq_restore(flags); } } + +void lockdep_init_map(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass) +{ + cross_init(lock, 0); + __lockdep_init_map(lock, name, key, subclass); +} EXPORT_SYMBOL_GPL(lockdep_init_map); +#ifdef CONFIG_LOCKDEP_CROSSRELEASE +void lockdep_init_map_crosslock(struct lockdep_map *lock, const char *name, + struct lock_class_key *key, int subclass) +{ + cross_init(lock, 1); + __lockdep_init_map(lock, name, key, subclass); +} +EXPORT_SYMBOL_GPL(lockdep_init_map_crosslock); +#endif + struct lock_class_key __lockdep_no_validate__; EXPORT_SYMBOL_GPL(__lockdep_no_validate__); @@ -3291,6 +3333,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, int chain_head = 0; int class_idx; u64 chain_key; + int ret; if (unlikely(!debug_locks)) return 0; @@ -3339,7 +3382,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, class_idx = class - lock_classes + 1; - if (depth) { + /* TODO: nest_lock is not implemented for crosslock yet. */ + if (depth && !cross_lock(lock)) { hlock = curr->held_locks + depth - 1; if (hlock->class_idx == class_idx && nest_lock) { if (hlock->references) { @@ -3427,6 +3471,14 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, if (!validate_chain(curr, lock, hlock, chain_head, chain_key)) return 0; + ret = lock_acquire_crosslock(hlock); + /* + * 2 means normal acquire operations are needed. Otherwise, it's + * ok just to return with '0:fail, 1:success'. + */ + if (ret != 2) + return ret; + curr->curr_chain_key = chain_key; curr->lockdep_depth++; check_chain_key(curr); @@ -3664,11 +3716,19 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) struct task_struct *curr = current; struct held_lock *hlock; unsigned int depth; - int i; + int ret, i; if (unlikely(!debug_locks)) return 0; + ret = lock_release_crosslock(lock); + /* + * 2 means normal release operations are needed. Otherwise, it's + * ok just to return with '0:fail, 1:success'. + */ + if (ret != 2) + return ret; + depth = curr->lockdep_depth; /* * So we're all set to release this lock.. wait what lock? We don't @@ -4532,6 +4592,13 @@ asmlinkage __visible void lockdep_sys_exit(void) curr->comm, curr->pid); lockdep_print_held_locks(curr); } + + /* + * The lock history for each syscall should be independent. So wipe the + * slate clean on return to userspace. + */ + crossrelease_hist_end(XHLOCK_PROC); + crossrelease_hist_start(XHLOCK_PROC); } void lockdep_rcu_suspicious(const char *file, const int line, const char *s) @@ -4580,3 +4647,398 @@ void lockdep_rcu_suspicious(const char *file, const int line, const char *s) dump_stack(); } EXPORT_SYMBOL_GPL(lockdep_rcu_suspicious); + +#ifdef CONFIG_LOCKDEP_CROSSRELEASE + +/* + * Crossrelease works by recording a lock history for each thread and + * connecting those historic locks that were taken after the + * wait_for_completion() in the complete() context. + * + * Task-A Task-B + * + * mutex_lock(&A); + * mutex_unlock(&A); + * + * wait_for_completion(&C); + * lock_acquire_crosslock(); + * atomic_inc_return(&cross_gen_id); + * | + * | mutex_lock(&B); + * | mutex_unlock(&B); + * | + * | complete(&C); + * `-- lock_commit_crosslock(); + * + * Which will then add a dependency between B and C. + */ + +#define xhlock(i) (current->xhlocks[(i) % MAX_XHLOCKS_NR]) + +/* + * Whenever a crosslock is held, cross_gen_id will be increased. + */ +static atomic_t cross_gen_id; /* Can be wrapped */ + +/* + * Lock history stacks; we have 3 nested lock history stacks: + * + * Hard IRQ + * Soft IRQ + * History / Task + * + * The thing is that once we complete a (Hard/Soft) IRQ the future task locks + * should not depend on any of the locks observed while running the IRQ. + * + * So what we do is rewind the history buffer and erase all our knowledge of + * that temporal event. + */ + +/* + * We need this to annotate lock history boundaries. Take for instance + * workqueues; each work is independent of the last. The completion of a future + * work does not depend on the completion of a past work (in general). + * Therefore we must not carry that (lock) dependency across works. + * + * This is true for many things; pretty much all kthreads fall into this + * pattern, where they have an 'idle' state and future completions do not + * depend on past completions. Its just that since they all have the 'same' + * form -- the kthread does the same over and over -- it doesn't typically + * matter. + * + * The same is true for system-calls, once a system call is completed (we've + * returned to userspace) the next system call does not depend on the lock + * history of the previous system call. + */ +void crossrelease_hist_start(enum xhlock_context_t c) +{ + if (current->xhlocks) + current->xhlock_idx_hist[c] = current->xhlock_idx; +} + +void crossrelease_hist_end(enum xhlock_context_t c) +{ + if (current->xhlocks) + current->xhlock_idx = current->xhlock_idx_hist[c]; +} + +static int cross_lock(struct lockdep_map *lock) +{ + return lock ? lock->cross : 0; +} + +/* + * This is needed to decide the relationship between wrapable variables. + */ +static inline int before(unsigned int a, unsigned int b) +{ + return (int)(a - b) < 0; +} + +static inline struct lock_class *xhlock_class(struct hist_lock *xhlock) +{ + return hlock_class(&xhlock->hlock); +} + +static inline struct lock_class *xlock_class(struct cross_lock *xlock) +{ + return hlock_class(&xlock->hlock); +} + +/* + * Should we check a dependency with previous one? + */ +static inline int depend_before(struct held_lock *hlock) +{ + return hlock->read != 2 && hlock->check && !hlock->trylock; +} + +/* + * Should we check a dependency with next one? + */ +static inline int depend_after(struct held_lock *hlock) +{ + return hlock->read != 2 && hlock->check; +} + +/* + * Check if the xhlock is valid, which would be false if, + * + * 1. Has not used after initializaion yet. + * + * Remind hist_lock is implemented as a ring buffer. + */ +static inline int xhlock_valid(struct hist_lock *xhlock) +{ + /* + * xhlock->hlock.instance must be !NULL. + */ + return !!xhlock->hlock.instance; +} + +/* + * Record a hist_lock entry. + * + * Irq disable is only required. + */ +static void add_xhlock(struct held_lock *hlock) +{ + unsigned int idx = ++current->xhlock_idx; + struct hist_lock *xhlock = &xhlock(idx); + +#ifdef CONFIG_DEBUG_LOCKDEP + /* + * This can be done locklessly because they are all task-local + * state, we must however ensure IRQs are disabled. + */ + WARN_ON_ONCE(!irqs_disabled()); +#endif + + /* Initialize hist_lock's members */ + xhlock->hlock = *hlock; + + xhlock->trace.nr_entries = 0; + xhlock->trace.max_entries = MAX_XHLOCK_TRACE_ENTRIES; + xhlock->trace.entries = xhlock->trace_entries; + xhlock->trace.skip = 3; + save_stack_trace(&xhlock->trace); +} + +static inline int same_context_xhlock(struct hist_lock *xhlock) +{ + return xhlock->hlock.irq_context == task_irq_context(current); +} + +/* + * This should be lockless as far as possible because this would be + * called very frequently. + */ +static void check_add_xhlock(struct held_lock *hlock) +{ + /* + * Record a hist_lock, only in case that acquisitions ahead + * could depend on the held_lock. For example, if the held_lock + * is trylock then acquisitions ahead never depends on that. + * In that case, we don't need to record it. Just return. + */ + if (!current->xhlocks || !depend_before(hlock)) + return; + + add_xhlock(hlock); +} + +/* + * For crosslock. + */ +static int add_xlock(struct held_lock *hlock) +{ + struct cross_lock *xlock; + unsigned int gen_id; + + if (!graph_lock()) + return 0; + + xlock = &((struct lockdep_map_cross *)hlock->instance)->xlock; + + gen_id = (unsigned int)atomic_inc_return(&cross_gen_id); + xlock->hlock = *hlock; + xlock->hlock.gen_id = gen_id; + graph_unlock(); + + return 1; +} + +/* + * Called for both normal and crosslock acquires. Normal locks will be + * pushed on the hist_lock queue. Cross locks will record state and + * stop regular lock_acquire() to avoid being placed on the held_lock + * stack. + * + * Return: 0 - failure; + * 1 - crosslock, done; + * 2 - normal lock, continue to held_lock[] ops. + */ +static int lock_acquire_crosslock(struct held_lock *hlock) +{ + /* + * CONTEXT 1 CONTEXT 2 + * --------- --------- + * lock A (cross) + * X = atomic_inc_return(&cross_gen_id) + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Y = atomic_read_acquire(&cross_gen_id) + * lock B + * + * atomic_read_acquire() is for ordering between A and B, + * IOW, A happens before B, when CONTEXT 2 see Y >= X. + * + * Pairs with atomic_inc_return() in add_xlock(). + */ + hlock->gen_id = (unsigned int)atomic_read_acquire(&cross_gen_id); + + if (cross_lock(hlock->instance)) + return add_xlock(hlock); + + check_add_xhlock(hlock); + return 2; +} + +static int copy_trace(struct stack_trace *trace) +{ + unsigned long *buf = stack_trace + nr_stack_trace_entries; + unsigned int max_nr = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries; + unsigned int nr = min(max_nr, trace->nr_entries); + + trace->nr_entries = nr; + memcpy(buf, trace->entries, nr * sizeof(trace->entries[0])); + trace->entries = buf; + nr_stack_trace_entries += nr; + + if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) { + if (!debug_locks_off_graph_unlock()) + return 0; + + print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!"); + dump_stack(); + + return 0; + } + + return 1; +} + +static int commit_xhlock(struct cross_lock *xlock, struct hist_lock *xhlock) +{ + unsigned int xid, pid; + u64 chain_key; + + xid = xlock_class(xlock) - lock_classes; + chain_key = iterate_chain_key((u64)0, xid); + pid = xhlock_class(xhlock) - lock_classes; + chain_key = iterate_chain_key(chain_key, pid); + + if (lookup_chain_cache(chain_key)) + return 1; + + if (!add_chain_cache_classes(xid, pid, xhlock->hlock.irq_context, + chain_key)) + return 0; + + if (!check_prev_add(current, &xlock->hlock, &xhlock->hlock, 1, + &xhlock->trace, copy_trace)) + return 0; + + return 1; +} + +static void commit_xhlocks(struct cross_lock *xlock) +{ + unsigned int cur = current->xhlock_idx; + unsigned int i; + + if (!graph_lock()) + return; + + for (i = 0; i < MAX_XHLOCKS_NR; i++) { + struct hist_lock *xhlock = &xhlock(cur - i); + + if (!xhlock_valid(xhlock)) + break; + + if (before(xhlock->hlock.gen_id, xlock->hlock.gen_id)) + break; + + if (!same_context_xhlock(xhlock)) + break; + + /* + * commit_xhlock() returns 0 with graph_lock already + * released if fail. + */ + if (!commit_xhlock(xlock, xhlock)) + return; + } + + graph_unlock(); +} + +void lock_commit_crosslock(struct lockdep_map *lock) +{ + struct cross_lock *xlock; + unsigned long flags; + + if (unlikely(!debug_locks || current->lockdep_recursion)) + return; + + if (!current->xhlocks) + return; + + /* + * Do commit hist_locks with the cross_lock, only in case that + * the cross_lock could depend on acquisitions after that. + * + * For example, if the cross_lock does not have the 'check' flag + * then we don't need to check dependencies and commit for that. + * Just skip it. In that case, of course, the cross_lock does + * not depend on acquisitions ahead, either. + * + * WARNING: Don't do that in add_xlock() in advance. When an + * acquisition context is different from the commit context, + * invalid(skipped) cross_lock might be accessed. + */ + if (!depend_after(&((struct lockdep_map_cross *)lock)->xlock.hlock)) + return; + + raw_local_irq_save(flags); + check_flags(flags); + current->lockdep_recursion = 1; + xlock = &((struct lockdep_map_cross *)lock)->xlock; + commit_xhlocks(xlock); + current->lockdep_recursion = 0; + raw_local_irq_restore(flags); +} +EXPORT_SYMBOL_GPL(lock_commit_crosslock); + +/* + * Return: 1 - crosslock, done; + * 2 - normal lock, continue to held_lock[] ops. + */ +static int lock_release_crosslock(struct lockdep_map *lock) +{ + return cross_lock(lock) ? 1 : 2; +} + +static void cross_init(struct lockdep_map *lock, int cross) +{ + lock->cross = cross; + + /* + * Crossrelease assumes that the ring buffer size of xhlocks + * is aligned with power of 2. So force it on build. + */ + BUILD_BUG_ON(MAX_XHLOCKS_NR & (MAX_XHLOCKS_NR - 1)); +} + +void lockdep_init_task(struct task_struct *task) +{ + int i; + + task->xhlock_idx = UINT_MAX; + + for (i = 0; i < XHLOCK_CTX_NR; i++) + task->xhlock_idx_hist[i] = UINT_MAX; + + task->xhlocks = kzalloc(sizeof(struct hist_lock) * MAX_XHLOCKS_NR, + GFP_KERNEL); +} + +void lockdep_free_task(struct task_struct *task) +{ + if (task->xhlocks) { + void *tmp = task->xhlocks; + /* Diable crossrelease for current */ + task->xhlocks = NULL; + kfree(tmp); + } +} +#endif diff --git a/kernel/workqueue.c b/kernel/workqueue.c index ca937b0c3a96..e86733a8b344 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2093,6 +2093,7 @@ __acquires(&pool->lock) lock_map_acquire_read(&pwq->wq->lockdep_map); lock_map_acquire(&lockdep_map); + crossrelease_hist_start(XHLOCK_PROC); trace_workqueue_execute_start(work); worker->current_func(work); /* @@ -2100,6 +2101,7 @@ __acquires(&pool->lock) * point will only record its address. */ trace_workqueue_execute_end(work); + crossrelease_hist_end(XHLOCK_PROC); lock_map_release(&lockdep_map); lock_map_release(&pwq->wq->lockdep_map); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 98fe715522e8..c6038f23bb1a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1150,6 +1150,18 @@ config LOCK_STAT CONFIG_LOCK_STAT defines "contended" and "acquired" lock events. (CONFIG_LOCKDEP defines "acquire" and "release" events.) +config LOCKDEP_CROSSRELEASE + bool "Lock debugging: make lockdep work for crosslocks" + depends on PROVE_LOCKING + default n + help + This makes lockdep work for crosslock which is a lock allowed to + be released in a different context from the acquisition context. + Normally a lock must be released in the context acquiring the lock. + However, relexing this constraint helps synchronization primitives + such as page locks or completions can use the lock correctness + detector, lockdep. + config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP -- cgit v1.2.3 From cd8084f91c02c1afd256a39aa833bff737631304 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Mon, 7 Aug 2017 16:12:56 +0900 Subject: locking/lockdep: Apply crossrelease to completions Although wait_for_completion() and its family can cause deadlock, the lock correctness validator could not be applied to them until now, because things like complete() are usually called in a different context from the waiting context, which violates lockdep's assumption. Thanks to CONFIG_LOCKDEP_CROSSRELEASE, we can now apply the lockdep detector to those completion operations. Applied it. Signed-off-by: Byungchul Park Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: boqun.feng@gmail.com Cc: kernel-team@lge.com Cc: kirill@shutemov.name Cc: npiggin@gmail.com Cc: walken@google.com Cc: willy@infradead.org Link: http://lkml.kernel.org/r/1502089981-21272-10-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/completion.h | 45 ++++++++++++++++++++++++++++++++++++++++++++- kernel/sched/completion.c | 11 +++++++++++ lib/Kconfig.debug | 9 +++++++++ 3 files changed, 64 insertions(+), 1 deletion(-) (limited to 'lib/Kconfig.debug') diff --git a/include/linux/completion.h b/include/linux/completion.h index 5d5aaae3af43..9bcebf509b4d 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -9,6 +9,9 @@ */ #include +#ifdef CONFIG_LOCKDEP_COMPLETE +#include +#endif /* * struct completion - structure used to maintain state for a "completion" @@ -25,10 +28,50 @@ struct completion { unsigned int done; wait_queue_head_t wait; +#ifdef CONFIG_LOCKDEP_COMPLETE + struct lockdep_map_cross map; +#endif }; +#ifdef CONFIG_LOCKDEP_COMPLETE +static inline void complete_acquire(struct completion *x) +{ + lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); +} + +static inline void complete_release(struct completion *x) +{ + lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_); +} + +static inline void complete_release_commit(struct completion *x) +{ + lock_commit_crosslock((struct lockdep_map *)&x->map); +} + +#define init_completion(x) \ +do { \ + static struct lock_class_key __key; \ + lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ + "(complete)" #x, \ + &__key, 0); \ + __init_completion(x); \ +} while (0) +#else +#define init_completion(x) __init_completion(x) +static inline void complete_acquire(struct completion *x) {} +static inline void complete_release(struct completion *x) {} +static inline void complete_release_commit(struct completion *x) {} +#endif + +#ifdef CONFIG_LOCKDEP_COMPLETE +#define COMPLETION_INITIALIZER(work) \ + { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ + STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } +#else #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } +#endif #define COMPLETION_INITIALIZER_ONSTACK(work) \ ({ init_completion(&work); work; }) @@ -70,7 +113,7 @@ struct completion { * This inline function will initialize a dynamically created completion * structure. */ -static inline void init_completion(struct completion *x) +static inline void __init_completion(struct completion *x) { x->done = 0; init_waitqueue_head(&x->wait); diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index 13fc5ae9bf2f..566b6ec7b6fe 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -32,6 +32,12 @@ void complete(struct completion *x) unsigned long flags; spin_lock_irqsave(&x->wait.lock, flags); + + /* + * Perform commit of crossrelease here. + */ + complete_release_commit(x); + if (x->done != UINT_MAX) x->done++; __wake_up_locked(&x->wait, TASK_NORMAL, 1); @@ -92,9 +98,14 @@ __wait_for_common(struct completion *x, { might_sleep(); + complete_acquire(x); + spin_lock_irq(&x->wait.lock); timeout = do_wait_for_common(x, action, timeout, state); spin_unlock_irq(&x->wait.lock); + + complete_release(x); + return timeout; } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c6038f23bb1a..ebd40d3b0b28 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1162,6 +1162,15 @@ config LOCKDEP_CROSSRELEASE such as page locks or completions can use the lock correctness detector, lockdep. +config LOCKDEP_COMPLETE + bool "Lock debugging: allow completions to use deadlock detector" + depends on PROVE_LOCKING + select LOCKDEP_CROSSRELEASE + default n + help + A deadlock caused by wait_for_completion() and complete() can be + detected by lockdep using crossrelease feature. + config DEBUG_LOCKDEP bool "Lock dependency engine debugging" depends on DEBUG_KERNEL && LOCKDEP -- cgit v1.2.3 From d0541b0fa64b36665d6261079974a26943c75009 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 17 Aug 2017 17:57:39 +0900 Subject: locking/lockdep: Make CONFIG_LOCKDEP_CROSSRELEASE part of CONFIG_PROVE_LOCKING Crossrelease support added the CONFIG_LOCKDEP_CROSSRELEASE and CONFIG_LOCKDEP_COMPLETE options. It makes little sense to enable them when PROVE_LOCKING is disabled. Make them non-interative options and part of PROVE_LOCKING to simplify the UI. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/1502960261-16206-1-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ebd40d3b0b28..1ad7f1b9160e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1081,6 +1081,8 @@ config PROVE_LOCKING select DEBUG_MUTEXES select DEBUG_RT_MUTEXES if RT_MUTEXES select DEBUG_LOCK_ALLOC + select LOCKDEP_CROSSRELEASE + select LOCKDEP_COMPLETE select TRACE_IRQFLAGS default n help @@ -1152,8 +1154,6 @@ config LOCK_STAT config LOCKDEP_CROSSRELEASE bool "Lock debugging: make lockdep work for crosslocks" - depends on PROVE_LOCKING - default n help This makes lockdep work for crosslock which is a lock allowed to be released in a different context from the acquisition context. @@ -1164,9 +1164,6 @@ config LOCKDEP_CROSSRELEASE config LOCKDEP_COMPLETE bool "Lock debugging: allow completions to use deadlock detector" - depends on PROVE_LOCKING - select LOCKDEP_CROSSRELEASE - default n help A deadlock caused by wait_for_completion() and complete() can be detected by lockdep using crossrelease feature. -- cgit v1.2.3 From 0f0a22260d613b4ee3f483ee1ea6fa27f92a9e40 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 17 Aug 2017 17:57:40 +0900 Subject: locking/lockdep: Reword title of LOCKDEP_CROSSRELEASE config Lockdep doesn't have to be made to work with crossrelease and just works with them. Reword the title so that what the option does is clear. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/1502960261-16206-2-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1ad7f1b9160e..8fb8a206db12 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1153,7 +1153,7 @@ config LOCK_STAT (CONFIG_LOCKDEP defines "acquire" and "release" events.) config LOCKDEP_CROSSRELEASE - bool "Lock debugging: make lockdep work for crosslocks" + bool "Lock debugging: enable cross-locking checks in lockdep" help This makes lockdep work for crosslock which is a lock allowed to be released in a different context from the acquisition context. -- cgit v1.2.3 From ea3f2c0fdfbb180f142cdbc0d1f055c7b9a5e63e Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Thu, 17 Aug 2017 17:57:41 +0900 Subject: locking/lockdep: Rename CONFIG_LOCKDEP_COMPLETE to CONFIG_LOCKDEP_COMPLETIONS 'complete' is an adjective and LOCKDEP_COMPLETE sounds like 'lockdep is complete', so pick a better name that uses a noun. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/1502960261-16206-3-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/completion.h | 8 ++++---- lib/Kconfig.debug | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/include/linux/completion.h b/include/linux/completion.h index 9bcebf509b4d..791f053f28b7 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -9,7 +9,7 @@ */ #include -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS #include #endif @@ -28,12 +28,12 @@ struct completion { unsigned int done; wait_queue_head_t wait; -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS struct lockdep_map_cross map; #endif }; -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS static inline void complete_acquire(struct completion *x) { lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); @@ -64,7 +64,7 @@ static inline void complete_release(struct completion *x) {} static inline void complete_release_commit(struct completion *x) {} #endif -#ifdef CONFIG_LOCKDEP_COMPLETE +#ifdef CONFIG_LOCKDEP_COMPLETIONS #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8fb8a206db12..a0e60d56303e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1082,7 +1082,7 @@ config PROVE_LOCKING select DEBUG_RT_MUTEXES if RT_MUTEXES select DEBUG_LOCK_ALLOC select LOCKDEP_CROSSRELEASE - select LOCKDEP_COMPLETE + select LOCKDEP_COMPLETIONS select TRACE_IRQFLAGS default n help @@ -1162,7 +1162,7 @@ config LOCKDEP_CROSSRELEASE such as page locks or completions can use the lock correctness detector, lockdep. -config LOCKDEP_COMPLETE +config LOCKDEP_COMPLETIONS bool "Lock debugging: allow completions to use deadlock detector" help A deadlock caused by wait_for_completion() and complete() can be -- cgit v1.2.3 From e26f34a407aec9c65bce2bc0c838fabe4f051fc6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 17 Aug 2017 12:48:29 +0200 Subject: locking/lockdep: Make CONFIG_LOCKDEP_CROSSRELEASE and CONFIG_LOCKDEP_COMPLETIONS truly non-interactive The syntax to turn Kconfig options into non-interactive ones is to not offer interactive prompt help texts. Remove them. Cc: Boqun Feng Cc: Byungchul Park Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tejun Heo Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- lib/Kconfig.debug | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index a0e60d56303e..eb0f316b731a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1153,7 +1153,7 @@ config LOCK_STAT (CONFIG_LOCKDEP defines "acquire" and "release" events.) config LOCKDEP_CROSSRELEASE - bool "Lock debugging: enable cross-locking checks in lockdep" + bool help This makes lockdep work for crosslock which is a lock allowed to be released in a different context from the acquisition context. @@ -1163,7 +1163,7 @@ config LOCKDEP_CROSSRELEASE detector, lockdep. config LOCKDEP_COMPLETIONS - bool "Lock debugging: allow completions to use deadlock detector" + bool help A deadlock caused by wait_for_completion() and complete() can be detected by lockdep using crossrelease feature. -- cgit v1.2.3 From 7edaeb6841dfb27e362288ab8466ebdc4972e867 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 15 Aug 2017 09:50:13 +0200 Subject: kernel/watchdog: Prevent false positives with turbo modes The hardlockup detector on x86 uses a performance counter based on unhalted CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the performance counter period, so the hrtimer should fire 2-3 times before the performance counter NMI fires. The NMI code checks whether the hrtimer fired since the last invocation. If not, it assumess a hard lockup. The calculation of those periods is based on the nominal CPU frequency. Turbo modes increase the CPU clock frequency and therefore shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x nominal frequency) the perf/NMI period is shorter than the hrtimer period which leads to false positives. A simple fix would be to shorten the hrtimer period, but that comes with the side effect of more frequent hrtimer and softlockup thread wakeups, which is not desired. Implement a low pass filter, which checks the perf/NMI period against kernel time. If the perf/NMI fires before 4/5 of the watchdog period has elapsed then the event is ignored and postponed to the next perf/NMI. That solves the problem and avoids the overhead of shorter hrtimer periods and more frequent softlockup thread wakeups. Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector") Reported-and-tested-by: Kan Liang Signed-off-by: Thomas Gleixner Cc: dzickus@redhat.com Cc: prarit@redhat.com Cc: ak@linux.intel.com Cc: babu.moger@oracle.com Cc: peterz@infradead.org Cc: eranian@google.com Cc: acme@redhat.com Cc: stable@vger.kernel.org Cc: atomlin@redhat.com Cc: akpm@linux-foundation.org Cc: torvalds@linux-foundation.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos --- arch/x86/Kconfig | 1 + include/linux/nmi.h | 8 +++++++ kernel/watchdog.c | 1 + kernel/watchdog_hld.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++ lib/Kconfig.debug | 7 ++++++ 5 files changed, 76 insertions(+) (limited to 'lib/Kconfig.debug') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 781521b7cf9e..9101bfc85539 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -100,6 +100,7 @@ config X86 select GENERIC_STRNCPY_FROM_USER select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL + select HARDLOCKUP_CHECK_TIMESTAMP if X86_64 select HAVE_ACPI_APEI if ACPI select HAVE_ACPI_APEI_NMI if ACPI select HAVE_ALIGNED_STRUCT_PAGE if SLUB diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 8aa01fd859fb..a36abe2da13e 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace; #define sysctl_softlockup_all_cpu_backtrace 0 #define sysctl_hardlockup_all_cpu_backtrace 0 #endif + +#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \ + defined(CONFIG_HARDLOCKUP_DETECTOR) +void watchdog_update_hrtimer_threshold(u64 period); +#else +static inline void watchdog_update_hrtimer_threshold(u64 period) { } +#endif + extern bool is_hardlockup(void); struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 06d3389bca0d..f5d52024f6b7 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -240,6 +240,7 @@ static void set_sample_period(void) * hardlockup detector generates a warning */ sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5); + watchdog_update_hrtimer_threshold(sample_period); } /* Commands for resetting the watchdog */ diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 295a0d84934c..3a09ea1b1d3d 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void) } EXPORT_SYMBOL(arch_touch_nmi_watchdog); +#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP +static DEFINE_PER_CPU(ktime_t, last_timestamp); +static DEFINE_PER_CPU(unsigned int, nmi_rearmed); +static ktime_t watchdog_hrtimer_sample_threshold __read_mostly; + +void watchdog_update_hrtimer_threshold(u64 period) +{ + /* + * The hrtimer runs with a period of (watchdog_threshold * 2) / 5 + * + * So it runs effectively with 2.5 times the rate of the NMI + * watchdog. That means the hrtimer should fire 2-3 times before + * the NMI watchdog expires. The NMI watchdog on x86 is based on + * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles + * might run way faster than expected and the NMI fires in a + * smaller period than the one deduced from the nominal CPU + * frequency. Depending on the Turbo-Mode factor this might be fast + * enough to get the NMI period smaller than the hrtimer watchdog + * period and trigger false positives. + * + * The sample threshold is used to check in the NMI handler whether + * the minimum time between two NMI samples has elapsed. That + * prevents false positives. + * + * Set this to 4/5 of the actual watchdog threshold period so the + * hrtimer is guaranteed to fire at least once within the real + * watchdog threshold. + */ + watchdog_hrtimer_sample_threshold = period * 2; +} + +static bool watchdog_check_timestamp(void) +{ + ktime_t delta, now = ktime_get_mono_fast_ns(); + + delta = now - __this_cpu_read(last_timestamp); + if (delta < watchdog_hrtimer_sample_threshold) { + /* + * If ktime is jiffies based, a stalled timer would prevent + * jiffies from being incremented and the filter would look + * at a stale timestamp and never trigger. + */ + if (__this_cpu_inc_return(nmi_rearmed) < 10) + return false; + } + __this_cpu_write(nmi_rearmed, 0); + __this_cpu_write(last_timestamp, now); + return true; +} +#else +static inline bool watchdog_check_timestamp(void) +{ + return true; +} +#endif + static struct perf_event_attr wd_hw_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, @@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event, return; } + if (!watchdog_check_timestamp()) + return; + /* check for a hardlockup * This is done by making sure our timer interrupt * is incrementing. The timer interrupt should have diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 98fe715522e8..c617b9d1d6cb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF bool select SOFTLOCKUP_DETECTOR +# +# Enables a timestamp based low pass filter to compensate for perf based +# hard lockup detection which runs too fast due to turbo modes. +# +config HARDLOCKUP_CHECK_TIMESTAMP + bool + # # arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard # lockup detector rather than the perf based detector. -- cgit v1.2.3 From e4dace3615526fd66c86dd535ee4bc9e8c706e37 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 8 Sep 2017 16:15:31 -0700 Subject: lib: add test module for CONFIG_DEBUG_VIRTUAL Add a test module that allows testing that CONFIG_DEBUG_VIRTUAL works correctly, at least that it can catch invalid calls to virt_to_phys() against the non-linear kernel virtual address map. Link: http://lkml.kernel.org/r/20170808164035.26725-1-f.fainelli@gmail.com Signed-off-by: Florian Fainelli Cc: "Luis R. Rodriguez" Cc: Kees Cook Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 11 +++++++++++ lib/Makefile | 1 + lib/test_debug_virtual.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+) create mode 100644 lib/test_debug_virtual.c (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7396f5044397..b19c491cbc4e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1930,6 +1930,17 @@ config TEST_KMOD If unsure, say N. +config TEST_DEBUG_VIRTUAL + tristate "Test CONFIG_DEBUG_VIRTUAL feature" + depends on DEBUG_VIRTUAL + help + Test the kernel's ability to detect incorrect calls to + virt_to_phys() done against the non-linear part of the + kernel's virtual address map. + + If unsure, say N. + + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 40c18372b301..469ce5e24e4f 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o +obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/test_debug_virtual.c b/lib/test_debug_virtual.c new file mode 100644 index 000000000000..b9cdeecc19dc --- /dev/null +++ b/lib/test_debug_virtual.c @@ -0,0 +1,49 @@ +#include +#include +#include +#include +#include +#include +#include + +#include +#ifdef CONFIG_MIPS +#include +#endif + +struct foo { + unsigned int bar; +}; + +struct foo *foo; + +static int __init test_debug_virtual_init(void) +{ + phys_addr_t pa; + void *va; + + va = (void *)VMALLOC_START; + pa = virt_to_phys(va); + + pr_info("PA: %pa for VA: 0x%lx\n", &pa, (unsigned long)va); + + foo = kzalloc(sizeof(*foo), GFP_KERNEL); + if (!foo) + return -ENOMEM; + + pa = virt_to_phys(foo); + va = foo; + pr_info("PA: %pa for VA: 0x%lx\n", &pa, (unsigned long)va); + + return 0; +} +module_init(test_debug_virtual_init); + +static void __exit test_debug_virtual_exit(void) +{ + kfree(foo); +} +module_exit(test_debug_virtual_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Test module for CONFIG_DEBUG_VIRTUAL"); -- cgit v1.2.3 From 432654df90f2a7d8ac8438905208074604ed3e00 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 11 Sep 2017 21:41:43 +0200 Subject: parisc: Fix too large frame size warnings The parisc architecture has larger stack frames than most other architectures on 32-bit kernels. Increase the maximum allowed stack frame to 1280 bytes for parisc to avoid warnings in the do_sys_poll() and pat_memconfig() functions. Signed-off-by: Helge Deller --- lib/Kconfig.debug | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b19c491cbc4e..2689b7c50c52 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -219,7 +219,8 @@ config FRAME_WARN range 0 8192 default 0 if KASAN default 2048 if GCC_PLUGIN_LATENT_ENTROPY - default 1024 if !64BIT + default 1280 if (!64BIT && PARISC) + default 1024 if (!64BIT && !PARISC) default 2048 if 64BIT help Tell gcc to warn at build time for stack frames larger than this. -- cgit v1.2.3