From dfd402a4c4baae42398ce9180ff424d589b8bffc Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:54 +0100 Subject: kcsan: Add Kernel Concurrency Sanitizer infrastructure Kernel Concurrency Sanitizer (KCSAN) is a dynamic data-race detector for kernel space. KCSAN is a sampling watchpoint-based data-race detector. See the included Documentation/dev-tools/kcsan.rst for more details. This patch adds basic infrastructure, but does not yet enable KCSAN for any architecture. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/compiler-clang.h | 9 ++++ include/linux/compiler-gcc.h | 7 +++ include/linux/compiler.h | 37 ++++++++++--- include/linux/kcsan-checks.h | 97 ++++++++++++++++++++++++++++++++++ include/linux/kcsan.h | 115 +++++++++++++++++++++++++++++++++++++++++ include/linux/sched.h | 4 ++ 6 files changed, 261 insertions(+), 8 deletions(-) create mode 100644 include/linux/kcsan-checks.h create mode 100644 include/linux/kcsan.h (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index 333a6695a918..a213eb55e725 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -24,6 +24,15 @@ #define __no_sanitize_address #endif +#if __has_feature(thread_sanitizer) +/* emulate gcc's __SANITIZE_THREAD__ flag */ +#define __SANITIZE_THREAD__ +#define __no_sanitize_thread \ + __attribute__((no_sanitize("thread"))) +#else +#define __no_sanitize_thread +#endif + /* * Not all versions of clang implement the the type-generic versions * of the builtin overflow checkers. Fortunately, clang implements diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d7ee4c6bad48..0eb2a1cc411d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -145,6 +145,13 @@ #define __no_sanitize_address #endif +#if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__) +#define __no_sanitize_thread \ + __attribute__((__noinline__)) __attribute__((no_sanitize_thread)) +#else +#define __no_sanitize_thread +#endif + #if GCC_VERSION >= 50100 #define COMPILER_HAS_GENERIC_BUILTIN_OVERFLOW 1 #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 5e88e7e33abe..c42fa83f23fb 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -178,6 +178,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #endif #include +#include #define __READ_ONCE_SIZE \ ({ \ @@ -193,12 +194,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, } \ }) -static __always_inline -void __read_once_size(const volatile void *p, void *res, int size) -{ - __READ_ONCE_SIZE; -} - #ifdef CONFIG_KASAN /* * We can't declare function 'inline' because __no_sanitize_address confilcts @@ -207,18 +202,44 @@ void __read_once_size(const volatile void *p, void *res, int size) * '__maybe_unused' allows us to avoid defined-but-not-used warnings. */ # define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused +# define __no_sanitize_or_inline __no_kasan_or_inline #else # define __no_kasan_or_inline __always_inline #endif -static __no_kasan_or_inline +#ifdef __SANITIZE_THREAD__ +/* + * Rely on __SANITIZE_THREAD__ instead of CONFIG_KCSAN, to avoid not inlining in + * compilation units where instrumentation is disabled. + */ +# define __no_kcsan_or_inline __no_sanitize_thread notrace __maybe_unused +# define __no_sanitize_or_inline __no_kcsan_or_inline +#else +# define __no_kcsan_or_inline __always_inline +#endif + +#ifndef __no_sanitize_or_inline +#define __no_sanitize_or_inline __always_inline +#endif + +static __no_kcsan_or_inline +void __read_once_size(const volatile void *p, void *res, int size) +{ + kcsan_check_atomic_read(p, size); + __READ_ONCE_SIZE; +} + +static __no_sanitize_or_inline void __read_once_size_nocheck(const volatile void *p, void *res, int size) { __READ_ONCE_SIZE; } -static __always_inline void __write_once_size(volatile void *p, void *res, int size) +static __no_kcsan_or_inline +void __write_once_size(volatile void *p, void *res, int size) { + kcsan_check_atomic_write(p, size); + switch (size) { case 1: *(volatile __u8 *)p = *(__u8 *)res; break; case 2: *(volatile __u16 *)p = *(__u16 *)res; break; diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h new file mode 100644 index 000000000000..e78220661086 --- /dev/null +++ b/include/linux/kcsan-checks.h @@ -0,0 +1,97 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KCSAN_CHECKS_H +#define _LINUX_KCSAN_CHECKS_H + +#include + +/* + * Access type modifiers. + */ +#define KCSAN_ACCESS_WRITE 0x1 +#define KCSAN_ACCESS_ATOMIC 0x2 + +/* + * __kcsan_*: Always calls into runtime when KCSAN is enabled. This may be used + * even in compilation units that selectively disable KCSAN, but must use KCSAN + * to validate access to an address. Never use these in header files! + */ +#ifdef CONFIG_KCSAN +/** + * __kcsan_check_access - check generic access for data race + * + * @ptr address of access + * @size size of access + * @type access type modifier + */ +void __kcsan_check_access(const volatile void *ptr, size_t size, int type); + +#else +static inline void __kcsan_check_access(const volatile void *ptr, size_t size, + int type) { } +#endif + +/* + * kcsan_*: Only calls into runtime when the particular compilation unit has + * KCSAN instrumentation enabled. May be used in header files. + */ +#ifdef __SANITIZE_THREAD__ +#define kcsan_check_access __kcsan_check_access +#else +static inline void kcsan_check_access(const volatile void *ptr, size_t size, + int type) { } +#endif + +/** + * __kcsan_check_read - check regular read access for data races + * + * @ptr address of access + * @size size of access + */ +#define __kcsan_check_read(ptr, size) __kcsan_check_access(ptr, size, 0) + +/** + * __kcsan_check_write - check regular write access for data races + * + * @ptr address of access + * @size size of access + */ +#define __kcsan_check_write(ptr, size) \ + __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) + +/** + * kcsan_check_read - check regular read access for data races + * + * @ptr address of access + * @size size of access + */ +#define kcsan_check_read(ptr, size) kcsan_check_access(ptr, size, 0) + +/** + * kcsan_check_write - check regular write access for data races + * + * @ptr address of access + * @size size of access + */ +#define kcsan_check_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) + +/* + * Check for atomic accesses: if atomic access are not ignored, this simply + * aliases to kcsan_check_access, otherwise becomes a no-op. + */ +#ifdef CONFIG_KCSAN_IGNORE_ATOMICS +#define kcsan_check_atomic_read(...) \ + do { \ + } while (0) +#define kcsan_check_atomic_write(...) \ + do { \ + } while (0) +#else +#define kcsan_check_atomic_read(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC) +#define kcsan_check_atomic_write(ptr, size) \ + kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE) +#endif + +#endif /* _LINUX_KCSAN_CHECKS_H */ diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h new file mode 100644 index 000000000000..9047048fee84 --- /dev/null +++ b/include/linux/kcsan.h @@ -0,0 +1,115 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_KCSAN_H +#define _LINUX_KCSAN_H + +#include +#include + +#ifdef CONFIG_KCSAN + +/* + * Context for each thread of execution: for tasks, this is stored in + * task_struct, and interrupts access internal per-CPU storage. + */ +struct kcsan_ctx { + int disable_count; /* disable counter */ + int atomic_next; /* number of following atomic ops */ + + /* + * We distinguish between: (a) nestable atomic regions that may contain + * other nestable regions; and (b) flat atomic regions that do not keep + * track of nesting. Both (a) and (b) are entirely independent of each + * other, and a flat region may be started in a nestable region or + * vice-versa. + * + * This is required because, for example, in the annotations for + * seqlocks, we declare seqlock writer critical sections as (a) nestable + * atomic regions, but reader critical sections as (b) flat atomic + * regions, but have encountered cases where seqlock reader critical + * sections are contained within writer critical sections (the opposite + * may be possible, too). + * + * To support these cases, we independently track the depth of nesting + * for (a), and whether the leaf level is flat for (b). + */ + int atomic_nest_count; + bool in_flat_atomic; +}; + +/** + * kcsan_init - initialize KCSAN runtime + */ +void kcsan_init(void); + +/** + * kcsan_disable_current - disable KCSAN for the current context + * + * Supports nesting. + */ +void kcsan_disable_current(void); + +/** + * kcsan_enable_current - re-enable KCSAN for the current context + * + * Supports nesting. + */ +void kcsan_enable_current(void); + +/** + * kcsan_nestable_atomic_begin - begin nestable atomic region + * + * Accesses within the atomic region may appear to race with other accesses but + * should be considered atomic. + */ +void kcsan_nestable_atomic_begin(void); + +/** + * kcsan_nestable_atomic_end - end nestable atomic region + */ +void kcsan_nestable_atomic_end(void); + +/** + * kcsan_flat_atomic_begin - begin flat atomic region + * + * Accesses within the atomic region may appear to race with other accesses but + * should be considered atomic. + */ +void kcsan_flat_atomic_begin(void); + +/** + * kcsan_flat_atomic_end - end flat atomic region + */ +void kcsan_flat_atomic_end(void); + +/** + * kcsan_atomic_next - consider following accesses as atomic + * + * Force treating the next n memory accesses for the current context as atomic + * operations. + * + * @n number of following memory accesses to treat as atomic. + */ +void kcsan_atomic_next(int n); + +#else /* CONFIG_KCSAN */ + +static inline void kcsan_init(void) { } + +static inline void kcsan_disable_current(void) { } + +static inline void kcsan_enable_current(void) { } + +static inline void kcsan_nestable_atomic_begin(void) { } + +static inline void kcsan_nestable_atomic_end(void) { } + +static inline void kcsan_flat_atomic_begin(void) { } + +static inline void kcsan_flat_atomic_end(void) { } + +static inline void kcsan_atomic_next(int n) { } + +#endif /* CONFIG_KCSAN */ + +#endif /* _LINUX_KCSAN_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 67a1d86981a9..ae4f341c1db4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -31,6 +31,7 @@ #include #include #include +#include /* task_struct member predeclarations (sorted alphabetically): */ struct audit_context; @@ -1172,6 +1173,9 @@ struct task_struct { #ifdef CONFIG_KASAN unsigned int kasan_depth; #endif +#ifdef CONFIG_KCSAN + struct kcsan_ctx kcsan_ctx; +#endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ -- cgit v1.3 From c48981eeb0d56e107691df590007d6699441a689 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:55 +0100 Subject: include/linux/compiler.h: Introduce data_race(expr) macro This introduces the data_race(expr) macro, which can be used to annotate expressions for purposes of (1) documenting, and (2) giving tooling such as KCSAN information about which data races are deemed "safe". More context: http://lkml.kernel.org/r/CAHk-=wg5CkOEF8DTez1Qu0XTEFw_oHhxN98bDnFqbY7HL5AB2g@mail.gmail.com Signed-off-by: Marco Elver Cc: Alan Stern Cc: Eric Dumazet Cc: Linus Torvalds Cc: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/compiler.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c42fa83f23fb..7d3e77781578 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -310,6 +310,26 @@ unsigned long read_word_at_a_time(const void *addr) __u.__val; \ }) +#include + +/* + * data_race: macro to document that accesses in an expression may conflict with + * other concurrent accesses resulting in data races, but the resulting + * behaviour is deemed safe regardless. + * + * This macro *does not* affect normal code generation, but is a hint to tooling + * that data races here should be ignored. + */ +#define data_race(expr) \ + ({ \ + typeof(({ expr; })) __val; \ + kcsan_nestable_atomic_begin(); \ + __val = ({ expr; }); \ + kcsan_nestable_atomic_end(); \ + __val; \ + }) +#else + #endif /* __KERNEL__ */ /* -- cgit v1.3 From 88ecd153be9519f259b87a9f6f4c8383a8b3bbf1 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:59 +0100 Subject: seqlock, kcsan: Add annotations for KCSAN Since seqlocks in the Linux kernel do not require the use of marked atomic accesses in critical sections, we teach KCSAN to assume such accesses are atomic. KCSAN currently also pretends that writes to `sequence` are atomic, although currently plain writes are used (their corresponding reads are READ_ONCE). Further, to avoid false positives in the absence of clear ending of a seqlock reader critical section (only when using the raw interface), KCSAN assumes a fixed number of accesses after start of a seqlock critical section are atomic. === Commentary on design around absence of clear begin/end markings === Seqlock usage via seqlock_t follows a predictable usage pattern, where clear critical section begin/end is enforced. With subtle special cases for readers needing to be flat atomic regions, e.g. because usage such as in: - fs/namespace.c:__legitimize_mnt - unbalanced read_seqretry - fs/dcache.c:d_walk - unbalanced need_seqretry But, anything directly accessing seqcount_t seems to be unpredictable. Filtering for usage of read_seqcount_retry not following 'do { .. } while (read_seqcount_retry(..));': $ git grep 'read_seqcount_retry' | grep -Ev 'while \(|seqlock.h|Doc|\* ' => about 1/3 of the total read_seqcount_retry usage. Just looking at fs/namei.c, we conclude that it is non-trivial to prescribe and migrate to an interface that would force clear begin/end seqlock markings for critical sections. As such, we concluded that the best design currently, is to simply ensure that KCSAN works well with the existing code. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/seqlock.h | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index bcf4cf26b8c8..61232bc223fd 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -37,8 +37,24 @@ #include #include #include +#include #include +/* + * The seqlock interface does not prescribe a precise sequence of read + * begin/retry/end. For readers, typically there is a call to + * read_seqcount_begin() and read_seqcount_retry(), however, there are more + * esoteric cases which do not follow this pattern. + * + * As a consequence, we take the following best-effort approach for raw usage + * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, + * pessimistically mark then next KCSAN_SEQLOCK_REGION_MAX memory accesses as + * atomics; if there is a matching read_seqcount_retry() call, no following + * memory operations are considered atomic. Usage of seqlocks via seqlock_t + * interface is not affected. + */ +#define KCSAN_SEQLOCK_REGION_MAX 1000 + /* * Version using sequence counter only. * This can be used when code has its own mutex protecting the @@ -115,6 +131,7 @@ repeat: cpu_relax(); goto repeat; } + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret; } @@ -131,6 +148,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret; } @@ -183,6 +201,7 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret & ~1; } @@ -202,7 +221,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) */ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) { - return unlikely(s->sequence != start); + kcsan_atomic_next(0); + return unlikely(READ_ONCE(s->sequence) != start); } /** @@ -225,6 +245,7 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) static inline void raw_write_seqcount_begin(seqcount_t *s) { + kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); } @@ -233,6 +254,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; + kcsan_nestable_atomic_end(); } /** @@ -271,9 +293,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) */ static inline void raw_write_seqcount_barrier(seqcount_t *s) { + kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); s->sequence++; + kcsan_nestable_atomic_end(); } static inline int raw_read_seqcount_latch(seqcount_t *s) @@ -398,7 +422,9 @@ static inline void write_seqcount_end(seqcount_t *s) static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); + kcsan_nestable_atomic_begin(); s->sequence+=2; + kcsan_nestable_atomic_end(); } typedef struct { @@ -430,11 +456,21 @@ typedef struct { */ static inline unsigned read_seqbegin(const seqlock_t *sl) { - return read_seqcount_begin(&sl->seqcount); + unsigned ret = read_seqcount_begin(&sl->seqcount); + + kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry */ + kcsan_flat_atomic_begin(); + return ret; } static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { + /* + * Assume not nested: read_seqretry may be called multiple times when + * completing read critical section. + */ + kcsan_flat_atomic_end(); + return read_seqcount_retry(&sl->seqcount, start); } -- cgit v1.3 From bf07132f96d426bcbf2098227fb680915cf44498 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:03:00 +0100 Subject: seqlock: Require WRITE_ONCE surrounding raw_seqcount_barrier This patch proposes to require marked atomic accesses surrounding raw_write_seqcount_barrier. We reason that otherwise there is no way to guarantee propagation nor atomicity of writes before/after the barrier [1]. For example, consider the compiler tears stores either before or after the barrier; in this case, readers may observe a partial value, and because readers are unaware that writes are going on (writes are not in a seq-writer critical section), will complete the seq-reader critical section while having observed some partial state. [1] https://lwn.net/Articles/793253/ This came up when designing and implementing KCSAN, because KCSAN would flag these accesses as data-races. After careful analysis, our reasoning as above led us to conclude that the best thing to do is to propose an amendment to the raw_seqcount_barrier usage. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/seqlock.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 61232bc223fd..f52c91be8939 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -265,6 +265,13 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * usual consistency guarantee. It is one wmb cheaper, because we can * collapse the two back-to-back wmb()s. * + * Note that, writes surrounding the barrier should be declared atomic (e.g. + * via WRITE_ONCE): a) to ensure the writes become visible to other threads + * atomically, avoiding compiler optimizations; b) to document which writes are + * meant to propagate to the reader critical section. This is necessary because + * neither writes before and after the barrier are enclosed in a seq-writer + * critical section that would ensure readers are aware of ongoing writes. + * * seqcount_t seq; * bool X = true, Y = false; * @@ -284,11 +291,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * * void write(void) * { - * Y = true; + * WRITE_ONCE(Y, true); * * raw_write_seqcount_barrier(seq); * - * X = false; + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) -- cgit v1.3 From 5cbaefe9743bf14c9d3106db0cc19f8cb0a3ca22 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 10:41:43 +0100 Subject: kcsan: Improve various small stylistic details Tidy up a few bits: - Fix typos and grammar, improve wording. - Remove spurious newlines that are col80 warning artifacts where the resulting line-break is worse than the disease it's curing. - Use core kernel coding style to improve readability and reduce spurious code pattern variations. - Use better vertical alignment for structure definitions and initialization sequences. - Misc other small details. No change in functionality intended. Cc: linux-kernel@vger.kernel.org Cc: Marco Elver Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. McKenney Cc: Will Deacon Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- include/linux/compiler-clang.h | 2 +- include/linux/compiler.h | 2 +- include/linux/kcsan-checks.h | 22 ++++++--------- include/linux/kcsan.h | 23 ++++++---------- include/linux/seqlock.h | 8 +++--- kernel/kcsan/atomic.h | 2 +- kernel/kcsan/core.c | 59 ++++++++++++++++++---------------------- kernel/kcsan/debugfs.c | 62 ++++++++++++++++++++---------------------- kernel/kcsan/encoding.h | 25 +++++++++-------- kernel/kcsan/kcsan.h | 11 ++++---- kernel/kcsan/report.c | 42 ++++++++++++++-------------- kernel/kcsan/test.c | 6 ++-- kernel/sched/Makefile | 2 +- lib/Kconfig.kcsan | 16 +++++------ 15 files changed, 131 insertions(+), 153 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9933ca8ffe16..9cfa4a5c6f42 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -226,7 +226,7 @@ config X86 select VIRT_TO_BUS select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS - select HAVE_ARCH_KCSAN if X86_64 + select HAVE_ARCH_KCSAN if X86_64 config INSTRUCTION_DECODER def_bool y diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index a213eb55e725..2cb42d8bdedc 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -16,7 +16,7 @@ #define KASAN_ABI_VERSION 5 #if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) -/* emulate gcc's __SANITIZE_ADDRESS__ flag */ +/* Emulate GCC's __SANITIZE_ADDRESS__ flag */ #define __SANITIZE_ADDRESS__ #define __no_sanitize_address \ __attribute__((no_sanitize("address", "hwaddress"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7d3e77781578..ad8c76144a3c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -313,7 +313,7 @@ unsigned long read_word_at_a_time(const void *addr) #include /* - * data_race: macro to document that accesses in an expression may conflict with + * data_race(): macro to document that accesses in an expression may conflict with * other concurrent accesses resulting in data races, but the resulting * behaviour is deemed safe regardless. * diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index e78220661086..ef3ee233a3fa 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -8,17 +8,17 @@ /* * Access type modifiers. */ -#define KCSAN_ACCESS_WRITE 0x1 +#define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 /* - * __kcsan_*: Always calls into runtime when KCSAN is enabled. This may be used + * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used * even in compilation units that selectively disable KCSAN, but must use KCSAN - * to validate access to an address. Never use these in header files! + * to validate access to an address. Never use these in header files! */ #ifdef CONFIG_KCSAN /** - * __kcsan_check_access - check generic access for data race + * __kcsan_check_access - check generic access for data races * * @ptr address of access * @size size of access @@ -32,7 +32,7 @@ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, #endif /* - * kcsan_*: Only calls into runtime when the particular compilation unit has + * kcsan_*: Only calls into the runtime when the particular compilation unit has * KCSAN instrumentation enabled. May be used in header files. */ #ifdef __SANITIZE_THREAD__ @@ -77,16 +77,12 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) /* - * Check for atomic accesses: if atomic access are not ignored, this simply - * aliases to kcsan_check_access, otherwise becomes a no-op. + * Check for atomic accesses: if atomic accesses are not ignored, this simply + * aliases to kcsan_check_access(), otherwise becomes a no-op. */ #ifdef CONFIG_KCSAN_IGNORE_ATOMICS -#define kcsan_check_atomic_read(...) \ - do { \ - } while (0) -#define kcsan_check_atomic_write(...) \ - do { \ - } while (0) +#define kcsan_check_atomic_read(...) do { } while (0) +#define kcsan_check_atomic_write(...) do { } while (0) #else #define kcsan_check_atomic_read(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC) diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 9047048fee84..1019e3a2c689 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -94,21 +94,14 @@ void kcsan_atomic_next(int n); #else /* CONFIG_KCSAN */ -static inline void kcsan_init(void) { } - -static inline void kcsan_disable_current(void) { } - -static inline void kcsan_enable_current(void) { } - -static inline void kcsan_nestable_atomic_begin(void) { } - -static inline void kcsan_nestable_atomic_end(void) { } - -static inline void kcsan_flat_atomic_begin(void) { } - -static inline void kcsan_flat_atomic_end(void) { } - -static inline void kcsan_atomic_next(int n) { } +static inline void kcsan_init(void) { } +static inline void kcsan_disable_current(void) { } +static inline void kcsan_enable_current(void) { } +static inline void kcsan_nestable_atomic_begin(void) { } +static inline void kcsan_nestable_atomic_end(void) { } +static inline void kcsan_flat_atomic_begin(void) { } +static inline void kcsan_flat_atomic_end(void) { } +static inline void kcsan_atomic_next(int n) { } #endif /* CONFIG_KCSAN */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f52c91be8939..f80d50cac199 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -48,7 +48,7 @@ * * As a consequence, we take the following best-effort approach for raw usage * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, - * pessimistically mark then next KCSAN_SEQLOCK_REGION_MAX memory accesses as + * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following * memory operations are considered atomic. Usage of seqlocks via seqlock_t * interface is not affected. @@ -265,7 +265,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * usual consistency guarantee. It is one wmb cheaper, because we can * collapse the two back-to-back wmb()s. * - * Note that, writes surrounding the barrier should be declared atomic (e.g. + * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because @@ -465,7 +465,7 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) { unsigned ret = read_seqcount_begin(&sl->seqcount); - kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry */ + kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */ kcsan_flat_atomic_begin(); return ret; } @@ -473,7 +473,7 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* - * Assume not nested: read_seqretry may be called multiple times when + * Assume not nested: read_seqretry() may be called multiple times when * completing read critical section. */ kcsan_flat_atomic_end(); diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h index c9c3fe628011..576e03ddd6a3 100644 --- a/kernel/kcsan/atomic.h +++ b/kernel/kcsan/atomic.h @@ -6,7 +6,7 @@ #include /* - * Helper that returns true if access to ptr should be considered as an atomic + * Helper that returns true if access to @ptr should be considered an atomic * access, even though it is not explicitly atomic. * * List all volatile globals that have been observed in races, to suppress diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index d9410d58c93e..3314fc29e236 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -19,10 +19,10 @@ bool kcsan_enabled; /* Per-CPU kcsan_ctx for interrupts */ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { - .disable_count = 0, - .atomic_next = 0, - .atomic_nest_count = 0, - .in_flat_atomic = false, + .disable_count = 0, + .atomic_next = 0, + .atomic_nest_count = 0, + .in_flat_atomic = false, }; /* @@ -50,11 +50,11 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * slot=9: [10, 11, 9] * slot=63: [64, 65, 63] */ -#define NUM_SLOTS (1 + 2 * KCSAN_CHECK_ADJACENT) +#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT) #define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS)) /* - * SLOT_IDX_FAST is used in fast-path. Not first checking the address's primary + * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary * slot (middle) is fine if we assume that data races occur rarely. The set of * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}. @@ -68,9 +68,9 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * zero-initialized state matches INVALID_WATCHPOINT. * * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to - * use more complicated SLOT_IDX_FAST calculation with modulo in fast-path. + * use more complicated SLOT_IDX_FAST calculation with modulo in the fast-path. */ -static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS - 1]; +static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; /* * Instructions to skip watching counter, used in should_watch(). We use a @@ -78,7 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS - 1]; */ static DEFINE_PER_CPU(long, kcsan_skip); -static inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, +static inline atomic_long_t *find_watchpoint(unsigned long addr, + size_t size, bool expect_write, long *encoded_watchpoint) { @@ -110,8 +111,8 @@ static inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, return NULL; } -static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, - bool is_write) +static inline atomic_long_t * +insert_watchpoint(unsigned long addr, size_t size, bool is_write) { const int slot = watchpoint_slot(addr); const long encoded_watchpoint = encode_watchpoint(addr, size, is_write); @@ -120,21 +121,16 @@ static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, /* Check slot index logic, ensuring we stay within array bounds. */ BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT); - BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT + 1) != 0); - BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, - KCSAN_CHECK_ADJACENT) != - ARRAY_SIZE(watchpoints) - 1); - BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, - KCSAN_CHECK_ADJACENT + 1) != - ARRAY_SIZE(watchpoints) - NUM_SLOTS); + BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT+1) != 0); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT) != ARRAY_SIZE(watchpoints)-1); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT+1) != ARRAY_SIZE(watchpoints) - NUM_SLOTS); for (i = 0; i < NUM_SLOTS; ++i) { long expect_val = INVALID_WATCHPOINT; /* Try to acquire this slot. */ watchpoint = &watchpoints[SLOT_IDX(slot, i)]; - if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, - encoded_watchpoint)) + if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, encoded_watchpoint)) return watchpoint; } @@ -150,11 +146,10 @@ static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, * 2. the thread that set up the watchpoint already removed it; * 3. the watchpoint was removed and then re-used. */ -static inline bool try_consume_watchpoint(atomic_long_t *watchpoint, - long encoded_watchpoint) +static inline bool +try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint) { - return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, - CONSUMED_WATCHPOINT); + return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT); } /* @@ -162,14 +157,13 @@ static inline bool try_consume_watchpoint(atomic_long_t *watchpoint, */ static inline bool remove_watchpoint(atomic_long_t *watchpoint) { - return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != - CONSUMED_WATCHPOINT; + return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT; } static inline struct kcsan_ctx *get_ctx(void) { /* - * In interrupt, use raw_cpu_ptr to avoid unnecessary checks, that would + * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would * also result in calls that generate warnings in uaccess regions. */ return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); @@ -260,7 +254,8 @@ static inline unsigned int get_delay(void) */ static noinline void kcsan_found_watchpoint(const volatile void *ptr, - size_t size, bool is_write, + size_t size, + bool is_write, atomic_long_t *watchpoint, long encoded_watchpoint) { @@ -296,8 +291,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, user_access_restore(flags); } -static noinline void kcsan_setup_watchpoint(const volatile void *ptr, - size_t size, bool is_write) +static noinline void +kcsan_setup_watchpoint(const volatile void *ptr, size_t size, bool is_write) { atomic_long_t *watchpoint; union { @@ -346,8 +341,8 @@ static noinline void kcsan_setup_watchpoint(const volatile void *ptr, watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); if (watchpoint == NULL) { /* - * Out of capacity: the size of `watchpoints`, and the frequency - * with which `should_watch()` returns true should be tweaked so + * Out of capacity: the size of 'watchpoints', and the frequency + * with which should_watch() returns true should be tweaked so * that this case happens very rarely. */ kcsan_counter_inc(KCSAN_COUNTER_NO_CAPACITY); diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 041d520a0183..bec42dab32ee 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -24,39 +24,31 @@ static atomic_long_t counters[KCSAN_COUNTER_COUNT]; * whitelist or blacklist. */ static struct { - unsigned long *addrs; /* array of addresses */ - size_t size; /* current size */ - int used; /* number of elements used */ - bool sorted; /* if elements are sorted */ - bool whitelist; /* if list is a blacklist or whitelist */ + unsigned long *addrs; /* array of addresses */ + size_t size; /* current size */ + int used; /* number of elements used */ + bool sorted; /* if elements are sorted */ + bool whitelist; /* if list is a blacklist or whitelist */ } report_filterlist = { - .addrs = NULL, - .size = 8, /* small initial size */ - .used = 0, - .sorted = false, - .whitelist = false, /* default is blacklist */ + .addrs = NULL, + .size = 8, /* small initial size */ + .used = 0, + .sorted = false, + .whitelist = false, /* default is blacklist */ }; static DEFINE_SPINLOCK(report_filterlist_lock); static const char *counter_to_name(enum kcsan_counter_id id) { switch (id) { - case KCSAN_COUNTER_USED_WATCHPOINTS: - return "used_watchpoints"; - case KCSAN_COUNTER_SETUP_WATCHPOINTS: - return "setup_watchpoints"; - case KCSAN_COUNTER_DATA_RACES: - return "data_races"; - case KCSAN_COUNTER_NO_CAPACITY: - return "no_capacity"; - case KCSAN_COUNTER_REPORT_RACES: - return "report_races"; - case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: - return "races_unknown_origin"; - case KCSAN_COUNTER_UNENCODABLE_ACCESSES: - return "unencodable_accesses"; - case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: - return "encoding_false_positives"; + case KCSAN_COUNTER_USED_WATCHPOINTS: return "used_watchpoints"; + case KCSAN_COUNTER_SETUP_WATCHPOINTS: return "setup_watchpoints"; + case KCSAN_COUNTER_DATA_RACES: return "data_races"; + case KCSAN_COUNTER_NO_CAPACITY: return "no_capacity"; + case KCSAN_COUNTER_REPORT_RACES: return "report_races"; + case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: return "races_unknown_origin"; + case KCSAN_COUNTER_UNENCODABLE_ACCESSES: return "unencodable_accesses"; + case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: return "encoding_false_positives"; case KCSAN_COUNTER_COUNT: BUG(); } @@ -116,7 +108,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr) if (!kallsyms_lookup_size_offset(func_addr, &symbolsize, &offset)) return false; - func_addr -= offset; /* get function start */ + func_addr -= offset; /* Get function start */ spin_lock_irqsave(&report_filterlist_lock, flags); if (report_filterlist.used == 0) @@ -195,6 +187,7 @@ static ssize_t insert_report_filterlist(const char *func) out: spin_unlock_irqrestore(&report_filterlist_lock, flags); + return ret; } @@ -226,8 +219,8 @@ static int debugfs_open(struct inode *inode, struct file *file) return single_open(file, show_info, NULL); } -static ssize_t debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *off) +static ssize_t +debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { char kbuf[KSYM_NAME_LEN]; char *arg; @@ -264,10 +257,13 @@ static ssize_t debugfs_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations debugfs_ops = { .read = seq_read, - .open = debugfs_open, - .write = debugfs_write, - .release = single_release }; +static const struct file_operations debugfs_ops = +{ + .read = seq_read, + .open = debugfs_open, + .write = debugfs_write, + .release = single_release +}; void __init kcsan_debugfs_init(void) { diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h index e17bdac0e54b..b63890e86449 100644 --- a/kernel/kcsan/encoding.h +++ b/kernel/kcsan/encoding.h @@ -10,7 +10,8 @@ #include "kcsan.h" #define SLOT_RANGE PAGE_SIZE -#define INVALID_WATCHPOINT 0 + +#define INVALID_WATCHPOINT 0 #define CONSUMED_WATCHPOINT 1 /* @@ -34,24 +35,24 @@ * Both these are assumed to be very unlikely. However, in case it still happens * happens, the report logic will filter out the false positive (see report.c). */ -#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG - 1 - WATCHPOINT_SIZE_BITS) +#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS) /* * Masks to set/retrieve the encoded data. */ -#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG - 1) +#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG-1) #define WATCHPOINT_SIZE_MASK \ - GENMASK(BITS_PER_LONG - 2, BITS_PER_LONG - 2 - WATCHPOINT_SIZE_BITS) + GENMASK(BITS_PER_LONG-2, BITS_PER_LONG-2 - WATCHPOINT_SIZE_BITS) #define WATCHPOINT_ADDR_MASK \ - GENMASK(BITS_PER_LONG - 3 - WATCHPOINT_SIZE_BITS, 0) + GENMASK(BITS_PER_LONG-3 - WATCHPOINT_SIZE_BITS, 0) static inline bool check_encodable(unsigned long addr, size_t size) { return size <= MAX_ENCODABLE_SIZE; } -static inline long encode_watchpoint(unsigned long addr, size_t size, - bool is_write) +static inline long +encode_watchpoint(unsigned long addr, size_t size, bool is_write) { return (long)((is_write ? WATCHPOINT_WRITE_MASK : 0) | (size << WATCHPOINT_ADDR_BITS) | @@ -59,17 +60,17 @@ static inline long encode_watchpoint(unsigned long addr, size_t size, } static inline bool decode_watchpoint(long watchpoint, - unsigned long *addr_masked, size_t *size, + unsigned long *addr_masked, + size_t *size, bool *is_write) { if (watchpoint == INVALID_WATCHPOINT || watchpoint == CONSUMED_WATCHPOINT) return false; - *addr_masked = (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK; - *size = ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> - WATCHPOINT_ADDR_BITS; - *is_write = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK); + *addr_masked = (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK; + *size = ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> WATCHPOINT_ADDR_BITS; + *is_write = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK); return true; } diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 1bb2f1c0d61e..d3b9a96ac8a4 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -72,14 +72,14 @@ enum kcsan_counter_id { /* * Increment/decrement counter with given id; avoid calling these in fast-path. */ -void kcsan_counter_inc(enum kcsan_counter_id id); -void kcsan_counter_dec(enum kcsan_counter_id id); +extern void kcsan_counter_inc(enum kcsan_counter_id id); +extern void kcsan_counter_dec(enum kcsan_counter_id id); /* * Returns true if data races in the function symbol that maps to func_addr * (offsets are ignored) should *not* be reported. */ -bool kcsan_skip_report_debugfs(unsigned long func_addr); +extern bool kcsan_skip_report_debugfs(unsigned long func_addr); enum kcsan_report_type { /* @@ -99,10 +99,11 @@ enum kcsan_report_type { */ KCSAN_REPORT_RACE_UNKNOWN_ORIGIN, }; + /* * Print a race report from thread that encountered the race. */ -void kcsan_report(const volatile void *ptr, size_t size, bool is_write, - bool value_change, int cpu_id, enum kcsan_report_type type); +extern void kcsan_report(const volatile void *ptr, size_t size, bool is_write, + bool value_change, int cpu_id, enum kcsan_report_type type); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ead5610bafa7..0eea05a3135b 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -22,13 +22,13 @@ * the reports, with reporting being in the slow-path. */ static struct { - const volatile void *ptr; - size_t size; - bool is_write; - int task_pid; - int cpu_id; - unsigned long stack_entries[NUM_STACK_ENTRIES]; - int num_stack_entries; + const volatile void *ptr; + size_t size; + bool is_write; + int task_pid; + int cpu_id; + unsigned long stack_entries[NUM_STACK_ENTRIES]; + int num_stack_entries; } other_info = { .ptr = NULL }; /* @@ -40,8 +40,8 @@ static DEFINE_SPINLOCK(report_lock); /* * Special rules to skip reporting. */ -static bool skip_report(bool is_write, bool value_change, - unsigned long top_frame) +static bool +skip_report(bool is_write, bool value_change, unsigned long top_frame) { if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && is_write && !value_change) { @@ -105,6 +105,7 @@ static int sym_strcmp(void *addr1, void *addr2) snprintf(buf1, sizeof(buf1), "%pS", addr1); snprintf(buf2, sizeof(buf2), "%pS", addr2); + return strncmp(buf1, buf2, sizeof(buf1)); } @@ -116,8 +117,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, enum kcsan_report_type type) { unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; - int num_stack_entries = - stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); + int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); int other_skipnr; @@ -131,7 +131,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, other_skipnr = get_stack_skipnr(other_info.stack_entries, other_info.num_stack_entries); - /* value_change is only known for the other thread */ + /* @value_change is only known for the other thread */ if (skip_report(other_info.is_write, value_change, other_info.stack_entries[other_skipnr])) return false; @@ -241,13 +241,12 @@ retry: if (other_info.ptr != NULL) break; /* still in use, retry */ - other_info.ptr = ptr; - other_info.size = size; - other_info.is_write = is_write; - other_info.task_pid = in_task() ? task_pid_nr(current) : -1; - other_info.cpu_id = cpu_id; - other_info.num_stack_entries = stack_trace_save( - other_info.stack_entries, NUM_STACK_ENTRIES, 1); + other_info.ptr = ptr; + other_info.size = size; + other_info.is_write = is_write; + other_info.task_pid = in_task() ? task_pid_nr(current) : -1; + other_info.cpu_id = cpu_id; + other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); spin_unlock_irqrestore(&report_lock, *flags); @@ -299,6 +298,7 @@ retry: } spin_unlock_irqrestore(&report_lock, *flags); + goto retry; } @@ -309,9 +309,7 @@ void kcsan_report(const volatile void *ptr, size_t size, bool is_write, kcsan_disable_current(); if (prepare_report(&flags, ptr, size, is_write, cpu_id, type)) { - if (print_report(ptr, size, is_write, value_change, cpu_id, - type) && - panic_on_warn) + if (print_report(ptr, size, is_write, value_change, cpu_id, type) && panic_on_warn) panic("panic_on_warn set ...\n"); release_report(&flags, type); diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c index 0bae63c5ca65..cc6000239dc0 100644 --- a/kernel/kcsan/test.c +++ b/kernel/kcsan/test.c @@ -34,7 +34,7 @@ static bool test_encode_decode(void) if (WARN_ON(!check_encodable(addr, size))) return false; - /* encode and decode */ + /* Encode and decode */ { const long encoded_watchpoint = encode_watchpoint(addr, size, is_write); @@ -42,7 +42,7 @@ static bool test_encode_decode(void) size_t verif_size; bool verif_is_write; - /* check special watchpoints */ + /* Check special watchpoints */ if (WARN_ON(decode_watchpoint( INVALID_WATCHPOINT, &verif_masked_addr, &verif_size, &verif_is_write))) @@ -52,7 +52,7 @@ static bool test_encode_decode(void) &verif_size, &verif_is_write))) return false; - /* check decoding watchpoint returns same data */ + /* Check decoding watchpoint returns same data */ if (WARN_ON(!decode_watchpoint( encoded_watchpoint, &verif_masked_addr, &verif_size, &verif_is_write))) diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index e9307a9c54e7..5fc9c9b70862 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -7,7 +7,7 @@ endif # that is not a function of syscall inputs. E.g. involuntary context switches. KCOV_INSTRUMENT := n -# There are numerous races here, however, most of them due to plain accesses. +# There are numerous data races here, however, most of them are due to plain accesses. # This would make it even harder for syzbot to find reproducers, because these # bugs trigger without specific input. Disable by default, but should re-enable # eventually. diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 5dd464e52ab4..3f78b1434375 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -6,7 +6,6 @@ config HAVE_ARCH_KCSAN menuconfig KCSAN bool "KCSAN: watchpoint-based dynamic data race detector" depends on HAVE_ARCH_KCSAN && !KASAN && STACKTRACE - default n help Kernel Concurrency Sanitizer is a dynamic data race detector, which uses a watchpoint-based sampling approach to detect races. See @@ -16,13 +15,12 @@ if KCSAN config KCSAN_DEBUG bool "Debugging of KCSAN internals" - default n config KCSAN_SELFTEST bool "Perform short selftests on boot" default y help - Run KCSAN selftests on boot. On test failure, causes kernel to panic. + Run KCSAN selftests on boot. On test failure, causes the kernel to panic. config KCSAN_EARLY_ENABLE bool "Early enable during boot" @@ -62,7 +60,8 @@ config KCSAN_DELAY_RANDOMIZE default y help If delays should be randomized, where the maximum is KCSAN_UDELAY_*. - If false, the chosen delays are always KCSAN_UDELAY_* defined above. + If false, the chosen delays are always the KCSAN_UDELAY_* values + as defined above. config KCSAN_SKIP_WATCH int "Skip instructions before setting up watchpoint" @@ -86,9 +85,9 @@ config KCSAN_SKIP_WATCH_RANDOMIZE # parameters, to optimize for the common use-case, we avoid this because: (a) # it would impact performance (and we want to avoid static branch for all # {READ,WRITE}_ONCE, atomic_*, bitops, etc.), and (b) complicate the design -# without real benefit. The main purpose of the below options are for use in -# fuzzer configs to control reported data races, and are not expected to be -# switched frequently by a user. +# without real benefit. The main purpose of the below options is for use in +# fuzzer configs to control reported data races, and they are not expected +# to be switched frequently by a user. config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN bool "Report races of unknown origin" @@ -103,13 +102,12 @@ config KCSAN_REPORT_VALUE_CHANGE_ONLY bool "Only report races where watcher observed a data value change" default y help - If enabled and a conflicting write is observed via watchpoint, but + If enabled and a conflicting write is observed via a watchpoint, but the data value of the memory location was observed to remain unchanged, do not report the data race. config KCSAN_IGNORE_ATOMICS bool "Do not instrument marked atomic accesses" - default n help If enabled, never instruments marked atomic accesses. This results in not reporting data races where one access is atomic and the other is -- cgit v1.3 From 944bc9cca7c392879fa2c3f911bbef7422707679 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 26 Nov 2019 15:04:05 +0100 Subject: asm-generic/atomic: Use __always_inline for fallback wrappers Use __always_inline for atomic fallback wrappers. When building for size (CC_OPTIMIZE_FOR_SIZE), some compilers appear to be less inclined to inline even relatively small static inline functions that are assumed to be inlinable such as atomic ops. This can cause problems, for example in UACCESS regions. While the fallback wrappers aren't pure wrappers, they are trivial nonetheless, and the function they wrap should determine the final inlining policy. For x86 tinyconfig we observe: - vmlinux baseline: 1315988 - vmlinux with patch: 1315928 (-60 bytes) Suggested-by: Mark Rutland Signed-off-by: Marco Elver Acked-by: Mark Rutland Signed-off-by: Paul E. McKenney --- include/linux/atomic-fallback.h | 340 ++++++++++++++------------- scripts/atomic/fallbacks/acquire | 2 +- scripts/atomic/fallbacks/add_negative | 2 +- scripts/atomic/fallbacks/add_unless | 2 +- scripts/atomic/fallbacks/andnot | 2 +- scripts/atomic/fallbacks/dec | 2 +- scripts/atomic/fallbacks/dec_and_test | 2 +- scripts/atomic/fallbacks/dec_if_positive | 2 +- scripts/atomic/fallbacks/dec_unless_positive | 2 +- scripts/atomic/fallbacks/fence | 2 +- scripts/atomic/fallbacks/fetch_add_unless | 2 +- scripts/atomic/fallbacks/inc | 2 +- scripts/atomic/fallbacks/inc_and_test | 2 +- scripts/atomic/fallbacks/inc_not_zero | 2 +- scripts/atomic/fallbacks/inc_unless_negative | 2 +- scripts/atomic/fallbacks/read_acquire | 2 +- scripts/atomic/fallbacks/release | 2 +- scripts/atomic/fallbacks/set_release | 2 +- scripts/atomic/fallbacks/sub_and_test | 2 +- scripts/atomic/fallbacks/try_cmpxchg | 2 +- scripts/atomic/gen-atomic-fallback.sh | 2 + 21 files changed, 192 insertions(+), 188 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h index a7d240e465c0..656b5489b673 100644 --- a/include/linux/atomic-fallback.h +++ b/include/linux/atomic-fallback.h @@ -6,6 +6,8 @@ #ifndef _LINUX_ATOMIC_FALLBACK_H #define _LINUX_ATOMIC_FALLBACK_H +#include + #ifndef xchg_relaxed #define xchg_relaxed xchg #define xchg_acquire xchg @@ -76,7 +78,7 @@ #endif /* cmpxchg64_relaxed */ #ifndef atomic_read_acquire -static inline int +static __always_inline int atomic_read_acquire(const atomic_t *v) { return smp_load_acquire(&(v)->counter); @@ -85,7 +87,7 @@ atomic_read_acquire(const atomic_t *v) #endif #ifndef atomic_set_release -static inline void +static __always_inline void atomic_set_release(atomic_t *v, int i) { smp_store_release(&(v)->counter, i); @@ -100,7 +102,7 @@ atomic_set_release(atomic_t *v, int i) #else /* atomic_add_return_relaxed */ #ifndef atomic_add_return_acquire -static inline int +static __always_inline int atomic_add_return_acquire(int i, atomic_t *v) { int ret = atomic_add_return_relaxed(i, v); @@ -111,7 +113,7 @@ atomic_add_return_acquire(int i, atomic_t *v) #endif #ifndef atomic_add_return_release -static inline int +static __always_inline int atomic_add_return_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -121,7 +123,7 @@ atomic_add_return_release(int i, atomic_t *v) #endif #ifndef atomic_add_return -static inline int +static __always_inline int atomic_add_return(int i, atomic_t *v) { int ret; @@ -142,7 +144,7 @@ atomic_add_return(int i, atomic_t *v) #else /* atomic_fetch_add_relaxed */ #ifndef atomic_fetch_add_acquire -static inline int +static __always_inline int atomic_fetch_add_acquire(int i, atomic_t *v) { int ret = atomic_fetch_add_relaxed(i, v); @@ -153,7 +155,7 @@ atomic_fetch_add_acquire(int i, atomic_t *v) #endif #ifndef atomic_fetch_add_release -static inline int +static __always_inline int atomic_fetch_add_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -163,7 +165,7 @@ atomic_fetch_add_release(int i, atomic_t *v) #endif #ifndef atomic_fetch_add -static inline int +static __always_inline int atomic_fetch_add(int i, atomic_t *v) { int ret; @@ -184,7 +186,7 @@ atomic_fetch_add(int i, atomic_t *v) #else /* atomic_sub_return_relaxed */ #ifndef atomic_sub_return_acquire -static inline int +static __always_inline int atomic_sub_return_acquire(int i, atomic_t *v) { int ret = atomic_sub_return_relaxed(i, v); @@ -195,7 +197,7 @@ atomic_sub_return_acquire(int i, atomic_t *v) #endif #ifndef atomic_sub_return_release -static inline int +static __always_inline int atomic_sub_return_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -205,7 +207,7 @@ atomic_sub_return_release(int i, atomic_t *v) #endif #ifndef atomic_sub_return -static inline int +static __always_inline int atomic_sub_return(int i, atomic_t *v) { int ret; @@ -226,7 +228,7 @@ atomic_sub_return(int i, atomic_t *v) #else /* atomic_fetch_sub_relaxed */ #ifndef atomic_fetch_sub_acquire -static inline int +static __always_inline int atomic_fetch_sub_acquire(int i, atomic_t *v) { int ret = atomic_fetch_sub_relaxed(i, v); @@ -237,7 +239,7 @@ atomic_fetch_sub_acquire(int i, atomic_t *v) #endif #ifndef atomic_fetch_sub_release -static inline int +static __always_inline int atomic_fetch_sub_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -247,7 +249,7 @@ atomic_fetch_sub_release(int i, atomic_t *v) #endif #ifndef atomic_fetch_sub -static inline int +static __always_inline int atomic_fetch_sub(int i, atomic_t *v) { int ret; @@ -262,7 +264,7 @@ atomic_fetch_sub(int i, atomic_t *v) #endif /* atomic_fetch_sub_relaxed */ #ifndef atomic_inc -static inline void +static __always_inline void atomic_inc(atomic_t *v) { atomic_add(1, v); @@ -278,7 +280,7 @@ atomic_inc(atomic_t *v) #endif /* atomic_inc_return */ #ifndef atomic_inc_return -static inline int +static __always_inline int atomic_inc_return(atomic_t *v) { return atomic_add_return(1, v); @@ -287,7 +289,7 @@ atomic_inc_return(atomic_t *v) #endif #ifndef atomic_inc_return_acquire -static inline int +static __always_inline int atomic_inc_return_acquire(atomic_t *v) { return atomic_add_return_acquire(1, v); @@ -296,7 +298,7 @@ atomic_inc_return_acquire(atomic_t *v) #endif #ifndef atomic_inc_return_release -static inline int +static __always_inline int atomic_inc_return_release(atomic_t *v) { return atomic_add_return_release(1, v); @@ -305,7 +307,7 @@ atomic_inc_return_release(atomic_t *v) #endif #ifndef atomic_inc_return_relaxed -static inline int +static __always_inline int atomic_inc_return_relaxed(atomic_t *v) { return atomic_add_return_relaxed(1, v); @@ -316,7 +318,7 @@ atomic_inc_return_relaxed(atomic_t *v) #else /* atomic_inc_return_relaxed */ #ifndef atomic_inc_return_acquire -static inline int +static __always_inline int atomic_inc_return_acquire(atomic_t *v) { int ret = atomic_inc_return_relaxed(v); @@ -327,7 +329,7 @@ atomic_inc_return_acquire(atomic_t *v) #endif #ifndef atomic_inc_return_release -static inline int +static __always_inline int atomic_inc_return_release(atomic_t *v) { __atomic_release_fence(); @@ -337,7 +339,7 @@ atomic_inc_return_release(atomic_t *v) #endif #ifndef atomic_inc_return -static inline int +static __always_inline int atomic_inc_return(atomic_t *v) { int ret; @@ -359,7 +361,7 @@ atomic_inc_return(atomic_t *v) #endif /* atomic_fetch_inc */ #ifndef atomic_fetch_inc -static inline int +static __always_inline int atomic_fetch_inc(atomic_t *v) { return atomic_fetch_add(1, v); @@ -368,7 +370,7 @@ atomic_fetch_inc(atomic_t *v) #endif #ifndef atomic_fetch_inc_acquire -static inline int +static __always_inline int atomic_fetch_inc_acquire(atomic_t *v) { return atomic_fetch_add_acquire(1, v); @@ -377,7 +379,7 @@ atomic_fetch_inc_acquire(atomic_t *v) #endif #ifndef atomic_fetch_inc_release -static inline int +static __always_inline int atomic_fetch_inc_release(atomic_t *v) { return atomic_fetch_add_release(1, v); @@ -386,7 +388,7 @@ atomic_fetch_inc_release(atomic_t *v) #endif #ifndef atomic_fetch_inc_relaxed -static inline int +static __always_inline int atomic_fetch_inc_relaxed(atomic_t *v) { return atomic_fetch_add_relaxed(1, v); @@ -397,7 +399,7 @@ atomic_fetch_inc_relaxed(atomic_t *v) #else /* atomic_fetch_inc_relaxed */ #ifndef atomic_fetch_inc_acquire -static inline int +static __always_inline int atomic_fetch_inc_acquire(atomic_t *v) { int ret = atomic_fetch_inc_relaxed(v); @@ -408,7 +410,7 @@ atomic_fetch_inc_acquire(atomic_t *v) #endif #ifndef atomic_fetch_inc_release -static inline int +static __always_inline int atomic_fetch_inc_release(atomic_t *v) { __atomic_release_fence(); @@ -418,7 +420,7 @@ atomic_fetch_inc_release(atomic_t *v) #endif #ifndef atomic_fetch_inc -static inline int +static __always_inline int atomic_fetch_inc(atomic_t *v) { int ret; @@ -433,7 +435,7 @@ atomic_fetch_inc(atomic_t *v) #endif /* atomic_fetch_inc_relaxed */ #ifndef atomic_dec -static inline void +static __always_inline void atomic_dec(atomic_t *v) { atomic_sub(1, v); @@ -449,7 +451,7 @@ atomic_dec(atomic_t *v) #endif /* atomic_dec_return */ #ifndef atomic_dec_return -static inline int +static __always_inline int atomic_dec_return(atomic_t *v) { return atomic_sub_return(1, v); @@ -458,7 +460,7 @@ atomic_dec_return(atomic_t *v) #endif #ifndef atomic_dec_return_acquire -static inline int +static __always_inline int atomic_dec_return_acquire(atomic_t *v) { return atomic_sub_return_acquire(1, v); @@ -467,7 +469,7 @@ atomic_dec_return_acquire(atomic_t *v) #endif #ifndef atomic_dec_return_release -static inline int +static __always_inline int atomic_dec_return_release(atomic_t *v) { return atomic_sub_return_release(1, v); @@ -476,7 +478,7 @@ atomic_dec_return_release(atomic_t *v) #endif #ifndef atomic_dec_return_relaxed -static inline int +static __always_inline int atomic_dec_return_relaxed(atomic_t *v) { return atomic_sub_return_relaxed(1, v); @@ -487,7 +489,7 @@ atomic_dec_return_relaxed(atomic_t *v) #else /* atomic_dec_return_relaxed */ #ifndef atomic_dec_return_acquire -static inline int +static __always_inline int atomic_dec_return_acquire(atomic_t *v) { int ret = atomic_dec_return_relaxed(v); @@ -498,7 +500,7 @@ atomic_dec_return_acquire(atomic_t *v) #endif #ifndef atomic_dec_return_release -static inline int +static __always_inline int atomic_dec_return_release(atomic_t *v) { __atomic_release_fence(); @@ -508,7 +510,7 @@ atomic_dec_return_release(atomic_t *v) #endif #ifndef atomic_dec_return -static inline int +static __always_inline int atomic_dec_return(atomic_t *v) { int ret; @@ -530,7 +532,7 @@ atomic_dec_return(atomic_t *v) #endif /* atomic_fetch_dec */ #ifndef atomic_fetch_dec -static inline int +static __always_inline int atomic_fetch_dec(atomic_t *v) { return atomic_fetch_sub(1, v); @@ -539,7 +541,7 @@ atomic_fetch_dec(atomic_t *v) #endif #ifndef atomic_fetch_dec_acquire -static inline int +static __always_inline int atomic_fetch_dec_acquire(atomic_t *v) { return atomic_fetch_sub_acquire(1, v); @@ -548,7 +550,7 @@ atomic_fetch_dec_acquire(atomic_t *v) #endif #ifndef atomic_fetch_dec_release -static inline int +static __always_inline int atomic_fetch_dec_release(atomic_t *v) { return atomic_fetch_sub_release(1, v); @@ -557,7 +559,7 @@ atomic_fetch_dec_release(atomic_t *v) #endif #ifndef atomic_fetch_dec_relaxed -static inline int +static __always_inline int atomic_fetch_dec_relaxed(atomic_t *v) { return atomic_fetch_sub_relaxed(1, v); @@ -568,7 +570,7 @@ atomic_fetch_dec_relaxed(atomic_t *v) #else /* atomic_fetch_dec_relaxed */ #ifndef atomic_fetch_dec_acquire -static inline int +static __always_inline int atomic_fetch_dec_acquire(atomic_t *v) { int ret = atomic_fetch_dec_relaxed(v); @@ -579,7 +581,7 @@ atomic_fetch_dec_acquire(atomic_t *v) #endif #ifndef atomic_fetch_dec_release -static inline int +static __always_inline int atomic_fetch_dec_release(atomic_t *v) { __atomic_release_fence(); @@ -589,7 +591,7 @@ atomic_fetch_dec_release(atomic_t *v) #endif #ifndef atomic_fetch_dec -static inline int +static __always_inline int atomic_fetch_dec(atomic_t *v) { int ret; @@ -610,7 +612,7 @@ atomic_fetch_dec(atomic_t *v) #else /* atomic_fetch_and_relaxed */ #ifndef atomic_fetch_and_acquire -static inline int +static __always_inline int atomic_fetch_and_acquire(int i, atomic_t *v) { int ret = atomic_fetch_and_relaxed(i, v); @@ -621,7 +623,7 @@ atomic_fetch_and_acquire(int i, atomic_t *v) #endif #ifndef atomic_fetch_and_release -static inline int +static __always_inline int atomic_fetch_and_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -631,7 +633,7 @@ atomic_fetch_and_release(int i, atomic_t *v) #endif #ifndef atomic_fetch_and -static inline int +static __always_inline int atomic_fetch_and(int i, atomic_t *v) { int ret; @@ -646,7 +648,7 @@ atomic_fetch_and(int i, atomic_t *v) #endif /* atomic_fetch_and_relaxed */ #ifndef atomic_andnot -static inline void +static __always_inline void atomic_andnot(int i, atomic_t *v) { atomic_and(~i, v); @@ -662,7 +664,7 @@ atomic_andnot(int i, atomic_t *v) #endif /* atomic_fetch_andnot */ #ifndef atomic_fetch_andnot -static inline int +static __always_inline int atomic_fetch_andnot(int i, atomic_t *v) { return atomic_fetch_and(~i, v); @@ -671,7 +673,7 @@ atomic_fetch_andnot(int i, atomic_t *v) #endif #ifndef atomic_fetch_andnot_acquire -static inline int +static __always_inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) { return atomic_fetch_and_acquire(~i, v); @@ -680,7 +682,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v) #endif #ifndef atomic_fetch_andnot_release -static inline int +static __always_inline int atomic_fetch_andnot_release(int i, atomic_t *v) { return atomic_fetch_and_release(~i, v); @@ -689,7 +691,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v) #endif #ifndef atomic_fetch_andnot_relaxed -static inline int +static __always_inline int atomic_fetch_andnot_relaxed(int i, atomic_t *v) { return atomic_fetch_and_relaxed(~i, v); @@ -700,7 +702,7 @@ atomic_fetch_andnot_relaxed(int i, atomic_t *v) #else /* atomic_fetch_andnot_relaxed */ #ifndef atomic_fetch_andnot_acquire -static inline int +static __always_inline int atomic_fetch_andnot_acquire(int i, atomic_t *v) { int ret = atomic_fetch_andnot_relaxed(i, v); @@ -711,7 +713,7 @@ atomic_fetch_andnot_acquire(int i, atomic_t *v) #endif #ifndef atomic_fetch_andnot_release -static inline int +static __always_inline int atomic_fetch_andnot_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -721,7 +723,7 @@ atomic_fetch_andnot_release(int i, atomic_t *v) #endif #ifndef atomic_fetch_andnot -static inline int +static __always_inline int atomic_fetch_andnot(int i, atomic_t *v) { int ret; @@ -742,7 +744,7 @@ atomic_fetch_andnot(int i, atomic_t *v) #else /* atomic_fetch_or_relaxed */ #ifndef atomic_fetch_or_acquire -static inline int +static __always_inline int atomic_fetch_or_acquire(int i, atomic_t *v) { int ret = atomic_fetch_or_relaxed(i, v); @@ -753,7 +755,7 @@ atomic_fetch_or_acquire(int i, atomic_t *v) #endif #ifndef atomic_fetch_or_release -static inline int +static __always_inline int atomic_fetch_or_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -763,7 +765,7 @@ atomic_fetch_or_release(int i, atomic_t *v) #endif #ifndef atomic_fetch_or -static inline int +static __always_inline int atomic_fetch_or(int i, atomic_t *v) { int ret; @@ -784,7 +786,7 @@ atomic_fetch_or(int i, atomic_t *v) #else /* atomic_fetch_xor_relaxed */ #ifndef atomic_fetch_xor_acquire -static inline int +static __always_inline int atomic_fetch_xor_acquire(int i, atomic_t *v) { int ret = atomic_fetch_xor_relaxed(i, v); @@ -795,7 +797,7 @@ atomic_fetch_xor_acquire(int i, atomic_t *v) #endif #ifndef atomic_fetch_xor_release -static inline int +static __always_inline int atomic_fetch_xor_release(int i, atomic_t *v) { __atomic_release_fence(); @@ -805,7 +807,7 @@ atomic_fetch_xor_release(int i, atomic_t *v) #endif #ifndef atomic_fetch_xor -static inline int +static __always_inline int atomic_fetch_xor(int i, atomic_t *v) { int ret; @@ -826,7 +828,7 @@ atomic_fetch_xor(int i, atomic_t *v) #else /* atomic_xchg_relaxed */ #ifndef atomic_xchg_acquire -static inline int +static __always_inline int atomic_xchg_acquire(atomic_t *v, int i) { int ret = atomic_xchg_relaxed(v, i); @@ -837,7 +839,7 @@ atomic_xchg_acquire(atomic_t *v, int i) #endif #ifndef atomic_xchg_release -static inline int +static __always_inline int atomic_xchg_release(atomic_t *v, int i) { __atomic_release_fence(); @@ -847,7 +849,7 @@ atomic_xchg_release(atomic_t *v, int i) #endif #ifndef atomic_xchg -static inline int +static __always_inline int atomic_xchg(atomic_t *v, int i) { int ret; @@ -868,7 +870,7 @@ atomic_xchg(atomic_t *v, int i) #else /* atomic_cmpxchg_relaxed */ #ifndef atomic_cmpxchg_acquire -static inline int +static __always_inline int atomic_cmpxchg_acquire(atomic_t *v, int old, int new) { int ret = atomic_cmpxchg_relaxed(v, old, new); @@ -879,7 +881,7 @@ atomic_cmpxchg_acquire(atomic_t *v, int old, int new) #endif #ifndef atomic_cmpxchg_release -static inline int +static __always_inline int atomic_cmpxchg_release(atomic_t *v, int old, int new) { __atomic_release_fence(); @@ -889,7 +891,7 @@ atomic_cmpxchg_release(atomic_t *v, int old, int new) #endif #ifndef atomic_cmpxchg -static inline int +static __always_inline int atomic_cmpxchg(atomic_t *v, int old, int new) { int ret; @@ -911,7 +913,7 @@ atomic_cmpxchg(atomic_t *v, int old, int new) #endif /* atomic_try_cmpxchg */ #ifndef atomic_try_cmpxchg -static inline bool +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) { int r, o = *old; @@ -924,7 +926,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) #endif #ifndef atomic_try_cmpxchg_acquire -static inline bool +static __always_inline bool atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { int r, o = *old; @@ -937,7 +939,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) #endif #ifndef atomic_try_cmpxchg_release -static inline bool +static __always_inline bool atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { int r, o = *old; @@ -950,7 +952,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) #endif #ifndef atomic_try_cmpxchg_relaxed -static inline bool +static __always_inline bool atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) { int r, o = *old; @@ -965,7 +967,7 @@ atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new) #else /* atomic_try_cmpxchg_relaxed */ #ifndef atomic_try_cmpxchg_acquire -static inline bool +static __always_inline bool atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) { bool ret = atomic_try_cmpxchg_relaxed(v, old, new); @@ -976,7 +978,7 @@ atomic_try_cmpxchg_acquire(atomic_t *v, int *old, int new) #endif #ifndef atomic_try_cmpxchg_release -static inline bool +static __always_inline bool atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) { __atomic_release_fence(); @@ -986,7 +988,7 @@ atomic_try_cmpxchg_release(atomic_t *v, int *old, int new) #endif #ifndef atomic_try_cmpxchg -static inline bool +static __always_inline bool atomic_try_cmpxchg(atomic_t *v, int *old, int new) { bool ret; @@ -1010,7 +1012,7 @@ atomic_try_cmpxchg(atomic_t *v, int *old, int new) * true if the result is zero, or false for all * other cases. */ -static inline bool +static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { return atomic_sub_return(i, v) == 0; @@ -1027,7 +1029,7 @@ atomic_sub_and_test(int i, atomic_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline bool +static __always_inline bool atomic_dec_and_test(atomic_t *v) { return atomic_dec_return(v) == 0; @@ -1044,7 +1046,7 @@ atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline bool +static __always_inline bool atomic_inc_and_test(atomic_t *v) { return atomic_inc_return(v) == 0; @@ -1062,7 +1064,7 @@ atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline bool +static __always_inline bool atomic_add_negative(int i, atomic_t *v) { return atomic_add_return(i, v) < 0; @@ -1080,7 +1082,7 @@ atomic_add_negative(int i, atomic_t *v) * Atomically adds @a to @v, so long as @v was not already @u. * Returns original value of @v */ -static inline int +static __always_inline int atomic_fetch_add_unless(atomic_t *v, int a, int u) { int c = atomic_read(v); @@ -1105,7 +1107,7 @@ atomic_fetch_add_unless(atomic_t *v, int a, int u) * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ -static inline bool +static __always_inline bool atomic_add_unless(atomic_t *v, int a, int u) { return atomic_fetch_add_unless(v, a, u) != u; @@ -1121,7 +1123,7 @@ atomic_add_unless(atomic_t *v, int a, int u) * Atomically increments @v by 1, if @v is non-zero. * Returns true if the increment was done. */ -static inline bool +static __always_inline bool atomic_inc_not_zero(atomic_t *v) { return atomic_add_unless(v, 1, 0); @@ -1130,7 +1132,7 @@ atomic_inc_not_zero(atomic_t *v) #endif #ifndef atomic_inc_unless_negative -static inline bool +static __always_inline bool atomic_inc_unless_negative(atomic_t *v) { int c = atomic_read(v); @@ -1146,7 +1148,7 @@ atomic_inc_unless_negative(atomic_t *v) #endif #ifndef atomic_dec_unless_positive -static inline bool +static __always_inline bool atomic_dec_unless_positive(atomic_t *v) { int c = atomic_read(v); @@ -1162,7 +1164,7 @@ atomic_dec_unless_positive(atomic_t *v) #endif #ifndef atomic_dec_if_positive -static inline int +static __always_inline int atomic_dec_if_positive(atomic_t *v) { int dec, c = atomic_read(v); @@ -1186,7 +1188,7 @@ atomic_dec_if_positive(atomic_t *v) #endif #ifndef atomic64_read_acquire -static inline s64 +static __always_inline s64 atomic64_read_acquire(const atomic64_t *v) { return smp_load_acquire(&(v)->counter); @@ -1195,7 +1197,7 @@ atomic64_read_acquire(const atomic64_t *v) #endif #ifndef atomic64_set_release -static inline void +static __always_inline void atomic64_set_release(atomic64_t *v, s64 i) { smp_store_release(&(v)->counter, i); @@ -1210,7 +1212,7 @@ atomic64_set_release(atomic64_t *v, s64 i) #else /* atomic64_add_return_relaxed */ #ifndef atomic64_add_return_acquire -static inline s64 +static __always_inline s64 atomic64_add_return_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_add_return_relaxed(i, v); @@ -1221,7 +1223,7 @@ atomic64_add_return_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_add_return_release -static inline s64 +static __always_inline s64 atomic64_add_return_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1231,7 +1233,7 @@ atomic64_add_return_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_add_return -static inline s64 +static __always_inline s64 atomic64_add_return(s64 i, atomic64_t *v) { s64 ret; @@ -1252,7 +1254,7 @@ atomic64_add_return(s64 i, atomic64_t *v) #else /* atomic64_fetch_add_relaxed */ #ifndef atomic64_fetch_add_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_add_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_fetch_add_relaxed(i, v); @@ -1263,7 +1265,7 @@ atomic64_fetch_add_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_add_release -static inline s64 +static __always_inline s64 atomic64_fetch_add_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1273,7 +1275,7 @@ atomic64_fetch_add_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_add -static inline s64 +static __always_inline s64 atomic64_fetch_add(s64 i, atomic64_t *v) { s64 ret; @@ -1294,7 +1296,7 @@ atomic64_fetch_add(s64 i, atomic64_t *v) #else /* atomic64_sub_return_relaxed */ #ifndef atomic64_sub_return_acquire -static inline s64 +static __always_inline s64 atomic64_sub_return_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_sub_return_relaxed(i, v); @@ -1305,7 +1307,7 @@ atomic64_sub_return_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_sub_return_release -static inline s64 +static __always_inline s64 atomic64_sub_return_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1315,7 +1317,7 @@ atomic64_sub_return_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_sub_return -static inline s64 +static __always_inline s64 atomic64_sub_return(s64 i, atomic64_t *v) { s64 ret; @@ -1336,7 +1338,7 @@ atomic64_sub_return(s64 i, atomic64_t *v) #else /* atomic64_fetch_sub_relaxed */ #ifndef atomic64_fetch_sub_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_fetch_sub_relaxed(i, v); @@ -1347,7 +1349,7 @@ atomic64_fetch_sub_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_sub_release -static inline s64 +static __always_inline s64 atomic64_fetch_sub_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1357,7 +1359,7 @@ atomic64_fetch_sub_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_sub -static inline s64 +static __always_inline s64 atomic64_fetch_sub(s64 i, atomic64_t *v) { s64 ret; @@ -1372,7 +1374,7 @@ atomic64_fetch_sub(s64 i, atomic64_t *v) #endif /* atomic64_fetch_sub_relaxed */ #ifndef atomic64_inc -static inline void +static __always_inline void atomic64_inc(atomic64_t *v) { atomic64_add(1, v); @@ -1388,7 +1390,7 @@ atomic64_inc(atomic64_t *v) #endif /* atomic64_inc_return */ #ifndef atomic64_inc_return -static inline s64 +static __always_inline s64 atomic64_inc_return(atomic64_t *v) { return atomic64_add_return(1, v); @@ -1397,7 +1399,7 @@ atomic64_inc_return(atomic64_t *v) #endif #ifndef atomic64_inc_return_acquire -static inline s64 +static __always_inline s64 atomic64_inc_return_acquire(atomic64_t *v) { return atomic64_add_return_acquire(1, v); @@ -1406,7 +1408,7 @@ atomic64_inc_return_acquire(atomic64_t *v) #endif #ifndef atomic64_inc_return_release -static inline s64 +static __always_inline s64 atomic64_inc_return_release(atomic64_t *v) { return atomic64_add_return_release(1, v); @@ -1415,7 +1417,7 @@ atomic64_inc_return_release(atomic64_t *v) #endif #ifndef atomic64_inc_return_relaxed -static inline s64 +static __always_inline s64 atomic64_inc_return_relaxed(atomic64_t *v) { return atomic64_add_return_relaxed(1, v); @@ -1426,7 +1428,7 @@ atomic64_inc_return_relaxed(atomic64_t *v) #else /* atomic64_inc_return_relaxed */ #ifndef atomic64_inc_return_acquire -static inline s64 +static __always_inline s64 atomic64_inc_return_acquire(atomic64_t *v) { s64 ret = atomic64_inc_return_relaxed(v); @@ -1437,7 +1439,7 @@ atomic64_inc_return_acquire(atomic64_t *v) #endif #ifndef atomic64_inc_return_release -static inline s64 +static __always_inline s64 atomic64_inc_return_release(atomic64_t *v) { __atomic_release_fence(); @@ -1447,7 +1449,7 @@ atomic64_inc_return_release(atomic64_t *v) #endif #ifndef atomic64_inc_return -static inline s64 +static __always_inline s64 atomic64_inc_return(atomic64_t *v) { s64 ret; @@ -1469,7 +1471,7 @@ atomic64_inc_return(atomic64_t *v) #endif /* atomic64_fetch_inc */ #ifndef atomic64_fetch_inc -static inline s64 +static __always_inline s64 atomic64_fetch_inc(atomic64_t *v) { return atomic64_fetch_add(1, v); @@ -1478,7 +1480,7 @@ atomic64_fetch_inc(atomic64_t *v) #endif #ifndef atomic64_fetch_inc_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_inc_acquire(atomic64_t *v) { return atomic64_fetch_add_acquire(1, v); @@ -1487,7 +1489,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v) #endif #ifndef atomic64_fetch_inc_release -static inline s64 +static __always_inline s64 atomic64_fetch_inc_release(atomic64_t *v) { return atomic64_fetch_add_release(1, v); @@ -1496,7 +1498,7 @@ atomic64_fetch_inc_release(atomic64_t *v) #endif #ifndef atomic64_fetch_inc_relaxed -static inline s64 +static __always_inline s64 atomic64_fetch_inc_relaxed(atomic64_t *v) { return atomic64_fetch_add_relaxed(1, v); @@ -1507,7 +1509,7 @@ atomic64_fetch_inc_relaxed(atomic64_t *v) #else /* atomic64_fetch_inc_relaxed */ #ifndef atomic64_fetch_inc_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_inc_acquire(atomic64_t *v) { s64 ret = atomic64_fetch_inc_relaxed(v); @@ -1518,7 +1520,7 @@ atomic64_fetch_inc_acquire(atomic64_t *v) #endif #ifndef atomic64_fetch_inc_release -static inline s64 +static __always_inline s64 atomic64_fetch_inc_release(atomic64_t *v) { __atomic_release_fence(); @@ -1528,7 +1530,7 @@ atomic64_fetch_inc_release(atomic64_t *v) #endif #ifndef atomic64_fetch_inc -static inline s64 +static __always_inline s64 atomic64_fetch_inc(atomic64_t *v) { s64 ret; @@ -1543,7 +1545,7 @@ atomic64_fetch_inc(atomic64_t *v) #endif /* atomic64_fetch_inc_relaxed */ #ifndef atomic64_dec -static inline void +static __always_inline void atomic64_dec(atomic64_t *v) { atomic64_sub(1, v); @@ -1559,7 +1561,7 @@ atomic64_dec(atomic64_t *v) #endif /* atomic64_dec_return */ #ifndef atomic64_dec_return -static inline s64 +static __always_inline s64 atomic64_dec_return(atomic64_t *v) { return atomic64_sub_return(1, v); @@ -1568,7 +1570,7 @@ atomic64_dec_return(atomic64_t *v) #endif #ifndef atomic64_dec_return_acquire -static inline s64 +static __always_inline s64 atomic64_dec_return_acquire(atomic64_t *v) { return atomic64_sub_return_acquire(1, v); @@ -1577,7 +1579,7 @@ atomic64_dec_return_acquire(atomic64_t *v) #endif #ifndef atomic64_dec_return_release -static inline s64 +static __always_inline s64 atomic64_dec_return_release(atomic64_t *v) { return atomic64_sub_return_release(1, v); @@ -1586,7 +1588,7 @@ atomic64_dec_return_release(atomic64_t *v) #endif #ifndef atomic64_dec_return_relaxed -static inline s64 +static __always_inline s64 atomic64_dec_return_relaxed(atomic64_t *v) { return atomic64_sub_return_relaxed(1, v); @@ -1597,7 +1599,7 @@ atomic64_dec_return_relaxed(atomic64_t *v) #else /* atomic64_dec_return_relaxed */ #ifndef atomic64_dec_return_acquire -static inline s64 +static __always_inline s64 atomic64_dec_return_acquire(atomic64_t *v) { s64 ret = atomic64_dec_return_relaxed(v); @@ -1608,7 +1610,7 @@ atomic64_dec_return_acquire(atomic64_t *v) #endif #ifndef atomic64_dec_return_release -static inline s64 +static __always_inline s64 atomic64_dec_return_release(atomic64_t *v) { __atomic_release_fence(); @@ -1618,7 +1620,7 @@ atomic64_dec_return_release(atomic64_t *v) #endif #ifndef atomic64_dec_return -static inline s64 +static __always_inline s64 atomic64_dec_return(atomic64_t *v) { s64 ret; @@ -1640,7 +1642,7 @@ atomic64_dec_return(atomic64_t *v) #endif /* atomic64_fetch_dec */ #ifndef atomic64_fetch_dec -static inline s64 +static __always_inline s64 atomic64_fetch_dec(atomic64_t *v) { return atomic64_fetch_sub(1, v); @@ -1649,7 +1651,7 @@ atomic64_fetch_dec(atomic64_t *v) #endif #ifndef atomic64_fetch_dec_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_dec_acquire(atomic64_t *v) { return atomic64_fetch_sub_acquire(1, v); @@ -1658,7 +1660,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v) #endif #ifndef atomic64_fetch_dec_release -static inline s64 +static __always_inline s64 atomic64_fetch_dec_release(atomic64_t *v) { return atomic64_fetch_sub_release(1, v); @@ -1667,7 +1669,7 @@ atomic64_fetch_dec_release(atomic64_t *v) #endif #ifndef atomic64_fetch_dec_relaxed -static inline s64 +static __always_inline s64 atomic64_fetch_dec_relaxed(atomic64_t *v) { return atomic64_fetch_sub_relaxed(1, v); @@ -1678,7 +1680,7 @@ atomic64_fetch_dec_relaxed(atomic64_t *v) #else /* atomic64_fetch_dec_relaxed */ #ifndef atomic64_fetch_dec_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_dec_acquire(atomic64_t *v) { s64 ret = atomic64_fetch_dec_relaxed(v); @@ -1689,7 +1691,7 @@ atomic64_fetch_dec_acquire(atomic64_t *v) #endif #ifndef atomic64_fetch_dec_release -static inline s64 +static __always_inline s64 atomic64_fetch_dec_release(atomic64_t *v) { __atomic_release_fence(); @@ -1699,7 +1701,7 @@ atomic64_fetch_dec_release(atomic64_t *v) #endif #ifndef atomic64_fetch_dec -static inline s64 +static __always_inline s64 atomic64_fetch_dec(atomic64_t *v) { s64 ret; @@ -1720,7 +1722,7 @@ atomic64_fetch_dec(atomic64_t *v) #else /* atomic64_fetch_and_relaxed */ #ifndef atomic64_fetch_and_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_and_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_fetch_and_relaxed(i, v); @@ -1731,7 +1733,7 @@ atomic64_fetch_and_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_and_release -static inline s64 +static __always_inline s64 atomic64_fetch_and_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1741,7 +1743,7 @@ atomic64_fetch_and_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_and -static inline s64 +static __always_inline s64 atomic64_fetch_and(s64 i, atomic64_t *v) { s64 ret; @@ -1756,7 +1758,7 @@ atomic64_fetch_and(s64 i, atomic64_t *v) #endif /* atomic64_fetch_and_relaxed */ #ifndef atomic64_andnot -static inline void +static __always_inline void atomic64_andnot(s64 i, atomic64_t *v) { atomic64_and(~i, v); @@ -1772,7 +1774,7 @@ atomic64_andnot(s64 i, atomic64_t *v) #endif /* atomic64_fetch_andnot */ #ifndef atomic64_fetch_andnot -static inline s64 +static __always_inline s64 atomic64_fetch_andnot(s64 i, atomic64_t *v) { return atomic64_fetch_and(~i, v); @@ -1781,7 +1783,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_andnot_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { return atomic64_fetch_and_acquire(~i, v); @@ -1790,7 +1792,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_andnot_release -static inline s64 +static __always_inline s64 atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { return atomic64_fetch_and_release(~i, v); @@ -1799,7 +1801,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_andnot_relaxed -static inline s64 +static __always_inline s64 atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) { return atomic64_fetch_and_relaxed(~i, v); @@ -1810,7 +1812,7 @@ atomic64_fetch_andnot_relaxed(s64 i, atomic64_t *v) #else /* atomic64_fetch_andnot_relaxed */ #ifndef atomic64_fetch_andnot_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_fetch_andnot_relaxed(i, v); @@ -1821,7 +1823,7 @@ atomic64_fetch_andnot_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_andnot_release -static inline s64 +static __always_inline s64 atomic64_fetch_andnot_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1831,7 +1833,7 @@ atomic64_fetch_andnot_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_andnot -static inline s64 +static __always_inline s64 atomic64_fetch_andnot(s64 i, atomic64_t *v) { s64 ret; @@ -1852,7 +1854,7 @@ atomic64_fetch_andnot(s64 i, atomic64_t *v) #else /* atomic64_fetch_or_relaxed */ #ifndef atomic64_fetch_or_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_or_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_fetch_or_relaxed(i, v); @@ -1863,7 +1865,7 @@ atomic64_fetch_or_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_or_release -static inline s64 +static __always_inline s64 atomic64_fetch_or_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1873,7 +1875,7 @@ atomic64_fetch_or_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_or -static inline s64 +static __always_inline s64 atomic64_fetch_or(s64 i, atomic64_t *v) { s64 ret; @@ -1894,7 +1896,7 @@ atomic64_fetch_or(s64 i, atomic64_t *v) #else /* atomic64_fetch_xor_relaxed */ #ifndef atomic64_fetch_xor_acquire -static inline s64 +static __always_inline s64 atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) { s64 ret = atomic64_fetch_xor_relaxed(i, v); @@ -1905,7 +1907,7 @@ atomic64_fetch_xor_acquire(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_xor_release -static inline s64 +static __always_inline s64 atomic64_fetch_xor_release(s64 i, atomic64_t *v) { __atomic_release_fence(); @@ -1915,7 +1917,7 @@ atomic64_fetch_xor_release(s64 i, atomic64_t *v) #endif #ifndef atomic64_fetch_xor -static inline s64 +static __always_inline s64 atomic64_fetch_xor(s64 i, atomic64_t *v) { s64 ret; @@ -1936,7 +1938,7 @@ atomic64_fetch_xor(s64 i, atomic64_t *v) #else /* atomic64_xchg_relaxed */ #ifndef atomic64_xchg_acquire -static inline s64 +static __always_inline s64 atomic64_xchg_acquire(atomic64_t *v, s64 i) { s64 ret = atomic64_xchg_relaxed(v, i); @@ -1947,7 +1949,7 @@ atomic64_xchg_acquire(atomic64_t *v, s64 i) #endif #ifndef atomic64_xchg_release -static inline s64 +static __always_inline s64 atomic64_xchg_release(atomic64_t *v, s64 i) { __atomic_release_fence(); @@ -1957,7 +1959,7 @@ atomic64_xchg_release(atomic64_t *v, s64 i) #endif #ifndef atomic64_xchg -static inline s64 +static __always_inline s64 atomic64_xchg(atomic64_t *v, s64 i) { s64 ret; @@ -1978,7 +1980,7 @@ atomic64_xchg(atomic64_t *v, s64 i) #else /* atomic64_cmpxchg_relaxed */ #ifndef atomic64_cmpxchg_acquire -static inline s64 +static __always_inline s64 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) { s64 ret = atomic64_cmpxchg_relaxed(v, old, new); @@ -1989,7 +1991,7 @@ atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) #endif #ifndef atomic64_cmpxchg_release -static inline s64 +static __always_inline s64 atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) { __atomic_release_fence(); @@ -1999,7 +2001,7 @@ atomic64_cmpxchg_release(atomic64_t *v, s64 old, s64 new) #endif #ifndef atomic64_cmpxchg -static inline s64 +static __always_inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { s64 ret; @@ -2021,7 +2023,7 @@ atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) #endif /* atomic64_try_cmpxchg */ #ifndef atomic64_try_cmpxchg -static inline bool +static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; @@ -2034,7 +2036,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) #endif #ifndef atomic64_try_cmpxchg_acquire -static inline bool +static __always_inline bool atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; @@ -2047,7 +2049,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) #endif #ifndef atomic64_try_cmpxchg_release -static inline bool +static __always_inline bool atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; @@ -2060,7 +2062,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) #endif #ifndef atomic64_try_cmpxchg_relaxed -static inline bool +static __always_inline bool atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) { s64 r, o = *old; @@ -2075,7 +2077,7 @@ atomic64_try_cmpxchg_relaxed(atomic64_t *v, s64 *old, s64 new) #else /* atomic64_try_cmpxchg_relaxed */ #ifndef atomic64_try_cmpxchg_acquire -static inline bool +static __always_inline bool atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) { bool ret = atomic64_try_cmpxchg_relaxed(v, old, new); @@ -2086,7 +2088,7 @@ atomic64_try_cmpxchg_acquire(atomic64_t *v, s64 *old, s64 new) #endif #ifndef atomic64_try_cmpxchg_release -static inline bool +static __always_inline bool atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) { __atomic_release_fence(); @@ -2096,7 +2098,7 @@ atomic64_try_cmpxchg_release(atomic64_t *v, s64 *old, s64 new) #endif #ifndef atomic64_try_cmpxchg -static inline bool +static __always_inline bool atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) { bool ret; @@ -2120,7 +2122,7 @@ atomic64_try_cmpxchg(atomic64_t *v, s64 *old, s64 new) * true if the result is zero, or false for all * other cases. */ -static inline bool +static __always_inline bool atomic64_sub_and_test(s64 i, atomic64_t *v) { return atomic64_sub_return(i, v) == 0; @@ -2137,7 +2139,7 @@ atomic64_sub_and_test(s64 i, atomic64_t *v) * returns true if the result is 0, or false for all other * cases. */ -static inline bool +static __always_inline bool atomic64_dec_and_test(atomic64_t *v) { return atomic64_dec_return(v) == 0; @@ -2154,7 +2156,7 @@ atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline bool +static __always_inline bool atomic64_inc_and_test(atomic64_t *v) { return atomic64_inc_return(v) == 0; @@ -2172,7 +2174,7 @@ atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline bool +static __always_inline bool atomic64_add_negative(s64 i, atomic64_t *v) { return atomic64_add_return(i, v) < 0; @@ -2190,7 +2192,7 @@ atomic64_add_negative(s64 i, atomic64_t *v) * Atomically adds @a to @v, so long as @v was not already @u. * Returns original value of @v */ -static inline s64 +static __always_inline s64 atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) { s64 c = atomic64_read(v); @@ -2215,7 +2217,7 @@ atomic64_fetch_add_unless(atomic64_t *v, s64 a, s64 u) * Atomically adds @a to @v, if @v was not already @u. * Returns true if the addition was done. */ -static inline bool +static __always_inline bool atomic64_add_unless(atomic64_t *v, s64 a, s64 u) { return atomic64_fetch_add_unless(v, a, u) != u; @@ -2231,7 +2233,7 @@ atomic64_add_unless(atomic64_t *v, s64 a, s64 u) * Atomically increments @v by 1, if @v is non-zero. * Returns true if the increment was done. */ -static inline bool +static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) { return atomic64_add_unless(v, 1, 0); @@ -2240,7 +2242,7 @@ atomic64_inc_not_zero(atomic64_t *v) #endif #ifndef atomic64_inc_unless_negative -static inline bool +static __always_inline bool atomic64_inc_unless_negative(atomic64_t *v) { s64 c = atomic64_read(v); @@ -2256,7 +2258,7 @@ atomic64_inc_unless_negative(atomic64_t *v) #endif #ifndef atomic64_dec_unless_positive -static inline bool +static __always_inline bool atomic64_dec_unless_positive(atomic64_t *v) { s64 c = atomic64_read(v); @@ -2272,7 +2274,7 @@ atomic64_dec_unless_positive(atomic64_t *v) #endif #ifndef atomic64_dec_if_positive -static inline s64 +static __always_inline s64 atomic64_dec_if_positive(atomic64_t *v) { s64 dec, c = atomic64_read(v); @@ -2292,4 +2294,4 @@ atomic64_dec_if_positive(atomic64_t *v) #define atomic64_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 25de4a2804d70f57e994fe3b419148658bb5378a +// baaf45f4c24ed88ceae58baca39d7fd80bb8101b diff --git a/scripts/atomic/fallbacks/acquire b/scripts/atomic/fallbacks/acquire index e38871e64db6..ea489acc285e 100755 --- a/scripts/atomic/fallbacks/acquire +++ b/scripts/atomic/fallbacks/acquire @@ -1,5 +1,5 @@ cat <counter); diff --git a/scripts/atomic/fallbacks/release b/scripts/atomic/fallbacks/release index 3f628a3802d9..730d2a6d3e07 100755 --- a/scripts/atomic/fallbacks/release +++ b/scripts/atomic/fallbacks/release @@ -1,5 +1,5 @@ cat <counter, i); diff --git a/scripts/atomic/fallbacks/sub_and_test b/scripts/atomic/fallbacks/sub_and_test index 289ef17a2d7a..6cfe4ed49746 100755 --- a/scripts/atomic/fallbacks/sub_and_test +++ b/scripts/atomic/fallbacks/sub_and_test @@ -8,7 +8,7 @@ cat < + EOF for xchg in "xchg" "cmpxchg" "cmpxchg64"; do -- cgit v1.3 From e33f9a169747880a008dd5e7b934fc592e91cd63 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 12 Dec 2019 01:07:09 +0100 Subject: kcsan: Add __no_kcsan function attribute Since the use of -fsanitize=thread is an implementation detail of KCSAN, the name __no_sanitize_thread could be misleading if used widely. Instead, we introduce the __no_kcsan attribute which is shorter and more accurate in the context of KCSAN. This matches the attribute name __no_kcsan_or_inline. The use of __kcsan_or_inline itself is still required for __always_inline functions to retain compatibility with older compilers. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/compiler-gcc.h | 3 +-- include/linux/compiler.h | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0eb2a1cc411d..cf294faec2f8 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -146,8 +146,7 @@ #endif #if defined(__SANITIZE_THREAD__) && __has_attribute(__no_sanitize_thread__) -#define __no_sanitize_thread \ - __attribute__((__noinline__)) __attribute__((no_sanitize_thread)) +#define __no_sanitize_thread __attribute__((no_sanitize_thread)) #else #define __no_sanitize_thread #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index ad8c76144a3c..8c0beb10c1dd 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -207,12 +207,15 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define __no_kasan_or_inline __always_inline #endif +#define __no_kcsan __no_sanitize_thread #ifdef __SANITIZE_THREAD__ /* * Rely on __SANITIZE_THREAD__ instead of CONFIG_KCSAN, to avoid not inlining in - * compilation units where instrumentation is disabled. + * compilation units where instrumentation is disabled. The attribute 'noinline' + * is required for older compilers, where implicit inlining of very small + * functions renders __no_sanitize_thread ineffective. */ -# define __no_kcsan_or_inline __no_sanitize_thread notrace __maybe_unused +# define __no_kcsan_or_inline __no_kcsan noinline notrace __maybe_unused # define __no_sanitize_or_inline __no_kcsan_or_inline #else # define __no_kcsan_or_inline __always_inline -- cgit v1.3 From 36e4d4dd4fc4f1d99e7966a460a2b12ce438abc2 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 21 Jan 2020 17:05:08 +0100 Subject: include/linux: Add instrumented.h infrastructure This adds instrumented.h, which provides generic wrappers for memory access instrumentation that the compiler cannot emit for various sanitizers. Currently this unifies KASAN and KCSAN instrumentation. In future this will also include KMSAN instrumentation. Note that, copy_{to,from}_user should use special instrumentation, since we should be able to instrument both source and destination memory accesses if both are kernel memory. The current patch only instruments the memory access where the address is always in kernel space, however, both may in fact be kernel addresses when a compat syscall passes an argument allocated in the kernel to a real syscall. In a future change, both KASAN and KCSAN should check both addresses in such cases, as well as KMSAN will make use of both addresses. [It made more sense to provide the completed function signature, rather than updating it and changing all locations again at a later time.] Suggested-by: Arnd Bergmann Signed-off-by: Marco Elver Acked-by: Alexander Potapenko Reviewed-by: Dmitry Vyukov Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/instrumented.h | 109 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 include/linux/instrumented.h (limited to 'include/linux') diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h new file mode 100644 index 000000000000..43e6ea591975 --- /dev/null +++ b/include/linux/instrumented.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This header provides generic wrappers for memory access instrumentation that + * the compiler cannot emit for: KASAN, KCSAN. + */ +#ifndef _LINUX_INSTRUMENTED_H +#define _LINUX_INSTRUMENTED_H + +#include +#include +#include +#include + +/** + * instrument_read - instrument regular read access + * + * Instrument a regular read access. The instrumentation should be inserted + * before the actual read happens. + * + * @ptr address of access + * @size size of access + */ +static __always_inline void instrument_read(const volatile void *v, size_t size) +{ + kasan_check_read(v, size); + kcsan_check_read(v, size); +} + +/** + * instrument_write - instrument regular write access + * + * Instrument a regular write access. The instrumentation should be inserted + * before the actual write happens. + * + * @ptr address of access + * @size size of access + */ +static __always_inline void instrument_write(const volatile void *v, size_t size) +{ + kasan_check_write(v, size); + kcsan_check_write(v, size); +} + +/** + * instrument_atomic_read - instrument atomic read access + * + * Instrument an atomic read access. The instrumentation should be inserted + * before the actual read happens. + * + * @ptr address of access + * @size size of access + */ +static __always_inline void instrument_atomic_read(const volatile void *v, size_t size) +{ + kasan_check_read(v, size); + kcsan_check_atomic_read(v, size); +} + +/** + * instrument_atomic_write - instrument atomic write access + * + * Instrument an atomic write access. The instrumentation should be inserted + * before the actual write happens. + * + * @ptr address of access + * @size size of access + */ +static __always_inline void instrument_atomic_write(const volatile void *v, size_t size) +{ + kasan_check_write(v, size); + kcsan_check_atomic_write(v, size); +} + +/** + * instrument_copy_to_user - instrument reads of copy_to_user + * + * Instrument reads from kernel memory, that are due to copy_to_user (and + * variants). The instrumentation must be inserted before the accesses. + * + * @to destination address + * @from source address + * @n number of bytes to copy + */ +static __always_inline void +instrument_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + kasan_check_read(from, n); + kcsan_check_read(from, n); +} + +/** + * instrument_copy_from_user - instrument writes of copy_from_user + * + * Instrument writes to kernel memory, that are due to copy_from_user (and + * variants). The instrumentation should be inserted before the accesses. + * + * @to destination address + * @from source address + * @n number of bytes to copy + */ +static __always_inline void +instrument_copy_from_user(const void *to, const void __user *from, unsigned long n) +{ + kasan_check_write(to, n); + kcsan_check_write(to, n); +} + +#endif /* _LINUX_INSTRUMENTED_H */ -- cgit v1.3 From 76d6f06c36a3b5cc402eeeb709613cb211fdaa8f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 21 Jan 2020 17:05:12 +0100 Subject: copy_to_user, copy_from_user: Use generic instrumented.h This replaces the KASAN instrumentation with generic instrumentation, implicitly adding KCSAN instrumentation support. For KASAN no functional change is intended. Suggested-by: Arnd Bergmann Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/uaccess.h | 14 +++++++------- lib/usercopy.c | 7 ++++--- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 67f016010aad..8a215c5c1aed 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,9 +2,9 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ +#include #include #include -#include #define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS) @@ -58,7 +58,7 @@ static __always_inline __must_check unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { - kasan_check_write(to, n); + instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } @@ -67,7 +67,7 @@ static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); - kasan_check_write(to, n); + instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); } @@ -88,7 +88,7 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { - kasan_check_read(from, n); + instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } @@ -97,7 +97,7 @@ static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); - kasan_check_read(from, n); + instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); } @@ -109,7 +109,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { - kasan_check_write(to, n); + instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) @@ -127,7 +127,7 @@ _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (access_ok(to, n)) { - kasan_check_read(from, n); + instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); } return n; diff --git a/lib/usercopy.c b/lib/usercopy.c index cbb4d9ec00f2..4bb1c5e7a3eb 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#include #include +#include +#include /* out-of-line parts */ @@ -10,7 +11,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n unsigned long res = n; might_fault(); if (likely(access_ok(from, n))) { - kasan_check_write(to, n); + instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } if (unlikely(res)) @@ -25,7 +26,7 @@ unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); if (likely(access_ok(to, n))) { - kasan_check_read(from, n); + instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); } return n; -- cgit v1.3 From 7ad900d35b49af5a05f595d2274c32e69e01b055 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 3 Feb 2020 14:42:18 -0800 Subject: kcsan: Add docbook header for data_race() Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Cc: Marco Elver Cc: Dmitry Vyukov --- include/linux/compiler.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 8c0beb10c1dd..c1bdf37571cb 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -315,13 +315,15 @@ unsigned long read_word_at_a_time(const void *addr) #include -/* - * data_race(): macro to document that accesses in an expression may conflict with - * other concurrent accesses resulting in data races, but the resulting - * behaviour is deemed safe regardless. +/** + * data_race - mark an expression as containing intentional data races + * + * This data_race() macro is useful for situations in which data races + * should be forgiven. One example is diagnostic code that accesses + * shared variables but is not a part of the core synchronization design. * - * This macro *does not* affect normal code generation, but is a hint to tooling - * that data races here should be ignored. + * This macro *does not* affect normal code generation, but is a hint + * to tooling that data races here are to be ignored. */ #define data_race(expr) \ ({ \ -- cgit v1.3 From d591ec3db75f9eadfa7976ff8796c674c0027715 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 6 Feb 2020 16:46:24 +0100 Subject: kcsan: Introduce KCSAN_ACCESS_ASSERT access type The KCSAN_ACCESS_ASSERT access type may be used to introduce dummy reads and writes to assert certain properties of concurrent code, where bugs could not be detected as normal data races. For example, a variable that is only meant to be written by a single CPU, but may be read (without locking) by other CPUs must still be marked properly to avoid data races. However, concurrent writes, regardless if WRITE_ONCE() or not, would be a bug. Using kcsan_check_access(&x, sizeof(x), KCSAN_ACCESS_ASSERT) would allow catching such bugs. To support KCSAN_ACCESS_ASSERT the following notable changes were made: * If an access is of type KCSAN_ASSERT_ACCESS, disable various filters that only apply to data races, so that all races that KCSAN observes are reported. * Bug reports that involve an ASSERT access type will be reported as "KCSAN: assert: race in ..." instead of "data-race"; this will help more easily distinguish them. * Update a few comments to just mention 'races' where we do not always mean pure data races. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/kcsan-checks.h | 18 ++++++++++++------ kernel/kcsan/core.c | 44 ++++++++++++++++++++++++++++++++++++++------ kernel/kcsan/debugfs.c | 1 + kernel/kcsan/kcsan.h | 7 +++++++ kernel/kcsan/report.c | 43 +++++++++++++++++++++++++++++++------------ lib/Kconfig.kcsan | 24 ++++++++++++++---------- 6 files changed, 103 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index ef3ee233a3fa..5dcadc221026 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -6,10 +6,16 @@ #include /* - * Access type modifiers. + * ACCESS TYPE MODIFIERS + * + * : normal read access; + * WRITE : write access; + * ATOMIC: access is atomic; + * ASSERT: access is not a regular access, but an assertion; */ #define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 +#define KCSAN_ACCESS_ASSERT 0x4 /* * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used @@ -18,7 +24,7 @@ */ #ifdef CONFIG_KCSAN /** - * __kcsan_check_access - check generic access for data races + * __kcsan_check_access - check generic access for races * * @ptr address of access * @size size of access @@ -43,7 +49,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #endif /** - * __kcsan_check_read - check regular read access for data races + * __kcsan_check_read - check regular read access for races * * @ptr address of access * @size size of access @@ -51,7 +57,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define __kcsan_check_read(ptr, size) __kcsan_check_access(ptr, size, 0) /** - * __kcsan_check_write - check regular write access for data races + * __kcsan_check_write - check regular write access for races * * @ptr address of access * @size size of access @@ -60,7 +66,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) /** - * kcsan_check_read - check regular read access for data races + * kcsan_check_read - check regular read access for races * * @ptr address of access * @size size of access @@ -68,7 +74,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define kcsan_check_read(ptr, size) kcsan_check_access(ptr, size, 0) /** - * kcsan_check_write - check regular write access for data races + * kcsan_check_write - check regular write access for races * * @ptr address of access * @size size of access diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 82c2bef827d4..87ef01e40199 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -56,7 +56,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { /* * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary - * slot (middle) is fine if we assume that data races occur rarely. The set of + * slot (middle) is fine if we assume that races occur rarely. The set of * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}. */ @@ -178,6 +178,14 @@ is_atomic(const volatile void *ptr, size_t size, int type) if ((type & KCSAN_ACCESS_ATOMIC) != 0) return true; + /* + * Unless explicitly declared atomic, never consider an assertion access + * as atomic. This allows using them also in atomic regions, such as + * seqlocks, without implicitly changing their semantics. + */ + if ((type & KCSAN_ACCESS_ASSERT) != 0) + return false; + if (IS_ENABLED(CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC) && (type & KCSAN_ACCESS_WRITE) != 0 && size <= sizeof(long) && IS_ALIGNED((unsigned long)ptr, size)) @@ -298,7 +306,11 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, */ kcsan_counter_inc(KCSAN_COUNTER_REPORT_RACES); } - kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES); + + if ((type & KCSAN_ACCESS_ASSERT) != 0) + kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + else + kcsan_counter_inc(KCSAN_COUNTER_DATA_RACES); user_access_restore(flags); } @@ -307,6 +319,7 @@ static noinline void kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) { const bool is_write = (type & KCSAN_ACCESS_WRITE) != 0; + const bool is_assert = (type & KCSAN_ACCESS_ASSERT) != 0; atomic_long_t *watchpoint; union { u8 _1; @@ -429,13 +442,32 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) /* * No need to increment 'data_races' counter, as the racing * thread already did. + * + * Count 'assert_failures' for each failed ASSERT access, + * therefore both this thread and the racing thread may + * increment this counter. */ - kcsan_report(ptr, size, type, size > 8 || value_change, - smp_processor_id(), KCSAN_REPORT_RACE_SIGNAL); + if (is_assert) + kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + + /* + * - If we were not able to observe a value change due to size + * constraints, always assume a value change. + * - If the access type is an assertion, we also always assume a + * value change to always report the race. + */ + value_change = value_change || size > 8 || is_assert; + + kcsan_report(ptr, size, type, value_change, smp_processor_id(), + KCSAN_REPORT_RACE_SIGNAL); } else if (value_change) { /* Inferring a race, since the value should not have changed. */ + kcsan_counter_inc(KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN); - if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN)) + if (is_assert) + kcsan_counter_inc(KCSAN_COUNTER_ASSERT_FAILURES); + + if (IS_ENABLED(CONFIG_KCSAN_REPORT_RACE_UNKNOWN_ORIGIN) || is_assert) kcsan_report(ptr, size, type, true, smp_processor_id(), KCSAN_REPORT_RACE_UNKNOWN_ORIGIN); @@ -471,7 +503,7 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, &encoded_watchpoint); /* * It is safe to check kcsan_is_enabled() after find_watchpoint in the - * slow-path, as long as no state changes that cause a data race to be + * slow-path, as long as no state changes that cause a race to be * detected and reported have occurred until kcsan_is_enabled() is * checked. */ diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index bec42dab32ee..a9dad44130e6 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -44,6 +44,7 @@ static const char *counter_to_name(enum kcsan_counter_id id) case KCSAN_COUNTER_USED_WATCHPOINTS: return "used_watchpoints"; case KCSAN_COUNTER_SETUP_WATCHPOINTS: return "setup_watchpoints"; case KCSAN_COUNTER_DATA_RACES: return "data_races"; + case KCSAN_COUNTER_ASSERT_FAILURES: return "assert_failures"; case KCSAN_COUNTER_NO_CAPACITY: return "no_capacity"; case KCSAN_COUNTER_REPORT_RACES: return "report_races"; case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: return "races_unknown_origin"; diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 8492da45494b..50078e7d43c3 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -39,6 +39,13 @@ enum kcsan_counter_id { */ KCSAN_COUNTER_DATA_RACES, + /* + * Total number of ASSERT failures due to races. If the observed race is + * due to two conflicting ASSERT type accesses, then both will be + * counted. + */ + KCSAN_COUNTER_ASSERT_FAILURES, + /* * Number of times no watchpoints were available. */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index 7cd34285df74..3bc590e6be7e 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -34,11 +34,11 @@ static struct { } other_info = { .ptr = NULL }; /* - * Information about reported data races; used to rate limit reporting. + * Information about reported races; used to rate limit reporting. */ struct report_time { /* - * The last time the data race was reported. + * The last time the race was reported. */ unsigned long time; @@ -57,7 +57,7 @@ struct report_time { * * Therefore, we use a fixed-size array, which at most will occupy a page. This * still adequately rate limits reports, assuming that a) number of unique data - * races is not excessive, and b) occurrence of unique data races within the + * races is not excessive, and b) occurrence of unique races within the * same time window is limited. */ #define REPORT_TIMES_MAX (PAGE_SIZE / sizeof(struct report_time)) @@ -74,7 +74,7 @@ static struct report_time report_times[REPORT_TIMES_SIZE]; static DEFINE_SPINLOCK(report_lock); /* - * Checks if the data race identified by thread frames frame1 and frame2 has + * Checks if the race identified by thread frames frame1 and frame2 has * been reported since (now - KCSAN_REPORT_ONCE_IN_MS). */ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) @@ -90,7 +90,7 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) invalid_before = jiffies - msecs_to_jiffies(CONFIG_KCSAN_REPORT_ONCE_IN_MS); - /* Check if a matching data race report exists. */ + /* Check if a matching race report exists. */ for (i = 0; i < REPORT_TIMES_SIZE; ++i) { struct report_time *rt = &report_times[i]; @@ -114,7 +114,7 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) if (time_before(rt->time, invalid_before)) continue; /* before KCSAN_REPORT_ONCE_IN_MS ago */ - /* Reported recently, check if data race matches. */ + /* Reported recently, check if race matches. */ if ((rt->frame1 == frame1 && rt->frame2 == frame2) || (rt->frame1 == frame2 && rt->frame2 == frame1)) return true; @@ -142,11 +142,12 @@ skip_report(bool value_change, unsigned long top_frame) * 3. write watchpoint, conflicting write (value_change==true): report; * 4. write watchpoint, conflicting write (value_change==false): skip; * 5. write watchpoint, conflicting read (value_change==false): skip; - * 6. write watchpoint, conflicting read (value_change==true): impossible; + * 6. write watchpoint, conflicting read (value_change==true): report; * * Cases 1-4 are intuitive and expected; case 5 ensures we do not report - * data races where the write may have rewritten the same value; and - * case 6 is simply impossible. + * data races where the write may have rewritten the same value; case 6 + * is possible either if the size is larger than what we check value + * changes for or the access type is KCSAN_ACCESS_ASSERT. */ if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && !value_change) { /* @@ -178,11 +179,27 @@ static const char *get_access_type(int type) return "write"; case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "write (marked)"; + + /* + * ASSERT variants: + */ + case KCSAN_ACCESS_ASSERT: + case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_ATOMIC: + return "assert no writes"; + case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE: + case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "assert no accesses"; + default: BUG(); } } +static const char *get_bug_type(int type) +{ + return (type & KCSAN_ACCESS_ASSERT) != 0 ? "assert: race" : "data-race"; +} + /* Return thread description: in task or interrupt. */ static const char *get_thread_desc(int task_id) { @@ -268,13 +285,15 @@ static bool print_report(const volatile void *ptr, size_t size, int access_type, * Do not print offset of functions to keep title short. */ cmp = sym_strcmp((void *)other_frame, (void *)this_frame); - pr_err("BUG: KCSAN: data-race in %ps / %ps\n", + pr_err("BUG: KCSAN: %s in %ps / %ps\n", + get_bug_type(access_type | other_info.access_type), (void *)(cmp < 0 ? other_frame : this_frame), (void *)(cmp < 0 ? this_frame : other_frame)); } break; case KCSAN_REPORT_RACE_UNKNOWN_ORIGIN: - pr_err("BUG: KCSAN: data-race in %pS\n", (void *)this_frame); + pr_err("BUG: KCSAN: %s in %pS\n", get_bug_type(access_type), + (void *)this_frame); break; default: @@ -427,7 +446,7 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, /* * With TRACE_IRQFLAGS, lockdep's IRQ trace state becomes corrupted if * we do not turn off lockdep here; this could happen due to recursion - * into lockdep via KCSAN if we detect a data race in utilities used by + * into lockdep via KCSAN if we detect a race in utilities used by * lockdep. */ lockdep_off(); diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 9785bbf9a1d1..f0b791143c6a 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -4,13 +4,17 @@ config HAVE_ARCH_KCSAN bool menuconfig KCSAN - bool "KCSAN: dynamic data race detector" + bool "KCSAN: dynamic race detector" depends on HAVE_ARCH_KCSAN && DEBUG_KERNEL && !KASAN select STACKTRACE help - The Kernel Concurrency Sanitizer (KCSAN) is a dynamic data race - detector, which relies on compile-time instrumentation, and uses a - watchpoint-based sampling approach to detect data races. + The Kernel Concurrency Sanitizer (KCSAN) is a dynamic race detector, + which relies on compile-time instrumentation, and uses a + watchpoint-based sampling approach to detect races. + + KCSAN's primary purpose is to detect data races. KCSAN can also be + used to check properties, with the help of provided assertions, of + concurrent code where bugs do not manifest as data races. See for more details. @@ -85,14 +89,14 @@ config KCSAN_SKIP_WATCH_RANDOMIZE KCSAN_WATCH_SKIP. config KCSAN_REPORT_ONCE_IN_MS - int "Duration in milliseconds, in which any given data race is only reported once" + int "Duration in milliseconds, in which any given race is only reported once" default 3000 help - Any given data race is only reported once in the defined time window. - Different data races may still generate reports within a duration - that is smaller than the duration defined here. This allows rate - limiting reporting to avoid flooding the console with reports. - Setting this to 0 disables rate limiting. + Any given race is only reported once in the defined time window. + Different races may still generate reports within a duration that is + smaller than the duration defined here. This allows rate limiting + reporting to avoid flooding the console with reports. Setting this + to 0 disables rate limiting. # The main purpose of the below options is to control reported data races (e.g. # in fuzzer configs), and are not expected to be switched frequently by other -- cgit v1.3 From f97f713dc25714ac13f3b5abdeffce2da3f6fe70 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 6 Feb 2020 16:46:25 +0100 Subject: kcsan: Introduce ASSERT_EXCLUSIVE_*() macros Introduces ASSERT_EXCLUSIVE_WRITER() and ASSERT_EXCLUSIVE_ACCESS(), which may be used to assert properties of synchronization logic, where violation cannot be detected as a normal data race. Examples of the reports that may be generated: ================================================================== BUG: KCSAN: assert: race in test_thread / test_thread write to 0xffffffffab3d1540 of 8 bytes by task 466 on cpu 2: test_thread+0x8d/0x111 debugfs_write.cold+0x32/0x44 ... assert no writes to 0xffffffffab3d1540 of 8 bytes by task 464 on cpu 0: test_thread+0xa3/0x111 debugfs_write.cold+0x32/0x44 ... ================================================================== ================================================================== BUG: KCSAN: assert: race in test_thread / test_thread assert no accesses to 0xffffffffab3d1540 of 8 bytes by task 465 on cpu 1: test_thread+0xb9/0x111 debugfs_write.cold+0x32/0x44 ... read to 0xffffffffab3d1540 of 8 bytes by task 464 on cpu 0: test_thread+0x77/0x111 debugfs_write.cold+0x32/0x44 ... ================================================================== Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/kcsan-checks.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 5dcadc221026..cf6961794e9a 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -96,4 +96,44 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC | KCSAN_ACCESS_WRITE) #endif +/** + * ASSERT_EXCLUSIVE_WRITER - assert no other threads are writing @var + * + * Assert that there are no other threads writing @var; other readers are + * allowed. This assertion can be used to specify properties of concurrent code, + * where violation cannot be detected as a normal data race. + * + * For example, if a per-CPU variable is only meant to be written by a single + * CPU, but may be read from other CPUs; in this case, reads and writes must be + * marked properly, however, if an off-CPU WRITE_ONCE() races with the owning + * CPU's WRITE_ONCE(), would not constitute a data race but could be a harmful + * race condition. Using this macro allows specifying this property in the code + * and catch such bugs. + * + * @var variable to assert on + */ +#define ASSERT_EXCLUSIVE_WRITER(var) \ + __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT) + +/** + * ASSERT_EXCLUSIVE_ACCESS - assert no other threads are accessing @var + * + * Assert that no other thread is accessing @var (no readers nor writers). This + * assertion can be used to specify properties of concurrent code, where + * violation cannot be detected as a normal data race. + * + * For example, in a reference-counting algorithm where exclusive access is + * expected after the refcount reaches 0. We can check that this property + * actually holds as follows: + * + * if (refcount_dec_and_test(&obj->refcnt)) { + * ASSERT_EXCLUSIVE_ACCESS(*obj); + * safely_dispose_of(obj); + * } + * + * @var variable to assert on + */ +#define ASSERT_EXCLUSIVE_ACCESS(var) \ + __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT) + #endif /* _LINUX_KCSAN_CHECKS_H */ -- cgit v1.3 From f0f6928c2c4c19ab6171d4f468f542fac1888a8f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:19 +0100 Subject: kcsan: Move interfaces that affects checks to kcsan-checks.h This moves functions that affect state changing the behaviour of kcsan_check_access() to kcsan-checks.h. Since these are likely used with kcsan_check_access() it makes more sense to have them in kcsan-checks.h, to avoid including all of 'include/linux/kcsan.h'. No functional change intended. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/kcsan-checks.h | 48 ++++++++++++++++++++++++++++++++++++++++++-- include/linux/kcsan.h | 41 ------------------------------------- 2 files changed, 46 insertions(+), 43 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index cf6961794e9a..8675411c8dbc 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -32,10 +32,54 @@ */ void __kcsan_check_access(const volatile void *ptr, size_t size, int type); -#else +/** + * kcsan_nestable_atomic_begin - begin nestable atomic region + * + * Accesses within the atomic region may appear to race with other accesses but + * should be considered atomic. + */ +void kcsan_nestable_atomic_begin(void); + +/** + * kcsan_nestable_atomic_end - end nestable atomic region + */ +void kcsan_nestable_atomic_end(void); + +/** + * kcsan_flat_atomic_begin - begin flat atomic region + * + * Accesses within the atomic region may appear to race with other accesses but + * should be considered atomic. + */ +void kcsan_flat_atomic_begin(void); + +/** + * kcsan_flat_atomic_end - end flat atomic region + */ +void kcsan_flat_atomic_end(void); + +/** + * kcsan_atomic_next - consider following accesses as atomic + * + * Force treating the next n memory accesses for the current context as atomic + * operations. + * + * @n number of following memory accesses to treat as atomic. + */ +void kcsan_atomic_next(int n); + +#else /* CONFIG_KCSAN */ + static inline void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { } -#endif + +static inline void kcsan_nestable_atomic_begin(void) { } +static inline void kcsan_nestable_atomic_end(void) { } +static inline void kcsan_flat_atomic_begin(void) { } +static inline void kcsan_flat_atomic_end(void) { } +static inline void kcsan_atomic_next(int n) { } + +#endif /* CONFIG_KCSAN */ /* * kcsan_*: Only calls into the runtime when the particular compilation unit has diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 1019e3a2c689..7a614ca558f6 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -56,52 +56,11 @@ void kcsan_disable_current(void); */ void kcsan_enable_current(void); -/** - * kcsan_nestable_atomic_begin - begin nestable atomic region - * - * Accesses within the atomic region may appear to race with other accesses but - * should be considered atomic. - */ -void kcsan_nestable_atomic_begin(void); - -/** - * kcsan_nestable_atomic_end - end nestable atomic region - */ -void kcsan_nestable_atomic_end(void); - -/** - * kcsan_flat_atomic_begin - begin flat atomic region - * - * Accesses within the atomic region may appear to race with other accesses but - * should be considered atomic. - */ -void kcsan_flat_atomic_begin(void); - -/** - * kcsan_flat_atomic_end - end flat atomic region - */ -void kcsan_flat_atomic_end(void); - -/** - * kcsan_atomic_next - consider following accesses as atomic - * - * Force treating the next n memory accesses for the current context as atomic - * operations. - * - * @n number of following memory accesses to treat as atomic. - */ -void kcsan_atomic_next(int n); - #else /* CONFIG_KCSAN */ static inline void kcsan_init(void) { } static inline void kcsan_disable_current(void) { } static inline void kcsan_enable_current(void) { } -static inline void kcsan_nestable_atomic_begin(void) { } -static inline void kcsan_nestable_atomic_end(void) { } -static inline void kcsan_flat_atomic_begin(void) { } -static inline void kcsan_flat_atomic_end(void) { } -static inline void kcsan_atomic_next(int n) { } #endif /* CONFIG_KCSAN */ -- cgit v1.3 From b968a08f242d51982e46041c506115b5e11a7570 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:20 +0100 Subject: compiler.h, seqlock.h: Remove unnecessary kcsan.h includes No we longer have to include kcsan.h, since the required KCSAN interface for both compiler.h and seqlock.h are now provided by kcsan-checks.h. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 -- include/linux/seqlock.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1bdf37571cb..f504edebd5d7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -313,8 +313,6 @@ unsigned long read_word_at_a_time(const void *addr) __u.__val; \ }) -#include - /** * data_race - mark an expression as containing intentional data races * diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 239701cae376..8b97204f35a7 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include /* -- cgit v1.3 From 81af89e15862909881ff010a0adb67148487e88a Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:22 +0100 Subject: kcsan: Add kcsan_set_access_mask() support When setting up an access mask with kcsan_set_access_mask(), KCSAN will only report races if concurrent changes to bits set in access_mask are observed. Conveying access_mask via a separate call avoids introducing overhead in the common-case fast-path. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/kcsan-checks.h | 11 +++++++++++ include/linux/kcsan.h | 5 +++++ init/init_task.c | 1 + kernel/kcsan/core.c | 43 +++++++++++++++++++++++++++++++++++++++---- kernel/kcsan/kcsan.h | 5 +++++ kernel/kcsan/report.c | 13 ++++++++++++- 6 files changed, 73 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 8675411c8dbc..4ef5233ff3f0 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -68,6 +68,16 @@ void kcsan_flat_atomic_end(void); */ void kcsan_atomic_next(int n); +/** + * kcsan_set_access_mask - set access mask + * + * Set the access mask for all accesses for the current context if non-zero. + * Only value changes to bits set in the mask will be reported. + * + * @mask bitmask + */ +void kcsan_set_access_mask(unsigned long mask); + #else /* CONFIG_KCSAN */ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, @@ -78,6 +88,7 @@ static inline void kcsan_nestable_atomic_end(void) { } static inline void kcsan_flat_atomic_begin(void) { } static inline void kcsan_flat_atomic_end(void) { } static inline void kcsan_atomic_next(int n) { } +static inline void kcsan_set_access_mask(unsigned long mask) { } #endif /* CONFIG_KCSAN */ diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 7a614ca558f6..3b84606e1e67 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -35,6 +35,11 @@ struct kcsan_ctx { */ int atomic_nest_count; bool in_flat_atomic; + + /* + * Access mask for all accesses if non-zero. + */ + unsigned long access_mask; }; /** diff --git a/init/init_task.c b/init/init_task.c index 2b4fe98b0f09..096191d177d5 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -167,6 +167,7 @@ struct task_struct init_task .atomic_next = 0, .atomic_nest_count = 0, .in_flat_atomic = false, + .access_mask = 0, }, #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 3f89801161d3..589b1e7f0f25 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -39,6 +39,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { .atomic_next = 0, .atomic_nest_count = 0, .in_flat_atomic = false, + .access_mask = 0, }; /* @@ -298,6 +299,15 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, if (!kcsan_is_enabled()) return; + + /* + * The access_mask check relies on value-change comparison. To avoid + * reporting a race where e.g. the writer set up the watchpoint, but the + * reader has access_mask!=0, we have to ignore the found watchpoint. + */ + if (get_ctx()->access_mask != 0) + return; + /* * Consume the watchpoint as soon as possible, to minimize the chances * of !consumed. Consuming the watchpoint must always be guarded by @@ -341,6 +351,7 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) u32 _4; u64 _8; } expect_value; + unsigned long access_mask; enum kcsan_value_change value_change = KCSAN_VALUE_CHANGE_MAYBE; unsigned long ua_flags = user_access_save(); unsigned long irq_flags; @@ -435,18 +446,27 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) * Re-read value, and check if it is as expected; if not, we infer a * racy access. */ + access_mask = get_ctx()->access_mask; switch (size) { case 1: expect_value._1 ^= READ_ONCE(*(const u8 *)ptr); + if (access_mask) + expect_value._1 &= (u8)access_mask; break; case 2: expect_value._2 ^= READ_ONCE(*(const u16 *)ptr); + if (access_mask) + expect_value._2 &= (u16)access_mask; break; case 4: expect_value._4 ^= READ_ONCE(*(const u32 *)ptr); + if (access_mask) + expect_value._4 &= (u32)access_mask; break; case 8: expect_value._8 ^= READ_ONCE(*(const u64 *)ptr); + if (access_mask) + expect_value._8 &= (u64)access_mask; break; default: break; /* ignore; we do not diff the values */ @@ -460,11 +480,20 @@ kcsan_setup_watchpoint(const volatile void *ptr, size_t size, int type) if (!remove_watchpoint(watchpoint)) { /* * Depending on the access type, map a value_change of MAYBE to - * TRUE (require reporting). + * TRUE (always report) or FALSE (never report). */ - if (value_change == KCSAN_VALUE_CHANGE_MAYBE && (size > 8 || is_assert)) { - /* Always assume a value-change. */ - value_change = KCSAN_VALUE_CHANGE_TRUE; + if (value_change == KCSAN_VALUE_CHANGE_MAYBE) { + if (access_mask != 0) { + /* + * For access with access_mask, we require a + * value-change, as it is likely that races on + * ~access_mask bits are expected. + */ + value_change = KCSAN_VALUE_CHANGE_FALSE; + } else if (size > 8 || is_assert) { + /* Always assume a value-change. */ + value_change = KCSAN_VALUE_CHANGE_TRUE; + } } /* @@ -622,6 +651,12 @@ void kcsan_atomic_next(int n) } EXPORT_SYMBOL(kcsan_atomic_next); +void kcsan_set_access_mask(unsigned long mask) +{ + get_ctx()->access_mask = mask; +} +EXPORT_SYMBOL(kcsan_set_access_mask); + void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { check_access(ptr, size, type); diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 83a79b08b550..892de5120c1b 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -98,6 +98,11 @@ enum kcsan_value_change { */ KCSAN_VALUE_CHANGE_MAYBE, + /* + * Did not observe a value-change, and it is invalid to report the race. + */ + KCSAN_VALUE_CHANGE_FALSE, + /* * The value was observed to change, and the race should be reported. */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index d871476dc134..11c791b886f3 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -132,6 +132,9 @@ static bool rate_limit_report(unsigned long frame1, unsigned long frame2) static bool skip_report(enum kcsan_value_change value_change, unsigned long top_frame) { + /* Should never get here if value_change==FALSE. */ + WARN_ON_ONCE(value_change == KCSAN_VALUE_CHANGE_FALSE); + /* * The first call to skip_report always has value_change==TRUE, since we * cannot know the value written of an instrumented access. For the 2nd @@ -493,7 +496,15 @@ void kcsan_report(const volatile void *ptr, size_t size, int access_type, kcsan_disable_current(); if (prepare_report(&flags, ptr, size, access_type, cpu_id, type)) { - if (print_report(ptr, size, access_type, value_change, cpu_id, type) && panic_on_warn) + /* + * Never report if value_change is FALSE, only if we it is + * either TRUE or MAYBE. In case of MAYBE, further filtering may + * be done once we know the full stack trace in print_report(). + */ + bool reported = value_change != KCSAN_VALUE_CHANGE_FALSE && + print_report(ptr, size, access_type, value_change, cpu_id, type); + + if (reported && panic_on_warn) panic("panic_on_warn set ...\n"); release_report(&flags, type); -- cgit v1.3 From 703b321501c95c658275fd76775282fe45989641 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:23 +0100 Subject: kcsan: Introduce ASSERT_EXCLUSIVE_BITS(var, mask) This introduces ASSERT_EXCLUSIVE_BITS(var, mask). ASSERT_EXCLUSIVE_BITS(var, mask) will cause KCSAN to assume that the following access is safe w.r.t. data races (however, please see the docbook comment for disclaimer here). For more context on why this was considered necessary, please see: http://lkml.kernel.org/r/1580995070-25139-1-git-send-email-cai@lca.pw In particular, before this patch, data races between reads (that use @mask bits of an access that should not be modified concurrently) and writes (that change ~@mask bits not used by the readers) would have been annotated with "data_race()" (or "READ_ONCE()"). However, doing so would then hide real problems: we would no longer be able to detect harmful races between reads to @mask bits and writes to @mask bits. Therefore, by using ASSERT_EXCLUSIVE_BITS(var, mask), we accomplish: 1. Avoid proliferation of specific macros at the call sites: by including a single mask in the argument list, we can use the same macro in a wide variety of call sites, regardless of how and which bits in a field each call site actually accesses. 2. The existing code does not need to be modified (although READ_ONCE() may still be advisable if we cannot prove that the data race is always safe). 3. We catch bugs where the exclusive bits are modified concurrently. 4. We document properties of the current code. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar Cc: David Hildenbrand Cc: Jan Kara Cc: Qian Cai --- include/linux/kcsan-checks.h | 69 ++++++++++++++++++++++++++++++++++++++++---- kernel/kcsan/debugfs.c | 15 +++++++++- 2 files changed, 77 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 4ef5233ff3f0..8f9f6e2191dc 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -152,9 +152,9 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #endif /** - * ASSERT_EXCLUSIVE_WRITER - assert no other threads are writing @var + * ASSERT_EXCLUSIVE_WRITER - assert no concurrent writes to @var * - * Assert that there are no other threads writing @var; other readers are + * Assert that there are no concurrent writes to @var; other readers are * allowed. This assertion can be used to specify properties of concurrent code, * where violation cannot be detected as a normal data race. * @@ -171,11 +171,11 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT) /** - * ASSERT_EXCLUSIVE_ACCESS - assert no other threads are accessing @var + * ASSERT_EXCLUSIVE_ACCESS - assert no concurrent accesses to @var * - * Assert that no other thread is accessing @var (no readers nor writers). This - * assertion can be used to specify properties of concurrent code, where - * violation cannot be detected as a normal data race. + * Assert that there are no concurrent accesses to @var (no readers nor + * writers). This assertion can be used to specify properties of concurrent + * code, where violation cannot be detected as a normal data race. * * For example, in a reference-counting algorithm where exclusive access is * expected after the refcount reaches 0. We can check that this property @@ -191,4 +191,61 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define ASSERT_EXCLUSIVE_ACCESS(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT) +/** + * ASSERT_EXCLUSIVE_BITS - assert no concurrent writes to subset of bits in @var + * + * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER(var). + * + * Assert that there are no concurrent writes to a subset of bits in @var; + * concurrent readers are permitted. This assertion captures more detailed + * bit-level properties, compared to the other (word granularity) assertions. + * Only the bits set in @mask are checked for concurrent modifications, while + * ignoring the remaining bits, i.e. concurrent writes (or reads) to ~@mask bits + * are ignored. + * + * Use this for variables, where some bits must not be modified concurrently, + * yet other bits are expected to be modified concurrently. + * + * For example, variables where, after initialization, some bits are read-only, + * but other bits may still be modified concurrently. A reader may wish to + * assert that this is true as follows: + * + * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK); + * foo = (READ_ONCE(flags) & READ_ONLY_MASK) >> READ_ONLY_SHIFT; + * + * Note: The access that immediately follows ASSERT_EXCLUSIVE_BITS() is + * assumed to access the masked bits only, and KCSAN optimistically assumes it + * is therefore safe, even in the presence of data races, and marking it with + * READ_ONCE() is optional from KCSAN's point-of-view. We caution, however, + * that it may still be advisable to do so, since we cannot reason about all + * compiler optimizations when it comes to bit manipulations (on the reader + * and writer side). If you are sure nothing can go wrong, we can write the + * above simply as: + * + * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK); + * foo = (flags & READ_ONLY_MASK) >> READ_ONLY_SHIFT; + * + * Another example, where this may be used, is when certain bits of @var may + * only be modified when holding the appropriate lock, but other bits may still + * be modified concurrently. Writers, where other bits may change concurrently, + * could use the assertion as follows: + * + * spin_lock(&foo_lock); + * ASSERT_EXCLUSIVE_BITS(flags, FOO_MASK); + * old_flags = READ_ONCE(flags); + * new_flags = (old_flags & ~FOO_MASK) | (new_foo << FOO_SHIFT); + * if (cmpxchg(&flags, old_flags, new_flags) != old_flags) { ... } + * spin_unlock(&foo_lock); + * + * @var variable to assert on + * @mask only check for modifications to bits set in @mask + */ +#define ASSERT_EXCLUSIVE_BITS(var, mask) \ + do { \ + kcsan_set_access_mask(mask); \ + __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT);\ + kcsan_set_access_mask(0); \ + kcsan_atomic_next(1); \ + } while (0) + #endif /* _LINUX_KCSAN_CHECKS_H */ diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 9bbba0e57c9b..2ff196123977 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -100,8 +100,10 @@ static noinline void microbenchmark(unsigned long iters) * debugfs file from multiple tasks to generate real conflicts and show reports. */ static long test_dummy; +static long test_flags; static noinline void test_thread(unsigned long iters) { + const long CHANGE_BITS = 0xff00ff00ff00ff00L; const struct kcsan_ctx ctx_save = current->kcsan_ctx; cycles_t cycles; @@ -109,16 +111,27 @@ static noinline void test_thread(unsigned long iters) memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); + pr_info("test_dummy@%px, test_flags@%px\n", &test_dummy, &test_flags); cycles = get_cycles(); while (iters--) { + /* These all should generate reports. */ __kcsan_check_read(&test_dummy, sizeof(test_dummy)); - __kcsan_check_write(&test_dummy, sizeof(test_dummy)); ASSERT_EXCLUSIVE_WRITER(test_dummy); ASSERT_EXCLUSIVE_ACCESS(test_dummy); + ASSERT_EXCLUSIVE_BITS(test_flags, ~CHANGE_BITS); /* no report */ + __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */ + + ASSERT_EXCLUSIVE_BITS(test_flags, CHANGE_BITS); /* report */ + __kcsan_check_read(&test_flags, sizeof(test_flags)); /* no report */ + /* not actually instrumented */ WRITE_ONCE(test_dummy, iters); /* to observe value-change */ + __kcsan_check_write(&test_dummy, sizeof(test_dummy)); + + test_flags ^= CHANGE_BITS; /* generate value-change */ + __kcsan_check_write(&test_flags, sizeof(test_flags)); } cycles = get_cycles() - cycles; -- cgit v1.3 From 1443b8c9e712ef8914a2cab9ae7ce133229ed96c Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 5 Mar 2020 15:21:09 +0100 Subject: kcsan: Update API documentation in kcsan-checks.h Update the API documentation for ASSERT_EXCLUSIVE_* macros and make them generate readable documentation for the code examples. All @variable short summaries were missing ':', which was updated for the whole file. Tested with "make htmldocs". Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 98 +++++++++++++++++++++++++++----------------- 1 file changed, 61 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 8f9f6e2191dc..3cd8bb03eb41 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -26,9 +26,9 @@ /** * __kcsan_check_access - check generic access for races * - * @ptr address of access - * @size size of access - * @type access type modifier + * @ptr: address of access + * @size: size of access + * @type: access type modifier */ void __kcsan_check_access(const volatile void *ptr, size_t size, int type); @@ -64,7 +64,7 @@ void kcsan_flat_atomic_end(void); * Force treating the next n memory accesses for the current context as atomic * operations. * - * @n number of following memory accesses to treat as atomic. + * @n: number of following memory accesses to treat as atomic. */ void kcsan_atomic_next(int n); @@ -74,7 +74,7 @@ void kcsan_atomic_next(int n); * Set the access mask for all accesses for the current context if non-zero. * Only value changes to bits set in the mask will be reported. * - * @mask bitmask + * @mask: bitmask */ void kcsan_set_access_mask(unsigned long mask); @@ -106,16 +106,16 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, /** * __kcsan_check_read - check regular read access for races * - * @ptr address of access - * @size size of access + * @ptr: address of access + * @size: size of access */ #define __kcsan_check_read(ptr, size) __kcsan_check_access(ptr, size, 0) /** * __kcsan_check_write - check regular write access for races * - * @ptr address of access - * @size size of access + * @ptr: address of access + * @size: size of access */ #define __kcsan_check_write(ptr, size) \ __kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) @@ -123,16 +123,16 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, /** * kcsan_check_read - check regular read access for races * - * @ptr address of access - * @size size of access + * @ptr: address of access + * @size: size of access */ #define kcsan_check_read(ptr, size) kcsan_check_access(ptr, size, 0) /** * kcsan_check_write - check regular write access for races * - * @ptr address of access - * @size size of access + * @ptr: address of access + * @size: size of access */ #define kcsan_check_write(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) @@ -158,14 +158,26 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * allowed. This assertion can be used to specify properties of concurrent code, * where violation cannot be detected as a normal data race. * - * For example, if a per-CPU variable is only meant to be written by a single - * CPU, but may be read from other CPUs; in this case, reads and writes must be - * marked properly, however, if an off-CPU WRITE_ONCE() races with the owning - * CPU's WRITE_ONCE(), would not constitute a data race but could be a harmful - * race condition. Using this macro allows specifying this property in the code - * and catch such bugs. + * For example, if we only have a single writer, but multiple concurrent + * readers, to avoid data races, all these accesses must be marked; even + * concurrent marked writes racing with the single writer are bugs. + * Unfortunately, due to being marked, they are no longer data races. For cases + * like these, we can use the macro as follows: * - * @var variable to assert on + * .. code-block:: c + * + * void writer(void) { + * spin_lock(&update_foo_lock); + * ASSERT_EXCLUSIVE_WRITER(shared_foo); + * WRITE_ONCE(shared_foo, ...); + * spin_unlock(&update_foo_lock); + * } + * void reader(void) { + * // update_foo_lock does not need to be held! + * ... = READ_ONCE(shared_foo); + * } + * + * @var: variable to assert on */ #define ASSERT_EXCLUSIVE_WRITER(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT) @@ -177,16 +189,22 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * writers). This assertion can be used to specify properties of concurrent * code, where violation cannot be detected as a normal data race. * - * For example, in a reference-counting algorithm where exclusive access is - * expected after the refcount reaches 0. We can check that this property - * actually holds as follows: + * For example, where exclusive access is expected after determining no other + * users of an object are left, but the object is not actually freed. We can + * check that this property actually holds as follows: + * + * .. code-block:: c * * if (refcount_dec_and_test(&obj->refcnt)) { * ASSERT_EXCLUSIVE_ACCESS(*obj); - * safely_dispose_of(obj); + * do_some_cleanup(obj); + * release_for_reuse(obj); * } * - * @var variable to assert on + * Note: For cases where the object is freed, `KASAN `_ is a better + * fit to detect use-after-free bugs. + * + * @var: variable to assert on */ #define ASSERT_EXCLUSIVE_ACCESS(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT) @@ -200,7 +218,7 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * concurrent readers are permitted. This assertion captures more detailed * bit-level properties, compared to the other (word granularity) assertions. * Only the bits set in @mask are checked for concurrent modifications, while - * ignoring the remaining bits, i.e. concurrent writes (or reads) to ~@mask bits + * ignoring the remaining bits, i.e. concurrent writes (or reads) to ~mask bits * are ignored. * * Use this for variables, where some bits must not be modified concurrently, @@ -210,17 +228,21 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * but other bits may still be modified concurrently. A reader may wish to * assert that this is true as follows: * + * .. code-block:: c + * * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK); * foo = (READ_ONCE(flags) & READ_ONLY_MASK) >> READ_ONLY_SHIFT; * - * Note: The access that immediately follows ASSERT_EXCLUSIVE_BITS() is - * assumed to access the masked bits only, and KCSAN optimistically assumes it - * is therefore safe, even in the presence of data races, and marking it with - * READ_ONCE() is optional from KCSAN's point-of-view. We caution, however, - * that it may still be advisable to do so, since we cannot reason about all - * compiler optimizations when it comes to bit manipulations (on the reader - * and writer side). If you are sure nothing can go wrong, we can write the - * above simply as: + * Note: The access that immediately follows ASSERT_EXCLUSIVE_BITS() is assumed + * to access the masked bits only, and KCSAN optimistically assumes it is + * therefore safe, even in the presence of data races, and marking it with + * READ_ONCE() is optional from KCSAN's point-of-view. We caution, however, that + * it may still be advisable to do so, since we cannot reason about all compiler + * optimizations when it comes to bit manipulations (on the reader and writer + * side). If you are sure nothing can go wrong, we can write the above simply + * as: + * + * .. code-block:: c * * ASSERT_EXCLUSIVE_BITS(flags, READ_ONLY_MASK); * foo = (flags & READ_ONLY_MASK) >> READ_ONLY_SHIFT; @@ -230,15 +252,17 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * be modified concurrently. Writers, where other bits may change concurrently, * could use the assertion as follows: * + * .. code-block:: c + * * spin_lock(&foo_lock); * ASSERT_EXCLUSIVE_BITS(flags, FOO_MASK); - * old_flags = READ_ONCE(flags); + * old_flags = flags; * new_flags = (old_flags & ~FOO_MASK) | (new_foo << FOO_SHIFT); * if (cmpxchg(&flags, old_flags, new_flags) != old_flags) { ... } * spin_unlock(&foo_lock); * - * @var variable to assert on - * @mask only check for modifications to bits set in @mask + * @var: variable to assert on + * @mask: only check for modifications to bits set in @mask */ #define ASSERT_EXCLUSIVE_BITS(var, mask) \ do { \ -- cgit v1.3 From 757a4cefde76697af2b2c284c8a320912b77e7e6 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Mar 2020 17:41:56 +0100 Subject: kcsan: Add support for scoped accesses This adds support for scoped accesses, where the memory range is checked for the duration of the scope. The feature is implemented by inserting the relevant access information into a list of scoped accesses for the current execution context, which are then checked (until removed) on every call (through instrumentation) into the KCSAN runtime. An alternative, more complex, implementation could set up a watchpoint for the scoped access, and keep the watchpoint set up. This, however, would require first exposing a handle to the watchpoint, as well as dealing with cases such as accesses by the same thread while the watchpoint is still set up (and several more cases). It is also doubtful if this would provide any benefit, since the majority of delay where the watchpoint is set up is likely due to the injected delays by KCSAN. Therefore, the implementation in this patch is simpler and avoids hurting KCSAN's main use-case (normal data race detection); it also implicitly increases scoped-access race-detection-ability due to increased probability of setting up watchpoints by repeatedly calling __kcsan_check_access() throughout the scope of the access. The implementation required adding an additional conditional branch to the fast-path. However, the microbenchmark showed a *speedup* of ~5% on the fast-path. This appears to be due to subtly improved codegen by GCC from moving get_ctx() and associated load of preempt_count earlier. Suggested-by: Boqun Feng Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 57 ++++++++++++++++++++++++++++++ include/linux/kcsan.h | 3 ++ init/init_task.c | 1 + kernel/kcsan/core.c | 83 +++++++++++++++++++++++++++++++++++++++----- kernel/kcsan/report.c | 33 ++++++++++++------ 5 files changed, 158 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 3cd8bb03eb41..b24253d3a442 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -3,6 +3,8 @@ #ifndef _LINUX_KCSAN_CHECKS_H #define _LINUX_KCSAN_CHECKS_H +/* Note: Only include what is already included by compiler.h. */ +#include #include /* @@ -12,10 +14,12 @@ * WRITE : write access; * ATOMIC: access is atomic; * ASSERT: access is not a regular access, but an assertion; + * SCOPED: access is a scoped access; */ #define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 #define KCSAN_ACCESS_ASSERT 0x4 +#define KCSAN_ACCESS_SCOPED 0x8 /* * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used @@ -78,6 +82,52 @@ void kcsan_atomic_next(int n); */ void kcsan_set_access_mask(unsigned long mask); +/* Scoped access information. */ +struct kcsan_scoped_access { + struct list_head list; + const volatile void *ptr; + size_t size; + int type; +}; +/* + * Automatically call kcsan_end_scoped_access() when kcsan_scoped_access goes + * out of scope; relies on attribute "cleanup", which is supported by all + * compilers that support KCSAN. + */ +#define __kcsan_cleanup_scoped \ + __maybe_unused __attribute__((__cleanup__(kcsan_end_scoped_access))) + +/** + * kcsan_begin_scoped_access - begin scoped access + * + * Begin scoped access and initialize @sa, which will cause KCSAN to + * continuously check the memory range in the current thread until + * kcsan_end_scoped_access() is called for @sa. + * + * Scoped accesses are implemented by appending @sa to an internal list for the + * current execution context, and then checked on every call into the KCSAN + * runtime. + * + * @ptr: address of access + * @size: size of access + * @type: access type modifier + * @sa: struct kcsan_scoped_access to use for the scope of the access + */ +struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa); + +/** + * kcsan_end_scoped_access - end scoped access + * + * End a scoped access, which will stop KCSAN checking the memory range. + * Requires that kcsan_begin_scoped_access() was previously called once for @sa. + * + * @sa: a previously initialized struct kcsan_scoped_access + */ +void kcsan_end_scoped_access(struct kcsan_scoped_access *sa); + + #else /* CONFIG_KCSAN */ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, @@ -90,6 +140,13 @@ static inline void kcsan_flat_atomic_end(void) { } static inline void kcsan_atomic_next(int n) { } static inline void kcsan_set_access_mask(unsigned long mask) { } +struct kcsan_scoped_access { }; +#define __kcsan_cleanup_scoped __maybe_unused +static inline struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa) { return sa; } +static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } + #endif /* CONFIG_KCSAN */ /* diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 3b84606e1e67..17ae59e4b685 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -40,6 +40,9 @@ struct kcsan_ctx { * Access mask for all accesses if non-zero. */ unsigned long access_mask; + + /* List of scoped accesses. */ + struct list_head scoped_accesses; }; /** diff --git a/init/init_task.c b/init/init_task.c index 096191d177d5..198943851caf 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -168,6 +168,7 @@ struct task_struct init_task .atomic_nest_count = 0, .in_flat_atomic = false, .access_mask = 0, + .scoped_accesses = {LIST_POISON1, NULL}, }, #endif #ifdef CONFIG_TRACE_IRQFLAGS diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index 4d8ea0fca5f1..a572aae61b98 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -42,6 +43,7 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { .atomic_nest_count = 0, .in_flat_atomic = false, .access_mask = 0, + .scoped_accesses = {LIST_POISON1, NULL}, }; /* @@ -191,12 +193,23 @@ static __always_inline struct kcsan_ctx *get_ctx(void) return in_task() ? ¤t->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); } +/* Check scoped accesses; never inline because this is a slow-path! */ +static noinline void kcsan_check_scoped_accesses(void) +{ + struct kcsan_ctx *ctx = get_ctx(); + struct list_head *prev_save = ctx->scoped_accesses.prev; + struct kcsan_scoped_access *scoped_access; + + ctx->scoped_accesses.prev = NULL; /* Avoid recursion. */ + list_for_each_entry(scoped_access, &ctx->scoped_accesses, list) + __kcsan_check_access(scoped_access->ptr, scoped_access->size, scoped_access->type); + ctx->scoped_accesses.prev = prev_save; +} + /* Rules for generic atomic accesses. Called from fast-path. */ static __always_inline bool -is_atomic(const volatile void *ptr, size_t size, int type) +is_atomic(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { - struct kcsan_ctx *ctx; - if (type & KCSAN_ACCESS_ATOMIC) return true; @@ -213,7 +226,6 @@ is_atomic(const volatile void *ptr, size_t size, int type) IS_ALIGNED((unsigned long)ptr, size)) return true; /* Assume aligned writes up to word size are atomic. */ - ctx = get_ctx(); if (ctx->atomic_next > 0) { /* * Because we do not have separate contexts for nested @@ -233,7 +245,7 @@ is_atomic(const volatile void *ptr, size_t size, int type) } static __always_inline bool -should_watch(const volatile void *ptr, size_t size, int type) +should_watch(const volatile void *ptr, size_t size, int type, struct kcsan_ctx *ctx) { /* * Never set up watchpoints when memory operations are atomic. @@ -242,7 +254,7 @@ should_watch(const volatile void *ptr, size_t size, int type) * should not count towards skipped instructions, and (2) to actually * decrement kcsan_atomic_next for consecutive instruction stream. */ - if (is_atomic(ptr, size, type)) + if (is_atomic(ptr, size, type, ctx)) return false; if (this_cpu_dec_return(kcsan_skip) >= 0) @@ -563,8 +575,14 @@ static __always_inline void check_access(const volatile void *ptr, size_t size, if (unlikely(watchpoint != NULL)) kcsan_found_watchpoint(ptr, size, type, watchpoint, encoded_watchpoint); - else if (unlikely(should_watch(ptr, size, type))) - kcsan_setup_watchpoint(ptr, size, type); + else { + struct kcsan_ctx *ctx = get_ctx(); /* Call only once in fast-path. */ + + if (unlikely(should_watch(ptr, size, type, ctx))) + kcsan_setup_watchpoint(ptr, size, type); + else if (unlikely(ctx->scoped_accesses.prev)) + kcsan_check_scoped_accesses(); + } } /* === Public interface ===================================================== */ @@ -660,6 +678,55 @@ void kcsan_set_access_mask(unsigned long mask) } EXPORT_SYMBOL(kcsan_set_access_mask); +struct kcsan_scoped_access * +kcsan_begin_scoped_access(const volatile void *ptr, size_t size, int type, + struct kcsan_scoped_access *sa) +{ + struct kcsan_ctx *ctx = get_ctx(); + + __kcsan_check_access(ptr, size, type); + + ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */ + + INIT_LIST_HEAD(&sa->list); + sa->ptr = ptr; + sa->size = size; + sa->type = type; + + if (!ctx->scoped_accesses.prev) /* Lazy initialize list head. */ + INIT_LIST_HEAD(&ctx->scoped_accesses); + list_add(&sa->list, &ctx->scoped_accesses); + + ctx->disable_count--; + return sa; +} +EXPORT_SYMBOL(kcsan_begin_scoped_access); + +void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) +{ + struct kcsan_ctx *ctx = get_ctx(); + + if (WARN(!ctx->scoped_accesses.prev, "Unbalanced %s()?", __func__)) + return; + + ctx->disable_count++; /* Disable KCSAN, in case list debugging is on. */ + + list_del(&sa->list); + if (list_empty(&ctx->scoped_accesses)) + /* + * Ensure we do not enter kcsan_check_scoped_accesses() + * slow-path if unnecessary, and avoids requiring list_empty() + * in the fast-path (to avoid a READ_ONCE() and potential + * uaccess warning). + */ + ctx->scoped_accesses.prev = NULL; + + ctx->disable_count--; + + __kcsan_check_access(sa->ptr, sa->size, sa->type); +} +EXPORT_SYMBOL(kcsan_end_scoped_access); + void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { check_access(ptr, size, type); diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ae0a383238ea..ddc18f1224a4 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -205,6 +205,20 @@ skip_report(enum kcsan_value_change value_change, unsigned long top_frame) static const char *get_access_type(int type) { + if (type & KCSAN_ACCESS_ASSERT) { + if (type & KCSAN_ACCESS_SCOPED) { + if (type & KCSAN_ACCESS_WRITE) + return "assert no accesses (scoped)"; + else + return "assert no writes (scoped)"; + } else { + if (type & KCSAN_ACCESS_WRITE) + return "assert no accesses"; + else + return "assert no writes"; + } + } + switch (type) { case 0: return "read"; @@ -214,17 +228,14 @@ static const char *get_access_type(int type) return "write"; case KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: return "write (marked)"; - - /* - * ASSERT variants: - */ - case KCSAN_ACCESS_ASSERT: - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_ATOMIC: - return "assert no writes"; - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE: - case KCSAN_ACCESS_ASSERT | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: - return "assert no accesses"; - + case KCSAN_ACCESS_SCOPED: + return "read (scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_ATOMIC: + return "read (marked, scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE: + return "write (scoped)"; + case KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC: + return "write (marked, scoped)"; default: BUG(); } -- cgit v1.3 From d8949ef1d9f1062848cd068cf369a57ce33dae6f Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 25 Mar 2020 17:41:58 +0100 Subject: kcsan: Introduce scoped ASSERT_EXCLUSIVE macros Introduce ASSERT_EXCLUSIVE_*_SCOPED(), which provide an intuitive interface to use the scoped-access feature, without having to explicitly mark the start and end of the desired scope. Basing duration of the checks on scope avoids accidental misuse and resulting false positives, which may be hard to debug. See added comments for usage. The macros are implemented using __attribute__((__cleanup__(func))), which is supported by all compilers that currently support KCSAN. Suggested-by: Boqun Feng Suggested-by: Paul E. McKenney Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- Documentation/dev-tools/kcsan.rst | 3 +- include/linux/kcsan-checks.h | 73 ++++++++++++++++++++++++++++++++++++++- kernel/kcsan/debugfs.c | 16 ++++++++- 3 files changed, 89 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/Documentation/dev-tools/kcsan.rst b/Documentation/dev-tools/kcsan.rst index 52a5d6fb9701..f4b5766f12cc 100644 --- a/Documentation/dev-tools/kcsan.rst +++ b/Documentation/dev-tools/kcsan.rst @@ -238,7 +238,8 @@ are defined at the C-language level. The following macros can be used to check properties of concurrent code where bugs would not manifest as data races. .. kernel-doc:: include/linux/kcsan-checks.h - :functions: ASSERT_EXCLUSIVE_WRITER ASSERT_EXCLUSIVE_ACCESS + :functions: ASSERT_EXCLUSIVE_WRITER ASSERT_EXCLUSIVE_WRITER_SCOPED + ASSERT_EXCLUSIVE_ACCESS ASSERT_EXCLUSIVE_ACCESS_SCOPED ASSERT_EXCLUSIVE_BITS Implementation Details diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index b24253d3a442..101df7f46d89 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -234,11 +234,63 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * ... = READ_ONCE(shared_foo); * } * + * Note: ASSERT_EXCLUSIVE_WRITER_SCOPED(), if applicable, performs more thorough + * checking if a clear scope where no concurrent writes are expected exists. + * * @var: variable to assert on */ #define ASSERT_EXCLUSIVE_WRITER(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_ASSERT) +/* + * Helper macros for implementation of for ASSERT_EXCLUSIVE_*_SCOPED(). @id is + * expected to be unique for the scope in which instances of kcsan_scoped_access + * are declared. + */ +#define __kcsan_scoped_name(c, suffix) __kcsan_scoped_##c##suffix +#define __ASSERT_EXCLUSIVE_SCOPED(var, type, id) \ + struct kcsan_scoped_access __kcsan_scoped_name(id, _) \ + __kcsan_cleanup_scoped; \ + struct kcsan_scoped_access *__kcsan_scoped_name(id, _dummy_p) \ + __maybe_unused = kcsan_begin_scoped_access( \ + &(var), sizeof(var), KCSAN_ACCESS_SCOPED | (type), \ + &__kcsan_scoped_name(id, _)) + +/** + * ASSERT_EXCLUSIVE_WRITER_SCOPED - assert no concurrent writes to @var in scope + * + * Scoped variant of ASSERT_EXCLUSIVE_WRITER(). + * + * Assert that there are no concurrent writes to @var for the duration of the + * scope in which it is introduced. This provides a better way to fully cover + * the enclosing scope, compared to multiple ASSERT_EXCLUSIVE_WRITER(), and + * increases the likelihood for KCSAN to detect racing accesses. + * + * For example, it allows finding race-condition bugs that only occur due to + * state changes within the scope itself: + * + * .. code-block:: c + * + * void writer(void) { + * spin_lock(&update_foo_lock); + * { + * ASSERT_EXCLUSIVE_WRITER_SCOPED(shared_foo); + * WRITE_ONCE(shared_foo, 42); + * ... + * // shared_foo should still be 42 here! + * } + * spin_unlock(&update_foo_lock); + * } + * void buggy(void) { + * if (READ_ONCE(shared_foo) == 42) + * WRITE_ONCE(shared_foo, 1); // bug! + * } + * + * @var: variable to assert on + */ +#define ASSERT_EXCLUSIVE_WRITER_SCOPED(var) \ + __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_ASSERT, __COUNTER__) + /** * ASSERT_EXCLUSIVE_ACCESS - assert no concurrent accesses to @var * @@ -258,6 +310,9 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, * release_for_reuse(obj); * } * + * Note: ASSERT_EXCLUSIVE_ACCESS_SCOPED(), if applicable, performs more thorough + * checking if a clear scope where no concurrent accesses are expected exists. + * * Note: For cases where the object is freed, `KASAN `_ is a better * fit to detect use-after-free bugs. * @@ -266,10 +321,26 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, #define ASSERT_EXCLUSIVE_ACCESS(var) \ __kcsan_check_access(&(var), sizeof(var), KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT) +/** + * ASSERT_EXCLUSIVE_ACCESS_SCOPED - assert no concurrent accesses to @var in scope + * + * Scoped variant of ASSERT_EXCLUSIVE_ACCESS(). + * + * Assert that there are no concurrent accesses to @var (no readers nor writers) + * for the entire duration of the scope in which it is introduced. This provides + * a better way to fully cover the enclosing scope, compared to multiple + * ASSERT_EXCLUSIVE_ACCESS(), and increases the likelihood for KCSAN to detect + * racing accesses. + * + * @var: variable to assert on + */ +#define ASSERT_EXCLUSIVE_ACCESS_SCOPED(var) \ + __ASSERT_EXCLUSIVE_SCOPED(var, KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT, __COUNTER__) + /** * ASSERT_EXCLUSIVE_BITS - assert no concurrent writes to subset of bits in @var * - * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER(var). + * Bit-granular variant of ASSERT_EXCLUSIVE_WRITER(). * * Assert that there are no concurrent writes to a subset of bits in @var; * concurrent readers are permitted. This assertion captures more detailed diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 72ee188ebc54..1a08664a7fab 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -110,6 +110,7 @@ static noinline void microbenchmark(unsigned long iters) */ static long test_dummy; static long test_flags; +static long test_scoped; static noinline void test_thread(unsigned long iters) { const long CHANGE_BITS = 0xff00ff00ff00ff00L; @@ -120,7 +121,8 @@ static noinline void test_thread(unsigned long iters) memset(¤t->kcsan_ctx, 0, sizeof(current->kcsan_ctx)); pr_info("KCSAN: %s begin | iters: %lu\n", __func__, iters); - pr_info("test_dummy@%px, test_flags@%px\n", &test_dummy, &test_flags); + pr_info("test_dummy@%px, test_flags@%px, test_scoped@%px,\n", + &test_dummy, &test_flags, &test_scoped); cycles = get_cycles(); while (iters--) { @@ -141,6 +143,18 @@ static noinline void test_thread(unsigned long iters) test_flags ^= CHANGE_BITS; /* generate value-change */ __kcsan_check_write(&test_flags, sizeof(test_flags)); + + BUG_ON(current->kcsan_ctx.scoped_accesses.prev); + { + /* Should generate reports anywhere in this block. */ + ASSERT_EXCLUSIVE_WRITER_SCOPED(test_scoped); + ASSERT_EXCLUSIVE_ACCESS_SCOPED(test_scoped); + BUG_ON(!current->kcsan_ctx.scoped_accesses.prev); + /* Unrelated accesses. */ + __kcsan_check_access(&cycles, sizeof(cycles), 0); + __kcsan_check_access(&cycles, sizeof(cycles), KCSAN_ACCESS_ATOMIC); + } + BUG_ON(current->kcsan_ctx.scoped_accesses.prev); } cycles = get_cycles() - cycles; -- cgit v1.3 From 01b4ff58f72dbee926077d9afa0650f6e685e866 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 31 Mar 2020 21:32:32 +0200 Subject: kcsan: Move kcsan_{disable,enable}_current() to kcsan-checks.h Both affect access checks, and should therefore be in kcsan-checks.h. This is in preparation to use these in compiler.h. Acked-by: Will Deacon Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 16 ++++++++++++++++ include/linux/kcsan.h | 16 ---------------- 2 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index 101df7f46d89..ef95ddc49182 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -36,6 +36,20 @@ */ void __kcsan_check_access(const volatile void *ptr, size_t size, int type); +/** + * kcsan_disable_current - disable KCSAN for the current context + * + * Supports nesting. + */ +void kcsan_disable_current(void); + +/** + * kcsan_enable_current - re-enable KCSAN for the current context + * + * Supports nesting. + */ +void kcsan_enable_current(void); + /** * kcsan_nestable_atomic_begin - begin nestable atomic region * @@ -133,6 +147,8 @@ void kcsan_end_scoped_access(struct kcsan_scoped_access *sa); static inline void __kcsan_check_access(const volatile void *ptr, size_t size, int type) { } +static inline void kcsan_disable_current(void) { } +static inline void kcsan_enable_current(void) { } static inline void kcsan_nestable_atomic_begin(void) { } static inline void kcsan_nestable_atomic_end(void) { } static inline void kcsan_flat_atomic_begin(void) { } diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 17ae59e4b685..53340d8789f9 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -50,25 +50,9 @@ struct kcsan_ctx { */ void kcsan_init(void); -/** - * kcsan_disable_current - disable KCSAN for the current context - * - * Supports nesting. - */ -void kcsan_disable_current(void); - -/** - * kcsan_enable_current - re-enable KCSAN for the current context - * - * Supports nesting. - */ -void kcsan_enable_current(void); - #else /* CONFIG_KCSAN */ static inline void kcsan_init(void) { } -static inline void kcsan_disable_current(void) { } -static inline void kcsan_enable_current(void) { } #endif /* CONFIG_KCSAN */ -- cgit v1.3 From d071e91361bbfef524ae8abf7e560fb294d0ad64 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 31 Mar 2020 21:32:33 +0200 Subject: kcsan: Change data_race() to no longer require marking racing accesses Thus far, accesses marked with data_race() would still require the racing access to be marked in some way (be it with READ_ONCE(), WRITE_ONCE(), or data_race() itself), as otherwise KCSAN would still report a data race. This requirement, however, seems to be unintuitive, and some valid use-cases demand *not* marking other accesses, as it might hide more serious bugs (e.g. diagnostic reads). Therefore, this commit changes data_race() to no longer require marking racing accesses (although it's still recommended if possible). The alternative would have been introducing another variant of data_race(), however, since usage of data_race() already needs to be carefully reasoned about, distinguishing between these cases likely adds more complexity in the wrong place. Link: https://lkml.kernel.org/r/20200331131002.GA30975@willie-the-truck Cc: Paul E. McKenney Cc: Will Deacon Cc: Qian Cai Acked-by: Will Deacon Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/compiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f504edebd5d7..1729bd17e9b7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -326,9 +326,9 @@ unsigned long read_word_at_a_time(const void *addr) #define data_race(expr) \ ({ \ typeof(({ expr; })) __val; \ - kcsan_nestable_atomic_begin(); \ + kcsan_disable_current(); \ __val = ({ expr; }); \ - kcsan_nestable_atomic_end(); \ + kcsan_enable_current(); \ __val; \ }) #else -- cgit v1.3 From bceb5646a15dad49ceb29ec16b8acc10075d0627 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 31 Mar 2020 18:54:48 +0200 Subject: thermal: core: Make thermal_zone_set_trips private The function thermal_zone_set_trips() is used by the thermal core code in order to update the next trip points, there are no other users. Move the function definition in the thermal_core.h, remove the EXPORT_SYMBOL_GPL and document the function. Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Reviewed-by: Amit Kucheria Link: https://lore.kernel.org/r/20200331165449.30355-1-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.h | 3 +++ drivers/thermal/thermal_helpers.c | 13 ++++++++++++- include/linux/thermal.h | 3 --- 3 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index a9bf00e91d64..37cd4e2bead2 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -69,6 +69,9 @@ void thermal_zone_device_unbind_exception(struct thermal_zone_device *, int thermal_zone_device_set_policy(struct thermal_zone_device *, char *); int thermal_build_list_of_policies(char *buf); +/* Helpers */ +void thermal_zone_set_trips(struct thermal_zone_device *tz); + /* sysfs I/F */ int thermal_zone_create_device_groups(struct thermal_zone_device *, int); void thermal_zone_destroy_device_groups(struct thermal_zone_device *); diff --git a/drivers/thermal/thermal_helpers.c b/drivers/thermal/thermal_helpers.c index 2ba756af76b7..59eaf2d0fdb3 100644 --- a/drivers/thermal/thermal_helpers.c +++ b/drivers/thermal/thermal_helpers.c @@ -113,6 +113,18 @@ exit: } EXPORT_SYMBOL_GPL(thermal_zone_get_temp); +/** + * thermal_zone_set_trips - Computes the next trip points for the driver + * @tz: a pointer to a thermal zone device structure + * + * The function computes the next temperature boundaries by browsing + * the trip points. The result is the closer low and high trip points + * to the current temperature. These values are passed to the backend + * driver to let it set its own notification mechanism (usually an + * interrupt). + * + * It does not return a value + */ void thermal_zone_set_trips(struct thermal_zone_device *tz) { int low = -INT_MAX; @@ -161,7 +173,6 @@ void thermal_zone_set_trips(struct thermal_zone_device *tz) exit: mutex_unlock(&tz->lock); } -EXPORT_SYMBOL_GPL(thermal_zone_set_trips); void thermal_cdev_update(struct thermal_cooling_device *cdev) { diff --git a/include/linux/thermal.h b/include/linux/thermal.h index c91b1e344d56..448841ab0dca 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -439,7 +439,6 @@ int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *, enum thermal_notify_event); -void thermal_zone_set_trips(struct thermal_zone_device *); struct thermal_cooling_device *thermal_cooling_device_register(const char *, void *, const struct thermal_cooling_device_ops *); @@ -497,8 +496,6 @@ static inline int thermal_zone_unbind_cooling_device( static inline void thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event) { } -static inline void thermal_zone_set_trips(struct thermal_zone_device *tz) -{ } static inline struct thermal_cooling_device * thermal_cooling_device_register(char *type, void *devdata, const struct thermal_cooling_device_ops *ops) -- cgit v1.3 From 8097db407a08f33236b6fa6ba8b62d669321c720 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:39 +0200 Subject: thermal: Move default governor config option to the internal header The default governor set at compilation time is a thermal internal business, no need to export to the global thermal header. Move the config options to the internal header. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-1-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.h | 11 +++++++++++ include/linux/thermal.h | 11 ----------- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 37cd4e2bead2..828305508556 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -12,6 +12,17 @@ #include #include +/* Default Thermal Governor */ +#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) +#define DEFAULT_THERMAL_GOVERNOR "step_wise" +#elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE) +#define DEFAULT_THERMAL_GOVERNOR "fair_share" +#elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) +#define DEFAULT_THERMAL_GOVERNOR "user_space" +#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) +#define DEFAULT_THERMAL_GOVERNOR "power_allocator" +#endif + /* Initial state of a cooling device during binding */ #define THERMAL_NO_TARGET -1UL diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 448841ab0dca..71cff87dcb46 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -32,17 +32,6 @@ /* use value, which < 0K, to indicate an invalid/uninitialized temperature */ #define THERMAL_TEMP_INVALID -274000 -/* Default Thermal Governor */ -#if defined(CONFIG_THERMAL_DEFAULT_GOV_STEP_WISE) -#define DEFAULT_THERMAL_GOVERNOR "step_wise" -#elif defined(CONFIG_THERMAL_DEFAULT_GOV_FAIR_SHARE) -#define DEFAULT_THERMAL_GOVERNOR "fair_share" -#elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) -#define DEFAULT_THERMAL_GOVERNOR "user_space" -#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) -#define DEFAULT_THERMAL_GOVERNOR "power_allocator" -#endif - struct thermal_zone_device; struct thermal_cooling_device; struct thermal_instance; -- cgit v1.3 From c68df440b07f88210c5839d4507b5cbfa35e3df9 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:40 +0200 Subject: thermal: Move struct thermal_attr to the private header The structure belongs to the thermal core internals but it is exported in the include/linux/thermal.h For better self-encapsulation and less impact for the compilation if a change is made on it. Move the structure in the thermal core internal header file. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-2-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.h | 5 +++++ include/linux/thermal.h | 6 +----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 828305508556..5d08ad60d9df 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -41,6 +41,11 @@ extern struct thermal_governor *__governor_thermal_table_end[]; __governor < __governor_thermal_table_end; \ __governor++) +struct thermal_attr { + struct device_attribute attr; + char name[THERMAL_NAME_LENGTH]; +}; + /* * This structure is used to describe the behavior of * a certain cooling device on a certain trip point diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 71cff87dcb46..5aa80fb2fb61 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -35,6 +35,7 @@ struct thermal_zone_device; struct thermal_cooling_device; struct thermal_instance; +struct thermal_attr; enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, @@ -119,11 +120,6 @@ struct thermal_cooling_device { struct list_head node; }; -struct thermal_attr { - struct device_attribute attr; - char name[THERMAL_NAME_LENGTH]; -}; - /** * struct thermal_zone_device - structure for a thermal zone * @id: unique id number for each thermal zone -- cgit v1.3 From 33a88af10944edc7fd390000cd6bc9bbde918bc3 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:41 +0200 Subject: thermal: Move internal IPA functions The exported IPA functions are used by the IPA. It is pointless to declare the functions in the thermal.h file. For better self-encapsulation and less impact for the compilation if a change is made on it. Move the code in the thermal core internal header file. As the users depends on THERMAL then it is pointless to have the stub, remove them. Take also the opportunity to fix checkpatch warnings/errors when moving the code around. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-3-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.h | 13 +++++++++++++ include/linux/thermal.h | 24 ------------------------ 2 files changed, 13 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 5d08ad60d9df..f99551ce9838 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -46,6 +46,19 @@ struct thermal_attr { char name[THERMAL_NAME_LENGTH]; }; +static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) +{ + return cdev->ops->get_requested_power && cdev->ops->state2power && + cdev->ops->power2state; +} + +int power_actor_get_max_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *max_power); +int power_actor_get_min_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *min_power); +int power_actor_set_power(struct thermal_cooling_device *cdev, + struct thermal_instance *ti, u32 power); + /* * This structure is used to describe the behavior of * a certain cooling device on a certain trip point diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 5aa80fb2fb61..e0279f7b43f4 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -399,18 +399,6 @@ void devm_thermal_zone_of_sensor_unregister(struct device *dev, #endif #if IS_ENABLED(CONFIG_THERMAL) -static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) -{ - return cdev->ops->get_requested_power && cdev->ops->state2power && - cdev->ops->power2state; -} - -int power_actor_get_max_power(struct thermal_cooling_device *, - struct thermal_zone_device *tz, u32 *max_power); -int power_actor_get_min_power(struct thermal_cooling_device *, - struct thermal_zone_device *tz, u32 *min_power); -int power_actor_set_power(struct thermal_cooling_device *, - struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, struct thermal_zone_params *, int, int); @@ -447,18 +435,6 @@ struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); #else -static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) -{ return false; } -static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, u32 *max_power) -{ return 0; } -static inline int power_actor_get_min_power(struct thermal_cooling_device *cdev, - struct thermal_zone_device *tz, - u32 *min_power) -{ return -ENODEV; } -static inline int power_actor_set_power(struct thermal_cooling_device *cdev, - struct thermal_instance *tz, u32 power) -{ return 0; } static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, -- cgit v1.3 From 2e7700dc336dde93cdc9394d10ccd79593fff214 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:42 +0200 Subject: thermal: Move trip point structure definition to private header The struct thermal_trip is only used by the thermal internals, it is pointless to export the definition in the global header. Move the structure to the thermal_core.h internal header. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-4-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.h | 13 +++++++++++++ include/linux/thermal.h | 15 --------------- 2 files changed, 13 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index f99551ce9838..d37de708c28a 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -58,6 +58,19 @@ int power_actor_get_min_power(struct thermal_cooling_device *cdev, struct thermal_zone_device *tz, u32 *min_power); int power_actor_set_power(struct thermal_cooling_device *cdev, struct thermal_instance *ti, u32 power); +/** + * struct thermal_trip - representation of a point in temperature domain + * @np: pointer to struct device_node that this trip point was created from + * @temperature: temperature value in miliCelsius + * @hysteresis: relative hysteresis in miliCelsius + * @type: trip point type + */ +struct thermal_trip { + struct device_node *np; + int temperature; + int hysteresis; + enum thermal_trip_type type; +}; /* * This structure is used to describe the behavior of diff --git a/include/linux/thermal.h b/include/linux/thermal.h index e0279f7b43f4..7adbfe092281 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -332,21 +332,6 @@ struct thermal_zone_of_device_ops { int (*set_trip_temp)(void *, int, int); }; -/** - * struct thermal_trip - representation of a point in temperature domain - * @np: pointer to struct device_node that this trip point was created from - * @temperature: temperature value in miliCelsius - * @hysteresis: relative hysteresis in miliCelsius - * @type: trip point type - */ - -struct thermal_trip { - struct device_node *np; - int temperature; - int hysteresis; - enum thermal_trip_type type; -}; - /* Function declarations */ #ifdef CONFIG_THERMAL_OF int thermal_zone_of_get_sensor_id(struct device_node *tz_np, -- cgit v1.3 From f0129c231772a85a726b233756cdc58ea42a9d84 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:43 +0200 Subject: thermal: Move get_tz_trend to the internal header The function is not used any place other than the thermal directory. It does not make sense to export its definition in the global header as there is no use of it. Move the definition to the internal header and allow better self-encapsulation. Take the opportunity to add the parameter names to make checkpatch happy and remove the pointless stubs. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-5-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.h | 2 ++ include/linux/thermal.h | 4 +--- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index d37de708c28a..5fb2bd9c7034 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -72,6 +72,8 @@ struct thermal_trip { enum thermal_trip_type type; }; +int get_tz_trend(struct thermal_zone_device *tz, int trip); + /* * This structure is used to describe the behavior of * a certain cooling device on a certain trip point diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 7adbfe092281..8006ba5de855 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -414,7 +414,6 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int thermal_zone_get_slope(struct thermal_zone_device *tz); int thermal_zone_get_offset(struct thermal_zone_device *tz); -int get_tz_trend(struct thermal_zone_device *, int); struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, struct thermal_cooling_device *, int); void thermal_cdev_update(struct thermal_cooling_device *); @@ -473,8 +472,7 @@ static inline int thermal_zone_get_slope( static inline int thermal_zone_get_offset( struct thermal_zone_device *tz) { return -ENODEV; } -static inline int get_tz_trend(struct thermal_zone_device *tz, int trip) -{ return -ENODEV; } + static inline struct thermal_instance * get_thermal_instance(struct thermal_zone_device *tz, struct thermal_cooling_device *cdev, int trip) -- cgit v1.3 From 06f1041f5023c00a54f63c269b997c61d1b3b739 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:44 +0200 Subject: thermal: Move get_thermal_instance to the internal header The function is not used any place other than the thermal directory. It does not make sense to export its definition in the global header as there is no use of it. Move the definition to the internal header and allow better self-encapsulation. Take the opportunity to add the parameter names to make checkpatch happy and remove the pointless stubs. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-6-daniel.lezcano@linaro.org --- drivers/thermal/thermal_core.h | 5 +++++ include/linux/thermal.h | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 5fb2bd9c7034..c95689586e19 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -74,6 +74,11 @@ struct thermal_trip { int get_tz_trend(struct thermal_zone_device *tz, int trip); +struct thermal_instance * +get_thermal_instance(struct thermal_zone_device *tz, + struct thermal_cooling_device *cdev, + int trip); + /* * This structure is used to describe the behavior of * a certain cooling device on a certain trip point diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 8006ba5de855..47e745c5dfca 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -414,8 +414,6 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, int *temp); int thermal_zone_get_slope(struct thermal_zone_device *tz); int thermal_zone_get_offset(struct thermal_zone_device *tz); -struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, - struct thermal_cooling_device *, int); void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); #else @@ -473,10 +471,6 @@ static inline int thermal_zone_get_offset( struct thermal_zone_device *tz) { return -ENODEV; } -static inline struct thermal_instance * -get_thermal_instance(struct thermal_zone_device *tz, - struct thermal_cooling_device *cdev, int trip) -{ return ERR_PTR(-ENODEV); } static inline void thermal_cdev_update(struct thermal_cooling_device *cdev) { } static inline void thermal_notify_framework(struct thermal_zone_device *tz, -- cgit v1.3 From 60518260cab21e749704baa5246ff13f7559fa91 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:45 +0200 Subject: thermal: Change IS_ENABLED to IFDEF in the header file The thermal framework can not be compiled as a module. The IS_ENABLED macro is useless here and can be replaced by an ifdef. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-7-daniel.lezcano@linaro.org --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 47e745c5dfca..12df9ff0182d 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -383,7 +383,7 @@ void devm_thermal_zone_of_sensor_unregister(struct device *dev, #endif -#if IS_ENABLED(CONFIG_THERMAL) +#ifdef CONFIG_THERMAL struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, struct thermal_zone_params *, int, int); -- cgit v1.3 From 708418500644c248d6f266e4df0bf43ce53bf746 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:46 +0200 Subject: thermal: Remove stubs for thermal_zone_[un]bind_cooling_device All callers of the functions depends on THERMAL, it is pointless to define stubs. Remove them. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-8-daniel.lezcano@linaro.org --- include/linux/thermal.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 12df9ff0182d..7b3dbfe15b59 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -426,16 +426,6 @@ static inline struct thermal_zone_device *thermal_zone_device_register( static inline void thermal_zone_device_unregister( struct thermal_zone_device *tz) { } -static inline int thermal_zone_bind_cooling_device( - struct thermal_zone_device *tz, int trip, - struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower, - unsigned int weight) -{ return -ENODEV; } -static inline int thermal_zone_unbind_cooling_device( - struct thermal_zone_device *tz, int trip, - struct thermal_cooling_device *cdev) -{ return -ENODEV; } static inline void thermal_zone_device_update(struct thermal_zone_device *tz, enum thermal_notify_event event) { } -- cgit v1.3 From 0145f67866b71e8c1da3c1d9412623db7ba8a0c8 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Thu, 2 Apr 2020 16:27:47 +0200 Subject: thermal: Remove thermal_zone_device_update() stub All users of the function depends on THERMAL, no stub is needed. Remove it. Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Acked-by: Zhang Rui Link: https://lore.kernel.org/r/20200402142747.8307-9-daniel.lezcano@linaro.org --- include/linux/thermal.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 7b3dbfe15b59..216185bb3014 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -426,9 +426,6 @@ static inline struct thermal_zone_device *thermal_zone_device_register( static inline void thermal_zone_device_unregister( struct thermal_zone_device *tz) { } -static inline void thermal_zone_device_update(struct thermal_zone_device *tz, - enum thermal_notify_event event) -{ } static inline struct thermal_cooling_device * thermal_cooling_device_register(char *type, void *devdata, const struct thermal_cooling_device_ops *ops) -- cgit v1.3 From 9554bfe403bdfc084823df8695a01f28c680af61 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 14 Feb 2020 14:27:15 -0800 Subject: x86/mce: Convert the CEC to use the MCE notifier The CEC code has its claws in a couple of routines in mce/core.c. Convert it to just register itself on the normal MCE notifier chain. [ bp: Make cec_add_elem() and cec_init() static. ] Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Tested-by: Tony Luck Link: https://lkml.kernel.org/r/20200214222720.13168-3-tony.luck@intel.com --- arch/x86/kernel/cpu/mce/core.c | 19 ------------------- drivers/ras/cec.c | 30 ++++++++++++++++++++++++++++-- include/linux/ras.h | 5 ----- 3 files changed, 28 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 43b1519ad4e5..b033b3589630 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -544,21 +544,6 @@ bool mce_is_correctable(struct mce *m) } EXPORT_SYMBOL_GPL(mce_is_correctable); -static bool cec_add_mce(struct mce *m) -{ - if (!m) - return false; - - /* We eat only correctable DRAM errors with usable addresses. */ - if (mce_is_memory_error(m) && - mce_is_correctable(m) && - mce_usable_address(m)) - if (!cec_add_elem(m->addr >> PAGE_SHIFT)) - return true; - - return false; -} - static int mce_early_notifier(struct notifier_block *nb, unsigned long val, void *data) { @@ -567,9 +552,6 @@ static int mce_early_notifier(struct notifier_block *nb, unsigned long val, if (!m) return NOTIFY_DONE; - if (cec_add_mce(m)) - return NOTIFY_STOP; - /* Emit the trace record: */ trace_mce_record(m); @@ -2612,7 +2594,6 @@ static int __init mcheck_late_init(void) static_branch_inc(&mcsafe_key); mcheck_debugfs_init(); - cec_init(); /* * Flush out everything that has been logged during early boot, now that diff --git a/drivers/ras/cec.c b/drivers/ras/cec.c index c09cf55e2d20..6b42040bf956 100644 --- a/drivers/ras/cec.c +++ b/drivers/ras/cec.c @@ -309,7 +309,7 @@ static bool sanity_check(struct ce_array *ca) return ret; } -int cec_add_elem(u64 pfn) +static int cec_add_elem(u64 pfn) { struct ce_array *ca = &ce_arr; unsigned int to = 0; @@ -527,7 +527,30 @@ err: return 1; } -void __init cec_init(void) +static int cec_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct mce *m = (struct mce *)data; + + if (!m) + return NOTIFY_DONE; + + /* We eat only correctable DRAM errors with usable addresses. */ + if (mce_is_memory_error(m) && + mce_is_correctable(m) && + mce_usable_address(m)) + if (!cec_add_elem(m->addr >> PAGE_SHIFT)) + return NOTIFY_STOP; + + return NOTIFY_DONE; +} + +static struct notifier_block cec_nb = { + .notifier_call = cec_notifier, + .priority = MCE_PRIO_CEC, +}; + +static void __init cec_init(void) { if (ce_arr.disabled) return; @@ -546,8 +569,11 @@ void __init cec_init(void) INIT_DELAYED_WORK(&cec_work, cec_work_fn); schedule_delayed_work(&cec_work, CEC_DECAY_DEFAULT_INTERVAL); + mce_register_decode_chain(&cec_nb); + pr_info("Correctable Errors collector initialized.\n"); } +late_initcall(cec_init); int __init parse_cec_param(char *str) { diff --git a/include/linux/ras.h b/include/linux/ras.h index 7c3debb47c87..1f4048bf2674 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -17,12 +17,7 @@ static inline int ras_add_daemon_trace(void) { return 0; } #endif #ifdef CONFIG_RAS_CEC -void __init cec_init(void); int __init parse_cec_param(char *str); -int cec_add_elem(u64 pfn); -#else -static inline void __init cec_init(void) { } -static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif #ifdef CONFIG_RAS -- cgit v1.3 From 7fc0b9b995f222646ece8d5bca528060c098ee88 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Fri, 14 Feb 2020 14:27:20 -0800 Subject: EDAC: Drop the EDAC report status checks When acpi_extlog was added, we were worried that the same error would be reported more than once by different subsystems. But in the ensuing years I've seen complaints that people could not find an error log (because this mechanism suppressed the log they were looking for). Rip it all out. People are smart enough to notice the same address from different reporting mechanisms. Signed-off-by: Tony Luck Signed-off-by: Borislav Petkov Tested-by: Tony Luck Link: https://lkml.kernel.org/r/20200214222720.13168-8-tony.luck@intel.com --- drivers/acpi/acpi_extlog.c | 14 ----------- drivers/edac/edac_mc.c | 61 ---------------------------------------------- drivers/edac/pnd2_edac.c | 3 --- drivers/edac/sb_edac.c | 4 --- drivers/edac/skx_common.c | 3 --- include/linux/edac.h | 8 ------ 6 files changed, 93 deletions(-) (limited to 'include/linux') diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index 9cc3c1f92db5..f138e12b7b82 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -42,8 +42,6 @@ struct extlog_l1_head { u8 rev1[12]; }; -static int old_edac_report_status; - static u8 extlog_dsm_uuid[] __initdata = "663E35AF-CC10-41A4-88EA-5470AF055295"; /* L1 table related physical address */ @@ -229,11 +227,6 @@ static int __init extlog_init(void) if (!(cap & MCG_ELOG_P) || !extlog_get_l1addr()) return -ENODEV; - if (edac_get_report_status() == EDAC_REPORTING_FORCE) { - pr_warn("Not loading eMCA, error reporting force-enabled through EDAC.\n"); - return -EPERM; - } - rc = -EINVAL; /* get L1 header to fetch necessary information */ l1_hdr_size = sizeof(struct extlog_l1_head); @@ -281,12 +274,6 @@ static int __init extlog_init(void) if (elog_buf == NULL) goto err_release_elog; - /* - * eMCA event report method has higher priority than EDAC method, - * unless EDAC event report method is mandatory. - */ - old_edac_report_status = edac_get_report_status(); - edac_set_report_status(EDAC_REPORTING_DISABLED); mce_register_decode_chain(&extlog_mce_dec); /* enable OS to be involved to take over management from BIOS */ ((struct extlog_l1_head *)extlog_l1_addr)->flags |= FLAG_OS_OPTIN; @@ -308,7 +295,6 @@ err: static void __exit extlog_exit(void) { - edac_set_report_status(old_edac_report_status); mce_unregister_decode_chain(&extlog_mce_dec); ((struct extlog_l1_head *)extlog_l1_addr)->flags &= ~FLAG_OS_OPTIN; if (extlog_l1_addr) diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 75ede27bdf6a..5813e931f2f0 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c @@ -43,8 +43,6 @@ int edac_op_state = EDAC_OPSTATE_INVAL; EXPORT_SYMBOL_GPL(edac_op_state); -static int edac_report = EDAC_REPORTING_ENABLED; - /* lock to memory controller's control array */ static DEFINE_MUTEX(mem_ctls_mutex); static LIST_HEAD(mc_devices); @@ -60,65 +58,6 @@ static struct mem_ctl_info *error_desc_to_mci(struct edac_raw_error_desc *e) return container_of(e, struct mem_ctl_info, error_desc); } -int edac_get_report_status(void) -{ - return edac_report; -} -EXPORT_SYMBOL_GPL(edac_get_report_status); - -void edac_set_report_status(int new) -{ - if (new == EDAC_REPORTING_ENABLED || - new == EDAC_REPORTING_DISABLED || - new == EDAC_REPORTING_FORCE) - edac_report = new; -} -EXPORT_SYMBOL_GPL(edac_set_report_status); - -static int edac_report_set(const char *str, const struct kernel_param *kp) -{ - if (!str) - return -EINVAL; - - if (!strncmp(str, "on", 2)) - edac_report = EDAC_REPORTING_ENABLED; - else if (!strncmp(str, "off", 3)) - edac_report = EDAC_REPORTING_DISABLED; - else if (!strncmp(str, "force", 5)) - edac_report = EDAC_REPORTING_FORCE; - - return 0; -} - -static int edac_report_get(char *buffer, const struct kernel_param *kp) -{ - int ret = 0; - - switch (edac_report) { - case EDAC_REPORTING_ENABLED: - ret = sprintf(buffer, "on"); - break; - case EDAC_REPORTING_DISABLED: - ret = sprintf(buffer, "off"); - break; - case EDAC_REPORTING_FORCE: - ret = sprintf(buffer, "force"); - break; - default: - ret = -EINVAL; - break; - } - - return ret; -} - -static const struct kernel_param_ops edac_report_ops = { - .set = edac_report_set, - .get = edac_report_get, -}; - -module_param_cb(edac_report, &edac_report_ops, &edac_report, 0644); - unsigned int edac_dimm_info_location(struct dimm_info *dimm, char *buf, unsigned int len) { diff --git a/drivers/edac/pnd2_edac.c b/drivers/edac/pnd2_edac.c index 1929a5dc8f94..c1f2e6deb021 100644 --- a/drivers/edac/pnd2_edac.c +++ b/drivers/edac/pnd2_edac.c @@ -1396,9 +1396,6 @@ static int pnd2_mce_check_error(struct notifier_block *nb, unsigned long val, vo struct dram_addr daddr; char *type; - if (edac_get_report_status() == EDAC_REPORTING_DISABLED) - return NOTIFY_DONE; - mci = pnd2_mci; if (!mci || (mce->kflags & MCE_HANDLED_CEC)) return NOTIFY_DONE; diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index f790f7d08688..d414698ca324 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3134,8 +3134,6 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (edac_get_report_status() == EDAC_REPORTING_DISABLED) - return NOTIFY_DONE; if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; @@ -3526,8 +3524,6 @@ static int __init sbridge_init(void) if (rc >= 0) { mce_register_decode_chain(&sbridge_mce_dec); - if (edac_get_report_status() == EDAC_REPORTING_DISABLED) - sbridge_printk(KERN_WARNING, "Loading driver, error reporting disabled.\n"); return 0; } diff --git a/drivers/edac/skx_common.c b/drivers/edac/skx_common.c index 6f08a12f6b11..423d33aef54f 100644 --- a/drivers/edac/skx_common.c +++ b/drivers/edac/skx_common.c @@ -574,9 +574,6 @@ int skx_mce_check_error(struct notifier_block *nb, unsigned long val, struct mem_ctl_info *mci; char *type; - if (edac_get_report_status() == EDAC_REPORTING_DISABLED) - return NOTIFY_DONE; - if (mce->kflags & MCE_HANDLED_CEC) return NOTIFY_DONE; diff --git a/include/linux/edac.h b/include/linux/edac.h index 0f20b986b0ab..6eb7d55d7c3d 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -31,14 +31,6 @@ struct device; extern int edac_op_state; struct bus_type *edac_get_sysfs_subsys(void); -int edac_get_report_status(void); -void edac_set_report_status(int new); - -enum { - EDAC_REPORTING_ENABLED, - EDAC_REPORTING_DISABLED, - EDAC_REPORTING_FORCE -}; static inline void opstate_init(void) { -- cgit v1.3 From 5429ef62bcf360aae06740cbe065be01e5cfb6fc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 22 Jan 2020 19:38:21 +0000 Subject: compiler/gcc: Raise minimum GCC version for kernel builds to 4.8 It is very rare to see versions of GCC prior to 4.8 being used to build the mainline kernel. These old compilers are also know to have codegen issues which can lead to silent miscompilation: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 Raise the minimum GCC version for kernel build to 4.8 and remove some tautological Kconfig dependencies as a consequence. Cc: Masahiro Yamada Acked-by: Arnd Bergmann Reviewed-by: Nick Desaulniers Signed-off-by: Will Deacon --- Documentation/process/changes.rst | 2 +- arch/arm/crypto/Kconfig | 12 ++++++------ crypto/Kconfig | 1 - include/linux/compiler-gcc.h | 5 ++--- init/Kconfig | 1 - scripts/gcc-plugins/Kconfig | 2 +- 6 files changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/Documentation/process/changes.rst b/Documentation/process/changes.rst index 91c5ff8e161e..5cfb54c2aaa6 100644 --- a/Documentation/process/changes.rst +++ b/Documentation/process/changes.rst @@ -29,7 +29,7 @@ you probably needn't concern yourself with pcmciautils. ====================== =============== ======================================== Program Minimal version Command to check the version ====================== =============== ======================================== -GNU C 4.6 gcc --version +GNU C 4.8 gcc --version GNU make 3.81 make --version binutils 2.23 ld -v flex 2.5.35 flex --version diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig index 2674de6ada1f..c9bf2df85cb9 100644 --- a/arch/arm/crypto/Kconfig +++ b/arch/arm/crypto/Kconfig @@ -30,7 +30,7 @@ config CRYPTO_SHA1_ARM_NEON config CRYPTO_SHA1_ARM_CE tristate "SHA1 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on KERNEL_MODE_NEON select CRYPTO_SHA1_ARM select CRYPTO_HASH help @@ -39,7 +39,7 @@ config CRYPTO_SHA1_ARM_CE config CRYPTO_SHA2_ARM_CE tristate "SHA-224/256 digest algorithm (ARM v8 Crypto Extensions)" - depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on KERNEL_MODE_NEON select CRYPTO_SHA256_ARM select CRYPTO_HASH help @@ -96,7 +96,7 @@ config CRYPTO_AES_ARM_BS config CRYPTO_AES_ARM_CE tristate "Accelerated AES using ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on KERNEL_MODE_NEON select CRYPTO_SKCIPHER select CRYPTO_LIB_AES select CRYPTO_SIMD @@ -106,7 +106,7 @@ config CRYPTO_AES_ARM_CE config CRYPTO_GHASH_ARM_CE tristate "PMULL-accelerated GHASH using NEON/ARMv8 Crypto Extensions" - depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on KERNEL_MODE_NEON select CRYPTO_HASH select CRYPTO_CRYPTD select CRYPTO_GF128MUL @@ -118,13 +118,13 @@ config CRYPTO_GHASH_ARM_CE config CRYPTO_CRCT10DIF_ARM_CE tristate "CRCT10DIF digest algorithm using PMULL instructions" - depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on KERNEL_MODE_NEON depends on CRC_T10DIF select CRYPTO_HASH config CRYPTO_CRC32_ARM_CE tristate "CRC32(C) digest algorithm using CRC and/or PMULL instructions" - depends on KERNEL_MODE_NEON && (CC_IS_CLANG || GCC_VERSION >= 40800) + depends on KERNEL_MODE_NEON depends on CRC32 select CRYPTO_HASH diff --git a/crypto/Kconfig b/crypto/Kconfig index c24a47406f8f..34a8c5bfd062 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -316,7 +316,6 @@ config CRYPTO_AEGIS128 config CRYPTO_AEGIS128_SIMD bool "Support SIMD acceleration for AEGIS-128" depends on CRYPTO_AEGIS128 && ((ARM || ARM64) && KERNEL_MODE_NEON) - depends on !ARM || CC_IS_CLANG || GCC_VERSION >= 40800 default y config CRYPTO_AEGIS128_AESNI_SSE2 diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index d7ee4c6bad48..e2f725273261 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -10,7 +10,8 @@ + __GNUC_MINOR__ * 100 \ + __GNUC_PATCHLEVEL__) -#if GCC_VERSION < 40600 +/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ +#if GCC_VERSION < 40800 # error Sorry, your compiler is too old - please upgrade it. #endif @@ -126,9 +127,7 @@ #if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP) && !defined(__CHECKER__) #define __HAVE_BUILTIN_BSWAP32__ #define __HAVE_BUILTIN_BSWAP64__ -#if GCC_VERSION >= 40800 #define __HAVE_BUILTIN_BSWAP16__ -#endif #endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP && !__CHECKER__ */ #if GCC_VERSION >= 70000 diff --git a/init/Kconfig b/init/Kconfig index 9e22ee8fbd75..035d38a4f9ad 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1285,7 +1285,6 @@ config LD_DEAD_CODE_DATA_ELIMINATION bool "Dead code and data elimination (EXPERIMENTAL)" depends on HAVE_LD_DEAD_CODE_DATA_ELIMINATION depends on EXPERT - depends on !(FUNCTION_TRACER && CC_IS_GCC && GCC_VERSION < 40800) depends on $(cc-option,-ffunction-sections -fdata-sections) depends on $(ld-option,--gc-sections) help diff --git a/scripts/gcc-plugins/Kconfig b/scripts/gcc-plugins/Kconfig index 013ba3a57669..ce0b99fb5847 100644 --- a/scripts/gcc-plugins/Kconfig +++ b/scripts/gcc-plugins/Kconfig @@ -8,7 +8,7 @@ config HAVE_GCC_PLUGINS menuconfig GCC_PLUGINS bool "GCC plugins" depends on HAVE_GCC_PLUGINS - depends on CC_IS_GCC && GCC_VERSION >= 40800 + depends on CC_IS_GCC depends on $(success,$(srctree)/scripts/gcc-plugin.sh $(CC)) default y help -- cgit v1.3 From a5460b5e5fb82656807840d40d3deaecad094044 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 16 Dec 2019 16:51:45 +0000 Subject: READ_ONCE: Simplify implementations of {READ,WRITE}_ONCE() The implementations of {READ,WRITE}_ONCE() suffer from a significant amount of indirection and complexity due to a historic GCC bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 which was originally worked around by 230fa253df63 ("kernel: Provide READ_ONCE and ASSIGN_ONCE"). Since GCC 4.8 is fairly vintage at this point and we emit a warning if we detect it during the build, return {READ,WRITE}_ONCE() to their former glory with an implementation that is easier to understand and, crucially, more amenable to optimisation. A side effect of this simplification is that WRITE_ONCE() no longer returns a value, but nobody seems to be relying on that and the new behaviour is aligned with smp_store_release(). Suggested-by: Linus Torvalds Cc: Peter Zijlstra Cc: Michael Ellerman Cc: Arnd Bergmann Cc: Christian Borntraeger Signed-off-by: Will Deacon --- include/linux/compiler.h | 118 ++++++++++++++++------------------------------- 1 file changed, 39 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 034b0a644efc..338111a448d0 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -177,60 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) #endif -#include - -#define __READ_ONCE_SIZE \ -({ \ - switch (size) { \ - case 1: *(__u8 *)res = *(volatile __u8 *)p; break; \ - case 2: *(__u16 *)res = *(volatile __u16 *)p; break; \ - case 4: *(__u32 *)res = *(volatile __u32 *)p; break; \ - case 8: *(__u64 *)res = *(volatile __u64 *)p; break; \ - default: \ - barrier(); \ - __builtin_memcpy((void *)res, (const void *)p, size); \ - barrier(); \ - } \ -}) - -static __always_inline -void __read_once_size(const volatile void *p, void *res, int size) -{ - __READ_ONCE_SIZE; -} - -#ifdef CONFIG_KASAN -/* - * We can't declare function 'inline' because __no_sanitize_address confilcts - * with inlining. Attempt to inline it may cause a build failure. - * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 - * '__maybe_unused' allows us to avoid defined-but-not-used warnings. - */ -# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused -#else -# define __no_kasan_or_inline __always_inline -#endif - -static __no_kasan_or_inline -void __read_once_size_nocheck(const volatile void *p, void *res, int size) -{ - __READ_ONCE_SIZE; -} - -static __always_inline void __write_once_size(volatile void *p, void *res, int size) -{ - switch (size) { - case 1: *(volatile __u8 *)p = *(__u8 *)res; break; - case 2: *(volatile __u16 *)p = *(__u16 *)res; break; - case 4: *(volatile __u32 *)p = *(__u32 *)res; break; - case 8: *(volatile __u64 *)p = *(__u64 *)res; break; - default: - barrier(); - __builtin_memcpy((void *)p, (const void *)res, size); - barrier(); - } -} - /* * Prevent the compiler from merging or refetching reads or writes. The * compiler is also forbidden from reordering successive instances of @@ -240,11 +186,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * statements. * * These two macros will also work on aggregate data types like structs or - * unions. If the size of the accessed data type exceeds the word size of - * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will - * fall back to memcpy(). There's at least two memcpy()s: one for the - * __builtin_memcpy() and then one for the macro doing the copy of variable - * - '__u' allocated on the stack. + * unions. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, @@ -256,23 +198,49 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #include #include -#define __READ_ONCE(x, check) \ +#define __READ_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define READ_ONCE(x) \ ({ \ - union { typeof(x) __val; char __c[1]; } __u; \ - if (check) \ - __read_once_size(&(x), __u.__c, sizeof(x)); \ - else \ - __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ - smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ - __u.__val; \ + typeof(x) __x = __READ_ONCE(x); \ + smp_read_barrier_depends(); \ + __x; \ }) -#define READ_ONCE(x) __READ_ONCE(x, 1) + +#define WRITE_ONCE(x, val) \ +do { \ + *(volatile typeof(x) *)&(x) = (val); \ +} while (0) + +#ifdef CONFIG_KASAN +/* + * We can't declare function 'inline' because __no_sanitize_address conflicts + * with inlining. Attempt to inline it may cause a build failure. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 + * '__maybe_unused' allows us to avoid defined-but-not-used warnings. + */ +# define __no_kasan_or_inline __no_sanitize_address notrace __maybe_unused +#else +# define __no_kasan_or_inline __always_inline +#endif + +static __no_kasan_or_inline +unsigned long __read_once_word_nocheck(const void *addr) +{ + return __READ_ONCE(*(unsigned long *)addr); +} /* - * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need - * to hide memory access from KASAN. + * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need to load a + * word from memory atomically but without telling KASAN. This is usually + * used by unwinding code when walking the stack of a running process. */ -#define READ_ONCE_NOCHECK(x) __READ_ONCE(x, 0) +#define READ_ONCE_NOCHECK(x) \ +({ \ + unsigned long __x = __read_once_word_nocheck(&(x)); \ + smp_read_barrier_depends(); \ + __x; \ +}) static __no_kasan_or_inline unsigned long read_word_at_a_time(const void *addr) @@ -281,14 +249,6 @@ unsigned long read_word_at_a_time(const void *addr) return *(unsigned long *)addr; } -#define WRITE_ONCE(x, val) \ -({ \ - union { typeof(x) __val; char __c[1]; } __u = \ - { .__val = (__force typeof(x)) (val) }; \ - __write_once_size(&(x), __u.__c, sizeof(x)); \ - __u.__val; \ -}) - #endif /* __KERNEL__ */ /* -- cgit v1.3 From 9e343b467c70379e66b8b771d96f03ae23eba351 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 13 Dec 2019 14:47:02 +0000 Subject: READ_ONCE: Enforce atomicity for {READ,WRITE}_ONCE() memory accesses {READ,WRITE}_ONCE() cannot guarantee atomicity for arbitrary data sizes. This can be surprising to callers that might incorrectly be expecting atomicity for accesses to aggregate structures, although there are other callers where tearing is actually permissable (e.g. if they are using something akin to sequence locking to protect the access). Linus sayeth: | We could also look at being stricter for the normal READ/WRITE_ONCE(), | and require that they are | | (a) regular integer types | | (b) fit in an atomic word | | We actually did (b) for a while, until we noticed that we do it on | loff_t's etc and relaxed the rules. But maybe we could have a | "non-atomic" version of READ/WRITE_ONCE() that is used for the | questionable cases? The slight snag is that we also have to support 64-bit accesses on 32-bit architectures, as these appear to be widespread and tend to work out ok if either the architecture supports atomic 64-bit accesses (x86, armv7) or if the variable being accesses represents a virtual address and therefore only requires 32-bit atomicity in practice. Take a step in that direction by introducing a variant of 'compiletime_assert_atomic_type()' and use it to check the pointer argument to {READ,WRITE}_ONCE(). Expose __{READ,WRITE}_ONCE() variants which are allowed to tear and convert the one broken caller over to the new macros. Suggested-by: Linus Torvalds Cc: Peter Zijlstra Cc: Michael Ellerman Cc: Arnd Bergmann Signed-off-by: Will Deacon --- drivers/xen/time.c | 2 +- include/linux/compiler.h | 33 ++++++++++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/xen/time.c b/drivers/xen/time.c index 0968859c29d0..108edbcbc040 100644 --- a/drivers/xen/time.c +++ b/drivers/xen/time.c @@ -64,7 +64,7 @@ static void xen_get_runstate_snapshot_cpu_delta( do { state_time = get64(&state->state_entry_time); rmb(); /* Hypervisor might update data. */ - *res = READ_ONCE(*state); + *res = __READ_ONCE(*state); rmb(); /* Hypervisor might update data. */ } while (get64(&state->state_entry_time) != state_time || (state_time & XEN_RUNSTATE_UPDATE)); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 338111a448d0..50bb2461648f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -198,20 +198,37 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, #include #include -#define __READ_ONCE(x) (*(volatile typeof(x) *)&(x)) +/* + * Use __READ_ONCE() instead of READ_ONCE() if you do not require any + * atomicity or dependency ordering guarantees. Note that this may result + * in tears! + */ +#define __READ_ONCE(x) (*(const volatile typeof(x) *)&(x)) -#define READ_ONCE(x) \ +#define __READ_ONCE_SCALAR(x) \ ({ \ typeof(x) __x = __READ_ONCE(x); \ smp_read_barrier_depends(); \ __x; \ }) -#define WRITE_ONCE(x, val) \ +#define READ_ONCE(x) \ +({ \ + compiletime_assert_rwonce_type(x); \ + __READ_ONCE_SCALAR(x); \ +}) + +#define __WRITE_ONCE(x, val) \ do { \ *(volatile typeof(x) *)&(x) = (val); \ } while (0) +#define WRITE_ONCE(x, val) \ +do { \ + compiletime_assert_rwonce_type(x); \ + __WRITE_ONCE(x, val); \ +} while (0) + #ifdef CONFIG_KASAN /* * We can't declare function 'inline' because __no_sanitize_address conflicts @@ -313,6 +330,16 @@ static inline void *offset_to_ptr(const int *off) compiletime_assert(__native_word(t), \ "Need native word sized stores/loads for atomicity.") +/* + * Yes, this permits 64-bit accesses on 32-bit architectures. These will + * actually be atomic in many cases (namely x86), but for others we rely on + * the access being split into 2x32-bit accesses for a 32-bit quantity (e.g. + * a virtual address) and a strong prevailing wind. + */ +#define compiletime_assert_rwonce_type(t) \ + compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ + "Unsupported access size for {READ,WRITE}_ONCE().") + /* &a[0] degrades to a pointer: a different type from an array */ #define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) -- cgit v1.3 From dee081bf8f824cabeb7c7495367d5dad0a444eb1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 19 Dec 2019 14:22:31 +0000 Subject: READ_ONCE: Drop pointer qualifiers when reading from scalar types Passing a volatile-qualified pointer to READ_ONCE() is an absolute trainwreck for code generation: the use of 'typeof()' to define a temporary variable inside the macro means that the final evaluation in macro scope ends up forcing a read back from the stack. When stack protector is enabled (the default for arm64, at least), this causes the compiler to vomit up all sorts of junk. Unfortunately, dropping pointer qualifiers inside the macro poses quite a challenge, especially since the pointed-to type is permitted to be an aggregate, and this is relied upon by mm/ code accessing things like 'pmd_t'. Based on numerous hacks and discussions on the mailing list, this is the best I've managed to come up with. Introduce '__unqual_scalar_typeof()' which takes an expression and, if the expression is an optionally qualified 8, 16, 32 or 64-bit scalar type, evaluates to the unqualified type. Other input types, including aggregates, remain unchanged. Hopefully READ_ONCE() on volatile aggregate pointers isn't something we do on a fast-path. Cc: Peter Zijlstra Cc: Arnd Bergmann Suggested-by: Linus Torvalds Reported-by: Michael Ellerman Signed-off-by: Will Deacon --- include/linux/compiler.h | 6 +++--- include/linux/compiler_types.h | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 50bb2461648f..c363d8debc43 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -203,13 +203,13 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * atomicity or dependency ordering guarantees. Note that this may result * in tears! */ -#define __READ_ONCE(x) (*(const volatile typeof(x) *)&(x)) +#define __READ_ONCE(x) (*(const volatile __unqual_scalar_typeof(x) *)&(x)) #define __READ_ONCE_SCALAR(x) \ ({ \ - typeof(x) __x = __READ_ONCE(x); \ + __unqual_scalar_typeof(x) __x = __READ_ONCE(x); \ smp_read_barrier_depends(); \ - __x; \ + (typeof(x))__x; \ }) #define READ_ONCE(x) \ diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index e970f97a7fcb..233066c92f6f 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -210,6 +210,27 @@ struct ftrace_likely_data { /* Are two types/vars the same type (ignoring qualifiers)? */ #define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +/* + * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving + * non-scalar types unchanged. + * + * We build this out of a couple of helper macros in a vain attempt to + * help you keep your lunch down while reading it. + */ +#define __pick_scalar_type(x, type, otherwise) \ + __builtin_choose_expr(__same_type(x, type), (type)0, otherwise) + +#define __pick_integer_type(x, type, otherwise) \ + __pick_scalar_type(x, unsigned type, \ + __pick_scalar_type(x, signed type, otherwise)) + +#define __unqual_scalar_typeof(x) typeof( \ + __pick_integer_type(x, char, \ + __pick_integer_type(x, short, \ + __pick_integer_type(x, int, \ + __pick_integer_type(x, long, \ + __pick_integer_type(x, long long, x)))))) + /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || \ -- cgit v1.3 From 4ea8391e3556ad08ff0ea8fb282f9a550b8a3333 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 18 Apr 2020 13:40:09 -0700 Subject: Input: delete unused GP2AP002A00F driver There is now an IIO driver for GP2AP002A00F and GP2AP002S00F in drivers/iio/light/gp2ap002.c. Delete this driver, it is unused in the kernel tree and new users can make use of the IIO driver. Signed-off-by: Linus Walleij Link: https://lore.kernel.org/r/20200417203059.8151-1-linus.walleij@linaro.org Signed-off-by: Dmitry Torokhov --- drivers/input/misc/Kconfig | 11 -- drivers/input/misc/Makefile | 1 - drivers/input/misc/gp2ap002a00f.c | 281 ------------------------------------- include/linux/input/gp2ap002a00f.h | 23 --- 4 files changed, 316 deletions(-) delete mode 100644 drivers/input/misc/gp2ap002a00f.c delete mode 100644 include/linux/input/gp2ap002a00f.h (limited to 'include/linux') diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index 7e2e658d551c..293e55fb7a4e 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -265,17 +265,6 @@ config INPUT_APANEL To compile this driver as a module, choose M here: the module will be called apanel. -config INPUT_GP2A - tristate "Sharp GP2AP002A00F I2C Proximity/Opto sensor driver" - depends on I2C - depends on GPIOLIB || COMPILE_TEST - help - Say Y here if you have a Sharp GP2AP002A00F proximity/als combo-chip - hooked to an I2C bus. - - To compile this driver as a module, choose M here: the - module will be called gp2ap002a00f. - config INPUT_GPIO_BEEPER tristate "Generic GPIO Beeper support" depends on GPIOLIB || COMPILE_TEST diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 8fd187f314bd..1db1815ce803 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -33,7 +33,6 @@ obj-$(CONFIG_INPUT_E3X0_BUTTON) += e3x0-button.o obj-$(CONFIG_INPUT_DRV260X_HAPTICS) += drv260x.o obj-$(CONFIG_INPUT_DRV2665_HAPTICS) += drv2665.o obj-$(CONFIG_INPUT_DRV2667_HAPTICS) += drv2667.o -obj-$(CONFIG_INPUT_GP2A) += gp2ap002a00f.o obj-$(CONFIG_INPUT_GPIO_BEEPER) += gpio-beeper.o obj-$(CONFIG_INPUT_GPIO_DECODER) += gpio_decoder.o obj-$(CONFIG_INPUT_GPIO_VIBRA) += gpio-vibra.o diff --git a/drivers/input/misc/gp2ap002a00f.c b/drivers/input/misc/gp2ap002a00f.c deleted file mode 100644 index 90abda8eea67..000000000000 --- a/drivers/input/misc/gp2ap002a00f.c +++ /dev/null @@ -1,281 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2011 Sony Ericsson Mobile Communications Inc. - * - * Author: Courtney Cavin - * Prepared for up-stream by: Oskar Andero - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct gp2a_data { - struct input_dev *input; - const struct gp2a_platform_data *pdata; - struct i2c_client *i2c_client; -}; - -enum gp2a_addr { - GP2A_ADDR_PROX = 0x0, - GP2A_ADDR_GAIN = 0x1, - GP2A_ADDR_HYS = 0x2, - GP2A_ADDR_CYCLE = 0x3, - GP2A_ADDR_OPMOD = 0x4, - GP2A_ADDR_CON = 0x6 -}; - -enum gp2a_controls { - /* Software Shutdown control: 0 = shutdown, 1 = normal operation */ - GP2A_CTRL_SSD = 0x01 -}; - -static int gp2a_report(struct gp2a_data *dt) -{ - int vo = gpio_get_value(dt->pdata->vout_gpio); - - input_report_switch(dt->input, SW_FRONT_PROXIMITY, !vo); - input_sync(dt->input); - - return 0; -} - -static irqreturn_t gp2a_irq(int irq, void *handle) -{ - struct gp2a_data *dt = handle; - - gp2a_report(dt); - - return IRQ_HANDLED; -} - -static int gp2a_enable(struct gp2a_data *dt) -{ - return i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_OPMOD, - GP2A_CTRL_SSD); -} - -static int gp2a_disable(struct gp2a_data *dt) -{ - return i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_OPMOD, - 0x00); -} - -static int gp2a_device_open(struct input_dev *dev) -{ - struct gp2a_data *dt = input_get_drvdata(dev); - int error; - - error = gp2a_enable(dt); - if (error < 0) { - dev_err(&dt->i2c_client->dev, - "unable to activate, err %d\n", error); - return error; - } - - gp2a_report(dt); - - return 0; -} - -static void gp2a_device_close(struct input_dev *dev) -{ - struct gp2a_data *dt = input_get_drvdata(dev); - int error; - - error = gp2a_disable(dt); - if (error < 0) - dev_err(&dt->i2c_client->dev, - "unable to deactivate, err %d\n", error); -} - -static int gp2a_initialize(struct gp2a_data *dt) -{ - int error; - - error = i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_GAIN, - 0x08); - if (error < 0) - return error; - - error = i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_HYS, - 0xc2); - if (error < 0) - return error; - - error = i2c_smbus_write_byte_data(dt->i2c_client, GP2A_ADDR_CYCLE, - 0x04); - if (error < 0) - return error; - - error = gp2a_disable(dt); - - return error; -} - -static int gp2a_probe(struct i2c_client *client, - const struct i2c_device_id *id) -{ - const struct gp2a_platform_data *pdata = dev_get_platdata(&client->dev); - struct gp2a_data *dt; - int error; - - if (!pdata) - return -EINVAL; - - if (pdata->hw_setup) { - error = pdata->hw_setup(client); - if (error < 0) - return error; - } - - error = gpio_request_one(pdata->vout_gpio, GPIOF_IN, GP2A_I2C_NAME); - if (error) - goto err_hw_shutdown; - - dt = kzalloc(sizeof(struct gp2a_data), GFP_KERNEL); - if (!dt) { - error = -ENOMEM; - goto err_free_gpio; - } - - dt->pdata = pdata; - dt->i2c_client = client; - - error = gp2a_initialize(dt); - if (error < 0) - goto err_free_mem; - - dt->input = input_allocate_device(); - if (!dt->input) { - error = -ENOMEM; - goto err_free_mem; - } - - input_set_drvdata(dt->input, dt); - - dt->input->open = gp2a_device_open; - dt->input->close = gp2a_device_close; - dt->input->name = GP2A_I2C_NAME; - dt->input->id.bustype = BUS_I2C; - dt->input->dev.parent = &client->dev; - - input_set_capability(dt->input, EV_SW, SW_FRONT_PROXIMITY); - - error = request_threaded_irq(client->irq, NULL, gp2a_irq, - IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | - IRQF_ONESHOT, - GP2A_I2C_NAME, dt); - if (error) { - dev_err(&client->dev, "irq request failed\n"); - goto err_free_input_dev; - } - - error = input_register_device(dt->input); - if (error) { - dev_err(&client->dev, "device registration failed\n"); - goto err_free_irq; - } - - device_init_wakeup(&client->dev, pdata->wakeup); - i2c_set_clientdata(client, dt); - - return 0; - -err_free_irq: - free_irq(client->irq, dt); -err_free_input_dev: - input_free_device(dt->input); -err_free_mem: - kfree(dt); -err_free_gpio: - gpio_free(pdata->vout_gpio); -err_hw_shutdown: - if (pdata->hw_shutdown) - pdata->hw_shutdown(client); - return error; -} - -static int gp2a_remove(struct i2c_client *client) -{ - struct gp2a_data *dt = i2c_get_clientdata(client); - const struct gp2a_platform_data *pdata = dt->pdata; - - free_irq(client->irq, dt); - - input_unregister_device(dt->input); - kfree(dt); - - gpio_free(pdata->vout_gpio); - - if (pdata->hw_shutdown) - pdata->hw_shutdown(client); - - return 0; -} - -static int __maybe_unused gp2a_suspend(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - struct gp2a_data *dt = i2c_get_clientdata(client); - int retval = 0; - - if (device_may_wakeup(&client->dev)) { - enable_irq_wake(client->irq); - } else { - mutex_lock(&dt->input->mutex); - if (dt->input->users) - retval = gp2a_disable(dt); - mutex_unlock(&dt->input->mutex); - } - - return retval; -} - -static int __maybe_unused gp2a_resume(struct device *dev) -{ - struct i2c_client *client = to_i2c_client(dev); - struct gp2a_data *dt = i2c_get_clientdata(client); - int retval = 0; - - if (device_may_wakeup(&client->dev)) { - disable_irq_wake(client->irq); - } else { - mutex_lock(&dt->input->mutex); - if (dt->input->users) - retval = gp2a_enable(dt); - mutex_unlock(&dt->input->mutex); - } - - return retval; -} - -static SIMPLE_DEV_PM_OPS(gp2a_pm, gp2a_suspend, gp2a_resume); - -static const struct i2c_device_id gp2a_i2c_id[] = { - { GP2A_I2C_NAME, 0 }, - { } -}; -MODULE_DEVICE_TABLE(i2c, gp2a_i2c_id); - -static struct i2c_driver gp2a_i2c_driver = { - .driver = { - .name = GP2A_I2C_NAME, - .pm = &gp2a_pm, - }, - .probe = gp2a_probe, - .remove = gp2a_remove, - .id_table = gp2a_i2c_id, -}; - -module_i2c_driver(gp2a_i2c_driver); - -MODULE_AUTHOR("Courtney Cavin "); -MODULE_DESCRIPTION("Sharp GP2AP002A00F I2C Proximity/Opto sensor driver"); -MODULE_LICENSE("GPL v2"); diff --git a/include/linux/input/gp2ap002a00f.h b/include/linux/input/gp2ap002a00f.h deleted file mode 100644 index 3614a13a8297..000000000000 --- a/include/linux/input/gp2ap002a00f.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _GP2AP002A00F_H_ -#define _GP2AP002A00F_H_ - -#include - -#define GP2A_I2C_NAME "gp2ap002a00f" - -/** - * struct gp2a_platform_data - Sharp gp2ap002a00f proximity platform data - * @vout_gpio: The gpio connected to the object detected pin (VOUT) - * @wakeup: Set to true if the proximity can wake the device from suspend - * @hw_setup: Callback for setting up hardware such as gpios and vregs - * @hw_shutdown: Callback for properly shutting down hardware - */ -struct gp2a_platform_data { - int vout_gpio; - bool wakeup; - int (*hw_setup)(struct i2c_client *client); - int (*hw_shutdown)(struct i2c_client *client); -}; - -#endif -- cgit v1.3 From bb15aded5144d36b2a050e1dad415876c0b94234 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Tue, 7 Apr 2020 23:56:43 +0300 Subject: mtd: spi-nor: move #define SPINOR_OP_WRDI The write disable (WRDI) opcode is not really specific to the SST flashes (anymore?) -- move the #define to the main opcode group, just before WREN. Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 1e2af0ec1f03..1cc8ed5d59ed 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -20,6 +20,7 @@ */ /* Flash opcodes. */ +#define SPINOR_OP_WRDI 0x04 /* Write disable */ #define SPINOR_OP_WREN 0x06 /* Write enable */ #define SPINOR_OP_RDSR 0x05 /* Read status register */ #define SPINOR_OP_WRSR 0x01 /* Write status register 1 byte */ @@ -80,7 +81,6 @@ /* Used for SST flashes only. */ #define SPINOR_OP_BP 0x02 /* Byte program */ -#define SPINOR_OP_WRDI 0x04 /* Write disable */ #define SPINOR_OP_AAI_WP 0xad /* Auto address increment word program */ /* Used for S3AN flashes only */ -- cgit v1.3 From 40a571bc408bf10ac8be0eadf838483b9cf90141 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 3 Apr 2020 23:41:27 +0300 Subject: mtd: spi-nor: fix kernel-doc for 'struct spi_nor' When introducing 'struct spi_nor', a number of issues was added in its kernel-doc comment: - double article in the heading kernel-doc comment; - "point" instead of "pointer" for the 'mtd' and 'dev' fields; - "a" articles instead of "an" for the 'dev' field; - acronyms in the lower case for the 'dev' field; - missing "pointer to" for the 'priv' field. Fix all of those at once... Fixes: 6e602ef73334 ("mtd: spi-nor: add the basic data structures") Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 1cc8ed5d59ed..d3fcf7654040 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -327,10 +327,10 @@ struct spi_nor_manufacturer; struct spi_nor_flash_parameter; /** - * struct spi_nor - Structure for defining a the SPI NOR layer - * @mtd: point to a mtd_info structure + * struct spi_nor - Structure for defining the SPI NOR layer + * @mtd: pointer to an mtd_info structure * @lock: the lock for the read/write/erase/lock/unlock operations - * @dev: point to a spi device, or a spi nor controller device. + * @dev: pointer to an SPI device or an SPI NOR controller device * @spimem: point to the spi mem device * @bouncebuf: bounce buffer used when the buffer passed by the MTD * layer is not DMA-able @@ -354,7 +354,7 @@ struct spi_nor_flash_parameter; * settings that can be overwritten by the spi_nor_fixups * hooks, or dynamically when parsing the SFDP tables. * @dirmap: pointers to struct spi_mem_dirmap_desc for reads/writes. - * @priv: the private data + * @priv: pointer to the private data */ struct spi_nor { struct mtd_info mtd; -- cgit v1.3 From ba0aa311b0eb9d0a66a1b6b6fdeaa3b7584b798a Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 3 Apr 2020 23:44:16 +0300 Subject: mtd: spi-nor: fix kernel-doc for spi_nor::mtd When embedding 'struct mtd_info' within 'struct spi_nor', the kernel-doc comment was forgotten. Fix it by dropping the "pointer to" part from the comment. Fixes: 1976367173a4 ("mtd: spi-nor: embed struct mtd_info within struct spi_nor") Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index d3fcf7654040..20077881c955 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -328,7 +328,7 @@ struct spi_nor_flash_parameter; /** * struct spi_nor - Structure for defining the SPI NOR layer - * @mtd: pointer to an mtd_info structure + * @mtd: an mtd_info structure * @lock: the lock for the read/write/erase/lock/unlock operations * @dev: pointer to an SPI device or an SPI NOR controller device * @spimem: point to the spi mem device -- cgit v1.3 From ba053dd3b4d841e17e910d0b91e22d9ea813fa39 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 3 Apr 2020 23:45:49 +0300 Subject: mtd: spi-nor: fix kernel-doc for spi_nor::reg_proto When adding the '{read|write|reg}_proto' fields to 'struct spi_nor', a colon was missed in the comment for the spi_nor::reg_proto' -- add it. Fixes: cfc5604c488c ("mtd: spi-nor: introduce SPI 1-2-2 and SPI 1-4-4 protocols") Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 20077881c955..1b2143d921ac 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -347,7 +347,7 @@ struct spi_nor_flash_parameter; * @flags: flag options for the current SPI-NOR (SNOR_F_*) * @read_proto: the SPI protocol for read operations * @write_proto: the SPI protocol for write operations - * @reg_proto the SPI protocol for read_reg/write_reg/erase operations + * @reg_proto: the SPI protocol for read_reg/write_reg/erase operations * @controller_ops: SPI NOR controller driver specific operations. * @params: [FLASH-SPECIFIC] SPI-NOR flash parameters and settings. * The structure includes legacy flash parameters and -- cgit v1.3 From 80cb801144265866ce29fabe5ea7ac8588aa673c Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 3 Apr 2020 23:49:48 +0300 Subject: mtd: spi-nor: fix kernel-doc for spi_nor::info When adding the 'info' field to 'struct spi_nor', some acronyms were in lower case and some in upper case and the JEDEC acronym mistyped -- fix these issues. Fixes: 46dde01f6bab ("mtd: spi-nor: add spi_nor_init() function") Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 1b2143d921ac..3333103c519b 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -335,7 +335,7 @@ struct spi_nor_flash_parameter; * @bouncebuf: bounce buffer used when the buffer passed by the MTD * layer is not DMA-able * @bouncebuf_size: size of the bounce buffer - * @info: spi-nor part JDEC MFR id and other info + * @info: SPI NOR part JEDEC MFR ID and other info * @manufacturer: spi-nor manufacturer * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes -- cgit v1.3 From 1f241ad2a093b889122bd6bfdce57551d21bba5b Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Fri, 3 Apr 2020 23:50:57 +0300 Subject: mtd: spi-nor: fix kernel-doc for spi_nor::spimem When adding the 'spimem' field to 'struct spi_nor', a grammar mistake ("point" instead of "pointer") was made -- fix it and convert the SPI acronym to uppercase and fully spell out "memory", while at it... Fixes: b35b9a10362 ("mtd: spi-nor: Move m25p80 code in spi-nor.c") Signed-off-by: Sergei Shtylyov Signed-off-by: Tudor Ambarus --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 3333103c519b..bebff2729c18 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -331,7 +331,7 @@ struct spi_nor_flash_parameter; * @mtd: an mtd_info structure * @lock: the lock for the read/write/erase/lock/unlock operations * @dev: pointer to an SPI device or an SPI NOR controller device - * @spimem: point to the spi mem device + * @spimem: pointer to the SPI memory device * @bouncebuf: bounce buffer used when the buffer passed by the MTD * layer is not DMA-able * @bouncebuf_size: size of the bounce buffer -- cgit v1.3 From 9ba2353b2cc58ba13f7a7369208107f133f6a27b Mon Sep 17 00:00:00 2001 From: Michał Mirosław Date: Fri, 3 Apr 2020 22:20:32 +0200 Subject: power: supply: core: allow to constify property lists MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since tables pointed to by power_supply_desc->properties and ->usb_types are not expected to change after registration, mark the pointers accordingly Signed-off-by: Michał Mirosław Signed-off-by: Sebastian Reichel --- include/linux/power_supply.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index dcd5a71e6c67..6a34df65d4d1 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -223,9 +223,9 @@ struct power_supply_config { struct power_supply_desc { const char *name; enum power_supply_type type; - enum power_supply_usb_type *usb_types; + const enum power_supply_usb_type *usb_types; size_t num_usb_types; - enum power_supply_property *properties; + const enum power_supply_property *properties; size_t num_properties; /* -- cgit v1.3 From 79622f372b8679916bf4e38aabc6df2659d1e109 Mon Sep 17 00:00:00 2001 From: Russell King Date: Mon, 27 Apr 2020 19:49:12 +0100 Subject: i2c: pxa: move private definitions to i2c-pxa.c Move driver-private definitions out of the i2c-pxa.h platform data header file into the driver itself. Nothing outside of the driver makes use of these constants. Signed-off-by: Russell King Signed-off-by: Wolfram Sang --- drivers/i2c/busses/i2c-pxa.c | 43 +++++++++++++++++++++++++++++++ include/linux/platform_data/i2c-pxa.h | 48 ----------------------------------- 2 files changed, 43 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/busses/i2c-pxa.c b/drivers/i2c/busses/i2c-pxa.c index 3286b5f8bf17..9c811a09eac9 100644 --- a/drivers/i2c/busses/i2c-pxa.c +++ b/drivers/i2c/busses/i2c-pxa.c @@ -85,6 +85,49 @@ #define IWCR_HS_CNT2_SHIFT 10 #define IWCR_HS_CNT2_MASK (0x1F << IWCR_HS_CNT2_SHIFT) +/* need a longer timeout if we're dealing with the fact we may well be + * looking at a multi-master environment + */ +#define DEF_TIMEOUT 32 + +#define BUS_ERROR (-EREMOTEIO) +#define XFER_NAKED (-ECONNREFUSED) +#define I2C_RETRY (-2000) /* an error has occurred retry transmit */ + +/* ICR initialize bit values + * + * 15 FM 0 (100 kHz operation) + * 14 UR 0 (No unit reset) + * 13 SADIE 0 (Disables the unit from interrupting on slave addresses + * matching its slave address) + * 12 ALDIE 0 (Disables the unit from interrupt when it loses arbitration + * in master mode) + * 11 SSDIE 0 (Disables interrupts from a slave stop detected, in slave mode) + * 10 BEIE 1 (Enable interrupts from detected bus errors, no ACK sent) + * 9 IRFIE 1 (Enable interrupts from full buffer received) + * 8 ITEIE 1 (Enables the I2C unit to interrupt when transmit buffer empty) + * 7 GCD 1 (Disables i2c unit response to general call messages as a slave) + * 6 IUE 0 (Disable unit until we change settings) + * 5 SCLE 1 (Enables the i2c clock output for master mode (drives SCL) + * 4 MA 0 (Only send stop with the ICR stop bit) + * 3 TB 0 (We are not transmitting a byte initially) + * 2 ACKNAK 0 (Send an ACK after the unit receives a byte) + * 1 STOP 0 (Do not send a STOP) + * 0 START 0 (Do not send a START) + */ +#define I2C_ICR_INIT (ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE) + +/* I2C status register init values + * + * 10 BED 1 (Clear bus error detected) + * 9 SAD 1 (Clear slave address detected) + * 7 IRF 1 (Clear IDBR Receive Full) + * 6 ITE 1 (Clear IDBR Transmit Empty) + * 5 ALD 1 (Clear Arbitration Loss Detected) + * 4 SSD 1 (Clear Slave Stop Detected) + */ +#define I2C_ISR_INIT 0x7FF /* status register init */ + struct pxa_reg_layout { u32 ibmr; u32 idbr; diff --git a/include/linux/platform_data/i2c-pxa.h b/include/linux/platform_data/i2c-pxa.h index 6a9b28399b39..24953981bd9f 100644 --- a/include/linux/platform_data/i2c-pxa.h +++ b/include/linux/platform_data/i2c-pxa.h @@ -7,54 +7,6 @@ #ifndef _I2C_PXA_H_ #define _I2C_PXA_H_ -#if 0 -#define DEF_TIMEOUT 3 -#else -/* need a longer timeout if we're dealing with the fact we may well be - * looking at a multi-master environment -*/ -#define DEF_TIMEOUT 32 -#endif - -#define BUS_ERROR (-EREMOTEIO) -#define XFER_NAKED (-ECONNREFUSED) -#define I2C_RETRY (-2000) /* an error has occurred retry transmit */ - -/* ICR initialize bit values -* -* 15. FM 0 (100 Khz operation) -* 14. UR 0 (No unit reset) -* 13. SADIE 0 (Disables the unit from interrupting on slave addresses -* matching its slave address) -* 12. ALDIE 0 (Disables the unit from interrupt when it loses arbitration -* in master mode) -* 11. SSDIE 0 (Disables interrupts from a slave stop detected, in slave mode) -* 10. BEIE 1 (Enable interrupts from detected bus errors, no ACK sent) -* 9. IRFIE 1 (Enable interrupts from full buffer received) -* 8. ITEIE 1 (Enables the I2C unit to interrupt when transmit buffer empty) -* 7. GCD 1 (Disables i2c unit response to general call messages as a slave) -* 6. IUE 0 (Disable unit until we change settings) -* 5. SCLE 1 (Enables the i2c clock output for master mode (drives SCL) -* 4. MA 0 (Only send stop with the ICR stop bit) -* 3. TB 0 (We are not transmitting a byte initially) -* 2. ACKNAK 0 (Send an ACK after the unit receives a byte) -* 1. STOP 0 (Do not send a STOP) -* 0. START 0 (Do not send a START) -* -*/ -#define I2C_ICR_INIT (ICR_BEIE | ICR_IRFIE | ICR_ITEIE | ICR_GCD | ICR_SCLE) - -/* I2C status register init values - * - * 10. BED 1 (Clear bus error detected) - * 9. SAD 1 (Clear slave address detected) - * 7. IRF 1 (Clear IDBR Receive Full) - * 6. ITE 1 (Clear IDBR Transmit Empty) - * 5. ALD 1 (Clear Arbitration Loss Detected) - * 4. SSD 1 (Clear Slave Stop Detected) - */ -#define I2C_ISR_INIT 0x7FF /* status register init */ - struct i2c_pxa_platform_data { unsigned int class; unsigned int use_pio :1; -- cgit v1.3 From 19acd03d95dad1f50d06f28179a1866fca431fed Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 24 Apr 2020 17:47:29 +0200 Subject: kcsan: Add __kcsan_{enable,disable}_current() variants The __kcsan_{enable,disable}_current() variants only call into KCSAN if KCSAN is enabled for the current compilation unit. Note: This is typically not what we want, as we usually want to ensure that even calls into other functions still have KCSAN disabled. These variants may safely be used in header files that are shared between regular kernel code and code that does not link the KCSAN runtime. Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney --- include/linux/kcsan-checks.h | 17 ++++++++++++++--- kernel/kcsan/core.c | 7 +++++++ 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index ef95ddc49182..7b0b9c44f5f3 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -49,6 +49,7 @@ void kcsan_disable_current(void); * Supports nesting. */ void kcsan_enable_current(void); +void kcsan_enable_current_nowarn(void); /* Safe in uaccess regions. */ /** * kcsan_nestable_atomic_begin - begin nestable atomic region @@ -149,6 +150,7 @@ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, static inline void kcsan_disable_current(void) { } static inline void kcsan_enable_current(void) { } +static inline void kcsan_enable_current_nowarn(void) { } static inline void kcsan_nestable_atomic_begin(void) { } static inline void kcsan_nestable_atomic_end(void) { } static inline void kcsan_flat_atomic_begin(void) { } @@ -165,15 +167,24 @@ static inline void kcsan_end_scoped_access(struct kcsan_scoped_access *sa) { } #endif /* CONFIG_KCSAN */ +#ifdef __SANITIZE_THREAD__ /* - * kcsan_*: Only calls into the runtime when the particular compilation unit has - * KCSAN instrumentation enabled. May be used in header files. + * Only calls into the runtime when the particular compilation unit has KCSAN + * instrumentation enabled. May be used in header files. */ -#ifdef __SANITIZE_THREAD__ #define kcsan_check_access __kcsan_check_access + +/* + * Only use these to disable KCSAN for accesses in the current compilation unit; + * calls into libraries may still perform KCSAN checks. + */ +#define __kcsan_disable_current kcsan_disable_current +#define __kcsan_enable_current kcsan_enable_current_nowarn #else static inline void kcsan_check_access(const volatile void *ptr, size_t size, int type) { } +static inline void __kcsan_enable_current(void) { } +static inline void __kcsan_disable_current(void) { } #endif /** diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index a572aae61b98..a73a66cf79df 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -625,6 +625,13 @@ void kcsan_enable_current(void) } EXPORT_SYMBOL(kcsan_enable_current); +void kcsan_enable_current_nowarn(void) +{ + if (get_ctx()->disable_count-- == 0) + kcsan_disable_current(); +} +EXPORT_SYMBOL(kcsan_enable_current_nowarn); + void kcsan_nestable_atomic_begin(void) { /* -- cgit v1.3 From 52782c92ac85c4e393eb4a903a62e6c24afa633f Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 6 May 2020 12:09:34 -0400 Subject: kthread: save thread function It's handy to keep the kthread_fn just as a unique cookie to identify classes of kthreads. E.g. if you can verify that a given task is running your thread_fn, then you may know what sort of type kthread_data points to. We'll use this in nfsd to pass some information into the vfs. Note it will need kthread_data() exported too. Original-patch-by: Tejun Heo Signed-off-by: J. Bruce Fields --- include/linux/kthread.h | 1 + kernel/kthread.c | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 8bbcaad7ef0f..c2a274b79c42 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -57,6 +57,7 @@ bool kthread_should_stop(void); bool kthread_should_park(void); bool __kthread_should_park(struct task_struct *k); bool kthread_freezable_should_stop(bool *was_frozen); +void *kthread_func(struct task_struct *k); void *kthread_data(struct task_struct *k); void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); diff --git a/kernel/kthread.c b/kernel/kthread.c index bfbfa481be3a..b84fc7eec035 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -46,6 +46,7 @@ struct kthread_create_info struct kthread { unsigned long flags; unsigned int cpu; + int (*threadfn)(void *); void *data; struct completion parked; struct completion exited; @@ -152,6 +153,20 @@ bool kthread_freezable_should_stop(bool *was_frozen) } EXPORT_SYMBOL_GPL(kthread_freezable_should_stop); +/** + * kthread_func - return the function specified on kthread creation + * @task: kthread task in question + * + * Returns NULL if the task is not a kthread. + */ +void *kthread_func(struct task_struct *task) +{ + if (task->flags & PF_KTHREAD) + return to_kthread(task)->threadfn; + return NULL; +} +EXPORT_SYMBOL_GPL(kthread_func); + /** * kthread_data - return data value specified on kthread creation * @task: kthread task in question @@ -164,6 +179,7 @@ void *kthread_data(struct task_struct *task) { return to_kthread(task)->data; } +EXPORT_SYMBOL_GPL(kthread_data); /** * kthread_probe_data - speculative version of kthread_data() @@ -244,6 +260,7 @@ static int kthread(void *_create) do_exit(-ENOMEM); } + self->threadfn = threadfn; self->data = data; init_completion(&self->exited); init_completion(&self->parked); -- cgit v1.3 From 28df3d1539de5090f7916f6fff03891b67f366f4 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 28 Jul 2017 16:35:15 -0400 Subject: nfsd: clients don't need to break their own delegations We currently revoke read delegations on any write open or any operation that modifies file data or metadata (including rename, link, and unlink). But if the delegation in question is the only read delegation and is held by the client performing the operation, that's not really necessary. It's not always possible to prevent this in the NFSv4.0 case, because there's not always a way to determine which client an NFSv4.0 delegation came from. (In theory we could try to guess this from the transport layer, e.g., by assuming all traffic on a given TCP connection comes from the same client. But that's not really correct.) In the NFSv4.1 case the session layer always tells us the client. This patch should remove such self-conflicts in all cases where we can reliably determine the client from the compound. To do that we need to track "who" is performing a given (possibly lease-breaking) file operation. We're doing that by storing the information in the svc_rqst and using kthread_data() to map the current task back to a svc_rqst. Signed-off-by: J. Bruce Fields --- Documentation/filesystems/locking.rst | 2 ++ fs/locks.c | 3 +++ fs/nfsd/nfs4proc.c | 2 ++ fs/nfsd/nfs4state.c | 14 ++++++++++++++ fs/nfsd/nfsd.h | 2 ++ fs/nfsd/nfssvc.c | 6 ++++++ include/linux/fs.h | 1 + include/linux/sunrpc/svc.h | 1 + 8 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 5057e4d9dcd1..9fdcec416614 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -425,6 +425,7 @@ prototypes:: int (*lm_grant)(struct file_lock *, struct file_lock *, int); void (*lm_break)(struct file_lock *); /* break_lease callback */ int (*lm_change)(struct file_lock **, int); + bool (*lm_breaker_owns_lease)(struct file_lock *); locking rules: @@ -435,6 +436,7 @@ lm_notify: yes yes no lm_grant: no no no lm_break: yes no no lm_change yes no no +lm_breaker_owns_lease: no no no ========== ============= ================= ========= buffer_head diff --git a/fs/locks.c b/fs/locks.c index b8a31c1c4fff..a3f186846e93 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1557,6 +1557,9 @@ static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker) { bool rc; + if (lease->fl_lmops->lm_breaker_owns_lease + && lease->fl_lmops->lm_breaker_owns_lease(lease)) + return false; if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) { rc = false; goto trace; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 0e75f7fb5fec..a6d73aa51ce4 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2302,6 +2302,8 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) } check_if_stalefh_allowed(args); + rqstp->rq_lease_breaker = (void **)&cstate->clp; + trace_nfsd_compound(rqstp, args->opcnt); while (!status && resp->opcnt < args->opcnt) { op = &args->ops[resp->opcnt++]; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index fe88f5f23cf4..31388046c6fe 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4579,6 +4579,19 @@ nfsd_break_deleg_cb(struct file_lock *fl) return ret; } +static bool nfsd_breaker_owns_lease(struct file_lock *fl) +{ + struct nfs4_delegation *dl = fl->fl_owner; + struct svc_rqst *rqst; + struct nfs4_client *clp; + + if (!i_am_nfsd()) + return NULL; + rqst = kthread_data(current); + clp = *(rqst->rq_lease_breaker); + return dl->dl_stid.sc_client == clp; +} + static int nfsd_change_deleg_cb(struct file_lock *onlist, int arg, struct list_head *dispose) @@ -4590,6 +4603,7 @@ nfsd_change_deleg_cb(struct file_lock *onlist, int arg, } static const struct lock_manager_operations nfsd_lease_mng_ops = { + .lm_breaker_owns_lease = nfsd_breaker_owns_lease, .lm_break = nfsd_break_deleg_cb, .lm_change = nfsd_change_deleg_cb, }; diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 2ab5569126b8..36cdd81b6688 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -88,6 +88,8 @@ int nfsd_pool_stats_release(struct inode *, struct file *); void nfsd_destroy(struct net *net); +bool i_am_nfsd(void); + struct nfsdfs_client { struct kref cl_ref; void (*cl_release)(struct kref *kref); diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ca9fd348548b..4f588c0eaaf4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -601,6 +601,11 @@ static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_module = THIS_MODULE, }; +bool i_am_nfsd() +{ + return kthread_func(current) == nfsd; +} + int nfsd_create_serv(struct net *net) { int error; @@ -1011,6 +1016,7 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) *statp = rpc_garbage_args; return 1; } + rqstp->rq_lease_breaker = NULL; /* * Give the xdr decoder a chance to change this if it wants * (necessary in the NFSv4.0 compound case) diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..4b784560ffaf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1045,6 +1045,7 @@ struct lock_manager_operations { bool (*lm_break)(struct file_lock *); int (*lm_change)(struct file_lock *, int, struct list_head *); void (*lm_setup)(struct file_lock *, void **); + bool (*lm_breaker_owns_lease)(struct file_lock *); }; struct lock_manager { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fd390894a584..abf4a57ce4a7 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -299,6 +299,7 @@ struct svc_rqst { struct net *rq_bc_net; /* pointer to backchannel's * net namespace */ + void ** rq_lease_breaker; /* The v4 client breaking a lease */ }; #define SVC_NET(rqst) (rqst->rq_xprt ? rqst->rq_xprt->xpt_net : rqst->rq_bc_net) -- cgit v1.3 From 5a63b7ba50fd6b7a897bf9353dbf31d579cfe116 Mon Sep 17 00:00:00 2001 From: Matti Vaittinen Date: Fri, 8 May 2020 18:47:20 +0300 Subject: power: supply: add battery parameters Add parsing of new device-tree battery bindings. - trickle-charge-current-microamp - precharge-upper-limit-microvolt - re-charge-voltage-microvolt - over-voltage-threshold-microvolt Signed-off-by: Matti Vaittinen Signed-off-by: Sebastian Reichel --- drivers/power/supply/power_supply_core.c | 8 ++++++++ include/linux/power_supply.h | 4 ++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/drivers/power/supply/power_supply_core.c b/drivers/power/supply/power_supply_core.c index 1a9a9fae73d3..02b37fe6061c 100644 --- a/drivers/power/supply/power_supply_core.c +++ b/drivers/power/supply/power_supply_core.c @@ -620,10 +620,18 @@ int power_supply_get_battery_info(struct power_supply *psy, &info->voltage_min_design_uv); of_property_read_u32(battery_np, "voltage-max-design-microvolt", &info->voltage_max_design_uv); + of_property_read_u32(battery_np, "trickle-charge-current-microamp", + &info->tricklecharge_current_ua); of_property_read_u32(battery_np, "precharge-current-microamp", &info->precharge_current_ua); + of_property_read_u32(battery_np, "precharge-upper-limit-microvolt", + &info->precharge_voltage_max_uv); of_property_read_u32(battery_np, "charge-term-current-microamp", &info->charge_term_current_ua); + of_property_read_u32(battery_np, "re-charge-voltage-microvolt", + &info->charge_restart_voltage_uv); + of_property_read_u32(battery_np, "over-voltage-threshold-microvolt", + &info->overvoltage_limit_uv); of_property_read_u32(battery_np, "constant-charge-current-max-microamp", &info->constant_charge_current_max_ua); of_property_read_u32(battery_np, "constant-charge-voltage-max-microvolt", diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 6a34df65d4d1..1f60731ec7fe 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -346,8 +346,12 @@ struct power_supply_battery_info { int charge_full_design_uah; /* microAmp-hours */ int voltage_min_design_uv; /* microVolts */ int voltage_max_design_uv; /* microVolts */ + int tricklecharge_current_ua; /* microAmps */ int precharge_current_ua; /* microAmps */ + int precharge_voltage_max_uv; /* microVolts */ int charge_term_current_ua; /* microAmps */ + int charge_restart_voltage_uv; /* microVolts */ + int overvoltage_limit_uv; /* microVolts */ int constant_charge_current_max_ua; /* microAmps */ int constant_charge_voltage_max_uv; /* microVolts */ int factory_internal_resistance_uohm; /* microOhms */ -- cgit v1.3 From 83c411c29b90b7de505a2fe1d655293c95f8ba90 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 28 Apr 2020 11:42:54 +0200 Subject: mtd: rawnand: timings: Add mode information to the timings structure Convert the timings union into a structure containing the mode and the actual values. The values are still a union in prevision of the addition of the NVDDR modes. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200428094302.14624-2-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/nand_timings.c | 6 ++++++ include/linux/mtd/rawnand.h | 10 +++++++--- 2 files changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_timings.c b/drivers/mtd/nand/raw/nand_timings.c index f64b06a71dfa..0061cbaf931d 100644 --- a/drivers/mtd/nand/raw/nand_timings.c +++ b/drivers/mtd/nand/raw/nand_timings.c @@ -16,6 +16,7 @@ static const struct nand_data_interface onfi_sdr_timings[] = { /* Mode 0 */ { .type = NAND_SDR_IFACE, + .timings.mode = 0, .timings.sdr = { .tCCS_min = 500000, .tR_max = 200000000, @@ -58,6 +59,7 @@ static const struct nand_data_interface onfi_sdr_timings[] = { /* Mode 1 */ { .type = NAND_SDR_IFACE, + .timings.mode = 1, .timings.sdr = { .tCCS_min = 500000, .tR_max = 200000000, @@ -100,6 +102,7 @@ static const struct nand_data_interface onfi_sdr_timings[] = { /* Mode 2 */ { .type = NAND_SDR_IFACE, + .timings.mode = 2, .timings.sdr = { .tCCS_min = 500000, .tR_max = 200000000, @@ -142,6 +145,7 @@ static const struct nand_data_interface onfi_sdr_timings[] = { /* Mode 3 */ { .type = NAND_SDR_IFACE, + .timings.mode = 3, .timings.sdr = { .tCCS_min = 500000, .tR_max = 200000000, @@ -184,6 +188,7 @@ static const struct nand_data_interface onfi_sdr_timings[] = { /* Mode 4 */ { .type = NAND_SDR_IFACE, + .timings.mode = 4, .timings.sdr = { .tCCS_min = 500000, .tR_max = 200000000, @@ -226,6 +231,7 @@ static const struct nand_data_interface onfi_sdr_timings[] = { /* Mode 5 */ { .type = NAND_SDR_IFACE, + .timings.mode = 5, .timings.sdr = { .tCCS_min = 500000, .tR_max = 200000000, diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 1e76196f9829..21873168ba4d 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -491,13 +491,17 @@ enum nand_data_interface_type { /** * struct nand_data_interface - NAND interface timing * @type: type of the timing - * @timings: The timing, type according to @type + * @timings: The timing information + * @timings.mode: Timing mode as defined in the specification * @timings.sdr: Use it when @type is %NAND_SDR_IFACE. */ struct nand_data_interface { enum nand_data_interface_type type; - union { - struct nand_sdr_timings sdr; + struct nand_timings { + unsigned int mode; + union { + struct nand_sdr_timings sdr; + }; } timings; }; -- cgit v1.3 From 9e3307a169537a6adc30b13bf9063e94990a5493 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 3 May 2020 17:53:37 +0200 Subject: mtd: Add support for emulated SLC mode on MLC NANDs MLC NANDs can be made a bit more reliable if we only program the lower page of each pair. At least, this solves the paired-pages corruption issue. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200503155341.16712-5-miquel.raynal@bootlin.com --- drivers/mtd/mtdcore.c | 189 +++++++++++++++++++++++++++++++++++++---- drivers/mtd/mtdpart.c | 54 +++++++----- include/linux/mtd/mtd.h | 7 +- include/linux/mtd/partitions.h | 2 + include/uapi/mtd/mtd-abi.h | 1 + 5 files changed, 213 insertions(+), 40 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c index 2916674208b3..50437b4ffe76 100644 --- a/drivers/mtd/mtdcore.c +++ b/drivers/mtd/mtdcore.c @@ -617,6 +617,19 @@ int add_mtd_device(struct mtd_info *mtd) !(mtd->flags & MTD_NO_ERASE))) return -EINVAL; + /* + * MTD_SLC_ON_MLC_EMULATION can only be set on partitions, when the + * master is an MLC NAND and has a proper pairing scheme defined. + * We also reject masters that implement ->_writev() for now, because + * NAND controller drivers don't implement this hook, and adding the + * SLC -> MLC address/length conversion to this path is useless if we + * don't have a user. + */ + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION && + (!mtd_is_partition(mtd) || master->type != MTD_MLCNANDFLASH || + !master->pairing || master->_writev)) + return -EINVAL; + mutex_lock(&mtd_table_mutex); i = idr_alloc(&mtd_idr, mtd, 0, 0, GFP_KERNEL); @@ -632,6 +645,14 @@ int add_mtd_device(struct mtd_info *mtd) if (mtd->bitflip_threshold == 0) mtd->bitflip_threshold = mtd->ecc_strength; + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { + int ngroups = mtd_pairing_groups(master); + + mtd->erasesize /= ngroups; + mtd->size = (u64)mtd_div_by_eb(mtd->size, master) * + mtd->erasesize; + } + if (is_power_of_2(mtd->erasesize)) mtd->erasesize_shift = ffs(mtd->erasesize) - 1; else @@ -1074,9 +1095,11 @@ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) { struct mtd_info *master = mtd_get_master(mtd); u64 mst_ofs = mtd_get_master_ofs(mtd, 0); + struct erase_info adjinstr; int ret; instr->fail_addr = MTD_FAIL_ADDR_UNKNOWN; + adjinstr = *instr; if (!mtd->erasesize || !master->_erase) return -ENOTSUPP; @@ -1091,12 +1114,27 @@ int mtd_erase(struct mtd_info *mtd, struct erase_info *instr) ledtrig_mtd_activity(); - instr->addr += mst_ofs; - ret = master->_erase(master, instr); - if (instr->fail_addr != MTD_FAIL_ADDR_UNKNOWN) - instr->fail_addr -= mst_ofs; + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { + adjinstr.addr = (loff_t)mtd_div_by_eb(instr->addr, mtd) * + master->erasesize; + adjinstr.len = ((u64)mtd_div_by_eb(instr->addr + instr->len, mtd) * + master->erasesize) - + adjinstr.addr; + } + + adjinstr.addr += mst_ofs; + + ret = master->_erase(master, &adjinstr); + + if (adjinstr.fail_addr != MTD_FAIL_ADDR_UNKNOWN) { + instr->fail_addr = adjinstr.fail_addr - mst_ofs; + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { + instr->fail_addr = mtd_div_by_eb(instr->fail_addr, + master); + instr->fail_addr *= mtd->erasesize; + } + } - instr->addr -= mst_ofs; return ret; } EXPORT_SYMBOL_GPL(mtd_erase); @@ -1276,6 +1314,101 @@ static int mtd_check_oob_ops(struct mtd_info *mtd, loff_t offs, return 0; } +static int mtd_read_oob_std(struct mtd_info *mtd, loff_t from, + struct mtd_oob_ops *ops) +{ + struct mtd_info *master = mtd_get_master(mtd); + int ret; + + from = mtd_get_master_ofs(mtd, from); + if (master->_read_oob) + ret = master->_read_oob(master, from, ops); + else + ret = master->_read(master, from, ops->len, &ops->retlen, + ops->datbuf); + + return ret; +} + +static int mtd_write_oob_std(struct mtd_info *mtd, loff_t to, + struct mtd_oob_ops *ops) +{ + struct mtd_info *master = mtd_get_master(mtd); + int ret; + + to = mtd_get_master_ofs(mtd, to); + if (master->_write_oob) + ret = master->_write_oob(master, to, ops); + else + ret = master->_write(master, to, ops->len, &ops->retlen, + ops->datbuf); + + return ret; +} + +static int mtd_io_emulated_slc(struct mtd_info *mtd, loff_t start, bool read, + struct mtd_oob_ops *ops) +{ + struct mtd_info *master = mtd_get_master(mtd); + int ngroups = mtd_pairing_groups(master); + int npairs = mtd_wunit_per_eb(master) / ngroups; + struct mtd_oob_ops adjops = *ops; + unsigned int wunit, oobavail; + struct mtd_pairing_info info; + int max_bitflips = 0; + u32 ebofs, pageofs; + loff_t base, pos; + + ebofs = mtd_mod_by_eb(start, mtd); + base = (loff_t)mtd_div_by_eb(start, mtd) * master->erasesize; + info.group = 0; + info.pair = mtd_div_by_ws(ebofs, mtd); + pageofs = mtd_mod_by_ws(ebofs, mtd); + oobavail = mtd_oobavail(mtd, ops); + + while (ops->retlen < ops->len || ops->oobretlen < ops->ooblen) { + int ret; + + if (info.pair >= npairs) { + info.pair = 0; + base += master->erasesize; + } + + wunit = mtd_pairing_info_to_wunit(master, &info); + pos = mtd_wunit_to_offset(mtd, base, wunit); + + adjops.len = ops->len - ops->retlen; + if (adjops.len > mtd->writesize - pageofs) + adjops.len = mtd->writesize - pageofs; + + adjops.ooblen = ops->ooblen - ops->oobretlen; + if (adjops.ooblen > oobavail - adjops.ooboffs) + adjops.ooblen = oobavail - adjops.ooboffs; + + if (read) { + ret = mtd_read_oob_std(mtd, pos + pageofs, &adjops); + if (ret > 0) + max_bitflips = max(max_bitflips, ret); + } else { + ret = mtd_write_oob_std(mtd, pos + pageofs, &adjops); + } + + if (ret < 0) + return ret; + + max_bitflips = max(max_bitflips, ret); + ops->retlen += adjops.retlen; + ops->oobretlen += adjops.oobretlen; + adjops.datbuf += adjops.retlen; + adjops.oobbuf += adjops.oobretlen; + adjops.ooboffs = 0; + pageofs = 0; + info.pair++; + } + + return max_bitflips; +} + int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) { struct mtd_info *master = mtd_get_master(mtd); @@ -1294,12 +1427,10 @@ int mtd_read_oob(struct mtd_info *mtd, loff_t from, struct mtd_oob_ops *ops) if (!master->_read_oob && (!master->_read || ops->oobbuf)) return -EOPNOTSUPP; - from = mtd_get_master_ofs(mtd, from); - if (master->_read_oob) - ret_code = master->_read_oob(master, from, ops); + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) + ret_code = mtd_io_emulated_slc(mtd, from, true, ops); else - ret_code = master->_read(master, from, ops->len, &ops->retlen, - ops->datbuf); + ret_code = mtd_read_oob_std(mtd, from, ops); mtd_update_ecc_stats(mtd, master, &old_stats); @@ -1338,13 +1469,10 @@ int mtd_write_oob(struct mtd_info *mtd, loff_t to, if (!master->_write_oob && (!master->_write || ops->oobbuf)) return -EOPNOTSUPP; - to = mtd_get_master_ofs(mtd, to); + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) + return mtd_io_emulated_slc(mtd, to, false, ops); - if (master->_write_oob) - return master->_write_oob(master, to, ops); - else - return master->_write(master, to, ops->len, &ops->retlen, - ops->datbuf); + return mtd_write_oob_std(mtd, to, ops); } EXPORT_SYMBOL_GPL(mtd_write_oob); @@ -1817,6 +1945,12 @@ int mtd_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len) return -EINVAL; if (!len) return 0; + + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { + ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; + len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; + } + return master->_lock(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_lock); @@ -1831,6 +1965,12 @@ int mtd_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len) return -EINVAL; if (!len) return 0; + + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { + ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; + len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; + } + return master->_unlock(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_unlock); @@ -1845,6 +1985,12 @@ int mtd_is_locked(struct mtd_info *mtd, loff_t ofs, uint64_t len) return -EINVAL; if (!len) return 0; + + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) { + ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; + len = (u64)mtd_div_by_eb(len, mtd) * master->erasesize; + } + return master->_is_locked(master, mtd_get_master_ofs(mtd, ofs), len); } EXPORT_SYMBOL_GPL(mtd_is_locked); @@ -1857,6 +2003,10 @@ int mtd_block_isreserved(struct mtd_info *mtd, loff_t ofs) return -EINVAL; if (!master->_block_isreserved) return 0; + + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) + ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; + return master->_block_isreserved(master, mtd_get_master_ofs(mtd, ofs)); } EXPORT_SYMBOL_GPL(mtd_block_isreserved); @@ -1869,6 +2019,10 @@ int mtd_block_isbad(struct mtd_info *mtd, loff_t ofs) return -EINVAL; if (!master->_block_isbad) return 0; + + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) + ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; + return master->_block_isbad(master, mtd_get_master_ofs(mtd, ofs)); } EXPORT_SYMBOL_GPL(mtd_block_isbad); @@ -1885,6 +2039,9 @@ int mtd_block_markbad(struct mtd_info *mtd, loff_t ofs) if (!(mtd->flags & MTD_WRITEABLE)) return -EROFS; + if (mtd->flags & MTD_SLC_ON_MLC_EMULATION) + ofs = (loff_t)mtd_div_by_eb(ofs, mtd) * master->erasesize; + ret = master->_block_markbad(master, mtd_get_master_ofs(mtd, ofs)); if (ret) return ret; diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c index 3f6025684f58..c3575b686f79 100644 --- a/drivers/mtd/mtdpart.c +++ b/drivers/mtd/mtdpart.c @@ -35,9 +35,12 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, const struct mtd_partition *part, int partno, uint64_t cur_offset) { - int wr_alignment = (parent->flags & MTD_NO_ERASE) ? parent->writesize : - parent->erasesize; - struct mtd_info *child, *master = mtd_get_master(parent); + struct mtd_info *master = mtd_get_master(parent); + int wr_alignment = (parent->flags & MTD_NO_ERASE) ? + master->writesize : master->erasesize; + u64 parent_size = mtd_is_partition(parent) ? + parent->part.size : parent->size; + struct mtd_info *child; u32 remainder; char *name; u64 tmp; @@ -56,8 +59,9 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, /* set up the MTD object for this partition */ child->type = parent->type; child->part.flags = parent->flags & ~part->mask_flags; + child->part.flags |= part->add_flags; child->flags = child->part.flags; - child->size = part->size; + child->part.size = part->size; child->writesize = parent->writesize; child->writebufsize = parent->writebufsize; child->oobsize = parent->oobsize; @@ -98,29 +102,29 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, } if (child->part.offset == MTDPART_OFS_RETAIN) { child->part.offset = cur_offset; - if (parent->size - child->part.offset >= child->size) { - child->size = parent->size - child->part.offset - - child->size; + if (parent_size - child->part.offset >= child->part.size) { + child->part.size = parent_size - child->part.offset - + child->part.size; } else { printk(KERN_ERR "mtd partition \"%s\" doesn't have enough space: %#llx < %#llx, disabled\n", - part->name, parent->size - child->part.offset, - child->size); + part->name, parent_size - child->part.offset, + child->part.size); /* register to preserve ordering */ goto out_register; } } - if (child->size == MTDPART_SIZ_FULL) - child->size = parent->size - child->part.offset; + if (child->part.size == MTDPART_SIZ_FULL) + child->part.size = parent_size - child->part.offset; printk(KERN_NOTICE "0x%012llx-0x%012llx : \"%s\"\n", - child->part.offset, child->part.offset + child->size, + child->part.offset, child->part.offset + child->part.size, child->name); /* let's do some sanity checks */ - if (child->part.offset >= parent->size) { + if (child->part.offset >= parent_size) { /* let's register it anyway to preserve ordering */ child->part.offset = 0; - child->size = 0; + child->part.size = 0; /* Initialize ->erasesize to make add_mtd_device() happy. */ child->erasesize = parent->erasesize; @@ -128,15 +132,16 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, part->name); goto out_register; } - if (child->part.offset + child->size > parent->size) { - child->size = parent->size - child->part.offset; + if (child->part.offset + child->part.size > parent->size) { + child->part.size = parent_size - child->part.offset; printk(KERN_WARNING"mtd: partition \"%s\" extends beyond the end of device \"%s\" -- size truncated to %#llx\n", - part->name, parent->name, child->size); + part->name, parent->name, child->part.size); } + if (parent->numeraseregions > 1) { /* Deal with variable erase size stuff */ int i, max = parent->numeraseregions; - u64 end = child->part.offset + child->size; + u64 end = child->part.offset + child->part.size; struct mtd_erase_region_info *regions = parent->eraseregions; /* Find the first erase regions which is part of this @@ -156,7 +161,7 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, BUG_ON(child->erasesize == 0); } else { /* Single erase size */ - child->erasesize = parent->erasesize; + child->erasesize = master->erasesize; } /* @@ -178,7 +183,7 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, part->name); } - tmp = mtd_get_master_ofs(child, 0) + child->size; + tmp = mtd_get_master_ofs(child, 0) + child->part.size; remainder = do_div(tmp, wr_alignment); if ((child->flags & MTD_WRITEABLE) && remainder) { child->flags &= ~MTD_WRITEABLE; @@ -186,6 +191,7 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, part->name); } + child->size = child->part.size; child->ecc_step_size = parent->ecc_step_size; child->ecc_strength = parent->ecc_strength; child->bitflip_threshold = parent->bitflip_threshold; @@ -193,7 +199,7 @@ static struct mtd_info *allocate_partition(struct mtd_info *parent, if (master->_block_isbad) { uint64_t offs = 0; - while (offs < child->size) { + while (offs < child->part.size) { if (mtd_block_isreserved(child, offs)) child->ecc_stats.bbtblocks++; else if (mtd_block_isbad(child, offs)) @@ -234,6 +240,8 @@ int mtd_add_partition(struct mtd_info *parent, const char *name, long long offset, long long length) { struct mtd_info *master = mtd_get_master(parent); + u64 parent_size = mtd_is_partition(parent) ? + parent->part.size : parent->size; struct mtd_partition part; struct mtd_info *child; int ret = 0; @@ -244,7 +252,7 @@ int mtd_add_partition(struct mtd_info *parent, const char *name, return -EINVAL; if (length == MTDPART_SIZ_FULL) - length = parent->size - offset; + length = parent_size - offset; if (length <= 0) return -EINVAL; @@ -419,7 +427,7 @@ int add_mtd_partitions(struct mtd_info *parent, /* Look for subpartitions */ parse_mtd_partitions(child, parts[i].types, NULL); - cur_offset = child->part.offset + child->size; + cur_offset = child->part.offset + child->part.size; } return 0; diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 2d1f4a61f4ac..157357ec1441 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -200,6 +200,8 @@ struct mtd_debug_info { * * @node: list node used to add an MTD partition to the parent partition list * @offset: offset of the partition relatively to the parent offset + * @size: partition size. Should be equal to mtd->size unless + * MTD_SLC_ON_MLC_EMULATION is set * @flags: original flags (before the mtdpart logic decided to tweak them based * on flash constraints, like eraseblock/pagesize alignment) * @@ -209,6 +211,7 @@ struct mtd_debug_info { struct mtd_part { struct list_head node; u64 offset; + u64 size; u32 flags; }; @@ -622,7 +625,9 @@ static inline uint32_t mtd_mod_by_ws(uint64_t sz, struct mtd_info *mtd) static inline int mtd_wunit_per_eb(struct mtd_info *mtd) { - return mtd->erasesize / mtd->writesize; + struct mtd_info *master = mtd_get_master(mtd); + + return master->erasesize / mtd->writesize; } static inline int mtd_offset_to_wunit(struct mtd_info *mtd, loff_t offs) diff --git a/include/linux/mtd/partitions.h b/include/linux/mtd/partitions.h index e545c050d3e8..b74a539ec581 100644 --- a/include/linux/mtd/partitions.h +++ b/include/linux/mtd/partitions.h @@ -37,6 +37,7 @@ * master MTD flag set for the corresponding MTD partition. * For example, to force a read-only partition, simply adding * MTD_WRITEABLE to the mask_flags will do the trick. + * add_flags: contains flags to add to the parent flags * * Note: writeable partitions require their size and offset be * erasesize aligned (e.g. use MTDPART_OFS_NEXTBLK). @@ -48,6 +49,7 @@ struct mtd_partition { uint64_t size; /* partition size */ uint64_t offset; /* offset within the master MTD space */ uint32_t mask_flags; /* master MTD flags to mask out for this partition */ + uint32_t add_flags; /* flags to add to the partition */ struct device_node *of_node; }; diff --git a/include/uapi/mtd/mtd-abi.h b/include/uapi/mtd/mtd-abi.h index 47ffe3208c27..4b48fbf7d343 100644 --- a/include/uapi/mtd/mtd-abi.h +++ b/include/uapi/mtd/mtd-abi.h @@ -104,6 +104,7 @@ struct mtd_write_req { #define MTD_BIT_WRITEABLE 0x800 /* Single bits can be flipped */ #define MTD_NO_ERASE 0x1000 /* No erase necessary */ #define MTD_POWERUP_LOCK 0x2000 /* Always locked after reset */ +#define MTD_SLC_ON_MLC_EMULATION 0x4000 /* Emulate SLC behavior on MLC NANDs */ /* Some common devices / combinations of capabilities */ #define MTD_CAP_ROM 0 -- cgit v1.3 From dd6ed5c9890b759ba1b56697b9f3f50e71909e43 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 7 May 2020 12:52:29 +0200 Subject: mtd: rawnand: Translate obscure bitfields into readable macros Use the BIT() macro instead of defining a 8-digit value. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200507105241.14299-2-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 21873168ba4d..4b58de842340 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -129,36 +129,36 @@ enum nand_ecc_algo { * features. */ /* Buswidth is 16 bit */ -#define NAND_BUSWIDTH_16 0x00000002 +#define NAND_BUSWIDTH_16 BIT(1) /* Chip has cache program function */ -#define NAND_CACHEPRG 0x00000008 +#define NAND_CACHEPRG BIT(3) /* * Chip requires ready check on read (for auto-incremented sequential read). * True only for small page devices; large page devices do not support * autoincrement. */ -#define NAND_NEED_READRDY 0x00000100 +#define NAND_NEED_READRDY BIT(8) /* Chip does not allow subpage writes */ -#define NAND_NO_SUBPAGE_WRITE 0x00000200 +#define NAND_NO_SUBPAGE_WRITE BIT(9) /* Device is one of 'new' xD cards that expose fake nand command set */ -#define NAND_BROKEN_XD 0x00000400 +#define NAND_BROKEN_XD BIT(10) /* Device behaves just like nand, but is readonly */ -#define NAND_ROM 0x00000800 +#define NAND_ROM BIT(11) /* Device supports subpage reads */ -#define NAND_SUBPAGE_READ 0x00001000 +#define NAND_SUBPAGE_READ BIT(12) /* * Some MLC NANDs need data scrambling to limit bitflips caused by repeated * patterns. */ -#define NAND_NEED_SCRAMBLING 0x00002000 +#define NAND_NEED_SCRAMBLING BIT(13) /* Device needs 3rd row address cycle */ -#define NAND_ROW_ADDR_3 0x00004000 +#define NAND_ROW_ADDR_3 BIT(14) /* Options valid for Samsung large page devices */ #define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG @@ -173,9 +173,9 @@ enum nand_ecc_algo { * Position within the block: Each of these pages needs to be checked for a * bad block marking pattern. */ -#define NAND_BBM_FIRSTPAGE 0x01000000 -#define NAND_BBM_SECONDPAGE 0x02000000 -#define NAND_BBM_LASTPAGE 0x04000000 +#define NAND_BBM_FIRSTPAGE BIT(24) +#define NAND_BBM_SECONDPAGE BIT(25) +#define NAND_BBM_LASTPAGE BIT(26) /* Position within the OOB data of the page */ #define NAND_BBM_POS_SMALL 5 @@ -183,21 +183,21 @@ enum nand_ecc_algo { /* Non chip related options */ /* This option skips the bbt scan during initialization. */ -#define NAND_SKIP_BBTSCAN 0x00010000 +#define NAND_SKIP_BBTSCAN BIT(16) /* Chip may not exist, so silence any errors in scan */ -#define NAND_SCAN_SILENT_NODEV 0x00040000 +#define NAND_SCAN_SILENT_NODEV BIT(18) /* * Autodetect nand buswidth with readid/onfi. * This suppose the driver will configure the hardware in 8 bits mode * when calling nand_scan_ident, and update its configuration * before calling nand_scan_tail. */ -#define NAND_BUSWIDTH_AUTO 0x00080000 +#define NAND_BUSWIDTH_AUTO BIT(19) /* * This option could be defined by controller drivers to protect against * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers */ -#define NAND_USE_BOUNCE_BUFFER 0x00100000 +#define NAND_USE_BOUNCE_BUFFER BIT(20) /* * In case your controller is implementing ->legacy.cmd_ctrl() and is relying @@ -207,20 +207,20 @@ enum nand_ecc_algo { * If your controller already takes care of this delay, you don't need to set * this flag. */ -#define NAND_WAIT_TCCS 0x00200000 +#define NAND_WAIT_TCCS BIT(21) /* * Whether the NAND chip is a boot medium. Drivers might use this information * to select ECC algorithms supported by the boot ROM or similar restrictions. */ -#define NAND_IS_BOOT_MEDIUM 0x00400000 +#define NAND_IS_BOOT_MEDIUM BIT(22) /* * Do not try to tweak the timings at runtime. This is needed when the * controller initializes the timings on itself or when it relies on * configuration done by the bootloader. */ -#define NAND_KEEP_TIMINGS 0x00800000 +#define NAND_KEEP_TIMINGS BIT(23) /* Cell info constants */ #define NAND_CI_CHIPNR_MSK 0x03 -- cgit v1.3 From 96d627bdf112d116a79c86435550cd18b9a9d82b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 7 May 2020 12:52:30 +0200 Subject: mtd: rawnand: Reorder the nand_chip->options flags These flags are in a strange order, reorder the list, add spaces when it is relevant, pack definitions that are related. There is no functional change. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200507105241.14299-3-miquel.raynal@bootlin.com --- include/linux/mtd/rawnand.h | 57 ++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 4b58de842340..e70fea67030b 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -118,20 +118,25 @@ enum nand_ecc_algo { #define NAND_ECC_GENERIC_ERASED_CHECK BIT(0) #define NAND_ECC_MAXIMIZE BIT(1) -/* - * When using software implementation of Hamming, we can specify which byte - * ordering should be used. - */ -#define NAND_ECC_SOFT_HAMMING_SM_ORDER BIT(2) - /* * Option constants for bizarre disfunctionality and real * features. */ + /* Buswidth is 16 bit */ #define NAND_BUSWIDTH_16 BIT(1) + +/* + * When using software implementation of Hamming, we can specify which byte + * ordering should be used. + */ +#define NAND_ECC_SOFT_HAMMING_SM_ORDER BIT(2) + /* Chip has cache program function */ #define NAND_CACHEPRG BIT(3) +/* Options valid for Samsung large page devices */ +#define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG + /* * Chip requires ready check on read (for auto-incremented sequential read). * True only for small page devices; large page devices do not support @@ -150,6 +155,8 @@ enum nand_ecc_algo { /* Device supports subpage reads */ #define NAND_SUBPAGE_READ BIT(12) +/* Macros to identify the above */ +#define NAND_HAS_SUBPAGE_READ(chip) ((chip->options & NAND_SUBPAGE_READ)) /* * Some MLC NANDs need data scrambling to limit bitflips caused by repeated @@ -160,32 +167,12 @@ enum nand_ecc_algo { /* Device needs 3rd row address cycle */ #define NAND_ROW_ADDR_3 BIT(14) -/* Options valid for Samsung large page devices */ -#define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG - -/* Macros to identify the above */ -#define NAND_HAS_SUBPAGE_READ(chip) ((chip->options & NAND_SUBPAGE_READ)) - -/* - * There are different places where the manufacturer stores the factory bad - * block markers. - * - * Position within the block: Each of these pages needs to be checked for a - * bad block marking pattern. - */ -#define NAND_BBM_FIRSTPAGE BIT(24) -#define NAND_BBM_SECONDPAGE BIT(25) -#define NAND_BBM_LASTPAGE BIT(26) - -/* Position within the OOB data of the page */ -#define NAND_BBM_POS_SMALL 5 -#define NAND_BBM_POS_LARGE 0 - /* Non chip related options */ /* This option skips the bbt scan during initialization. */ #define NAND_SKIP_BBTSCAN BIT(16) /* Chip may not exist, so silence any errors in scan */ #define NAND_SCAN_SILENT_NODEV BIT(18) + /* * Autodetect nand buswidth with readid/onfi. * This suppose the driver will configure the hardware in 8 bits mode @@ -193,6 +180,7 @@ enum nand_ecc_algo { * before calling nand_scan_tail. */ #define NAND_BUSWIDTH_AUTO BIT(19) + /* * This option could be defined by controller drivers to protect against * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers @@ -222,11 +210,26 @@ enum nand_ecc_algo { */ #define NAND_KEEP_TIMINGS BIT(23) +/* + * There are different places where the manufacturer stores the factory bad + * block markers. + * + * Position within the block: Each of these pages needs to be checked for a + * bad block marking pattern. + */ +#define NAND_BBM_FIRSTPAGE BIT(24) +#define NAND_BBM_SECONDPAGE BIT(25) +#define NAND_BBM_LASTPAGE BIT(26) + /* Cell info constants */ #define NAND_CI_CHIPNR_MSK 0x03 #define NAND_CI_CELLTYPE_MSK 0x0C #define NAND_CI_CELLTYPE_SHIFT 2 +/* Position within the OOB data of the page */ +#define NAND_BBM_POS_SMALL 5 +#define NAND_BBM_POS_LARGE 0 + /** * struct nand_parameters - NAND generic parameters from the parameter page * @model: Model name -- cgit v1.3 From ce8148d7b8f204a18188e3cd7386c8dddbb461a1 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 7 May 2020 12:52:31 +0200 Subject: mtd: rawnand: Rename a NAND chip option NAND controller drivers can set the NAND_USE_BOUNCE_BUFFER flag to a chip 'option' field. With this flag, the core is responsible of providing DMA-able buffers. The current behavior is to not force the use of a bounce buffer when the core thinks this is not needed. So in the end the name is a bit misleading, because in theory we will always have a DMA buffer but in practice it will not always be a bounce buffer. Rename this flag NAND_USES_DMA to be more accurate. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200507105241.14299-4-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/atmel/nand-controller.c | 2 +- drivers/mtd/nand/raw/brcmnand/brcmnand.c | 2 +- drivers/mtd/nand/raw/denali.c | 2 +- drivers/mtd/nand/raw/meson_nand.c | 2 +- drivers/mtd/nand/raw/mtk_nand.c | 2 +- drivers/mtd/nand/raw/nand_base.c | 4 ++-- drivers/mtd/nand/raw/qcom_nandc.c | 2 +- drivers/mtd/nand/raw/stm32_fmc2_nand.c | 2 +- drivers/mtd/nand/raw/sunxi_nand.c | 2 +- drivers/mtd/nand/raw/tango_nand.c | 2 +- drivers/mtd/nand/raw/tegra_nand.c | 2 +- include/linux/mtd/rawnand.h | 2 +- 12 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/atmel/nand-controller.c b/drivers/mtd/nand/raw/atmel/nand-controller.c index 3ba17a98df4d..46a3724a788e 100644 --- a/drivers/mtd/nand/raw/atmel/nand-controller.c +++ b/drivers/mtd/nand/raw/atmel/nand-controller.c @@ -1494,7 +1494,7 @@ static void atmel_nand_init(struct atmel_nand_controller *nc, * suitable for DMA. */ if (nc->dmac) - chip->options |= NAND_USE_BOUNCE_BUFFER; + chip->options |= NAND_USES_DMA; /* Default to HW ECC if pmecc is available. */ if (nc->pmecc) diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index 57076c3d98dc..fe7cd3aa0cd6 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -2576,7 +2576,7 @@ static int brcmnand_attach_chip(struct nand_chip *chip) * to/from, and have nand_base pass us a bounce buffer instead, as * needed. */ - chip->options |= NAND_USE_BOUNCE_BUFFER; + chip->options |= NAND_USES_DMA; if (chip->bbt_options & NAND_BBT_USE_FLASH) chip->bbt_options |= NAND_BBT_NO_OOB; diff --git a/drivers/mtd/nand/raw/denali.c b/drivers/mtd/nand/raw/denali.c index 2fcd2baf6e35..7a76b761dd0b 100644 --- a/drivers/mtd/nand/raw/denali.c +++ b/drivers/mtd/nand/raw/denali.c @@ -1226,7 +1226,7 @@ int denali_chip_init(struct denali_controller *denali, mtd->name = "denali-nand"; if (denali->dma_avail) { - chip->options |= NAND_USE_BOUNCE_BUFFER; + chip->options |= NAND_USES_DMA; chip->buf_align = 16; } diff --git a/drivers/mtd/nand/raw/meson_nand.c b/drivers/mtd/nand/raw/meson_nand.c index e961f7bebf0a..3f376471f3f7 100644 --- a/drivers/mtd/nand/raw/meson_nand.c +++ b/drivers/mtd/nand/raw/meson_nand.c @@ -1269,7 +1269,7 @@ meson_nfc_nand_chip_init(struct device *dev, nand_set_flash_node(nand, np); nand_set_controller_data(nand, nfc); - nand->options |= NAND_USE_BOUNCE_BUFFER; + nand->options |= NAND_USES_DMA; mtd = nand_to_mtd(nand); mtd->owner = THIS_MODULE; mtd->dev.parent = dev; diff --git a/drivers/mtd/nand/raw/mtk_nand.c b/drivers/mtd/nand/raw/mtk_nand.c index ef149e8b26d0..e7ec30e784fd 100644 --- a/drivers/mtd/nand/raw/mtk_nand.c +++ b/drivers/mtd/nand/raw/mtk_nand.c @@ -1380,7 +1380,7 @@ static int mtk_nfc_nand_chip_init(struct device *dev, struct mtk_nfc *nfc, nand_set_flash_node(nand, np); nand_set_controller_data(nand, nfc); - nand->options |= NAND_USE_BOUNCE_BUFFER | NAND_SUBPAGE_READ; + nand->options |= NAND_USES_DMA | NAND_SUBPAGE_READ; nand->legacy.dev_ready = mtk_nfc_dev_ready; nand->legacy.select_chip = mtk_nfc_select_chip; nand->legacy.write_byte = mtk_nfc_write_byte; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 106edd514c00..906b8cd94bc4 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -3241,7 +3241,7 @@ static int nand_do_read_ops(struct nand_chip *chip, loff_t from, if (!aligned) use_bufpoi = 1; - else if (chip->options & NAND_USE_BOUNCE_BUFFER) + else if (chip->options & NAND_USES_DMA) use_bufpoi = !virt_addr_valid(buf) || !IS_ALIGNED((unsigned long)buf, chip->buf_align); @@ -4067,7 +4067,7 @@ static int nand_do_write_ops(struct nand_chip *chip, loff_t to, if (part_pagewr) use_bufpoi = 1; - else if (chip->options & NAND_USE_BOUNCE_BUFFER) + else if (chip->options & NAND_USES_DMA) use_bufpoi = !virt_addr_valid(buf) || !IS_ALIGNED((unsigned long)buf, chip->buf_align); diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 5b11c7061497..9ab22c5d4166 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -2836,7 +2836,7 @@ static int qcom_nand_host_init_and_register(struct qcom_nand_controller *nandc, chip->legacy.block_markbad = qcom_nandc_block_markbad; chip->controller = &nandc->controller; - chip->options |= NAND_NO_SUBPAGE_WRITE | NAND_USE_BOUNCE_BUFFER | + chip->options |= NAND_NO_SUBPAGE_WRITE | NAND_USES_DMA | NAND_SKIP_BBTSCAN; /* set up initial status value */ diff --git a/drivers/mtd/nand/raw/stm32_fmc2_nand.c b/drivers/mtd/nand/raw/stm32_fmc2_nand.c index 46b7d04e2c87..c5fde09e0175 100644 --- a/drivers/mtd/nand/raw/stm32_fmc2_nand.c +++ b/drivers/mtd/nand/raw/stm32_fmc2_nand.c @@ -1987,7 +1987,7 @@ static int stm32_fmc2_probe(struct platform_device *pdev) chip->controller = &fmc2->base; chip->options |= NAND_BUSWIDTH_AUTO | NAND_NO_SUBPAGE_WRITE | - NAND_USE_BOUNCE_BUFFER; + NAND_USES_DMA; /* Default ECC settings */ chip->ecc.mode = NAND_ECC_HW; diff --git a/drivers/mtd/nand/raw/sunxi_nand.c b/drivers/mtd/nand/raw/sunxi_nand.c index 18ac0b36abfa..26d862213cac 100644 --- a/drivers/mtd/nand/raw/sunxi_nand.c +++ b/drivers/mtd/nand/raw/sunxi_nand.c @@ -1698,7 +1698,7 @@ static int sunxi_nand_hw_ecc_ctrl_init(struct nand_chip *nand, ecc->read_page = sunxi_nfc_hw_ecc_read_page_dma; ecc->read_subpage = sunxi_nfc_hw_ecc_read_subpage_dma; ecc->write_page = sunxi_nfc_hw_ecc_write_page_dma; - nand->options |= NAND_USE_BOUNCE_BUFFER; + nand->options |= NAND_USES_DMA; } else { ecc->read_page = sunxi_nfc_hw_ecc_read_page; ecc->read_subpage = sunxi_nfc_hw_ecc_read_subpage; diff --git a/drivers/mtd/nand/raw/tango_nand.c b/drivers/mtd/nand/raw/tango_nand.c index 9acf2de37ee0..b92de603e6db 100644 --- a/drivers/mtd/nand/raw/tango_nand.c +++ b/drivers/mtd/nand/raw/tango_nand.c @@ -568,7 +568,7 @@ static int chip_init(struct device *dev, struct device_node *np) chip->legacy.select_chip = tango_select_chip; chip->legacy.cmd_ctrl = tango_cmd_ctrl; chip->legacy.dev_ready = tango_dev_ready; - chip->options = NAND_USE_BOUNCE_BUFFER | + chip->options = NAND_USES_DMA | NAND_NO_SUBPAGE_WRITE | NAND_WAIT_TCCS; chip->controller = &nfc->hw; diff --git a/drivers/mtd/nand/raw/tegra_nand.c b/drivers/mtd/nand/raw/tegra_nand.c index 6a255ba0f288..f9d046b2cd3b 100644 --- a/drivers/mtd/nand/raw/tegra_nand.c +++ b/drivers/mtd/nand/raw/tegra_nand.c @@ -1115,7 +1115,7 @@ static int tegra_nand_chips_init(struct device *dev, if (!mtd->name) mtd->name = "tegra_nand"; - chip->options = NAND_NO_SUBPAGE_WRITE | NAND_USE_BOUNCE_BUFFER; + chip->options = NAND_NO_SUBPAGE_WRITE | NAND_USES_DMA; ret = nand_scan(chip, 1); if (ret) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index e70fea67030b..d1f5c5258e35 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -185,7 +185,7 @@ enum nand_ecc_algo { * This option could be defined by controller drivers to protect against * kmap'ed, vmalloc'ed highmem buffers being passed from upper layers */ -#define NAND_USE_BOUNCE_BUFFER BIT(20) +#define NAND_USES_DMA BIT(20) /* * In case your controller is implementing ->legacy.cmd_ctrl() and is relying -- cgit v1.3 From b451f5beece3f5556920992e7498d23f6da6ef6e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 7 May 2020 12:52:36 +0200 Subject: mtd: rawnand: Give the possibility to verify a read operation is supported This can be used to discriminate between two path in the parameter page detection: use data_in cycles (like before) if supported, use the CHANGE READ COLUMN command otherwise. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200507105241.14299-9-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/fsmc_nand.c | 2 +- drivers/mtd/nand/raw/marvell_nand.c | 4 ++-- drivers/mtd/nand/raw/nand_base.c | 48 ++++++++++++++++++++++--------------- drivers/mtd/nand/raw/nand_jedec.c | 2 +- drivers/mtd/nand/raw/nand_legacy.c | 8 ++++--- drivers/mtd/nand/raw/nand_micron.c | 6 ++--- drivers/mtd/nand/raw/nand_onfi.c | 3 ++- include/linux/mtd/rawnand.h | 2 +- 8 files changed, 44 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/fsmc_nand.c b/drivers/mtd/nand/raw/fsmc_nand.c index 31dc9fd0a94d..2a9222e99bcc 100644 --- a/drivers/mtd/nand/raw/fsmc_nand.c +++ b/drivers/mtd/nand/raw/fsmc_nand.c @@ -694,7 +694,7 @@ static int fsmc_read_page_hwecc(struct nand_chip *chip, u8 *buf, for (i = 0, s = 0; s < eccsteps; s++, i += eccbytes, p += eccsize) { nand_read_page_op(chip, page, s * eccsize, NULL, 0); chip->ecc.hwctl(chip, NAND_ECC_READ); - ret = nand_read_data_op(chip, p, eccsize, false); + ret = nand_read_data_op(chip, p, eccsize, false, false); if (ret) return ret; diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index 88269c44b2b4..a79ce4bdd31c 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1224,12 +1224,12 @@ static int marvell_nfc_hw_ecc_bch_read_page_raw(struct nand_chip *chip, u8 *buf, /* Read spare bytes */ nand_read_data_op(chip, oob + (lt->spare_bytes * chunk), - spare_len, false); + spare_len, false, false); /* Read ECC bytes */ nand_read_data_op(chip, oob + ecc_offset + (ALIGN(lt->ecc_bytes, 32) * chunk), - ecc_len, false); + ecc_len, false, false); } return 0; diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index c440f11a34e9..8df864881fff 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -740,7 +740,8 @@ int nand_soft_waitrdy(struct nand_chip *chip, unsigned long timeout_ms) */ timeout_ms = jiffies + msecs_to_jiffies(timeout_ms) + 1; do { - ret = nand_read_data_op(chip, &status, sizeof(status), true); + ret = nand_read_data_op(chip, &status, sizeof(status), true, + false); if (ret) break; @@ -820,7 +821,7 @@ void panic_nand_wait(struct nand_chip *chip, unsigned long timeo) u8 status; ret = nand_read_data_op(chip, &status, sizeof(status), - true); + true, false); if (ret) return; @@ -1918,6 +1919,8 @@ EXPORT_SYMBOL_GPL(nand_reset_op); * @buf: buffer used to store the data * @len: length of the buffer * @force_8bit: force 8-bit bus access + * @check_only: do not actually run the command, only checks if the + * controller driver supports it * * This function does a raw data read on the bus. Usually used after launching * another NAND operation like nand_read_page_op(). @@ -1926,7 +1929,7 @@ EXPORT_SYMBOL_GPL(nand_reset_op); * Returns 0 on success, a negative error code otherwise. */ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, - bool force_8bit) + bool force_8bit, bool check_only) { if (!len || !buf) return -EINVAL; @@ -1939,9 +1942,15 @@ int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, instrs[0].ctx.data.force_8bit = force_8bit; + if (check_only) + return nand_check_op(chip, &op); + return nand_exec_op(chip, &op); } + if (check_only) + return 0; + if (force_8bit) { u8 *p = buf; unsigned int i; @@ -2670,7 +2679,7 @@ int nand_read_page_raw(struct nand_chip *chip, uint8_t *buf, int oob_required, if (oob_required) { ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, - false); + false, false); if (ret) return ret; } @@ -2702,7 +2711,7 @@ static int nand_read_page_raw_syndrome(struct nand_chip *chip, uint8_t *buf, return ret; for (steps = chip->ecc.steps; steps > 0; steps--) { - ret = nand_read_data_op(chip, buf, eccsize, false); + ret = nand_read_data_op(chip, buf, eccsize, false, false); if (ret) return ret; @@ -2710,14 +2719,14 @@ static int nand_read_page_raw_syndrome(struct nand_chip *chip, uint8_t *buf, if (chip->ecc.prepad) { ret = nand_read_data_op(chip, oob, chip->ecc.prepad, - false); + false, false); if (ret) return ret; oob += chip->ecc.prepad; } - ret = nand_read_data_op(chip, oob, eccbytes, false); + ret = nand_read_data_op(chip, oob, eccbytes, false, false); if (ret) return ret; @@ -2725,7 +2734,7 @@ static int nand_read_page_raw_syndrome(struct nand_chip *chip, uint8_t *buf, if (chip->ecc.postpad) { ret = nand_read_data_op(chip, oob, chip->ecc.postpad, - false); + false, false); if (ret) return ret; @@ -2735,7 +2744,7 @@ static int nand_read_page_raw_syndrome(struct nand_chip *chip, uint8_t *buf, size = mtd->oobsize - (oob - chip->oob_poi); if (size) { - ret = nand_read_data_op(chip, oob, size, false); + ret = nand_read_data_op(chip, oob, size, false, false); if (ret) return ret; } @@ -2928,14 +2937,15 @@ static int nand_read_page_hwecc(struct nand_chip *chip, uint8_t *buf, for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { chip->ecc.hwctl(chip, NAND_ECC_READ); - ret = nand_read_data_op(chip, p, eccsize, false); + ret = nand_read_data_op(chip, p, eccsize, false, false); if (ret) return ret; chip->ecc.calculate(chip, p, &ecc_calc[i]); } - ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, false); + ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, false, + false); if (ret) return ret; @@ -3014,7 +3024,7 @@ static int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf, chip->ecc.hwctl(chip, NAND_ECC_READ); - ret = nand_read_data_op(chip, p, eccsize, false); + ret = nand_read_data_op(chip, p, eccsize, false, false); if (ret) return ret; @@ -3071,13 +3081,13 @@ static int nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf, chip->ecc.hwctl(chip, NAND_ECC_READ); - ret = nand_read_data_op(chip, p, eccsize, false); + ret = nand_read_data_op(chip, p, eccsize, false, false); if (ret) return ret; if (chip->ecc.prepad) { ret = nand_read_data_op(chip, oob, chip->ecc.prepad, - false); + false, false); if (ret) return ret; @@ -3086,7 +3096,7 @@ static int nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf, chip->ecc.hwctl(chip, NAND_ECC_READSYN); - ret = nand_read_data_op(chip, oob, eccbytes, false); + ret = nand_read_data_op(chip, oob, eccbytes, false, false); if (ret) return ret; @@ -3096,7 +3106,7 @@ static int nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf, if (chip->ecc.postpad) { ret = nand_read_data_op(chip, oob, chip->ecc.postpad, - false); + false, false); if (ret) return ret; @@ -3124,7 +3134,7 @@ static int nand_read_page_syndrome(struct nand_chip *chip, uint8_t *buf, /* Calculate remaining oob bytes */ i = mtd->oobsize - (oob - chip->oob_poi); if (i) { - ret = nand_read_data_op(chip, oob, i, false); + ret = nand_read_data_op(chip, oob, i, false, false); if (ret) return ret; } @@ -3426,7 +3436,7 @@ static int nand_read_oob_syndrome(struct nand_chip *chip, int page) sndrnd = 1; toread = min_t(int, length, chunk); - ret = nand_read_data_op(chip, bufpoi, toread, false); + ret = nand_read_data_op(chip, bufpoi, toread, false, false); if (ret) return ret; @@ -3434,7 +3444,7 @@ static int nand_read_oob_syndrome(struct nand_chip *chip, int page) length -= toread; } if (length > 0) { - ret = nand_read_data_op(chip, bufpoi, length, false); + ret = nand_read_data_op(chip, bufpoi, length, false, false); if (ret) return ret; } diff --git a/drivers/mtd/nand/raw/nand_jedec.c b/drivers/mtd/nand/raw/nand_jedec.c index 15937e02c64f..63069f1948a8 100644 --- a/drivers/mtd/nand/raw/nand_jedec.c +++ b/drivers/mtd/nand/raw/nand_jedec.c @@ -51,7 +51,7 @@ int nand_jedec_detect(struct nand_chip *chip) } for (i = 0; i < JEDEC_PARAM_PAGES; i++) { - ret = nand_read_data_op(chip, p, sizeof(*p), true); + ret = nand_read_data_op(chip, p, sizeof(*p), true, false); if (ret) { ret = 0; goto free_jedec_param_page; diff --git a/drivers/mtd/nand/raw/nand_legacy.c b/drivers/mtd/nand/raw/nand_legacy.c index f91e92e1b972..d64791c06a97 100644 --- a/drivers/mtd/nand/raw/nand_legacy.c +++ b/drivers/mtd/nand/raw/nand_legacy.c @@ -225,7 +225,8 @@ static void nand_wait_status_ready(struct nand_chip *chip, unsigned long timeo) do { u8 status; - ret = nand_read_data_op(chip, &status, sizeof(status), true); + ret = nand_read_data_op(chip, &status, sizeof(status), true, + false); if (ret) return; @@ -552,7 +553,8 @@ static int nand_wait(struct nand_chip *chip) break; } else { ret = nand_read_data_op(chip, &status, - sizeof(status), true); + sizeof(status), true, + false); if (ret) return ret; @@ -563,7 +565,7 @@ static int nand_wait(struct nand_chip *chip) } while (time_before(jiffies, timeo)); } - ret = nand_read_data_op(chip, &status, sizeof(status), true); + ret = nand_read_data_op(chip, &status, sizeof(status), true, false); if (ret) return ret; diff --git a/drivers/mtd/nand/raw/nand_micron.c b/drivers/mtd/nand/raw/nand_micron.c index 56654030ec7f..3a37d48c9472 100644 --- a/drivers/mtd/nand/raw/nand_micron.c +++ b/drivers/mtd/nand/raw/nand_micron.c @@ -212,7 +212,7 @@ static int micron_nand_on_die_ecc_status_4(struct nand_chip *chip, u8 status, */ if (!oob_required) { ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, - false); + false, false); if (ret) return ret; } @@ -304,10 +304,10 @@ micron_nand_read_page_on_die_ecc(struct nand_chip *chip, uint8_t *buf, if (ret) goto out; - ret = nand_read_data_op(chip, buf, mtd->writesize, false); + ret = nand_read_data_op(chip, buf, mtd->writesize, false, false); if (!ret && oob_required) ret = nand_read_data_op(chip, chip->oob_poi, mtd->oobsize, - false); + false, false); if (chip->ecc.strength == 4) max_bitflips = micron_nand_on_die_ecc_status_4(chip, status, diff --git a/drivers/mtd/nand/raw/nand_onfi.c b/drivers/mtd/nand/raw/nand_onfi.c index ee0f2c2549c1..e6ffbe8c9a0c 100644 --- a/drivers/mtd/nand/raw/nand_onfi.c +++ b/drivers/mtd/nand/raw/nand_onfi.c @@ -167,7 +167,8 @@ int nand_onfi_detect(struct nand_chip *chip) } for (i = 0; i < ONFI_PARAM_PAGES; i++) { - ret = nand_read_data_op(chip, &pbuf[i], sizeof(*pbuf), true); + ret = nand_read_data_op(chip, &pbuf[i], sizeof(*pbuf), true, + false); if (ret) { ret = 0; goto free_onfi_param_page; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index d1f5c5258e35..70380c91731c 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1363,7 +1363,7 @@ int nand_change_write_column_op(struct nand_chip *chip, unsigned int offset_in_page, const void *buf, unsigned int len, bool force_8bit); int nand_read_data_op(struct nand_chip *chip, void *buf, unsigned int len, - bool force_8bit); + bool force_8bit, bool check_only); int nand_write_data_op(struct nand_chip *chip, const void *buf, unsigned int len, bool force_8bit); -- cgit v1.3 From 658beb6639606268bd48eb7de7cf308d6b10600f Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Thu, 7 May 2020 12:52:39 +0200 Subject: mtd: rawnand: Expose monolithic read/write_page_raw() helpers The current nand_read/write_page_raw() helpers are already widely used but do not fit the purpose of "constrained" controllers which cannot, for instance, separate command/address cycles with data cycles. Workaround this issue by proposing alternative helpers that can be used by these controller drivers instead. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200507105241.14299-12-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 77 ++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/rawnand.h | 8 +++-- 2 files changed, 83 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 8df864881fff..43c7ebc7e859 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -2688,6 +2688,47 @@ int nand_read_page_raw(struct nand_chip *chip, uint8_t *buf, int oob_required, } EXPORT_SYMBOL(nand_read_page_raw); +/** + * nand_monolithic_read_page_raw - Monolithic page read in raw mode + * @chip: NAND chip info structure + * @buf: buffer to store read data + * @oob_required: caller requires OOB data read to chip->oob_poi + * @page: page number to read + * + * This is a raw page read, ie. without any error detection/correction. + * Monolithic means we are requesting all the relevant data (main plus + * eventually OOB) to be loaded in the NAND cache and sent over the + * bus (from the NAND chip to the NAND controller) in a single + * operation. This is an alternative to nand_read_page_raw(), which + * first reads the main data, and if the OOB data is requested too, + * then reads more data on the bus. + */ +int nand_monolithic_read_page_raw(struct nand_chip *chip, u8 *buf, + int oob_required, int page) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + unsigned int size = mtd->writesize; + u8 *read_buf = buf; + int ret; + + if (oob_required) { + size += mtd->oobsize; + + if (buf != chip->data_buf) + read_buf = nand_get_data_buf(chip); + } + + ret = nand_read_page_op(chip, page, 0, read_buf, size); + if (ret) + return ret; + + if (buf != chip->data_buf) + memcpy(buf, read_buf, mtd->writesize); + + return 0; +} +EXPORT_SYMBOL(nand_monolithic_read_page_raw); + /** * nand_read_page_raw_syndrome - [INTERN] read raw page data without ecc * @chip: nand chip info structure @@ -3696,6 +3737,42 @@ int nand_write_page_raw(struct nand_chip *chip, const uint8_t *buf, } EXPORT_SYMBOL(nand_write_page_raw); +/** + * nand_monolithic_write_page_raw - Monolithic page write in raw mode + * @chip: NAND chip info structure + * @buf: data buffer to write + * @oob_required: must write chip->oob_poi to OOB + * @page: page number to write + * + * This is a raw page write, ie. without any error detection/correction. + * Monolithic means we are requesting all the relevant data (main plus + * eventually OOB) to be sent over the bus and effectively programmed + * into the NAND chip arrays in a single operation. This is an + * alternative to nand_write_page_raw(), which first sends the main + * data, then eventually send the OOB data by latching more data + * cycles on the NAND bus, and finally sends the program command to + * synchronyze the NAND chip cache. + */ +int nand_monolithic_write_page_raw(struct nand_chip *chip, const u8 *buf, + int oob_required, int page) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + unsigned int size = mtd->writesize; + u8 *write_buf = (u8 *)buf; + + if (oob_required) { + size += mtd->oobsize; + + if (buf != chip->data_buf) { + write_buf = nand_get_data_buf(chip); + memcpy(write_buf, buf, mtd->writesize); + } + } + + return nand_prog_page_op(chip, page, 0, write_buf, size); +} +EXPORT_SYMBOL(nand_monolithic_write_page_raw); + /** * nand_write_page_raw_syndrome - [INTERN] raw page write function * @chip: nand chip info structure diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 70380c91731c..406e9ff0f45c 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1328,13 +1328,17 @@ int nand_read_oob_std(struct nand_chip *chip, int page); int nand_get_set_features_notsupp(struct nand_chip *chip, int addr, u8 *subfeature_param); -/* Default read_page_raw implementation */ +/* read_page_raw implementations */ int nand_read_page_raw(struct nand_chip *chip, uint8_t *buf, int oob_required, int page); +int nand_monolithic_read_page_raw(struct nand_chip *chip, uint8_t *buf, + int oob_required, int page); -/* Default write_page_raw implementation */ +/* write_page_raw implementations */ int nand_write_page_raw(struct nand_chip *chip, const uint8_t *buf, int oob_required, int page); +int nand_monolithic_write_page_raw(struct nand_chip *chip, const uint8_t *buf, + int oob_required, int page); /* Reset and initialize a NAND device */ int nand_reset(struct nand_chip *chip, int chipnr); -- cgit v1.3 From ec7cfc3d763c761ff1ad5a4e66c4f94098d7e161 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Mon, 11 May 2020 08:49:15 +0200 Subject: mtd: rawnand: Add a NAND_NO_BBM_QUIRK flag Some controllers with embedded ECC engines override the BBM marker with data or ECC bytes, thus making bad block detection through bad block marker impossible. Let's flag those chips so the core knows it shouldn't check the BBM and consider all blocks good. This should allow us to get rid of two implementers of the legacy.block_bad() hook. Signed-off-by: Boris Brezillon Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200511064917.6255-1-boris.brezillon@collabora.com --- drivers/mtd/nand/raw/nand_base.c | 3 +++ include/linux/mtd/rawnand.h | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index ba37ca9c1260..2d2a216af120 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -395,6 +395,9 @@ static int nand_block_bad(struct nand_chip *chip, loff_t ofs) static int nand_isbad_bbm(struct nand_chip *chip, loff_t ofs) { + if (chip->options & NAND_NO_BBM_QUIRK) + return 0; + if (chip->legacy.block_bad) return chip->legacy.block_bad(chip, ofs); diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 406e9ff0f45c..0f45b6984ad1 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -221,6 +221,14 @@ enum nand_ecc_algo { #define NAND_BBM_SECONDPAGE BIT(25) #define NAND_BBM_LASTPAGE BIT(26) +/* + * Some controllers with pipelined ECC engines override the BBM marker with + * data or ECC bytes, thus making bad block detection through bad block marker + * impossible. Let's flag those chips so the core knows it shouldn't check the + * BBM and consider all blocks good. + */ +#define NAND_NO_BBM_QUIRK BIT(27) + /* Cell info constants */ #define NAND_CI_CHIPNR_MSK 0x03 #define NAND_CI_CELLTYPE_MSK 0x0C -- cgit v1.3 From 5fc70e350edd30fb22d2f9b4e6d680c5471890ff Mon Sep 17 00:00:00 2001 From: Jiada Wang Date: Mon, 11 May 2020 13:12:13 -0700 Subject: Input: introduce input_mt_report_slot_inactive() input_mt_report_slot_state() ignores "tool" argument when the slot is closed, which has caused a bit of confusion. Let's introduce input_mt_report_slot_inactive() to report inactive slot state. Suggested-by: Dmitry Torokhov Signed-off-by: Jiada Wang Link: https://lore.kernel.org/r/20200508055656.96389-2-jiada_wang@mentor.com Signed-off-by: Dmitry Torokhov --- drivers/hid/hid-alps.c | 3 +-- drivers/hid/hid-multitouch.c | 6 ++---- drivers/input/misc/xen-kbdfront.c | 2 +- drivers/input/mouse/elan_i2c_core.c | 2 +- drivers/input/touchscreen/atmel_mxt_ts.c | 7 +++---- drivers/input/touchscreen/cyttsp4_core.c | 5 ++--- drivers/input/touchscreen/cyttsp_core.c | 2 +- drivers/input/touchscreen/melfas_mip4.c | 4 ++-- drivers/input/touchscreen/mms114.c | 2 +- drivers/input/touchscreen/raspberrypi-ts.c | 2 +- drivers/input/touchscreen/stmfts.c | 2 +- include/linux/input/mt.h | 5 +++++ 12 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/hid-alps.c b/drivers/hid/hid-alps.c index fa704153cb00..28588f74425e 100644 --- a/drivers/hid/hid-alps.c +++ b/drivers/hid/hid-alps.c @@ -387,8 +387,7 @@ static int u1_raw_event(struct alps_dev *hdata, u8 *data, int size) input_report_abs(hdata->input, ABS_MT_PRESSURE, z); } else { - input_mt_report_slot_state(hdata->input, - MT_TOOL_FINGER, 0); + input_mt_report_slot_inactive(hdata->input); } } diff --git a/drivers/hid/hid-multitouch.c b/drivers/hid/hid-multitouch.c index 362805ddf377..e2ce790ff4a4 100644 --- a/drivers/hid/hid-multitouch.c +++ b/drivers/hid/hid-multitouch.c @@ -896,7 +896,7 @@ static void mt_release_pending_palms(struct mt_device *td, clear_bit(slotnum, app->pending_palm_slots); input_mt_slot(input, slotnum); - input_mt_report_slot_state(input, MT_TOOL_PALM, false); + input_mt_report_slot_inactive(input); need_sync = true; } @@ -1640,9 +1640,7 @@ static void mt_release_contacts(struct hid_device *hid) if (mt) { for (i = 0; i < mt->num_slots; i++) { input_mt_slot(input_dev, i); - input_mt_report_slot_state(input_dev, - MT_TOOL_FINGER, - false); + input_mt_report_slot_inactive(input_dev); } input_mt_sync_frame(input_dev); input_sync(input_dev); diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 24bc5c5d876f..a1bba722b234 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -146,7 +146,7 @@ static void xenkbd_handle_mt_event(struct xenkbd_info *info, break; case XENKBD_MT_EV_UP: - input_mt_report_slot_state(info->mtouch, MT_TOOL_FINGER, false); + input_mt_report_slot_inactive(info->mtouch); break; case XENKBD_MT_EV_SYN: diff --git a/drivers/input/mouse/elan_i2c_core.c b/drivers/input/mouse/elan_i2c_core.c index 8719da540383..3f9354baac4b 100644 --- a/drivers/input/mouse/elan_i2c_core.c +++ b/drivers/input/mouse/elan_i2c_core.c @@ -938,7 +938,7 @@ static void elan_report_contact(struct elan_tp_data *data, input_report_abs(input, ABS_MT_TOUCH_MINOR, minor); } else { input_mt_slot(input, contact_num); - input_mt_report_slot_state(input, MT_TOOL_FINGER, false); + input_mt_report_slot_inactive(input); } } diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index ae60442efda0..a2189739e30f 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -822,8 +822,7 @@ static void mxt_proc_t9_message(struct mxt_data *data, u8 *message) * have happened. */ if (status & MXT_T9_RELEASE) { - input_mt_report_slot_state(input_dev, - MT_TOOL_FINGER, 0); + input_mt_report_slot_inactive(input_dev); mxt_input_sync(data); } @@ -839,7 +838,7 @@ static void mxt_proc_t9_message(struct mxt_data *data, u8 *message) input_report_abs(input_dev, ABS_MT_TOUCH_MAJOR, area); } else { /* Touch no longer active, close out slot */ - input_mt_report_slot_state(input_dev, MT_TOOL_FINGER, 0); + input_mt_report_slot_inactive(input_dev); } data->update_input = true; @@ -947,7 +946,7 @@ static void mxt_proc_t100_message(struct mxt_data *data, u8 *message) dev_dbg(dev, "[%u] release\n", id); /* close out slot */ - input_mt_report_slot_state(input_dev, 0, 0); + input_mt_report_slot_inactive(input_dev); } data->update_input = true; diff --git a/drivers/input/touchscreen/cyttsp4_core.c b/drivers/input/touchscreen/cyttsp4_core.c index 6bcffc930384..02a73d9a4def 100644 --- a/drivers/input/touchscreen/cyttsp4_core.c +++ b/drivers/input/touchscreen/cyttsp4_core.c @@ -744,8 +744,7 @@ static void cyttsp4_report_slot_liftoff(struct cyttsp4_mt_data *md, for (t = 0; t < max_slots; t++) { input_mt_slot(md->input, t); - input_mt_report_slot_state(md->input, - MT_TOOL_FINGER, false); + input_mt_report_slot_inactive(md->input); } } @@ -845,7 +844,7 @@ static void cyttsp4_final_sync(struct input_dev *input, int max_slots, int *ids) if (ids[t]) continue; input_mt_slot(input, t); - input_mt_report_slot_state(input, MT_TOOL_FINGER, false); + input_mt_report_slot_inactive(input); } input_sync(input); diff --git a/drivers/input/touchscreen/cyttsp_core.c b/drivers/input/touchscreen/cyttsp_core.c index 3f5d463dbeed..697aa2c158f7 100644 --- a/drivers/input/touchscreen/cyttsp_core.c +++ b/drivers/input/touchscreen/cyttsp_core.c @@ -340,7 +340,7 @@ static void cyttsp_report_tchdata(struct cyttsp *ts) continue; input_mt_slot(input, i); - input_mt_report_slot_state(input, MT_TOOL_FINGER, false); + input_mt_report_slot_inactive(input); } input_sync(input); diff --git a/drivers/input/touchscreen/melfas_mip4.c b/drivers/input/touchscreen/melfas_mip4.c index 247c3aaba2d8..f67efdd040b2 100644 --- a/drivers/input/touchscreen/melfas_mip4.c +++ b/drivers/input/touchscreen/melfas_mip4.c @@ -391,7 +391,7 @@ static void mip4_clear_input(struct mip4_ts *ts) /* Screen */ for (i = 0; i < MIP4_MAX_FINGERS; i++) { input_mt_slot(ts->input, i); - input_mt_report_slot_state(ts->input, MT_TOOL_FINGER, 0); + input_mt_report_slot_inactive(ts->input); } /* Keys */ @@ -534,7 +534,7 @@ static void mip4_report_touch(struct mip4_ts *ts, u8 *packet) } else { /* Release event */ input_mt_slot(ts->input, id); - input_mt_report_slot_state(ts->input, MT_TOOL_FINGER, 0); + input_mt_report_slot_inactive(ts->input); } input_mt_sync_frame(ts->input); diff --git a/drivers/input/touchscreen/mms114.c b/drivers/input/touchscreen/mms114.c index b3dda1722bae..d5e2d2a139a2 100644 --- a/drivers/input/touchscreen/mms114.c +++ b/drivers/input/touchscreen/mms114.c @@ -559,7 +559,7 @@ static int __maybe_unused mms114_suspend(struct device *dev) /* Release all touch */ for (id = 0; id < MMS114_MAX_TOUCH; id++) { input_mt_slot(input_dev, id); - input_mt_report_slot_state(input_dev, MT_TOOL_FINGER, false); + input_mt_report_slot_inactive(input_dev); } input_mt_report_pointer_emulation(input_dev, true); diff --git a/drivers/input/touchscreen/raspberrypi-ts.c b/drivers/input/touchscreen/raspberrypi-ts.c index 0e2e08f3f433..ef6aaed217cf 100644 --- a/drivers/input/touchscreen/raspberrypi-ts.c +++ b/drivers/input/touchscreen/raspberrypi-ts.c @@ -100,7 +100,7 @@ static void rpi_ts_poll(struct input_dev *input) released_ids = ts->known_ids & ~modified_ids; for_each_set_bit(i, &released_ids, RPI_TS_MAX_SUPPORTED_POINTS) { input_mt_slot(input, i); - input_mt_report_slot_state(input, MT_TOOL_FINGER, 0); + input_mt_report_slot_inactive(input); modified_ids &= ~(BIT(i)); } ts->known_ids = modified_ids; diff --git a/drivers/input/touchscreen/stmfts.c b/drivers/input/touchscreen/stmfts.c index b6f95f20f924..b54cc64e4ea6 100644 --- a/drivers/input/touchscreen/stmfts.c +++ b/drivers/input/touchscreen/stmfts.c @@ -198,7 +198,7 @@ static void stmfts_report_contact_release(struct stmfts_data *sdata, u8 slot_id = (event[0] & STMFTS_MASK_TOUCH_ID) >> 4; input_mt_slot(sdata->input, slot_id); - input_mt_report_slot_state(sdata->input, MT_TOOL_FINGER, false); + input_mt_report_slot_inactive(sdata->input); input_sync(sdata->input); } diff --git a/include/linux/input/mt.h b/include/linux/input/mt.h index 9e409bb13642..3b8580bd33c1 100644 --- a/include/linux/input/mt.h +++ b/include/linux/input/mt.h @@ -100,6 +100,11 @@ static inline bool input_is_mt_axis(int axis) bool input_mt_report_slot_state(struct input_dev *dev, unsigned int tool_type, bool active); +static inline void input_mt_report_slot_inactive(struct input_dev *dev) +{ + input_mt_report_slot_state(dev, 0, false); +} + void input_mt_report_finger_count(struct input_dev *dev, int count); void input_mt_report_pointer_emulation(struct input_dev *dev, bool use_count); void input_mt_drop_unused(struct input_dev *dev); -- cgit v1.3 From a3cba697a2a09e6769996d5265991a3228004d92 Mon Sep 17 00:00:00 2001 From: Joseph Lo Date: Wed, 29 May 2019 16:21:34 +0800 Subject: clk: tegra: Export functions for EMC clock scaling Export functions to allow accessing the CAR register required by EMC clock scaling. These functions will be used to access the CAR register as part of the scaling sequence. Signed-off-by: Joseph Lo Signed-off-by: Thierry Reding --- drivers/clk/tegra/clk-tegra210.c | 26 ++++++++++++++++++++++++++ include/linux/clk/tegra.h | 3 +++ 2 files changed, 29 insertions(+) (limited to 'include/linux') diff --git a/drivers/clk/tegra/clk-tegra210.c b/drivers/clk/tegra/clk-tegra210.c index 57d97e87d870..798920ec50e9 100644 --- a/drivers/clk/tegra/clk-tegra210.c +++ b/drivers/clk/tegra/clk-tegra210.c @@ -37,6 +37,7 @@ #define CLK_SOURCE_LA 0x1f8 #define CLK_SOURCE_SDMMC2 0x154 #define CLK_SOURCE_SDMMC4 0x164 +#define CLK_SOURCE_EMC_DLL 0x664 #define PLLC_BASE 0x80 #define PLLC_OUT 0x84 @@ -227,6 +228,10 @@ #define RST_DFLL_DVCO 0x2f4 #define DVFS_DFLL_RESET_SHIFT 0 +#define CLK_RST_CONTROLLER_CLK_OUT_ENB_X_SET 0x284 +#define CLK_RST_CONTROLLER_CLK_OUT_ENB_X_CLR 0x288 +#define CLK_OUT_ENB_X_CLK_ENB_EMC_DLL BIT(14) + #define CLK_RST_CONTROLLER_RST_DEV_Y_SET 0x2a8 #define CLK_RST_CONTROLLER_RST_DEV_Y_CLR 0x2ac #define CPU_SOFTRST_CTRL 0x380 @@ -555,6 +560,27 @@ void tegra210_set_sata_pll_seq_sw(bool state) } EXPORT_SYMBOL_GPL(tegra210_set_sata_pll_seq_sw); +void tegra210_clk_emc_dll_enable(bool flag) +{ + u32 offset = flag ? CLK_RST_CONTROLLER_CLK_OUT_ENB_X_SET : + CLK_RST_CONTROLLER_CLK_OUT_ENB_X_CLR; + + writel_relaxed(CLK_OUT_ENB_X_CLK_ENB_EMC_DLL, clk_base + offset); +} +EXPORT_SYMBOL_GPL(tegra210_clk_emc_dll_enable); + +void tegra210_clk_emc_dll_update_setting(u32 emc_dll_src_value) +{ + writel_relaxed(emc_dll_src_value, clk_base + CLK_SOURCE_EMC_DLL); +} +EXPORT_SYMBOL_GPL(tegra210_clk_emc_dll_update_setting); + +void tegra210_clk_emc_update_setting(u32 emc_src_value) +{ + writel_relaxed(emc_src_value, clk_base + CLK_SOURCE_EMC); +} +EXPORT_SYMBOL_GPL(tegra210_clk_emc_update_setting); + static void tegra210_generic_mbist_war(struct tegra210_domain_mbist_war *mbist) { u32 val; diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 2b1b35240074..5b0bdb413460 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -131,6 +131,9 @@ extern void tegra210_set_sata_pll_seq_sw(bool state); extern void tegra210_put_utmipll_in_iddq(void); extern void tegra210_put_utmipll_out_iddq(void); extern int tegra210_clk_handle_mbist_war(unsigned int id); +extern void tegra210_clk_emc_dll_enable(bool flag); +extern void tegra210_clk_emc_dll_update_setting(u32 emc_dll_src_value); +extern void tegra210_clk_emc_update_setting(u32 emc_src_value); struct clk; -- cgit v1.3 From 0ac65fc946d3a15ff30cea28b38a00b9ba98217b Mon Sep 17 00:00:00 2001 From: Joseph Lo Date: Wed, 29 May 2019 16:21:35 +0800 Subject: clk: tegra: Implement Tegra210 EMC clock The EMC clock needs to carefully coordinate with the EMC controller programming to make sure external memory can be properly clocked. Do so by hooking up the EMC clock with an EMC provider that will specify which rates are supported by the EMC and provide a callback to use for setting the clock rate at the EMC. Based on work by Peter De Schrijver . Signed-off-by: Joseph Lo Signed-off-by: Thierry Reding --- drivers/clk/tegra/Makefile | 1 + drivers/clk/tegra/clk-tegra210-emc.c | 369 +++++++++++++++++++++++++++++++++++ drivers/clk/tegra/clk.h | 3 + include/linux/clk/tegra.h | 24 +++ 4 files changed, 397 insertions(+) create mode 100644 drivers/clk/tegra/clk-tegra210-emc.c (limited to 'include/linux') diff --git a/drivers/clk/tegra/Makefile b/drivers/clk/tegra/Makefile index 7f5f5ec33739..6e1200b3bb68 100644 --- a/drivers/clk/tegra/Makefile +++ b/drivers/clk/tegra/Makefile @@ -25,5 +25,6 @@ obj-$(CONFIG_TEGRA124_EMC) += clk-tegra124-emc.o obj-$(CONFIG_ARCH_TEGRA_132_SOC) += clk-tegra124.o obj-y += cvb.o obj-$(CONFIG_ARCH_TEGRA_210_SOC) += clk-tegra210.o +obj-$(CONFIG_ARCH_TEGRA_210_SOC) += clk-tegra210-emc.o obj-$(CONFIG_CLK_TEGRA_BPMP) += clk-bpmp.o obj-y += clk-utils.o diff --git a/drivers/clk/tegra/clk-tegra210-emc.c b/drivers/clk/tegra/clk-tegra210-emc.c new file mode 100644 index 000000000000..352a2c3fc374 --- /dev/null +++ b/drivers/clk/tegra/clk-tegra210-emc.c @@ -0,0 +1,369 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define CLK_SOURCE_EMC 0x19c +#define CLK_SOURCE_EMC_2X_CLK_SRC GENMASK(31, 29) +#define CLK_SOURCE_EMC_MC_EMC_SAME_FREQ BIT(16) +#define CLK_SOURCE_EMC_2X_CLK_DIVISOR GENMASK(7, 0) + +#define CLK_SRC_PLLM 0 +#define CLK_SRC_PLLC 1 +#define CLK_SRC_PLLP 2 +#define CLK_SRC_CLK_M 3 +#define CLK_SRC_PLLM_UD 4 +#define CLK_SRC_PLLMB_UD 5 +#define CLK_SRC_PLLMB 6 +#define CLK_SRC_PLLP_UD 7 + +struct tegra210_clk_emc { + struct clk_hw hw; + void __iomem *regs; + + struct tegra210_clk_emc_provider *provider; + + struct clk *parents[8]; +}; + +static inline struct tegra210_clk_emc * +to_tegra210_clk_emc(struct clk_hw *hw) +{ + return container_of(hw, struct tegra210_clk_emc, hw); +} + +static const char *tegra210_clk_emc_parents[] = { + "pll_m", "pll_c", "pll_p", "clk_m", "pll_m_ud", "pll_mb_ud", + "pll_mb", "pll_p_ud", +}; + +static u8 tegra210_clk_emc_get_parent(struct clk_hw *hw) +{ + struct tegra210_clk_emc *emc = to_tegra210_clk_emc(hw); + u32 value; + u8 src; + + value = readl_relaxed(emc->regs + CLK_SOURCE_EMC); + src = FIELD_GET(CLK_SOURCE_EMC_2X_CLK_SRC, value); + + return src; +} + +static unsigned long tegra210_clk_emc_recalc_rate(struct clk_hw *hw, + unsigned long parent_rate) +{ + struct tegra210_clk_emc *emc = to_tegra210_clk_emc(hw); + u32 value, div; + + /* + * CCF assumes that neither the parent nor its rate will change during + * ->set_rate(), so the parent rate passed in here was cached from the + * parent before the ->set_rate() call. + * + * This can lead to wrong results being reported for the EMC clock if + * the parent and/or parent rate have changed as part of the EMC rate + * change sequence. Fix this by overriding the parent clock with what + * we know to be the correct value after the rate change. + */ + parent_rate = clk_hw_get_rate(clk_hw_get_parent(hw)); + + value = readl_relaxed(emc->regs + CLK_SOURCE_EMC); + + div = FIELD_GET(CLK_SOURCE_EMC_2X_CLK_DIVISOR, value); + div += 2; + + return DIV_ROUND_UP(parent_rate * 2, div); +} + +static long tegra210_clk_emc_round_rate(struct clk_hw *hw, unsigned long rate, + unsigned long *prate) +{ + struct tegra210_clk_emc *emc = to_tegra210_clk_emc(hw); + struct tegra210_clk_emc_provider *provider = emc->provider; + unsigned int i; + + if (!provider || !provider->configs || provider->num_configs == 0) + return clk_hw_get_rate(hw); + + for (i = 0; i < provider->num_configs; i++) { + if (provider->configs[i].rate >= rate) + return provider->configs[i].rate; + } + + return provider->configs[i - 1].rate; +} + +static struct clk *tegra210_clk_emc_find_parent(struct tegra210_clk_emc *emc, + u8 index) +{ + struct clk_hw *parent = clk_hw_get_parent_by_index(&emc->hw, index); + const char *name = clk_hw_get_name(parent); + + /* XXX implement cache? */ + + return __clk_lookup(name); +} + +static int tegra210_clk_emc_set_rate(struct clk_hw *hw, unsigned long rate, + unsigned long parent_rate) +{ + struct tegra210_clk_emc *emc = to_tegra210_clk_emc(hw); + struct tegra210_clk_emc_provider *provider = emc->provider; + struct tegra210_clk_emc_config *config; + struct device *dev = provider->dev; + struct clk_hw *old, *new, *parent; + u8 old_idx, new_idx, index; + struct clk *clk; + unsigned int i; + int err; + + if (!provider || !provider->configs || provider->num_configs == 0) + return -EINVAL; + + for (i = 0; i < provider->num_configs; i++) { + if (provider->configs[i].rate >= rate) { + config = &provider->configs[i]; + break; + } + } + + if (i == provider->num_configs) + config = &provider->configs[i - 1]; + + old_idx = tegra210_clk_emc_get_parent(hw); + new_idx = FIELD_GET(CLK_SOURCE_EMC_2X_CLK_SRC, config->value); + + old = clk_hw_get_parent_by_index(hw, old_idx); + new = clk_hw_get_parent_by_index(hw, new_idx); + + /* if the rate has changed... */ + if (config->parent_rate != clk_hw_get_rate(old)) { + /* ... but the clock source remains the same ... */ + if (new_idx == old_idx) { + /* ... switch to the alternative clock source. */ + switch (new_idx) { + case CLK_SRC_PLLM: + new_idx = CLK_SRC_PLLMB; + break; + + case CLK_SRC_PLLM_UD: + new_idx = CLK_SRC_PLLMB_UD; + break; + + case CLK_SRC_PLLMB_UD: + new_idx = CLK_SRC_PLLM_UD; + break; + + case CLK_SRC_PLLMB: + new_idx = CLK_SRC_PLLM; + break; + } + + /* + * This should never happen because we can't deal with + * it. + */ + if (WARN_ON(new_idx == old_idx)) + return -EINVAL; + + new = clk_hw_get_parent_by_index(hw, new_idx); + } + + index = new_idx; + parent = new; + } else { + index = old_idx; + parent = old; + } + + clk = tegra210_clk_emc_find_parent(emc, index); + if (IS_ERR(clk)) { + err = PTR_ERR(clk); + dev_err(dev, "failed to get parent clock for index %u: %d\n", + index, err); + return err; + } + + /* set the new parent clock to the required rate */ + if (clk_get_rate(clk) != config->parent_rate) { + err = clk_set_rate(clk, config->parent_rate); + if (err < 0) { + dev_err(dev, "failed to set rate %lu Hz for %pC: %d\n", + config->parent_rate, clk, err); + return err; + } + } + + /* enable the new parent clock */ + if (parent != old) { + err = clk_prepare_enable(clk); + if (err < 0) { + dev_err(dev, "failed to enable parent clock %pC: %d\n", + clk, err); + return err; + } + } + + /* update the EMC source configuration to reflect the new parent */ + config->value &= ~CLK_SOURCE_EMC_2X_CLK_SRC; + config->value |= FIELD_PREP(CLK_SOURCE_EMC_2X_CLK_SRC, index); + + /* + * Finally, switch the EMC programming with both old and new parent + * clocks enabled. + */ + err = provider->set_rate(dev, config); + if (err < 0) { + dev_err(dev, "failed to set EMC rate to %lu Hz: %d\n", rate, + err); + + /* + * If we're unable to switch to the new EMC frequency, we no + * longer need the new parent to be enabled. + */ + if (parent != old) + clk_disable_unprepare(clk); + + return err; + } + + /* reparent to new parent clock and disable the old parent clock */ + if (parent != old) { + clk = tegra210_clk_emc_find_parent(emc, old_idx); + if (IS_ERR(clk)) { + err = PTR_ERR(clk); + dev_err(dev, + "failed to get parent clock for index %u: %d\n", + old_idx, err); + return err; + } + + clk_hw_reparent(hw, parent); + clk_disable_unprepare(clk); + } + + return err; +} + +static const struct clk_ops tegra210_clk_emc_ops = { + .get_parent = tegra210_clk_emc_get_parent, + .recalc_rate = tegra210_clk_emc_recalc_rate, + .round_rate = tegra210_clk_emc_round_rate, + .set_rate = tegra210_clk_emc_set_rate, +}; + +struct clk *tegra210_clk_register_emc(struct device_node *np, + void __iomem *regs) +{ + struct tegra210_clk_emc *emc; + struct clk_init_data init; + struct clk *clk; + + emc = kzalloc(sizeof(*emc), GFP_KERNEL); + if (!emc) + return ERR_PTR(-ENOMEM); + + emc->regs = regs; + + init.name = "emc"; + init.ops = &tegra210_clk_emc_ops; + init.flags = CLK_IS_CRITICAL | CLK_GET_RATE_NOCACHE; + init.parent_names = tegra210_clk_emc_parents; + init.num_parents = ARRAY_SIZE(tegra210_clk_emc_parents); + emc->hw.init = &init; + + clk = clk_register(NULL, &emc->hw); + if (IS_ERR(clk)) { + kfree(emc); + return clk; + } + + return clk; +} + +int tegra210_clk_emc_attach(struct clk *clk, + struct tegra210_clk_emc_provider *provider) +{ + struct clk_hw *hw = __clk_get_hw(clk); + struct tegra210_clk_emc *emc = to_tegra210_clk_emc(hw); + struct device *dev = provider->dev; + unsigned int i; + int err; + + if (!try_module_get(provider->owner)) + return -ENODEV; + + for (i = 0; i < provider->num_configs; i++) { + struct tegra210_clk_emc_config *config = &provider->configs[i]; + struct clk_hw *parent; + bool same_freq; + u8 div, src; + + div = FIELD_GET(CLK_SOURCE_EMC_2X_CLK_DIVISOR, config->value); + src = FIELD_GET(CLK_SOURCE_EMC_2X_CLK_SRC, config->value); + + /* do basic sanity checking on the EMC timings */ + if (div & 0x1) { + dev_err(dev, "invalid odd divider %u for rate %lu Hz\n", + div, config->rate); + err = -EINVAL; + goto put; + } + + same_freq = config->value & CLK_SOURCE_EMC_MC_EMC_SAME_FREQ; + + if (same_freq != config->same_freq) { + dev_err(dev, + "ambiguous EMC to MC ratio for rate %lu Hz\n", + config->rate); + err = -EINVAL; + goto put; + } + + parent = clk_hw_get_parent_by_index(hw, src); + config->parent = src; + + if (src == CLK_SRC_PLLM || src == CLK_SRC_PLLM_UD) { + config->parent_rate = config->rate * (1 + div / 2); + } else { + unsigned long rate = config->rate * (1 + div / 2); + + config->parent_rate = clk_hw_get_rate(parent); + + if (config->parent_rate != rate) { + dev_err(dev, + "rate %lu Hz does not match input\n", + config->rate); + err = -EINVAL; + goto put; + } + } + } + + emc->provider = provider; + + return 0; + +put: + module_put(provider->owner); + return err; +} +EXPORT_SYMBOL_GPL(tegra210_clk_emc_attach); + +void tegra210_clk_emc_detach(struct clk *clk) +{ + struct tegra210_clk_emc *emc = to_tegra210_clk_emc(__clk_get_hw(clk)); + + module_put(emc->provider->owner); + emc->provider = NULL; +} +EXPORT_SYMBOL_GPL(tegra210_clk_emc_detach); diff --git a/drivers/clk/tegra/clk.h b/drivers/clk/tegra/clk.h index 1d9f1bc18334..09cc700bab41 100644 --- a/drivers/clk/tegra/clk.h +++ b/drivers/clk/tegra/clk.h @@ -907,4 +907,7 @@ void tegra_clk_periph_resume(void); bool tegra20_clk_emc_driver_available(struct clk_hw *emc_hw); struct clk *tegra20_clk_register_emc(void __iomem *ioaddr, bool low_jitter); +struct clk *tegra210_clk_register_emc(struct device_node *np, + void __iomem *regs); + #endif /* TEGRA_CLK_H */ diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 5b0bdb413460..3f01d43f0598 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -146,4 +146,28 @@ void tegra20_clk_set_emc_round_callback(tegra20_clk_emc_round_cb *round_cb, void *cb_arg); int tegra20_clk_prepare_emc_mc_same_freq(struct clk *emc_clk, bool same); +struct tegra210_clk_emc_config { + unsigned long rate; + bool same_freq; + u32 value; + + unsigned long parent_rate; + u8 parent; +}; + +struct tegra210_clk_emc_provider { + struct module *owner; + struct device *dev; + + struct tegra210_clk_emc_config *configs; + unsigned int num_configs; + + int (*set_rate)(struct device *dev, + const struct tegra210_clk_emc_config *config); +}; + +int tegra210_clk_emc_attach(struct clk *clk, + struct tegra210_clk_emc_provider *provider); +void tegra210_clk_emc_detach(struct clk *clk); + #endif /* __LINUX_CLK_TEGRA_H_ */ -- cgit v1.3 From 32b1924b210a70dcacdf65abd687c5ef86a67541 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Thu, 9 Apr 2020 11:29:47 +0300 Subject: ovl: skip overlayfs superblocks at global sync Stacked filesystems like overlayfs has no own writeback, but they have to forward syncfs() requests to backend for keeping data integrity. During global sync() each overlayfs instance calls method ->sync_fs() for backend although it itself is in global list of superblocks too. As a result one syscall sync() could write one superblock several times and send multiple disk barriers. This patch adds flag SB_I_SKIP_SYNC into sb->sb_iflags to avoid that. Reported-by: Dmitry Monakhov Signed-off-by: Konstantin Khlebnikov Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 5 +++-- fs/sync.c | 3 ++- include/linux/fs.h | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6e3fa96613f5..f57aa348dcd6 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -261,8 +261,8 @@ static int ovl_sync_fs(struct super_block *sb, int wait) return 0; /* - * If this is a sync(2) call or an emergency sync, all the super blocks - * will be iterated, including upper_sb, so no need to do anything. + * Not called for sync(2) call or an emergency sync (SB_I_SKIP_SYNC). + * All the super blocks will be iterated, including upper_sb. * * If this is a syncfs(2) call, then we do need to call * sync_filesystem() on upper_sb, but enough if we do it when being @@ -1870,6 +1870,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent) sb->s_xattr = ovl_xattr_handlers; sb->s_fs_info = ofs; sb->s_flags |= SB_POSIXACL; + sb->s_iflags |= SB_I_SKIP_SYNC; err = -ENOMEM; root_dentry = ovl_get_root(sb, upperpath.dentry, oe); diff --git a/fs/sync.c b/fs/sync.c index 4d1ff010bc5a..16c2630ee4bf 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -76,7 +76,8 @@ static void sync_inodes_one_sb(struct super_block *sb, void *arg) static void sync_fs_one_sb(struct super_block *sb, void *arg) { - if (!sb_rdonly(sb) && sb->s_op->sync_fs) + if (!sb_rdonly(sb) && !(sb->s_iflags & SB_I_SKIP_SYNC) && + sb->s_op->sync_fs) sb->s_op->sync_fs(sb, *(int *)arg); } diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..f186a966a36c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1409,6 +1409,8 @@ extern int send_sigurg(struct fown_struct *fown); #define SB_I_IMA_UNVERIFIABLE_SIGNATURE 0x00000020 #define SB_I_UNTRUSTED_MOUNTER 0x00000040 +#define SB_I_SKIP_SYNC 0x00000100 /* Skip superblock at global sync */ + /* Possible states of 'frozen' field */ enum { SB_UNFROZEN = 0, /* FS is unfrozen */ -- cgit v1.3 From ca4faf543a33373bed3650812d5f0cd0bd295b1a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 2 May 2020 10:37:44 -0400 Subject: SUNRPC: Move xpt_mutex into socket xpo_sendto methods It appears that the RPC/RDMA transport does not need serialization of calls to its xpo_sendto method. Move the mutex into the socket methods that still need that serialization. Tail latencies are unambiguously better with this patch applied. fio randrw 8KB 70/30 on NFSv3, smaller numbers are better: clat percentiles (usec): With xpt_mutex: r | 99.99th=[ 8848] w | 99.99th=[ 9634] Without xpt_mutex: r | 99.99th=[ 8586] w | 99.99th=[ 8979] Serializing the construction of RPC/RDMA transport headers is not really necessary at this point, because the Linux NFS server implementation never changes its credit grant on a connection. If that should change, then svc_rdma_sendto will need to serialize access to the transport's credit grant fields. Reported-by: kbuild test robot [ cel: fix uninitialized variable warning ] Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 6 +++++ net/sunrpc/svc_xprt.c | 12 +++------- net/sunrpc/svcsock.c | 25 +++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 35 ++++++++++++++---------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 ++++----- net/sunrpc/xprtsock.c | 12 ++++++++-- 6 files changed, 64 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 9e1e046de176..aca35ab5cff2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u return 0; } +static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt) +{ + return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) || + (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0); +} + int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2284ff038dad..07cdbf7d5764 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -914,16 +914,10 @@ int svc_send(struct svc_rqst *rqstp) xb->page_len + xb->tail[0].iov_len; trace_svc_sendto(xb); - - /* Grab mutex to serialize outgoing data. */ - mutex_lock(&xprt->xpt_mutex); trace_svc_stats_latency(rqstp); - if (test_bit(XPT_DEAD, &xprt->xpt_flags) - || test_bit(XPT_CLOSE, &xprt->xpt_flags)) - len = -ENOTCONN; - else - len = xprt->xpt_ops->xpo_sendto(rqstp); - mutex_unlock(&xprt->xpt_mutex); + + len = xprt->xpt_ops->xpo_sendto(rqstp); + trace_svc_send(rqstp, len); svc_xprt_release(rqstp); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 023514e392b3..3e7b6445e317 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -506,6 +506,9 @@ out_free: * svc_udp_sendto - Send out a reply on a UDP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_udp_sendto(struct svc_rqst *rqstp) @@ -531,6 +534,11 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) svc_set_cmsg_data(rqstp, cmh); + mutex_lock(&xprt->xpt_mutex); + + if (svc_xprt_is_dead(xprt)) + goto out_notconn; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { @@ -538,9 +546,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); } + + mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; return sent; + +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; } static int svc_udp_has_wspace(struct svc_xprt *xprt) @@ -1063,6 +1077,9 @@ err_noclose: * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_tcp_sendto(struct svc_rqst *rqstp) @@ -1080,12 +1097,19 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) svc_release_skb(rqstp); + mutex_lock(&xprt->xpt_mutex); + if (svc_xprt_is_dead(xprt)) + goto out_notconn; err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; + mutex_unlock(&xprt->xpt_mutex); return sent; +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; out_close: pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", xprt->xpt_server->sv_name, @@ -1093,6 +1117,7 @@ out_close: (err < 0) ? err : sent, xdr->len); set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index af7eb8d202ae..d9aab4504f2c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -210,34 +210,31 @@ drop_connection: return -ENOTCONN; } -/* Send an RPC call on the passive end of a transport - * connection. +/** + * xprt_rdma_bc_send_request - Send a reverse-direction Call + * @rqst: rpc_rqst containing Call message to be sent + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ -static int -xprt_rdma_bc_send_request(struct rpc_rqst *rqst) +static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) { struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; - struct svcxprt_rdma *rdma; + struct svcxprt_rdma *rdma = + container_of(sxprt, struct svcxprt_rdma, sc_xprt); int ret; dprintk("svcrdma: sending bc call with xid: %08x\n", be32_to_cpu(rqst->rq_xid)); - mutex_lock(&sxprt->xpt_mutex); - - ret = -ENOTCONN; - rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) { - ret = rpcrdma_bc_send_request(rdma, rqst); - if (ret == -ENOTCONN) - svc_close_xprt(sxprt); - } + if (test_bit(XPT_DEAD, &sxprt->xpt_flags)) + return -ENOTCONN; - mutex_unlock(&sxprt->xpt_mutex); - - if (ret < 0) - return ret; - return 0; + ret = rpcrdma_bc_send_request(rdma, rqst); + if (ret == -ENOTCONN) + svc_close_xprt(sxprt); + return ret; } static void diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index b6c8643867f2..38e7c3c8c4a9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) __be32 *p; int ret; - /* Create the RDMA response header. xprt->xpt_mutex, - * acquired in svc_send(), serializes RPC replies. The - * code path below that inserts the credit grant value - * into each transport header runs only inside this - * critical section. - */ + ret = -ENOTCONN; + if (svc_xprt_is_dead(xprt)) + goto err0; + ret = -ENOMEM; sctxt = svc_rdma_send_ctxt_get(rdma); if (!sctxt) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 845d0be805ec..839c49330785 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2548,8 +2548,16 @@ static int bc_sendto(struct rpc_rqst *req) return sent; } -/* - * The send routine. Borrows from svc_send +/** + * bc_send_request - Send a backchannel Call on a TCP socket + * @req: rpc_rqst containing Call message to be sent + * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ static int bc_send_request(struct rpc_rqst *req) { -- cgit v1.3 From ea740bd5f58e2912e74f401fd01a9d6aa985ca05 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 20 Mar 2020 17:32:41 -0400 Subject: svcrdma: Fix backchannel return code Way back when I was writing the RPC/RDMA server-side backchannel code, I misread the TCP backchannel reply handler logic. When svc_tcp_recvfrom() successfully receives a backchannel reply, it does not return -EAGAIN. It sets XPT_DATA and returns zero. Update svc_rdma_recvfrom() to return zero. Here, XPT_DATA doesn't need to be set again: it is set whenever a new message is received, behind a spin lock in a single threaded context. Also, if handling the cb reply is not successful, the message is simply dropped. There's no special message framing to deal with as there is in the TCP case. Now that the handle_bc_reply() return value is ignored, I've removed the dprintk call sites in the error exit of handle_bc_reply() in favor of trace points in other areas that already report the error cases. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 5 ++-- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 38 ++++++++---------------------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 11 ++++----- 3 files changed, 17 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cbcfbd0521e3..8518c3f37e56 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -160,9 +160,8 @@ struct svc_rdma_send_ctxt { }; /* svc_rdma_backchannel.c */ -extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, - __be32 *rdma_resp, - struct xdr_buf *rcvbuf); +extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt); /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index d9aab4504f2c..b174f3b109a5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -15,26 +15,25 @@ #undef SVCRDMA_BACKCHANNEL_DEBUG /** - * svc_rdma_handle_bc_reply - Process incoming backchannel reply - * @xprt: controlling backchannel transport - * @rdma_resp: pointer to incoming transport header - * @rcvbuf: XDR buffer into which to decode the reply + * svc_rdma_handle_bc_reply - Process incoming backchannel Reply + * @rqstp: resources for handling the Reply + * @rctxt: Received message * - * Returns: - * %0 if @rcvbuf is filled in, xprt_complete_rqst called, - * %-EAGAIN if server should call ->recvfrom again. */ -int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, - struct xdr_buf *rcvbuf) +void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt) { + struct svc_xprt *sxprt = rqstp->rq_xprt; + struct rpc_xprt *xprt = sxprt->xpt_bc_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct xdr_buf *rcvbuf = &rqstp->rq_arg; struct kvec *dst, *src = &rcvbuf->head[0]; + __be32 *rdma_resp = rctxt->rc_recv_buf; struct rpc_rqst *req; u32 credits; size_t len; __be32 xid; __be32 *p; - int ret; p = (__be32 *)src->iov_base; len = src->iov_len; @@ -49,14 +48,10 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, __func__, (int)len, p); #endif - ret = -EAGAIN; - if (src->iov_len < 24) - goto out_shortreply; - spin_lock(&xprt->queue_lock); req = xprt_lookup_rqst(xprt, xid); if (!req) - goto out_notfound; + goto out_unlock; dst = &req->rq_private_buf.head[0]; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); @@ -77,25 +72,12 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, spin_unlock(&xprt->transport_lock); spin_lock(&xprt->queue_lock); - ret = 0; xprt_complete_rqst(req->rq_task, rcvbuf->len); xprt_unpin_rqst(req); rcvbuf->len = 0; out_unlock: spin_unlock(&xprt->queue_lock); -out: - return ret; - -out_shortreply: - dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n", - xprt, src->iov_len); - goto out; - -out_notfound: - dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", - xprt, be32_to_cpu(xid)); - goto out_unlock; } /* Send a backwards direction RPC call. diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index efa5fcb5793f..eee7c6478b30 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -878,12 +878,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_drop; rqstp->rq_xprt_hlen = ret; - if (svc_rdma_is_backchannel_reply(xprt, p)) { - ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, - &rqstp->rq_arg); - svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - return ret; - } + if (svc_rdma_is_backchannel_reply(xprt, p)) + goto out_backchannel; + svc_rdma_get_inv_rkey(rdma_xprt, ctxt); p += rpcrdma_fixed_maxsz; @@ -913,6 +910,8 @@ out_postfail: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return ret; +out_backchannel: + svc_rdma_handle_bc_reply(rqstp, ctxt); out_drop: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return 0; -- cgit v1.3 From 08e3c9f181bfb78d842c4f432888116d66e07c2f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 29 Apr 2020 16:00:12 -0400 Subject: svcrdma: Remove the SVCRDMA_DEBUG macro Clean up: Commit d21b05f101ae ("rdma: SVCRMDA Header File") introduced the SVCRDMA_DEBUG macro, but it doesn't seem to have been used. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 8518c3f37e56..7ed82625dc0b 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,7 +48,6 @@ #include #include #include -#define SVCRDMA_DEBUG /* Default and maximum inline threshold sizes */ enum { -- cgit v1.3 From 02648908d19a99532b0839959e38ae53d95d2798 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Mar 2020 14:12:15 -0400 Subject: SUNRPC: Rename svc_sock::sk_reclen Clean up. I find the name of the svc_sock::sk_reclen field confusing, so I've changed it to better reflect its function. This field is not read directly to get the record length. Rather, it is a buffer containing a record marker that needs to be decoded. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svcsock.h | 6 +++--- net/sunrpc/svcsock.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 771baadaee9d..b7ac7fe68306 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,7 +28,7 @@ struct svc_sock { /* private TCP part */ /* On-the-wire fragment header: */ - __be32 sk_reclen; + __be32 sk_marker; /* As we receive a record, this includes the length received so * far (including the fragment header): */ u32 sk_tcplen; @@ -41,12 +41,12 @@ struct svc_sock { static inline u32 svc_sock_reclen(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; + return be32_to_cpu(svsk->sk_marker) & RPC_FRAGMENT_SIZE_MASK; } static inline u32 svc_sock_final_rec(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; + return be32_to_cpu(svsk->sk_marker) & RPC_LAST_STREAM_FRAGMENT; } /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 4ac1180c6306..d63b21f3f207 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -841,7 +841,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) struct kvec iov; want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; - iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; + iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; len = svc_recvfrom(rqstp, &iov, 1, want, 0); if (len < 0) @@ -938,7 +938,7 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) svc_sock_final_rec(svsk) ? "final" : "nonfinal", svc_sock_reclen(svsk)); svsk->sk_tcplen = 0; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; } /* @@ -1154,7 +1154,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) sk->sk_data_ready = svc_data_ready; sk->sk_write_space = svc_write_space; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; svsk->sk_tcplen = 0; svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); -- cgit v1.3 From 333cff6c963fbc8b9820ca2b6a8b2e22a572cd43 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 29 Apr 2020 12:36:39 +0200 Subject: powercap/drivers/idle_inject: Specify idle state max latency Currently the idle injection framework uses the play_idle() function which puts the current CPU in an idle state. The idle state is the deepest one, as specified by the latency constraint when calling the subsequent play_idle_precise() function with the INT_MAX. The idle_injection is used by the cpuidle_cooling device which computes the idle / run duration to mitigate the temperature by injecting idle cycles. The cooling device has no control on the depth of the idle state. Allow finer control of the idle injection mechanism by allowing to specify the latency for the idle state. Thus the cooling device has the ability to have a guarantee on the exit latency of the idle states it is injecting. Acked-by: Rafael J. Wysocki Signed-off-by: Daniel Lezcano Reviewed-by: Amit Kucheria Link: https://lore.kernel.org/r/20200429103644.5492-1-daniel.lezcano@linaro.org --- drivers/powercap/idle_inject.c | 16 +++++++++++++++- include/linux/idle_inject.h | 4 ++++ 2 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/powercap/idle_inject.c b/drivers/powercap/idle_inject.c index e9bbd3c42eef..c90f0990968b 100644 --- a/drivers/powercap/idle_inject.c +++ b/drivers/powercap/idle_inject.c @@ -61,12 +61,14 @@ struct idle_inject_thread { * @timer: idle injection period timer * @idle_duration_us: duration of CPU idle time to inject * @run_duration_us: duration of CPU run time to allow + * @latency_us: max allowed latency * @cpumask: mask of CPUs affected by idle injection */ struct idle_inject_device { struct hrtimer timer; unsigned int idle_duration_us; unsigned int run_duration_us; + unsigned int latency_us; unsigned long cpumask[]; }; @@ -138,7 +140,8 @@ static void idle_inject_fn(unsigned int cpu) */ iit->should_run = 0; - play_idle(READ_ONCE(ii_dev->idle_duration_us)); + play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC, + READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC); } /** @@ -169,6 +172,16 @@ void idle_inject_get_duration(struct idle_inject_device *ii_dev, *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); } +/** + * idle_inject_set_latency - set the maximum latency allowed + * @latency_us: set the latency requirement for the idle state + */ +void idle_inject_set_latency(struct idle_inject_device *ii_dev, + unsigned int latency_us) +{ + WRITE_ONCE(ii_dev->latency_us, latency_us); +} + /** * idle_inject_start - start idle injections * @ii_dev: idle injection control device structure @@ -297,6 +310,7 @@ struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); ii_dev->timer.function = idle_inject_timer_fn; + ii_dev->latency_us = UINT_MAX; for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { diff --git a/include/linux/idle_inject.h b/include/linux/idle_inject.h index a445cd1a36c5..91a8612b8bf9 100644 --- a/include/linux/idle_inject.h +++ b/include/linux/idle_inject.h @@ -26,4 +26,8 @@ void idle_inject_set_duration(struct idle_inject_device *ii_dev, void idle_inject_get_duration(struct idle_inject_device *ii_dev, unsigned int *run_duration_us, unsigned int *idle_duration_us); + +void idle_inject_set_latency(struct idle_inject_device *ii_dev, + unsigned int latency_ns); + #endif /* __IDLE_INJECT_H__ */ -- cgit v1.3 From dfd0bda3703cdaf1fccd5da72cb7101a4fedfe68 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 29 Apr 2020 12:36:41 +0200 Subject: thermal/drivers/cpuidle_cooling: Change the registration function Today, there is no user for the cpuidle cooling device. The targetted platform is ARM and ARM64. The cpuidle and the cpufreq cooling device are based on the device tree. As the cpuidle cooling device can have its own configuration depending on the platform and the available idle states. The DT node description will give the optional properties to set the cooling device up. Do no longer rely on the CPU node which is prone to error and will lead to a confusion in the DT because the cpufreq cooling device is also using it. Let initialize the cpuidle cooling device with the DT binding. This was tested on: - hikey960 - hikey6220 - rock960 - db845c Acked-by: Viresh Kumar Signed-off-by: Daniel Lezcano Reviewed-by: Lukasz Luba Reviewed-by: Amit Kucheria Tested-by: Amit Kucheria Link: https://lore.kernel.org/r/20200429103644.5492-3-daniel.lezcano@linaro.org --- drivers/thermal/cpuidle_cooling.c | 63 +++++++++++++++++++++++++++++++-------- include/linux/cpu_cooling.h | 12 ++------ 2 files changed, 53 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/drivers/thermal/cpuidle_cooling.c b/drivers/thermal/cpuidle_cooling.c index 0bb843246f59..78e3e8238116 100644 --- a/drivers/thermal/cpuidle_cooling.c +++ b/drivers/thermal/cpuidle_cooling.c @@ -5,11 +5,14 @@ * Author: Daniel Lezcano * */ +#define pr_fmt(fmt) "cpuidle cooling: " fmt + #include #include #include #include #include +#include #include #include @@ -154,22 +157,25 @@ static struct thermal_cooling_device_ops cpuidle_cooling_ops = { }; /** - * cpuidle_of_cooling_register - Idle cooling device initialization function + * __cpuidle_cooling_register: register the cooling device * @drv: a cpuidle driver structure pointer - * @np: a node pointer to a device tree cooling device node + * @np: a device node structure pointer used for the thermal binding * - * This function is in charge of creating a cooling device per cpuidle - * driver and register it to thermal framework. + * This function is in charge of allocating the cpuidle cooling device + * structure, the idle injection, initialize them and register the + * cooling device to the thermal framework. * - * Return: zero on success, or negative value corresponding to the - * error detected in the underlying subsystems. + * Return: zero on success, a negative value returned by one of the + * underlying subsystem in case of error */ -int cpuidle_of_cooling_register(struct device_node *np, - struct cpuidle_driver *drv) +static int __cpuidle_cooling_register(struct device_node *np, + struct cpuidle_driver *drv) { struct idle_inject_device *ii_dev; struct cpuidle_cooling_device *idle_cdev; struct thermal_cooling_device *cdev; + unsigned int idle_duration_us = TICK_USEC; + unsigned int latency_us = UINT_MAX; char dev_name[THERMAL_NAME_LENGTH]; int id, ret; @@ -191,7 +197,11 @@ int cpuidle_of_cooling_register(struct device_node *np, goto out_id; } - idle_inject_set_duration(ii_dev, TICK_USEC, TICK_USEC); + of_property_read_u32(np, "duration-us", &idle_duration_us); + of_property_read_u32(np, "exit-latency-us", &latency_us); + + idle_inject_set_duration(ii_dev, TICK_USEC, idle_duration_us); + idle_inject_set_latency(ii_dev, latency_us); idle_cdev->ii_dev = ii_dev; @@ -204,6 +214,9 @@ int cpuidle_of_cooling_register(struct device_node *np, goto out_unregister; } + pr_debug("%s: Idle injection set with idle duration=%u, latency=%u\n", + dev_name, idle_duration_us, latency_us); + return 0; out_unregister: @@ -221,12 +234,38 @@ out: * @drv: a cpuidle driver structure pointer * * This function is in charge of creating a cooling device per cpuidle - * driver and register it to thermal framework. + * driver and register it to the thermal framework. * * Return: zero on success, or negative value corresponding to the * error detected in the underlying subsystems. */ -int cpuidle_cooling_register(struct cpuidle_driver *drv) +void cpuidle_cooling_register(struct cpuidle_driver *drv) { - return cpuidle_of_cooling_register(NULL, drv); + struct device_node *cooling_node; + struct device_node *cpu_node; + int cpu, ret; + + for_each_cpu(cpu, drv->cpumask) { + + cpu_node = of_cpu_device_node_get(cpu); + + cooling_node = of_get_child_by_name(cpu_node, "thermal-idle"); + + of_node_put(cpu_node); + + if (!cooling_node) { + pr_debug("'thermal-idle' node not found for cpu%d\n", cpu); + continue; + } + + ret = __cpuidle_cooling_register(cooling_node, drv); + + of_node_put(cooling_node); + + if (ret) { + pr_err("Failed to register the cpuidle cooling device" \ + "for cpu%d: %d\n", cpu, ret); + break; + } + } } diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index 65501d8f9778..a3bdc8a98f2c 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -63,18 +63,10 @@ of_cpufreq_cooling_register(struct cpufreq_policy *policy) struct cpuidle_driver; #ifdef CONFIG_CPU_IDLE_THERMAL -int cpuidle_cooling_register(struct cpuidle_driver *drv); -int cpuidle_of_cooling_register(struct device_node *np, - struct cpuidle_driver *drv); +void cpuidle_cooling_register(struct cpuidle_driver *drv); #else /* CONFIG_CPU_IDLE_THERMAL */ -static inline int cpuidle_cooling_register(struct cpuidle_driver *drv) +static inline void cpuidle_cooling_register(struct cpuidle_driver *drv) { - return 0; -} -static inline int cpuidle_of_cooling_register(struct device_node *np, - struct cpuidle_driver *drv) -{ - return 0; } #endif /* CONFIG_CPU_IDLE_THERMAL */ -- cgit v1.3 From 344fa64ef8f6740e99b32ab788b6e3742d7284b3 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2020 13:58:35 +0000 Subject: security: Add a hook for the point of notification insertion Add a security hook that allows an LSM to rule on whether a notification message is allowed to be inserted into a particular watch queue. The hook is given the following information: (1) The credentials of the triggerer (which may be init_cred for a system notification, eg. a hardware error). (2) The credentials of the whoever set the watch. (3) The notification message. Signed-off-by: David Howells Acked-by: James Morris cc: Casey Schaufler cc: Stephen Smalley cc: linux-security-module@vger.kernel.org --- include/linux/lsm_hook_defs.h | 5 +++++ include/linux/lsm_hooks.h | 9 +++++++++ include/linux/security.h | 15 +++++++++++++++ security/security.c | 9 +++++++++ 4 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 5616b2567aa7..e0def45b5cc5 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -253,6 +253,11 @@ LSM_HOOK(int, 0, inode_setsecctx, struct dentry *dentry, void *ctx, u32 ctxlen) LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, u32 *ctxlen) +#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) +LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, + const struct cred *cred, struct watch_notification *n) +#endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */ + #ifdef CONFIG_SECURITY_NETWORK LSM_HOOK(int, 0, unix_stream_connect, struct sock *sock, struct sock *other, struct sock *newsk) diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 988ca0df7824..0b5e5769b836 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1437,6 +1437,15 @@ * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. * + * Security hooks for the general notification queue: + * + * @post_notification: + * Check to see if a watch notification can be posted to a particular + * queue. + * @w_cred: The credentials of the whoever set the watch. + * @cred: The event-triggerer's credentials + * @n: The notification being posted + * * Security hooks for using the eBPF maps and programs functionalities through * eBPF syscalls. * diff --git a/include/linux/security.h b/include/linux/security.h index a8d9310472df..9a5d12ab491b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -56,6 +56,8 @@ struct mm_struct; struct fs_context; struct fs_parameter; enum fs_value_type; +struct watch; +struct watch_notification; /* Default (no) options for the capable function */ #define CAP_OPT_NONE 0x0 @@ -1275,6 +1277,19 @@ static inline int security_locked_down(enum lockdown_reason what) } #endif /* CONFIG_SECURITY */ +#if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) +int security_post_notification(const struct cred *w_cred, + const struct cred *cred, + struct watch_notification *n); +#else +static inline int security_post_notification(const struct cred *w_cred, + const struct cred *cred, + struct watch_notification *n) +{ + return 0; +} +#endif + #ifdef CONFIG_SECURITY_NETWORK int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk); diff --git a/security/security.c b/security/security.c index 7fed24b9d57e..7d55607120b4 100644 --- a/security/security.c +++ b/security/security.c @@ -2007,6 +2007,15 @@ int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen) } EXPORT_SYMBOL(security_inode_getsecctx); +#ifdef CONFIG_WATCH_QUEUE +int security_post_notification(const struct cred *w_cred, + const struct cred *cred, + struct watch_notification *n) +{ + return call_int_hook(post_notification, 0, w_cred, cred, n); +} +#endif /* CONFIG_WATCH_QUEUE */ + #ifdef CONFIG_SECURITY_NETWORK int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) -- cgit v1.3 From c73be61cede5882f9605a852414db559c0ebedfd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jan 2020 17:07:11 +0000 Subject: pipe: Add general notification queue support Make it possible to have a general notification queue built on top of a standard pipe. Notifications are 'spliced' into the pipe and then read out. splice(), vmsplice() and sendfile() are forbidden on pipes used for notifications as post_one_notification() cannot take pipe->mutex. This means that notifications could be posted in between individual pipe buffers, making iov_iter_revert() difficult to effect. The way the notification queue is used is: (1) An application opens a pipe with a special flag and indicates the number of messages it wishes to be able to queue at once (this can only be set once): pipe2(fds, O_NOTIFICATION_PIPE); ioctl(fds[0], IOC_WATCH_QUEUE_SET_SIZE, queue_depth); (2) The application then uses poll() and read() as normal to extract data from the pipe. read() will return multiple notifications if the buffer is big enough, but it will not split a notification across buffers - rather it will return a short read or EMSGSIZE. Notification messages include a length in the header so that the caller can split them up. Each message has a header that describes it: struct watch_notification { __u32 type:24; __u32 subtype:8; __u32 info; }; The type indicates the source (eg. mount tree changes, superblock events, keyring changes, block layer events) and the subtype indicates the event type (eg. mount, unmount; EIO, EDQUOT; link, unlink). The info field indicates a number of things, including the entry length, an ID assigned to a watchpoint contributing to this buffer and type-specific flags. Supplementary data, such as the key ID that generated an event, can be attached in additional slots. The maximum message size is 127 bytes. Messages may not be padded or aligned, so there is no guarantee, for example, that the notification type will be on a 4-byte bounary. Signed-off-by: David Howells --- Documentation/userspace-api/ioctl/ioctl-number.rst | 1 + Documentation/watch_queue.rst | 339 +++++++++++ fs/pipe.c | 206 ++++--- fs/splice.c | 12 +- include/linux/pipe_fs_i.h | 19 +- include/linux/watch_queue.h | 127 ++++ include/uapi/linux/watch_queue.h | 20 + init/Kconfig | 12 + kernel/Makefile | 1 + kernel/watch_queue.c | 657 +++++++++++++++++++++ 10 files changed, 1318 insertions(+), 76 deletions(-) create mode 100644 Documentation/watch_queue.rst create mode 100644 include/linux/watch_queue.h create mode 100644 kernel/watch_queue.c (limited to 'include/linux') diff --git a/Documentation/userspace-api/ioctl/ioctl-number.rst b/Documentation/userspace-api/ioctl/ioctl-number.rst index f759edafd938..9425377615ce 100644 --- a/Documentation/userspace-api/ioctl/ioctl-number.rst +++ b/Documentation/userspace-api/ioctl/ioctl-number.rst @@ -201,6 +201,7 @@ Code Seq# Include File Comments 'W' 00-1F linux/wanrouter.h conflict! (pre 3.9) 'W' 00-3F sound/asound.h conflict! 'W' 40-5F drivers/pci/switch/switchtec.c +'W' 60-61 linux/watch_queue.h 'X' all fs/xfs/xfs_fs.h, conflict! fs/xfs/linux-2.6/xfs_ioctl32.h, include/linux/falloc.h, diff --git a/Documentation/watch_queue.rst b/Documentation/watch_queue.rst new file mode 100644 index 000000000000..849fad6893ef --- /dev/null +++ b/Documentation/watch_queue.rst @@ -0,0 +1,339 @@ +============================== +General notification mechanism +============================== + +The general notification mechanism is built on top of the standard pipe driver +whereby it effectively splices notification messages from the kernel into pipes +opened by userspace. This can be used in conjunction with:: + + * Key/keyring notifications + + +The notifications buffers can be enabled by: + + "General setup"/"General notification queue" + (CONFIG_WATCH_QUEUE) + +This document has the following sections: + +.. contents:: :local: + + +Overview +======== + +This facility appears as a pipe that is opened in a special mode. The pipe's +internal ring buffer is used to hold messages that are generated by the kernel. +These messages are then read out by read(). Splice and similar are disabled on +such pipes due to them wanting to, under some circumstances, revert their +additions to the ring - which might end up interleaved with notification +messages. + +The owner of the pipe has to tell the kernel which sources it would like to +watch through that pipe. Only sources that have been connected to a pipe will +insert messages into it. Note that a source may be bound to multiple pipes and +insert messages into all of them simultaneously. + +Filters may also be emplaced on a pipe so that certain source types and +subevents can be ignored if they're not of interest. + +A message will be discarded if there isn't a slot available in the ring or if +no preallocated message buffer is available. In both of these cases, read() +will insert a WATCH_META_LOSS_NOTIFICATION message into the output buffer after +the last message currently in the buffer has been read. + +Note that when producing a notification, the kernel does not wait for the +consumers to collect it, but rather just continues on. This means that +notifications can be generated whilst spinlocks are held and also protects the +kernel from being held up indefinitely by a userspace malfunction. + + +Message Structure +================= + +Notification messages begin with a short header:: + + struct watch_notification { + __u32 type:24; + __u32 subtype:8; + __u32 info; + }; + +"type" indicates the source of the notification record and "subtype" indicates +the type of record from that source (see the Watch Sources section below). The +type may also be "WATCH_TYPE_META". This is a special record type generated +internally by the watch queue itself. There are two subtypes: + + * WATCH_META_REMOVAL_NOTIFICATION + * WATCH_META_LOSS_NOTIFICATION + +The first indicates that an object on which a watch was installed was removed +or destroyed and the second indicates that some messages have been lost. + +"info" indicates a bunch of things, including: + + * The length of the message in bytes, including the header (mask with + WATCH_INFO_LENGTH and shift by WATCH_INFO_LENGTH__SHIFT). This indicates + the size of the record, which may be between 8 and 127 bytes. + + * The watch ID (mask with WATCH_INFO_ID and shift by WATCH_INFO_ID__SHIFT). + This indicates that caller's ID of the watch, which may be between 0 + and 255. Multiple watches may share a queue, and this provides a means to + distinguish them. + + * A type-specific field (WATCH_INFO_TYPE_INFO). This is set by the + notification producer to indicate some meaning specific to the type and + subtype. + +Everything in info apart from the length can be used for filtering. + +The header can be followed by supplementary information. The format of this is +at the discretion is defined by the type and subtype. + + +Watch List (Notification Source) API +==================================== + +A "watch list" is a list of watchers that are subscribed to a source of +notifications. A list may be attached to an object (say a key or a superblock) +or may be global (say for device events). From a userspace perspective, a +non-global watch list is typically referred to by reference to the object it +belongs to (such as using KEYCTL_NOTIFY and giving it a key serial number to +watch that specific key). + +To manage a watch list, the following functions are provided: + + * ``void init_watch_list(struct watch_list *wlist, + void (*release_watch)(struct watch *wlist));`` + + Initialise a watch list. If ``release_watch`` is not NULL, then this + indicates a function that should be called when the watch_list object is + destroyed to discard any references the watch list holds on the watched + object. + + * ``void remove_watch_list(struct watch_list *wlist);`` + + This removes all of the watches subscribed to a watch_list and frees them + and then destroys the watch_list object itself. + + +Watch Queue (Notification Output) API +===================================== + +A "watch queue" is the buffer allocated by an application that notification +records will be written into. The workings of this are hidden entirely inside +of the pipe device driver, but it is necessary to gain a reference to it to set +a watch. These can be managed with: + + * ``struct watch_queue *get_watch_queue(int fd);`` + + Since watch queues are indicated to the kernel by the fd of the pipe that + implements the buffer, userspace must hand that fd through a system call. + This can be used to look up an opaque pointer to the watch queue from the + system call. + + * ``void put_watch_queue(struct watch_queue *wqueue);`` + + This discards the reference obtained from ``get_watch_queue()``. + + +Watch Subscription API +====================== + +A "watch" is a subscription on a watch list, indicating the watch queue, and +thus the buffer, into which notification records should be written. The watch +queue object may also carry filtering rules for that object, as set by +userspace. Some parts of the watch struct can be set by the driver:: + + struct watch { + union { + u32 info_id; /* ID to be OR'd in to info field */ + ... + }; + void *private; /* Private data for the watched object */ + u64 id; /* Internal identifier */ + ... + }; + +The ``info_id`` value should be an 8-bit number obtained from userspace and +shifted by WATCH_INFO_ID__SHIFT. This is OR'd into the WATCH_INFO_ID field of +struct watch_notification::info when and if the notification is written into +the associated watch queue buffer. + +The ``private`` field is the driver's data associated with the watch_list and +is cleaned up by the ``watch_list::release_watch()`` method. + +The ``id`` field is the source's ID. Notifications that are posted with a +different ID are ignored. + +The following functions are provided to manage watches: + + * ``void init_watch(struct watch *watch, struct watch_queue *wqueue);`` + + Initialise a watch object, setting its pointer to the watch queue, using + appropriate barriering to avoid lockdep complaints. + + * ``int add_watch_to_object(struct watch *watch, struct watch_list *wlist);`` + + Subscribe a watch to a watch list (notification source). The + driver-settable fields in the watch struct must have been set before this + is called. + + * ``int remove_watch_from_object(struct watch_list *wlist, + struct watch_queue *wqueue, + u64 id, false);`` + + Remove a watch from a watch list, where the watch must match the specified + watch queue (``wqueue``) and object identifier (``id``). A notification + (``WATCH_META_REMOVAL_NOTIFICATION``) is sent to the watch queue to + indicate that the watch got removed. + + * ``int remove_watch_from_object(struct watch_list *wlist, NULL, 0, true);`` + + Remove all the watches from a watch list. It is expected that this will be + called preparatory to destruction and that the watch list will be + inaccessible to new watches by this point. A notification + (``WATCH_META_REMOVAL_NOTIFICATION``) is sent to the watch queue of each + subscribed watch to indicate that the watch got removed. + + +Notification Posting API +======================== + +To post a notification to watch list so that the subscribed watches can see it, +the following function should be used:: + + void post_watch_notification(struct watch_list *wlist, + struct watch_notification *n, + const struct cred *cred, + u64 id); + +The notification should be preformatted and a pointer to the header (``n``) +should be passed in. The notification may be larger than this and the size in +units of buffer slots is noted in ``n->info & WATCH_INFO_LENGTH``. + +The ``cred`` struct indicates the credentials of the source (subject) and is +passed to the LSMs, such as SELinux, to allow or suppress the recording of the +note in each individual queue according to the credentials of that queue +(object). + +The ``id`` is the ID of the source object (such as the serial number on a key). +Only watches that have the same ID set in them will see this notification. + + +Watch Sources +============= + +Any particular buffer can be fed from multiple sources. Sources include: + + * WATCH_TYPE_KEY_NOTIFY + + Notifications of this type indicate changes to keys and keyrings, including + the changes of keyring contents or the attributes of keys. + + See Documentation/security/keys/core.rst for more information. + + +Event Filtering +=============== + +Once a watch queue has been created, a set of filters can be applied to limit +the events that are received using:: + + struct watch_notification_filter filter = { + ... + }; + ioctl(fd, IOC_WATCH_QUEUE_SET_FILTER, &filter) + +The filter description is a variable of type:: + + struct watch_notification_filter { + __u32 nr_filters; + __u32 __reserved; + struct watch_notification_type_filter filters[]; + }; + +Where "nr_filters" is the number of filters in filters[] and "__reserved" +should be 0. The "filters" array has elements of the following type:: + + struct watch_notification_type_filter { + __u32 type; + __u32 info_filter; + __u32 info_mask; + __u32 subtype_filter[8]; + }; + +Where: + + * ``type`` is the event type to filter for and should be something like + "WATCH_TYPE_KEY_NOTIFY" + + * ``info_filter`` and ``info_mask`` act as a filter on the info field of the + notification record. The notification is only written into the buffer if:: + + (watch.info & info_mask) == info_filter + + This could be used, for example, to ignore events that are not exactly on + the watched point in a mount tree. + + * ``subtype_filter`` is a bitmask indicating the subtypes that are of + interest. Bit 0 of subtype_filter[0] corresponds to subtype 0, bit 1 to + subtype 1, and so on. + +If the argument to the ioctl() is NULL, then the filters will be removed and +all events from the watched sources will come through. + + +Userspace Code Example +====================== + +A buffer is created with something like the following:: + + pipe2(fds, O_TMPFILE); + ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256); + +It can then be set to receive keyring change notifications:: + + keyctl(KEYCTL_WATCH_KEY, KEY_SPEC_SESSION_KEYRING, fds[1], 0x01); + +The notifications can then be consumed by something like the following:: + + static void consumer(int rfd, struct watch_queue_buffer *buf) + { + unsigned char buffer[128]; + ssize_t buf_len; + + while (buf_len = read(rfd, buffer, sizeof(buffer)), + buf_len > 0 + ) { + void *p = buffer; + void *end = buffer + buf_len; + while (p < end) { + union { + struct watch_notification n; + unsigned char buf1[128]; + } n; + size_t largest, len; + + largest = end - p; + if (largest > 128) + largest = 128; + memcpy(&n, p, largest); + + len = (n->info & WATCH_INFO_LENGTH) >> + WATCH_INFO_LENGTH__SHIFT; + if (len == 0 || len > largest) + return; + + switch (n.n.type) { + case WATCH_TYPE_META: + got_meta(&n.n); + case WATCH_TYPE_KEY_NOTIFY: + saw_key_change(&n.n); + break; + } + + p += len; + } + } + } diff --git a/fs/pipe.c b/fs/pipe.c index 16fb72e9abf7..da9bc1f21fd1 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -459,6 +460,13 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) goto out; } +#ifdef CONFIG_WATCH_QUEUE + if (pipe->watch_queue) { + ret = -EXDEV; + goto out; + } +#endif + /* * Only wake up if the pipe started out empty, since * otherwise there should be no readers waiting. @@ -628,22 +636,37 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) int count, head, tail, mask; switch (cmd) { - case FIONREAD: - __pipe_lock(pipe); - count = 0; - head = pipe->head; - tail = pipe->tail; - mask = pipe->ring_size - 1; + case FIONREAD: + __pipe_lock(pipe); + count = 0; + head = pipe->head; + tail = pipe->tail; + mask = pipe->ring_size - 1; - while (tail != head) { - count += pipe->bufs[tail & mask].len; - tail++; - } - __pipe_unlock(pipe); + while (tail != head) { + count += pipe->bufs[tail & mask].len; + tail++; + } + __pipe_unlock(pipe); - return put_user(count, (int __user *)arg); - default: - return -ENOIOCTLCMD; + return put_user(count, (int __user *)arg); + +#ifdef CONFIG_WATCH_QUEUE + case IOC_WATCH_QUEUE_SET_SIZE: { + int ret; + __pipe_lock(pipe); + ret = watch_queue_set_size(pipe, arg); + __pipe_unlock(pipe); + return ret; + } + + case IOC_WATCH_QUEUE_SET_FILTER: + return watch_queue_set_filter( + pipe, (struct watch_notification_filter __user *)arg); +#endif + + default: + return -ENOIOCTLCMD; } } @@ -754,27 +777,27 @@ pipe_fasync(int fd, struct file *filp, int on) return retval; } -static unsigned long account_pipe_buffers(struct user_struct *user, - unsigned long old, unsigned long new) +unsigned long account_pipe_buffers(struct user_struct *user, + unsigned long old, unsigned long new) { return atomic_long_add_return(new - old, &user->pipe_bufs); } -static bool too_many_pipe_buffers_soft(unsigned long user_bufs) +bool too_many_pipe_buffers_soft(unsigned long user_bufs) { unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft); return soft_limit && user_bufs > soft_limit; } -static bool too_many_pipe_buffers_hard(unsigned long user_bufs) +bool too_many_pipe_buffers_hard(unsigned long user_bufs) { unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard); return hard_limit && user_bufs > hard_limit; } -static bool is_unprivileged_user(void) +bool pipe_is_unprivileged_user(void) { return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); } @@ -796,12 +819,12 @@ struct pipe_inode_info *alloc_pipe_info(void) user_bufs = account_pipe_buffers(user, 0, pipe_bufs); - if (too_many_pipe_buffers_soft(user_bufs) && is_unprivileged_user()) { + if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) { user_bufs = account_pipe_buffers(user, pipe_bufs, 1); pipe_bufs = 1; } - if (too_many_pipe_buffers_hard(user_bufs) && is_unprivileged_user()) + if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user()) goto out_revert_acct; pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer), @@ -813,6 +836,7 @@ struct pipe_inode_info *alloc_pipe_info(void) pipe->r_counter = pipe->w_counter = 1; pipe->max_usage = pipe_bufs; pipe->ring_size = pipe_bufs; + pipe->nr_accounted = pipe_bufs; pipe->user = user; mutex_init(&pipe->mutex); return pipe; @@ -830,7 +854,14 @@ void free_pipe_info(struct pipe_inode_info *pipe) { int i; - (void) account_pipe_buffers(pipe->user, pipe->ring_size, 0); +#ifdef CONFIG_WATCH_QUEUE + if (pipe->watch_queue) { + watch_queue_clear(pipe->watch_queue); + put_watch_queue(pipe->watch_queue); + } +#endif + + (void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0); free_uid(pipe->user); for (i = 0; i < pipe->ring_size; i++) { struct pipe_buffer *buf = pipe->bufs + i; @@ -906,6 +937,17 @@ int create_pipe_files(struct file **res, int flags) if (!inode) return -ENFILE; + if (flags & O_NOTIFICATION_PIPE) { +#ifdef CONFIG_WATCH_QUEUE + if (watch_queue_init(inode->i_pipe) < 0) { + iput(inode); + return -ENOMEM; + } +#else + return -ENOPKG; +#endif + } + f = alloc_file_pseudo(inode, pipe_mnt, "", O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)), &pipefifo_fops); @@ -936,7 +978,7 @@ static int __do_pipe_flags(int *fd, struct file **files, int flags) int error; int fdw, fdr; - if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) + if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE)) return -EINVAL; error = create_pipe_files(files, flags); @@ -1184,42 +1226,12 @@ unsigned int round_pipe_size(unsigned long size) } /* - * Allocate a new array of pipe buffers and copy the info over. Returns the - * pipe size if successful, or return -ERROR on error. + * Resize the pipe ring to a number of slots. */ -static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) +int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots) { struct pipe_buffer *bufs; - unsigned int size, nr_slots, head, tail, mask, n; - unsigned long user_bufs; - long ret = 0; - - size = round_pipe_size(arg); - nr_slots = size >> PAGE_SHIFT; - - if (!nr_slots) - return -EINVAL; - - /* - * If trying to increase the pipe capacity, check that an - * unprivileged user is not trying to exceed various limits - * (soft limit check here, hard limit check just below). - * Decreasing the pipe capacity is always permitted, even - * if the user is currently over a limit. - */ - if (nr_slots > pipe->ring_size && - size > pipe_max_size && !capable(CAP_SYS_RESOURCE)) - return -EPERM; - - user_bufs = account_pipe_buffers(pipe->user, pipe->ring_size, nr_slots); - - if (nr_slots > pipe->ring_size && - (too_many_pipe_buffers_hard(user_bufs) || - too_many_pipe_buffers_soft(user_bufs)) && - is_unprivileged_user()) { - ret = -EPERM; - goto out_revert_acct; - } + unsigned int head, tail, mask, n; /* * We can shrink the pipe, if arg is greater than the ring occupancy. @@ -1231,17 +1243,13 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) head = pipe->head; tail = pipe->tail; n = pipe_occupancy(pipe->head, pipe->tail); - if (nr_slots < n) { - ret = -EBUSY; - goto out_revert_acct; - } + if (nr_slots < n) + return -EBUSY; bufs = kcalloc(nr_slots, sizeof(*bufs), GFP_KERNEL_ACCOUNT | __GFP_NOWARN); - if (unlikely(!bufs)) { - ret = -ENOMEM; - goto out_revert_acct; - } + if (unlikely(!bufs)) + return -ENOMEM; /* * The pipe array wraps around, so just start the new one at zero @@ -1269,16 +1277,68 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) kfree(pipe->bufs); pipe->bufs = bufs; pipe->ring_size = nr_slots; - pipe->max_usage = nr_slots; + if (pipe->max_usage > nr_slots) + pipe->max_usage = nr_slots; pipe->tail = tail; pipe->head = head; /* This might have made more room for writers */ wake_up_interruptible(&pipe->wr_wait); + return 0; +} + +/* + * Allocate a new array of pipe buffers and copy the info over. Returns the + * pipe size if successful, or return -ERROR on error. + */ +static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) +{ + unsigned long user_bufs; + unsigned int nr_slots, size; + long ret = 0; + +#ifdef CONFIG_WATCH_QUEUE + if (pipe->watch_queue) + return -EBUSY; +#endif + + size = round_pipe_size(arg); + nr_slots = size >> PAGE_SHIFT; + + if (!nr_slots) + return -EINVAL; + + /* + * If trying to increase the pipe capacity, check that an + * unprivileged user is not trying to exceed various limits + * (soft limit check here, hard limit check just below). + * Decreasing the pipe capacity is always permitted, even + * if the user is currently over a limit. + */ + if (nr_slots > pipe->max_usage && + size > pipe_max_size && !capable(CAP_SYS_RESOURCE)) + return -EPERM; + + user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots); + + if (nr_slots > pipe->max_usage && + (too_many_pipe_buffers_hard(user_bufs) || + too_many_pipe_buffers_soft(user_bufs)) && + pipe_is_unprivileged_user()) { + ret = -EPERM; + goto out_revert_acct; + } + + ret = pipe_resize_ring(pipe, nr_slots); + if (ret < 0) + goto out_revert_acct; + + pipe->max_usage = nr_slots; + pipe->nr_accounted = nr_slots; return pipe->max_usage * PAGE_SIZE; out_revert_acct: - (void) account_pipe_buffers(pipe->user, nr_slots, pipe->ring_size); + (void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted); return ret; } @@ -1287,9 +1347,17 @@ out_revert_acct: * location, so checking ->i_pipe is not enough to verify that this is a * pipe. */ -struct pipe_inode_info *get_pipe_info(struct file *file) +struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice) { - return file->f_op == &pipefifo_fops ? file->private_data : NULL; + struct pipe_inode_info *pipe = file->private_data; + + if (file->f_op != &pipefifo_fops || !pipe) + return NULL; +#ifdef CONFIG_WATCH_QUEUE + if (for_splice && pipe->watch_queue) + return NULL; +#endif + return pipe; } long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) @@ -1297,7 +1365,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) struct pipe_inode_info *pipe; long ret; - pipe = get_pipe_info(file); + pipe = get_pipe_info(file, false); if (!pipe) return -EBADF; diff --git a/fs/splice.c b/fs/splice.c index fd0a1e7e5959..50f3c0260c00 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1122,8 +1122,8 @@ long do_splice(struct file *in, loff_t __user *off_in, !(out->f_mode & FMODE_WRITE))) return -EBADF; - ipipe = get_pipe_info(in); - opipe = get_pipe_info(out); + ipipe = get_pipe_info(in, true); + opipe = get_pipe_info(out, true); if (ipipe && opipe) { if (off_in || off_out) @@ -1273,7 +1273,7 @@ static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, static long vmsplice_to_user(struct file *file, struct iov_iter *iter, unsigned int flags) { - struct pipe_inode_info *pipe = get_pipe_info(file); + struct pipe_inode_info *pipe = get_pipe_info(file, true); struct splice_desc sd = { .total_len = iov_iter_count(iter), .flags = flags, @@ -1308,7 +1308,7 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, if (flags & SPLICE_F_GIFT) buf_flag = PIPE_BUF_FLAG_GIFT; - pipe = get_pipe_info(file); + pipe = get_pipe_info(file, true); if (!pipe) return -EBADF; @@ -1757,8 +1757,8 @@ static int link_pipe(struct pipe_inode_info *ipipe, static long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) { - struct pipe_inode_info *ipipe = get_pipe_info(in); - struct pipe_inode_info *opipe = get_pipe_info(out); + struct pipe_inode_info *ipipe = get_pipe_info(in, true); + struct pipe_inode_info *opipe = get_pipe_info(out, true); int ret = -EINVAL; if (unlikely(!(in->f_mode & FMODE_READ) || diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ae58fad7f1e0..1d3eaa233f4a 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -35,6 +35,7 @@ struct pipe_buffer { * @tail: The point of buffer consumption * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) + * @nr_accounted: The amount this pipe accounts for in user->pipe_bufs * @tmp_page: cached released page * @readers: number of current readers of this pipe * @writers: number of current writers of this pipe @@ -45,6 +46,7 @@ struct pipe_buffer { * @fasync_writers: writer side fasync * @bufs: the circular array of pipe buffers * @user: the user who created this pipe + * @watch_queue: If this pipe is a watch_queue, this is the stuff for that **/ struct pipe_inode_info { struct mutex mutex; @@ -53,6 +55,7 @@ struct pipe_inode_info { unsigned int tail; unsigned int max_usage; unsigned int ring_size; + unsigned int nr_accounted; unsigned int readers; unsigned int writers; unsigned int files; @@ -63,6 +66,9 @@ struct pipe_inode_info { struct fasync_struct *fasync_writers; struct pipe_buffer *bufs; struct user_struct *user; +#ifdef CONFIG_WATCH_QUEUE + struct watch_queue *watch_queue; +#endif }; /* @@ -237,9 +243,20 @@ void pipe_buf_mark_unmergeable(struct pipe_buffer *buf); extern const struct pipe_buf_operations nosteal_pipe_buf_ops; +#ifdef CONFIG_WATCH_QUEUE +unsigned long account_pipe_buffers(struct user_struct *user, + unsigned long old, unsigned long new); +bool too_many_pipe_buffers_soft(unsigned long user_bufs); +bool too_many_pipe_buffers_hard(unsigned long user_bufs); +bool pipe_is_unprivileged_user(void); +#endif + /* for F_SETPIPE_SZ and F_GETPIPE_SZ */ +#ifdef CONFIG_WATCH_QUEUE +int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots); +#endif long pipe_fcntl(struct file *, unsigned int, unsigned long arg); -struct pipe_inode_info *get_pipe_info(struct file *file); +struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice); int create_pipe_files(struct file **, int); unsigned int round_pipe_size(unsigned long size); diff --git a/include/linux/watch_queue.h b/include/linux/watch_queue.h new file mode 100644 index 000000000000..5e08db2adc31 --- /dev/null +++ b/include/linux/watch_queue.h @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-2.0 +/* User-mappable watch queue + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * See Documentation/watch_queue.rst + */ + +#ifndef _LINUX_WATCH_QUEUE_H +#define _LINUX_WATCH_QUEUE_H + +#include +#include +#include + +#ifdef CONFIG_WATCH_QUEUE + +struct cred; + +struct watch_type_filter { + enum watch_notification_type type; + __u32 subtype_filter[1]; /* Bitmask of subtypes to filter on */ + __u32 info_filter; /* Filter on watch_notification::info */ + __u32 info_mask; /* Mask of relevant bits in info_filter */ +}; + +struct watch_filter { + union { + struct rcu_head rcu; + unsigned long type_filter[2]; /* Bitmask of accepted types */ + }; + u32 nr_filters; /* Number of filters */ + struct watch_type_filter filters[]; +}; + +struct watch_queue { + struct rcu_head rcu; + struct watch_filter __rcu *filter; + struct pipe_inode_info *pipe; /* The pipe we're using as a buffer */ + struct hlist_head watches; /* Contributory watches */ + struct page **notes; /* Preallocated notifications */ + unsigned long *notes_bitmap; /* Allocation bitmap for notes */ + struct kref usage; /* Object usage count */ + spinlock_t lock; + unsigned int nr_notes; /* Number of notes */ + unsigned int nr_pages; /* Number of pages in notes[] */ + bool defunct; /* T when queues closed */ +}; + +/* + * Representation of a watch on an object. + */ +struct watch { + union { + struct rcu_head rcu; + u32 info_id; /* ID to be OR'd in to info field */ + }; + struct watch_queue __rcu *queue; /* Queue to post events to */ + struct hlist_node queue_node; /* Link in queue->watches */ + struct watch_list __rcu *watch_list; + struct hlist_node list_node; /* Link in watch_list->watchers */ + const struct cred *cred; /* Creds of the owner of the watch */ + void *private; /* Private data for the watched object */ + u64 id; /* Internal identifier */ + struct kref usage; /* Object usage count */ +}; + +/* + * List of watches on an object. + */ +struct watch_list { + struct rcu_head rcu; + struct hlist_head watchers; + void (*release_watch)(struct watch *); + spinlock_t lock; +}; + +extern void __post_watch_notification(struct watch_list *, + struct watch_notification *, + const struct cred *, + u64); +extern struct watch_queue *get_watch_queue(int); +extern void put_watch_queue(struct watch_queue *); +extern void init_watch(struct watch *, struct watch_queue *); +extern int add_watch_to_object(struct watch *, struct watch_list *); +extern int remove_watch_from_object(struct watch_list *, struct watch_queue *, u64, bool); +extern long watch_queue_set_size(struct pipe_inode_info *, unsigned int); +extern long watch_queue_set_filter(struct pipe_inode_info *, + struct watch_notification_filter __user *); +extern int watch_queue_init(struct pipe_inode_info *); +extern void watch_queue_clear(struct watch_queue *); + +static inline void init_watch_list(struct watch_list *wlist, + void (*release_watch)(struct watch *)) +{ + INIT_HLIST_HEAD(&wlist->watchers); + spin_lock_init(&wlist->lock); + wlist->release_watch = release_watch; +} + +static inline void post_watch_notification(struct watch_list *wlist, + struct watch_notification *n, + const struct cred *cred, + u64 id) +{ + if (unlikely(wlist)) + __post_watch_notification(wlist, n, cred, id); +} + +static inline void remove_watch_list(struct watch_list *wlist, u64 id) +{ + if (wlist) { + remove_watch_from_object(wlist, NULL, id, true); + kfree_rcu(wlist, rcu); + } +} + +/** + * watch_sizeof - Calculate the information part of the size of a watch record, + * given the structure size. + */ +#define watch_sizeof(STRUCT) (sizeof(STRUCT) << WATCH_INFO_LENGTH__SHIFT) + +#endif + +#endif /* _LINUX_WATCH_QUEUE_H */ diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h index 9df72227f515..3a5790f1f05d 100644 --- a/include/uapi/linux/watch_queue.h +++ b/include/uapi/linux/watch_queue.h @@ -4,9 +4,13 @@ #include #include +#include #define O_NOTIFICATION_PIPE O_EXCL /* Parameter to pipe2() selecting notification pipe */ +#define IOC_WATCH_QUEUE_SET_SIZE _IO('W', 0x60) /* Set the size in pages */ +#define IOC_WATCH_QUEUE_SET_FILTER _IO('W', 0x61) /* Set the filter */ + enum watch_notification_type { WATCH_TYPE_META = 0, /* Special record */ WATCH_TYPE__NR = 1 @@ -41,6 +45,22 @@ struct watch_notification { #define WATCH_INFO_FLAG_7 0x00800000 }; +/* + * Notification filtering rules (IOC_WATCH_QUEUE_SET_FILTER). + */ +struct watch_notification_type_filter { + __u32 type; /* Type to apply filter to */ + __u32 info_filter; /* Filter on watch_notification::info */ + __u32 info_mask; /* Mask of relevant bits in info_filter */ + __u32 subtype_filter[8]; /* Bitmask of subtypes to filter on */ +}; + +struct watch_notification_filter { + __u32 nr_filters; /* Number of filters */ + __u32 __reserved; /* Must be 0 */ + struct watch_notification_type_filter filters[]; +}; + /* * Extended watch removal notification. This is used optionally if the type diff --git a/init/Kconfig b/init/Kconfig index 74a5ac65644f..c95a2a5654a9 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -326,6 +326,18 @@ config POSIX_MQUEUE_SYSCTL depends on SYSCTL default y +config WATCH_QUEUE + bool "General notification queue" + default n + help + + This is a general notification queue for the kernel to pass events to + userspace by splicing them into pipes. It can be used in conjunction + with watches for key/keyring change notifications and device + notifications. + + See Documentation/watch_queue.rst + config CROSS_MEMORY_ATTACH bool "Enable process_vm_readv/writev syscalls" depends on MMU diff --git a/kernel/Makefile b/kernel/Makefile index 4cb4130ced32..41e7e3ae07ec 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -115,6 +115,7 @@ obj-$(CONFIG_TORTURE_TEST) += torture.o obj-$(CONFIG_HAS_IOMEM) += iomem.o obj-$(CONFIG_RSEQ) += rseq.o +obj-$(CONFIG_WATCH_QUEUE) += watch_queue.o obj-$(CONFIG_SYSCTL_KUNIT_TEST) += sysctl-test.o diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c new file mode 100644 index 000000000000..c103e34f8705 --- /dev/null +++ b/kernel/watch_queue.c @@ -0,0 +1,657 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Watch queue and general notification mechanism, built on pipes + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * See Documentation/watch_queue.rst + */ + +#define pr_fmt(fmt) "watchq: " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_DESCRIPTION("Watch queue"); +MODULE_AUTHOR("Red Hat, Inc."); +MODULE_LICENSE("GPL"); + +#define WATCH_QUEUE_NOTE_SIZE 128 +#define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE) + +static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + struct watch_queue *wqueue = (struct watch_queue *)buf->private; + struct page *page; + unsigned int bit; + + /* We need to work out which note within the page this refers to, but + * the note might have been maximum size, so merely ANDing the offset + * off doesn't work. OTOH, the note must've been more than zero size. + */ + bit = buf->offset + buf->len; + if ((bit & (WATCH_QUEUE_NOTE_SIZE - 1)) == 0) + bit -= WATCH_QUEUE_NOTE_SIZE; + bit /= WATCH_QUEUE_NOTE_SIZE; + + page = buf->page; + bit += page->index; + + set_bit(bit, wqueue->notes_bitmap); +} + +static int watch_queue_pipe_buf_steal(struct pipe_inode_info *pipe, + struct pipe_buffer *buf) +{ + return -1; /* No. */ +} + +/* New data written to a pipe may be appended to a buffer with this type. */ +static const struct pipe_buf_operations watch_queue_pipe_buf_ops = { + .confirm = generic_pipe_buf_confirm, + .release = watch_queue_pipe_buf_release, + .steal = watch_queue_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + +/* + * Post a notification to a watch queue. + */ +static bool post_one_notification(struct watch_queue *wqueue, + struct watch_notification *n) +{ + void *p; + struct pipe_inode_info *pipe = wqueue->pipe; + struct pipe_buffer *buf; + struct page *page; + unsigned int head, tail, mask, note, offset, len; + bool done = false; + + if (!pipe) + return false; + + spin_lock_irq(&pipe->rd_wait.lock); + + if (wqueue->defunct) + goto out; + + mask = pipe->ring_size - 1; + head = pipe->head; + tail = pipe->tail; + if (pipe_full(head, tail, pipe->ring_size)) + goto lost; + + note = find_first_bit(wqueue->notes_bitmap, wqueue->nr_notes); + if (note >= wqueue->nr_notes) + goto lost; + + page = wqueue->notes[note / WATCH_QUEUE_NOTES_PER_PAGE]; + offset = note % WATCH_QUEUE_NOTES_PER_PAGE * WATCH_QUEUE_NOTE_SIZE; + get_page(page); + len = n->info & WATCH_INFO_LENGTH; + p = kmap_atomic(page); + memcpy(p + offset, n, len); + kunmap_atomic(p); + + buf = &pipe->bufs[head & mask]; + buf->page = page; + buf->private = (unsigned long)wqueue; + buf->ops = &watch_queue_pipe_buf_ops; + buf->offset = offset; + buf->len = len; + buf->flags = 0; + pipe->head = head + 1; + + if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { + spin_unlock_irq(&pipe->rd_wait.lock); + BUG(); + } + wake_up_interruptible_sync_poll_locked(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); + done = true; + +out: + spin_unlock_irq(&pipe->rd_wait.lock); + if (done) + kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); + return done; + +lost: + goto out; +} + +/* + * Apply filter rules to a notification. + */ +static bool filter_watch_notification(const struct watch_filter *wf, + const struct watch_notification *n) +{ + const struct watch_type_filter *wt; + unsigned int st_bits = sizeof(wt->subtype_filter[0]) * 8; + unsigned int st_index = n->subtype / st_bits; + unsigned int st_bit = 1U << (n->subtype % st_bits); + int i; + + if (!test_bit(n->type, wf->type_filter)) + return false; + + for (i = 0; i < wf->nr_filters; i++) { + wt = &wf->filters[i]; + if (n->type == wt->type && + (wt->subtype_filter[st_index] & st_bit) && + (n->info & wt->info_mask) == wt->info_filter) + return true; + } + + return false; /* If there is a filter, the default is to reject. */ +} + +/** + * __post_watch_notification - Post an event notification + * @wlist: The watch list to post the event to. + * @n: The notification record to post. + * @cred: The creds of the process that triggered the notification. + * @id: The ID to match on the watch. + * + * Post a notification of an event into a set of watch queues and let the users + * know. + * + * The size of the notification should be set in n->info & WATCH_INFO_LENGTH and + * should be in units of sizeof(*n). + */ +void __post_watch_notification(struct watch_list *wlist, + struct watch_notification *n, + const struct cred *cred, + u64 id) +{ + const struct watch_filter *wf; + struct watch_queue *wqueue; + struct watch *watch; + + if (((n->info & WATCH_INFO_LENGTH) >> WATCH_INFO_LENGTH__SHIFT) == 0) { + WARN_ON(1); + return; + } + + rcu_read_lock(); + + hlist_for_each_entry_rcu(watch, &wlist->watchers, list_node) { + if (watch->id != id) + continue; + n->info &= ~WATCH_INFO_ID; + n->info |= watch->info_id; + + wqueue = rcu_dereference(watch->queue); + wf = rcu_dereference(wqueue->filter); + if (wf && !filter_watch_notification(wf, n)) + continue; + + if (security_post_notification(watch->cred, cred, n) < 0) + continue; + + post_one_notification(wqueue, n); + } + + rcu_read_unlock(); +} +EXPORT_SYMBOL(__post_watch_notification); + +/* + * Allocate sufficient pages to preallocation for the requested number of + * notifications. + */ +long watch_queue_set_size(struct pipe_inode_info *pipe, unsigned int nr_notes) +{ + struct watch_queue *wqueue = pipe->watch_queue; + struct page **pages; + unsigned long *bitmap; + unsigned long user_bufs; + unsigned int bmsize; + int ret, i, nr_pages; + + if (!wqueue) + return -ENODEV; + if (wqueue->notes) + return -EBUSY; + + if (nr_notes < 1 || + nr_notes > 512) /* TODO: choose a better hard limit */ + return -EINVAL; + + nr_pages = (nr_notes + WATCH_QUEUE_NOTES_PER_PAGE - 1); + nr_pages /= WATCH_QUEUE_NOTES_PER_PAGE; + user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_pages); + + if (nr_pages > pipe->max_usage && + (too_many_pipe_buffers_hard(user_bufs) || + too_many_pipe_buffers_soft(user_bufs)) && + pipe_is_unprivileged_user()) { + ret = -EPERM; + goto error; + } + + ret = pipe_resize_ring(pipe, nr_notes); + if (ret < 0) + goto error; + + pages = kcalloc(sizeof(struct page *), nr_pages, GFP_KERNEL); + if (!pages) + goto error; + + for (i = 0; i < nr_pages; i++) { + pages[i] = alloc_page(GFP_KERNEL); + if (!pages[i]) + goto error_p; + pages[i]->index = i * WATCH_QUEUE_NOTES_PER_PAGE; + } + + bmsize = (nr_notes + BITS_PER_LONG - 1) / BITS_PER_LONG; + bmsize *= sizeof(unsigned long); + bitmap = kmalloc(bmsize, GFP_KERNEL); + if (!bitmap) + goto error_p; + + memset(bitmap, 0xff, bmsize); + wqueue->notes = pages; + wqueue->notes_bitmap = bitmap; + wqueue->nr_pages = nr_pages; + wqueue->nr_notes = nr_pages * WATCH_QUEUE_NOTES_PER_PAGE; + return 0; + +error_p: + for (i = 0; i < nr_pages; i++) + __free_page(pages[i]); + kfree(pages); +error: + (void) account_pipe_buffers(pipe->user, nr_pages, pipe->nr_accounted); + return ret; +} + +/* + * Set the filter on a watch queue. + */ +long watch_queue_set_filter(struct pipe_inode_info *pipe, + struct watch_notification_filter __user *_filter) +{ + struct watch_notification_type_filter *tf; + struct watch_notification_filter filter; + struct watch_type_filter *q; + struct watch_filter *wfilter; + struct watch_queue *wqueue = pipe->watch_queue; + int ret, nr_filter = 0, i; + + if (!wqueue) + return -ENODEV; + + if (!_filter) { + /* Remove the old filter */ + wfilter = NULL; + goto set; + } + + /* Grab the user's filter specification */ + if (copy_from_user(&filter, _filter, sizeof(filter)) != 0) + return -EFAULT; + if (filter.nr_filters == 0 || + filter.nr_filters > 16 || + filter.__reserved != 0) + return -EINVAL; + + tf = memdup_user(_filter->filters, filter.nr_filters * sizeof(*tf)); + if (IS_ERR(tf)) + return PTR_ERR(tf); + + ret = -EINVAL; + for (i = 0; i < filter.nr_filters; i++) { + if ((tf[i].info_filter & ~tf[i].info_mask) || + tf[i].info_mask & WATCH_INFO_LENGTH) + goto err_filter; + /* Ignore any unknown types */ + if (tf[i].type >= sizeof(wfilter->type_filter) * 8) + continue; + nr_filter++; + } + + /* Now we need to build the internal filter from only the relevant + * user-specified filters. + */ + ret = -ENOMEM; + wfilter = kzalloc(struct_size(wfilter, filters, nr_filter), GFP_KERNEL); + if (!wfilter) + goto err_filter; + wfilter->nr_filters = nr_filter; + + q = wfilter->filters; + for (i = 0; i < filter.nr_filters; i++) { + if (tf[i].type >= sizeof(wfilter->type_filter) * BITS_PER_LONG) + continue; + + q->type = tf[i].type; + q->info_filter = tf[i].info_filter; + q->info_mask = tf[i].info_mask; + q->subtype_filter[0] = tf[i].subtype_filter[0]; + __set_bit(q->type, wfilter->type_filter); + q++; + } + + kfree(tf); +set: + pipe_lock(pipe); + wfilter = rcu_replace_pointer(wqueue->filter, wfilter, + lockdep_is_held(&pipe->mutex)); + pipe_unlock(pipe); + if (wfilter) + kfree_rcu(wfilter, rcu); + return 0; + +err_filter: + kfree(tf); + return ret; +} + +static void __put_watch_queue(struct kref *kref) +{ + struct watch_queue *wqueue = + container_of(kref, struct watch_queue, usage); + struct watch_filter *wfilter; + int i; + + for (i = 0; i < wqueue->nr_pages; i++) + __free_page(wqueue->notes[i]); + + wfilter = rcu_access_pointer(wqueue->filter); + if (wfilter) + kfree_rcu(wfilter, rcu); + kfree_rcu(wqueue, rcu); +} + +/** + * put_watch_queue - Dispose of a ref on a watchqueue. + * @wqueue: The watch queue to unref. + */ +void put_watch_queue(struct watch_queue *wqueue) +{ + kref_put(&wqueue->usage, __put_watch_queue); +} +EXPORT_SYMBOL(put_watch_queue); + +static void free_watch(struct rcu_head *rcu) +{ + struct watch *watch = container_of(rcu, struct watch, rcu); + + put_watch_queue(rcu_access_pointer(watch->queue)); + put_cred(watch->cred); +} + +static void __put_watch(struct kref *kref) +{ + struct watch *watch = container_of(kref, struct watch, usage); + + call_rcu(&watch->rcu, free_watch); +} + +/* + * Discard a watch. + */ +static void put_watch(struct watch *watch) +{ + kref_put(&watch->usage, __put_watch); +} + +/** + * init_watch_queue - Initialise a watch + * @watch: The watch to initialise. + * @wqueue: The queue to assign. + * + * Initialise a watch and set the watch queue. + */ +void init_watch(struct watch *watch, struct watch_queue *wqueue) +{ + kref_init(&watch->usage); + INIT_HLIST_NODE(&watch->list_node); + INIT_HLIST_NODE(&watch->queue_node); + rcu_assign_pointer(watch->queue, wqueue); +} + +/** + * add_watch_to_object - Add a watch on an object to a watch list + * @watch: The watch to add + * @wlist: The watch list to add to + * + * @watch->queue must have been set to point to the queue to post notifications + * to and the watch list of the object to be watched. @watch->cred must also + * have been set to the appropriate credentials and a ref taken on them. + * + * The caller must pin the queue and the list both and must hold the list + * locked against racing watch additions/removals. + */ +int add_watch_to_object(struct watch *watch, struct watch_list *wlist) +{ + struct watch_queue *wqueue = rcu_access_pointer(watch->queue); + struct watch *w; + + hlist_for_each_entry(w, &wlist->watchers, list_node) { + struct watch_queue *wq = rcu_access_pointer(w->queue); + if (wqueue == wq && watch->id == w->id) + return -EBUSY; + } + + watch->cred = get_current_cred(); + rcu_assign_pointer(watch->watch_list, wlist); + + spin_lock_bh(&wqueue->lock); + kref_get(&wqueue->usage); + kref_get(&watch->usage); + hlist_add_head(&watch->queue_node, &wqueue->watches); + spin_unlock_bh(&wqueue->lock); + + hlist_add_head(&watch->list_node, &wlist->watchers); + return 0; +} +EXPORT_SYMBOL(add_watch_to_object); + +/** + * remove_watch_from_object - Remove a watch or all watches from an object. + * @wlist: The watch list to remove from + * @wq: The watch queue of interest (ignored if @all is true) + * @id: The ID of the watch to remove (ignored if @all is true) + * @all: True to remove all objects + * + * Remove a specific watch or all watches from an object. A notification is + * sent to the watcher to tell them that this happened. + */ +int remove_watch_from_object(struct watch_list *wlist, struct watch_queue *wq, + u64 id, bool all) +{ + struct watch_notification_removal n; + struct watch_queue *wqueue; + struct watch *watch; + int ret = -EBADSLT; + + rcu_read_lock(); + +again: + spin_lock(&wlist->lock); + hlist_for_each_entry(watch, &wlist->watchers, list_node) { + if (all || + (watch->id == id && rcu_access_pointer(watch->queue) == wq)) + goto found; + } + spin_unlock(&wlist->lock); + goto out; + +found: + ret = 0; + hlist_del_init_rcu(&watch->list_node); + rcu_assign_pointer(watch->watch_list, NULL); + spin_unlock(&wlist->lock); + + /* We now own the reference on watch that used to belong to wlist. */ + + n.watch.type = WATCH_TYPE_META; + n.watch.subtype = WATCH_META_REMOVAL_NOTIFICATION; + n.watch.info = watch->info_id | watch_sizeof(n.watch); + n.id = id; + if (id != 0) + n.watch.info = watch->info_id | watch_sizeof(n); + + wqueue = rcu_dereference(watch->queue); + + /* We don't need the watch list lock for the next bit as RCU is + * protecting *wqueue from deallocation. + */ + if (wqueue) { + post_one_notification(wqueue, &n.watch); + + spin_lock_bh(&wqueue->lock); + + if (!hlist_unhashed(&watch->queue_node)) { + hlist_del_init_rcu(&watch->queue_node); + put_watch(watch); + } + + spin_unlock_bh(&wqueue->lock); + } + + if (wlist->release_watch) { + void (*release_watch)(struct watch *); + + release_watch = wlist->release_watch; + rcu_read_unlock(); + (*release_watch)(watch); + rcu_read_lock(); + } + put_watch(watch); + + if (all && !hlist_empty(&wlist->watchers)) + goto again; +out: + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(remove_watch_from_object); + +/* + * Remove all the watches that are contributory to a queue. This has the + * potential to race with removal of the watches by the destruction of the + * objects being watched or with the distribution of notifications. + */ +void watch_queue_clear(struct watch_queue *wqueue) +{ + struct watch_list *wlist; + struct watch *watch; + bool release; + + rcu_read_lock(); + spin_lock_bh(&wqueue->lock); + + /* Prevent new additions and prevent notifications from happening */ + wqueue->defunct = true; + + while (!hlist_empty(&wqueue->watches)) { + watch = hlist_entry(wqueue->watches.first, struct watch, queue_node); + hlist_del_init_rcu(&watch->queue_node); + /* We now own a ref on the watch. */ + spin_unlock_bh(&wqueue->lock); + + /* We can't do the next bit under the queue lock as we need to + * get the list lock - which would cause a deadlock if someone + * was removing from the opposite direction at the same time or + * posting a notification. + */ + wlist = rcu_dereference(watch->watch_list); + if (wlist) { + void (*release_watch)(struct watch *); + + spin_lock(&wlist->lock); + + release = !hlist_unhashed(&watch->list_node); + if (release) { + hlist_del_init_rcu(&watch->list_node); + rcu_assign_pointer(watch->watch_list, NULL); + + /* We now own a second ref on the watch. */ + } + + release_watch = wlist->release_watch; + spin_unlock(&wlist->lock); + + if (release) { + if (release_watch) { + rcu_read_unlock(); + /* This might need to call dput(), so + * we have to drop all the locks. + */ + (*release_watch)(watch); + rcu_read_lock(); + } + put_watch(watch); + } + } + + put_watch(watch); + spin_lock_bh(&wqueue->lock); + } + + spin_unlock_bh(&wqueue->lock); + rcu_read_unlock(); +} + +/** + * get_watch_queue - Get a watch queue from its file descriptor. + * @fd: The fd to query. + */ +struct watch_queue *get_watch_queue(int fd) +{ + struct pipe_inode_info *pipe; + struct watch_queue *wqueue = ERR_PTR(-EINVAL); + struct fd f; + + f = fdget(fd); + if (f.file) { + pipe = get_pipe_info(f.file, false); + if (pipe && pipe->watch_queue) { + wqueue = pipe->watch_queue; + kref_get(&wqueue->usage); + } + fdput(f); + } + + return wqueue; +} +EXPORT_SYMBOL(get_watch_queue); + +/* + * Initialise a watch queue + */ +int watch_queue_init(struct pipe_inode_info *pipe) +{ + struct watch_queue *wqueue; + + wqueue = kzalloc(sizeof(*wqueue), GFP_KERNEL); + if (!wqueue) + return -ENOMEM; + + wqueue->pipe = pipe; + kref_init(&wqueue->usage); + spin_lock_init(&wqueue->lock); + INIT_HLIST_HEAD(&wqueue->watches); + + pipe->watch_queue = wqueue; + return 0; +} -- cgit v1.3 From 998f50407ffc9370565c7ed3fcd1366adccdfbbf Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Feb 2020 13:58:35 +0000 Subject: security: Add hooks to rule on setting a watch Add security hooks that will allow an LSM to rule on whether or not a watch may be set. More than one hook is required as the watches watch different types of object. Signed-off-by: David Howells Acked-by: James Morris cc: Casey Schaufler cc: Stephen Smalley cc: linux-security-module@vger.kernel.org --- include/linux/lsm_hook_defs.h | 4 ++++ include/linux/lsm_hooks.h | 5 +++++ include/linux/security.h | 9 +++++++++ security/security.c | 7 +++++++ 4 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index e0def45b5cc5..a54f49e95708 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -256,6 +256,10 @@ LSM_HOOK(int, 0, inode_getsecctx, struct inode *inode, void **ctx, #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) LSM_HOOK(int, 0, post_notification, const struct cred *w_cred, const struct cred *cred, struct watch_notification *n) +#endif /* CONFIG_SECURITY && CONFIG_WATCH_QUEUE */ + +#if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS) +LSM_HOOK(int, 0, watch_key, struct key *key) #endif /* CONFIG_SECURITY && CONFIG_KEY_NOTIFICATIONS */ #ifdef CONFIG_SECURITY_NETWORK diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 0b5e5769b836..3f1374cffb76 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1446,6 +1446,11 @@ * @cred: The event-triggerer's credentials * @n: The notification being posted * + * @watch_key: + * Check to see if a process is allowed to watch for event notifications + * from a key or keyring. + * @key: The key to watch. + * * Security hooks for using the eBPF maps and programs functionalities through * eBPF syscalls. * diff --git a/include/linux/security.h b/include/linux/security.h index 9a5d12ab491b..e7914e4e0b02 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1290,6 +1290,15 @@ static inline int security_post_notification(const struct cred *w_cred, } #endif +#if defined(CONFIG_SECURITY) && defined(CONFIG_KEY_NOTIFICATIONS) +int security_watch_key(struct key *key); +#else +static inline int security_watch_key(struct key *key) +{ + return 0; +} +#endif + #ifdef CONFIG_SECURITY_NETWORK int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk); diff --git a/security/security.c b/security/security.c index 7d55607120b4..c73334ab2882 100644 --- a/security/security.c +++ b/security/security.c @@ -2016,6 +2016,13 @@ int security_post_notification(const struct cred *w_cred, } #endif /* CONFIG_WATCH_QUEUE */ +#ifdef CONFIG_KEY_NOTIFICATIONS +int security_watch_key(struct key *key) +{ + return call_int_hook(watch_key, 0, key); +} +#endif + #ifdef CONFIG_SECURITY_NETWORK int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) -- cgit v1.3 From f7e47677e39a03057dcced2016c92a9c868693ec Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jan 2020 17:07:11 +0000 Subject: watch_queue: Add a key/keyring notification facility Add a key/keyring change notification facility whereby notifications about changes in key and keyring content and attributes can be received. Firstly, an event queue needs to be created: pipe2(fds, O_NOTIFICATION_PIPE); ioctl(fds[1], IOC_WATCH_QUEUE_SET_SIZE, 256); then a notification can be set up to report notifications via that queue: struct watch_notification_filter filter = { .nr_filters = 1, .filters = { [0] = { .type = WATCH_TYPE_KEY_NOTIFY, .subtype_filter[0] = UINT_MAX, }, }, }; ioctl(fds[1], IOC_WATCH_QUEUE_SET_FILTER, &filter); keyctl_watch_key(KEY_SPEC_SESSION_KEYRING, fds[1], 0x01); After that, records will be placed into the queue when events occur in which keys are changed in some way. Records are of the following format: struct key_notification { struct watch_notification watch; __u32 key_id; __u32 aux; } *n; Where: n->watch.type will be WATCH_TYPE_KEY_NOTIFY. n->watch.subtype will indicate the type of event, such as NOTIFY_KEY_REVOKED. n->watch.info & WATCH_INFO_LENGTH will indicate the length of the record. n->watch.info & WATCH_INFO_ID will be the second argument to keyctl_watch_key(), shifted. n->key will be the ID of the affected key. n->aux will hold subtype-dependent information, such as the key being linked into the keyring specified by n->key in the case of NOTIFY_KEY_LINKED. Note that it is permissible for event records to be of variable length - or, at least, the length may be dependent on the subtype. Note also that the queue can be shared between multiple notifications of various types. Signed-off-by: David Howells Reviewed-by: James Morris --- Documentation/security/keys/core.rst | 57 +++++++++++++++++++++ include/linux/key.h | 3 ++ include/uapi/linux/keyctl.h | 2 + include/uapi/linux/watch_queue.h | 28 +++++++++- security/keys/Kconfig | 9 ++++ security/keys/compat.c | 3 ++ security/keys/gc.c | 5 ++ security/keys/internal.h | 30 ++++++++++- security/keys/key.c | 38 +++++++++----- security/keys/keyctl.c | 99 ++++++++++++++++++++++++++++++++++-- security/keys/keyring.c | 20 +++++--- security/keys/request_key.c | 4 +- 12 files changed, 270 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/Documentation/security/keys/core.rst b/Documentation/security/keys/core.rst index d9b0b859018b..6cff2b5f88ed 100644 --- a/Documentation/security/keys/core.rst +++ b/Documentation/security/keys/core.rst @@ -1026,6 +1026,63 @@ The keyctl syscall functions are: written into the output buffer. Verification returns 0 on success. + * Watch a key or keyring for changes:: + + long keyctl(KEYCTL_WATCH_KEY, key_serial_t key, int queue_fd, + const struct watch_notification_filter *filter); + + This will set or remove a watch for changes on the specified key or + keyring. + + "key" is the ID of the key to be watched. + + "queue_fd" is a file descriptor referring to an open "/dev/watch_queue" + which manages the buffer into which notifications will be delivered. + + "filter" is either NULL to remove a watch or a filter specification to + indicate what events are required from the key. + + See Documentation/watch_queue.rst for more information. + + Note that only one watch may be emplaced for any particular { key, + queue_fd } combination. + + Notification records look like:: + + struct key_notification { + struct watch_notification watch; + __u32 key_id; + __u32 aux; + }; + + In this, watch::type will be "WATCH_TYPE_KEY_NOTIFY" and subtype will be + one of:: + + NOTIFY_KEY_INSTANTIATED + NOTIFY_KEY_UPDATED + NOTIFY_KEY_LINKED + NOTIFY_KEY_UNLINKED + NOTIFY_KEY_CLEARED + NOTIFY_KEY_REVOKED + NOTIFY_KEY_INVALIDATED + NOTIFY_KEY_SETATTR + + Where these indicate a key being instantiated/rejected, updated, a link + being made in a keyring, a link being removed from a keyring, a keyring + being cleared, a key being revoked, a key being invalidated or a key + having one of its attributes changed (user, group, perm, timeout, + restriction). + + If a watched key is deleted, a basic watch_notification will be issued + with "type" set to WATCH_TYPE_META and "subtype" set to + watch_meta_removal_notification. The watchpoint ID will be set in the + "info" field. + + This needs to be configured by enabling: + + "Provide key/keyring change notifications" (KEY_NOTIFICATIONS) + + Kernel Services =============== diff --git a/include/linux/key.h b/include/linux/key.h index 6cf8e71cf8b7..b99b40db08fc 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -176,6 +176,9 @@ struct key { struct list_head graveyard_link; struct rb_node serial_node; }; +#ifdef CONFIG_KEY_NOTIFICATIONS + struct watch_list *watchers; /* Entities watching this key for changes */ +#endif struct rw_semaphore sem; /* change vs change sem */ struct key_user *user; /* owner of this key */ void *security; /* security data for this key */ diff --git a/include/uapi/linux/keyctl.h b/include/uapi/linux/keyctl.h index ed3d5893830d..4c8884eea808 100644 --- a/include/uapi/linux/keyctl.h +++ b/include/uapi/linux/keyctl.h @@ -69,6 +69,7 @@ #define KEYCTL_RESTRICT_KEYRING 29 /* Restrict keys allowed to link to a keyring */ #define KEYCTL_MOVE 30 /* Move keys between keyrings */ #define KEYCTL_CAPABILITIES 31 /* Find capabilities of keyrings subsystem */ +#define KEYCTL_WATCH_KEY 32 /* Watch a key or ring of keys for changes */ /* keyctl structures */ struct keyctl_dh_params { @@ -130,5 +131,6 @@ struct keyctl_pkey_params { #define KEYCTL_CAPS0_MOVE 0x80 /* KEYCTL_MOVE supported */ #define KEYCTL_CAPS1_NS_KEYRING_NAME 0x01 /* Keyring names are per-user_namespace */ #define KEYCTL_CAPS1_NS_KEY_TAG 0x02 /* Key indexing can include a namespace tag */ +#define KEYCTL_CAPS1_NOTIFICATIONS 0x04 /* Keys generate watchable notifications */ #endif /* _LINUX_KEYCTL_H */ diff --git a/include/uapi/linux/watch_queue.h b/include/uapi/linux/watch_queue.h index 3a5790f1f05d..c3d8320b5d3a 100644 --- a/include/uapi/linux/watch_queue.h +++ b/include/uapi/linux/watch_queue.h @@ -13,7 +13,8 @@ enum watch_notification_type { WATCH_TYPE_META = 0, /* Special record */ - WATCH_TYPE__NR = 1 + WATCH_TYPE_KEY_NOTIFY = 1, /* Key change event notification */ + WATCH_TYPE__NR = 2 }; enum watch_meta_notification_subtype { @@ -75,4 +76,29 @@ struct watch_notification_removal { __u64 id; /* Type-dependent identifier */ }; +/* + * Type of key/keyring change notification. + */ +enum key_notification_subtype { + NOTIFY_KEY_INSTANTIATED = 0, /* Key was instantiated (aux is error code) */ + NOTIFY_KEY_UPDATED = 1, /* Key was updated */ + NOTIFY_KEY_LINKED = 2, /* Key (aux) was added to watched keyring */ + NOTIFY_KEY_UNLINKED = 3, /* Key (aux) was removed from watched keyring */ + NOTIFY_KEY_CLEARED = 4, /* Keyring was cleared */ + NOTIFY_KEY_REVOKED = 5, /* Key was revoked */ + NOTIFY_KEY_INVALIDATED = 6, /* Key was invalidated */ + NOTIFY_KEY_SETATTR = 7, /* Key's attributes got changed */ +}; + +/* + * Key/keyring notification record. + * - watch.type = WATCH_TYPE_KEY_NOTIFY + * - watch.subtype = enum key_notification_type + */ +struct key_notification { + struct watch_notification watch; + __u32 key_id; /* The key/keyring affected */ + __u32 aux; /* Per-type auxiliary data */ +}; + #endif /* _UAPI_LINUX_WATCH_QUEUE_H */ diff --git a/security/keys/Kconfig b/security/keys/Kconfig index 47c041563d41..d4dc5ea208af 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -116,3 +116,12 @@ config KEY_DH_OPERATIONS in the kernel. If you are unsure as to whether this is required, answer N. + +config KEY_NOTIFICATIONS + bool "Provide key/keyring change notifications" + depends on KEYS && WATCH_QUEUE + help + This option provides support for getting change notifications on keys + and keyrings on which the caller has View permission. This makes use + of the /dev/watch_queue misc device to handle the notification + buffer and provides KEYCTL_WATCH_KEY to enable/disable watches. diff --git a/security/keys/compat.c b/security/keys/compat.c index b975f8f11124..6ee9d8f6a4a5 100644 --- a/security/keys/compat.c +++ b/security/keys/compat.c @@ -156,6 +156,9 @@ COMPAT_SYSCALL_DEFINE5(keyctl, u32, option, case KEYCTL_CAPABILITIES: return keyctl_capabilities(compat_ptr(arg2), arg3); + case KEYCTL_WATCH_KEY: + return keyctl_watch_key(arg2, arg3, arg4); + default: return -EOPNOTSUPP; } diff --git a/security/keys/gc.c b/security/keys/gc.c index 671dd730ecfc..3c90807476eb 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -131,6 +131,11 @@ static noinline void key_gc_unused_keys(struct list_head *keys) kdebug("- %u", key->serial); key_check(key); +#ifdef CONFIG_KEY_NOTIFICATIONS + remove_watch_list(key->watchers, key->serial); + key->watchers = NULL; +#endif + /* Throw away the key data if the key is instantiated */ if (state == KEY_IS_POSITIVE && key->type->destroy) key->type->destroy(key); diff --git a/security/keys/internal.h b/security/keys/internal.h index 6d0ca48ae9a5..28e17f4f3328 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -99,7 +100,8 @@ extern int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit **_edit); extern int __key_link_check_live_key(struct key *keyring, struct key *key); -extern void __key_link(struct key *key, struct assoc_array_edit **_edit); +extern void __key_link(struct key *keyring, struct key *key, + struct assoc_array_edit **_edit); extern void __key_link_end(struct key *keyring, const struct keyring_index_key *index_key, struct assoc_array_edit *edit); @@ -183,6 +185,23 @@ extern int key_task_permission(const key_ref_t key_ref, const struct cred *cred, key_perm_t perm); +static inline void notify_key(struct key *key, + enum key_notification_subtype subtype, u32 aux) +{ +#ifdef CONFIG_KEY_NOTIFICATIONS + struct key_notification n = { + .watch.type = WATCH_TYPE_KEY_NOTIFY, + .watch.subtype = subtype, + .watch.info = watch_sizeof(n), + .key_id = key_serial(key), + .aux = aux, + }; + + post_watch_notification(key->watchers, &n.watch, current_cred(), + n.key_id); +#endif +} + /* * Check to see whether permission is granted to use a key in the desired way. */ @@ -333,6 +352,15 @@ static inline long keyctl_pkey_e_d_s(int op, extern long keyctl_capabilities(unsigned char __user *_buffer, size_t buflen); +#ifdef CONFIG_KEY_NOTIFICATIONS +extern long keyctl_watch_key(key_serial_t, int, int); +#else +static inline long keyctl_watch_key(key_serial_t key_id, int watch_fd, int watch_id) +{ + return -EOPNOTSUPP; +} +#endif + /* * Debugging key validation */ diff --git a/security/keys/key.c b/security/keys/key.c index e959b3c96b48..e282c6179b21 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -444,6 +444,7 @@ static int __key_instantiate_and_link(struct key *key, /* mark the key as being instantiated */ atomic_inc(&key->user->nikeys); mark_key_instantiated(key, 0); + notify_key(key, NOTIFY_KEY_INSTANTIATED, 0); if (test_and_clear_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags)) awaken = 1; @@ -453,7 +454,7 @@ static int __key_instantiate_and_link(struct key *key, if (test_bit(KEY_FLAG_KEEP, &keyring->flags)) set_bit(KEY_FLAG_KEEP, &key->flags); - __key_link(key, _edit); + __key_link(keyring, key, _edit); } /* disable the authorisation key */ @@ -601,6 +602,7 @@ int key_reject_and_link(struct key *key, /* mark the key as being negatively instantiated */ atomic_inc(&key->user->nikeys); mark_key_instantiated(key, -error); + notify_key(key, NOTIFY_KEY_INSTANTIATED, -error); key->expiry = ktime_get_real_seconds() + timeout; key_schedule_gc(key->expiry + key_gc_delay); @@ -611,7 +613,7 @@ int key_reject_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring && link_ret == 0) - __key_link(key, &edit); + __key_link(keyring, key, &edit); /* disable the authorisation key */ if (authkey) @@ -764,9 +766,11 @@ static inline key_ref_t __key_update(key_ref_t key_ref, down_write(&key->sem); ret = key->type->update(key, prep); - if (ret == 0) + if (ret == 0) { /* Updating a negative key positively instantiates it */ mark_key_instantiated(key, 0); + notify_key(key, NOTIFY_KEY_UPDATED, 0); + } up_write(&key->sem); @@ -1023,9 +1027,11 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen) down_write(&key->sem); ret = key->type->update(key, &prep); - if (ret == 0) + if (ret == 0) { /* Updating a negative key positively instantiates it */ mark_key_instantiated(key, 0); + notify_key(key, NOTIFY_KEY_UPDATED, 0); + } up_write(&key->sem); @@ -1057,15 +1063,17 @@ void key_revoke(struct key *key) * instantiated */ down_write_nested(&key->sem, 1); - if (!test_and_set_bit(KEY_FLAG_REVOKED, &key->flags) && - key->type->revoke) - key->type->revoke(key); - - /* set the death time to no more than the expiry time */ - time = ktime_get_real_seconds(); - if (key->revoked_at == 0 || key->revoked_at > time) { - key->revoked_at = time; - key_schedule_gc(key->revoked_at + key_gc_delay); + if (!test_and_set_bit(KEY_FLAG_REVOKED, &key->flags)) { + notify_key(key, NOTIFY_KEY_REVOKED, 0); + if (key->type->revoke) + key->type->revoke(key); + + /* set the death time to no more than the expiry time */ + time = ktime_get_real_seconds(); + if (key->revoked_at == 0 || key->revoked_at > time) { + key->revoked_at = time; + key_schedule_gc(key->revoked_at + key_gc_delay); + } } up_write(&key->sem); @@ -1087,8 +1095,10 @@ void key_invalidate(struct key *key) if (!test_bit(KEY_FLAG_INVALIDATED, &key->flags)) { down_write_nested(&key->sem, 1); - if (!test_and_set_bit(KEY_FLAG_INVALIDATED, &key->flags)) + if (!test_and_set_bit(KEY_FLAG_INVALIDATED, &key->flags)) { + notify_key(key, NOTIFY_KEY_INVALIDATED, 0); key_schedule_gc_links(); + } up_write(&key->sem); } } diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 5e01192e222a..7d8de1c9a478 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -37,7 +37,9 @@ static const unsigned char keyrings_capabilities[2] = { KEYCTL_CAPS0_MOVE ), [1] = (KEYCTL_CAPS1_NS_KEYRING_NAME | - KEYCTL_CAPS1_NS_KEY_TAG), + KEYCTL_CAPS1_NS_KEY_TAG | + (IS_ENABLED(CONFIG_KEY_NOTIFICATIONS) ? KEYCTL_CAPS1_NOTIFICATIONS : 0) + ), }; static int key_get_type_from_user(char *type, @@ -1039,6 +1041,7 @@ long keyctl_chown_key(key_serial_t id, uid_t user, gid_t group) if (group != (gid_t) -1) key->gid = gid; + notify_key(key, NOTIFY_KEY_SETATTR, 0); ret = 0; error_put: @@ -1089,6 +1092,7 @@ long keyctl_setperm_key(key_serial_t id, key_perm_t perm) /* if we're not the sysadmin, we can only change a key that we own */ if (capable(CAP_SYS_ADMIN) || uid_eq(key->uid, current_fsuid())) { key->perm = perm; + notify_key(key, NOTIFY_KEY_SETATTR, 0); ret = 0; } @@ -1480,10 +1484,12 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout) okay: key = key_ref_to_ptr(key_ref); ret = 0; - if (test_bit(KEY_FLAG_KEEP, &key->flags)) + if (test_bit(KEY_FLAG_KEEP, &key->flags)) { ret = -EPERM; - else + } else { key_set_timeout(key, timeout); + notify_key(key, NOTIFY_KEY_SETATTR, 0); + } key_put(key); error: @@ -1757,6 +1763,90 @@ error: return ret; } +#ifdef CONFIG_KEY_NOTIFICATIONS +/* + * Watch for changes to a key. + * + * The caller must have View permission to watch a key or keyring. + */ +long keyctl_watch_key(key_serial_t id, int watch_queue_fd, int watch_id) +{ + struct watch_queue *wqueue; + struct watch_list *wlist = NULL; + struct watch *watch = NULL; + struct key *key; + key_ref_t key_ref; + long ret; + + if (watch_id < -1 || watch_id > 0xff) + return -EINVAL; + + key_ref = lookup_user_key(id, KEY_LOOKUP_CREATE, KEY_NEED_VIEW); + if (IS_ERR(key_ref)) + return PTR_ERR(key_ref); + key = key_ref_to_ptr(key_ref); + + wqueue = get_watch_queue(watch_queue_fd); + if (IS_ERR(wqueue)) { + ret = PTR_ERR(wqueue); + goto err_key; + } + + if (watch_id >= 0) { + ret = -ENOMEM; + if (!key->watchers) { + wlist = kzalloc(sizeof(*wlist), GFP_KERNEL); + if (!wlist) + goto err_wqueue; + init_watch_list(wlist, NULL); + } + + watch = kzalloc(sizeof(*watch), GFP_KERNEL); + if (!watch) + goto err_wlist; + + init_watch(watch, wqueue); + watch->id = key->serial; + watch->info_id = (u32)watch_id << WATCH_INFO_ID__SHIFT; + + ret = security_watch_key(key); + if (ret < 0) + goto err_watch; + + down_write(&key->sem); + if (!key->watchers) { + key->watchers = wlist; + wlist = NULL; + } + + ret = add_watch_to_object(watch, key->watchers); + up_write(&key->sem); + + if (ret == 0) + watch = NULL; + } else { + ret = -EBADSLT; + if (key->watchers) { + down_write(&key->sem); + ret = remove_watch_from_object(key->watchers, + wqueue, key_serial(key), + false); + up_write(&key->sem); + } + } + +err_watch: + kfree(watch); +err_wlist: + kfree(wlist); +err_wqueue: + put_watch_queue(wqueue); +err_key: + key_put(key); + return ret; +} +#endif /* CONFIG_KEY_NOTIFICATIONS */ + /* * Get keyrings subsystem capabilities. */ @@ -1926,6 +2016,9 @@ SYSCALL_DEFINE5(keyctl, int, option, unsigned long, arg2, unsigned long, arg3, case KEYCTL_CAPABILITIES: return keyctl_capabilities((unsigned char __user *)arg2, (size_t)arg3); + case KEYCTL_WATCH_KEY: + return keyctl_watch_key((key_serial_t)arg2, (int)arg3, (int)arg4); + default: return -EOPNOTSUPP; } diff --git a/security/keys/keyring.c b/security/keys/keyring.c index 5ca620d31cd3..14abfe765b7e 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1056,12 +1056,14 @@ int keyring_restrict(key_ref_t keyring_ref, const char *type, down_write(&keyring->sem); down_write(&keyring_serialise_restrict_sem); - if (keyring->restrict_link) + if (keyring->restrict_link) { ret = -EEXIST; - else if (keyring_detect_restriction_cycle(keyring, restrict_link)) + } else if (keyring_detect_restriction_cycle(keyring, restrict_link)) { ret = -EDEADLK; - else + } else { keyring->restrict_link = restrict_link; + notify_key(keyring, NOTIFY_KEY_SETATTR, 0); + } up_write(&keyring_serialise_restrict_sem); up_write(&keyring->sem); @@ -1362,12 +1364,14 @@ int __key_link_check_live_key(struct key *keyring, struct key *key) * holds at most one link to any given key of a particular type+description * combination. */ -void __key_link(struct key *key, struct assoc_array_edit **_edit) +void __key_link(struct key *keyring, struct key *key, + struct assoc_array_edit **_edit) { __key_get(key); assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key)); assoc_array_apply_edit(*_edit); *_edit = NULL; + notify_key(keyring, NOTIFY_KEY_LINKED, key_serial(key)); } /* @@ -1451,7 +1455,7 @@ int key_link(struct key *keyring, struct key *key) if (ret == 0) ret = __key_link_check_live_key(keyring, key); if (ret == 0) - __key_link(key, &edit); + __key_link(keyring, key, &edit); error_end: __key_link_end(keyring, &key->index_key, edit); @@ -1483,7 +1487,7 @@ static int __key_unlink_begin(struct key *keyring, struct key *key, struct assoc_array_edit *edit; BUG_ON(*_edit != NULL); - + edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, &key->index_key); if (IS_ERR(edit)) @@ -1503,6 +1507,7 @@ static void __key_unlink(struct key *keyring, struct key *key, struct assoc_array_edit **_edit) { assoc_array_apply_edit(*_edit); + notify_key(keyring, NOTIFY_KEY_UNLINKED, key_serial(key)); *_edit = NULL; key_payload_reserve(keyring, keyring->datalen - KEYQUOTA_LINK_BYTES); } @@ -1621,7 +1626,7 @@ int key_move(struct key *key, goto error; __key_unlink(from_keyring, key, &from_edit); - __key_link(key, &to_edit); + __key_link(to_keyring, key, &to_edit); error: __key_link_end(to_keyring, &key->index_key, to_edit); __key_unlink_end(from_keyring, key, from_edit); @@ -1655,6 +1660,7 @@ int keyring_clear(struct key *keyring) } else { if (edit) assoc_array_apply_edit(edit); + notify_key(keyring, NOTIFY_KEY_CLEARED, 0); key_payload_reserve(keyring, 0); ret = 0; } diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 957b9e3e1492..e1b9f1a80676 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -418,7 +418,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx, goto key_already_present; if (dest_keyring) - __key_link(key, &edit); + __key_link(dest_keyring, key, &edit); mutex_unlock(&key_construction_mutex); if (dest_keyring) @@ -437,7 +437,7 @@ key_already_present: if (dest_keyring) { ret = __key_link_check_live_key(dest_keyring, key); if (ret == 0) - __key_link(key, &edit); + __key_link(dest_keyring, key, &edit); __key_link_end(dest_keyring, &ctx->index_key, edit); if (ret < 0) goto link_check_failed; -- cgit v1.3 From 8cfba76383e902acbed95092163052b1572f17a8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jan 2020 17:07:11 +0000 Subject: pipe: Allow buffers to be marked read-whole-or-error for notifications Allow a buffer to be marked such that read() must return the entire buffer in one go or return ENOBUFS. Multiple buffers can be amalgamated into a single read, but a short read will occur if the next "whole" buffer won't fit. This is useful for watch queue notifications to make sure we don't split a notification across multiple reads, especially given that we need to fabricate an overrun record under some circumstances - and that isn't in the buffers. Signed-off-by: David Howells --- fs/pipe.c | 8 +++++++- include/linux/pipe_fs_i.h | 1 + kernel/watch_queue.c | 2 +- samples/watch_queue/watch_test.c | 2 +- 4 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index da9bc1f21fd1..0f9fd897ceb5 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -320,8 +320,14 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) size_t written; int error; - if (chars > total_len) + if (chars > total_len) { + if (buf->flags & PIPE_BUF_FLAG_WHOLE) { + if (ret == 0) + ret = -ENOBUFS; + break; + } chars = total_len; + } error = pipe_buf_confirm(pipe, buf); if (error) { diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 1d3eaa233f4a..eaff59a2f074 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -8,6 +8,7 @@ #define PIPE_BUF_FLAG_ATOMIC 0x02 /* was atomically mapped */ #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ #define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */ +#define PIPE_BUF_FLAG_WHOLE 0x10 /* read() must return entire buffer or error */ /** * struct pipe_buffer - a linux kernel pipe buffer diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index c103e34f8705..ad64ea300f6d 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -115,7 +115,7 @@ static bool post_one_notification(struct watch_queue *wqueue, buf->ops = &watch_queue_pipe_buf_ops; buf->offset = offset; buf->len = len; - buf->flags = 0; + buf->flags = PIPE_BUF_FLAG_WHOLE; pipe->head = head + 1; if (!test_and_clear_bit(note, wqueue->notes_bitmap)) { diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c index 4a311790d4ca..8628b4c5d567 100644 --- a/samples/watch_queue/watch_test.c +++ b/samples/watch_queue/watch_test.c @@ -63,7 +63,7 @@ static void saw_key_change(struct watch_notification *n, size_t len) */ static void consumer(int fd) { - unsigned char buffer[4096], *p, *end; + unsigned char buffer[433], *p, *end; union { struct watch_notification n; unsigned char buf1[128]; -- cgit v1.3 From e7d553d69cf63aec7de0f38fed49ccbb30922e1e Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jan 2020 17:07:12 +0000 Subject: pipe: Add notification lossage handling Add handling for loss of notifications by having read() insert a loss-notification message after it has read the pipe buffer that was last in the ring when the loss occurred. Lossage can come about either by running out of notification descriptors or by running out of space in the pipe ring. Signed-off-by: David Howells --- fs/pipe.c | 28 ++++++++++++++++++++++++++++ include/linux/pipe_fs_i.h | 7 +++++++ kernel/watch_queue.c | 2 ++ samples/watch_queue/watch_test.c | 3 +++ 4 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/fs/pipe.c b/fs/pipe.c index 0f9fd897ceb5..620a113f92eb 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -314,6 +314,30 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) unsigned int tail = pipe->tail; unsigned int mask = pipe->ring_size - 1; +#ifdef CONFIG_WATCH_QUEUE + if (pipe->note_loss) { + struct watch_notification n; + + if (total_len < 8) { + if (ret == 0) + ret = -ENOBUFS; + break; + } + + n.type = WATCH_TYPE_META; + n.subtype = WATCH_META_LOSS_NOTIFICATION; + n.info = watch_sizeof(n); + if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) { + if (ret == 0) + ret = -EFAULT; + break; + } + ret += sizeof(n); + total_len -= sizeof(n); + pipe->note_loss = false; + } +#endif + if (!pipe_empty(head, tail)) { struct pipe_buffer *buf = &pipe->bufs[tail & mask]; size_t chars = buf->len; @@ -355,6 +379,10 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) if (!buf->len) { pipe_buf_release(pipe, buf); spin_lock_irq(&pipe->rd_wait.lock); +#ifdef CONFIG_WATCH_QUEUE + if (buf->flags & PIPE_BUF_FLAG_LOSS) + pipe->note_loss = true; +#endif tail++; pipe->tail = tail; spin_unlock_irq(&pipe->rd_wait.lock); diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index eaff59a2f074..6626f511de6f 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -9,6 +9,9 @@ #define PIPE_BUF_FLAG_GIFT 0x04 /* page is a gift */ #define PIPE_BUF_FLAG_PACKET 0x08 /* read() as a packet */ #define PIPE_BUF_FLAG_WHOLE 0x10 /* read() must return entire buffer or error */ +#ifdef CONFIG_WATCH_QUEUE +#define PIPE_BUF_FLAG_LOSS 0x20 /* Message loss happened after this buffer */ +#endif /** * struct pipe_buffer - a linux kernel pipe buffer @@ -34,6 +37,7 @@ struct pipe_buffer { * @wr_wait: writer wait point in case of full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @note_loss: The next read() should insert a data-lost message * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) * @nr_accounted: The amount this pipe accounts for in user->pipe_bufs @@ -56,6 +60,9 @@ struct pipe_inode_info { unsigned int tail; unsigned int max_usage; unsigned int ring_size; +#ifdef CONFIG_WATCH_QUEUE + bool note_loss; +#endif unsigned int nr_accounted; unsigned int readers; unsigned int writers; diff --git a/kernel/watch_queue.c b/kernel/watch_queue.c index ad64ea300f6d..9a9699c06709 100644 --- a/kernel/watch_queue.c +++ b/kernel/watch_queue.c @@ -132,6 +132,8 @@ out: return done; lost: + buf = &pipe->bufs[(head - 1) & mask]; + buf->flags |= PIPE_BUF_FLAG_LOSS; goto out; } diff --git a/samples/watch_queue/watch_test.c b/samples/watch_queue/watch_test.c index 8628b4c5d567..46e618a897fe 100644 --- a/samples/watch_queue/watch_test.c +++ b/samples/watch_queue/watch_test.c @@ -120,6 +120,9 @@ static void consumer(int fd) (n.n.info & WATCH_INFO_ID) >> WATCH_INFO_ID__SHIFT); break; + case WATCH_META_LOSS_NOTIFICATION: + printf("-- LOSS --\n"); + break; default: printf("other meta record\n"); break; -- cgit v1.3 From 8c0637e950d68933a67f7438f779d79b049b5e5c Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 12 May 2020 15:16:29 +0100 Subject: keys: Make the KEY_NEED_* perms an enum rather than a mask Since the meaning of combining the KEY_NEED_* constants is undefined, make it so that you can't do that by turning them into an enum. The enum is also given some extra values to represent special circumstances, such as: (1) The '0' value is reserved and causes a warning to trap the parameter being unset. (2) The key is to be unlinked and we require no permissions on it, only the keyring, (this replaces the KEY_LOOKUP_FOR_UNLINK flag). (3) An override due to CAP_SYS_ADMIN. (4) An override due to an instantiation token being present. (5) The permissions check is being deferred to later key_permission() calls. The extra values give the opportunity for LSMs to audit these situations. [Note: This really needs overhauling so that lookup_user_key() tells key_task_permission() and the LSM what operation is being done and leaves it to those functions to decide how to map that onto the available permits. However, I don't really want to make these change in the middle of the notifications patchset.] Signed-off-by: David Howells cc: Jarkko Sakkinen cc: Paul Moore cc: Stephen Smalley cc: Casey Schaufler cc: keyrings@vger.kernel.org cc: selinux@vger.kernel.org --- include/linux/key.h | 30 +++++++++++++++++------------ include/linux/security.h | 6 +++--- security/keys/internal.h | 8 ++++---- security/keys/keyctl.c | 16 ++++++++------- security/keys/permission.c | 31 ++++++++++++++++++++++------- security/keys/process_keys.c | 46 +++++++++++++++++++++----------------------- security/security.c | 6 +++--- security/selinux/hooks.c | 37 ++++++++++++++++++++++++++++------- security/smack/smack_lsm.c | 29 +++++++++++++++++++++------- 9 files changed, 135 insertions(+), 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index b99b40db08fc..0f2e24f13c2b 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -71,6 +71,23 @@ struct net; #define KEY_PERM_UNDEF 0xffffffff +/* + * The permissions required on a key that we're looking up. + */ +enum key_need_perm { + KEY_NEED_UNSPECIFIED, /* Needed permission unspecified */ + KEY_NEED_VIEW, /* Require permission to view attributes */ + KEY_NEED_READ, /* Require permission to read content */ + KEY_NEED_WRITE, /* Require permission to update / modify */ + KEY_NEED_SEARCH, /* Require permission to search (keyring) or find (key) */ + KEY_NEED_LINK, /* Require permission to link */ + KEY_NEED_SETATTR, /* Require permission to change attributes */ + KEY_NEED_UNLINK, /* Require permission to unlink key */ + KEY_SYSADMIN_OVERRIDE, /* Special: override by CAP_SYS_ADMIN */ + KEY_AUTHTOKEN_OVERRIDE, /* Special: override by possession of auth token */ + KEY_DEFER_PERM_CHECK, /* Special: permission check is deferred */ +}; + struct seq_file; struct user_struct; struct signal_struct; @@ -420,20 +437,9 @@ static inline key_serial_t key_serial(const struct key *key) extern void key_set_timeout(struct key *, unsigned); extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags, - key_perm_t perm); + enum key_need_perm need_perm); extern void key_free_user_ns(struct user_namespace *); -/* - * The permissions required on a key that we're looking up. - */ -#define KEY_NEED_VIEW 0x01 /* Require permission to view attributes */ -#define KEY_NEED_READ 0x02 /* Require permission to read content */ -#define KEY_NEED_WRITE 0x04 /* Require permission to update / modify */ -#define KEY_NEED_SEARCH 0x08 /* Require permission to search (keyring) or find (key) */ -#define KEY_NEED_LINK 0x10 /* Require permission to link */ -#define KEY_NEED_SETATTR 0x20 /* Require permission to change attributes */ -#define KEY_NEED_ALL 0x3f /* All the above permissions */ - static inline short key_read_state(const struct key *key) { /* Barrier versus mark_key_instantiated(). */ diff --git a/include/linux/security.h b/include/linux/security.h index e7914e4e0b02..57aac14e3418 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1767,8 +1767,8 @@ static inline int security_path_chroot(const struct path *path) int security_key_alloc(struct key *key, const struct cred *cred, unsigned long flags); void security_key_free(struct key *key); -int security_key_permission(key_ref_t key_ref, - const struct cred *cred, unsigned perm); +int security_key_permission(key_ref_t key_ref, const struct cred *cred, + enum key_need_perm need_perm); int security_key_getsecurity(struct key *key, char **_buffer); #else @@ -1786,7 +1786,7 @@ static inline void security_key_free(struct key *key) static inline int security_key_permission(key_ref_t key_ref, const struct cred *cred, - unsigned perm) + enum key_need_perm need_perm) { return 0; } diff --git a/security/keys/internal.h b/security/keys/internal.h index 28e17f4f3328..1fc17cb317a9 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -167,7 +167,6 @@ extern bool lookup_user_key_possessed(const struct key *key, const struct key_match_data *match_data); #define KEY_LOOKUP_CREATE 0x01 #define KEY_LOOKUP_PARTIAL 0x02 -#define KEY_LOOKUP_FOR_UNLINK 0x04 extern long join_session_keyring(const char *name); extern void key_change_session_keyring(struct callback_head *twork); @@ -183,7 +182,7 @@ extern void key_gc_keytype(struct key_type *ktype); extern int key_task_permission(const key_ref_t key_ref, const struct cred *cred, - key_perm_t perm); + enum key_need_perm need_perm); static inline void notify_key(struct key *key, enum key_notification_subtype subtype, u32 aux) @@ -205,9 +204,10 @@ static inline void notify_key(struct key *key, /* * Check to see whether permission is granted to use a key in the desired way. */ -static inline int key_permission(const key_ref_t key_ref, unsigned perm) +static inline int key_permission(const key_ref_t key_ref, + enum key_need_perm need_perm) { - return key_task_permission(key_ref, current_cred(), perm); + return key_task_permission(key_ref, current_cred(), need_perm); } extern struct key_type key_type_request_key_auth; diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 7d8de1c9a478..6763ee45e04d 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -434,7 +434,7 @@ long keyctl_invalidate_key(key_serial_t id) /* Root is permitted to invalidate certain special keys */ if (capable(CAP_SYS_ADMIN)) { - key_ref = lookup_user_key(id, 0, 0); + key_ref = lookup_user_key(id, 0, KEY_SYSADMIN_OVERRIDE); if (IS_ERR(key_ref)) goto error; if (test_bit(KEY_FLAG_ROOT_CAN_INVAL, @@ -479,7 +479,8 @@ long keyctl_keyring_clear(key_serial_t ringid) /* Root is permitted to invalidate certain special keyrings */ if (capable(CAP_SYS_ADMIN)) { - keyring_ref = lookup_user_key(ringid, 0, 0); + keyring_ref = lookup_user_key(ringid, 0, + KEY_SYSADMIN_OVERRIDE); if (IS_ERR(keyring_ref)) goto error; if (test_bit(KEY_FLAG_ROOT_CAN_CLEAR, @@ -563,7 +564,7 @@ long keyctl_keyring_unlink(key_serial_t id, key_serial_t ringid) goto error; } - key_ref = lookup_user_key(id, KEY_LOOKUP_FOR_UNLINK, 0); + key_ref = lookup_user_key(id, KEY_LOOKUP_PARTIAL, KEY_NEED_UNLINK); if (IS_ERR(key_ref)) { ret = PTR_ERR(key_ref); goto error2; @@ -663,7 +664,7 @@ long keyctl_describe_key(key_serial_t keyid, key_put(instkey); key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, - 0); + KEY_AUTHTOKEN_OVERRIDE); if (!IS_ERR(key_ref)) goto okay; } @@ -833,7 +834,7 @@ long keyctl_read_key(key_serial_t keyid, char __user *buffer, size_t buflen) size_t key_data_len; /* find the key first */ - key_ref = lookup_user_key(keyid, 0, 0); + key_ref = lookup_user_key(keyid, 0, KEY_DEFER_PERM_CHECK); if (IS_ERR(key_ref)) { ret = -ENOKEY; goto out; @@ -1471,7 +1472,7 @@ long keyctl_set_timeout(key_serial_t id, unsigned timeout) key_put(instkey); key_ref = lookup_user_key(id, KEY_LOOKUP_PARTIAL, - 0); + KEY_AUTHTOKEN_OVERRIDE); if (!IS_ERR(key_ref)) goto okay; } @@ -1579,7 +1580,8 @@ long keyctl_get_security(key_serial_t keyid, return PTR_ERR(instkey); key_put(instkey); - key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, 0); + key_ref = lookup_user_key(keyid, KEY_LOOKUP_PARTIAL, + KEY_AUTHTOKEN_OVERRIDE); if (IS_ERR(key_ref)) return PTR_ERR(key_ref); } diff --git a/security/keys/permission.c b/security/keys/permission.c index 085f907b64ac..4a61f804e80f 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -13,7 +13,7 @@ * key_task_permission - Check a key can be used * @key_ref: The key to check. * @cred: The credentials to use. - * @perm: The permissions to check for. + * @need_perm: The permission required. * * Check to see whether permission is granted to use a key in the desired way, * but permit the security modules to override. @@ -24,12 +24,30 @@ * permissions bits or the LSM check. */ int key_task_permission(const key_ref_t key_ref, const struct cred *cred, - unsigned perm) + enum key_need_perm need_perm) { struct key *key; - key_perm_t kperm; + key_perm_t kperm, mask; int ret; + switch (need_perm) { + default: + WARN_ON(1); + return -EACCES; + case KEY_NEED_UNLINK: + case KEY_SYSADMIN_OVERRIDE: + case KEY_AUTHTOKEN_OVERRIDE: + case KEY_DEFER_PERM_CHECK: + goto lsm; + + case KEY_NEED_VIEW: mask = KEY_OTH_VIEW; break; + case KEY_NEED_READ: mask = KEY_OTH_READ; break; + case KEY_NEED_WRITE: mask = KEY_OTH_WRITE; break; + case KEY_NEED_SEARCH: mask = KEY_OTH_SEARCH; break; + case KEY_NEED_LINK: mask = KEY_OTH_LINK; break; + case KEY_NEED_SETATTR: mask = KEY_OTH_SETATTR; break; + } + key = key_ref_to_ptr(key_ref); /* use the second 8-bits of permissions for keys the caller owns */ @@ -64,13 +82,12 @@ use_these_perms: if (is_key_possessed(key_ref)) kperm |= key->perm >> 24; - kperm = kperm & perm & KEY_NEED_ALL; - - if (kperm != perm) + if ((kperm & mask) != mask) return -EACCES; /* let LSM be the final arbiter */ - return security_key_permission(key_ref, cred, perm); +lsm: + return security_key_permission(key_ref, cred, need_perm); } EXPORT_SYMBOL(key_task_permission); diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 09541de31f2f..7e0232db1707 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -609,7 +609,7 @@ bool lookup_user_key_possessed(const struct key *key, * returned key reference. */ key_ref_t lookup_user_key(key_serial_t id, unsigned long lflags, - key_perm_t perm) + enum key_need_perm need_perm) { struct keyring_search_context ctx = { .match_data.cmp = lookup_user_key_possessed, @@ -773,35 +773,33 @@ try_again: /* unlink does not use the nominated key in any way, so can skip all * the permission checks as it is only concerned with the keyring */ - if (lflags & KEY_LOOKUP_FOR_UNLINK) { - ret = 0; - goto error; - } - - if (!(lflags & KEY_LOOKUP_PARTIAL)) { - ret = wait_for_key_construction(key, true); - switch (ret) { - case -ERESTARTSYS: - goto invalid_key; - default: - if (perm) + if (need_perm != KEY_NEED_UNLINK) { + if (!(lflags & KEY_LOOKUP_PARTIAL)) { + ret = wait_for_key_construction(key, true); + switch (ret) { + case -ERESTARTSYS: + goto invalid_key; + default: + if (need_perm != KEY_AUTHTOKEN_OVERRIDE && + need_perm != KEY_DEFER_PERM_CHECK) + goto invalid_key; + case 0: + break; + } + } else if (need_perm != KEY_DEFER_PERM_CHECK) { + ret = key_validate(key); + if (ret < 0) goto invalid_key; - case 0: - break; } - } else if (perm) { - ret = key_validate(key); - if (ret < 0) + + ret = -EIO; + if (!(lflags & KEY_LOOKUP_PARTIAL) && + key_read_state(key) == KEY_IS_UNINSTANTIATED) goto invalid_key; } - ret = -EIO; - if (!(lflags & KEY_LOOKUP_PARTIAL) && - key_read_state(key) == KEY_IS_UNINSTANTIATED) - goto invalid_key; - /* check the permissions */ - ret = key_task_permission(key_ref, ctx.cred, perm); + ret = key_task_permission(key_ref, ctx.cred, need_perm); if (ret < 0) goto invalid_key; diff --git a/security/security.c b/security/security.c index c73334ab2882..af32d4cd0462 100644 --- a/security/security.c +++ b/security/security.c @@ -2398,10 +2398,10 @@ void security_key_free(struct key *key) call_void_hook(key_free, key); } -int security_key_permission(key_ref_t key_ref, - const struct cred *cred, unsigned perm) +int security_key_permission(key_ref_t key_ref, const struct cred *cred, + enum key_need_perm need_perm) { - return call_int_hook(key_permission, 0, key_ref, cred, perm); + return call_int_hook(key_permission, 0, key_ref, cred, need_perm); } int security_key_getsecurity(struct key *key, char **_buffer) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4c037c2545c1..196acaccbfdd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -6561,20 +6561,43 @@ static void selinux_key_free(struct key *k) static int selinux_key_permission(key_ref_t key_ref, const struct cred *cred, - unsigned perm) + enum key_need_perm need_perm) { struct key *key; struct key_security_struct *ksec; - u32 sid; + u32 perm, sid; - /* if no specific permissions are requested, we skip the - permission check. No serious, additional covert channels - appear to be created. */ - if (perm == 0) + switch (need_perm) { + case KEY_NEED_VIEW: + perm = KEY__VIEW; + break; + case KEY_NEED_READ: + perm = KEY__READ; + break; + case KEY_NEED_WRITE: + perm = KEY__WRITE; + break; + case KEY_NEED_SEARCH: + perm = KEY__SEARCH; + break; + case KEY_NEED_LINK: + perm = KEY__LINK; + break; + case KEY_NEED_SETATTR: + perm = KEY__SETATTR; + break; + case KEY_NEED_UNLINK: + case KEY_SYSADMIN_OVERRIDE: + case KEY_AUTHTOKEN_OVERRIDE: + case KEY_DEFER_PERM_CHECK: return 0; + default: + WARN_ON(1); + return -EPERM; - sid = cred_sid(cred); + } + sid = cred_sid(cred); key = key_ref_to_ptr(key_ref); ksec = key->security; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 8c61d175e195..0d6bb53efe74 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -4230,13 +4230,14 @@ static void smack_key_free(struct key *key) * smack_key_permission - Smack access on a key * @key_ref: gets to the object * @cred: the credentials to use - * @perm: requested key permissions + * @need_perm: requested key permission * * Return 0 if the task has read and write to the object, * an error code otherwise */ static int smack_key_permission(key_ref_t key_ref, - const struct cred *cred, unsigned perm) + const struct cred *cred, + enum key_need_perm need_perm) { struct key *keyp; struct smk_audit_info ad; @@ -4247,8 +4248,26 @@ static int smack_key_permission(key_ref_t key_ref, /* * Validate requested permissions */ - if (perm & ~KEY_NEED_ALL) + switch (need_perm) { + case KEY_NEED_READ: + case KEY_NEED_SEARCH: + case KEY_NEED_VIEW: + request |= MAY_READ; + break; + case KEY_NEED_WRITE: + case KEY_NEED_LINK: + case KEY_NEED_SETATTR: + request |= MAY_WRITE; + break; + case KEY_NEED_UNSPECIFIED: + case KEY_NEED_UNLINK: + case KEY_SYSADMIN_OVERRIDE: + case KEY_AUTHTOKEN_OVERRIDE: + case KEY_DEFER_PERM_CHECK: + return 0; + default: return -EINVAL; + } keyp = key_ref_to_ptr(key_ref); if (keyp == NULL) @@ -4273,10 +4292,6 @@ static int smack_key_permission(key_ref_t key_ref, ad.a.u.key_struct.key = keyp->serial; ad.a.u.key_struct.key_desc = keyp->description; #endif - if (perm & (KEY_NEED_READ | KEY_NEED_SEARCH | KEY_NEED_VIEW)) - request |= MAY_READ; - if (perm & (KEY_NEED_WRITE | KEY_NEED_LINK | KEY_NEED_SETATTR)) - request |= MAY_WRITE; rc = smk_access(tkp, keyp->security, request, &ad); rc = smk_bu_note("key access", tkp, keyp->security, request, rc); return rc; -- cgit v1.3 From a8478a602913dc89a7cd2060e613edecd07e1dbd Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 14 Jan 2020 17:07:13 +0000 Subject: smack: Implement the watch_key and post_notification hooks Implement the watch_key security hook in Smack to make sure that a key grants the caller Read permission in order to set a watch on a key. Also implement the post_notification security hook to make sure that the notification source is granted Write permission by the watch queue. For the moment, the watch_devices security hook is left unimplemented as it's not obvious what the object should be since the queue is global and didn't previously exist. Signed-off-by: David Howells Acked-by: Casey Schaufler --- include/linux/lsm_audit.h | 1 + security/smack/smack_lsm.c | 83 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h index 99d629fd9944..28f23b341c1c 100644 --- a/include/linux/lsm_audit.h +++ b/include/linux/lsm_audit.h @@ -75,6 +75,7 @@ struct common_audit_data { #define LSM_AUDIT_DATA_IBPKEY 13 #define LSM_AUDIT_DATA_IBENDPORT 14 #define LSM_AUDIT_DATA_LOCKDOWN 15 +#define LSM_AUDIT_DATA_NOTIFICATION 16 union { struct path path; struct dentry *dentry; diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 0d6bb53efe74..3c4d4836da4a 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -41,6 +41,7 @@ #include #include #include +#include #include "smack.h" #define TRANS_TRUE "TRUE" @@ -4284,7 +4285,7 @@ static int smack_key_permission(key_ref_t key_ref, if (tkp == NULL) return -EACCES; - if (smack_privileged_cred(CAP_MAC_OVERRIDE, cred)) + if (smack_privileged(CAP_MAC_OVERRIDE)) return 0; #ifdef CONFIG_AUDIT @@ -4326,8 +4327,81 @@ static int smack_key_getsecurity(struct key *key, char **_buffer) return length; } + +#ifdef CONFIG_KEY_NOTIFICATIONS +/** + * smack_watch_key - Smack access to watch a key for notifications. + * @key: The key to be watched + * + * Return 0 if the @watch->cred has permission to read from the key object and + * an error otherwise. + */ +static int smack_watch_key(struct key *key) +{ + struct smk_audit_info ad; + struct smack_known *tkp = smk_of_current(); + int rc; + + if (key == NULL) + return -EINVAL; + /* + * If the key hasn't been initialized give it access so that + * it may do so. + */ + if (key->security == NULL) + return 0; + /* + * This should not occur + */ + if (tkp == NULL) + return -EACCES; + + if (smack_privileged_cred(CAP_MAC_OVERRIDE, current_cred())) + return 0; + +#ifdef CONFIG_AUDIT + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_KEY); + ad.a.u.key_struct.key = key->serial; + ad.a.u.key_struct.key_desc = key->description; +#endif + rc = smk_access(tkp, key->security, MAY_READ, &ad); + rc = smk_bu_note("key watch", tkp, key->security, MAY_READ, rc); + return rc; +} +#endif /* CONFIG_KEY_NOTIFICATIONS */ #endif /* CONFIG_KEYS */ +#ifdef CONFIG_WATCH_QUEUE +/** + * smack_post_notification - Smack access to post a notification to a queue + * @w_cred: The credentials of the watcher. + * @cred: The credentials of the event source (may be NULL). + * @n: The notification message to be posted. + */ +static int smack_post_notification(const struct cred *w_cred, + const struct cred *cred, + struct watch_notification *n) +{ + struct smk_audit_info ad; + struct smack_known *subj, *obj; + int rc; + + /* Always let maintenance notifications through. */ + if (n->type == WATCH_TYPE_META) + return 0; + + if (!cred) + return 0; + subj = smk_of_task(smack_cred(cred)); + obj = smk_of_task(smack_cred(w_cred)); + + smk_ad_init(&ad, __func__, LSM_AUDIT_DATA_NOTIFICATION); + rc = smk_access(subj, obj, MAY_WRITE, &ad); + rc = smk_bu_note("notification", subj, obj, MAY_WRITE, rc); + return rc; +} +#endif /* CONFIG_WATCH_QUEUE */ + /* * Smack Audit hooks * @@ -4716,8 +4790,15 @@ static struct security_hook_list smack_hooks[] __lsm_ro_after_init = { LSM_HOOK_INIT(key_free, smack_key_free), LSM_HOOK_INIT(key_permission, smack_key_permission), LSM_HOOK_INIT(key_getsecurity, smack_key_getsecurity), +#ifdef CONFIG_KEY_NOTIFICATIONS + LSM_HOOK_INIT(watch_key, smack_watch_key), +#endif #endif /* CONFIG_KEYS */ +#ifdef CONFIG_WATCH_QUEUE + LSM_HOOK_INIT(post_notification, smack_post_notification), +#endif + /* Audit hooks */ #ifdef CONFIG_AUDIT LSM_HOOK_INIT(audit_rule_init, smack_audit_rule_init), -- cgit v1.3 From 6fe12cdbcfe35ad4726a619a9546822d34fc934c Mon Sep 17 00:00:00 2001 From: Bibby Hsieh Date: Tue, 19 May 2020 15:27:29 +0800 Subject: i2c: core: support bus regulator controlling in adapter Although in the most platforms, the bus power of i2c are alway on, some platforms disable the i2c bus power in order to meet low power request. We get and enable bulk regulator in i2c adapter device. Signed-off-by: Bibby Hsieh Reviewed-by: Tomasz Figa Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 84 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c.h | 2 ++ 2 files changed, 86 insertions(+) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index a7c21d4cbd90..5be24bf8a194 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -313,12 +313,14 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); + struct i2c_adapter *adap; struct i2c_driver *driver; int status; if (!client) return 0; + adap = client->adapter; driver = to_i2c_driver(dev->driver); client->irq = client->init_irq; @@ -378,6 +380,12 @@ static int i2c_device_probe(struct device *dev) dev_dbg(dev, "probe\n"); + status = regulator_enable(adap->bus_regulator); + if (status < 0) { + dev_err(&adap->dev, "Failed to enable power regulator\n"); + goto err_clear_wakeup_irq; + } + status = of_clk_set_defaults(dev->of_node, false); if (status < 0) goto err_clear_wakeup_irq; @@ -414,12 +422,14 @@ err_clear_wakeup_irq: static int i2c_device_remove(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); + struct i2c_adapter *adap; struct i2c_driver *driver; int status = 0; if (!client || !dev->driver) return 0; + adap = client->adapter; driver = to_i2c_driver(dev->driver); if (driver->remove) { dev_dbg(dev, "remove\n"); @@ -427,6 +437,8 @@ static int i2c_device_remove(struct device *dev) } dev_pm_domain_detach(&client->dev, true); + if (!pm_runtime_status_suspended(&client->dev)) + regulator_disable(adap->bus_regulator); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -438,6 +450,72 @@ static int i2c_device_remove(struct device *dev) return status; } +#ifdef CONFIG_PM_SLEEP +static int i2c_resume_early(struct device *dev) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct i2c_adapter *adap = client->adapter; + int err; + + if (!pm_runtime_status_suspended(&client->dev)) { + err = regulator_enable(adap->bus_regulator); + if (err) + return err; + } + + return pm_generic_resume_early(&client->dev); +} + +static int i2c_suspend_late(struct device *dev) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct i2c_adapter *adap = client->adapter; + int err; + + err = pm_generic_suspend_late(&client->dev); + if (err) + return err; + + if (!pm_runtime_status_suspended(&client->dev)) + return regulator_disable(adap->bus_regulator); + + return 0; +} +#endif + +#ifdef CONFIG_PM +static int i2c_runtime_resume(struct device *dev) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct i2c_adapter *adap = client->adapter; + int err; + + err = regulator_enable(adap->bus_regulator); + if (err) + return err; + + return pm_generic_runtime_resume(&client->dev); +} + +static int i2c_runtime_suspend(struct device *dev) +{ + struct i2c_client *client = i2c_verify_client(dev); + struct i2c_adapter *adap = client->adapter; + int err; + + err = pm_generic_runtime_suspend(&client->dev); + if (err) + return err; + + return regulator_disable(adap->bus_regulator); +} +#endif + +static const struct dev_pm_ops i2c_device_pm = { + SET_LATE_SYSTEM_SLEEP_PM_OPS(i2c_suspend_late, i2c_resume_early) + SET_RUNTIME_PM_OPS(i2c_runtime_suspend, i2c_runtime_resume, NULL) +}; + static void i2c_device_shutdown(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); @@ -495,6 +573,7 @@ struct bus_type i2c_bus_type = { .probe = i2c_device_probe, .remove = i2c_device_remove, .shutdown = i2c_device_shutdown, + .pm = &i2c_device_pm, }; EXPORT_SYMBOL_GPL(i2c_bus_type); @@ -1333,6 +1412,11 @@ static int i2c_register_adapter(struct i2c_adapter *adap) if (res) goto out_reg; + adap->bus_regulator = devm_regulator_get(&adap->dev, "bus"); + if (IS_ERR(adap->bus_regulator)) { + res = PTR_ERR(adap->bus_regulator); + goto out_reg; + } dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); pm_runtime_no_callbacks(&adap->dev); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 456fc17ecb1c..bc83af0d38d1 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -15,6 +15,7 @@ #include /* for struct device */ #include /* for completion */ #include +#include #include #include /* for Host Notify IRQ */ #include /* for struct device_node */ @@ -721,6 +722,7 @@ struct i2c_adapter { const struct i2c_adapter_quirks *quirks; struct irq_domain *host_notify_domain; + struct regulator *bus_regulator; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -- cgit v1.3 From ca07eda33e01eafa7a26ec06974f7eacee6a89c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 17:30:12 -0400 Subject: SUNRPC: Refactor svc_recvfrom() This function is not currently "generic" so remove the documenting comment and rename it appropriately. Its internals are converted to use bio_vecs for reading from the transport socket. In existing typical sunrpc uses of bio_vecs, the bio_vec array is allocated dynamically. Here, instead, an array of bio_vecs is added to svc_rqst. The lifetime of this array can be greater than one call to xpo_recvfrom(): - Multiple calls to xpo_recvfrom() might be needed to read an RPC message completely. - At some later point, rq_arg.bvecs will point to this array and it will carry the received message into svc_process(). I also expect that a future optimization will remove either the rq_vec or rq_pages array in favor of rq_bvec, thus conserving the size of struct svc_rqst. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svcsock.c | 109 ++++++++++++++++++++++++++++----------------- 2 files changed, 69 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fd390894a584..05da19a0516d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -254,6 +254,7 @@ struct svc_rqst { struct page * *rq_page_end; /* one past the last page */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ + struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5b2981a0711c..3e25511b800e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -223,26 +223,62 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) +{ + struct bvec_iter bi = { + .bi_size = size, + }; + struct bio_vec bv; + + bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); + for_each_bvec(bv, bvec, bi, bi) + flush_dcache_page(bv.bv_page); +} +#else +static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size, + size_t seek) +{ +} +#endif + /* - * Generic recvfrom routine. + * Read from @rqstp's transport socket. The incoming message fills whole + * pages in @rqstp's rq_pages array until the last page of the message + * has been received into a partial page. */ -static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, - unsigned int nr, size_t buflen, unsigned int base) +static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, + size_t seek) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct bio_vec *bvec = rqstp->rq_bvec; struct msghdr msg = { NULL }; + unsigned int i; ssize_t len; + size_t t; rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen); - if (base != 0) { - iov_iter_advance(&msg.msg_iter, base); - buflen -= base; + + for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) { + bvec[i].bv_page = rqstp->rq_pages[i]; + bvec[i].bv_len = PAGE_SIZE; + bvec[i].bv_offset = 0; + } + rqstp->rq_respages = &rqstp->rq_pages[i]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen); + if (seek) { + iov_iter_advance(&msg.msg_iter, seek); + buflen -= seek; } len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + if (len > 0) + svc_flush_bvec(bvec, len, seek); + /* If we read a full record, then assume there may be more * data to read (stream based sockets only!) */ @@ -775,13 +811,14 @@ failed: return NULL; } -static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +static size_t svc_tcp_restore_pages(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - unsigned int i, len, npages; + size_t len = svsk->sk_datalen; + unsigned int i, npages; - if (svsk->sk_datalen == 0) + if (!len) return 0; - len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -917,20 +954,6 @@ unlock_eagain: return -EAGAIN; } -static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) -{ - int i = 0; - int t = 0; - - while (t < len) { - vec[i].iov_base = page_address(pages[i]); - vec[i].iov_len = PAGE_SIZE; - i++; - t += PAGE_SIZE; - } - return i; -} - static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ @@ -938,20 +961,33 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) svsk->sk_marker = xdr_zero; } -/* - * Receive data from a TCP socket. +/** + * svc_tcp_recvfrom - Receive data from a TCP socket + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Read the 4-byte stream record marker, then use the record length + * in that marker to set up exactly the resources needed to receive + * the next RPC message into @rqstp. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return + * + * The zero return case handles partial receives and callback Replies. + * The state of a partial receive is preserved in the svc_sock for + * the next call to svc_tcp_recvfrom. */ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int len; - struct kvec *vec; - unsigned int want, base; + size_t want, base; + ssize_t len; __be32 *p; __be32 calldir; - int pnum; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); len = svc_tcp_read_marker(svsk, rqstp); @@ -960,16 +996,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) base = svc_tcp_restore_pages(svsk, rqstp); want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); - - vec = rqstp->rq_vec; - - pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want); - - rqstp->rq_respages = &rqstp->rq_pages[pnum]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, base + want, base); + len = svc_tcp_read_msg(rqstp, base + want, base); if (len >= 0) { trace_svcsock_tcp_recv(&svsk->sk_xprt, len); svsk->sk_tcplen += len; -- cgit v1.3 From 7a4e63cb0905672fd52e7316a885f19d4aeed976 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 22 May 2020 16:56:58 +0200 Subject: Revert "i2c: core: support bus regulator controlling in adapter" This reverts commit 6fe12cdbcfe35ad4726a619a9546822d34fc934c. Testing in linux-next showed it needs some more time. Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-core-base.c | 84 --------------------------------------------- include/linux/i2c.h | 2 -- 2 files changed, 86 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-core-base.c b/drivers/i2c/i2c-core-base.c index ac7edcc1d5ee..9987e72752c4 100644 --- a/drivers/i2c/i2c-core-base.c +++ b/drivers/i2c/i2c-core-base.c @@ -313,14 +313,12 @@ static int i2c_smbus_host_notify_to_irq(const struct i2c_client *client) static int i2c_device_probe(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap; struct i2c_driver *driver; int status; if (!client) return 0; - adap = client->adapter; driver = to_i2c_driver(dev->driver); client->irq = client->init_irq; @@ -386,12 +384,6 @@ static int i2c_device_probe(struct device *dev) dev_dbg(dev, "probe\n"); - status = regulator_enable(adap->bus_regulator); - if (status < 0) { - dev_err(&adap->dev, "Failed to enable power regulator\n"); - goto err_clear_wakeup_irq; - } - status = of_clk_set_defaults(dev->of_node, false); if (status < 0) goto err_clear_wakeup_irq; @@ -432,14 +424,12 @@ put_sync_adapter: static int i2c_device_remove(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap; struct i2c_driver *driver; int status = 0; if (!client || !dev->driver) return 0; - adap = client->adapter; driver = to_i2c_driver(dev->driver); if (driver->remove) { dev_dbg(dev, "remove\n"); @@ -447,8 +437,6 @@ static int i2c_device_remove(struct device *dev) } dev_pm_domain_detach(&client->dev, true); - if (!pm_runtime_status_suspended(&client->dev)) - regulator_disable(adap->bus_regulator); dev_pm_clear_wake_irq(&client->dev); device_init_wakeup(&client->dev, false); @@ -460,72 +448,6 @@ static int i2c_device_remove(struct device *dev) return status; } -#ifdef CONFIG_PM_SLEEP -static int i2c_resume_early(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap = client->adapter; - int err; - - if (!pm_runtime_status_suspended(&client->dev)) { - err = regulator_enable(adap->bus_regulator); - if (err) - return err; - } - - return pm_generic_resume_early(&client->dev); -} - -static int i2c_suspend_late(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap = client->adapter; - int err; - - err = pm_generic_suspend_late(&client->dev); - if (err) - return err; - - if (!pm_runtime_status_suspended(&client->dev)) - return regulator_disable(adap->bus_regulator); - - return 0; -} -#endif - -#ifdef CONFIG_PM -static int i2c_runtime_resume(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap = client->adapter; - int err; - - err = regulator_enable(adap->bus_regulator); - if (err) - return err; - - return pm_generic_runtime_resume(&client->dev); -} - -static int i2c_runtime_suspend(struct device *dev) -{ - struct i2c_client *client = i2c_verify_client(dev); - struct i2c_adapter *adap = client->adapter; - int err; - - err = pm_generic_runtime_suspend(&client->dev); - if (err) - return err; - - return regulator_disable(adap->bus_regulator); -} -#endif - -static const struct dev_pm_ops i2c_device_pm = { - SET_LATE_SYSTEM_SLEEP_PM_OPS(i2c_suspend_late, i2c_resume_early) - SET_RUNTIME_PM_OPS(i2c_runtime_suspend, i2c_runtime_resume, NULL) -}; - static void i2c_device_shutdown(struct device *dev) { struct i2c_client *client = i2c_verify_client(dev); @@ -583,7 +505,6 @@ struct bus_type i2c_bus_type = { .probe = i2c_device_probe, .remove = i2c_device_remove, .shutdown = i2c_device_shutdown, - .pm = &i2c_device_pm, }; EXPORT_SYMBOL_GPL(i2c_bus_type); @@ -1422,11 +1343,6 @@ static int i2c_register_adapter(struct i2c_adapter *adap) if (res) goto out_reg; - adap->bus_regulator = devm_regulator_get(&adap->dev, "bus"); - if (IS_ERR(adap->bus_regulator)) { - res = PTR_ERR(adap->bus_regulator); - goto out_reg; - } dev_dbg(&adap->dev, "adapter [%s] registered\n", adap->name); pm_runtime_no_callbacks(&adap->dev); diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 7433aba207bd..49d29054e657 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -15,7 +15,6 @@ #include /* for struct device */ #include /* for completion */ #include -#include #include #include /* for Host Notify IRQ */ #include /* for struct device_node */ @@ -716,7 +715,6 @@ struct i2c_adapter { const struct i2c_adapter_quirks *quirks; struct irq_domain *host_notify_domain; - struct regulator *bus_regulator; }; #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev) -- cgit v1.3 From a4e91825d7e1252f7cba005f1451e5464b23c15d Mon Sep 17 00:00:00 2001 From: Alexander Monakov Date: Sun, 10 May 2020 20:48:40 +0000 Subject: x86/amd_nb: Add AMD family 17h model 60h PCI IDs Add PCI IDs for AMD Renoir (4000-series Ryzen CPUs). This is necessary to enable support for temperature sensors via the k10temp module. Signed-off-by: Alexander Monakov Signed-off-by: Borislav Petkov Acked-by: Yazen Ghannam Acked-by: Guenter Roeck Link: https://lkml.kernel.org/r/20200510204842.2603-2-amonakov@ispras.ru --- arch/x86/kernel/amd_nb.c | 5 +++++ include/linux/pci_ids.h | 1 + 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index b6b3297851f3..18f6b7c4bd79 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -18,9 +18,11 @@ #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 #define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_M30H_ROOT 0x1480 +#define PCI_DEVICE_ID_AMD_17H_M60H_ROOT 0x1630 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F4 0x1494 +#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F4 0x144c #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F4 0x1444 #define PCI_DEVICE_ID_AMD_19H_DF_F4 0x1654 @@ -33,6 +35,7 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_ROOT) }, {} }; @@ -50,6 +53,7 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F3) }, @@ -65,6 +69,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M30H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M60H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M70H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 1dfc4e1dcb94..3155f5ada02e 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -550,6 +550,7 @@ #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #define PCI_DEVICE_ID_AMD_17H_M30H_DF_F3 0x1493 +#define PCI_DEVICE_ID_AMD_17H_M60H_DF_F3 0x144b #define PCI_DEVICE_ID_AMD_17H_M70H_DF_F3 0x1443 #define PCI_DEVICE_ID_AMD_19H_DF_F3 0x1653 #define PCI_DEVICE_ID_AMD_CNB17H_F3 0x1703 -- cgit v1.3 From 1f1ec622623fe8e49a1036d4127b895a0f0e2c43 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 5 May 2020 12:13:35 +0200 Subject: mtd: rawnand: Propage CS selection to sub operations Some controller using the instruction parse infrastructure might need to know which CS a specific sub-operation is targeting. Let's propagate this information. Signed-off-by: Boris Brezillon Reviewed-by: Miquel Raynal Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20200505101353.1776394-2-boris.brezillon@collabora.com --- drivers/mtd/nand/raw/nand_base.c | 3 ++- include/linux/mtd/rawnand.h | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 2d2a216af120..c4d6d5482b31 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -2174,7 +2174,7 @@ static void nand_op_parser_trace(const struct nand_op_parser_ctx *ctx) char *prefix = " "; unsigned int i; - pr_debug("executing subop:\n"); + pr_debug("executing subop (CS%d):\n", ctx->subop.cs); for (i = 0; i < ctx->ninstrs; i++) { instr = &ctx->instrs[i]; @@ -2238,6 +2238,7 @@ int nand_op_parser_exec_op(struct nand_chip *chip, const struct nand_operation *op, bool check_only) { struct nand_op_parser_ctx ctx = { + .subop.cs = op->cs, .subop.instrs = op->instrs, .instrs = op->instrs, .ninstrs = op->ninstrs, diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 0f45b6984ad1..a3bac8824937 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -709,6 +709,7 @@ struct nand_op_instr { /** * struct nand_subop - a sub operation + * @cs: the CS line to select for this NAND sub-operation * @instrs: array of instructions * @ninstrs: length of the @instrs array * @first_instr_start_off: offset to start from for the first instruction @@ -724,6 +725,7 @@ struct nand_op_instr { * controller driver. */ struct nand_subop { + unsigned int cs; const struct nand_op_instr *instrs; unsigned int ninstrs; unsigned int first_instr_start_off; -- cgit v1.3 From c8ae3f744ddca0da164bcacee42d1d4b6fe7027d Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 19 May 2020 09:45:42 +0200 Subject: lib/bch: Rework a little bit the exported function names There are four exported functions, all suffixed by _bch, which is clearly not the norm. Let's rename them by prefixing them with bch_ instead. This is a mechanical change: init_bch -> bch_init free_bch -> bch_free encode_bch -> bch_encode decode_bch -> bch_decode Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200519074549.23673-2-miquel.raynal@bootlin.com --- drivers/mtd/devices/docg3.c | 10 +++---- drivers/mtd/nand/raw/nand_bch.c | 10 +++---- include/linux/bch.h | 8 +++--- lib/bch.c | 64 ++++++++++++++++++++--------------------- 4 files changed, 46 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index eb0f4600efd1..799df8d03357 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -647,7 +647,7 @@ static int doc_ecc_bch_fix_data(struct docg3 *docg3, void *buf, u8 *hwecc) for (i = 0; i < DOC_ECC_BCH_SIZE; i++) ecc[i] = bitrev8(hwecc[i]); - numerrs = decode_bch(docg3->cascade->bch, NULL, + numerrs = bch_decode(docg3->cascade->bch, NULL, DOC_ECC_BCH_COVERED_BYTES, NULL, ecc, NULL, errorpos); BUG_ON(numerrs == -EINVAL); @@ -1984,8 +1984,8 @@ static int __init docg3_probe(struct platform_device *pdev) return ret; cascade->base = base; mutex_init(&cascade->lock); - cascade->bch = init_bch(DOC_ECC_BCH_M, DOC_ECC_BCH_T, - DOC_ECC_BCH_PRIMPOLY); + cascade->bch = bch_init(DOC_ECC_BCH_M, DOC_ECC_BCH_T, + DOC_ECC_BCH_PRIMPOLY); if (!cascade->bch) return ret; @@ -2021,7 +2021,7 @@ notfound: ret = -ENODEV; dev_info(dev, "No supported DiskOnChip found\n"); err_probe: - free_bch(cascade->bch); + bch_free(cascade->bch); for (floor = 0; floor < DOC_MAX_NBFLOORS; floor++) if (cascade->floors[floor]) doc_release_device(cascade->floors[floor]); @@ -2045,7 +2045,7 @@ static int docg3_release(struct platform_device *pdev) if (cascade->floors[floor]) doc_release_device(cascade->floors[floor]); - free_bch(docg3->cascade->bch); + bch_free(docg3->cascade->bch); return 0; } diff --git a/drivers/mtd/nand/raw/nand_bch.c b/drivers/mtd/nand/raw/nand_bch.c index 17527310c3a1..d95fcc7358e9 100644 --- a/drivers/mtd/nand/raw/nand_bch.c +++ b/drivers/mtd/nand/raw/nand_bch.c @@ -41,7 +41,7 @@ int nand_bch_calculate_ecc(struct nand_chip *chip, const unsigned char *buf, unsigned int i; memset(code, 0, chip->ecc.bytes); - encode_bch(nbc->bch, buf, chip->ecc.size, code); + bch_encode(nbc->bch, buf, chip->ecc.size, code); /* apply mask so that an erased page is a valid codeword */ for (i = 0; i < chip->ecc.bytes; i++) @@ -67,7 +67,7 @@ int nand_bch_correct_data(struct nand_chip *chip, unsigned char *buf, unsigned int *errloc = nbc->errloc; int i, count; - count = decode_bch(nbc->bch, NULL, chip->ecc.size, read_ecc, calc_ecc, + count = bch_decode(nbc->bch, NULL, chip->ecc.size, read_ecc, calc_ecc, NULL, errloc); if (count > 0) { for (i = 0; i < count; i++) { @@ -130,7 +130,7 @@ struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) if (!nbc) goto fail; - nbc->bch = init_bch(m, t, 0); + nbc->bch = bch_init(m, t, 0); if (!nbc->bch) goto fail; @@ -182,7 +182,7 @@ struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) goto fail; memset(erased_page, 0xff, eccsize); - encode_bch(nbc->bch, erased_page, eccsize, nbc->eccmask); + bch_encode(nbc->bch, erased_page, eccsize, nbc->eccmask); kfree(erased_page); for (i = 0; i < eccbytes; i++) @@ -205,7 +205,7 @@ EXPORT_SYMBOL(nand_bch_init); void nand_bch_free(struct nand_bch_control *nbc) { if (nbc) { - free_bch(nbc->bch); + bch_free(nbc->bch); kfree(nbc->errloc); kfree(nbc->eccmask); kfree(nbc); diff --git a/include/linux/bch.h b/include/linux/bch.h index aa765af85c38..9c35e7cd5890 100644 --- a/include/linux/bch.h +++ b/include/linux/bch.h @@ -53,14 +53,14 @@ struct bch_control { struct gf_poly *poly_2t[4]; }; -struct bch_control *init_bch(int m, int t, unsigned int prim_poly); +struct bch_control *bch_init(int m, int t, unsigned int prim_poly); -void free_bch(struct bch_control *bch); +void bch_free(struct bch_control *bch); -void encode_bch(struct bch_control *bch, const uint8_t *data, +void bch_encode(struct bch_control *bch, const uint8_t *data, unsigned int len, uint8_t *ecc); -int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len, +int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len, const uint8_t *recv_ecc, const uint8_t *calc_ecc, const unsigned int *syn, unsigned int *errloc); diff --git a/lib/bch.c b/lib/bch.c index 052d3fb753a0..1091841ac716 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -23,15 +23,15 @@ * This library provides runtime configurable encoding/decoding of binary * Bose-Chaudhuri-Hocquenghem (BCH) codes. * - * Call init_bch to get a pointer to a newly allocated bch_control structure for + * Call bch_init to get a pointer to a newly allocated bch_control structure for * the given m (Galois field order), t (error correction capability) and * (optional) primitive polynomial parameters. * - * Call encode_bch to compute and store ecc parity bytes to a given buffer. - * Call decode_bch to detect and locate errors in received data. + * Call bch_encode to compute and store ecc parity bytes to a given buffer. + * Call bch_decode to detect and locate errors in received data. * * On systems supporting hw BCH features, intermediate results may be provided - * to decode_bch in order to skip certain steps. See decode_bch() documentation + * to bch_decode in order to skip certain steps. See bch_decode() documentation * for details. * * Option CONFIG_BCH_CONST_PARAMS can be used to force fixed values of @@ -115,9 +115,9 @@ struct gf_poly_deg1 { }; /* - * same as encode_bch(), but process input data one byte at a time + * same as bch_encode(), but process input data one byte at a time */ -static void encode_bch_unaligned(struct bch_control *bch, +static void bch_encode_unaligned(struct bch_control *bch, const unsigned char *data, unsigned int len, uint32_t *ecc) { @@ -174,7 +174,7 @@ static void store_ecc8(struct bch_control *bch, uint8_t *dst, } /** - * encode_bch - calculate BCH ecc parity of data + * bch_encode - calculate BCH ecc parity of data * @bch: BCH control structure * @data: data to encode * @len: data length in bytes @@ -187,7 +187,7 @@ static void store_ecc8(struct bch_control *bch, uint8_t *dst, * The exact number of computed ecc parity bits is given by member @ecc_bits of * @bch; it may be less than m*t for large values of t. */ -void encode_bch(struct bch_control *bch, const uint8_t *data, +void bch_encode(struct bch_control *bch, const uint8_t *data, unsigned int len, uint8_t *ecc) { const unsigned int l = BCH_ECC_WORDS(bch)-1; @@ -215,7 +215,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, m = ((unsigned long)data) & 3; if (m) { mlen = (len < (4-m)) ? len : 4-m; - encode_bch_unaligned(bch, data, mlen, bch->ecc_buf); + bch_encode_unaligned(bch, data, mlen, bch->ecc_buf); data += mlen; len -= mlen; } @@ -255,13 +255,13 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, /* process last unaligned bytes */ if (len) - encode_bch_unaligned(bch, data, len, bch->ecc_buf); + bch_encode_unaligned(bch, data, len, bch->ecc_buf); /* store ecc parity bytes into original parity buffer */ if (ecc) store_ecc8(bch, ecc, bch->ecc_buf); } -EXPORT_SYMBOL_GPL(encode_bch); +EXPORT_SYMBOL_GPL(bch_encode); static inline int modulo(struct bch_control *bch, unsigned int v) { @@ -952,7 +952,7 @@ static int chien_search(struct bch_control *bch, unsigned int len, #endif /* USE_CHIEN_SEARCH */ /** - * decode_bch - decode received codeword and find bit error locations + * bch_decode - decode received codeword and find bit error locations * @bch: BCH control structure * @data: received data, ignored if @calc_ecc is provided * @len: data length in bytes, must always be provided @@ -966,22 +966,22 @@ static int chien_search(struct bch_control *bch, unsigned int len, * invalid parameters were provided * * Depending on the available hw BCH support and the need to compute @calc_ecc - * separately (using encode_bch()), this function should be called with one of + * separately (using bch_encode()), this function should be called with one of * the following parameter configurations - * * by providing @data and @recv_ecc only: - * decode_bch(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc) + * bch_decode(@bch, @data, @len, @recv_ecc, NULL, NULL, @errloc) * * by providing @recv_ecc and @calc_ecc: - * decode_bch(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc) + * bch_decode(@bch, NULL, @len, @recv_ecc, @calc_ecc, NULL, @errloc) * * by providing ecc = recv_ecc XOR calc_ecc: - * decode_bch(@bch, NULL, @len, NULL, ecc, NULL, @errloc) + * bch_decode(@bch, NULL, @len, NULL, ecc, NULL, @errloc) * * by providing syndrome results @syn: - * decode_bch(@bch, NULL, @len, NULL, NULL, @syn, @errloc) + * bch_decode(@bch, NULL, @len, NULL, NULL, @syn, @errloc) * - * Once decode_bch() has successfully returned with a positive value, error + * Once bch_decode() has successfully returned with a positive value, error * locations returned in array @errloc should be interpreted as follows - * * if (errloc[n] >= 8*len), then n-th error is located in ecc (no need for @@ -993,7 +993,7 @@ static int chien_search(struct bch_control *bch, unsigned int len, * Note that this function does not perform any data correction by itself, it * merely indicates error locations. */ -int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len, +int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len, const uint8_t *recv_ecc, const uint8_t *calc_ecc, const unsigned int *syn, unsigned int *errloc) { @@ -1012,7 +1012,7 @@ int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len, /* compute received data ecc into an internal buffer */ if (!data || !recv_ecc) return -EINVAL; - encode_bch(bch, data, len, NULL); + bch_encode(bch, data, len, NULL); } else { /* load provided calculated ecc */ load_ecc8(bch, bch->ecc_buf, calc_ecc); @@ -1053,7 +1053,7 @@ int decode_bch(struct bch_control *bch, const uint8_t *data, unsigned int len, } return (err >= 0) ? err : -EBADMSG; } -EXPORT_SYMBOL_GPL(decode_bch); +EXPORT_SYMBOL_GPL(bch_decode); /* * generate Galois field lookup tables @@ -1236,7 +1236,7 @@ finish: } /** - * init_bch - initialize a BCH encoder/decoder + * bch_init - initialize a BCH encoder/decoder * @m: Galois field order, should be in the range 5-15 * @t: maximum error correction capability, in bits * @prim_poly: user-provided primitive polynomial (or 0 to use default) @@ -1246,17 +1246,17 @@ finish: * * This initialization can take some time, as lookup tables are built for fast * encoding/decoding; make sure not to call this function from a time critical - * path. Usually, init_bch() should be called on module/driver init and - * free_bch() should be called to release memory on exit. + * path. Usually, bch_init() should be called on module/driver init and + * bch_free() should be called to release memory on exit. * * You may provide your own primitive polynomial of degree @m in argument - * @prim_poly, or let init_bch() use its default polynomial. + * @prim_poly, or let bch_init() use its default polynomial. * - * Once init_bch() has successfully returned a pointer to a newly allocated + * Once bch_init() has successfully returned a pointer to a newly allocated * BCH control structure, ecc length in bytes is given by member @ecc_bytes of * the structure. */ -struct bch_control *init_bch(int m, int t, unsigned int prim_poly) +struct bch_control *bch_init(int m, int t, unsigned int prim_poly) { int err = 0; unsigned int i, words; @@ -1347,16 +1347,16 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) return bch; fail: - free_bch(bch); + bch_free(bch); return NULL; } -EXPORT_SYMBOL_GPL(init_bch); +EXPORT_SYMBOL_GPL(bch_init); /** - * free_bch - free the BCH control structure + * bch_free - free the BCH control structure * @bch: BCH control structure to release */ -void free_bch(struct bch_control *bch) +void bch_free(struct bch_control *bch) { unsigned int i; @@ -1377,7 +1377,7 @@ void free_bch(struct bch_control *bch) kfree(bch); } } -EXPORT_SYMBOL_GPL(free_bch); +EXPORT_SYMBOL_GPL(bch_free); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Ivan Djelic "); -- cgit v1.3 From 1759279ad138cb0a903224a89f4bf40f69c417e8 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 19 May 2020 09:45:43 +0200 Subject: lib/bch: Allow easy bit swapping It seems that several hardware ECC engine use a swapped representation of bytes compared to software. This might having to do with how the ECC engine is wired to the NAND controller or the order the bits are passed to the hardware BCH logic. This means that when the software BCH engine is working in conjunction with data generated with hardware, sometimes we might need to swap the bits inside bytes, eg: 0x0A = b0000_1010 -> b0101_0000 = 0x50 Make it possible by adding a boolean to the BCH initialization routine. Regarding the implementation itself, this is a rather simple approach that can probably be enhanced in the future by preparing the ->a_{mod,pow}_tab tables with the swapping in mind. Suggested-by: Boris Brezillon Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200519074549.23673-3-miquel.raynal@bootlin.com --- drivers/mtd/devices/docg3.c | 2 +- drivers/mtd/nand/raw/nand_bch.c | 2 +- include/linux/bch.h | 5 ++- lib/bch.c | 90 ++++++++++++++++++++++++++++++++++------- 4 files changed, 82 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c index 799df8d03357..a030792115bc 100644 --- a/drivers/mtd/devices/docg3.c +++ b/drivers/mtd/devices/docg3.c @@ -1985,7 +1985,7 @@ static int __init docg3_probe(struct platform_device *pdev) cascade->base = base; mutex_init(&cascade->lock); cascade->bch = bch_init(DOC_ECC_BCH_M, DOC_ECC_BCH_T, - DOC_ECC_BCH_PRIMPOLY); + DOC_ECC_BCH_PRIMPOLY, false); if (!cascade->bch) return ret; diff --git a/drivers/mtd/nand/raw/nand_bch.c b/drivers/mtd/nand/raw/nand_bch.c index d95fcc7358e9..d5af8c5fd02f 100644 --- a/drivers/mtd/nand/raw/nand_bch.c +++ b/drivers/mtd/nand/raw/nand_bch.c @@ -130,7 +130,7 @@ struct nand_bch_control *nand_bch_init(struct mtd_info *mtd) if (!nbc) goto fail; - nbc->bch = bch_init(m, t, 0); + nbc->bch = bch_init(m, t, 0, false); if (!nbc->bch) goto fail; diff --git a/include/linux/bch.h b/include/linux/bch.h index 9c35e7cd5890..85fdce83d4e2 100644 --- a/include/linux/bch.h +++ b/include/linux/bch.h @@ -33,6 +33,7 @@ * @cache: log-based polynomial representation buffer * @elp: error locator polynomial * @poly_2t: temporary polynomials of degree 2t + * @swap_bits: swap bits within data and syndrome bytes */ struct bch_control { unsigned int m; @@ -51,9 +52,11 @@ struct bch_control { int *cache; struct gf_poly *elp; struct gf_poly *poly_2t[4]; + bool swap_bits; }; -struct bch_control *bch_init(int m, int t, unsigned int prim_poly); +struct bch_control *bch_init(int m, int t, unsigned int prim_poly, + bool swap_bits); void bch_free(struct bch_control *bch); diff --git a/lib/bch.c b/lib/bch.c index 1091841ac716..7c031ee8b93b 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -114,6 +114,49 @@ struct gf_poly_deg1 { unsigned int c[2]; }; +static u8 swap_bits_table[] = { + 0x00, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0, + 0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0, + 0x08, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8, + 0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8, + 0x04, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4, + 0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4, + 0x0c, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec, + 0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc, + 0x02, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2, + 0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2, + 0x0a, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea, + 0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa, + 0x06, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6, + 0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6, + 0x0e, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee, + 0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe, + 0x01, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1, + 0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1, + 0x09, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9, + 0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9, + 0x05, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5, + 0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5, + 0x0d, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed, + 0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd, + 0x03, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3, + 0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3, + 0x0b, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb, + 0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb, + 0x07, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7, + 0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7, + 0x0f, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef, + 0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff, +}; + +static u8 swap_bits(struct bch_control *bch, u8 in) +{ + if (!bch->swap_bits) + return in; + + return swap_bits_table[in]; +} + /* * same as bch_encode(), but process input data one byte at a time */ @@ -126,7 +169,9 @@ static void bch_encode_unaligned(struct bch_control *bch, const int l = BCH_ECC_WORDS(bch)-1; while (len--) { - p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(*data++)) & 0xff); + u8 tmp = swap_bits(bch, *data++); + + p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff); for (i = 0; i < l; i++) ecc[i] = ((ecc[i] << 8)|(ecc[i+1] >> 24))^(*p++); @@ -145,10 +190,16 @@ static void load_ecc8(struct bch_control *bch, uint32_t *dst, unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; for (i = 0; i < nwords; i++, src += 4) - dst[i] = (src[0] << 24)|(src[1] << 16)|(src[2] << 8)|src[3]; + dst[i] = ((u32)swap_bits(bch, src[0]) << 24) | + ((u32)swap_bits(bch, src[1]) << 16) | + ((u32)swap_bits(bch, src[2]) << 8) | + swap_bits(bch, src[3]); memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords); - dst[nwords] = (pad[0] << 24)|(pad[1] << 16)|(pad[2] << 8)|pad[3]; + dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) | + ((u32)swap_bits(bch, pad[1]) << 16) | + ((u32)swap_bits(bch, pad[2]) << 8) | + swap_bits(bch, pad[3]); } /* @@ -161,15 +212,15 @@ static void store_ecc8(struct bch_control *bch, uint8_t *dst, unsigned int i, nwords = BCH_ECC_WORDS(bch)-1; for (i = 0; i < nwords; i++) { - *dst++ = (src[i] >> 24); - *dst++ = (src[i] >> 16) & 0xff; - *dst++ = (src[i] >> 8) & 0xff; - *dst++ = (src[i] >> 0) & 0xff; + *dst++ = swap_bits(bch, src[i] >> 24); + *dst++ = swap_bits(bch, src[i] >> 16); + *dst++ = swap_bits(bch, src[i] >> 8); + *dst++ = swap_bits(bch, src[i]); } - pad[0] = (src[nwords] >> 24); - pad[1] = (src[nwords] >> 16) & 0xff; - pad[2] = (src[nwords] >> 8) & 0xff; - pad[3] = (src[nwords] >> 0) & 0xff; + pad[0] = swap_bits(bch, src[nwords] >> 24); + pad[1] = swap_bits(bch, src[nwords] >> 16); + pad[2] = swap_bits(bch, src[nwords] >> 8); + pad[3] = swap_bits(bch, src[nwords]); memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords); } @@ -240,7 +291,13 @@ void bch_encode(struct bch_control *bch, const uint8_t *data, */ while (mlen--) { /* input data is read in big-endian format */ - w = r[0]^cpu_to_be32(*pdata++); + w = cpu_to_be32(*pdata++); + if (bch->swap_bits) + w = (u32)swap_bits(bch, w) | + ((u32)swap_bits(bch, w >> 8) << 8) | + ((u32)swap_bits(bch, w >> 16) << 16) | + ((u32)swap_bits(bch, w >> 24) << 24); + w ^= r[0]; p0 = tab0 + (l+1)*((w >> 0) & 0xff); p1 = tab1 + (l+1)*((w >> 8) & 0xff); p2 = tab2 + (l+1)*((w >> 16) & 0xff); @@ -1048,7 +1105,9 @@ int bch_decode(struct bch_control *bch, const uint8_t *data, unsigned int len, break; } errloc[i] = nbits-1-errloc[i]; - errloc[i] = (errloc[i] & ~7)|(7-(errloc[i] & 7)); + if (!bch->swap_bits) + errloc[i] = (errloc[i] & ~7) | + (7-(errloc[i] & 7)); } } return (err >= 0) ? err : -EBADMSG; @@ -1240,6 +1299,7 @@ finish: * @m: Galois field order, should be in the range 5-15 * @t: maximum error correction capability, in bits * @prim_poly: user-provided primitive polynomial (or 0 to use default) + * @swap_bits: swap bits within data and syndrome bytes * * Returns: * a newly allocated BCH control structure if successful, NULL otherwise @@ -1256,7 +1316,8 @@ finish: * BCH control structure, ecc length in bytes is given by member @ecc_bytes of * the structure. */ -struct bch_control *bch_init(int m, int t, unsigned int prim_poly) +struct bch_control *bch_init(int m, int t, unsigned int prim_poly, + bool swap_bits) { int err = 0; unsigned int i, words; @@ -1321,6 +1382,7 @@ struct bch_control *bch_init(int m, int t, unsigned int prim_poly) bch->syn = bch_alloc(2*t*sizeof(*bch->syn), &err); bch->cache = bch_alloc(2*t*sizeof(*bch->cache), &err); bch->elp = bch_alloc((t+1)*sizeof(struct gf_poly_deg1), &err); + bch->swap_bits = swap_bits; for (i = 0; i < ARRAY_SIZE(bch->poly_2t); i++) bch->poly_2t[i] = bch_alloc(GF_POLY_SZ(2*t), &err); -- cgit v1.3 From d7904619ea0636411f40fb1f34057288c0783ecf Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 19 May 2020 09:45:45 +0200 Subject: mtd: rawnand: Add nand_extract_bits() There are cases where ECC bytes are not byte-aligned. Indeed, BCH implies using a number of ECC bits, which are not always a multiple of 8. We then need a helper like nand_extract_bits() to extract these syndromes from a buffer. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200519074549.23673-5-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 44 ++++++++++++++++++++++++++++++++++++++++ include/linux/mtd/rawnand.h | 4 ++++ 2 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 475647ea6948..6a6a0a36b3fd 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -274,6 +274,50 @@ static int check_offs_len(struct nand_chip *chip, loff_t ofs, uint64_t len) return ret; } +/** + * nand_extract_bits - Copy unaligned bits from one buffer to another one + * @dst: destination buffer + * @dst_off: bit offset at which the writing starts + * @src: source buffer + * @src_off: bit offset at which the reading starts + * @nbits: number of bits to copy from @src to @dst + * + * Copy bits from one memory region to another (overlap authorized). + */ +void nand_extract_bits(u8 *dst, unsigned int dst_off, const u8 *src, + unsigned int src_off, unsigned int nbits) +{ + unsigned int tmp, n; + + dst += dst_off / 8; + dst_off %= 8; + src += src_off / 8; + src_off %= 8; + + while (nbits) { + n = min3(8 - dst_off, 8 - src_off, nbits); + + tmp = (*src >> src_off) & GENMASK(n - 1, 0); + *dst &= ~GENMASK(n - 1 + dst_off, dst_off); + *dst |= tmp << dst_off; + + dst_off += n; + if (dst_off >= 8) { + dst++; + dst_off -= 8; + } + + src_off += n; + if (src_off >= 8) { + src++; + src_off -= 8; + } + + nbits -= n; + } +} +EXPORT_SYMBOL_GPL(nand_extract_bits); + /** * nand_select_target() - Select a NAND target (A.K.A. die) * @chip: NAND chip object diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index a3bac8824937..2804c13e5662 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1414,6 +1414,10 @@ int nand_gpio_waitrdy(struct nand_chip *chip, struct gpio_desc *gpiod, void nand_select_target(struct nand_chip *chip, unsigned int cs); void nand_deselect_target(struct nand_chip *chip); +/* Bitops */ +void nand_extract_bits(u8 *dst, unsigned int dst_off, const u8 *src, + unsigned int src_off, unsigned int nbits); + /** * nand_get_data_buf() - Get the internal page buffer * @chip: NAND chip object -- cgit v1.3 From 11399346ac39a26ade2a90303d38ad318163c665 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 14:00:33 -0500 Subject: mtd: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Acked-by: Miquel Raynal Signed-off-by: Vignesh Raghavendra Link: https://lore.kernel.org/r/20200507190033.GA15215@embeddedor --- include/linux/mtd/cfi.h | 6 +++--- include/linux/mtd/qinfo.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index c98a21108688..fd1ecb821106 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -138,7 +138,7 @@ struct cfi_ident { uint16_t InterfaceDesc; uint16_t MaxBufWriteSize; uint8_t NumEraseRegions; - uint32_t EraseRegionInfo[0]; /* Not host ordered */ + uint32_t EraseRegionInfo[]; /* Not host ordered */ } __packed; /* Extended Query Structure for both PRI and ALT */ @@ -165,7 +165,7 @@ struct cfi_pri_intelext { uint16_t ProtRegAddr; uint8_t FactProtRegSize; uint8_t UserProtRegSize; - uint8_t extra[0]; + uint8_t extra[]; } __packed; struct cfi_intelext_otpinfo { @@ -286,7 +286,7 @@ struct cfi_private { map_word sector_erase_cmd; unsigned long chipshift; /* Because they're of the same type */ const char *im_name; /* inter_module name for cmdset_setup */ - struct flchip chips[0]; /* per-chip data structure for each chip */ + struct flchip chips[]; /* per-chip data structure for each chip */ }; uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs, diff --git a/include/linux/mtd/qinfo.h b/include/linux/mtd/qinfo.h index df5b9fddea16..2e3f43788d48 100644 --- a/include/linux/mtd/qinfo.h +++ b/include/linux/mtd/qinfo.h @@ -24,7 +24,7 @@ struct lpddr_private { struct qinfo_chip *qinfo; int numchips; unsigned long chipshift; - struct flchip chips[0]; + struct flchip chips[]; }; /* qinfo_query_info structure contains request information for -- cgit v1.3 From d04659db7b8089531ccbee364c52909fe4670408 Mon Sep 17 00:00:00 2001 From: Zheng Bin Date: Fri, 24 Apr 2020 16:17:23 +0800 Subject: nfs4: Remove unneeded semicolon Fixes coccicheck warning: include/linux/nfs4.h:298:2-3: Unneeded semicolon Reported-by: Hulk Robot Signed-off-by: Zheng Bin Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 82d8fb422092..4a51db7363a5 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -295,7 +295,7 @@ static inline bool seqid_mutating_err(u32 err) case NFS4ERR_NOFILEHANDLE: case NFS4ERR_MOVED: return false; - }; + } return true; } -- cgit v1.3 From 1ad8dd939a9826ff6f8c69ac13e4e1dbba076703 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Thu, 7 May 2020 14:02:23 -0500 Subject: NFS: Replace zero-length array with flexible-array The current codebase makes use of the zero-length array language extension to the C90 standard, but the preferred mechanism to declare variable-length types such as these ones is a flexible array member[1][2], introduced in C99: struct foo { int stuff; struct boo array[]; }; By making use of the mechanism above, we will get a compiler warning in case the flexible array does not occur last in the structure, which will help us prevent some kind of undefined behavior bugs from being inadvertently introduced[3] to the codebase from now on. Also, notice that, dynamic memory allocations won't be affected by this change: "Flexible array members have incomplete type, and so the sizeof operator may not be applied. As a quirk of the original implementation of zero-length arrays, sizeof evaluates to zero."[1] sizeof(flexible-array-member) triggers a warning because flexible array members have incomplete type[1]. There are some instances of code in which the sizeof operator is being incorrectly/erroneously applied to zero-length arrays and the result is zero. Such instances may be hiding some bugs. So, this work (flexible-array member conversions) will also help to get completely rid of those sorts of issues. This issue was found with the help of Coccinelle. [1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html [2] https://github.com/KSPP/linux/issues/21 [3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour") Signed-off-by: Gustavo A. R. Silva Signed-off-by: Anna Schumaker --- include/linux/nfs4.h | 2 +- include/linux/nfs_xdr.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 4a51db7363a5..4dba3c948932 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -38,7 +38,7 @@ struct nfs4_ace { struct nfs4_acl { uint32_t naces; - struct nfs4_ace aces[0]; + struct nfs4_ace aces[]; }; #define NFS4_MAXLABELLEN 2048 diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e5f3e7d8d3d5..5fd0a9ef425f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1227,7 +1227,7 @@ struct nfs4_secinfo4 { struct nfs4_secinfo_flavors { unsigned int num_flavors; - struct nfs4_secinfo4 flavors[0]; + struct nfs4_secinfo4 flavors[]; }; struct nfs4_secinfo_arg { -- cgit v1.3 From 1ac71ec0130cce5bed3ec11ffc88651097a24173 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 28 Apr 2020 08:47:43 +0000 Subject: mtd: spi-nor: Fix SPI NOR acronym The correct terminology is serial NOR flash or SPI NOR. s/SPI-NOR/SPI NOR and s/spi-nor/SPI NOR across the subsystem. Signed-off-by: Tudor Ambarus Reviewed-by: Sergei Shtylyov --- drivers/mtd/spi-nor/Kconfig | 4 ++-- drivers/mtd/spi-nor/controllers/Kconfig | 4 ++-- drivers/mtd/spi-nor/controllers/aspeed-smc.c | 2 +- drivers/mtd/spi-nor/controllers/hisi-sfc.c | 2 +- drivers/mtd/spi-nor/controllers/nxp-spifi.c | 2 +- drivers/mtd/spi-nor/core.c | 4 ++-- include/linux/mtd/spi-nor.h | 8 ++++---- 7 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/spi-nor/Kconfig b/drivers/mtd/spi-nor/Kconfig index 6e816eafb312..ffc4b380f2b1 100644 --- a/drivers/mtd/spi-nor/Kconfig +++ b/drivers/mtd/spi-nor/Kconfig @@ -1,12 +1,12 @@ # SPDX-License-Identifier: GPL-2.0-only menuconfig MTD_SPI_NOR - tristate "SPI-NOR device support" + tristate "SPI NOR device support" depends on MTD depends on MTD && SPI_MASTER select SPI_MEM help This is the framework for the SPI NOR which can be used by the SPI - device drivers and the SPI-NOR device driver. + device drivers and the SPI NOR device driver. if MTD_SPI_NOR diff --git a/drivers/mtd/spi-nor/controllers/Kconfig b/drivers/mtd/spi-nor/controllers/Kconfig index 10b86660b821..d89a5ea9446a 100644 --- a/drivers/mtd/spi-nor/controllers/Kconfig +++ b/drivers/mtd/spi-nor/controllers/Kconfig @@ -21,11 +21,11 @@ config SPI_CADENCE_QUADSPI Flash as an MTD device. config SPI_HISI_SFC - tristate "Hisilicon FMC SPI-NOR Flash Controller(SFC)" + tristate "Hisilicon FMC SPI NOR Flash Controller(SFC)" depends on ARCH_HISI || COMPILE_TEST depends on HAS_IOMEM help - This enables support for HiSilicon FMC SPI-NOR flash controller. + This enables support for HiSilicon FMC SPI NOR flash controller. config SPI_NXP_SPIFI tristate "NXP SPI Flash Interface (SPIFI)" diff --git a/drivers/mtd/spi-nor/controllers/aspeed-smc.c b/drivers/mtd/spi-nor/controllers/aspeed-smc.c index ae85e4c0e114..7225870e8b18 100644 --- a/drivers/mtd/spi-nor/controllers/aspeed-smc.c +++ b/drivers/mtd/spi-nor/controllers/aspeed-smc.c @@ -727,7 +727,7 @@ static int aspeed_smc_chip_setup_finish(struct aspeed_smc_chip *chip) /* * TODO: Adjust clocks if fast read is supported and interpret - * SPI-NOR flags to adjust controller settings. + * SPI NOR flags to adjust controller settings. */ if (chip->nor.read_proto == SNOR_PROTO_1_1_1) { if (chip->nor.read_dummy == 0) diff --git a/drivers/mtd/spi-nor/controllers/hisi-sfc.c b/drivers/mtd/spi-nor/controllers/hisi-sfc.c index 6c7a4118752e..95c502173cbd 100644 --- a/drivers/mtd/spi-nor/controllers/hisi-sfc.c +++ b/drivers/mtd/spi-nor/controllers/hisi-sfc.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-or-later /* - * HiSilicon FMC SPI-NOR flash controller driver + * HiSilicon FMC SPI NOR flash controller driver * * Copyright (c) 2015-2016 HiSilicon Technologies Co., Ltd. */ diff --git a/drivers/mtd/spi-nor/controllers/nxp-spifi.c b/drivers/mtd/spi-nor/controllers/nxp-spifi.c index 9a5b1a7c636a..5703e8313980 100644 --- a/drivers/mtd/spi-nor/controllers/nxp-spifi.c +++ b/drivers/mtd/spi-nor/controllers/nxp-spifi.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * SPI-NOR driver for NXP SPI Flash Interface (SPIFI) + * SPI NOR driver for NXP SPI Flash Interface (SPIFI) * * Copyright (C) 2015 Joachim Eastwood * diff --git a/drivers/mtd/spi-nor/core.c b/drivers/mtd/spi-nor/core.c index 1ab4386a099a..0369d98b2d12 100644 --- a/drivers/mtd/spi-nor/core.c +++ b/drivers/mtd/spi-nor/core.c @@ -2469,7 +2469,7 @@ static int spi_nor_select_read(struct spi_nor *nor, nor->read_proto = read->proto; /* - * In the spi-nor framework, we don't need to make the difference + * In the SPI NOR framework, we don't need to make the difference * between mode clock cycles and wait state clock cycles. * Indeed, the value of the mode clock cycles is used by a QSPI * flash memory to know whether it should enter or leave its 0-4-4 @@ -3126,7 +3126,7 @@ int spi_nor_scan(struct spi_nor *nor, const char *name, /* * Make sure the XSR_RDY flag is set before calling * spi_nor_wait_till_ready(). Xilinx S3AN share MFR - * with Atmel spi-nor + * with Atmel SPI NOR. */ if (info->flags & SPI_NOR_XSR_RDY) nor->flags |= SNOR_F_READY_XSR_RDY; diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index bebff2729c18..60bac2c0ec45 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -302,7 +302,7 @@ struct spi_nor; * @read: read data from the SPI NOR. * @write: write data to the SPI NOR. * @erase: erase a sector of the SPI NOR at the offset @offs; if - * not provided by the driver, spi-nor will send the erase + * not provided by the driver, SPI NOR will send the erase * opcode via write_reg(). */ struct spi_nor_controller_ops { @@ -336,7 +336,7 @@ struct spi_nor_flash_parameter; * layer is not DMA-able * @bouncebuf_size: size of the bounce buffer * @info: SPI NOR part JEDEC MFR ID and other info - * @manufacturer: spi-nor manufacturer + * @manufacturer: SPI NOR manufacturer * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes * @erase_opcode: the opcode for erasing a sector @@ -344,12 +344,12 @@ struct spi_nor_flash_parameter; * @read_dummy: the dummy needed by the read operation * @program_opcode: the program opcode * @sst_write_second: used by the SST write operation - * @flags: flag options for the current SPI-NOR (SNOR_F_*) + * @flags: flag options for the current SPI NOR (SNOR_F_*) * @read_proto: the SPI protocol for read operations * @write_proto: the SPI protocol for write operations * @reg_proto: the SPI protocol for read_reg/write_reg/erase operations * @controller_ops: SPI NOR controller driver specific operations. - * @params: [FLASH-SPECIFIC] SPI-NOR flash parameters and settings. + * @params: [FLASH-SPECIFIC] SPI NOR flash parameters and settings. * The structure includes legacy flash parameters and * settings that can be overwritten by the spi_nor_fixups * hooks, or dynamically when parsing the SFDP tables. -- cgit v1.3 From 7df915e540ec51ce9ac5f2d2d9801d0356b617da Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 25 May 2020 14:05:04 +0200 Subject: i2c: avoid confusing naming in header i2c_client pointers are usually named 'client'. Use it here to get rid of the ambiguity of 'dev->dev'. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 49d29054e657..c10617bb980a 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -351,14 +351,14 @@ static inline struct i2c_client *kobj_to_i2c_client(struct kobject *kobj) return to_i2c_client(dev); } -static inline void *i2c_get_clientdata(const struct i2c_client *dev) +static inline void *i2c_get_clientdata(const struct i2c_client *client) { - return dev_get_drvdata(&dev->dev); + return dev_get_drvdata(&client->dev); } -static inline void i2c_set_clientdata(struct i2c_client *dev, void *data) +static inline void i2c_set_clientdata(struct i2c_client *client, void *data) { - dev_set_drvdata(&dev->dev, data); + dev_set_drvdata(&client->dev, data); } /* I2C slave support */ -- cgit v1.3 From 655078f5f5286fe0ab38e90c4695001a0e15e9dd Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 13 May 2020 20:55:57 +0200 Subject: kobject: increase allowed number of uevent variables SBS battery driver exposes 32 power supply properties now, which will result in uevent failure on (un)plugging the battery. Other drivers (e.g. bq27xxx) are also coming close to this limit, so increase it. Acked-by: Greg Kroah-Hartman Reviewed-by: Emil Velikov Signed-off-by: Sebastian Reichel --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e2ca0a292e21..75e822569e39 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -29,7 +29,7 @@ #include #define UEVENT_HELPER_PATH_LEN 256 -#define UEVENT_NUM_ENVP 32 /* number of env pointers */ +#define UEVENT_NUM_ENVP 64 /* number of env pointers */ #define UEVENT_BUFFER_SIZE 2048 /* buffer for the variables */ #ifdef CONFIG_UEVENT_HELPER -- cgit v1.3 From bac705abcf345c28e419157cfcd1c44032cc9db2 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 13 May 2020 20:55:58 +0200 Subject: power: supply: core: add capacity error margin property Add a property for reporting the error margin expected by fuel gauge chips. Signed-off-by: Sebastian Reichel --- Documentation/ABI/testing/sysfs-class-power | 15 +++++++++++++++ drivers/power/supply/power_supply_sysfs.c | 1 + include/linux/power_supply.h | 1 + 3 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index bf3b48f022dc..2f896555ae23 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -74,6 +74,21 @@ Description: Access: Read, Write Valid values: 0 - 100 (percent) +What: /sys/class/power_supply//capacity_error_margin +Date: April 2019 +Contact: linux-pm@vger.kernel.org +Description: + Battery capacity measurement becomes unreliable without + recalibration. This values provides the maximum error + margin expected to exist by the fuel gauge in percent. + Values close to 0% will be returned after (re-)calibration + has happened. Over time the error margin will increase. + 100% means, that the capacity related values are basically + completely useless. + + Access: Read + Valid values: 0 - 100 (percent) + What: /sys/class/power_supply//capacity_level Date: June 2009 Contact: linux-pm@vger.kernel.org diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index d21b4e0edf38..e664774a2d1e 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -178,6 +178,7 @@ static struct power_supply_attr power_supply_attrs[] = { POWER_SUPPLY_ATTR(CAPACITY), POWER_SUPPLY_ATTR(CAPACITY_ALERT_MIN), POWER_SUPPLY_ATTR(CAPACITY_ALERT_MAX), + POWER_SUPPLY_ATTR(CAPACITY_ERROR_MARGIN), POWER_SUPPLY_ENUM_ATTR(CAPACITY_LEVEL), POWER_SUPPLY_ATTR(TEMP), POWER_SUPPLY_ATTR(TEMP_MAX), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 1f60731ec7fe..453a85f25635 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -139,6 +139,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_CAPACITY, /* in percents! */ POWER_SUPPLY_PROP_CAPACITY_ALERT_MIN, /* in percents! */ POWER_SUPPLY_PROP_CAPACITY_ALERT_MAX, /* in percents! */ + POWER_SUPPLY_PROP_CAPACITY_ERROR_MARGIN, /* in percents! */ POWER_SUPPLY_PROP_CAPACITY_LEVEL, POWER_SUPPLY_PROP_TEMP, POWER_SUPPLY_PROP_TEMP_MAX, -- cgit v1.3 From feabe49e46bb556b8d43e28d4a0d459940f7a5cb Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 13 May 2020 20:55:59 +0200 Subject: power: supply: core: add manufacture date properties Some smart batteries store their manufacture date, which is useful to identify the battery and/or to know about the cell quality. Signed-off-by: Sebastian Reichel --- Documentation/ABI/testing/sysfs-class-power | 28 ++++++++++++++++++++++++++++ drivers/power/supply/power_supply_sysfs.c | 3 +++ include/linux/power_supply.h | 3 +++ 3 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index 2f896555ae23..e6d7348766b2 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -680,3 +680,31 @@ Description: Valid values: - 1: enabled - 0: disabled + +What: /sys/class/power_supply//manufacture_year +Date: January 2020 +Contact: linux-pm@vger.kernel.org +Description: + Reports the year (following Gregorian calendar) when the device has been + manufactured. + + Access: Read + Valid values: Reported as integer + +What: /sys/class/power_supply//manufacture_month +Date: January 2020 +Contact: linux-pm@vger.kernel.org +Description: + Reports the month when the device has been manufactured. + + Access: Read + Valid values: 1-12 + +What: /sys/class/power_supply//manufacture_day +Date: January 2020 +Contact: linux-pm@vger.kernel.org +Description: + Reports the day of month when the device has been manufactured. + + Access: Read + Valid values: 1-31 diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index e664774a2d1e..78d5382e69f1 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -198,6 +198,9 @@ static struct power_supply_attr power_supply_attrs[] = { POWER_SUPPLY_ATTR(PRECHARGE_CURRENT), POWER_SUPPLY_ATTR(CHARGE_TERM_CURRENT), POWER_SUPPLY_ATTR(CALIBRATE), + POWER_SUPPLY_ATTR(MANUFACTURE_YEAR), + POWER_SUPPLY_ATTR(MANUFACTURE_MONTH), + POWER_SUPPLY_ATTR(MANUFACTURE_DAY), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(MODEL_NAME), POWER_SUPPLY_ATTR(MANUFACTURER), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 453a85f25635..63ffe2a0a87b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -159,6 +159,9 @@ enum power_supply_property { POWER_SUPPLY_PROP_PRECHARGE_CURRENT, POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, POWER_SUPPLY_PROP_CALIBRATE, + POWER_SUPPLY_PROP_MANUFACTURE_YEAR, + POWER_SUPPLY_PROP_MANUFACTURE_MONTH, + POWER_SUPPLY_PROP_MANUFACTURE_DAY, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, -- cgit v1.3 From 601c2a543f02da484362b3ff9074b2cfe08750de Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 13 May 2020 20:56:00 +0200 Subject: power: supply: core: add POWER_SUPPLY_HEALTH_CALIBRATION_REQUIRED Some battery fuel gauges know when the battery needs to be recalibrated before providing usable values. This should be reported via the health property. Signed-off-by: Sebastian Reichel --- Documentation/ABI/testing/sysfs-class-power | 2 +- drivers/power/supply/power_supply_sysfs.c | 1 + include/linux/power_supply.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/ABI/testing/sysfs-class-power b/Documentation/ABI/testing/sysfs-class-power index e6d7348766b2..216d61a22f1e 100644 --- a/Documentation/ABI/testing/sysfs-class-power +++ b/Documentation/ABI/testing/sysfs-class-power @@ -205,7 +205,7 @@ Description: Valid values: "Unknown", "Good", "Overheat", "Dead", "Over voltage", "Unspecified failure", "Cold", "Watchdog timer expire", "Safety timer expire", - "Over current" + "Over current", "Calibration required" What: /sys/class/power_supply//precharge_current Date: June 2017 diff --git a/drivers/power/supply/power_supply_sysfs.c b/drivers/power/supply/power_supply_sysfs.c index 78d5382e69f1..bc79560229b5 100644 --- a/drivers/power/supply/power_supply_sysfs.c +++ b/drivers/power/supply/power_supply_sysfs.c @@ -100,6 +100,7 @@ static const char * const POWER_SUPPLY_HEALTH_TEXT[] = { [POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE] = "Watchdog timer expire", [POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE] = "Safety timer expire", [POWER_SUPPLY_HEALTH_OVERCURRENT] = "Over current", + [POWER_SUPPLY_HEALTH_CALIBRATION_REQUIRED] = "Calibration required", }; static const char * const POWER_SUPPLY_TECHNOLOGY_TEXT[] = { diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 63ffe2a0a87b..ac1345a48ad0 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -61,6 +61,7 @@ enum { POWER_SUPPLY_HEALTH_WATCHDOG_TIMER_EXPIRE, POWER_SUPPLY_HEALTH_SAFETY_TIMER_EXPIRE, POWER_SUPPLY_HEALTH_OVERCURRENT, + POWER_SUPPLY_HEALTH_CALIBRATION_REQUIRED, }; enum { -- cgit v1.3 From 24c5efe41c29ee3e55bcf5a1c9f61ca8709622e8 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 22 May 2020 12:01:33 +1000 Subject: sunrpc: clean up properly in gss_mech_unregister() gss_mech_register() calls svcauth_gss_register_pseudoflavor() for each flavour, but gss_mech_unregister() does not call auth_domain_put(). This is unbalanced and makes it impossible to reload the module. Change svcauth_gss_register_pseudoflavor() to return the registered auth_domain, and save it for later release. Cc: stable@vger.kernel.org (v2.6.12+) Link: https://bugzilla.kernel.org/show_bug.cgi?id=206651 Signed-off-by: NeilBrown Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/gss_api.h | 1 + include/linux/sunrpc/svcauth_gss.h | 3 ++- net/sunrpc/auth_gss/gss_mech_switch.c | 12 +++++++++--- net/sunrpc/auth_gss/svcauth_gss.c | 12 ++++++------ 4 files changed, 18 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index bc07e51f20d1..bf4ac8a0268c 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -84,6 +84,7 @@ struct pf_desc { u32 service; char *name; char *auth_domain_name; + struct auth_domain *domain; bool datatouch; }; diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h index ca39a388dc22..f09c82b0a7ae 100644 --- a/include/linux/sunrpc/svcauth_gss.h +++ b/include/linux/sunrpc/svcauth_gss.h @@ -20,7 +20,8 @@ int gss_svc_init(void); void gss_svc_shutdown(void); int gss_svc_init_net(struct net *net); void gss_svc_shutdown_net(struct net *net); -int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); +struct auth_domain *svcauth_gss_register_pseudoflavor(u32 pseudoflavor, + char *name); u32 svcauth_gss_flavor(struct auth_domain *dom); #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 69316ab1b9fa..fae632da1058 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -37,6 +37,8 @@ gss_mech_free(struct gss_api_mech *gm) for (i = 0; i < gm->gm_pf_num; i++) { pf = &gm->gm_pfs[i]; + if (pf->domain) + auth_domain_put(pf->domain); kfree(pf->auth_domain_name); pf->auth_domain_name = NULL; } @@ -59,6 +61,7 @@ make_auth_domain_name(char *name) static int gss_mech_svc_setup(struct gss_api_mech *gm) { + struct auth_domain *dom; struct pf_desc *pf; int i, status; @@ -68,10 +71,13 @@ gss_mech_svc_setup(struct gss_api_mech *gm) status = -ENOMEM; if (pf->auth_domain_name == NULL) goto out; - status = svcauth_gss_register_pseudoflavor(pf->pseudoflavor, - pf->auth_domain_name); - if (status) + dom = svcauth_gss_register_pseudoflavor( + pf->pseudoflavor, pf->auth_domain_name); + if (IS_ERR(dom)) { + status = PTR_ERR(dom); goto out; + } + pf->domain = dom; } return 0; out: diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 49bb346a6215..46027d0c903f 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -809,7 +809,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom) EXPORT_SYMBOL_GPL(svcauth_gss_flavor); -int +struct auth_domain * svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) { struct gss_domain *new; @@ -832,17 +832,17 @@ svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) name); stat = -EADDRINUSE; auth_domain_put(test); - kfree(new->h.name); - goto out_free_dom; + goto out_free_name; } - return 0; + return test; +out_free_name: + kfree(new->h.name); out_free_dom: kfree(new); out: - return stat; + return ERR_PTR(stat); } - EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor); static inline int -- cgit v1.3 From 6d3f922c46f2e91f63c92f8dd28381f097082912 Mon Sep 17 00:00:00 2001 From: Georgi Djakov Date: Tue, 12 May 2020 15:53:21 +0300 Subject: opp: Add support for parsing interconnect bandwidth The OPP bindings now support bandwidth values, so add support to parse it from device tree and store it into the new dev_pm_opp_icc_bw struct, which is part of the dev_pm_opp. Signed-off-by: Georgi Djakov Reviewed-by: Matthias Kaehlcke [ Viresh: Create _read_bw() and use it, renamed _of_find_icc_paths() to dev_pm_opp_of_find_icc_paths(), exported it and made opp_table argument optional. Also drop the depends on from Kconfig. ] Signed-off-by: Viresh Kumar --- drivers/opp/core.c | 28 ++++++++++-- drivers/opp/of.c | 114 ++++++++++++++++++++++++++++++++++++++++++++++++- drivers/opp/opp.h | 7 +++ include/linux/pm_opp.h | 18 ++++++++ 4 files changed, 162 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/opp/core.c b/drivers/opp/core.c index ce7e4103ec09..d19cc7970643 100644 --- a/drivers/opp/core.c +++ b/drivers/opp/core.c @@ -999,6 +999,12 @@ static struct opp_table *_allocate_opp_table(struct device *dev, int index) ret); } + /* Find interconnect path(s) for the device */ + ret = dev_pm_opp_of_find_icc_paths(dev, opp_table); + if (ret) + dev_warn(dev, "%s: Error finding interconnect paths: %d\n", + __func__, ret); + BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head); INIT_LIST_HEAD(&opp_table->opp_list); kref_init(&opp_table->kref); @@ -1057,6 +1063,7 @@ static void _opp_table_kref_release(struct kref *kref) { struct opp_table *opp_table = container_of(kref, struct opp_table, kref); struct opp_device *opp_dev, *temp; + int i; _of_clear_opp_table(opp_table); @@ -1064,6 +1071,12 @@ static void _opp_table_kref_release(struct kref *kref) if (!IS_ERR(opp_table->clk)) clk_put(opp_table->clk); + if (opp_table->paths) { + for (i = 0; i < opp_table->path_count; i++) + icc_put(opp_table->paths[i]); + kfree(opp_table->paths); + } + WARN_ON(!list_empty(&opp_table->opp_list)); list_for_each_entry_safe(opp_dev, temp, &opp_table->dev_list, node) { @@ -1243,19 +1256,23 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_remove_all_dynamic); struct dev_pm_opp *_opp_allocate(struct opp_table *table) { struct dev_pm_opp *opp; - int count, supply_size; + int supply_count, supply_size, icc_size; /* Allocate space for at least one supply */ - count = table->regulator_count > 0 ? table->regulator_count : 1; - supply_size = sizeof(*opp->supplies) * count; + supply_count = table->regulator_count > 0 ? table->regulator_count : 1; + supply_size = sizeof(*opp->supplies) * supply_count; + icc_size = sizeof(*opp->bandwidth) * table->path_count; /* allocate new OPP node and supplies structures */ - opp = kzalloc(sizeof(*opp) + supply_size, GFP_KERNEL); + opp = kzalloc(sizeof(*opp) + supply_size + icc_size, GFP_KERNEL); + if (!opp) return NULL; /* Put the supplies at the end of the OPP structure as an empty array */ opp->supplies = (struct dev_pm_opp_supply *)(opp + 1); + if (icc_size) + opp->bandwidth = (struct dev_pm_opp_icc_bw *)(opp->supplies + supply_count); INIT_LIST_HEAD(&opp->node); return opp; @@ -1290,6 +1307,9 @@ int _opp_compare_key(struct dev_pm_opp *opp1, struct dev_pm_opp *opp2) { if (opp1->rate != opp2->rate) return opp1->rate < opp2->rate ? -1 : 1; + if (opp1->bandwidth && opp2->bandwidth && + opp1->bandwidth[0].peak != opp2->bandwidth[0].peak) + return opp1->bandwidth[0].peak < opp2->bandwidth[0].peak ? -1 : 1; if (opp1->level != opp2->level) return opp1->level < opp2->level ? -1 : 1; return 0; diff --git a/drivers/opp/of.c b/drivers/opp/of.c index 303d2207e0ff..0c55862f5624 100644 --- a/drivers/opp/of.c +++ b/drivers/opp/of.c @@ -332,6 +332,62 @@ free_required_opps: return ret; } +int dev_pm_opp_of_find_icc_paths(struct device *dev, + struct opp_table *opp_table) +{ + struct device_node *np; + int ret = 0, i, count, num_paths; + struct icc_path **paths; + + np = of_node_get(dev->of_node); + if (!np) + return 0; + + count = of_count_phandle_with_args(np, "interconnects", + "#interconnect-cells"); + of_node_put(np); + if (count < 0) + return 0; + + /* two phandles when #interconnect-cells = <1> */ + if (count % 2) { + dev_err(dev, "%s: Invalid interconnects values\n", __func__); + return -EINVAL; + } + + num_paths = count / 2; + paths = kcalloc(num_paths, sizeof(*paths), GFP_KERNEL); + if (!paths) + return -ENOMEM; + + for (i = 0; i < num_paths; i++) { + paths[i] = of_icc_get_by_index(dev, i); + if (IS_ERR(paths[i])) { + ret = PTR_ERR(paths[i]); + if (ret != -EPROBE_DEFER) { + dev_err(dev, "%s: Unable to get path%d: %d\n", + __func__, i, ret); + } + goto err; + } + } + + if (opp_table) { + opp_table->paths = paths; + opp_table->path_count = num_paths; + return 0; + } + +err: + while (i--) + icc_put(paths[i]); + + kfree(paths); + + return ret; +} +EXPORT_SYMBOL_GPL(dev_pm_opp_of_find_icc_paths); + static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table, struct device_node *np) { @@ -521,9 +577,45 @@ void dev_pm_opp_of_remove_table(struct device *dev) } EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table); +static int _read_bw(struct dev_pm_opp *new_opp, struct device_node *np, + bool peak) +{ + const char *name = peak ? "opp-peak-kBps" : "opp-avg-kBps"; + struct property *prop; + int i, count, ret; + u32 *bw; + + prop = of_find_property(np, name, NULL); + if (!prop) + return -ENODEV; + + count = prop->length / sizeof(u32); + bw = kmalloc_array(count, sizeof(*bw), GFP_KERNEL); + if (!bw) + return -ENOMEM; + + ret = of_property_read_u32_array(np, name, bw, count); + if (ret) { + pr_err("%s: Error parsing %s: %d\n", __func__, name, ret); + goto out; + } + + for (i = 0; i < count; i++) { + if (peak) + new_opp->bandwidth[i].peak = kBps_to_icc(bw[i]); + else + new_opp->bandwidth[i].avg = kBps_to_icc(bw[i]); + } + +out: + kfree(bw); + return ret; +} + static int _read_opp_key(struct dev_pm_opp *new_opp, struct device_node *np, bool *rate_not_available) { + bool found = false; u64 rate; int ret; @@ -535,10 +627,30 @@ static int _read_opp_key(struct dev_pm_opp *new_opp, struct device_node *np, * bit guaranteed in clk API. */ new_opp->rate = (unsigned long)rate; + found = true; } *rate_not_available = !!ret; - of_property_read_u32(np, "opp-level", &new_opp->level); + /* + * Bandwidth consists of peak and average (optional) values: + * opp-peak-kBps = ; + * opp-avg-kBps = ; + */ + ret = _read_bw(new_opp, np, true); + if (!ret) { + found = true; + ret = _read_bw(new_opp, np, false); + } + + /* The properties were found but we failed to parse them */ + if (ret && ret != -ENODEV) + return ret; + + if (!of_property_read_u32(np, "opp-level", &new_opp->level)) + found = true; + + if (found) + return 0; return ret; } diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h index bcadb1e328a4..2b81ffef1ba4 100644 --- a/drivers/opp/opp.h +++ b/drivers/opp/opp.h @@ -12,6 +12,7 @@ #define __DRIVER_OPP_H__ #include +#include #include #include #include @@ -59,6 +60,7 @@ extern struct list_head opp_tables; * @rate: Frequency in hertz * @level: Performance level * @supplies: Power supplies voltage/current values + * @bandwidth: Interconnect bandwidth values * @clock_latency_ns: Latency (in nanoseconds) of switching to this OPP's * frequency from any other OPP's frequency. * @required_opps: List of OPPs that are required by this OPP. @@ -81,6 +83,7 @@ struct dev_pm_opp { unsigned int level; struct dev_pm_opp_supply *supplies; + struct dev_pm_opp_icc_bw *bandwidth; unsigned long clock_latency_ns; @@ -146,6 +149,8 @@ enum opp_table_access { * @regulator_count: Number of power supply regulators. Its value can be -1 * (uninitialized), 0 (no opp-microvolt property) or > 0 (has opp-microvolt * property). + * @paths: Interconnect path handles + * @path_count: Number of interconnect paths * @genpd_performance_state: Device's power domain support performance state. * @is_genpd: Marks if the OPP table belongs to a genpd. * @set_opp: Platform specific set_opp callback @@ -189,6 +194,8 @@ struct opp_table { struct clk *clk; struct regulator **regulators; int regulator_count; + struct icc_path **paths; + unsigned int path_count; bool genpd_performance_state; bool is_genpd; diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 747861816f4f..d5c4a329321d 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -41,6 +41,18 @@ struct dev_pm_opp_supply { unsigned long u_amp; }; +/** + * struct dev_pm_opp_icc_bw - Interconnect bandwidth values + * @avg: Average bandwidth corresponding to this OPP (in icc units) + * @peak: Peak bandwidth corresponding to this OPP (in icc units) + * + * This structure stores the bandwidth values for a single interconnect path. + */ +struct dev_pm_opp_icc_bw { + u32 avg; + u32 peak; +}; + /** * struct dev_pm_opp_info - OPP freq/voltage/current values * @rate: Target clk rate in hz @@ -360,6 +372,7 @@ int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpuma struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev); struct device_node *dev_pm_opp_get_of_node(struct dev_pm_opp *opp); int of_get_required_opp_performance_state(struct device_node *np, int index); +int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table); void dev_pm_opp_of_register_em(struct cpumask *cpus); #else static inline int dev_pm_opp_of_add_table(struct device *dev) @@ -408,6 +421,11 @@ static inline int of_get_required_opp_performance_state(struct device_node *np, { return -ENOTSUPP; } + +static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_table *opp_table) +{ + return -ENOTSUPP; +} #endif #endif /* __LINUX_OPP_H__ */ -- cgit v1.3 From 0430b1d5704b0f0f1d237236dde9c143f8669e49 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Mon, 18 May 2020 14:25:32 +0300 Subject: opp: Expose bandwidth information via debugfs Expose the bandwidth information as well via debugfs. Signed-off-by: Viresh Kumar Signed-off-by: Georgi Djakov --- drivers/interconnect/core.c | 18 ++++++++++++++++++ drivers/opp/debugfs.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/interconnect.h | 6 ++++++ 3 files changed, 66 insertions(+) (limited to 'include/linux') diff --git a/drivers/interconnect/core.c b/drivers/interconnect/core.c index 2d2e49780511..a56349c14985 100644 --- a/drivers/interconnect/core.c +++ b/drivers/interconnect/core.c @@ -514,6 +514,24 @@ void icc_set_tag(struct icc_path *path, u32 tag) } EXPORT_SYMBOL_GPL(icc_set_tag); +/** + * icc_get_name() - Get name of the icc path + * @path: reference to the path returned by icc_get() + * + * This function is used by an interconnect consumer to get the name of the icc + * path. + * + * Returns a valid pointer on success, or NULL otherwise. + */ +const char *icc_get_name(struct icc_path *path) +{ + if (!path) + return NULL; + + return path->name; +} +EXPORT_SYMBOL_GPL(icc_get_name); + /** * icc_set_bw() - set bandwidth constraints on an interconnect path * @path: reference to the path returned by icc_get() diff --git a/drivers/opp/debugfs.c b/drivers/opp/debugfs.c index 609665e339b6..596c185b5dda 100644 --- a/drivers/opp/debugfs.c +++ b/drivers/opp/debugfs.c @@ -32,6 +32,47 @@ void opp_debug_remove_one(struct dev_pm_opp *opp) debugfs_remove_recursive(opp->dentry); } +static ssize_t bw_name_read(struct file *fp, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct icc_path *path = fp->private_data; + char buf[64]; + int i; + + i = scnprintf(buf, sizeof(buf), "%.62s\n", icc_get_name(path)); + + return simple_read_from_buffer(userbuf, count, ppos, buf, i); +} + +static const struct file_operations bw_name_fops = { + .open = simple_open, + .read = bw_name_read, + .llseek = default_llseek, +}; + +static void opp_debug_create_bw(struct dev_pm_opp *opp, + struct opp_table *opp_table, + struct dentry *pdentry) +{ + struct dentry *d; + char name[11]; + int i; + + for (i = 0; i < opp_table->path_count; i++) { + snprintf(name, sizeof(name), "icc-path-%.1d", i); + + /* Create per-path directory */ + d = debugfs_create_dir(name, pdentry); + + debugfs_create_file("name", S_IRUGO, d, opp_table->paths[i], + &bw_name_fops); + debugfs_create_u32("peak_bw", S_IRUGO, d, + &opp->bandwidth[i].peak); + debugfs_create_u32("avg_bw", S_IRUGO, d, + &opp->bandwidth[i].avg); + } +} + static void opp_debug_create_supplies(struct dev_pm_opp *opp, struct opp_table *opp_table, struct dentry *pdentry) @@ -94,6 +135,7 @@ void opp_debug_create_one(struct dev_pm_opp *opp, struct opp_table *opp_table) &opp->clock_latency_ns); opp_debug_create_supplies(opp, opp_table, d); + opp_debug_create_bw(opp, opp_table, d); opp->dentry = d; } diff --git a/include/linux/interconnect.h b/include/linux/interconnect.h index 34e97231a6ab..1ad09efd296e 100644 --- a/include/linux/interconnect.h +++ b/include/linux/interconnect.h @@ -32,6 +32,7 @@ struct icc_path *of_icc_get_by_index(struct device *dev, int idx); void icc_put(struct icc_path *path); int icc_set_bw(struct icc_path *path, u32 avg_bw, u32 peak_bw); void icc_set_tag(struct icc_path *path, u32 tag); +const char *icc_get_name(struct icc_path *path); #else @@ -65,6 +66,11 @@ static inline void icc_set_tag(struct icc_path *path, u32 tag) { } +static inline const char *icc_get_name(struct icc_path *path) +{ + return NULL; +} + #endif /* CONFIG_INTERCONNECT */ #endif /* __LINUX_INTERCONNECT_H */ -- cgit v1.3 From 5ace60859e84113b7a185c117fbf2c429d381b59 Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Mon, 16 Mar 2020 11:22:24 +0100 Subject: i2c: smbus: Add a way to instantiate SPD EEPROMs automatically In simple cases we can instantiate SPD EEPROMs on the SMBus automatically. Start with just DDR2, DDR3 and DDR4 on x86 for now, and only for systems with no more than 4 memory slots. These limitations may be lifted later. Signed-off-by: Jean Delvare [wsa: minor change for new API] Signed-off-by: Wolfram Sang --- drivers/i2c/i2c-smbus.c | 104 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/i2c-smbus.h | 8 +++- 2 files changed, 110 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/i2c/i2c-smbus.c b/drivers/i2c/i2c-smbus.c index 809bcf8387d0..dc0108287ccf 100644 --- a/drivers/i2c/i2c-smbus.c +++ b/drivers/i2c/i2c-smbus.c @@ -3,10 +3,11 @@ * i2c-smbus.c - SMBus extensions to the I2C protocol * * Copyright (C) 2008 David Brownell - * Copyright (C) 2010 Jean Delvare + * Copyright (C) 2010-2019 Jean Delvare */ #include +#include #include #include #include @@ -196,6 +197,107 @@ EXPORT_SYMBOL_GPL(i2c_handle_smbus_alert); module_i2c_driver(smbalert_driver); +/* + * SPD is not part of SMBus but we include it here for convenience as the + * target systems are the same. + * Restrictions to automatic SPD instantiation: + * - Only works if all filled slots have the same memory type + * - Only works for DDR2, DDR3 and DDR4 for now + * - Only works on systems with 1 to 4 memory slots + */ +#if IS_ENABLED(CONFIG_DMI) +void i2c_register_spd(struct i2c_adapter *adap) +{ + int n, slot_count = 0, dimm_count = 0; + u16 handle; + u8 common_mem_type = 0x0, mem_type; + u64 mem_size; + const char *name; + + while ((handle = dmi_memdev_handle(slot_count)) != 0xffff) { + slot_count++; + + /* Skip empty slots */ + mem_size = dmi_memdev_size(handle); + if (!mem_size) + continue; + + /* Skip undefined memory type */ + mem_type = dmi_memdev_type(handle); + if (mem_type <= 0x02) /* Invalid, Other, Unknown */ + continue; + + if (!common_mem_type) { + /* First filled slot */ + common_mem_type = mem_type; + } else { + /* Check that all filled slots have the same type */ + if (mem_type != common_mem_type) { + dev_warn(&adap->dev, + "Different memory types mixed, not instantiating SPD\n"); + return; + } + } + dimm_count++; + } + + /* No useful DMI data, bail out */ + if (!dimm_count) + return; + + dev_info(&adap->dev, "%d/%d memory slots populated (from DMI)\n", + dimm_count, slot_count); + + if (slot_count > 4) { + dev_warn(&adap->dev, + "Systems with more than 4 memory slots not supported yet, not instantiating SPD\n"); + return; + } + + switch (common_mem_type) { + case 0x13: /* DDR2 */ + case 0x18: /* DDR3 */ + case 0x1C: /* LPDDR2 */ + case 0x1D: /* LPDDR3 */ + name = "spd"; + break; + case 0x1A: /* DDR4 */ + case 0x1E: /* LPDDR4 */ + name = "ee1004"; + break; + default: + dev_info(&adap->dev, + "Memory type 0x%02x not supported yet, not instantiating SPD\n", + common_mem_type); + return; + } + + /* + * We don't know in which slots the memory modules are. We could + * try to guess from the slot names, but that would be rather complex + * and unreliable, so better probe all possible addresses until we + * have found all memory modules. + */ + for (n = 0; n < slot_count && dimm_count; n++) { + struct i2c_board_info info; + unsigned short addr_list[2]; + + memset(&info, 0, sizeof(struct i2c_board_info)); + strlcpy(info.type, name, I2C_NAME_SIZE); + addr_list[0] = 0x50 + n; + addr_list[1] = I2C_CLIENT_END; + + if (!IS_ERR(i2c_new_scanned_device(adap, &info, addr_list, NULL))) { + dev_info(&adap->dev, + "Successfully instantiated SPD at 0x%hx\n", + addr_list[0]); + dimm_count--; + } + } +} +EXPORT_SYMBOL_GPL(i2c_register_spd); +#endif + MODULE_AUTHOR("Jean Delvare "); MODULE_DESCRIPTION("SMBus protocol extensions support"); MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 8c5459034f92..1e4e0de4ef8b 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -2,7 +2,7 @@ /* * i2c-smbus.h - SMBus extensions to the I2C protocol * - * Copyright (C) 2010 Jean Delvare + * Copyright (C) 2010-2019 Jean Delvare */ #ifndef _LINUX_I2C_SMBUS_H @@ -39,4 +39,10 @@ static inline int of_i2c_setup_smbus_alert(struct i2c_adapter *adap) } #endif +#if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_DMI) +void i2c_register_spd(struct i2c_adapter *adap); +#else +static inline void i2c_register_spd(struct i2c_adapter *adap) { } +#endif + #endif /* _LINUX_I2C_SMBUS_H */ -- cgit v1.3 From 9630a055256de420d528ecde9f35902a9dcd8750 Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 19 May 2020 15:00:35 +0200 Subject: mtd: rawnand: Stop using nand_release() This helper is not very useful and very often people get confused: they use nand_release() instead of nand_cleanup(). Now that all drivers have been converted to do not use nand_release() anymore, let's remove this helper. Signed-off-by: Miquel Raynal Cc: Jonathan Corbet Link: https://lore.kernel.org/linux-mtd/20200519130035.1883-63-miquel.raynal@bootlin.com --- Documentation/driver-api/mtdnand.rst | 6 ++++-- drivers/mtd/nand/raw/nand_base.c | 12 ------------ include/linux/mtd/bbm.h | 2 +- include/linux/mtd/rawnand.h | 2 -- 4 files changed, 5 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/Documentation/driver-api/mtdnand.rst b/Documentation/driver-api/mtdnand.rst index 55447659b81f..0bf8d6ec3f54 100644 --- a/Documentation/driver-api/mtdnand.rst +++ b/Documentation/driver-api/mtdnand.rst @@ -276,8 +276,10 @@ unregisters the partitions in the MTD layer. #ifdef MODULE static void __exit board_cleanup (void) { - /* Release resources, unregister device */ - nand_release (mtd_to_nand(board_mtd)); + /* Unregister device */ + WARN_ON(mtd_device_unregister(board_mtd)); + /* Release resources */ + nand_cleanup(mtd_to_nand(board_mtd)); /* unmap physical address */ iounmap(baseaddr); diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 6a6a0a36b3fd..8ce31490b9d0 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -6178,18 +6178,6 @@ void nand_cleanup(struct nand_chip *chip) EXPORT_SYMBOL_GPL(nand_cleanup); -/** - * nand_release - [NAND Interface] Unregister the MTD device and free resources - * held by the NAND device - * @chip: NAND chip object - */ -void nand_release(struct nand_chip *chip) -{ - mtd_device_unregister(nand_to_mtd(chip)); - nand_cleanup(chip); -} -EXPORT_SYMBOL_GPL(nand_release); - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steven J. Hill "); MODULE_AUTHOR("Thomas Gleixner "); diff --git a/include/linux/mtd/bbm.h b/include/linux/mtd/bbm.h index 886e30441c90..d890805f5494 100644 --- a/include/linux/mtd/bbm.h +++ b/include/linux/mtd/bbm.h @@ -98,7 +98,7 @@ struct nand_bbt_descr { /* * Flag set by nand_create_default_bbt_descr(), marking that the nand_bbt_descr - * was allocated dynamicaly and must be freed in nand_release(). Has no meaning + * was allocated dynamicaly and must be freed in nand_cleanup(). Has no meaning * in nand_chip.bbt_options. */ #define NAND_BBT_DYNAMICSTRUCT 0x80000000 diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2804c13e5662..b0b358908a47 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -1398,8 +1398,6 @@ void nand_wait_ready(struct nand_chip *chip); * sucessful nand_scan(). */ void nand_cleanup(struct nand_chip *chip); -/* Unregister the MTD device and calls nand_cleanup() */ -void nand_release(struct nand_chip *chip); /* * External helper for controller drivers that have to implement the WAITRDY -- cgit v1.3 From f66a6fd0dc7cc49c891a167937e161114f48a62e Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 26 May 2020 21:56:14 +0200 Subject: mtd: rawnand: Avoid a typedef In new code, the use of typedef is discouraged. Turn this one in the raw NAND core into a regular enumeration. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200526195633.11543-3-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 4 ++-- include/linux/mtd/rawnand.h | 6 +++--- include/linux/platform_data/mtd-davinci.h | 2 +- include/linux/platform_data/mtd-nand-s3c2410.h | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 8ce31490b9d0..4bf614d4b7ee 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5099,8 +5099,8 @@ static int of_get_nand_ecc_mode(struct device_node *np) /* * For backward compatibility we support few obsoleted values that don't - * have their mappings into nand_ecc_modes_t anymore (they were merged - * with other enums). + * have their mappings into the nand_ecc_mode enum anymore (they were + * merged with other enums). */ if (!strcasecmp(pm, "soft_bch")) return NAND_ECC_SOFT; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index b0b358908a47..8f662df02fdd 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -83,14 +83,14 @@ struct nand_chip; /* * Constants for ECC_MODES */ -typedef enum { +enum nand_ecc_mode { NAND_ECC_NONE, NAND_ECC_SOFT, NAND_ECC_HW, NAND_ECC_HW_SYNDROME, NAND_ECC_HW_OOB_FIRST, NAND_ECC_ON_DIE, -} nand_ecc_modes_t; +}; enum nand_ecc_algo { NAND_ECC_UNKNOWN, @@ -362,7 +362,7 @@ static const struct nand_ecc_caps __name = { \ * @write_oob: function to write chip OOB data */ struct nand_ecc_ctrl { - nand_ecc_modes_t mode; + enum nand_ecc_mode mode; enum nand_ecc_algo algo; int steps; int size; diff --git a/include/linux/platform_data/mtd-davinci.h b/include/linux/platform_data/mtd-davinci.h index 08e639e047e5..03e92c71b3fa 100644 --- a/include/linux/platform_data/mtd-davinci.h +++ b/include/linux/platform_data/mtd-davinci.h @@ -68,7 +68,7 @@ struct davinci_nand_pdata { /* platform_data */ * Newer ones also support 4-bit ECC, but are awkward * using it with large page chips. */ - nand_ecc_modes_t ecc_mode; + enum nand_ecc_mode ecc_mode; u8 ecc_bits; /* e.g. NAND_BUSWIDTH_16 */ diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index deb849bcf0ec..08675b16f9e1 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -49,7 +49,7 @@ struct s3c2410_platform_nand { unsigned int ignore_unset_ecc:1; - nand_ecc_modes_t ecc_mode; + enum nand_ecc_mode ecc_mode; int nr_sets; struct s3c2410_nand_set *sets; -- cgit v1.3 From 74e24cd2376d9cc4cfc6edad8610780b79fd5def Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 26 May 2020 21:56:15 +0200 Subject: mtd: rawnand: Drop OOB_FIRST placement scheme This scheme has been introduced for the Davinci controller and means that the OOB area must be read *before* the rest of the data. This has nothing to do with the ECC in OOB placement as it could be understood and most importantly, there is no point in having this function out of the Davinci NAND controller driver. A DT property for this scheme has been added but never used, even by the Davinci driver which only uses this scheme to change the default nand_read_page(). Move the main read_page() helper into the Davinci driver and remove the remaining boilerplate. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200526195633.11543-4-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/davinci_nand.c | 126 ++++++++++++++++++++++++++++-------- drivers/mtd/nand/raw/nand_base.c | 81 ----------------------- include/linux/mtd/rawnand.h | 1 - 3 files changed, 98 insertions(+), 110 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/davinci_nand.c b/drivers/mtd/nand/raw/davinci_nand.c index 52b87304954b..d975a62caaa5 100644 --- a/drivers/mtd/nand/raw/davinci_nand.c +++ b/drivers/mtd/nand/raw/davinci_nand.c @@ -371,6 +371,77 @@ correct: return corrected; } +/** + * nand_read_page_hwecc_oob_first - hw ecc, read oob first + * @chip: nand chip info structure + * @buf: buffer to store read data + * @oob_required: caller requires OOB data read to chip->oob_poi + * @page: page number to read + * + * Hardware ECC for large page chips, require OOB to be read first. For this + * ECC mode, the write_page method is re-used from ECC_HW. These methods + * read/write ECC from the OOB area, unlike the ECC_HW_SYNDROME support with + * multiple ECC steps, follows the "infix ECC" scheme and reads/writes ECC from + * the data area, by overwriting the NAND manufacturer bad block markings. + */ +static int nand_davinci_read_page_hwecc_oob_first(struct nand_chip *chip, + uint8_t *buf, + int oob_required, int page) +{ + struct mtd_info *mtd = nand_to_mtd(chip); + int i, eccsize = chip->ecc.size, ret; + int eccbytes = chip->ecc.bytes; + int eccsteps = chip->ecc.steps; + uint8_t *p = buf; + uint8_t *ecc_code = chip->ecc.code_buf; + uint8_t *ecc_calc = chip->ecc.calc_buf; + unsigned int max_bitflips = 0; + + /* Read the OOB area first */ + ret = nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); + if (ret) + return ret; + + ret = nand_read_page_op(chip, page, 0, NULL, 0); + if (ret) + return ret; + + ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0, + chip->ecc.total); + if (ret) + return ret; + + for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { + int stat; + + chip->ecc.hwctl(chip, NAND_ECC_READ); + + ret = nand_read_data_op(chip, p, eccsize, false, false); + if (ret) + return ret; + + chip->ecc.calculate(chip, p, &ecc_calc[i]); + + stat = chip->ecc.correct(chip, p, &ecc_code[i], NULL); + if (stat == -EBADMSG && + (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) { + /* check for empty pages with bitflips */ + stat = nand_check_erased_ecc_chunk(p, eccsize, + &ecc_code[i], + eccbytes, NULL, 0, + chip->ecc.strength); + } + + if (stat < 0) { + mtd->ecc_stats.failed++; + } else { + mtd->ecc_stats.corrected += stat; + max_bitflips = max_t(unsigned int, max_bitflips, stat); + } + } + return max_bitflips; +} + /*----------------------------------------------------------------------*/ /* An ECC layout for using 4-bit ECC with small-page flash, storing @@ -530,6 +601,13 @@ static int davinci_nand_attach_chip(struct nand_chip *chip) break; case NAND_ECC_HW: if (pdata->ecc_bits == 4) { + int chunks = mtd->writesize / 512; + + if (!chunks || mtd->oobsize < 16) { + dev_dbg(&info->pdev->dev, "too small\n"); + return -EINVAL; + } + /* * No sanity checks: CPUs must support this, * and the chips may not use NAND_BUSWIDTH_16. @@ -552,6 +630,26 @@ static int davinci_nand_attach_chip(struct nand_chip *chip) info->chip.ecc.bytes = 10; info->chip.ecc.options = NAND_ECC_GENERIC_ERASED_CHECK; info->chip.ecc.algo = NAND_ECC_BCH; + + /* + * Update ECC layout if needed ... for 1-bit HW ECC, the + * default is OK, but it allocates 6 bytes when only 3 + * are needed (for each 512 bytes). For 4-bit HW ECC, + * the default is not usable: 10 bytes needed, not 6. + * + * For small page chips, preserve the manufacturer's + * badblock marking data ... and make sure a flash BBT + * table marker fits in the free bytes. + */ + if (chunks == 1) { + mtd_set_ooblayout(mtd, + &hwecc4_small_ooblayout_ops); + } else if (chunks == 4 || chunks == 8) { + mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); + info->chip.ecc.read_page = nand_davinci_read_page_hwecc_oob_first; + } else { + return -EIO; + } } else { /* 1bit ecc hamming */ info->chip.ecc.calculate = nand_davinci_calculate_1bit; @@ -567,34 +665,6 @@ static int davinci_nand_attach_chip(struct nand_chip *chip) return -EINVAL; } - /* - * Update ECC layout if needed ... for 1-bit HW ECC, the default - * is OK, but it allocates 6 bytes when only 3 are needed (for - * each 512 bytes). For the 4-bit HW ECC, that default is not - * usable: 10 bytes are needed, not 6. - */ - if (pdata->ecc_bits == 4) { - int chunks = mtd->writesize / 512; - - if (!chunks || mtd->oobsize < 16) { - dev_dbg(&info->pdev->dev, "too small\n"); - return -EINVAL; - } - - /* For small page chips, preserve the manufacturer's - * badblock marking data ... and make sure a flash BBT - * table marker fits in the free bytes. - */ - if (chunks == 1) { - mtd_set_ooblayout(mtd, &hwecc4_small_ooblayout_ops); - } else if (chunks == 4 || chunks == 8) { - mtd_set_ooblayout(mtd, &nand_ooblayout_lp_ops); - info->chip.ecc.mode = NAND_ECC_HW_OOB_FIRST; - } else { - return -EIO; - } - } - return ret; } diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index 4bf614d4b7ee..d7c791de3e88 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -3075,76 +3075,6 @@ static int nand_read_page_hwecc(struct nand_chip *chip, uint8_t *buf, return max_bitflips; } -/** - * nand_read_page_hwecc_oob_first - [REPLACEABLE] hw ecc, read oob first - * @chip: nand chip info structure - * @buf: buffer to store read data - * @oob_required: caller requires OOB data read to chip->oob_poi - * @page: page number to read - * - * Hardware ECC for large page chips, require OOB to be read first. For this - * ECC mode, the write_page method is re-used from ECC_HW. These methods - * read/write ECC from the OOB area, unlike the ECC_HW_SYNDROME support with - * multiple ECC steps, follows the "infix ECC" scheme and reads/writes ECC from - * the data area, by overwriting the NAND manufacturer bad block markings. - */ -static int nand_read_page_hwecc_oob_first(struct nand_chip *chip, uint8_t *buf, - int oob_required, int page) -{ - struct mtd_info *mtd = nand_to_mtd(chip); - int i, eccsize = chip->ecc.size, ret; - int eccbytes = chip->ecc.bytes; - int eccsteps = chip->ecc.steps; - uint8_t *p = buf; - uint8_t *ecc_code = chip->ecc.code_buf; - uint8_t *ecc_calc = chip->ecc.calc_buf; - unsigned int max_bitflips = 0; - - /* Read the OOB area first */ - ret = nand_read_oob_op(chip, page, 0, chip->oob_poi, mtd->oobsize); - if (ret) - return ret; - - ret = nand_read_page_op(chip, page, 0, NULL, 0); - if (ret) - return ret; - - ret = mtd_ooblayout_get_eccbytes(mtd, ecc_code, chip->oob_poi, 0, - chip->ecc.total); - if (ret) - return ret; - - for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize) { - int stat; - - chip->ecc.hwctl(chip, NAND_ECC_READ); - - ret = nand_read_data_op(chip, p, eccsize, false, false); - if (ret) - return ret; - - chip->ecc.calculate(chip, p, &ecc_calc[i]); - - stat = chip->ecc.correct(chip, p, &ecc_code[i], NULL); - if (stat == -EBADMSG && - (chip->ecc.options & NAND_ECC_GENERIC_ERASED_CHECK)) { - /* check for empty pages with bitflips */ - stat = nand_check_erased_ecc_chunk(p, eccsize, - &ecc_code[i], eccbytes, - NULL, 0, - chip->ecc.strength); - } - - if (stat < 0) { - mtd->ecc_stats.failed++; - } else { - mtd->ecc_stats.corrected += stat; - max_bitflips = max_t(unsigned int, max_bitflips, stat); - } - } - return max_bitflips; -} - /** * nand_read_page_syndrome - [REPLACEABLE] hardware ECC syndrome based page read * @chip: nand chip info structure @@ -5080,7 +5010,6 @@ static const char * const nand_ecc_modes[] = { [NAND_ECC_SOFT] = "soft", [NAND_ECC_HW] = "hw", [NAND_ECC_HW_SYNDROME] = "hw_syndrome", - [NAND_ECC_HW_OOB_FIRST] = "hw_oob_first", [NAND_ECC_ON_DIE] = "on-die", }; @@ -5829,16 +5758,6 @@ static int nand_scan_tail(struct nand_chip *chip) */ switch (ecc->mode) { - case NAND_ECC_HW_OOB_FIRST: - /* Similar to NAND_ECC_HW, but a separate read_page handle */ - if (!ecc->calculate || !ecc->correct || !ecc->hwctl) { - WARN(1, "No ECC functions supplied; hardware ECC not possible\n"); - ret = -EINVAL; - goto err_nand_manuf_cleanup; - } - if (!ecc->read_page) - ecc->read_page = nand_read_page_hwecc_oob_first; - fallthrough; case NAND_ECC_HW: /* Use standard hwecc read page function? */ if (!ecc->read_page) diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 8f662df02fdd..605d64ead3a8 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -88,7 +88,6 @@ enum nand_ecc_mode { NAND_ECC_SOFT, NAND_ECC_HW, NAND_ECC_HW_SYNDROME, - NAND_ECC_HW_OOB_FIRST, NAND_ECC_ON_DIE, }; -- cgit v1.3 From 86f2b225adf4ecd2edfdc8541a7342645556ac3b Mon Sep 17 00:00:00 2001 From: Miquel Raynal Date: Tue, 26 May 2020 21:56:18 +0200 Subject: mtd: rawnand: Add an invalid ECC mode to discriminate with valid ones NAND ECC modes (or providers) have their own enumeration but, unlike their algorithms counterpart, there is no invalid or uninitialized value to discriminate between an error and having chosen a no-ECC situation. Add an "invalid" entry for this purpose. Signed-off-by: Miquel Raynal Reviewed-by: Boris Brezillon Link: https://lore.kernel.org/linux-mtd/20200526195633.11543-7-miquel.raynal@bootlin.com --- drivers/mtd/nand/raw/nand_base.c | 2 +- include/linux/mtd/rawnand.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mtd/nand/raw/nand_base.c b/drivers/mtd/nand/raw/nand_base.c index d46d34d0d8be..45124dbb1835 100644 --- a/drivers/mtd/nand/raw/nand_base.c +++ b/drivers/mtd/nand/raw/nand_base.c @@ -5022,7 +5022,7 @@ static int of_get_nand_ecc_mode(struct device_node *np) if (err < 0) return err; - for (i = 0; i < ARRAY_SIZE(nand_ecc_modes); i++) + for (i = NAND_ECC_NONE; i < ARRAY_SIZE(nand_ecc_modes); i++) if (!strcasecmp(pm, nand_ecc_modes[i])) return i; diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 605d64ead3a8..65b1c1c18b41 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -84,6 +84,7 @@ struct nand_chip; * Constants for ECC_MODES */ enum nand_ecc_mode { + NAND_ECC_INVALID, NAND_ECC_NONE, NAND_ECC_SOFT, NAND_ECC_HW, -- cgit v1.3 From a865e420b9561235851c3f5d483c82ef389d29bd Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 6 Apr 2020 08:42:55 -0400 Subject: virtio: force spec specified alignment on types The ring element addresses are passed between components with different alignments assumptions. Thus, if guest/userspace selects a pointer and host then gets and dereferences it, we might need to decrease the compiler-selected alignment to prevent compiler on the host from assuming pointer is aligned. This actually triggers on ARM with -mabi=apcs-gnu - which is a deprecated configuration, but it seems safer to handle this generally. Note that userspace that allocates the memory is actually OK and does not need to be fixed, but userspace that gets it from guest or another process does need to be fixed. The later doesn't generally talk to the kernel so while it might be buggy it's not talking to the kernel in the buggy way - it's just using the header in the buggy way - so fixing header and asking userspace to recompile is the best we can do. I verified that the produced kernel binary on x86 is exactly identical before and after the change. Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/vhost/vhost.c | 8 +++---- drivers/vhost/vhost.h | 6 +++--- drivers/vhost/vringh.c | 6 +++--- include/linux/vringh.h | 6 +++--- include/uapi/linux/virtio_ring.h | 46 ++++++++++++++++++++++++++++++---------- 5 files changed, 48 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 21a59b598ed8..96d9871fa0cb 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1244,9 +1244,9 @@ static int vhost_iotlb_miss(struct vhost_virtqueue *vq, u64 iova, int access) } static bool vq_access_ok(struct vhost_virtqueue *vq, unsigned int num, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { return access_ok(desc, vhost_get_desc_size(vq, num)) && @@ -2301,7 +2301,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, unsigned count) { - struct vring_used_elem __user *used; + vring_used_elem_t __user *used; u16 old, new; int start; diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index f8403bd46b85..60cab4c78229 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -67,9 +67,9 @@ struct vhost_virtqueue { /* The actual ring of buffers. */ struct mutex mutex; unsigned int num; - struct vring_desc __user *desc; - struct vring_avail __user *avail; - struct vring_used __user *used; + vring_desc_t __user *desc; + vring_avail_t __user *avail; + vring_used_t __user *used; const struct vhost_iotlb_map *meta_iotlb[VHOST_NUM_ADDRS]; struct file *kick; struct eventfd_ctx *call_ctx; diff --git a/drivers/vhost/vringh.c b/drivers/vhost/vringh.c index ba8e0d6cfd97..e059a9a47cdf 100644 --- a/drivers/vhost/vringh.c +++ b/drivers/vhost/vringh.c @@ -620,9 +620,9 @@ static inline int xfer_to_user(const struct vringh *vrh, */ int vringh_init_user(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used) + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used) { /* Sane power of 2 please! */ if (!num || num > 0xffff || (num & (num - 1))) { diff --git a/include/linux/vringh.h b/include/linux/vringh.h index 9e2763d7c159..59bd50f99291 100644 --- a/include/linux/vringh.h +++ b/include/linux/vringh.h @@ -105,9 +105,9 @@ struct vringh_kiov { /* Helpers for userspace vrings. */ int vringh_init_user(struct vringh *vrh, u64 features, unsigned int num, bool weak_barriers, - struct vring_desc __user *desc, - struct vring_avail __user *avail, - struct vring_used __user *used); + vring_desc_t __user *desc, + vring_avail_t __user *avail, + vring_used_t __user *used); static inline void vringh_iov_init(struct vringh_iov *iov, struct iovec *iovec, unsigned num) diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h index 9223c3a5c46a..476d3e5c0fe7 100644 --- a/include/uapi/linux/virtio_ring.h +++ b/include/uapi/linux/virtio_ring.h @@ -86,6 +86,13 @@ * at the end of the used ring. Guest should ignore the used->flags field. */ #define VIRTIO_RING_F_EVENT_IDX 29 +/* Alignment requirements for vring elements. + * When using pre-virtio 1.0 layout, these fall out naturally. + */ +#define VRING_AVAIL_ALIGN_SIZE 2 +#define VRING_USED_ALIGN_SIZE 4 +#define VRING_DESC_ALIGN_SIZE 16 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -112,29 +119,46 @@ struct vring_used_elem { __virtio32 len; }; +typedef struct vring_used_elem __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_elem_t; + struct vring_used { __virtio16 flags; __virtio16 idx; - struct vring_used_elem ring[]; + vring_used_elem_t ring[]; }; +/* + * The ring element addresses are passed between components with different + * alignments assumptions. Thus, we might need to decrease the compiler-selected + * alignment, and so must use a typedef to make sure the aligned attribute + * actually takes hold: + * + * https://gcc.gnu.org/onlinedocs//gcc/Common-Type-Attributes.html#Common-Type-Attributes + * + * When used on a struct, or struct member, the aligned attribute can only + * increase the alignment; in order to decrease it, the packed attribute must + * be specified as well. When used as part of a typedef, the aligned attribute + * can both increase and decrease alignment, and specifying the packed + * attribute generates a warning. + */ +typedef struct vring_desc __attribute__((aligned(VRING_DESC_ALIGN_SIZE))) + vring_desc_t; +typedef struct vring_avail __attribute__((aligned(VRING_AVAIL_ALIGN_SIZE))) + vring_avail_t; +typedef struct vring_used __attribute__((aligned(VRING_USED_ALIGN_SIZE))) + vring_used_t; + struct vring { unsigned int num; - struct vring_desc *desc; + vring_desc_t *desc; - struct vring_avail *avail; + vring_avail_t *avail; - struct vring_used *used; + vring_used_t *used; }; -/* Alignment requirements for vring elements. - * When using pre-virtio 1.0 layout, these fall out naturally. - */ -#define VRING_AVAIL_ALIGN_SIZE 2 -#define VRING_USED_ALIGN_SIZE 4 -#define VRING_DESC_ALIGN_SIZE 16 - #ifndef VIRTIO_RING_NO_LEGACY /* The standard layout for the ring is a continuous chunk of memory which looks -- cgit v1.3 From 5ea75ae6ae60d13dfa35fd5d2e2a81824cba6662 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 18 Feb 2020 17:30:05 -0500 Subject: user_regset_copyout_zero(): use clear_user() that's the only caller of __clear_user() in generic code, and it's not hot enough to bother with skipping access_ok(). Signed-off-by: Al Viro --- include/linux/regset.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index bf0243779738..46d6ae68c455 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -320,7 +320,7 @@ static inline int user_regset_copyout_zero(unsigned int *pos, if (*kbuf) { memset(*kbuf, 0, copy); *kbuf += copy; - } else if (__clear_user(*ubuf, copy)) + } else if (clear_user(*ubuf, copy)) return -EFAULT; else *ubuf += copy; -- cgit v1.3 From df820f8de4e481222b17f9bcee7b909ae8167529 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 4 Jun 2020 10:48:19 +0200 Subject: ovl: make private mounts longterm Overlayfs is using clone_private_mount() to create internal mounts for underlying layers. These are used for operations requiring a path, such as dentry_open(). Since these private mounts are not in any namespace they are treated as short term, "detached" mounts and mntput() involves taking the global mount_lock, which can result in serious cacheline pingpong. Make these private mounts longterm instead, which trade the penalty on mntput() for a slightly longer shutdown time due to an added RCU grace period when putting these mounts. Introduce a new helper kern_unmount_many() that can take care of multiple longterm mounts with a single RCU grace period. Cc: Al Viro Signed-off-by: Miklos Szeredi --- Documentation/filesystems/porting.rst | 7 +++++++ fs/namespace.c | 16 ++++++++++++++++ fs/overlayfs/super.c | 7 ++++++- include/linux/mount.h | 2 ++ 4 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/porting.rst b/Documentation/filesystems/porting.rst index 26c093969573..867036aa90b8 100644 --- a/Documentation/filesystems/porting.rst +++ b/Documentation/filesystems/porting.rst @@ -858,3 +858,10 @@ be misspelled d_alloc_anon(). [should've been added in 2016] stale comment in finish_open() nonwithstanding, failure exits in ->atomic_open() instances should *NOT* fput() the file, no matter what. Everything is handled by the caller. + +--- + +**mandatory** + +clone_private_mount() returns a longterm mount now, so the proper destructor of +its result is kern_unmount() or kern_unmount_array(). diff --git a/fs/namespace.c b/fs/namespace.c index a28e4db075ed..d53517f1d741 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1879,6 +1879,9 @@ struct vfsmount *clone_private_mount(const struct path *path) if (IS_ERR(new_mnt)) return ERR_CAST(new_mnt); + /* Longterm mount to be removed by kern_unmount*() */ + new_mnt->mnt_ns = MNT_NS_INTERNAL; + return &new_mnt->mnt; } EXPORT_SYMBOL_GPL(clone_private_mount); @@ -3804,6 +3807,19 @@ void kern_unmount(struct vfsmount *mnt) } EXPORT_SYMBOL(kern_unmount); +void kern_unmount_array(struct vfsmount *mnt[], unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) + if (mnt[i]) + real_mount(mnt[i])->mnt_ns = NULL; + synchronize_rcu_expedited(); + for (i = 0; i < num; i++) + mntput(mnt[i]); +} +EXPORT_SYMBOL(kern_unmount_array); + bool our_mnt(struct vfsmount *mnt) { return check_mnt(real_mount(mnt)); diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index eb81d8760a6a..8d8cd46e1482 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -211,6 +211,7 @@ static void ovl_destroy_inode(struct inode *inode) static void ovl_free_fs(struct ovl_fs *ofs) { + struct vfsmount **mounts; unsigned i; iput(ofs->workbasedir_trap); @@ -224,10 +225,14 @@ static void ovl_free_fs(struct ovl_fs *ofs) dput(ofs->workbasedir); if (ofs->upperdir_locked) ovl_inuse_unlock(ovl_upper_mnt(ofs)->mnt_root); + + /* Hack! Reuse ofs->layers as a vfsmount array before freeing it */ + mounts = (struct vfsmount **) ofs->layers; for (i = 0; i < ofs->numlayer; i++) { iput(ofs->layers[i].trap); - mntput(ofs->layers[i].mnt); + mounts[i] = ofs->layers[i].mnt; } + kern_unmount_array(mounts, ofs->numlayer); kfree(ofs->layers); for (i = 0; i < ofs->numfs; i++) free_anon_bdev(ofs->fs[i].pseudo_dev); diff --git a/include/linux/mount.h b/include/linux/mount.h index bf8cc4108b8f..8de95a0bec8d 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -109,4 +109,6 @@ extern unsigned int sysctl_mount_max; extern bool path_is_mountpoint(const struct path *path); +extern void kern_unmount_array(struct vfsmount *mnt[], unsigned int num); + #endif /* _LINUX_MOUNT_H */ -- cgit v1.3 From d56f5136b01020155b6b0a29f69d924687529bee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Jun 2020 15:16:52 +0200 Subject: KVM: let kvm_destroy_vm_debugfs clean up vCPU debugfs directories After commit 63d0434 ("KVM: x86: move kvm_create_vcpu_debugfs after last failure point") we are creating the pre-vCPU debugfs files after the creation of the vCPU file descriptor. This makes it possible for userspace to reach kvm_vcpu_release before kvm_create_vcpu_debugfs has finished. The vcpu->debugfs_dentry then does not have any associated inode anymore, and this causes a NULL-pointer dereference in debugfs_create_file. The solution is simply to avoid removing the files; they are cleaned up when the VM file descriptor is closed (and that must be after KVM_CREATE_VCPU returns). We can stop storing the dentry in struct kvm_vcpu too, because it is not needed anywhere after kvm_create_vcpu_debugfs returns. Reported-by: syzbot+705f4401d5a93a59b87d@syzkaller.appspotmail.com Fixes: 63d04348371b ("KVM: x86: move kvm_create_vcpu_debugfs after last failure point") Signed-off-by: Paolo Bonzini --- arch/arm64/kvm/arm.c | 5 ----- arch/x86/kvm/debugfs.c | 10 +++++----- include/linux/kvm_host.h | 3 +-- virt/kvm/kvm_main.c | 8 ++++---- 4 files changed, 10 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 7a57381c05e8..45276ed50dd6 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -144,11 +144,6 @@ out_fail_alloc: return ret; } -int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) -{ - return 0; -} - vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) { return VM_FAULT_SIGBUS; diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index 018aebce33ff..7e818d64bb4d 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -43,22 +43,22 @@ static int vcpu_get_tsc_scaling_frac_bits(void *data, u64 *val) DEFINE_SIMPLE_ATTRIBUTE(vcpu_tsc_scaling_frac_fops, vcpu_get_tsc_scaling_frac_bits, NULL, "%llu\n"); -void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry) { - debugfs_create_file("tsc-offset", 0444, vcpu->debugfs_dentry, vcpu, + debugfs_create_file("tsc-offset", 0444, debugfs_dentry, vcpu, &vcpu_tsc_offset_fops); if (lapic_in_kernel(vcpu)) debugfs_create_file("lapic_timer_advance_ns", 0444, - vcpu->debugfs_dentry, vcpu, + debugfs_dentry, vcpu, &vcpu_timer_advance_ns_fops); if (kvm_has_tsc_control) { debugfs_create_file("tsc-scaling-ratio", 0444, - vcpu->debugfs_dentry, vcpu, + debugfs_dentry, vcpu, &vcpu_tsc_scaling_fops); debugfs_create_file("tsc-scaling-ratio-frac-bits", 0444, - vcpu->debugfs_dentry, vcpu, + debugfs_dentry, vcpu, &vcpu_tsc_scaling_frac_fops); } } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f43b59b1294c..d38d6b9c24be 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -318,7 +318,6 @@ struct kvm_vcpu { bool preempted; bool ready; struct kvm_vcpu_arch arch; - struct dentry *debugfs_dentry; }; static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) @@ -888,7 +887,7 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS -void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu); +void kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu, struct dentry *debugfs_dentry); #endif int kvm_arch_hardware_enable(void); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7fa1e38e1659..3577eb84eac0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2973,7 +2973,6 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) { struct kvm_vcpu *vcpu = filp->private_data; - debugfs_remove_recursive(vcpu->debugfs_dentry); kvm_put_kvm(vcpu->kvm); return 0; } @@ -3000,16 +2999,17 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static void kvm_create_vcpu_debugfs(struct kvm_vcpu *vcpu) { #ifdef __KVM_HAVE_ARCH_VCPU_DEBUGFS + struct dentry *debugfs_dentry; char dir_name[ITOA_MAX_LEN * 2]; if (!debugfs_initialized()) return; snprintf(dir_name, sizeof(dir_name), "vcpu%d", vcpu->vcpu_id); - vcpu->debugfs_dentry = debugfs_create_dir(dir_name, - vcpu->kvm->debugfs_dentry); + debugfs_dentry = debugfs_create_dir(dir_name, + vcpu->kvm->debugfs_dentry); - kvm_arch_create_vcpu_debugfs(vcpu); + kvm_arch_create_vcpu_debugfs(vcpu, debugfs_dentry); #endif } -- cgit v1.3 From c25a26e653a61b93339dcd1b734c889df60b3eef Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Fri, 29 May 2020 16:03:00 +0800 Subject: vdpa: introduce get_vq_notification method This patch introduces a new method in the vdpa_config_ops which reports the physical address and the size of the doorbell for a specific virtqueue. This will be used by the future patches that maps doorbell to userspace. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20200529080303.15449-4-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/vdpa.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vdpa.h b/include/linux/vdpa.h index 5453af87a33e..239db794357c 100644 --- a/include/linux/vdpa.h +++ b/include/linux/vdpa.h @@ -17,6 +17,16 @@ struct vdpa_callback { void *private; }; +/** + * vDPA notification area + * @addr: base address of the notification area + * @size: size of the notification area + */ +struct vdpa_notification_area { + resource_size_t addr; + resource_size_t size; +}; + /** * vDPA device - representation of a vDPA device * @dev: underlying device @@ -73,6 +83,10 @@ struct vdpa_device { * @vdev: vdpa device * @idx: virtqueue index * Returns virtqueue state (last_avail_idx) + * @get_vq_notification: Get the notification area for a virtqueue + * @vdev: vdpa device + * @idx: virtqueue index + * Returns the notifcation area * @get_vq_align: Get the virtqueue align requirement * for the device * @vdev: vdpa device @@ -162,6 +176,8 @@ struct vdpa_config_ops { bool (*get_vq_ready)(struct vdpa_device *vdev, u16 idx); int (*set_vq_state)(struct vdpa_device *vdev, u16 idx, u64 state); u64 (*get_vq_state)(struct vdpa_device *vdev, u16 idx); + struct vdpa_notification_area + (*get_vq_notification)(struct vdpa_device *vdev, u16 idx); /* Device ops */ u32 (*get_vq_align)(struct vdpa_device *vdev); -- cgit v1.3 From aa218795cb5fd583c94fc838dc76b7379dc4976a Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 7 May 2020 16:01:30 +0200 Subject: mm: Allow to offline unmovable PageOffline() pages via MEM_GOING_OFFLINE virtio-mem wants to allow to offline memory blocks of which some parts were unplugged (allocated via alloc_contig_range()), especially, to later offline and remove completely unplugged memory blocks. The important part is that PageOffline() has to remain set until the section is offline, so these pages will never get accessed (e.g., when dumping). The pages should not be handed back to the buddy (which would require clearing PageOffline() and result in issues if offlining fails and the pages are suddenly in the buddy). Let's allow to do that by allowing to isolate any PageOffline() page when offlining. This way, we can reach the memory hotplug notifier MEM_GOING_OFFLINE, where the driver can signal that he is fine with offlining this page by dropping its reference count. PageOffline() pages with a reference count of 0 can then be skipped when offlining the pages (like if they were free, however they are not in the buddy). Anybody who uses PageOffline() pages and does not agree to offline them (e.g., Hyper-V balloon, XEN balloon, VMWare balloon for 2MB pages) will not decrement the reference count and make offlining fail when trying to migrate such an unmovable page. So there should be no observable change. Same applies to balloon compaction users (movable PageOffline() pages), the pages will simply be migrated. Note 1: If offlining fails, a driver has to increment the reference count again in MEM_CANCEL_OFFLINE. Note 2: A driver that makes use of this has to be aware that re-onlining the memory block has to be handled by hooking into onlining code (online_page_callback_t), resetting the page PageOffline() and not giving them to the buddy. Reviewed-by: Alexander Duyck Acked-by: Michal Hocko Tested-by: Pankaj Gupta Acked-by: Andrew Morton Cc: Andrew Morton Cc: Juergen Gross Cc: Konrad Rzeszutek Wilk Cc: Pavel Tatashin Cc: Alexander Duyck Cc: Vlastimil Babka Cc: Johannes Weiner Cc: Anthony Yznaga Cc: Michal Hocko Cc: Oscar Salvador Cc: Mel Gorman Cc: Mike Rapoport Cc: Dan Williams Cc: Anshuman Khandual Cc: Qian Cai Cc: Pingfan Liu Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200507140139.17083-7-david@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/page-flags.h | 10 ++++++++++ mm/memory_hotplug.c | 44 ++++++++++++++++++++++++++++++++++---------- mm/page_alloc.c | 24 ++++++++++++++++++++++++ mm/page_isolation.c | 9 +++++++++ 4 files changed, 77 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 222f6f7b2bb3..6be1aa559b1e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -777,6 +777,16 @@ PAGE_TYPE_OPS(Buddy, buddy) * not onlined when onlining the section). * The content of these pages is effectively stale. Such pages should not * be touched (read/write/dump/save) except by their owner. + * + * If a driver wants to allow to offline unmovable PageOffline() pages without + * putting them back to the buddy, it can do so via the memory notifier by + * decrementing the reference count in MEM_GOING_OFFLINE and incrementing the + * reference count in MEM_CANCEL_OFFLINE. When offlining, the PageOffline() + * pages (now with a reference count of zero) are treated like free pages, + * allowing the containing memory block to get offlined. A driver that + * relies on this feature is aware that re-onlining the memory block will + * require to re-set the pages PageOffline() and not giving them to the + * buddy via online_page_callback_t. */ PAGE_TYPE_OPS(Offline, offline) diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index fc0aad0bc1f5..008e4a7ed8bc 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1224,11 +1224,17 @@ struct zone *test_pages_in_a_zone(unsigned long start_pfn, /* * Scan pfn range [start,end) to find movable/migratable pages (LRU pages, - * non-lru movable pages and hugepages). We scan pfn because it's much - * easier than scanning over linked list. This function returns the pfn - * of the first found movable page if it's found, otherwise 0. + * non-lru movable pages and hugepages). Will skip over most unmovable + * pages (esp., pages that can be skipped when offlining), but bail out on + * definitely unmovable pages. + * + * Returns: + * 0 in case a movable page is found and movable_pfn was updated. + * -ENOENT in case no movable page was found. + * -EBUSY in case a definitely unmovable page was found. */ -static unsigned long scan_movable_pages(unsigned long start, unsigned long end) +static int scan_movable_pages(unsigned long start, unsigned long end, + unsigned long *movable_pfn) { unsigned long pfn; @@ -1240,18 +1246,30 @@ static unsigned long scan_movable_pages(unsigned long start, unsigned long end) continue; page = pfn_to_page(pfn); if (PageLRU(page)) - return pfn; + goto found; if (__PageMovable(page)) - return pfn; + goto found; + + /* + * PageOffline() pages that are not marked __PageMovable() and + * have a reference count > 0 (after MEM_GOING_OFFLINE) are + * definitely unmovable. If their reference count would be 0, + * they could at least be skipped when offlining memory. + */ + if (PageOffline(page) && page_count(page)) + return -EBUSY; if (!PageHuge(page)) continue; head = compound_head(page); if (page_huge_active(head)) - return pfn; + goto found; skip = compound_nr(head) - (page - head); pfn += skip - 1; } + return -ENOENT; +found: + *movable_pfn = pfn; return 0; } @@ -1518,7 +1536,8 @@ static int __ref __offline_pages(unsigned long start_pfn, } do { - for (pfn = start_pfn; pfn;) { + pfn = start_pfn; + do { if (signal_pending(current)) { ret = -EINTR; reason = "signal backoff"; @@ -1528,14 +1547,19 @@ static int __ref __offline_pages(unsigned long start_pfn, cond_resched(); lru_add_drain_all(); - pfn = scan_movable_pages(pfn, end_pfn); - if (pfn) { + ret = scan_movable_pages(pfn, end_pfn, &pfn); + if (!ret) { /* * TODO: fatal migration failures should bail * out */ do_migrate_range(pfn, end_pfn); } + } while (!ret); + + if (ret != -ENOENT) { + reason = "unmovable page"; + goto failed_removal_isolated; } /* diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ce1c9df54eac..9b1c49c3097d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -8372,6 +8372,19 @@ struct page *has_unmovable_pages(struct zone *zone, struct page *page, if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) continue; + /* + * We treat all PageOffline() pages as movable when offlining + * to give drivers a chance to decrement their reference count + * in MEM_GOING_OFFLINE in order to indicate that these pages + * can be offlined as there are no direct references anymore. + * For actually unmovable PageOffline() where the driver does + * not support this, we will fail later when trying to actually + * move these pages that still have a reference count > 0. + * (false negatives in this function only) + */ + if ((flags & MEMORY_OFFLINE) && PageOffline(page)) + continue; + if (__PageMovable(page) || PageLRU(page)) continue; @@ -8792,6 +8805,17 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) offlined_pages++; continue; } + /* + * At this point all remaining PageOffline() pages have a + * reference count of 0 and can simply be skipped. + */ + if (PageOffline(page)) { + BUG_ON(page_count(page)); + BUG_ON(PageBuddy(page)); + pfn++; + offlined_pages++; + continue; + } BUG_ON(page_count(page)); BUG_ON(!PageBuddy(page)); diff --git a/mm/page_isolation.c b/mm/page_isolation.c index 2c11a38d6e87..f6d07c5f0d34 100644 --- a/mm/page_isolation.c +++ b/mm/page_isolation.c @@ -151,6 +151,7 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages) * a bit mask) * MEMORY_OFFLINE - isolate to offline (!allocate) memory * e.g., skip over PageHWPoison() pages + * and PageOffline() pages. * REPORT_FAILURE - report details about the failure to * isolate the range * @@ -259,6 +260,14 @@ __test_page_isolated_in_pageblock(unsigned long pfn, unsigned long end_pfn, else if ((flags & MEMORY_OFFLINE) && PageHWPoison(page)) /* A HWPoisoned page cannot be also PageBuddy */ pfn++; + else if ((flags & MEMORY_OFFLINE) && PageOffline(page) && + !page_count(page)) + /* + * The responsible driver agreed to skip PageOffline() + * pages when offlining memory by dropping its + * reference in MEM_GOING_OFFLINE. + */ + pfn++; else break; } -- cgit v1.3 From 08b3acd7a68fc17902e1cb6b146389322840deab Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 7 May 2020 16:01:32 +0200 Subject: mm/memory_hotplug: Introduce offline_and_remove_memory() virtio-mem wants to offline and remove a memory block once it unplugged all subblocks (e.g., using alloc_contig_range()). Let's provide an interface to do that from a driver. virtio-mem already supports to offline partially unplugged memory blocks. Offlining a fully unplugged memory block will not require to migrate any pages. All unplugged subblocks are PageOffline() and have a reference count of 0 - so offlining code will simply skip them. All we need is an interface to offline and remove the memory from kernel module context, where we don't have access to the memory block devices (esp. find_memory_block() and device_offline()) and the device hotplug lock. To keep things simple, allow to only work on a single memory block. Acked-by: Michal Hocko Tested-by: Pankaj Gupta Acked-by: Andrew Morton Cc: Andrew Morton Cc: David Hildenbrand Cc: Oscar Salvador Cc: Michal Hocko Cc: Pavel Tatashin Cc: Wei Yang Cc: Dan Williams Cc: Qian Cai Signed-off-by: David Hildenbrand Link: https://lore.kernel.org/r/20200507140139.17083-9-david@redhat.com Signed-off-by: Michael S. Tsirkin --- include/linux/memory_hotplug.h | 1 + mm/memory_hotplug.c | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 93d9ada74ddd..cb7499843f5c 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -319,6 +319,7 @@ extern void try_offline_node(int nid); extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages); extern int remove_memory(int nid, u64 start, u64 size); extern void __remove_memory(int nid, u64 start, u64 size); +extern int offline_and_remove_memory(int nid, u64 start, u64 size); #else static inline bool is_mem_section_removable(unsigned long pfn, diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 008e4a7ed8bc..4acb99aa9bf4 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1821,4 +1821,41 @@ int remove_memory(int nid, u64 start, u64 size) return rc; } EXPORT_SYMBOL_GPL(remove_memory); + +/* + * Try to offline and remove a memory block. Might take a long time to + * finish in case memory is still in use. Primarily useful for memory devices + * that logically unplugged all memory (so it's no longer in use) and want to + * offline + remove the memory block. + */ +int offline_and_remove_memory(int nid, u64 start, u64 size) +{ + struct memory_block *mem; + int rc = -EINVAL; + + if (!IS_ALIGNED(start, memory_block_size_bytes()) || + size != memory_block_size_bytes()) + return rc; + + lock_device_hotplug(); + mem = find_memory_block(__pfn_to_section(PFN_DOWN(start))); + if (mem) + rc = device_offline(&mem->dev); + /* Ignore if the device is already offline. */ + if (rc > 0) + rc = 0; + + /* + * In case we succeeded to offline the memory block, remove it. + * This cannot fail as it cannot get onlined in the meantime. + */ + if (!rc) { + rc = try_remove_memory(nid, start, size); + WARN_ON_ONCE(rc); + } + unlock_device_hotplug(); + + return rc; +} +EXPORT_SYMBOL_GPL(offline_and_remove_memory); #endif /* CONFIG_MEMORY_HOTREMOVE */ -- cgit v1.3 From 5872f1a2e5c783783d51e96468f0ff6aede61182 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 11 May 2020 21:59:51 +0100 Subject: READ_ONCE: Fix comment describing 2x32-bit atomicity READ_ONCE() permits 64-bit accesses on 32-bit architectures, since this crops up in a few places and is generally harmless because either the upper bits are always zero (e.g. for a virtual address or 32-bit time_t) or the architecture provides 64-bit atomicity anyway. Update the corresponding comment above compiletime_assert_rwonce_type(), which incorrectly states that 32-bit x86 provides 64-bit atomicity, and instead reference 32-bit Armv7 with LPAE. Cc: Thomas Gleixner Cc: Peter Zijlstra Reported-by: Jann Horn Signed-off-by: Will Deacon --- include/linux/compiler.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c363d8debc43..657e4fd38a77 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -332,9 +332,9 @@ static inline void *offset_to_ptr(const int *off) /* * Yes, this permits 64-bit accesses on 32-bit architectures. These will - * actually be atomic in many cases (namely x86), but for others we rely on - * the access being split into 2x32-bit accesses for a 32-bit quantity (e.g. - * a virtual address) and a strong prevailing wind. + * actually be atomic in some cases (namely Armv7 + LPAE), but for others we + * rely on the access being split into 2x32-bit accesses for a 32-bit quantity + * (e.g. a virtual address) and a strong prevailing wind. */ #define compiletime_assert_rwonce_type(t) \ compiletime_assert(__native_word(t) || sizeof(t) == sizeof(long long), \ -- cgit v1.3 From 8d4beed7bbc71666de2630b79899c8852c3bf5cd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Jun 2020 11:05:51 +0100 Subject: compiler-types.h: Include naked type in __pick_integer_type() match __pick_integer_type() checks whether the type of its first argument is compatible with an explicitly signed or unsigned integer type, returning the compatible type if it exists. Unfortunately, 'char' is neither compatible with 'signed char' nor 'unsigned char', so add a check against the naked type to allow the __unqual_scalar_typeof() macro to strip qualifiers from char types without an explicit signedness. Reported-by: Rasmus Villemoes Signed-off-by: Will Deacon --- include/linux/compiler_types.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 233066c92f6f..6ed0612bc143 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -220,9 +220,14 @@ struct ftrace_likely_data { #define __pick_scalar_type(x, type, otherwise) \ __builtin_choose_expr(__same_type(x, type), (type)0, otherwise) +/* + * 'char' is not type-compatible with either 'signed char' or 'unsigned char', + * so we include the naked type here as well as the signed/unsigned variants. + */ #define __pick_integer_type(x, type, otherwise) \ - __pick_scalar_type(x, unsigned type, \ - __pick_scalar_type(x, signed type, otherwise)) + __pick_scalar_type(x, type, \ + __pick_scalar_type(x, unsigned type, \ + __pick_scalar_type(x, signed type, otherwise))) #define __unqual_scalar_typeof(x) typeof( \ __pick_integer_type(x, char, \ -- cgit v1.3 From b16d8ecf4fa17e16fff20638364f9bd2205615e7 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 5 Jun 2020 11:19:46 +0100 Subject: compiler.h: Enforce that READ_ONCE_NOCHECK() access size is sizeof(long) READ_ONCE_NOCHECK() unconditionally performs a sizeof(long)-sized access, so enforce that the size of the pointed-to object that we are loading from is the same size as 'long'. Reported-by: Marco Elver Signed-off-by: Will Deacon --- include/linux/compiler.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 657e4fd38a77..a0aa56e6b782 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -254,9 +254,12 @@ unsigned long __read_once_word_nocheck(const void *addr) */ #define READ_ONCE_NOCHECK(x) \ ({ \ - unsigned long __x = __read_once_word_nocheck(&(x)); \ + unsigned long __x; \ + compiletime_assert(sizeof(x) == sizeof(__x), \ + "Unsupported access size for READ_ONCE_NOCHECK()."); \ + __x = __read_once_word_nocheck(&(x)); \ smp_read_barrier_depends(); \ - __x; \ + (typeof(x))__x; \ }) static __no_kasan_or_inline -- cgit v1.3 From 1fd76043ecb04b8567a76b106db09ac778e1e2b8 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Wed, 27 May 2020 12:32:36 +0200 Subject: compiler_types.h: Optimize __unqual_scalar_typeof compilation time If the compiler supports C11's _Generic, use it to speed up compilation times of __unqual_scalar_typeof(). GCC version 4.9 or later and all supported versions of Clang support the feature (the oldest supported compiler that doesn't support _Generic is GCC 4.8, for which we use the slower alternative). The non-_Generic variant relies on multiple expansions of __pick_integer_type -> __pick_scalar_type -> __builtin_choose_expr, which increases pre-processed code size, and can cause compile times to increase in files with numerous expansions of READ_ONCE(), or other users of __unqual_scalar_typeof(). Summary of compile-time benchmarking done by Arnd Bergmann: clang-11 gcc-9 this patch 0.78 0.91 ideal 0.76 0.86 See https://lkml.kernel.org/r/CAK8P3a3UYQeXhiufUevz=rwe09WM_vSTCd9W+KvJHJcOeQyWVA@mail.gmail.com Further compile-testing done with: gcc 4.8, 4.9, 5.5, 6.4, 7.5, 8.4; clang 9, 10. Reported-by: Arnd Bergmann Signed-off-by: Marco Elver Signed-off-by: Borislav Petkov Acked-by: Peter Zijlstra Tested-by: Arnd Bergmann Link: https://lkml.kernel.org/r/20200527103236.148700-1-elver@google.com Link: https://lkml.kernel.org/r/CAK8P3a0RJtbVi1JMsfik=jkHCNFv+DJn_FeDg-YLW+ueQW3tNg@mail.gmail.com [will: tweak new macros to make them a bit more readable] Signed-off-by: Will Deacon --- include/linux/compiler_types.h | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 6ed0612bc143..31416b60eabf 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -213,7 +213,9 @@ struct ftrace_likely_data { /* * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. - * + */ +#if defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900 +/* * We build this out of a couple of helper macros in a vain attempt to * help you keep your lunch down while reading it. */ @@ -235,6 +237,25 @@ struct ftrace_likely_data { __pick_integer_type(x, int, \ __pick_integer_type(x, long, \ __pick_integer_type(x, long long, x)))))) +#else +/* + * If supported, prefer C11 _Generic for better compile-times. As above, 'char' + * is not type-compatible with 'signed char', and we define a separate case. + */ +#define __scalar_type_to_expr_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (signed type)0 + +#define __unqual_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (char)0, \ + __scalar_type_to_expr_cases(char), \ + __scalar_type_to_expr_cases(short), \ + __scalar_type_to_expr_cases(int), \ + __scalar_type_to_expr_cases(long), \ + __scalar_type_to_expr_cases(long long), \ + default: (x))) +#endif /* Is this type a native word size -- useful for atomic operations */ #define __native_word(t) \ -- cgit v1.3 From b398ace5d2ea0b7f00d9f1ce23c647e289c206ca Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 28 May 2020 09:43:13 +0200 Subject: compiler_types.h: Use unoptimized __unqual_scalar_typeof for sparse If the file is being checked with sparse, use the unoptimized version of __unqual_scalar_typeof(), since sparse does not support _Generic. Reported-by: kbuild test robot Signed-off-by: Marco Elver Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/202005280727.lXn1VnTw%lkp@intel.com Signed-off-by: Will Deacon --- include/linux/compiler_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 31416b60eabf..cd73e3857a87 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -214,7 +214,7 @@ struct ftrace_likely_data { * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving * non-scalar types unchanged. */ -#if defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900 +#if (defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 40900) || defined(__CHECKER__) /* * We build this out of a couple of helper macros in a vain attempt to * help you keep your lunch down while reading it. -- cgit v1.3 From cf6fada71543ceea0f6228ffdc0b85778f3f5a6e Mon Sep 17 00:00:00 2001 From: Xiongfeng Wang Date: Sat, 30 May 2020 10:08:30 +0800 Subject: cpufreq: change '.set_boost' to act on one policy Macro 'for_each_active_policy()' is defined internally. To avoid some cpufreq driver needing this macro to iterate over all the policies in '.set_boost' callback, we redefine '.set_boost' to act on only one policy and pass the policy as an argument. 'cpufreq_boost_trigger_state()' iterates over all the policies to set boost for the system. This is preparation for adding SW BOOST support for CPPC. To protect Boost enable/disable by sysfs from CPU online/offline, add 'cpu_hotplug_lock' before calling '.set_boost' for each CPU. Also move the lock from 'set_boost()' to 'store_cpb()' in acpi_cpufreq. Signed-off-by: Xiongfeng Wang Suggested-by: Viresh Kumar Acked-by: Viresh Kumar [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- drivers/cpufreq/acpi-cpufreq.c | 14 ++++++----- drivers/cpufreq/cpufreq.c | 57 +++++++++++++++++++++++------------------- include/linux/cpufreq.h | 2 +- 3 files changed, 40 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 289e8ce3fd13..429e5a36c08a 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -126,12 +126,12 @@ static void boost_set_msr_each(void *p_en) boost_set_msr(enable); } -static int set_boost(int val) +static int set_boost(struct cpufreq_policy *policy, int val) { - get_online_cpus(); - on_each_cpu(boost_set_msr_each, (void *)(long)val, 1); - put_online_cpus(); - pr_debug("Core Boosting %sabled.\n", val ? "en" : "dis"); + on_each_cpu_mask(policy->cpus, boost_set_msr_each, + (void *)(long)val, 1); + pr_debug("CPU %*pbl: Core Boosting %sabled.\n", + cpumask_pr_args(policy->cpus), val ? "en" : "dis"); return 0; } @@ -162,7 +162,9 @@ static ssize_t store_cpb(struct cpufreq_policy *policy, const char *buf, if (ret || val > 1) return -EINVAL; - set_boost(val); + get_online_cpus(); + set_boost(policy, val); + put_online_cpus(); return count; } diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index d03f250f68e4..0128de3603df 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -2532,34 +2532,29 @@ EXPORT_SYMBOL_GPL(cpufreq_update_limits); /********************************************************************* * BOOST * *********************************************************************/ -static int cpufreq_boost_set_sw(int state) +static int cpufreq_boost_set_sw(struct cpufreq_policy *policy, int state) { - struct cpufreq_policy *policy; - - for_each_active_policy(policy) { - int ret; + int ret; - if (!policy->freq_table) - return -ENXIO; + if (!policy->freq_table) + return -ENXIO; - ret = cpufreq_frequency_table_cpuinfo(policy, - policy->freq_table); - if (ret) { - pr_err("%s: Policy frequency update failed\n", - __func__); - return ret; - } - - ret = freq_qos_update_request(policy->max_freq_req, policy->max); - if (ret < 0) - return ret; + ret = cpufreq_frequency_table_cpuinfo(policy, policy->freq_table); + if (ret) { + pr_err("%s: Policy frequency update failed\n", __func__); + return ret; } + ret = freq_qos_update_request(policy->max_freq_req, policy->max); + if (ret < 0) + return ret; + return 0; } int cpufreq_boost_trigger_state(int state) { + struct cpufreq_policy *policy; unsigned long flags; int ret = 0; @@ -2570,15 +2565,25 @@ int cpufreq_boost_trigger_state(int state) cpufreq_driver->boost_enabled = state; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - ret = cpufreq_driver->set_boost(state); - if (ret) { - write_lock_irqsave(&cpufreq_driver_lock, flags); - cpufreq_driver->boost_enabled = !state; - write_unlock_irqrestore(&cpufreq_driver_lock, flags); - - pr_err("%s: Cannot %s BOOST\n", - __func__, state ? "enable" : "disable"); + get_online_cpus(); + for_each_active_policy(policy) { + ret = cpufreq_driver->set_boost(policy, state); + if (ret) + goto err_reset_state; } + put_online_cpus(); + + return 0; + +err_reset_state: + put_online_cpus(); + + write_lock_irqsave(&cpufreq_driver_lock, flags); + cpufreq_driver->boost_enabled = !state; + write_unlock_irqrestore(&cpufreq_driver_lock, flags); + + pr_err("%s: Cannot %s BOOST\n", + __func__, state ? "enable" : "disable"); return ret; } diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 67d5950bd878..3494f6763597 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -367,7 +367,7 @@ struct cpufreq_driver { /* platform specific boost support code */ bool boost_enabled; - int (*set_boost)(int state); + int (*set_boost)(struct cpufreq_policy *policy, int state); }; /* flags */ -- cgit v1.3 From e649b3f0188f8fd34dd0dde8d43fd3312b902fb2 Mon Sep 17 00:00:00 2001 From: Eiichi Tsukata Date: Sat, 6 Jun 2020 13:26:27 +0900 Subject: KVM: x86: Fix APIC page invalidation race Commit b1394e745b94 ("KVM: x86: fix APIC page invalidation") tried to fix inappropriate APIC page invalidation by re-introducing arch specific kvm_arch_mmu_notifier_invalidate_range() and calling it from kvm_mmu_notifier_invalidate_range_start. However, the patch left a possible race where the VMCS APIC address cache is updated *before* it is unmapped: (Invalidator) kvm_mmu_notifier_invalidate_range_start() (Invalidator) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD) (KVM VCPU) vcpu_enter_guest() (KVM VCPU) kvm_vcpu_reload_apic_access_page() (Invalidator) actually unmap page Because of the above race, there can be a mismatch between the host physical address stored in the APIC_ACCESS_PAGE VMCS field and the host physical address stored in the EPT entry for the APIC GPA (0xfee0000). When this happens, the processor will not trap APIC accesses, and will instead show the raw contents of the APIC-access page. Because Windows OS periodically checks for unexpected modifications to the LAPIC register, this will show up as a BSOD crash with BugCheck CRITICAL_STRUCTURE_CORRUPTION (109) we are currently seeing in https://bugzilla.redhat.com/show_bug.cgi?id=1751017. The root cause of the issue is that kvm_arch_mmu_notifier_invalidate_range() cannot guarantee that no additional references are taken to the pages in the range before kvm_mmu_notifier_invalidate_range_end(). Fortunately, this case is supported by the MMU notifier API, as documented in include/linux/mmu_notifier.h: * If the subsystem * can't guarantee that no additional references are taken to * the pages in the range, it has to implement the * invalidate_range() notifier to remove any references taken * after invalidate_range_start(). The fix therefore is to reload the APIC-access page field in the VMCS from kvm_mmu_notifier_invalidate_range() instead of ..._range_start(). Cc: stable@vger.kernel.org Fixes: b1394e745b94 ("KVM: x86: fix APIC page invalidation") Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=197951 Signed-off-by: Eiichi Tsukata Message-Id: <20200606042627.61070-1-eiichi.tsukata@nutanix.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 7 ++----- include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 26 ++++++++++++++++---------- 3 files changed, 20 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c26dd1363151..24de847af52e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8270,9 +8270,8 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) kvm_x86_ops.load_eoi_exitmap(vcpu, eoi_exit_bitmap); } -int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, - bool blockable) +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) { unsigned long apic_address; @@ -8283,8 +8282,6 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); if (start <= apic_address && apic_address < end) kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); - - return 0; } void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d38d6b9c24be..e2f82131bb3e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1420,8 +1420,8 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, } #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ -int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, bool blockable); +void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE int kvm_arch_vcpu_run_pid_change(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 4db151f6101e..7b6013f2ba19 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -155,10 +155,9 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); static unsigned long long kvm_createvm_count; static unsigned long long kvm_active_vms; -__weak int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end, bool blockable) +__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, + unsigned long start, unsigned long end) { - return 0; } bool kvm_is_zone_device_pfn(kvm_pfn_t pfn) @@ -384,6 +383,18 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) return container_of(mn, struct kvm, mmu_notifier); } +static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + int idx; + + idx = srcu_read_lock(&kvm->srcu); + kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); + srcu_read_unlock(&kvm->srcu, idx); +} + static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long address, @@ -408,7 +419,6 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, { struct kvm *kvm = mmu_notifier_to_kvm(mn); int need_tlb_flush = 0, idx; - int ret; idx = srcu_read_lock(&kvm->srcu); spin_lock(&kvm->mmu_lock); @@ -425,14 +435,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn, kvm_flush_remote_tlbs(kvm); spin_unlock(&kvm->mmu_lock); - - ret = kvm_arch_mmu_notifier_invalidate_range(kvm, range->start, - range->end, - mmu_notifier_range_blockable(range)); - srcu_read_unlock(&kvm->srcu, idx); - return ret; + return 0; } static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -538,6 +543,7 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_range = kvm_mmu_notifier_invalidate_range, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, -- cgit v1.3 From 7ff0d4490ebadae8037a079a70c5cac13385a808 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 3 Jun 2020 07:52:37 +0200 Subject: trace: fix an incorrect __user annotation on stack_trace_sysctl No user pointers for sysctls anymore. Fixes: 32927393dc1c ("sysctl: pass kernel pointers to ->proc_handler") Reported-by: build test robot Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/ftrace.h | 5 ++--- kernel/trace/trace_stack.c | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ddfc377de0d2..fce81238f304 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -319,9 +319,8 @@ static inline void arch_ftrace_set_direct_caller(struct pt_regs *regs, extern int stack_tracer_enabled; -int stack_trace_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int stack_trace_sysctl(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); /* DO NOT MODIFY THIS VARIABLE DIRECTLY! */ DECLARE_PER_CPU(int, disable_stack_tracer); diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index c557f42a9397..98bba4764c52 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -515,9 +515,8 @@ static const struct file_operations stack_trace_filter_fops = { #endif /* CONFIG_DYNAMIC_FTRACE */ int -stack_trace_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +stack_trace_sysctl(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int was_enabled; int ret; -- cgit v1.3 From 2062a4e8ae9f486847652927aaf88e21ab8d195d Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 8 Jun 2020 21:29:56 -0700 Subject: kallsyms/printk: add loglvl to print_ip_sym() Patch series "Add log level to show_stack()", v3. Add log level argument to show_stack(). Done in three stages: 1. Introducing show_stack_loglvl() for every architecture 2. Migrating old users with an explicit log level 3. Renaming show_stack_loglvl() into show_stack() Justification: - It's a design mistake to move a business-logic decision into platform realization detail. - I have currently two patches sets that would benefit from this work: Removing console_loglevel jumps in sysrq driver [1] Hung task warning before panic [2] - suggested by Tetsuo (but he probably didn't realise what it would involve). - While doing (1), (2) the backtraces were adjusted to headers and other messages for each situation - so there won't be a situation when the backtrace is printed, but the headers are missing because they have lesser log level (or the reverse). - As the result in (2) plays with console_loglevel for kdb are removed. The least important for upstream, but maybe still worth to note that every company I've worked in so far had an off-list patch to print backtrace with the needed log level (but only for the architecture they cared about). If you have other ideas how you will benefit from show_stack() with a log level - please, reply to this cover letter. See also discussion on v1: https://lore.kernel.org/linux-riscv/20191106083538.z5nlpuf64cigxigh@pathway.suse.cz/ This patch (of 50): print_ip_sym() needs to have a log level parameter to comply with other parts being printed. Otherwise, half of the expected backtrace would be printed and other may be missing with some logging level. The following callee(s) are using now the adjusted log level: - microblaze/unwind: the same level as headers & userspace unwind. Note that pr_debug()'s there are for debugging the unwinder itself. - nds32/traps: symbol addresses are printed with the same log level as backtrace headers. - lockdep: ip for locking issues is printed with the same log level as other part of the warning. - sched: ip where preemption was disabled is printed as error like the rest part of the message. - ftrace: bug reports are now consistent in the log level being used. Signed-off-by: Dmitry Safonov Signed-off-by: Andrew Morton Acked-by: Steven Rostedt (VMware) Cc: Albert Ou Cc: Ben Segall Cc: Dietmar Eggemann Cc: Greentime Hu Cc: Greg Kroah-Hartman Cc: Ingo Molnar Cc: James Hogan Cc: Juri Lelli Cc: Mel Gorman Cc: Michal Simek Cc: Palmer Dabbelt Cc: Paul Burton Cc: Paul Walmsley Cc: Peter Zijlstra Cc: Ralf Baechle Cc: Thomas Gleixner Cc: Vincent Chen Cc: Vincent Guittot Cc: Will Deacon Cc: Dmitry Safonov <0x7f454c46@gmail.com> Cc: Dmitry Safonov Cc: Jiri Slaby Cc: Petr Mladek Cc: Sergey Senozhatsky Cc: Ivan Kokshaysky Cc: Matt Turner Cc: Richard Henderson Cc: Vineet Gupta Cc: Russell King Cc: Catalin Marinas Cc: Aurelien Jacquiot Cc: Mark Salter Cc: Guo Ren Cc: Yoshinori Sato Cc: Brian Cain Cc: Fenghua Yu Cc: Tony Luck Cc: Geert Uytterhoeven Cc: Ley Foon Tan Cc: Jonas Bonn Cc: Stafford Horne Cc: Stefan Kristiansson Cc: Helge Deller Cc: "James E.J. Bottomley" Cc: Benjamin Herrenschmidt Cc: Michael Ellerman Cc: Paul Mackerras Cc: Christian Borntraeger Cc: Heiko Carstens Cc: Vasily Gorbik Cc: Rich Felker Cc: "David S. Miller" Cc: Anton Ivanov Cc: Jeff Dike Cc: Richard Weinberger Cc: Guan Xuetao Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Chris Zankel Cc: Max Filippov Cc: Len Brown Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: "Rafael J. Wysocki" Cc: Daniel Thompson Cc: Douglas Anderson Cc: Jason Wessel Link: http://lkml.kernel.org/r/20200418201944.482088-2-dima@arista.com Signed-off-by: Linus Torvalds --- arch/microblaze/kernel/unwind.c | 2 +- arch/mips/kernel/traps.c | 4 ++-- arch/nds32/kernel/traps.c | 4 ++-- arch/riscv/kernel/stacktrace.c | 2 +- include/linux/kallsyms.h | 4 ++-- kernel/locking/lockdep.c | 4 ++-- kernel/sched/core.c | 6 ++---- kernel/trace/ftrace.c | 8 ++++---- tools/include/linux/kallsyms.h | 2 +- 9 files changed, 17 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/arch/microblaze/kernel/unwind.c b/arch/microblaze/kernel/unwind.c index 34c270cb11fc..4241cdd28ee7 100644 --- a/arch/microblaze/kernel/unwind.c +++ b/arch/microblaze/kernel/unwind.c @@ -254,7 +254,7 @@ static void microblaze_unwind_inner(struct task_struct *task, task->comm); break; } else - print_ip_sym(pc); + print_ip_sym(KERN_INFO, pc); } /* Stop when we reach anything not part of the kernel */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 22f805a73921..210fea63de75 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -125,7 +125,7 @@ static void show_raw_backtrace(unsigned long reg29) break; } if (__kernel_text_address(addr)) - print_ip_sym(addr); + print_ip_sym(KERN_DEFAULT, addr); } printk("\n"); } @@ -155,7 +155,7 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) } printk("Call Trace:\n"); do { - print_ip_sym(pc); + print_ip_sym(KERN_DEFAULT, pc); pc = unwind_stack(task, &sp, pc, &ra); } while (pc); pr_cont("\n"); diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index f4d386b52622..40625760a125 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -108,7 +108,7 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) if (__kernel_text_address(ret_addr)) { ret_addr = ftrace_graph_ret_addr( tsk, &graph, ret_addr, NULL); - print_ip_sym(ret_addr); + print_ip_sym(KERN_EMERG, ret_addr); } if (--cnt < 0) break; @@ -124,7 +124,7 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg) ret_addr = ftrace_graph_ret_addr( tsk, &graph, ret_addr, NULL); - print_ip_sym(ret_addr); + print_ip_sym(KERN_EMERG, ret_addr); } if (--cnt < 0) break; diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 837b9b38f825..9f1ac258482f 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -99,7 +99,7 @@ void notrace walk_stackframe(struct task_struct *task, static bool print_trace_address(unsigned long pc, void *arg) { - print_ip_sym(pc); + print_ip_sym(KERN_DEFAULT, pc); return false; } diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 657a83b943f0..98338dc6b5d2 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -165,9 +165,9 @@ static inline int kallsyms_show_value(void) #endif /*CONFIG_KALLSYMS*/ -static inline void print_ip_sym(unsigned long ip) +static inline void print_ip_sym(const char *loglvl, unsigned long ip) { - printk("[<%px>] %pS\n", (void *) ip, (void *) ip); + printk("%s[<%px>] %pS\n", loglvl, (void *) ip, (void *) ip); } #endif /*_LINUX_KALLSYMS_H*/ diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 4c057dd8e93b..38cce34d03dc 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4424,7 +4424,7 @@ static void print_unlock_imbalance_bug(struct task_struct *curr, curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); pr_cont(") at:\n"); - print_ip_sym(ip); + print_ip_sym(KERN_WARNING, ip); pr_warn("but there are no more locks to release!\n"); pr_warn("\nother info that might help us debug this:\n"); lockdep_print_held_locks(curr); @@ -5075,7 +5075,7 @@ static void print_lock_contention_bug(struct task_struct *curr, curr->comm, task_pid_nr(curr)); print_lockdep_cache(lock); pr_cont(") at:\n"); - print_ip_sym(ip); + print_ip_sym(KERN_WARNING, ip); pr_warn("but there are no locks held!\n"); pr_warn("\nother info that might help us debug this:\n"); lockdep_print_held_locks(curr); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 8298b2c240ce..c06da3c3e317 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -3922,8 +3922,7 @@ static noinline void __schedule_bug(struct task_struct *prev) if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) && in_atomic_preempt_off()) { pr_err("Preemption disabled at:"); - print_ip_sym(preempt_disable_ip); - pr_cont("\n"); + print_ip_sym(KERN_ERR, preempt_disable_ip); } if (panic_on_warn) panic("scheduling while atomic\n"); @@ -6871,8 +6870,7 @@ void ___might_sleep(const char *file, int line, int preempt_offset) if (IS_ENABLED(CONFIG_DEBUG_PREEMPT) && !preempt_count_equals(preempt_offset)) { pr_err("Preemption disabled at:"); - print_ip_sym(preempt_disable_ip); - pr_cont("\n"); + print_ip_sym(KERN_ERR, preempt_disable_ip); } dump_stack(); add_taint(TAINT_WARN, LOCKDEP_STILL_OK); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index b5765aeea698..7d0ebd104706 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2020,12 +2020,12 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) case -EFAULT: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on modifying "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; case -EINVAL: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace failed to modify "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); print_ip_ins(" actual: ", (unsigned char *)ip); pr_cont("\n"); if (ftrace_expected) { @@ -2036,12 +2036,12 @@ void ftrace_bug(int failed, struct dyn_ftrace *rec) case -EPERM: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on writing "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); break; default: FTRACE_WARN_ON_ONCE(1); pr_info("ftrace faulted on unknown error "); - print_ip_sym(ip); + print_ip_sym(KERN_INFO, ip); } print_bug_type(); if (rec) { diff --git a/tools/include/linux/kallsyms.h b/tools/include/linux/kallsyms.h index 89ca6fe257cc..efb6c3f5f2a9 100644 --- a/tools/include/linux/kallsyms.h +++ b/tools/include/linux/kallsyms.h @@ -20,7 +20,7 @@ static inline const char *kallsyms_lookup(unsigned long addr, #include #include -static inline void print_ip_sym(unsigned long ip) +static inline void print_ip_sym(const char *loglvl, unsigned long ip) { char **name; -- cgit v1.3 From ab34b46d1a74e98996e67a7da7e5d683ecfd9f57 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 8 Jun 2020 21:32:10 -0700 Subject: sysrq: use show_stack_loglvl() Show the stack trace on a CPU with the same log level as "CPU%d" header. Signed-off-by: Dmitry Safonov Signed-off-by: Andrew Morton Cc: Greg Kroah-Hartman Cc: Jiri Slaby Link: http://lkml.kernel.org/r/20200418201944.482088-45-dima@arista.com Signed-off-by: Linus Torvalds --- drivers/tty/sysrq.c | 2 +- include/linux/sched/debug.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 477cdc1e9cf3..7bd935379dec 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -235,7 +235,7 @@ static void showacpu(void *dummy) raw_spin_lock_irqsave(&show_lock, flags); pr_info("CPU%d:\n", smp_processor_id()); - show_stack(NULL, NULL); + show_stack_loglvl(NULL, NULL, KERN_INFO); raw_spin_unlock_irqrestore(&show_lock, flags); } diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 95fb9e025247..373e4e3faf2a 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -31,6 +31,8 @@ extern void show_regs(struct pt_regs *); * trace (or NULL if the entire call-chain of the task should be shown). */ extern void show_stack(struct task_struct *task, unsigned long *sp); +extern void show_stack_loglvl(struct task_struct *task, unsigned long *sp, + const char *loglvl); extern void sched_show_task(struct task_struct *p); -- cgit v1.3 From 9cb8f069deeed708bf19486d5893e297dc467ae0 Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 8 Jun 2020 21:32:29 -0700 Subject: kernel: rename show_stack_loglvl() => show_stack() Now the last users of show_stack() got converted to use an explicit log level, show_stack_loglvl() can drop it's redundant suffix and become once again well known show_stack(). Signed-off-by: Dmitry Safonov Signed-off-by: Andrew Morton Link: http://lkml.kernel.org/r/20200418201944.482088-51-dima@arista.com Signed-off-by: Linus Torvalds --- arch/alpha/kernel/traps.c | 8 +------- arch/arc/kernel/stacktrace.c | 8 +------- arch/arm/kernel/traps.c | 8 +------- arch/arm64/kernel/traps.c | 8 +------- arch/c6x/kernel/traps.c | 7 +------ arch/csky/kernel/ptrace.c | 4 ++-- arch/csky/kernel/stacktrace.c | 9 +-------- arch/h8300/kernel/traps.c | 8 +------- arch/hexagon/kernel/traps.c | 8 +------- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/process.c | 11 ++--------- arch/m68k/kernel/traps.c | 11 +++-------- arch/microblaze/kernel/traps.c | 8 +------- arch/mips/kernel/traps.c | 8 +------- arch/nds32/kernel/traps.c | 8 +------- arch/nios2/kernel/traps.c | 12 +++--------- arch/openrisc/kernel/traps.c | 10 ++-------- arch/parisc/kernel/traps.c | 8 +------- arch/powerpc/kernel/process.c | 11 +++-------- arch/powerpc/kernel/stacktrace.c | 2 +- arch/riscv/kernel/stacktrace.c | 8 +------- arch/s390/kernel/dumpstack.c | 9 ++------- arch/sh/kernel/dumpstack.c | 8 +------- arch/sparc/kernel/process_32.c | 11 ++--------- arch/sparc/kernel/process_64.c | 2 +- arch/sparc/kernel/traps_64.c | 8 +------- arch/um/drivers/mconsole_kern.c | 2 +- arch/um/kernel/sysrq.c | 7 +------ arch/unicore32/kernel/traps.c | 7 +------ arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/dumpstack.c | 7 +------ arch/xtensa/kernel/traps.c | 10 ++-------- drivers/base/power/main.c | 2 +- drivers/tty/sysrq.c | 2 +- include/linux/sched/debug.h | 5 ++--- kernel/debug/kdb/kdb_bt.c | 2 +- kernel/locking/rtmutex-debug.c | 2 +- kernel/sched/core.c | 2 +- lib/dump_stack.c | 2 +- 39 files changed, 52 insertions(+), 205 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 2402f1777f54..8383ccfaccdc 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -144,8 +144,7 @@ dik_show_trace(unsigned long *sp, const char *loglvl) static int kstack_depth_to_print = 24; -void show_stack_loglvl(struct task_struct *task, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { unsigned long *stack; int i; @@ -174,11 +173,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *sp, dik_show_trace(sp, loglvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT); -} - void die_if_kernel(char * str, struct pt_regs *regs, long err, unsigned long *r9_15) { diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index 24f9cd8a12c9..feba91c9d969 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -228,17 +228,11 @@ noinline void show_stacktrace(struct task_struct *tsk, struct pt_regs *regs, EXPORT_SYMBOL(show_stacktrace); /* Expected by sched Code */ -void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { show_stacktrace(tsk, NULL, loglvl); } -void show_stack(struct task_struct *tsk, unsigned long *sp) -{ - show_stack_loglvl(tsk, sp, KERN_DEFAULT); -} - /* Another API expected by schedular, shows up in "ps" as Wait Channel * Of course just returning schedule( ) would be pointless so unwind until * the function is not in schedular code diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 2ae2c23799ad..65a3b1e75480 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -247,18 +247,12 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, } #endif -void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { dump_backtrace(NULL, tsk, loglvl); barrier(); } -void show_stack(struct task_struct *tsk, unsigned long *sp) -{ - show_stack_loglvl(tsk, sp, KERN_DEFAULT); -} - #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" #elif defined(CONFIG_PREEMPT_RT) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3621868b2fcc..3fd9e2731e0d 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -137,18 +137,12 @@ void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, put_task_stack(tsk); } -void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { dump_backtrace(NULL, tsk, loglvl); barrier(); } -void show_stack(struct task_struct *tsk, unsigned long *sp) -{ - show_stack_loglvl(tsk, sp, KERN_DEFAULT); -} - #ifdef CONFIG_PREEMPT #define S_PREEMPT " PREEMPT" #elif defined(CONFIG_PREEMPT_RT) diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index 4afbf48f1ce0..2b9121c755be 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -374,7 +374,7 @@ static void show_trace(unsigned long *stack, unsigned long *endstack, printk("%s\n", loglvl); } -void show_stack_loglvl(struct task_struct *task, unsigned long *stack, +void show_stack(struct task_struct *task, unsigned long *stack, const char *loglvl) { unsigned long *p, *endstack; @@ -403,11 +403,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *stack, show_trace(stack, endstack, loglvl); } -void show_stack(struct task_struct *task, unsigned long *stack) -{ - show_stack_loglvl(task, stack, KERN_DEBUG); -} - int is_valid_bugaddr(unsigned long addr) { return __kernel_text_address(addr); diff --git a/arch/csky/kernel/ptrace.c b/arch/csky/kernel/ptrace.c index 5a82230bddf9..bbd801f86eb5 100644 --- a/arch/csky/kernel/ptrace.c +++ b/arch/csky/kernel/ptrace.c @@ -344,7 +344,7 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs) trace_sys_exit(regs, syscall_get_return_value(current, regs)); } -extern void show_stack(struct task_struct *task, unsigned long *stack); +extern void show_stack(struct task_struct *task, unsigned long *stack, const char *loglvl); void show_regs(struct pt_regs *fp) { unsigned long *sp; @@ -420,6 +420,6 @@ void show_regs(struct pt_regs *fp) } pr_cont("\n"); - show_stack(NULL, (unsigned long *)fp->regs[4]); + show_stack(NULL, (unsigned long *)fp->regs[4], KERN_INFO); return; } diff --git a/arch/csky/kernel/stacktrace.c b/arch/csky/kernel/stacktrace.c index ca135f13cc13..16ae20a0af34 100644 --- a/arch/csky/kernel/stacktrace.c +++ b/arch/csky/kernel/stacktrace.c @@ -95,19 +95,12 @@ static bool print_trace_address(unsigned long pc, void *arg) return false; } -void show_stack_loglvl(struct task_struct *task, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { pr_cont("Call Trace:\n"); walk_stackframe(task, NULL, print_trace_address, (void *)loglvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - pr_cont("Call Trace:\n"); - walk_stackframe(task, NULL, print_trace_address, KERN_INFO); -} - static bool save_wchan(unsigned long pc, void *arg) { if (!in_sched_functions(pc)) { diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 6362446563d6..5d8b969cd8f3 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -115,8 +115,7 @@ void die(const char *str, struct pt_regs *fp, unsigned long err) static int kstack_depth_to_print = 24; -void show_stack_loglvl(struct task_struct *task, unsigned long *esp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *esp, const char *loglvl) { unsigned long *stack, addr; int i; @@ -158,8 +157,3 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *esp, } printk("%s\n", loglvl); } - -void show_stack(struct task_struct *task, unsigned long *esp) -{ - show_stack_loglvl(task, esp, KERN_INFO); -} diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index a8a3a210d781..904134b37232 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -175,18 +175,12 @@ static void do_show_stack(struct task_struct *task, unsigned long *fp, } } -void show_stack_loglvl(struct task_struct *task, unsigned long *fp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *fp, const char *loglvl) { /* Saved link reg is one word above FP */ do_show_stack(task, fp, 0, loglvl); } -void show_stack(struct task_struct *task, unsigned long *fp) -{ - show_stack_loglvl(task, fp, 0, KERN_INFO); -} - int die(const char *str, struct pt_regs *regs, long err) { static struct { diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 6fb54dfa1350..2703f7795672 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1631,7 +1631,7 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi if (read_trylock(&tasklist_lock)) { do_each_thread (g, t) { printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); - show_stack(t, NULL); + show_stack(t, NULL, KERN_DEFAULT); } while_each_thread (g, t); read_unlock(&tasklist_lock); } diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 913d9a01cbf9..96dfb9e4b16f 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -85,8 +85,7 @@ ia64_do_show_stack (struct unw_frame_info *info, void *arg) } void -show_stack_loglvl (struct task_struct *task, unsigned long *sp, - const char *loglvl) +show_stack (struct task_struct *task, unsigned long *sp, const char *loglvl) { if (!task) unw_init_running(ia64_do_show_stack, (void *)loglvl); @@ -98,12 +97,6 @@ show_stack_loglvl (struct task_struct *task, unsigned long *sp, } } -void -show_stack (struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT); -} - void show_regs (struct pt_regs *regs) { @@ -158,7 +151,7 @@ show_regs (struct pt_regs *regs) ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); } } else - show_stack(NULL, NULL); + show_stack(NULL, NULL, KERN_DEFAULT); } /* local support for deprecated console_print */ diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index ffcc5ec4fac3..df6fc782754f 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -916,7 +916,7 @@ void show_registers(struct pt_regs *regs) default: pr_cont("\n"); } - show_stack(NULL, (unsigned long *)addr); + show_stack(NULL, (unsigned long *)addr, KERN_INFO); pr_info("Code:"); set_fs(KERNEL_DS); @@ -935,8 +935,8 @@ void show_registers(struct pt_regs *regs) pr_cont("\n"); } -void show_stack_loglvl(struct task_struct *task, unsigned long *stack, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { unsigned long *p; unsigned long *endstack; @@ -963,11 +963,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *stack, show_trace(stack, loglvl); } -void show_stack(struct task_struct *task, unsigned long *stack) -{ - show_stack_loglvl(task, stack, KERN_INFO); -} - /* * The vector number returned in the frame pointer may also contain * the "fs" (Fault Status) bits on ColdFire. These are in the bottom diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index 149ae534937e..94b6fe93147d 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -31,8 +31,7 @@ static int __init kstack_setup(char *s) } __setup("kstack=", kstack_setup); -void show_stack_loglvl(struct task_struct *task, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { unsigned long words_to_show; u32 fp = (u32) sp; @@ -77,8 +76,3 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *sp, debug_show_held_locks(task); } - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_INFO); -} diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index e49040739b61..320797bd03f6 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -198,8 +198,7 @@ static void show_stacktrace(struct task_struct *task, show_backtrace(task, regs, loglvl); } -void show_stack_loglvl(struct task_struct *task, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { struct pt_regs regs; mm_segment_t old_fs = get_fs(); @@ -227,11 +226,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *sp, set_fs(old_fs); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT) -} - static void show_code(unsigned int __user *pc) { long i; diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 90f12582c218..6a9772ba7392 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -135,8 +135,7 @@ static void __dump(struct task_struct *tsk, unsigned long *base_reg, printk("%s\n", loglvl); } -void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { unsigned long *base_reg; @@ -157,11 +156,6 @@ void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, barrier(); } -void show_stack(struct task_struct *tsk, unsigned long *sp) -{ - show_stack_loglvl(tsk, sp, KERN_EMERG); -} - DEFINE_SPINLOCK(die_lock); /* diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 08071caa9b36..b172da4eb1a9 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -52,14 +52,13 @@ void _exception(int signo, struct pt_regs *regs, int code, unsigned long addr) } /* - * The show_stack(), show_stack_loglvl() are external API - * which we do not use ourselves. + * The show_stack() is external API which we do not use ourselves. */ int kstack_depth_to_print = 48; -void show_stack_loglvl(struct task_struct *task, unsigned long *stack, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *stack, + const char *loglvl) { unsigned long *endstack, addr; int i; @@ -106,11 +105,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *stack, printk("%s\n", loglvl); } -void show_stack(struct task_struct *task, unsigned long *stack) -{ - show_stack_loglvl(task, stack, KERN_EMERG); -} - void __init trap_init(void) { /* Nothing to do here */ diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 3b7978a22d68..3022b0ad142c 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -48,8 +48,7 @@ void print_trace(void *data, unsigned long addr, int reliable) } /* displays a short stack trace */ -void show_stack_loglvl(struct task_struct *task, unsigned long *esp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *esp, const char *loglvl) { if (esp == NULL) esp = (unsigned long *)&esp; @@ -58,11 +57,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *esp, unwind_stack((void *)loglvl, esp, print_trace); } -void show_stack(struct task_struct *task, unsigned long *esp) -{ - show_stack_loglvl(task, esp, KERN_EMERG); -} - void show_registers(struct pt_regs *regs) { int i; @@ -104,7 +98,7 @@ void show_registers(struct pt_regs *regs) if (in_kernel) { printk("\nStack: "); - show_stack(NULL, (unsigned long *)esp); + show_stack(NULL, (unsigned long *)esp, KERN_EMERG); printk("\nCode: "); if (regs->pc < PAGE_OFFSET) diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index c2411de3730f..0a89899f154a 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -198,17 +198,11 @@ static void parisc_show_stack(struct task_struct *task, do_show_stack(&info, loglvl); } -void show_stack_loglvl(struct task_struct *t, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *t, unsigned long *sp, const char *loglvl) { parisc_show_stack(t, NULL, loglvl); } -void show_stack(struct task_struct *t, unsigned long *sp) -{ - show_stack_loglvl(t, sp, KERN_CRIT) -} - int is_valid_bugaddr(unsigned long iaoq) { return 1; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index a456b4454b3f..99d619f81cb5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1456,7 +1456,7 @@ void show_regs(struct pt_regs * regs) printk("NIP ["REG"] %pS\n", regs->nip, (void *)regs->nip); printk("LR ["REG"] %pS\n", regs->link, (void *)regs->link); #endif - show_stack(current, (unsigned long *) regs->gpr[1]); + show_stack(current, (unsigned long *) regs->gpr[1], KERN_DEFAULT); if (!user_mode(regs)) show_instructions(regs); } @@ -2063,8 +2063,8 @@ unsigned long get_wchan(struct task_struct *p) static int kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack_loglvl(struct task_struct *tsk, unsigned long *stack, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *stack, + const char *loglvl) { unsigned long sp, ip, lr, newsp; int count = 0; @@ -2133,11 +2133,6 @@ void show_stack_loglvl(struct task_struct *tsk, unsigned long *stack, put_task_stack(tsk); } -void show_stack(struct task_struct *tsk, unsigned long *stack) -{ - show_stack_loglvl(tsk, stack, KERN_DEFAULT); -} - #ifdef CONFIG_PPC64 /* Called with hard IRQs off */ void notrace __ppc64_runlatch_on(void) diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index c477b8585a29..b6440657ef92 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -260,7 +260,7 @@ static void raise_backtrace_ipi(cpumask_t *mask) pr_cont(" current pointer corrupt? (%px)\n", p->__current); pr_warn("Back trace of paca->saved_r1 (0x%016llx) (possibly stale):\n", p->saved_r1); - show_stack(p->__current, (unsigned long *)p->saved_r1); + show_stack(p->__current, (unsigned long *)p->saved_r1, KERN_WARNING); } } diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index aaa64bf007f8..595342910c3f 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -105,18 +105,12 @@ static bool print_trace_address(unsigned long pc, void *arg) return false; } -void show_stack_loglvl(struct task_struct *task, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { pr_cont("Call Trace:\n"); walk_stackframe(task, NULL, print_trace_address, (void *)loglvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT); -} - static bool save_wchan(unsigned long pc, void *arg) { if (!in_sched_functions(pc)) { diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 887a054919fc..0dc4b258b98d 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -126,7 +126,7 @@ unknown: return -EINVAL; } -void show_stack_loglvl(struct task_struct *task, unsigned long *stack, +void show_stack(struct task_struct *task, unsigned long *stack, const char *loglvl) { struct unwind_state state; @@ -139,11 +139,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *stack, debug_show_held_locks(task ? : current); } -void show_stack(struct task_struct *task, unsigned long *stack) -{ - show_stack_loglvl(task, stack, KERN_DEFAULT); -} - static void show_last_breaking_event(struct pt_regs *regs) { printk("Last Breaking-Event-Address:\n"); @@ -181,7 +176,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_stack(NULL, (unsigned long *) regs->gprs[15]); + show_stack(NULL, (unsigned long *) regs->gprs[15], KERN_DEFAULT); show_last_breaking_event(regs); } diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index cc51e9d74667..a13c045804ed 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -144,8 +144,7 @@ void show_trace(struct task_struct *tsk, unsigned long *sp, debug_show_held_locks(tsk); } -void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { unsigned long stack; @@ -161,8 +160,3 @@ void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, (unsigned long)task_stack_page(tsk)); show_trace(tsk, sp, NULL, loglvl); } - -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT); -} diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 0b07de5618e5..65c0d5207b0c 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -145,12 +145,10 @@ void show_regs(struct pt_regs *r) } /* - * The show_stack(), show_stack_loglvl() are external APIs which - * we do not use ourselves. + * The show_stack() is external API which we do not use ourselves. * The oops is printed in die_if_kernel. */ -void show_stack_loglvl(struct task_struct *tsk, unsigned long *_ksp, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *_ksp, const char *loglvl) { unsigned long pc, fp; unsigned long task_base; @@ -179,11 +177,6 @@ void show_stack_loglvl(struct task_struct *tsk, unsigned long *_ksp, printk("%s\n", loglvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT); -} - /* * Free current thread data structures etc.. */ diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 423011e60982..5a4d9c8f8903 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -195,7 +195,7 @@ void show_regs(struct pt_regs *regs) regs->u_regs[15]); printk("RPC: <%pS>\n", (void *) regs->u_regs[15]); show_regwindow(regs); - show_stack(current, (unsigned long *) regs->u_regs[UREG_FP]); + show_stack(current, (unsigned long *)regs->u_regs[UREG_FP], KERN_DEFAULT); } union global_cpu_snapshot global_cpu_snapshot[NR_CPUS]; diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 8715bc93bd9d..96d92f107551 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2453,8 +2453,7 @@ static void user_instruction_dump(unsigned int __user *pc) printk("\n"); } -void show_stack_loglvl(struct task_struct *tsk, unsigned long *_ksp, - const char *loglvl) +void show_stack(struct task_struct *tsk, unsigned long *_ksp, const char *loglvl) { unsigned long fp, ksp; struct thread_info *tp; @@ -2514,11 +2513,6 @@ void show_stack_loglvl(struct task_struct *tsk, unsigned long *_ksp, } while (++count < 16); } -void show_stack(struct task_struct *tsk, unsigned long *_ksp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT); -} - static inline struct reg_window *kernel_stack_up(struct reg_window *rw) { unsigned long fp = rw->ins[6]; diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 30575bd92975..a2e680f7d39f 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -648,7 +648,7 @@ static void stack_proc(void *arg) { struct task_struct *task = arg; - show_stack(task, NULL); + show_stack(task, NULL, KERN_INFO); } /* diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index 1b54b6431499..acbc879d2773 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -27,7 +27,7 @@ static const struct stacktrace_ops stackops = { .address = _print_addr }; -void show_stack_loglvl(struct task_struct *task, unsigned long *stack, +void show_stack(struct task_struct *task, unsigned long *stack, const char *loglvl) { struct pt_regs *segv_regs = current->thread.segv_regs; @@ -56,8 +56,3 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *stack, dump_trace(current, &stackops, (void *)loglvl); printk("%s\n", loglvl); } - -void show_stack(struct task_struct *task, unsigned long *stack) -{ - show_stack_loglvl(task, stack, KERN_INFO); -} diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 8b1335997f50..a3ac01df1a2e 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -167,18 +167,13 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk, c_backtrace(fp, loglvl); } -void show_stack_loglvl(struct task_struct *tsk, unsigned long *sp, +void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) { dump_backtrace(NULL, tsk, loglvl); barrier(); } -void show_stack(struct task_struct *tsk, unsigned long *sp) -{ - show_stack_loglvl(tsk, sp, KERN_DEFAULT) -} - static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) { diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 9d2c076be37a..5f816861f5d2 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -159,7 +159,7 @@ static void dump_leak(void) return; dump = 1; - show_stack_loglvl(NULL, NULL, KERN_ERR); + show_stack(NULL, NULL, KERN_ERR); debug_dma_dump_mappings(NULL); } #endif diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 4396f2cfad19..456511b2284e 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -279,7 +279,7 @@ next: } } -void show_stack_loglvl(struct task_struct *task, unsigned long *sp, +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { task = task ? : current; @@ -294,11 +294,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *sp, show_trace_log_lvl(task, NULL, sp, loglvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_DEFAULT); -} - void show_stack_regs(struct pt_regs *regs) { show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 1013acc2e03e..e880460741d2 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -501,8 +501,7 @@ static void show_trace(struct task_struct *task, unsigned long *sp, #define STACK_DUMP_LINE_SIZE 32 static size_t kstack_depth_to_print = CONFIG_PRINT_STACK_DEPTH; -void show_stack_loglvl(struct task_struct *task, unsigned long *sp, - const char *loglvl) +void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl) { size_t len; @@ -519,11 +518,6 @@ void show_stack_loglvl(struct task_struct *task, unsigned long *sp, show_trace(task, sp, loglvl); } -void show_stack(struct task_struct *task, unsigned long *sp) -{ - show_stack_loglvl(task, sp, KERN_INFO); -} - DEFINE_SPINLOCK(die_lock); void die(const char * str, struct pt_regs * regs, long err) @@ -540,7 +534,7 @@ void die(const char * str, struct pt_regs * regs, long err) pr_info("%s: sig: %ld [#%d]%s\n", str, err, ++die_counter, pr); show_regs(regs); if (!user_mode(regs)) - show_stack(NULL, (unsigned long*)regs->areg[1]); + show_stack(NULL, (unsigned long *)regs->areg[1], KERN_INFO); add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); spin_unlock_irq(&die_lock); diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index be5af89bbff1..9dd85bea4026 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -519,7 +519,7 @@ static void dpm_watchdog_handler(struct timer_list *t) struct dpm_watchdog *wd = from_timer(wd, t, timer); dev_emerg(wd->dev, "**** DPM device timeout ****\n"); - show_stack_loglvl(wd->tsk, NULL, KERN_EMERG); + show_stack(wd->tsk, NULL, KERN_EMERG); panic("%s %s: unrecoverable failure\n", dev_driver_string(wd->dev), dev_name(wd->dev)); } diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 7bd935379dec..7c95afa905a0 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -235,7 +235,7 @@ static void showacpu(void *dummy) raw_spin_lock_irqsave(&show_lock, flags); pr_info("CPU%d:\n", smp_processor_id()); - show_stack_loglvl(NULL, NULL, KERN_INFO); + show_stack(NULL, NULL, KERN_INFO); raw_spin_unlock_irqrestore(&show_lock, flags); } diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h index 373e4e3faf2a..00c45a0e6abe 100644 --- a/include/linux/sched/debug.h +++ b/include/linux/sched/debug.h @@ -30,9 +30,8 @@ extern void show_regs(struct pt_regs *); * task), SP is the stack pointer of the first frame that should be shown in the back * trace (or NULL if the entire call-chain of the task should be shown). */ -extern void show_stack(struct task_struct *task, unsigned long *sp); -extern void show_stack_loglvl(struct task_struct *task, unsigned long *sp, - const char *loglvl); +extern void show_stack(struct task_struct *task, unsigned long *sp, + const char *loglvl); extern void sched_show_task(struct task_struct *p); diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 43f5dcd2b9ac..18e03aba2cfc 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -30,7 +30,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr) kdb_dump_stack_on_cpu(kdb_process_cpu(p)); console_loglevel = old_lvl; } else { - show_stack_loglvl(p, addr, KERN_EMERG); + show_stack(p, addr, KERN_EMERG); } kdb_trap_printk--; diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 5e63d6e8a223..36e69100e8e0 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -125,7 +125,7 @@ void debug_rt_mutex_print_deadlock(struct rt_mutex_waiter *waiter) printk("\n%s/%d's [blocked] stackdump:\n\n", task->comm, task_pid_nr(task)); - show_stack_loglvl(task, NULL, KERN_DEFAULT); + show_stack(task, NULL, KERN_DEFAULT); printk("\n%s/%d's [current] stackdump:\n\n", current->comm, task_pid_nr(current)); dump_stack(); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index c68a6e7b306f..8f360326861e 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6025,7 +6025,7 @@ void sched_show_task(struct task_struct *p) (unsigned long)task_thread_info(p)->flags); print_worker_info(KERN_INFO, p); - show_stack_loglvl(p, NULL, KERN_INFO); + show_stack(p, NULL, KERN_INFO); put_task_stack(p); } EXPORT_SYMBOL_GPL(sched_show_task); diff --git a/lib/dump_stack.c b/lib/dump_stack.c index 5595e8962cf6..a00ee6eedc7c 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -74,7 +74,7 @@ void show_regs_print_info(const char *log_lvl) static void __dump_stack(void) { dump_stack_print_info(KERN_DEFAULT); - show_stack_loglvl(NULL, NULL, KERN_DEFAULT); + show_stack(NULL, NULL, KERN_DEFAULT); } /** -- cgit v1.3 From e31cf2f4ca422ac9b14ecc4a1295b8977a20f812 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Mon, 8 Jun 2020 21:32:33 -0700 Subject: mm: don't include asm/pgtable.h if linux/mm.h is already included Patch series "mm: consolidate definitions of page table accessors", v2. The low level page table accessors (pXY_index(), pXY_offset()) are duplicated across all architectures and sometimes more than once. For instance, we have 31 definition of pgd_offset() for 25 supported architectures. Most of these definitions are actually identical and typically it boils down to, e.g. static inline unsigned long pmd_index(unsigned long address) { return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } These definitions can be shared among 90% of the arches provided XYZ_SHIFT, PTRS_PER_XYZ and xyz_page_vaddr() are defined. For architectures that really need a custom version there is always possibility to override the generic version with the usual ifdefs magic. These patches introduce include/linux/pgtable.h that replaces include/asm-generic/pgtable.h and add the definitions of the page table accessors to the new header. This patch (of 12): The linux/mm.h header includes to allow inlining of the functions involving page table manipulations, e.g. pte_alloc() and pmd_alloc(). So, there is no point to explicitly include in the files that include . The include statements in such cases are remove with a simple loop: for f in $(git grep -l "include ") ; do sed -i -e '/include / d' $f done Signed-off-by: Mike Rapoport Signed-off-by: Andrew Morton Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brian Cain Cc: Catalin Marinas Cc: Chris Zankel Cc: "David S. Miller" Cc: Geert Uytterhoeven Cc: Greentime Hu Cc: Greg Ungerer Cc: Guan Xuetao Cc: Guo Ren Cc: Heiko Carstens Cc: Helge Deller Cc: Ingo Molnar Cc: Ley Foon Tan Cc: Mark Salter Cc: Matthew Wilcox Cc: Matt Turner Cc: Max Filippov Cc: Michael Ellerman Cc: Michal Simek Cc: Mike Rapoport Cc: Nick Hu Cc: Paul Walmsley Cc: Richard Weinberger Cc: Rich Felker Cc: Russell King Cc: Stafford Horne Cc: Thomas Bogendoerfer Cc: Thomas Gleixner Cc: Tony Luck Cc: Vincent Chen Cc: Vineet Gupta Cc: Will Deacon Cc: Yoshinori Sato Link: http://lkml.kernel.org/r/20200514170327.31389-1-rppt@kernel.org Link: http://lkml.kernel.org/r/20200514170327.31389-2-rppt@kernel.org Signed-off-by: Linus Torvalds --- arch/alpha/boot/bootp.c | 1 - arch/alpha/boot/bootpz.c | 1 - arch/alpha/boot/main.c | 1 - arch/alpha/include/asm/io.h | 1 - arch/alpha/kernel/process.c | 1 - arch/alpha/kernel/ptrace.c | 1 - arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/smp.c | 1 - arch/alpha/kernel/sys_alcor.c | 1 - arch/alpha/kernel/sys_cabriolet.c | 1 - arch/alpha/kernel/sys_dp264.c | 1 - arch/alpha/kernel/sys_eb64p.c | 1 - arch/alpha/kernel/sys_eiger.c | 1 - arch/alpha/kernel/sys_jensen.c | 1 - arch/alpha/kernel/sys_marvel.c | 1 - arch/alpha/kernel/sys_miata.c | 1 - arch/alpha/kernel/sys_mikasa.c | 1 - arch/alpha/kernel/sys_nautilus.c | 1 - arch/alpha/kernel/sys_noritake.c | 1 - arch/alpha/kernel/sys_rawhide.c | 1 - arch/alpha/kernel/sys_ruffian.c | 1 - arch/alpha/kernel/sys_rx164.c | 1 - arch/alpha/kernel/sys_sable.c | 1 - arch/alpha/kernel/sys_sio.c | 1 - arch/alpha/kernel/sys_sx164.c | 1 - arch/alpha/kernel/sys_takara.c | 1 - arch/alpha/kernel/sys_titan.c | 1 - arch/alpha/kernel/sys_wildfire.c | 1 - arch/alpha/mm/init.c | 1 - arch/arm/kernel/machine_kexec.c | 1 - arch/arm/kernel/module.c | 1 - arch/arm/kernel/ptrace.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm/mach-ebsa110/core.c | 1 - arch/arm/mach-footbridge/common.c | 1 - arch/arm/mach-imx/mm-imx21.c | 1 - arch/arm/mach-imx/mm-imx27.c | 1 - arch/arm/mach-imx/mm-imx3.c | 1 - arch/arm/mach-iop32x/i2c.c | 1 - arch/arm/mach-iop32x/iq31244.c | 1 - arch/arm/mach-iop32x/iq80321.c | 1 - arch/arm/mach-iop32x/n2100.c | 1 - arch/arm/mach-ixp4xx/common.c | 1 - arch/arm/mach-sa1100/assabet.c | 1 - arch/arm/mm/copypage-v4mc.c | 1 - arch/arm/mm/copypage-v6.c | 1 - arch/arm/mm/copypage-xscale.c | 1 - arch/arm/mm/dump.c | 1 - arch/arm/mm/fault-armv.c | 1 - arch/arm/mm/fault.c | 1 - arch/arm/mm/pageattr.c | 1 - arch/arm64/kernel/hibernate.c | 1 - arch/arm64/kernel/ptrace.c | 1 - arch/arm64/kernel/smp.c | 1 - arch/arm64/mm/dump.c | 1 - arch/arm64/mm/fault.c | 1 - arch/arm64/mm/kasan_init.c | 1 - arch/arm64/mm/pageattr.c | 1 - arch/csky/kernel/module.c | 1 - arch/csky/kernel/ptrace.c | 1 - arch/csky/mm/init.c | 1 - arch/csky/mm/tlb.c | 1 - arch/h8300/kernel/process.c | 1 - arch/h8300/kernel/setup.c | 1 - arch/h8300/kernel/signal.c | 1 - arch/h8300/mm/fault.c | 1 - arch/h8300/mm/init.c | 1 - arch/h8300/mm/memory.c | 1 - arch/hexagon/mm/vm_fault.c | 1 - arch/ia64/kernel/efi.c | 1 - arch/ia64/kernel/ptrace.c | 1 - arch/ia64/kernel/smp.c | 1 - arch/ia64/kernel/smpboot.c | 1 - arch/ia64/mm/contig.c | 1 - arch/ia64/mm/fault.c | 1 - arch/m68k/68000/timers.c | 1 - arch/m68k/amiga/config.c | 1 - arch/m68k/apollo/config.c | 1 - arch/m68k/atari/atasound.c | 1 - arch/m68k/atari/stram.c | 1 - arch/m68k/bvme6000/config.c | 1 - arch/m68k/kernel/process.c | 1 - arch/m68k/kernel/ptrace.c | 1 - arch/m68k/kernel/setup_no.c | 1 - arch/m68k/kernel/signal.c | 1 - arch/m68k/kernel/uboot.c | 1 - arch/m68k/mac/config.c | 1 - arch/m68k/mm/mcfmmu.c | 1 - arch/m68k/mm/sun3kmap.c | 1 - arch/m68k/mm/sun3mmu.c | 1 - arch/m68k/mvme147/config.c | 1 - arch/m68k/mvme16x/config.c | 1 - arch/m68k/q40/config.c | 1 - arch/m68k/sun3/config.c | 1 - arch/m68k/sun3/dvma.c | 1 - arch/m68k/sun3/mmu_emu.c | 1 - arch/m68k/sun3/sun3dvma.c | 1 - arch/m68k/sun3x/dvma.c | 1 - arch/m68k/sun3x/prom.c | 1 - arch/microblaze/kernel/signal.c | 1 - arch/microblaze/mm/fault.c | 1 - arch/mips/fw/arc/memory.c | 1 - arch/mips/include/asm/mach-generic/floppy.h | 1 - arch/mips/include/asm/mach-jazz/floppy.h | 1 - arch/mips/jazz/jazzdma.c | 1 - arch/mips/kernel/module.c | 1 - arch/mips/kernel/process.c | 1 - arch/mips/kernel/ptrace.c | 1 - arch/mips/kernel/ptrace32.c | 1 - arch/mips/kernel/smp-bmips.c | 1 - arch/mips/kernel/traps.c | 1 - arch/mips/kvm/tlb.c | 1 - arch/mips/lib/dump_tlb.c | 1 - arch/mips/lib/r3k_dump_tlb.c | 1 - arch/mips/mm/c-octeon.c | 1 - arch/mips/mm/c-r3k.c | 1 - arch/mips/mm/c-r4k.c | 1 - arch/mips/mm/c-tx39.c | 1 - arch/mips/mm/init.c | 1 - arch/mips/mm/page.c | 1 - arch/mips/mm/pgtable-32.c | 1 - arch/mips/mm/pgtable-64.c | 1 - arch/mips/mm/sc-ip22.c | 1 - arch/mips/mm/sc-mips.c | 1 - arch/mips/mm/sc-r5k.c | 1 - arch/mips/mm/tlb-r3k.c | 1 - arch/mips/mm/tlb-r4k.c | 1 - arch/mips/sgi-ip27/ip27-init.c | 1 - arch/mips/sgi-ip27/ip27-timer.c | 1 - arch/mips/sgi-ip32/ip32-memory.c | 1 - arch/nds32/mm/fault.c | 1 - arch/nds32/mm/proc.c | 1 - arch/nios2/kernel/module.c | 1 - arch/nios2/mm/init.c | 1 - arch/nios2/mm/pgtable.c | 1 - arch/nios2/mm/tlb.c | 1 - arch/openrisc/include/asm/tlbflush.h | 1 - arch/openrisc/kernel/asm-offsets.c | 1 - arch/openrisc/kernel/process.c | 1 - arch/openrisc/kernel/ptrace.c | 1 - arch/openrisc/kernel/setup.c | 1 - arch/openrisc/kernel/traps.c | 1 - arch/openrisc/mm/init.c | 1 - arch/openrisc/mm/tlb.c | 1 - arch/parisc/include/asm/mmu_context.h | 1 - arch/parisc/kernel/module.c | 1 - arch/parisc/kernel/ptrace.c | 1 - arch/parisc/kernel/smp.c | 1 - arch/parisc/mm/init.c | 1 - arch/powerpc/include/asm/io.h | 1 - arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/process.c | 1 - arch/powerpc/kernel/signal_32.c | 1 - arch/powerpc/kernel/signal_64.c | 1 - arch/powerpc/kernel/traps.c | 1 - arch/powerpc/kernel/vdso.c | 1 - arch/powerpc/lib/code-patching.c | 1 - arch/powerpc/mm/book3s64/hash_hugetlbpage.c | 1 - arch/powerpc/mm/book3s64/hash_pgtable.c | 1 - arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 1 - arch/powerpc/mm/book3s64/radix_pgtable.c | 1 - arch/powerpc/mm/fault.c | 1 - arch/powerpc/mm/hugetlbpage.c | 1 - arch/powerpc/mm/init_32.c | 1 - arch/powerpc/mm/init_64.c | 1 - arch/powerpc/mm/mem.c | 1 - arch/powerpc/mm/nohash/40x.c | 1 - arch/powerpc/mm/nohash/fsl_booke.c | 1 - arch/powerpc/mm/pgtable_32.c | 1 - arch/powerpc/mm/pgtable_64.c | 1 - arch/powerpc/mm/ptdump/hashpagetable.c | 1 - arch/powerpc/mm/ptdump/ptdump.c | 1 - arch/powerpc/perf/callchain.c | 1 - arch/powerpc/perf/callchain_32.c | 1 - arch/powerpc/perf/callchain_64.c | 1 - arch/powerpc/platforms/8xx/cpm1.c | 1 - arch/powerpc/platforms/8xx/micropatch.c | 1 - arch/powerpc/platforms/cell/setup.c | 1 - arch/powerpc/platforms/chrp/setup.c | 1 - arch/powerpc/platforms/maple/setup.c | 1 - arch/powerpc/platforms/maple/time.c | 1 - arch/powerpc/platforms/powermac/setup.c | 1 - arch/powerpc/platforms/powermac/time.c | 1 - arch/powerpc/platforms/pseries/setup.c | 1 - arch/powerpc/sysdev/cpm2.c | 1 - arch/powerpc/xmon/xmon.c | 1 - arch/riscv/kernel/setup.c | 1 - arch/riscv/mm/init.c | 1 - arch/s390/include/asm/tlbflush.h | 1 - arch/s390/kernel/machine_kexec.c | 1 - arch/s390/kernel/ptrace.c | 1 - arch/s390/kernel/vdso.c | 1 - arch/s390/mm/dump_pagetables.c | 1 - arch/s390/mm/fault.c | 1 - arch/s390/mm/init.c | 1 - arch/s390/mm/pageattr.c | 1 - arch/s390/mm/pgtable.c | 1 - arch/s390/mm/vmem.c | 1 - arch/sh/kernel/machine_kexec.c | 1 - arch/sh/kernel/ptrace_32.c | 1 - arch/sh/kernel/signal_32.c | 1 - arch/sh/mm/cache-sh3.c | 1 - arch/sh/mm/cache-sh4.c | 1 - arch/sh/mm/cache-sh7705.c | 1 - arch/sh/mm/nommu.c | 1 - arch/sparc/kernel/leon_smp.c | 1 - arch/sparc/kernel/process_32.c | 1 - arch/sparc/kernel/process_64.c | 1 - arch/sparc/kernel/ptrace_32.c | 1 - arch/sparc/kernel/ptrace_64.c | 1 - arch/sparc/kernel/setup_32.c | 1 - arch/sparc/kernel/setup_64.c | 1 - arch/sparc/kernel/signal32.c | 1 - arch/sparc/kernel/signal_32.c | 1 - arch/sparc/kernel/signal_64.c | 1 - arch/sparc/kernel/smp_32.c | 1 - arch/sparc/kernel/smp_64.c | 1 - arch/sparc/kernel/traps_64.c | 1 - arch/sparc/mm/fault_32.c | 1 - arch/sparc/mm/fault_64.c | 1 - arch/sparc/mm/hugetlbpage.c | 1 - arch/sparc/mm/init_32.c | 1 - arch/sparc/mm/init_64.c | 1 - arch/sparc/mm/io-unit.c | 1 - arch/sparc/mm/iommu.c | 1 - arch/sparc/mm/tlb.c | 1 - arch/um/kernel/process.c | 1 - arch/um/kernel/skas/mmu.c | 1 - arch/um/kernel/skas/uaccess.c | 1 - arch/um/kernel/tlb.c | 1 - arch/um/kernel/trap.c | 1 - arch/um/kernel/um_arch.c | 1 - arch/unicore32/kernel/module.c | 1 - arch/unicore32/mm/fault.c | 1 - arch/x86/include/asm/iomap.h | 1 - arch/x86/include/asm/xen/page.h | 1 - arch/x86/kernel/alternative.c | 1 - arch/x86/kernel/amd_gart_64.c | 1 - arch/x86/kernel/doublefault_32.c | 1 - arch/x86/kernel/machine_kexec_32.c | 1 - arch/x86/kernel/machine_kexec_64.c | 1 - arch/x86/kernel/module.c | 1 - arch/x86/kernel/process_32.c | 1 - arch/x86/kernel/process_64.c | 1 - arch/x86/kernel/ptrace.c | 1 - arch/x86/kernel/tboot.c | 1 - arch/x86/mm/dump_pagetables.c | 1 - arch/x86/mm/init_32.c | 1 - arch/x86/mm/init_64.c | 1 - arch/x86/mm/kasan_init_64.c | 1 - arch/x86/mm/pat/cpa-test.c | 1 - arch/x86/mm/pat/memtype.c | 1 - arch/x86/mm/pgtable.c | 1 - arch/x86/mm/pgtable_32.c | 1 - arch/x86/mm/pti.c | 1 - arch/x86/platform/efi/efi_64.c | 1 - arch/x86/xen/enlighten_pv.c | 1 - arch/x86/xen/grant-table.c | 1 - arch/xtensa/kernel/process.c | 1 - arch/xtensa/kernel/ptrace.c | 1 - arch/xtensa/kernel/setup.c | 1 - drivers/char/agp/frontend.c | 1 - drivers/char/agp/generic.c | 1 - drivers/char/bsr.c | 1 - drivers/char/mspec.c | 1 - drivers/gpu/drm/i915/i915_mm.c | 1 - drivers/infiniband/sw/rdmavt/mmap.c | 1 - drivers/infiniband/sw/rxe/rxe_mmap.c | 1 - drivers/media/platform/davinci/vpbe_display.c | 1 - drivers/media/v4l2-core/v4l2-common.c | 1 - drivers/misc/sgi-gru/grufault.c | 1 - drivers/net/ethernet/sun/sunhme.c | 1 - drivers/sbus/char/flash.c | 1 - drivers/sbus/char/uctrl.c | 1 - drivers/scsi/a2091.c | 1 - drivers/scsi/a3000.c | 1 - drivers/scsi/gvp11.c | 1 - drivers/scsi/lasi700.c | 1 - drivers/scsi/mvme147.c | 1 - drivers/scsi/sni_53c710.c | 1 - drivers/video/console/newport_con.c | 1 - drivers/video/fbdev/acornfb.c | 1 - drivers/video/fbdev/atafb.c | 1 - drivers/video/fbdev/cirrusfb.c | 1 - drivers/video/fbdev/cyber2000fb.c | 1 - drivers/video/fbdev/fb-puv3.c | 1 - drivers/video/fbdev/hitfb.c | 1 - drivers/video/fbdev/neofb.c | 1 - drivers/video/fbdev/q40fb.c | 1 - drivers/video/fbdev/savage/savagefb_driver.c | 1 - drivers/xen/balloon.c | 1 - drivers/xen/grant-table.c | 1 - drivers/xen/privcmd.c | 1 - drivers/xen/xenbus/xenbus_probe.c | 1 - drivers/xen/xenbus/xenbus_probe_backend.c | 1 - drivers/xen/xenbus/xenbus_probe_frontend.c | 1 - fs/proc/array.c | 1 - fs/proc/meminfo.c | 1 - fs/proc/nommu.c | 1 - fs/proc/vmcore.c | 1 - include/linux/dax.h | 1 - init/init_task.c | 1 - kernel/exit.c | 1 - kernel/fork.c | 1 - kernel/power/snapshot.c | 1 - lib/ioremap.c | 1 - mm/debug_vm_pgtable.c | 1 - mm/gup.c | 1 - mm/hugetlb.c | 1 - mm/memory.c | 1 - mm/page_io.c | 1 - mm/shmem.c | 1 - mm/sparse-vmemmap.c | 1 - mm/sparse.c | 1 - mm/swap_state.c | 1 - mm/swapfile.c | 1 - mm/vmacache.c | 1 - sound/core/sgbuf.c | 1 - virt/kvm/kvm_main.c | 1 - 319 files changed, 319 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/boot/bootp.c b/arch/alpha/boot/bootp.c index 95c0359f4858..00266e6e1b71 100644 --- a/arch/alpha/boot/bootp.c +++ b/arch/alpha/boot/bootp.c @@ -16,7 +16,6 @@ #include #include -#include #include #include diff --git a/arch/alpha/boot/bootpz.c b/arch/alpha/boot/bootpz.c index 99b8d7dc344b..43af71835adf 100644 --- a/arch/alpha/boot/bootpz.c +++ b/arch/alpha/boot/bootpz.c @@ -18,7 +18,6 @@ #include #include -#include #include #include diff --git a/arch/alpha/boot/main.c b/arch/alpha/boot/main.c index 8f5ed8610970..e5347a080008 100644 --- a/arch/alpha/boot/main.c +++ b/arch/alpha/boot/main.c @@ -14,7 +14,6 @@ #include #include -#include #include diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h index d1ed5a8133c5..13bea465f1c0 100644 --- a/arch/alpha/include/asm/io.h +++ b/arch/alpha/include/asm/io.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 48b81d015d8a..b45f0b0d6511 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index cb8d599e72d6..8c43212ae38e 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -19,7 +19,6 @@ #include #include -#include #include #include "proto.h" diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index 6fa802c495b4..f5c42a8fcf9c 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -55,7 +55,6 @@ static struct notifier_block alpha_panic_block = { }; #include -#include #include #include #include diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 52995bf413fe..631cc17410d1 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -36,7 +36,6 @@ #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_alcor.c b/arch/alpha/kernel/sys_alcor.c index ce5430056f65..e063b3857b3d 100644 --- a/arch/alpha/kernel/sys_alcor.c +++ b/arch/alpha/kernel/sys_alcor.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_cabriolet.c b/arch/alpha/kernel/sys_cabriolet.c index 0aa6a27d0e2f..47459b73cdb7 100644 --- a/arch/alpha/kernel/sys_cabriolet.c +++ b/arch/alpha/kernel/sys_cabriolet.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_dp264.c b/arch/alpha/kernel/sys_dp264.c index d33508621820..9fb445d7dca5 100644 --- a/arch/alpha/kernel/sys_dp264.c +++ b/arch/alpha/kernel/sys_dp264.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_eb64p.c b/arch/alpha/kernel/sys_eb64p.c index 1cdfe55fb987..3c43fd347526 100644 --- a/arch/alpha/kernel/sys_eb64p.c +++ b/arch/alpha/kernel/sys_eb64p.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_eiger.c b/arch/alpha/kernel/sys_eiger.c index 016f79251141..bf99dcfd40c4 100644 --- a/arch/alpha/kernel/sys_eiger.c +++ b/arch/alpha/kernel/sys_eiger.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_jensen.c b/arch/alpha/kernel/sys_jensen.c index d0d44f543d77..0a2ab6cb18db 100644 --- a/arch/alpha/kernel/sys_jensen.c +++ b/arch/alpha/kernel/sys_jensen.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include "proto.h" diff --git a/arch/alpha/kernel/sys_marvel.c b/arch/alpha/kernel/sys_marvel.c index 533899a4a1a1..83d6c53d6d4d 100644 --- a/arch/alpha/kernel/sys_marvel.c +++ b/arch/alpha/kernel/sys_marvel.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_miata.c b/arch/alpha/kernel/sys_miata.c index 702292af2225..e1bee8f84c58 100644 --- a/arch/alpha/kernel/sys_miata.c +++ b/arch/alpha/kernel/sys_miata.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_mikasa.c b/arch/alpha/kernel/sys_mikasa.c index 3af4f94113e1..7690dfd57cb6 100644 --- a/arch/alpha/kernel/sys_mikasa.c +++ b/arch/alpha/kernel/sys_mikasa.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c index 32850e45834b..53adf43dcd44 100644 --- a/arch/alpha/kernel/sys_nautilus.c +++ b/arch/alpha/kernel/sys_nautilus.c @@ -40,7 +40,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_noritake.c b/arch/alpha/kernel/sys_noritake.c index b106f327f765..47f3ce4f719a 100644 --- a/arch/alpha/kernel/sys_noritake.c +++ b/arch/alpha/kernel/sys_noritake.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_rawhide.c b/arch/alpha/kernel/sys_rawhide.c index b76f65d0e8b5..b5846ffdadce 100644 --- a/arch/alpha/kernel/sys_rawhide.c +++ b/arch/alpha/kernel/sys_rawhide.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_ruffian.c b/arch/alpha/kernel/sys_ruffian.c index d33074011960..4b1c8d85c4f0 100644 --- a/arch/alpha/kernel/sys_ruffian.c +++ b/arch/alpha/kernel/sys_ruffian.c @@ -23,7 +23,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_rx164.c b/arch/alpha/kernel/sys_rx164.c index 4d85eaeb44aa..94046f9aea08 100644 --- a/arch/alpha/kernel/sys_rx164.c +++ b/arch/alpha/kernel/sys_rx164.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_sable.c b/arch/alpha/kernel/sys_sable.c index 3cf0d32da5d8..930005b2f630 100644 --- a/arch/alpha/kernel/sys_sable.c +++ b/arch/alpha/kernel/sys_sable.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_sio.c b/arch/alpha/kernel/sys_sio.c index a6bdc1da47ad..7c420d8dac53 100644 --- a/arch/alpha/kernel/sys_sio.c +++ b/arch/alpha/kernel/sys_sio.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_sx164.c b/arch/alpha/kernel/sys_sx164.c index 17cc203176c8..dd9de84b630c 100644 --- a/arch/alpha/kernel/sys_sx164.c +++ b/arch/alpha/kernel/sys_sx164.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_takara.c b/arch/alpha/kernel/sys_takara.c index e230c6864088..9e2adb69bc74 100644 --- a/arch/alpha/kernel/sys_takara.c +++ b/arch/alpha/kernel/sys_takara.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include diff --git a/arch/alpha/kernel/sys_titan.c b/arch/alpha/kernel/sys_titan.c index c8390d8de140..b1f3b4fcf99b 100644 --- a/arch/alpha/kernel/sys_titan.c +++ b/arch/alpha/kernel/sys_titan.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/kernel/sys_wildfire.c b/arch/alpha/kernel/sys_wildfire.c index 2191bde161fd..2c54d707142a 100644 --- a/arch/alpha/kernel/sys_wildfire.c +++ b/arch/alpha/kernel/sys_wildfire.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index 667cd21393b5..3c42b3147fd6 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -24,7 +24,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 76300f3813e8..974b6c64d3e6 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index af0a8500a24e..e15444b25ca0 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 4cc6a7eff635..d0f7c8896c96 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -25,7 +25,6 @@ #include #include -#include #include #define CREATE_TRACE_POINTS diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 46e1be9e57a8..9a6432557871 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm/mach-ebsa110/core.c b/arch/arm/mach-ebsa110/core.c index 575b2e2b6759..5960e3dfd2bf 100644 --- a/arch/arm/mach-ebsa110/core.c +++ b/arch/arm/mach-ebsa110/core.c @@ -17,7 +17,6 @@ #include #include #include -#include #include #include diff --git a/arch/arm/mach-footbridge/common.c b/arch/arm/mach-footbridge/common.c index 015f75d1c98d..eee095f0e2f6 100644 --- a/arch/arm/mach-footbridge/common.c +++ b/arch/arm/mach-footbridge/common.c @@ -14,7 +14,6 @@ #include #include