From 88ecd153be9519f259b87a9f6f4c8383a8b3bbf1 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:02:59 +0100 Subject: seqlock, kcsan: Add annotations for KCSAN Since seqlocks in the Linux kernel do not require the use of marked atomic accesses in critical sections, we teach KCSAN to assume such accesses are atomic. KCSAN currently also pretends that writes to `sequence` are atomic, although plain writes are currently used (their corresponding reads are READ_ONCE). Further, to avoid false positives in the absence of a clear ending of a seqlock reader critical section (only when using the raw interface), KCSAN assumes a fixed number of accesses after the start of a seqlock critical section are atomic. === Commentary on design around absence of clear begin/end markings === Seqlock usage via seqlock_t follows a predictable usage pattern, where clear critical section begin/end is enforced, with subtle special cases for readers needing to be flat atomic regions, e.g. because of usage such as in: - fs/namespace.c:__legitimize_mnt - unbalanced read_seqretry - fs/dcache.c:d_walk - unbalanced need_seqretry But anything directly accessing seqcount_t seems to be unpredictable. Filtering for usage of read_seqcount_retry not following 'do { .. } while (read_seqcount_retry(..));': $ git grep 'read_seqcount_retry' | grep -Ev 'while \(|seqlock.h|Doc|\* ' => about 1/3 of the total read_seqcount_retry usage. Just looking at fs/namei.c, we conclude that it is non-trivial to prescribe and migrate to an interface that would force clear begin/end seqlock markings for critical sections. As such, we concluded that the best design currently is to simply ensure that KCSAN works well with the existing code. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. McKenney --- include/linux/seqlock.h | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index bcf4cf26b8c8..61232bc223fd 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -37,8 +37,24 @@ #include #include #include +#include #include +/* + * The seqlock interface does not prescribe a precise sequence of read + * begin/retry/end. For readers, typically there is a call to + * read_seqcount_begin() and read_seqcount_retry(), however, there are more + * esoteric cases which do not follow this pattern. + * + * As a consequence, we take the following best-effort approach for raw usage + * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, + * pessimistically mark then next KCSAN_SEQLOCK_REGION_MAX memory accesses as + * atomics; if there is a matching read_seqcount_retry() call, no following + * memory operations are considered atomic. Usage of seqlocks via seqlock_t + * interface is not affected. + */ +#define KCSAN_SEQLOCK_REGION_MAX 1000 + /* * Version using sequence counter only. 
* This can be used when code has its own mutex protecting the @@ -115,6 +131,7 @@ repeat: cpu_relax(); goto repeat; } + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret; } @@ -131,6 +148,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret; } @@ -183,6 +201,7 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); return ret & ~1; } @@ -202,7 +221,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) */ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) { - return unlikely(s->sequence != start); + kcsan_atomic_next(0); + return unlikely(READ_ONCE(s->sequence) != start); } /** @@ -225,6 +245,7 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) static inline void raw_write_seqcount_begin(seqcount_t *s) { + kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); } @@ -233,6 +254,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); s->sequence++; + kcsan_nestable_atomic_end(); } /** @@ -271,9 +293,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) */ static inline void raw_write_seqcount_barrier(seqcount_t *s) { + kcsan_nestable_atomic_begin(); s->sequence++; smp_wmb(); s->sequence++; + kcsan_nestable_atomic_end(); } static inline int raw_read_seqcount_latch(seqcount_t *s) @@ -398,7 +422,9 @@ static inline void write_seqcount_end(seqcount_t *s) static inline void write_seqcount_invalidate(seqcount_t *s) { smp_wmb(); + kcsan_nestable_atomic_begin(); s->sequence+=2; + kcsan_nestable_atomic_end(); } typedef struct { @@ -430,11 +456,21 @@ typedef struct { */ static inline unsigned read_seqbegin(const seqlock_t *sl) { - return read_seqcount_begin(&sl->seqcount); + unsigned ret = read_seqcount_begin(&sl->seqcount); + + kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry */ + kcsan_flat_atomic_begin(); + return ret; } static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { + /* + * Assume not nested: read_seqretry may be called multiple times when + * completing read critical section. + */ + kcsan_flat_atomic_end(); + return read_seqcount_retry(&sl->seqcount, start); } -- cgit v1.2.3 From bf07132f96d426bcbf2098227fb680915cf44498 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 14 Nov 2019 19:03:00 +0100 Subject: seqlock: Require WRITE_ONCE surrounding raw_seqcount_barrier This patch proposes to require marked atomic accesses surrounding raw_write_seqcount_barrier. We reason that otherwise there is no way to guarantee propagation or atomicity of writes before/after the barrier [1]. For example, consider the case where the compiler tears stores either before or after the barrier; in this case, readers may observe a partial value, and because readers are unaware that writes are going on (writes are not in a seq-writer critical section), they will complete the seq-reader critical section while having observed some partial state. [1] https://lwn.net/Articles/793253/ This came up when designing and implementing KCSAN, because KCSAN would flag these accesses as data-races. After careful analysis, our reasoning as above led us to conclude that the best thing to do is to propose an amendment to the raw_seqcount_barrier usage. Signed-off-by: Marco Elver Acked-by: Paul E. McKenney Signed-off-by: Paul E. 
McKenney --- include/linux/seqlock.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 61232bc223fd..f52c91be8939 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -265,6 +265,13 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * usual consistency guarantee. It is one wmb cheaper, because we can * collapse the two back-to-back wmb()s. * + * Note that, writes surrounding the barrier should be declared atomic (e.g. + * via WRITE_ONCE): a) to ensure the writes become visible to other threads + * atomically, avoiding compiler optimizations; b) to document which writes are + * meant to propagate to the reader critical section. This is necessary because + * neither writes before and after the barrier are enclosed in a seq-writer + * critical section that would ensure readers are aware of ongoing writes. + * * seqcount_t seq; * bool X = true, Y = false; * @@ -284,11 +291,11 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * * void write(void) * { - * Y = true; + * WRITE_ONCE(Y, true); * * raw_write_seqcount_barrier(seq); * - * X = false; + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) -- cgit v1.2.3 From 5cbaefe9743bf14c9d3106db0cc19f8cb0a3ca22 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 20 Nov 2019 10:41:43 +0100 Subject: kcsan: Improve various small stylistic details Tidy up a few bits: - Fix typos and grammar, improve wording. - Remove spurious newlines that are col80 warning artifacts where the resulting line-break is worse than the disease it's curing. - Use core kernel coding style to improve readability and reduce spurious code pattern variations. - Use better vertical alignment for structure definitions and initialization sequences. - Misc other small details. No change in functionality intended. Cc: linux-kernel@vger.kernel.org Cc: Marco Elver Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Andrew Morton Cc: Thomas Gleixner Cc: Paul E. 
McKenney Cc: Will Deacon Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- include/linux/compiler-clang.h | 2 +- include/linux/compiler.h | 2 +- include/linux/kcsan-checks.h | 22 ++++++--------- include/linux/kcsan.h | 23 ++++++---------- include/linux/seqlock.h | 8 +++--- kernel/kcsan/atomic.h | 2 +- kernel/kcsan/core.c | 59 ++++++++++++++++++---------------------- kernel/kcsan/debugfs.c | 62 ++++++++++++++++++++---------------------- kernel/kcsan/encoding.h | 25 +++++++++-------- kernel/kcsan/kcsan.h | 11 ++++---- kernel/kcsan/report.c | 42 ++++++++++++++-------------- kernel/kcsan/test.c | 6 ++-- kernel/sched/Makefile | 2 +- lib/Kconfig.kcsan | 16 +++++------ 15 files changed, 131 insertions(+), 153 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9933ca8ffe16..9cfa4a5c6f42 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -226,7 +226,7 @@ config X86 select VIRT_TO_BUS select X86_FEATURE_NAMES if PROC_FS select PROC_PID_ARCH_STATUS if PROC_FS - select HAVE_ARCH_KCSAN if X86_64 + select HAVE_ARCH_KCSAN if X86_64 config INSTRUCTION_DECODER def_bool y diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index a213eb55e725..2cb42d8bdedc 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -16,7 +16,7 @@ #define KASAN_ABI_VERSION 5 #if __has_feature(address_sanitizer) || __has_feature(hwaddress_sanitizer) -/* emulate gcc's __SANITIZE_ADDRESS__ flag */ +/* Emulate GCC's __SANITIZE_ADDRESS__ flag */ #define __SANITIZE_ADDRESS__ #define __no_sanitize_address \ __attribute__((no_sanitize("address", "hwaddress"))) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7d3e77781578..ad8c76144a3c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -313,7 +313,7 @@ unsigned long read_word_at_a_time(const void *addr) #include /* - * data_race: macro to document that accesses in an expression may conflict with + * data_race(): macro to document that accesses in an expression may conflict with * other concurrent accesses resulting in data races, but the resulting * behaviour is deemed safe regardless. * diff --git a/include/linux/kcsan-checks.h b/include/linux/kcsan-checks.h index e78220661086..ef3ee233a3fa 100644 --- a/include/linux/kcsan-checks.h +++ b/include/linux/kcsan-checks.h @@ -8,17 +8,17 @@ /* * Access type modifiers. */ -#define KCSAN_ACCESS_WRITE 0x1 +#define KCSAN_ACCESS_WRITE 0x1 #define KCSAN_ACCESS_ATOMIC 0x2 /* - * __kcsan_*: Always calls into runtime when KCSAN is enabled. This may be used + * __kcsan_*: Always calls into the runtime when KCSAN is enabled. This may be used * even in compilation units that selectively disable KCSAN, but must use KCSAN - * to validate access to an address. Never use these in header files! + * to validate access to an address. Never use these in header files! */ #ifdef CONFIG_KCSAN /** - * __kcsan_check_access - check generic access for data race + * __kcsan_check_access - check generic access for data races * * @ptr address of access * @size size of access @@ -32,7 +32,7 @@ static inline void __kcsan_check_access(const volatile void *ptr, size_t size, #endif /* - * kcsan_*: Only calls into runtime when the particular compilation unit has + * kcsan_*: Only calls into the runtime when the particular compilation unit has * KCSAN instrumentation enabled. May be used in header files. 
*/ #ifdef __SANITIZE_THREAD__ @@ -77,16 +77,12 @@ static inline void kcsan_check_access(const volatile void *ptr, size_t size, kcsan_check_access(ptr, size, KCSAN_ACCESS_WRITE) /* - * Check for atomic accesses: if atomic access are not ignored, this simply - * aliases to kcsan_check_access, otherwise becomes a no-op. + * Check for atomic accesses: if atomic accesses are not ignored, this simply + * aliases to kcsan_check_access(), otherwise becomes a no-op. */ #ifdef CONFIG_KCSAN_IGNORE_ATOMICS -#define kcsan_check_atomic_read(...) \ - do { \ - } while (0) -#define kcsan_check_atomic_write(...) \ - do { \ - } while (0) +#define kcsan_check_atomic_read(...) do { } while (0) +#define kcsan_check_atomic_write(...) do { } while (0) #else #define kcsan_check_atomic_read(ptr, size) \ kcsan_check_access(ptr, size, KCSAN_ACCESS_ATOMIC) diff --git a/include/linux/kcsan.h b/include/linux/kcsan.h index 9047048fee84..1019e3a2c689 100644 --- a/include/linux/kcsan.h +++ b/include/linux/kcsan.h @@ -94,21 +94,14 @@ void kcsan_atomic_next(int n); #else /* CONFIG_KCSAN */ -static inline void kcsan_init(void) { } - -static inline void kcsan_disable_current(void) { } - -static inline void kcsan_enable_current(void) { } - -static inline void kcsan_nestable_atomic_begin(void) { } - -static inline void kcsan_nestable_atomic_end(void) { } - -static inline void kcsan_flat_atomic_begin(void) { } - -static inline void kcsan_flat_atomic_end(void) { } - -static inline void kcsan_atomic_next(int n) { } +static inline void kcsan_init(void) { } +static inline void kcsan_disable_current(void) { } +static inline void kcsan_enable_current(void) { } +static inline void kcsan_nestable_atomic_begin(void) { } +static inline void kcsan_nestable_atomic_end(void) { } +static inline void kcsan_flat_atomic_begin(void) { } +static inline void kcsan_flat_atomic_end(void) { } +static inline void kcsan_atomic_next(int n) { } #endif /* CONFIG_KCSAN */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index f52c91be8939..f80d50cac199 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -48,7 +48,7 @@ * * As a consequence, we take the following best-effort approach for raw usage * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, - * pessimistically mark then next KCSAN_SEQLOCK_REGION_MAX memory accesses as + * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following * memory operations are considered atomic. Usage of seqlocks via seqlock_t * interface is not affected. @@ -265,7 +265,7 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * usual consistency guarantee. It is one wmb cheaper, because we can * collapse the two back-to-back wmb()s. * - * Note that, writes surrounding the barrier should be declared atomic (e.g. + * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. 
This is necessary because @@ -465,7 +465,7 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) { unsigned ret = read_seqcount_begin(&sl->seqcount); - kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry */ + kcsan_atomic_next(0); /* non-raw usage, assume closing read_seqretry() */ kcsan_flat_atomic_begin(); return ret; } @@ -473,7 +473,7 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* - * Assume not nested: read_seqretry may be called multiple times when + * Assume not nested: read_seqretry() may be called multiple times when * completing read critical section. */ kcsan_flat_atomic_end(); diff --git a/kernel/kcsan/atomic.h b/kernel/kcsan/atomic.h index c9c3fe628011..576e03ddd6a3 100644 --- a/kernel/kcsan/atomic.h +++ b/kernel/kcsan/atomic.h @@ -6,7 +6,7 @@ #include /* - * Helper that returns true if access to ptr should be considered as an atomic + * Helper that returns true if access to @ptr should be considered an atomic * access, even though it is not explicitly atomic. * * List all volatile globals that have been observed in races, to suppress diff --git a/kernel/kcsan/core.c b/kernel/kcsan/core.c index d9410d58c93e..3314fc29e236 100644 --- a/kernel/kcsan/core.c +++ b/kernel/kcsan/core.c @@ -19,10 +19,10 @@ bool kcsan_enabled; /* Per-CPU kcsan_ctx for interrupts */ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { - .disable_count = 0, - .atomic_next = 0, - .atomic_nest_count = 0, - .in_flat_atomic = false, + .disable_count = 0, + .atomic_next = 0, + .atomic_nest_count = 0, + .in_flat_atomic = false, }; /* @@ -50,11 +50,11 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * slot=9: [10, 11, 9] * slot=63: [64, 65, 63] */ -#define NUM_SLOTS (1 + 2 * KCSAN_CHECK_ADJACENT) +#define NUM_SLOTS (1 + 2*KCSAN_CHECK_ADJACENT) #define SLOT_IDX(slot, i) (slot + ((i + KCSAN_CHECK_ADJACENT) % NUM_SLOTS)) /* - * SLOT_IDX_FAST is used in fast-path. Not first checking the address's primary + * SLOT_IDX_FAST is used in the fast-path. Not first checking the address's primary * slot (middle) is fine if we assume that data races occur rarely. The set of * indices {SLOT_IDX(slot, i) | i in [0, NUM_SLOTS)} is equivalent to * {SLOT_IDX_FAST(slot, i) | i in [0, NUM_SLOTS)}. @@ -68,9 +68,9 @@ static DEFINE_PER_CPU(struct kcsan_ctx, kcsan_cpu_ctx) = { * zero-initialized state matches INVALID_WATCHPOINT. * * Add NUM_SLOTS-1 entries to account for overflow; this helps avoid having to - * use more complicated SLOT_IDX_FAST calculation with modulo in fast-path. + * use more complicated SLOT_IDX_FAST calculation with modulo in the fast-path. */ -static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS - 1]; +static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS-1]; /* * Instructions to skip watching counter, used in should_watch(). 
We use a @@ -78,7 +78,8 @@ static atomic_long_t watchpoints[CONFIG_KCSAN_NUM_WATCHPOINTS + NUM_SLOTS - 1]; */ static DEFINE_PER_CPU(long, kcsan_skip); -static inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, +static inline atomic_long_t *find_watchpoint(unsigned long addr, + size_t size, bool expect_write, long *encoded_watchpoint) { @@ -110,8 +111,8 @@ static inline atomic_long_t *find_watchpoint(unsigned long addr, size_t size, return NULL; } -static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, - bool is_write) +static inline atomic_long_t * +insert_watchpoint(unsigned long addr, size_t size, bool is_write) { const int slot = watchpoint_slot(addr); const long encoded_watchpoint = encode_watchpoint(addr, size, is_write); @@ -120,21 +121,16 @@ static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, /* Check slot index logic, ensuring we stay within array bounds. */ BUILD_BUG_ON(SLOT_IDX(0, 0) != KCSAN_CHECK_ADJACENT); - BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT + 1) != 0); - BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, - KCSAN_CHECK_ADJACENT) != - ARRAY_SIZE(watchpoints) - 1); - BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS - 1, - KCSAN_CHECK_ADJACENT + 1) != - ARRAY_SIZE(watchpoints) - NUM_SLOTS); + BUILD_BUG_ON(SLOT_IDX(0, KCSAN_CHECK_ADJACENT+1) != 0); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT) != ARRAY_SIZE(watchpoints)-1); + BUILD_BUG_ON(SLOT_IDX(CONFIG_KCSAN_NUM_WATCHPOINTS-1, KCSAN_CHECK_ADJACENT+1) != ARRAY_SIZE(watchpoints) - NUM_SLOTS); for (i = 0; i < NUM_SLOTS; ++i) { long expect_val = INVALID_WATCHPOINT; /* Try to acquire this slot. */ watchpoint = &watchpoints[SLOT_IDX(slot, i)]; - if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, - encoded_watchpoint)) + if (atomic_long_try_cmpxchg_relaxed(watchpoint, &expect_val, encoded_watchpoint)) return watchpoint; } @@ -150,11 +146,10 @@ static inline atomic_long_t *insert_watchpoint(unsigned long addr, size_t size, * 2. the thread that set up the watchpoint already removed it; * 3. the watchpoint was removed and then re-used. */ -static inline bool try_consume_watchpoint(atomic_long_t *watchpoint, - long encoded_watchpoint) +static inline bool +try_consume_watchpoint(atomic_long_t *watchpoint, long encoded_watchpoint) { - return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, - CONSUMED_WATCHPOINT); + return atomic_long_try_cmpxchg_relaxed(watchpoint, &encoded_watchpoint, CONSUMED_WATCHPOINT); } /* @@ -162,14 +157,13 @@ static inline bool try_consume_watchpoint(atomic_long_t *watchpoint, */ static inline bool remove_watchpoint(atomic_long_t *watchpoint) { - return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != - CONSUMED_WATCHPOINT; + return atomic_long_xchg_relaxed(watchpoint, INVALID_WATCHPOINT) != CONSUMED_WATCHPOINT; } static inline struct kcsan_ctx *get_ctx(void) { /* - * In interrupt, use raw_cpu_ptr to avoid unnecessary checks, that would + * In interrupts, use raw_cpu_ptr to avoid unnecessary checks, that would * also result in calls that generate warnings in uaccess regions. */ return in_task() ? 
&current->kcsan_ctx : raw_cpu_ptr(&kcsan_cpu_ctx); @@ -260,7 +254,8 @@ static inline unsigned int get_delay(void) */ static noinline void kcsan_found_watchpoint(const volatile void *ptr, - size_t size, bool is_write, + size_t size, + bool is_write, atomic_long_t *watchpoint, long encoded_watchpoint) { @@ -296,8 +291,8 @@ static noinline void kcsan_found_watchpoint(const volatile void *ptr, user_access_restore(flags); } -static noinline void kcsan_setup_watchpoint(const volatile void *ptr, - size_t size, bool is_write) +static noinline void +kcsan_setup_watchpoint(const volatile void *ptr, size_t size, bool is_write) { atomic_long_t *watchpoint; union { @@ -346,8 +341,8 @@ static noinline void kcsan_setup_watchpoint(const volatile void *ptr, watchpoint = insert_watchpoint((unsigned long)ptr, size, is_write); if (watchpoint == NULL) { /* - * Out of capacity: the size of `watchpoints`, and the frequency - * with which `should_watch()` returns true should be tweaked so + * Out of capacity: the size of 'watchpoints', and the frequency + * with which should_watch() returns true should be tweaked so * that this case happens very rarely. */ kcsan_counter_inc(KCSAN_COUNTER_NO_CAPACITY); diff --git a/kernel/kcsan/debugfs.c b/kernel/kcsan/debugfs.c index 041d520a0183..bec42dab32ee 100644 --- a/kernel/kcsan/debugfs.c +++ b/kernel/kcsan/debugfs.c @@ -24,39 +24,31 @@ static atomic_long_t counters[KCSAN_COUNTER_COUNT]; * whitelist or blacklist. */ static struct { - unsigned long *addrs; /* array of addresses */ - size_t size; /* current size */ - int used; /* number of elements used */ - bool sorted; /* if elements are sorted */ - bool whitelist; /* if list is a blacklist or whitelist */ + unsigned long *addrs; /* array of addresses */ + size_t size; /* current size */ + int used; /* number of elements used */ + bool sorted; /* if elements are sorted */ + bool whitelist; /* if list is a blacklist or whitelist */ } report_filterlist = { - .addrs = NULL, - .size = 8, /* small initial size */ - .used = 0, - .sorted = false, - .whitelist = false, /* default is blacklist */ + .addrs = NULL, + .size = 8, /* small initial size */ + .used = 0, + .sorted = false, + .whitelist = false, /* default is blacklist */ }; static DEFINE_SPINLOCK(report_filterlist_lock); static const char *counter_to_name(enum kcsan_counter_id id) { switch (id) { - case KCSAN_COUNTER_USED_WATCHPOINTS: - return "used_watchpoints"; - case KCSAN_COUNTER_SETUP_WATCHPOINTS: - return "setup_watchpoints"; - case KCSAN_COUNTER_DATA_RACES: - return "data_races"; - case KCSAN_COUNTER_NO_CAPACITY: - return "no_capacity"; - case KCSAN_COUNTER_REPORT_RACES: - return "report_races"; - case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: - return "races_unknown_origin"; - case KCSAN_COUNTER_UNENCODABLE_ACCESSES: - return "unencodable_accesses"; - case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: - return "encoding_false_positives"; + case KCSAN_COUNTER_USED_WATCHPOINTS: return "used_watchpoints"; + case KCSAN_COUNTER_SETUP_WATCHPOINTS: return "setup_watchpoints"; + case KCSAN_COUNTER_DATA_RACES: return "data_races"; + case KCSAN_COUNTER_NO_CAPACITY: return "no_capacity"; + case KCSAN_COUNTER_REPORT_RACES: return "report_races"; + case KCSAN_COUNTER_RACES_UNKNOWN_ORIGIN: return "races_unknown_origin"; + case KCSAN_COUNTER_UNENCODABLE_ACCESSES: return "unencodable_accesses"; + case KCSAN_COUNTER_ENCODING_FALSE_POSITIVES: return "encoding_false_positives"; case KCSAN_COUNTER_COUNT: BUG(); } @@ -116,7 +108,7 @@ bool kcsan_skip_report_debugfs(unsigned long func_addr) if 
(!kallsyms_lookup_size_offset(func_addr, &symbolsize, &offset)) return false; - func_addr -= offset; /* get function start */ + func_addr -= offset; /* Get function start */ spin_lock_irqsave(&report_filterlist_lock, flags); if (report_filterlist.used == 0) @@ -195,6 +187,7 @@ static ssize_t insert_report_filterlist(const char *func) out: spin_unlock_irqrestore(&report_filterlist_lock, flags); + return ret; } @@ -226,8 +219,8 @@ static int debugfs_open(struct inode *inode, struct file *file) return single_open(file, show_info, NULL); } -static ssize_t debugfs_write(struct file *file, const char __user *buf, - size_t count, loff_t *off) +static ssize_t +debugfs_write(struct file *file, const char __user *buf, size_t count, loff_t *off) { char kbuf[KSYM_NAME_LEN]; char *arg; @@ -264,10 +257,13 @@ static ssize_t debugfs_write(struct file *file, const char __user *buf, return count; } -static const struct file_operations debugfs_ops = { .read = seq_read, - .open = debugfs_open, - .write = debugfs_write, - .release = single_release }; +static const struct file_operations debugfs_ops = +{ + .read = seq_read, + .open = debugfs_open, + .write = debugfs_write, + .release = single_release +}; void __init kcsan_debugfs_init(void) { diff --git a/kernel/kcsan/encoding.h b/kernel/kcsan/encoding.h index e17bdac0e54b..b63890e86449 100644 --- a/kernel/kcsan/encoding.h +++ b/kernel/kcsan/encoding.h @@ -10,7 +10,8 @@ #include "kcsan.h" #define SLOT_RANGE PAGE_SIZE -#define INVALID_WATCHPOINT 0 + +#define INVALID_WATCHPOINT 0 #define CONSUMED_WATCHPOINT 1 /* @@ -34,24 +35,24 @@ * Both these are assumed to be very unlikely. However, in case it still happens * happens, the report logic will filter out the false positive (see report.c). */ -#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG - 1 - WATCHPOINT_SIZE_BITS) +#define WATCHPOINT_ADDR_BITS (BITS_PER_LONG-1 - WATCHPOINT_SIZE_BITS) /* * Masks to set/retrieve the encoded data. */ -#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG - 1) +#define WATCHPOINT_WRITE_MASK BIT(BITS_PER_LONG-1) #define WATCHPOINT_SIZE_MASK \ - GENMASK(BITS_PER_LONG - 2, BITS_PER_LONG - 2 - WATCHPOINT_SIZE_BITS) + GENMASK(BITS_PER_LONG-2, BITS_PER_LONG-2 - WATCHPOINT_SIZE_BITS) #define WATCHPOINT_ADDR_MASK \ - GENMASK(BITS_PER_LONG - 3 - WATCHPOINT_SIZE_BITS, 0) + GENMASK(BITS_PER_LONG-3 - WATCHPOINT_SIZE_BITS, 0) static inline bool check_encodable(unsigned long addr, size_t size) { return size <= MAX_ENCODABLE_SIZE; } -static inline long encode_watchpoint(unsigned long addr, size_t size, - bool is_write) +static inline long +encode_watchpoint(unsigned long addr, size_t size, bool is_write) { return (long)((is_write ? 
WATCHPOINT_WRITE_MASK : 0) | (size << WATCHPOINT_ADDR_BITS) | @@ -59,17 +60,17 @@ static inline long encode_watchpoint(unsigned long addr, size_t size, } static inline bool decode_watchpoint(long watchpoint, - unsigned long *addr_masked, size_t *size, + unsigned long *addr_masked, + size_t *size, bool *is_write) { if (watchpoint == INVALID_WATCHPOINT || watchpoint == CONSUMED_WATCHPOINT) return false; - *addr_masked = (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK; - *size = ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> - WATCHPOINT_ADDR_BITS; - *is_write = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK); + *addr_masked = (unsigned long)watchpoint & WATCHPOINT_ADDR_MASK; + *size = ((unsigned long)watchpoint & WATCHPOINT_SIZE_MASK) >> WATCHPOINT_ADDR_BITS; + *is_write = !!((unsigned long)watchpoint & WATCHPOINT_WRITE_MASK); return true; } diff --git a/kernel/kcsan/kcsan.h b/kernel/kcsan/kcsan.h index 1bb2f1c0d61e..d3b9a96ac8a4 100644 --- a/kernel/kcsan/kcsan.h +++ b/kernel/kcsan/kcsan.h @@ -72,14 +72,14 @@ enum kcsan_counter_id { /* * Increment/decrement counter with given id; avoid calling these in fast-path. */ -void kcsan_counter_inc(enum kcsan_counter_id id); -void kcsan_counter_dec(enum kcsan_counter_id id); +extern void kcsan_counter_inc(enum kcsan_counter_id id); +extern void kcsan_counter_dec(enum kcsan_counter_id id); /* * Returns true if data races in the function symbol that maps to func_addr * (offsets are ignored) should *not* be reported. */ -bool kcsan_skip_report_debugfs(unsigned long func_addr); +extern bool kcsan_skip_report_debugfs(unsigned long func_addr); enum kcsan_report_type { /* @@ -99,10 +99,11 @@ enum kcsan_report_type { */ KCSAN_REPORT_RACE_UNKNOWN_ORIGIN, }; + /* * Print a race report from thread that encountered the race. */ -void kcsan_report(const volatile void *ptr, size_t size, bool is_write, - bool value_change, int cpu_id, enum kcsan_report_type type); +extern void kcsan_report(const volatile void *ptr, size_t size, bool is_write, + bool value_change, int cpu_id, enum kcsan_report_type type); #endif /* _KERNEL_KCSAN_KCSAN_H */ diff --git a/kernel/kcsan/report.c b/kernel/kcsan/report.c index ead5610bafa7..0eea05a3135b 100644 --- a/kernel/kcsan/report.c +++ b/kernel/kcsan/report.c @@ -22,13 +22,13 @@ * the reports, with reporting being in the slow-path. */ static struct { - const volatile void *ptr; - size_t size; - bool is_write; - int task_pid; - int cpu_id; - unsigned long stack_entries[NUM_STACK_ENTRIES]; - int num_stack_entries; + const volatile void *ptr; + size_t size; + bool is_write; + int task_pid; + int cpu_id; + unsigned long stack_entries[NUM_STACK_ENTRIES]; + int num_stack_entries; } other_info = { .ptr = NULL }; /* @@ -40,8 +40,8 @@ static DEFINE_SPINLOCK(report_lock); /* * Special rules to skip reporting. 
*/ -static bool skip_report(bool is_write, bool value_change, - unsigned long top_frame) +static bool +skip_report(bool is_write, bool value_change, unsigned long top_frame) { if (IS_ENABLED(CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY) && is_write && !value_change) { @@ -105,6 +105,7 @@ static int sym_strcmp(void *addr1, void *addr2) snprintf(buf1, sizeof(buf1), "%pS", addr1); snprintf(buf2, sizeof(buf2), "%pS", addr2); + return strncmp(buf1, buf2, sizeof(buf1)); } @@ -116,8 +117,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, enum kcsan_report_type type) { unsigned long stack_entries[NUM_STACK_ENTRIES] = { 0 }; - int num_stack_entries = - stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); + int num_stack_entries = stack_trace_save(stack_entries, NUM_STACK_ENTRIES, 1); int skipnr = get_stack_skipnr(stack_entries, num_stack_entries); int other_skipnr; @@ -131,7 +131,7 @@ static bool print_report(const volatile void *ptr, size_t size, bool is_write, other_skipnr = get_stack_skipnr(other_info.stack_entries, other_info.num_stack_entries); - /* value_change is only known for the other thread */ + /* @value_change is only known for the other thread */ if (skip_report(other_info.is_write, value_change, other_info.stack_entries[other_skipnr])) return false; @@ -241,13 +241,12 @@ retry: if (other_info.ptr != NULL) break; /* still in use, retry */ - other_info.ptr = ptr; - other_info.size = size; - other_info.is_write = is_write; - other_info.task_pid = in_task() ? task_pid_nr(current) : -1; - other_info.cpu_id = cpu_id; - other_info.num_stack_entries = stack_trace_save( - other_info.stack_entries, NUM_STACK_ENTRIES, 1); + other_info.ptr = ptr; + other_info.size = size; + other_info.is_write = is_write; + other_info.task_pid = in_task() ? 
task_pid_nr(current) : -1; + other_info.cpu_id = cpu_id; + other_info.num_stack_entries = stack_trace_save(other_info.stack_entries, NUM_STACK_ENTRIES, 1); spin_unlock_irqrestore(&report_lock, *flags); @@ -299,6 +298,7 @@ retry: } spin_unlock_irqrestore(&report_lock, *flags); + goto retry; } @@ -309,9 +309,7 @@ void kcsan_report(const volatile void *ptr, size_t size, bool is_write, kcsan_disable_current(); if (prepare_report(&flags, ptr, size, is_write, cpu_id, type)) { - if (print_report(ptr, size, is_write, value_change, cpu_id, - type) && - panic_on_warn) + if (print_report(ptr, size, is_write, value_change, cpu_id, type) && panic_on_warn) panic("panic_on_warn set ...\n"); release_report(&flags, type); diff --git a/kernel/kcsan/test.c b/kernel/kcsan/test.c index 0bae63c5ca65..cc6000239dc0 100644 --- a/kernel/kcsan/test.c +++ b/kernel/kcsan/test.c @@ -34,7 +34,7 @@ static bool test_encode_decode(void) if (WARN_ON(!check_encodable(addr, size))) return false; - /* encode and decode */ + /* Encode and decode */ { const long encoded_watchpoint = encode_watchpoint(addr, size, is_write); @@ -42,7 +42,7 @@ static bool test_encode_decode(void) size_t verif_size; bool verif_is_write; - /* check special watchpoints */ + /* Check special watchpoints */ if (WARN_ON(decode_watchpoint( INVALID_WATCHPOINT, &verif_masked_addr, &verif_size, &verif_is_write))) @@ -52,7 +52,7 @@ static bool test_encode_decode(void) &verif_size, &verif_is_write))) return false; - /* check decoding watchpoint returns same data */ + /* Check decoding watchpoint returns same data */ if (WARN_ON(!decode_watchpoint( encoded_watchpoint, &verif_masked_addr, &verif_size, &verif_is_write))) diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile index e9307a9c54e7..5fc9c9b70862 100644 --- a/kernel/sched/Makefile +++ b/kernel/sched/Makefile @@ -7,7 +7,7 @@ endif # that is not a function of syscall inputs. E.g. involuntary context switches. KCOV_INSTRUMENT := n -# There are numerous races here, however, most of them due to plain accesses. +# There are numerous data races here, however, most of them are due to plain accesses. # This would make it even harder for syzbot to find reproducers, because these # bugs trigger without specific input. Disable by default, but should re-enable # eventually. diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan index 5dd464e52ab4..3f78b1434375 100644 --- a/lib/Kconfig.kcsan +++ b/lib/Kconfig.kcsan @@ -6,7 +6,6 @@ config HAVE_ARCH_KCSAN menuconfig KCSAN bool "KCSAN: watchpoint-based dynamic data race detector" depends on HAVE_ARCH_KCSAN && !KASAN && STACKTRACE - default n help Kernel Concurrency Sanitizer is a dynamic data race detector, which uses a watchpoint-based sampling approach to detect races. See @@ -16,13 +15,12 @@ if KCSAN config KCSAN_DEBUG bool "Debugging of KCSAN internals" - default n config KCSAN_SELFTEST bool "Perform short selftests on boot" default y help - Run KCSAN selftests on boot. On test failure, causes kernel to panic. + Run KCSAN selftests on boot. On test failure, causes the kernel to panic. config KCSAN_EARLY_ENABLE bool "Early enable during boot" @@ -62,7 +60,8 @@ config KCSAN_DELAY_RANDOMIZE default y help If delays should be randomized, where the maximum is KCSAN_UDELAY_*. - If false, the chosen delays are always KCSAN_UDELAY_* defined above. + If false, the chosen delays are always the KCSAN_UDELAY_* values + as defined above. 
config KCSAN_SKIP_WATCH int "Skip instructions before setting up watchpoint" @@ -86,9 +85,9 @@ config KCSAN_SKIP_WATCH_RANDOMIZE # parameters, to optimize for the common use-case, we avoid this because: (a) # it would impact performance (and we want to avoid static branch for all # {READ,WRITE}_ONCE, atomic_*, bitops, etc.), and (b) complicate the design -# without real benefit. The main purpose of the below options are for use in -# fuzzer configs to control reported data races, and are not expected to be -# switched frequently by a user. +# without real benefit. The main purpose of the below options is for use in -# fuzzer configs to control reported data races, and they are not expected
config KCSAN_SKIP_WATCH int "Skip instructions before setting up watchpoint" @@ -86,9 +85,9 @@ config KCSAN_SKIP_WATCH_RANDOMIZE # parameters, to optimize for the common use-case, we avoid this because: (a) # it would impact performance (and we want to avoid static branch for all # {READ,WRITE}_ONCE, atomic_*, bitops, etc.), and (b) complicate the design -# without real benefit. The main purpose of the below options are for use in -# fuzzer configs to control reported data races, and are not expected to be -# switched frequently by a user. +# without real benefit. The main purpose of the below options is for use in +# fuzzer configs to control reported data races, and they are not expected +# to be switched frequently by a user. config KCSAN_REPORT_RACE_UNKNOWN_ORIGIN bool "Report races of unknown origin" @@ -103,13 +102,12 @@ config KCSAN_REPORT_VALUE_CHANGE_ONLY bool "Only report races where watcher observed a data value change" default y help - If enabled and a conflicting write is observed via watchpoint, but + If enabled and a conflicting write is observed via a watchpoint, but the data value of the memory location was observed to remain unchanged, do not report the data race. config KCSAN_IGNORE_ATOMICS bool "Do not instrument marked atomic accesses" - default n help If enabled, never instruments marked atomic accesses. This results in not reporting data races where one access is atomic and the other is -- cgit v1.2.3 From b968a08f242d51982e46041c506115b5e11a7570 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 11 Feb 2020 17:04:20 +0100 Subject: compiler.h, seqlock.h: Remove unnecessary kcsan.h includes We no longer have to include kcsan.h, since the required KCSAN interfaces for both compiler.h and seqlock.h are now provided by kcsan-checks.h. Acked-by: John Hubbard Signed-off-by: Marco Elver Signed-off-by: Paul E. McKenney Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 2 -- include/linux/seqlock.h | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c1bdf37571cb..f504edebd5d7 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -313,8 +313,6 @@ unsigned long read_word_at_a_time(const void *addr) __u.__val; \ }) -#include - /** * data_race - mark an expression as containing intentional data races * diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 239701cae376..8b97204f35a7 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include /* -- cgit v1.2.3 From 0d24f65e933ca89d55d17f6dbdb2a72ca88f0992 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:07 +0200 Subject: Documentation: locking: Describe seqlock design and usage Proper documentation for the design and usage of sequence counters and sequential locks does not exist. Complete the seqlock.h documentation as follows: - Divide all documentation on a seqcount_t vs. seqlock_t basis. The description for both mechanisms was intermingled, which is incorrect since the usage constraints for each type are vastly different. - Add an introductory paragraph describing the internal design of, and rationale for, sequence counters. - Document seqcount_t writer non-preemptibility requirement, which was not previously documented anywhere, and provide a clear rationale. - Provide template code for seqcount_t and seqlock_t initialization and reader/writer critical sections. 
- Recommend using seqlock_t by default. It implicitly handles the serialization and non-preemptibility requirements of writers. At seqlock.h: - Remove references to brlocks as they've long been removed from the kernel. - Remove references to gcc-3.x since the kernel's minimum supported gcc version is 4.9. References: 0f6ed63b1707 ("no need to keep brlock macros anymore...") References: 6ec4476ac825 ("Raise gcc version requirement to 4.9") Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-2-a.darwish@linutronix.de --- Documentation/locking/index.rst | 1 + Documentation/locking/seqlock.rst | 170 ++++++++++++++++++++++++++++++++++++++ include/linux/seqlock.h | 85 +++++++++---------- 3 files changed, 211 insertions(+), 45 deletions(-) create mode 100644 Documentation/locking/seqlock.rst (limited to 'include/linux/seqlock.h') diff --git a/Documentation/locking/index.rst b/Documentation/locking/index.rst index d785878cad65..7003bd5aeff4 100644 --- a/Documentation/locking/index.rst +++ b/Documentation/locking/index.rst @@ -14,6 +14,7 @@ locking mutex-design rt-mutex-design rt-mutex + seqlock spinlocks ww-mutex-design preempt-locking diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst new file mode 100644 index 000000000000..366dd368d90a --- /dev/null +++ b/Documentation/locking/seqlock.rst @@ -0,0 +1,170 @@ +====================================== +Sequence counters and sequential locks +====================================== + +Introduction +============ + +Sequence counters are a reader-writer consistency mechanism with +lockless readers (read-only retry loops), and no writer starvation. They +are used for data that's rarely written to (e.g. system time), where the +reader wants a consistent set of information and is willing to retry if +that information changes. + +A data set is consistent when the sequence count at the beginning of the +read side critical section is even and the same sequence count value is +read again at the end of the critical section. The data in the set must +be copied out inside the read side critical section. If the sequence +count has changed between the start and the end of the critical section, +the reader must retry. + +Writers increment the sequence count at the start and the end of their +critical section. After starting the critical section the sequence count +is odd and indicates to the readers that an update is in progress. At +the end of the write side critical section the sequence count becomes +even again which lets readers make progress. + +A sequence counter write side critical section must never be preempted +or interrupted by read side sections. Otherwise the reader will spin for +the entire scheduler tick due to the odd sequence count value and the +interrupted writer. If that reader belongs to a real-time scheduling +class, it can spin forever and the kernel will livelock. + +This mechanism cannot be used if the protected data contains pointers, +as the writer can invalidate a pointer that the reader is following. + + +.. _seqcount_t: + +Sequence counters (``seqcount_t``) +================================== + +This is the the raw counting mechanism, which does not protect against +multiple writers. Write side critical sections must thus be serialized +by an external lock. + +If the write serialization primitive is not implicitly disabling +preemption, preemption must be explicitly disabled before entering the +write side section. 
If the read section can be invoked from hardirq or +softirq contexts, interrupts or bottom halves must also be respectively +disabled before entering the write section. + +If it's desired to automatically handle the sequence counter +requirements of writer serialization and non-preemptibility, use +:ref:`seqlock_t` instead. + +Initialization:: + + /* dynamic */ + seqcount_t foo_seqcount; + seqcount_init(&foo_seqcount); + + /* static */ + static seqcount_t foo_seqcount = SEQCNT_ZERO(foo_seqcount); + + /* C99 struct init */ + struct { + .seq = SEQCNT_ZERO(foo.seq), + } foo; + +Write path:: + + /* Serialized context with disabled preemption */ + + write_seqcount_begin(&foo_seqcount); + + /* ... [[write-side critical section]] ... */ + + write_seqcount_end(&foo_seqcount); + +Read path:: + + do { + seq = read_seqcount_begin(&foo_seqcount); + + /* ... [[read-side critical section]] ... */ + + } while (read_seqcount_retry(&foo_seqcount, seq)); + + +.. _seqlock_t: + +Sequential locks (``seqlock_t``) +================================ + +This contains the :ref:`seqcount_t` mechanism earlier discussed, plus an +embedded spinlock for writer serialization and non-preemptibility. + +If the read side section can be invoked from hardirq or softirq context, +use the write side function variants which disable interrupts or bottom +halves respectively. + +Initialization:: + + /* dynamic */ + seqlock_t foo_seqlock; + seqlock_init(&foo_seqlock); + + /* static */ + static DEFINE_SEQLOCK(foo_seqlock); + + /* C99 struct init */ + struct { + .seql = __SEQLOCK_UNLOCKED(foo.seql) + } foo; + +Write path:: + + write_seqlock(&foo_seqlock); + + /* ... [[write-side critical section]] ... */ + + write_sequnlock(&foo_seqlock); + +Read path, three categories: + +1. Normal Sequence readers which never block a writer but they must + retry if a writer is in progress by detecting change in the sequence + number. Writers do not wait for a sequence reader:: + + do { + seq = read_seqbegin(&foo_seqlock); + + /* ... [[read-side critical section]] ... */ + + } while (read_seqretry(&foo_seqlock, seq)); + +2. Locking readers which will wait if a writer or another locking reader + is in progress. A locking reader in progress will also block a writer + from entering its critical section. This read lock is + exclusive. Unlike rwlock_t, only one locking reader can acquire it:: + + read_seqlock_excl(&foo_seqlock); + + /* ... [[read-side critical section]] ... */ + + read_sequnlock_excl(&foo_seqlock); + +3. Conditional lockless reader (as in 1), or locking reader (as in 2), + according to a passed marker. This is used to avoid lockless readers + starvation (too much retry loops) in case of a sharp spike in write + activity. First, a lockless read is tried (even marker passed). If + that trial fails (odd sequence counter is returned, which is used as + the next iteration marker), the lockless read is transformed to a + full locking read and no retry loop is necessary:: + + /* marker; even initialization */ + int seq = 0; + do { + read_seqbegin_or_lock(&foo_seqlock, &seq); + + /* ... [[read-side critical section]] ... */ + + } while (need_seqretry(&foo_seqlock, seq)); + done_seqretry(&foo_seqlock, seq); + + +API documentation +================= + +.. 
kernel-doc:: include/linux/seqlock.h diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8b97204f35a7..299d68f10325 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -1,36 +1,15 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __LINUX_SEQLOCK_H #define __LINUX_SEQLOCK_H + /* - * Reader/writer consistent mechanism without starving writers. This type of - * lock for data where the reader wants a consistent set of information - * and is willing to retry if the information changes. There are two types - * of readers: - * 1. Sequence readers which never block a writer but they may have to retry - * if a writer is in progress by detecting change in sequence number. - * Writers do not wait for a sequence reader. - * 2. Locking readers which will wait if a writer or another locking reader - * is in progress. A locking reader in progress will also block a writer - * from going forward. Unlike the regular rwlock, the read lock here is - * exclusive so that only one locking reader can get it. - * - * This is not as cache friendly as brlock. Also, this may not work well - * for data that contains pointers, because any writer could - * invalidate a pointer that a reader was following. - * - * Expected non-blocking reader usage: - * do { - * seq = read_seqbegin(&foo); - * ... - * } while (read_seqretry(&foo, seq)); - * - * - * On non-SMP the spin locks disappear but the writer still needs - * to increment the sequence variables because an interrupt routine could - * change the state of the data. - * - * Based on x86_64 vsyscall gettimeofday - * by Keith Owens and Andrea Arcangeli + * seqcount_t / seqlock_t - a reader-writer consistency mechanism with + * lockless readers (read-only retry loops), and no writer starvation. + * + * See Documentation/locking/seqlock.rst + * + * Copyrights: + * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli */ #include @@ -41,8 +20,8 @@ #include /* - * The seqlock interface does not prescribe a precise sequence of read - * begin/retry/end. For readers, typically there is a call to + * The seqlock seqcount_t interface does not prescribe a precise sequence of + * read begin/retry/end. For readers, typically there is a call to * read_seqcount_begin() and read_seqcount_retry(), however, there are more * esoteric cases which do not follow this pattern. * @@ -50,16 +29,30 @@ * via seqcount_t under KCSAN: upon beginning a seq-reader critical section, * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as * atomics; if there is a matching read_seqcount_retry() call, no following - * memory operations are considered atomic. Usage of seqlocks via seqlock_t - * interface is not affected. + * memory operations are considered atomic. Usage of the seqlock_t interface + * is not affected. */ #define KCSAN_SEQLOCK_REGION_MAX 1000 /* - * Version using sequence counter only. - * This can be used when code has its own mutex protecting the - * updating starting before the write_seqcountbeqin() and ending - * after the write_seqcount_end(). + * Sequence counters (seqcount_t) + * + * This is the raw counting mechanism, without any writer protection. + * + * Write side critical sections must be serialized and non-preemptible. + * + * If readers can be invoked from hardirq or softirq contexts, + * interrupts or bottom halves must also be respectively disabled before + * entering the write section. 
+ * + * This mechanism can't be used if the protected data contains pointers, + * as the writer can invalidate a pointer that a reader is following. + * + * If it's desired to automatically handle the sequence counter writer + * serialization and non-preemptibility requirements, use a sequential + * lock (seqlock_t) instead. + * + * See Documentation/locking/seqlock.rst */ typedef struct seqcount { unsigned sequence; @@ -398,10 +391,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s) smp_wmb(); /* increment "sequence" before following stores */ } -/* - * Sequence counter only version assumes that callers are using their - * own mutexing. - */ static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { raw_write_seqcount_begin(s); @@ -434,15 +423,21 @@ static inline void write_seqcount_invalidate(seqcount_t *s) kcsan_nestable_atomic_end(); } +/* + * Sequential locks (seqlock_t) + * + * Sequence counters with an embedded spinlock for writer serialization + * and non-preemptibility. + * + * For more info, see: + * - Comments on top of seqcount_t + * - Documentation/locking/seqlock.rst + */ typedef struct { struct seqcount seqcount; spinlock_t lock; } seqlock_t; -/* - * These macros triggered gcc-3.x compile-time problems. We think these are - * OK now. Be cautious. - */ #define __SEQLOCK_UNLOCKED(lockname) \ { \ .seqcount = SEQCNT_ZERO(lockname), \ -- cgit v1.2.3 From 15cbe67bbd3adeb4854c42713dbeaf2ff876beee Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:08 +0200 Subject: seqlock: Properly format kernel-doc code samples Align the code samples and note sections inside kernel-doc comments with tabs. This way they can be properly parsed and rendered by Sphinx. It also makes the code samples easier to read from text editors. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-3-a.darwish@linutronix.de --- include/linux/seqlock.h | 108 +++++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 52 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 299d68f10325..6c4f68ef1393 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -263,32 +263,32 @@ static inline void raw_write_seqcount_end(seqcount_t *s) * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because * neither writes before and after the barrier are enclosed in a seq-writer - * critical section that would ensure readers are aware of ongoing writes. 
+ * critical section that would ensure readers are aware of ongoing writes:: * - * seqcount_t seq; - * bool X = true, Y = false; + * seqcount_t seq; + * bool X = true, Y = false; * - * void read(void) - * { - * bool x, y; + * void read(void) + * { + * bool x, y; * - * do { - * int s = read_seqcount_begin(&seq); + * do { + * int s = read_seqcount_begin(&seq); * - * x = X; y = Y; + * x = X; y = Y; * - * } while (read_seqcount_retry(&seq, s)); + * } while (read_seqcount_retry(&seq, s)); * - * BUG_ON(!x && !y); + * BUG_ON(!x && !y); * } * * void write(void) * { - * WRITE_ONCE(Y, true); + * WRITE_ONCE(Y, true); * - * raw_write_seqcount_barrier(seq); + * raw_write_seqcount_barrier(seq); * - * WRITE_ONCE(X, false); + * WRITE_ONCE(X, false); * } */ static inline void raw_write_seqcount_barrier(seqcount_t *s) @@ -325,64 +325,68 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * Very simply put: we first modify one copy and then the other. This ensures * there is always one copy in a stable state, ready to give us an answer. * - * The basic form is a data structure like: + * The basic form is a data structure like:: * - * struct latch_struct { - * seqcount_t seq; - * struct data_struct data[2]; - * }; + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; * * Where a modification, which is assumed to be externally serialized, does the - * following: + * following:: * - * void latch_modify(struct latch_struct *latch, ...) - * { - * smp_wmb(); <- Ensure that the last data[1] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); // Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[0], ...); + * modify(latch->data[0], ...); * - * smp_wmb(); <- Ensure that the data[0] update is visible - * latch->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible + * smp_wmb(); // Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); // Ensure that the seqcount update is visible * - * modify(latch->data[1], ...); - * } + * modify(latch->data[1], ...); + * } * - * The query will have a form like: + * The query will have a form like:: * - * struct entry *latch_query(struct latch_struct *latch, ...) - * { - * struct entry *entry; - * unsigned seq, idx; + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; * - * do { - * seq = raw_read_seqcount_latch(&latch->seq); + * do { + * seq = raw_read_seqcount_latch(&latch->seq); * - * idx = seq & 0x01; - * entry = data_query(latch->data[idx], ...); + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); * - * smp_rmb(); - * } while (seq != latch->seq); + * smp_rmb(); + * } while (seq != latch->seq); * - * return entry; - * } + * return entry; + * } * * So during the modification, queries are first redirected to data[1]. Then we * modify data[0]. When that is complete, we redirect queries back to data[0] * and we can modify data[1]. * - * NOTE: The non-requirement for atomic modifications does _NOT_ include - * the publishing of new entries in the case where data is a dynamic - * data structure. + * NOTE: + * + * The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. 
+ * + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. * - * An iteration might start in data[0] and get suspended long enough - * to miss an entire modification sequence, once it resumes it might - * observe the new entry. + * NOTE: * - * NOTE: When data is a dynamic data structure; one should use regular RCU - * patterns to manage the lifetimes of the objects within. + * When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. */ static inline void raw_write_seqcount_latch(seqcount_t *s) { -- cgit v1.2.3 From d3b35b87f436c1b226a8061bee9c8875ba6658bd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:09 +0200 Subject: seqlock: seqcount_t latch: End read sections with read_seqcount_retry() The seqcount_t latch reader example at the raw_write_seqcount_latch() kernel-doc comment ends the latch read section with a manual smp memory barrier and sequence counter comparison. This is technically correct, but it is suboptimal: read_seqcount_retry() already contains the same logic of an smp memory barrier and sequence counter comparison. End the latch read critical section example with read_seqcount_retry(). Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-4-a.darwish@linutronix.de --- include/linux/seqlock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 6c4f68ef1393..d724b5e5408d 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -363,8 +363,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * idx = seq & 0x01; * entry = data_query(latch->data[idx], ...); * - * smp_rmb(); - * } while (seq != latch->seq); + * // read_seqcount_retry() includes needed smp_rmb() + * } while (read_seqcount_retry(&latch->seq, seq)); * * return entry; * } -- cgit v1.2.3 From f4a27cbcec90ac04ee60e04b222e1449dcdba0bd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:10 +0200 Subject: seqlock: Reorder seqcount_t and seqlock_t API definitions The seqlock.h seqcount_t and seqlock_t API definitions are presented in the chronological order of their development rather than the order that makes most sense to readers. This makes it hard to follow and understand the header file code. Group and reorder all of the exported seqlock.h functions according to their function. First, group together the seqcount_t standard read path functions: - __read_seqcount_begin() - raw_read_seqcount_begin() - read_seqcount_begin() since each function is implemented exactly in terms of the one above it. Then, group the special-case seqcount_t readers on their own as: - raw_read_seqcount() - raw_seqcount_begin() since the only difference between the two functions is that the second one masks the sequence counter LSB while the first one does not. Note that raw_seqcount_begin() can actually be implemented in terms of raw_read_seqcount(), which will be done in a follow-up commit. 
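As a sketch of what that note could translate to in code (an illustration inferred from the raw_read_seqcount() and raw_seqcount_begin() definitions shown earlier in this series, not the actual follow-up commit):

	/*
	 * Sketch only: the two helpers above differ solely in masking the
	 * sequence LSB. Masking means a reader that raced with a writer
	 * (odd count) still enters the section and then fails the later
	 * read_seqcount_retry(), instead of the odd count being handled
	 * at begin time.
	 */
	static inline unsigned raw_seqcount_begin(const seqcount_t *s)
	{
		return raw_read_seqcount(s) & ~1;
	}
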
Then, group the seqcount_t write path functions, instead of injecting unrelated seqcount_t latch functions between them, and order them as: - raw_write_seqcount_begin() - raw_write_seqcount_end() - write_seqcount_begin_nested() - write_seqcount_begin() - write_seqcount_end() - raw_write_seqcount_barrier() - write_seqcount_invalidate() which is the expected natural order. This also isolates the seqcount_t latch functions into their own area, at the end of the sequence counters section, and before jumping to the next one: sequential locks (seqlock_t). Do a similar grouping and reordering for seqlock_t "locking" readers vs. the "conditionally locking or lockless" ones. No implementation code was changed in any of the reordering above. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-5-a.darwish@linutronix.de --- include/linux/seqlock.h | 158 ++++++++++++++++++++++++------------------------ 1 file changed, 78 insertions(+), 80 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index d724b5e5408d..4c1456008d89 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -128,23 +128,6 @@ repeat: return ret; } -/** - * raw_read_seqcount - Read the raw seqcount - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry - * - * raw_read_seqcount opens a read critical section of the given - * seqcount without any lockdep checking and without checking or - * masking the LSB. Calling code is responsible for handling that. - */ -static inline unsigned raw_read_seqcount(const seqcount_t *s) -{ - unsigned ret = READ_ONCE(s->sequence); - smp_rmb(); - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret; -} - /** * raw_read_seqcount_begin - start seq-read critical section w/o lockdep * @s: pointer to seqcount_t @@ -176,6 +159,23 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) return raw_read_seqcount_begin(s); } +/** + * raw_read_seqcount - Read the raw seqcount + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * raw_read_seqcount opens a read critical section of the given + * seqcount without any lockdep checking and without checking or + * masking the LSB. Calling code is responsible for handling that. 
+ */ +static inline unsigned raw_read_seqcount(const seqcount_t *s) +{ + unsigned ret = READ_ONCE(s->sequence); + smp_rmb(); + kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); + return ret; +} + /** * raw_seqcount_begin - begin a seq-read critical section * @s: pointer to seqcount_t @@ -234,8 +234,6 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) return __read_seqcount_retry(s, start); } - - static inline void raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); @@ -250,6 +248,23 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + raw_write_seqcount_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); +} + +static inline void write_seqcount_end(seqcount_t *s) +{ + seqcount_release(&s->dep_map, _RET_IP_); + raw_write_seqcount_end(s); +} + /** * raw_write_seqcount_barrier - do a seq write barrier * @s: pointer to seqcount_t @@ -300,6 +315,21 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * write_seqcount_invalidate - invalidate in-progress read-side seq operations + * @s: pointer to seqcount_t + * + * After write_seqcount_invalidate, no read-side seq operations will complete + * successfully and see data older than this. + */ +static inline void write_seqcount_invalidate(seqcount_t *s) +{ + smp_wmb(); + kcsan_nestable_atomic_begin(); + s->sequence+=2; + kcsan_nestable_atomic_end(); +} + static inline int raw_read_seqcount_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ @@ -395,38 +425,6 @@ static inline void raw_write_seqcount_latch(seqcount_t *s) smp_wmb(); /* increment "sequence" before following stores */ } -static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); -} - -static inline void write_seqcount_begin(seqcount_t *s) -{ - write_seqcount_begin_nested(s, 0); -} - -static inline void write_seqcount_end(seqcount_t *s) -{ - seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_end(s); -} - -/** - * write_seqcount_invalidate - invalidate in-progress read-side seq operations - * @s: pointer to seqcount_t - * - * After write_seqcount_invalidate, no read-side seq operations will complete - * successfully and see data older than this. - */ -static inline void write_seqcount_invalidate(seqcount_t *s) -{ - smp_wmb(); - kcsan_nestable_atomic_begin(); - s->sequence+=2; - kcsan_nestable_atomic_end(); -} - /* * Sequential locks (seqlock_t) * @@ -555,35 +553,6 @@ static inline void read_sequnlock_excl(seqlock_t *sl) spin_unlock(&sl->lock); } -/** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. 
- */ -static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) -{ - if (!(*seq & 1)) /* Even */ - *seq = read_seqbegin(lock); - else /* Odd */ - read_seqlock_excl(lock); -} - -static inline int need_seqretry(seqlock_t *lock, int seq) -{ - return !(seq & 1) && read_seqretry(lock, seq); -} - -static inline void done_seqretry(seqlock_t *lock, int seq) -{ - if (seq & 1) - read_sequnlock_excl(lock); -} - static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); @@ -621,6 +590,35 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } +/** + * read_seqbegin_or_lock - begin a sequence number check or locking block + * @lock: sequence lock + * @seq : sequence number to be checked + * + * First try it once optimistically without taking the lock. If that fails, + * take the lock. The sequence number is also used as a marker for deciding + * whether to be a reader (even) or writer (odd). + * N.B. seq must be initialized to an even number to begin with. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl(lock); +} + +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + read_sequnlock_excl(lock); +} + static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { -- cgit v1.2.3 From 89b88845e05752b3d684eaf147f457c8dfa99c5f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:11 +0200 Subject: seqlock: Add kernel-doc for seqcount_t and seqlock_t APIs seqlock.h is now included by the kernel's RST documentation, but only a small number of the exported seqlock.h functions are kernel-doc annotated. Add kernel-doc for all seqlock.h exported APIs. Signed-off-by: Ahmed S.
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-6-a.darwish@linutronix.de --- include/linux/seqlock.h | 425 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 348 insertions(+), 77 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 4c1456008d89..85fb3ac93ffb 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -75,6 +75,10 @@ static inline void __seqcount_init(seqcount_t *s, const char *name, # define SEQCOUNT_DEP_MAP_INIT(lockname) \ .dep_map = { .name = #lockname } \ +/** + * seqcount_init() - runtime initializer for seqcount_t + * @s: Pointer to the seqcount_t instance + */ # define seqcount_init(s) \ do { \ static struct lock_class_key __key; \ @@ -98,13 +102,15 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) # define seqcount_lockdep_reader_access(x) #endif -#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} - +/** + * SEQCNT_ZERO() - static initializer for seqcount_t + * @name: Name of the seqcount_t instance + */ +#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } /** - * __read_seqcount_begin - begin a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -113,6 +119,8 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned __read_seqcount_begin(const seqcount_t *s) { @@ -129,13 +137,10 @@ repeat: } /** - * raw_read_seqcount_begin - start seq-read critical section w/o lockdep - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep + * @s: Pointer to seqcount_t * - * raw_read_seqcount_begin opens a read critical section of the given - * seqcount, but without any lockdep checking. Validity of the critical - * section is tested by checking read_seqcount_retry function. + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) { @@ -145,13 +150,10 @@ static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) } /** - * read_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * read_seqcount_begin() - begin a seqcount_t read critical section + * @s: Pointer to seqcount_t * - * read_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. 
+ * Return: count to be passed to read_seqcount_retry() */ static inline unsigned read_seqcount_begin(const seqcount_t *s) { @@ -160,13 +162,15 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) } /** - * raw_read_seqcount - Read the raw seqcount - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_read_seqcount() - read the raw seqcount_t counter value + * @s: Pointer to seqcount_t * * raw_read_seqcount opens a read critical section of the given - * seqcount without any lockdep checking and without checking or - * masking the LSB. Calling code is responsible for handling that. + * seqcount_t, without any lockdep checking, and without checking or + * masking the sequence counter LSB. Calling code is responsible for + * handling that. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_read_seqcount(const seqcount_t *s) { @@ -177,18 +181,21 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) } /** - * raw_seqcount_begin - begin a seq-read critical section - * @s: pointer to seqcount_t - * Returns: count to be passed to read_seqcount_retry + * raw_seqcount_begin() - begin a seqcount_t read critical section w/o + * lockdep and w/o counter stabilization + * @s: Pointer to seqcount_t * - * raw_seqcount_begin opens a read critical section of the given seqcount. - * Validity of the critical section is tested by checking read_seqcount_retry - * function. + * raw_seqcount_begin opens a read critical section of the given + * seqcount_t. Unlike read_seqcount_begin(), this function will not wait + * for the count to stabilize. If a writer is active when it begins, it + * will fail the read_seqcount_retry() at the end of the read critical + * section instead of stabilizing at the beginning of it. * - * Unlike read_seqcount_begin(), this function will not wait for the count - * to stabilize. If a writer is active when we begin, we will fail the - * read_seqcount_retry() instead of stabilizing at the beginning of the - * critical section. + * Use this only in special kernel hot paths where the read section is + * small and has a high probability of success through other external + * means. It will save a single branching instruction. + * + * Return: count to be passed to read_seqcount_retry() */ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { @@ -199,10 +206,9 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) } /** - * __read_seqcount_retry - end a seq-read critical section (without barrier) - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * __read_seqcount_retry() - end a seqcount_t read section w/o barrier + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -211,6 +217,8 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) * * Use carefully, only in critical code, and comment how the barrier is * provided. 
+ * + * Return: true if a read section retry is required, else false */ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -219,14 +227,15 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) } /** - * read_seqcount_retry - end a seq-read critical section - * @s: pointer to seqcount_t - * @start: count, from read_seqcount_begin - * Returns: 1 if retry is required, else 0 + * read_seqcount_retry() - end a seqcount_t read critical section + * @s: Pointer to seqcount_t + * @start: count, from read_seqcount_begin() * - * read_seqcount_retry closes a read critical section of the given seqcount. - * If the critical section was invalid, it must be ignored (and typically - * retried). + * read_seqcount_retry closes the read critical section of given + * seqcount_t. If the critical section was invalid, it must be ignored + * (and typically retried). + * + * Return: true if a read section retry is required, else false */ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -234,6 +243,10 @@ static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) return __read_seqcount_retry(s, start); } +/** + * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); @@ -241,6 +254,10 @@ static inline void raw_write_seqcount_begin(seqcount_t *s) smp_wmb(); } +/** + * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep + * @s: Pointer to seqcount_t + */ static inline void raw_write_seqcount_end(seqcount_t *s) { smp_wmb(); @@ -248,17 +265,42 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * write_seqcount_begin_nested() - start a seqcount_t write section with + * custom lockdep nesting level + * @s: Pointer to seqcount_t + * @subclass: lockdep nesting level + * + * See Documentation/locking/lockdep-design.rst + */ static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { raw_write_seqcount_begin(s); seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } +/** + * write_seqcount_begin() - start a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * write_seqcount_begin opens a write side critical section of the given + * seqcount_t. + * + * Context: seqcount_t write side critical sections must be serialized and + * non-preemptible. If readers can be invoked from hardirq or softirq + * context, interrupts or bottom halves must be respectively disabled. + */ static inline void write_seqcount_begin(seqcount_t *s) { write_seqcount_begin_nested(s, 0); } +/** + * write_seqcount_end() - end a seqcount_t write side critical section + * @s: Pointer to seqcount_t + * + * The write section must've been opened with write_seqcount_begin(). + */ static inline void write_seqcount_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); @@ -266,12 +308,12 @@ static inline void write_seqcount_end(seqcount_t *s) } /** - * raw_write_seqcount_barrier - do a seq write barrier - * @s: pointer to seqcount_t + * raw_write_seqcount_barrier() - do a seqcount_t write barrier + * @s: Pointer to seqcount_t * - * This can be used to provide an ordering guarantee instead of the - * usual consistency guarantee. It is one wmb cheaper, because we can - * collapse the two back-to-back wmb()s. + * This can be used to provide an ordering guarantee instead of the usual + * consistency guarantee. 
It is one wmb cheaper, because it can collapse + * the two back-to-back wmb()s. * * Note that writes surrounding the barrier should be declared atomic (e.g. * via WRITE_ONCE): a) to ensure the writes become visible to other threads @@ -316,11 +358,12 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) } /** - * write_seqcount_invalidate - invalidate in-progress read-side seq operations - * @s: pointer to seqcount_t + * write_seqcount_invalidate() - invalidate in-progress seqcount_t read + * side operations + * @s: Pointer to seqcount_t * - * After write_seqcount_invalidate, no read-side seq operations will complete - * successfully and see data older than this. + * After write_seqcount_invalidate, no seqcount_t read side operations + * will complete successfully and see data older than this. */ static inline void write_seqcount_invalidate(seqcount_t *s) { @@ -330,6 +373,21 @@ static inline void write_seqcount_invalidate(seqcount_t *s) kcsan_nestable_atomic_end(); } +/** + * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy + * @s: Pointer to seqcount_t + * + * Use seqcount_t latching to switch between two storage places protected + * by a sequence counter. Doing so allows having interruptible, preemptible, + * seqcount_t write side critical sections. + * + * Check raw_write_seqcount_latch() for more details and a full reader and + * writer usage example. + * + * Return: sequence counter raw value. Use the lowest bit as an index for + * picking which data copy to read. The full counter value must then be + * checked with read_seqcount_retry(). + */ static inline int raw_read_seqcount_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ @@ -338,8 +396,8 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) } /** - * raw_write_seqcount_latch - redirect readers to even/odd copy - * @s: pointer to seqcount_t + * raw_write_seqcount_latch() - redirect readers to even/odd copy + * @s: Pointer to seqcount_t * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -446,17 +504,28 @@ typedef struct { .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } -#define seqlock_init(x) \ +/** + * seqlock_init() - dynamic initializer for seqlock_t + * @sl: Pointer to the seqlock_t instance + */ +#define seqlock_init(sl) \ do { \ - seqcount_init(&(x)->seqcount); \ - spin_lock_init(&(x)->lock); \ + seqcount_init(&(sl)->seqcount); \ + spin_lock_init(&(sl)->lock); \ } while (0) -#define DEFINE_SEQLOCK(x) \ - seqlock_t x = __SEQLOCK_UNLOCKED(x) +/** + * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t + * @sl: Name of the seqlock_t instance + */ +#define DEFINE_SEQLOCK(sl) \ + seqlock_t sl = __SEQLOCK_UNLOCKED(sl) -/* - * Read side functions for starting and finalizing a read side section. +/** + * read_seqbegin() - start a seqlock_t read side critical section + * @sl: Pointer to seqlock_t + * + * Return: count, to be passed to read_seqretry() */ static inline unsigned read_seqbegin(const seqlock_t *sl) { @@ -467,6 +536,17 @@ static inline unsigned read_seqbegin(const seqlock_t *sl) return ret; } +/** + * read_seqretry() - end a seqlock_t read side section + * @sl: Pointer to seqlock_t + * @start: count, from read_seqbegin() + * + * read_seqretry closes the read side critical section of given seqlock_t. + * If the critical section was invalid, it must be ignored (and typically + * retried). 
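+ *
+ * A minimal reader sketch, with foo_lock as an illustrative seqlock_t::
+ *
+ *	unsigned seq;
+ *
+ *	do {
+ *		seq = read_seqbegin(&foo_lock);
+ *
+ *		// ... read the data protected by foo_lock ...
+ *
+ *	} while (read_seqretry(&foo_lock, seq));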
+ * + * Return: true if a read section retry is required, else false + */ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) { /* @@ -478,10 +558,18 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) return read_seqcount_retry(&sl->seqcount, start); } -/* - * Lock out other writers and update the count. - * Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * write_seqlock() - start a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_seqlock opens a write side critical section for the given + * seqlock_t. It also implicitly acquires the spinlock_t embedded inside + * that sequential lock. All seqlock_t write side sections are thus + * automatically serialized and non-preemptible. + * + * Context: if the seqlock_t read section, or other write side critical + * sections, can be invoked from hardirq or softirq contexts, use the + * _irqsave or _bh variants of this function instead. */ static inline void write_seqlock(seqlock_t *sl) { @@ -489,30 +577,66 @@ static inline void write_seqlock(seqlock_t *sl) write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock() - end a seqlock_t write side critical section + * @sl: Pointer to seqlock_t + * + * write_sequnlock closes the (serialized and non-preemptible) write side + * critical section of given seqlock_t. + */ static inline void write_sequnlock(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock(&sl->lock); } +/** + * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _bh variant of write_seqlock(). Use only if the read side section, or + * other write side sections, can be invoked from softirq contexts. + */ static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_bh closes the serialized, non-preemptible, and + * softirqs-disabled, seqlock_t write side critical section opened with + * write_seqlock_bh(). + */ static inline void write_sequnlock_bh(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } +/** + * write_seqlock_irq() - start a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * _irq variant of write_seqlock(). Use only if the read side section, or + * other write sections, can be invoked from hardirq contexts. + */ static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); write_seqcount_begin(&sl->seqcount); } +/** + * write_sequnlock_irq() - end a non-interruptible seqlock_t write section + * @sl: Pointer to seqlock_t + * + * write_sequnlock_irq closes the serialized and non-interruptible + * seqlock_t write side section opened with write_seqlock_irq(). + */ static inline void write_sequnlock_irq(seqlock_t *sl) { write_seqcount_end(&sl->seqcount); @@ -528,9 +652,28 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) return flags; } +/** + * write_seqlock_irqsave() - start a non-interruptible seqlock_t write + * section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to write_sequnlock_irqrestore(). + * + * _irqsave variant of write_seqlock(). Use it only if the read side + * section, or other write sections, can be invoked from hardirq context. 
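+ *
+ * A usage sketch, with foo_lock as an illustrative seqlock_t::
+ *
+ *	unsigned long flags;
+ *
+ *	write_seqlock_irqsave(&foo_lock, flags);
+ *	// ... update the data protected by foo_lock ...
+ *	write_sequnlock_irqrestore(&foo_lock, flags);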
+ */ #define write_seqlock_irqsave(lock, flags) \ do { flags = __write_seqlock_irqsave(lock); } while (0) +/** + * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write + * section + * @sl: Pointer to seqlock_t + * @flags: Caller's saved interrupt state, from write_seqlock_irqsave() + * + * write_sequnlock_irqrestore closes the serialized and non-interruptible + * seqlock_t write section previously opened with write_seqlock_irqsave(). + */ static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { @@ -538,36 +681,79 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) spin_unlock_irqrestore(&sl->lock, flags); } -/* - * A locking reader exclusively locks out other writers and locking readers, - * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. - * Don't need preempt_disable() because that is in the spin_lock already. +/** + * read_seqlock_excl() - begin a seqlock_t locking reader section + * @sl: Pointer to seqlock_t + * + * read_seqlock_excl opens a seqlock_t locking reader critical section. A + * locking reader exclusively locks out *both* other writers *and* other + * locking readers, but it does not update the embedded sequence number. + * + * Locking readers act like a normal spin_lock()/spin_unlock(). + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * The opened read section must be closed with read_sequnlock_excl(). */ static inline void read_seqlock_excl(seqlock_t *sl) { spin_lock(&sl->lock); } +/** + * read_sequnlock_excl() - end a seqlock_t locking reader critical section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl(seqlock_t *sl) { spin_unlock(&sl->lock); } +/** + * read_seqlock_excl_bh() - start a seqlock_t locking reader section with + * softirqs disabled + * @sl: Pointer to seqlock_t + * + * _bh variant of read_seqlock_excl(). Use this variant only if the + * seqlock_t write side section, *or other read sections*, can be invoked + * from softirq contexts. + */ static inline void read_seqlock_excl_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); } +/** + * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking + * reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_bh(seqlock_t *sl) { spin_unlock_bh(&sl->lock); } +/** + * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking + * reader section + * @sl: Pointer to seqlock_t + * + * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ static inline void read_seqlock_excl_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); } +/** + * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + */ static inline void read_sequnlock_excl_irq(seqlock_t *sl) { spin_unlock_irq(&sl->lock); @@ -581,9 +767,26 @@ static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) return flags; } +/** + * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t + * locking reader section + * @lock: Pointer to seqlock_t + * @flags: Stack-allocated storage for saving caller's local interrupt + * state, to be passed to read_sequnlock_excl_irqrestore(). + * + * _irqsave variant of read_seqlock_excl(). 
Use this only if the seqlock_t + * write side section, *or other read sections*, can be invoked from a + * hardirq context. + */ #define read_seqlock_excl_irqsave(lock, flags) \ do { flags = __read_seqlock_excl_irqsave(lock); } while (0) +/** + * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t + * locking reader section + * @sl: Pointer to seqlock_t + * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave() + */ static inline void read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) { @@ -591,14 +794,35 @@ read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) } /** - * read_seqbegin_or_lock - begin a sequence number check or locking block - * @lock: sequence lock - * @seq : sequence number to be checked - * - * First try it once optimistically without taking the lock. If that fails, - * take the lock. The sequence number is also used as a marker for deciding - * whether to be a reader (even) or writer (odd). - * N.B. seq must be initialized to an even number to begin with. + * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader + * @lock: Pointer to seqlock_t + * @seq : Marker and return parameter. If the passed value is even, the + * reader will become a *lockless* seqlock_t reader as in read_seqbegin(). + * If the passed value is odd, the reader will become a *locking* reader + * as in read_seqlock_excl(). In the first call to this function, the + * caller *must* initialize and pass an even value to @seq; this way, a + * lockless read can be optimistically tried first. + * + * read_seqbegin_or_lock is an API designed to optimistically try a normal + * lockless seqlock_t read section first. If an odd counter is found, the + * lockless read trial has failed, and the next read iteration transforms + * itself into a full seqlock_t locking reader. + * + * This is typically used to avoid seqlock_t lockless readers starvation + * (too much retry loops) in the case of a sharp spike in write side + * activity. + * + * Context: if the seqlock_t write section, *or other read sections*, can + * be invoked from hardirq or softirq contexts, use the _irqsave or _bh + * variant of this function instead. + * + * Check Documentation/locking/seqlock.rst for template example code. + * + * Return: the encountered sequence counter value, through the @seq + * parameter, which is overloaded as a return parameter. This returned + * value must be checked with need_seqretry(). If the read section need to + * be retried, this returned value must also be passed as the @seq + * parameter of the next read_seqbegin_or_lock() iteration. */ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) { @@ -608,17 +832,52 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) read_seqlock_excl(lock); } +/** + * need_seqretry() - validate seqlock_t "locking or lockless" read section + * @lock: Pointer to seqlock_t + * @seq: sequence count, from read_seqbegin_or_lock() + * + * Return: true if a read section retry is required, false otherwise + */ static inline int need_seqretry(seqlock_t *lock, int seq) { return !(seq & 1) && read_seqretry(lock, seq); } +/** + * done_seqretry() - end seqlock_t "locking or lockless" reader section + * @lock: Pointer to seqlock_t + * @seq: count, from read_seqbegin_or_lock() + * + * done_seqretry finishes the seqlock_t read side critical section started + * with read_seqbegin_or_lock() and validated by need_seqretry(). 
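+ *
+ * A sketch of the intended calling pattern (compare fs/dcache.c:d_walk),
+ * starting lockless and escalating to a locking reader if a retry is
+ * needed; foo_lock is an illustrative seqlock_t::
+ *
+ *	int seq = 0;	// even: first pass is lockless
+ *
+ * retry:
+ *	read_seqbegin_or_lock(&foo_lock, &seq);
+ *
+ *	// ... read the data protected by foo_lock ...
+ *
+ *	if (need_seqretry(&foo_lock, seq)) {
+ *		seq = 1;	// odd: retry as a locking reader
+ *		goto retry;
+ *	}
+ *	done_seqretry(&foo_lock, seq);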
+ */ static inline void done_seqretry(seqlock_t *lock, int seq) { if (seq & 1) read_sequnlock_excl(lock); } +/** + * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or + * a non-interruptible locking reader + * @lock: Pointer to seqlock_t + * @seq: Marker and return parameter. Check read_seqbegin_or_lock(). + * + * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if + * the seqlock_t write section, *or other read sections*, can be invoked + * from hardirq context. + * + * Note: Interrupts will be disabled only for "locking reader" mode. + * + * Return: + * + * 1. The saved local interrupts state in case of a locking reader, to + * be passed to done_seqretry_irqrestore(). + * + * 2. The encountered sequence counter value, returned through @seq + * overloaded as a return parameter. Check read_seqbegin_or_lock(). + */ static inline unsigned long read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) { @@ -632,6 +891,18 @@ read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) return flags; } +/** + * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a + * non-interruptible locking reader section + * @lock: Pointer to seqlock_t + * @seq: Count, from read_seqbegin_or_lock_irqsave() + * @flags: Caller's saved local interrupt state in case of a locking + * reader, also from read_seqbegin_or_lock_irqsave() + * + * This is the _irqrestore variant of done_seqretry(). The read section + * must've been opened with read_seqbegin_or_lock_irqsave(), and validated + * by need_seqretry(). + */ static inline void done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) { -- cgit v1.2.3 From 932e46365226324d2cf26d8bdec8b51ceb296948 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:12 +0200 Subject: seqlock: Implement raw_seqcount_begin() in terms of raw_read_seqcount() raw_seqcount_begin() has the same code as raw_read_seqcount(), with the exception of masking the sequence counter's LSB before returning it to the caller. Note, raw_seqcount_begin() masks the counter's LSB before returning it to the caller so that read_seqcount_retry() can fail if the counter is odd -- without the overhead of an extra branching instruction. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-7-a.darwish@linutronix.de --- include/linux/seqlock.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 85fb3ac93ffb..e885702d8b82 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -199,10 +199,11 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) */ static inline unsigned raw_seqcount_begin(const seqcount_t *s) { - unsigned ret = READ_ONCE(s->sequence); - smp_rmb(); - kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); - return ret & ~1; + /* + * If the counter is odd, let read_seqcount_retry() fail + * by decrementing the counter. + */ + return raw_read_seqcount(s) & ~1; } /** -- cgit v1.2.3 From 859247d39fb008ea812e8f0c398a58a20c12899e Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:14 +0200 Subject: seqlock: lockdep assert non-preemptibility on seqcount_t write Preemption must be disabled before entering a sequence count write side critical section. Failing to do so, the seqcount read side can preempt the write side section and spin for the entire scheduler tick. 
If that reader belongs to a real-time scheduling class, it can spin forever and the kernel will livelock. Assert through lockdep that preemption is disabled for seqcount writers. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-9-a.darwish@linutronix.de --- include/linux/seqlock.h | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index e885702d8b82..54bc20496392 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -266,6 +266,12 @@ static inline void raw_write_seqcount_end(seqcount_t *s) kcsan_nestable_atomic_end(); } +static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + raw_write_seqcount_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); +} + /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level @@ -276,8 +282,19 @@ static inline void raw_write_seqcount_end(seqcount_t *s) */ static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) { - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); + lockdep_assert_preemption_disabled(); + __write_seqcount_begin_nested(s, subclass); +} + +/* + * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks. + * + * Use for internal seqlock.h code where it's known that preemption is + * already disabled. For example, seqlock_t write side functions. + */ +static inline void __write_seqcount_begin(seqcount_t *s) +{ + __write_seqcount_begin_nested(s, 0); } /** @@ -575,7 +592,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } /** @@ -601,7 +618,7 @@ static inline void write_sequnlock(seqlock_t *sl) static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } /** @@ -628,7 +645,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); } /** @@ -649,7 +666,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - write_seqcount_begin(&sl->seqcount); + __write_seqcount_begin(&sl->seqcount); return flags; } -- cgit v1.2.3 From 55f3560df975f557c48aa6afc636808f31ecb87a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:15 +0200 Subject: seqlock: Extend seqcount API with associated locks A sequence counter write side critical section must be protected by some form of locking to serialize writers. If the serialization primitive is not disabling preemption implicitly, preemption has to be explicitly disabled before entering the write side critical section. There is no built-in debugging mechanism to verify that the lock used for writer serialization is held and preemption is disabled. Some usage sites like dma-buf have explicit lockdep checks for the writer-side lock, but this covers only a small portion of the sequence counter usage in the kernel. 
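Concretely, with a plain seqcount_t all of this has to be open-coded at
each write side. A sketch, with illustrative foo_* names, for a writer
serialized by a mutex (which does not disable preemption implicitly):

	static DEFINE_MUTEX(foo_lock);
	static seqcount_t foo_seq;	/* write side serialized by foo_lock */

	/* caller must hold foo_lock */
	static void foo_update(void)
	{
		lockdep_assert_held(&foo_lock);	/* hand-written, easy to omit */

		preempt_disable();		/* mutexes don't do this for us */
		write_seqcount_begin(&foo_seq);
		/* ... update the data protected by foo_seq ... */
		write_seqcount_end(&foo_seq);
		preempt_enable();
	}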
Add new sequence counter types which allow associating a lock with the sequence counter at initialization time. The seqcount API functions are extended to provide appropriate lockdep assertions depending on the seqcount/lock type. For sequence counters with associated locks that do not implicitly disable preemption, preemption protection is enforced in the sequence counter write side functions. This removes the need to explicitly add preempt_disable/enable() around the write side critical sections: the write_begin/end() functions for these new sequence counter types automatically do this. Introduce the following seqcount types with associated locks: seqcount_spinlock_t seqcount_raw_spinlock_t seqcount_rwlock_t seqcount_mutex_t seqcount_ww_mutex_t Extend the seqcount read and write functions to branch out to the specific seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel API explosion for each new seqcount_LOCKTYPE_t added. Add such compile-time type detection logic into a new, internal, seqlock header. Document the proper seqcount_LOCKTYPE_t usage, and rationale, at Documentation/locking/seqlock.rst. If lockdep is disabled, this lock association is compiled out and has neither storage size nor runtime overhead. Signed-off-by: Ahmed S. Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-10-a.darwish@linutronix.de --- Documentation/locking/seqlock.rst | 52 +++++ include/linux/seqlock.h | 464 ++++++++++++++++++++++++++++++++------ 2 files changed, 447 insertions(+), 69 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/Documentation/locking/seqlock.rst b/Documentation/locking/seqlock.rst index 366dd368d90a..62c5ad98c11c 100644 --- a/Documentation/locking/seqlock.rst +++ b/Documentation/locking/seqlock.rst @@ -87,6 +87,58 @@ Read path:: } while (read_seqcount_retry(&foo_seqcount, seq)); +.. _seqcount_locktype_t: + +Sequence counters with associated locks (``seqcount_LOCKTYPE_t``) +----------------------------------------------------------------- + +As discussed at :ref:`seqcount_t`, sequence count write side critical +sections must be serialized and non-preemptible. This variant of +sequence counters associate the lock used for writer serialization at +initialization time, which enables lockdep to validate that the write +side critical sections are properly serialized. + +This lock association is a NOOP if lockdep is disabled and has neither +storage nor runtime overhead. If lockdep is enabled, the lock pointer is +stored in struct seqcount and lockdep's "lock is held" assertions are +injected at the beginning of the write side critical section to validate +that it is properly protected. + +For lock types which do not implicitly disable preemption, preemption +protection is enforced in the write side function. + +The following sequence counters with associated locks are defined: + + - ``seqcount_spinlock_t`` + - ``seqcount_raw_spinlock_t`` + - ``seqcount_rwlock_t`` + - ``seqcount_mutex_t`` + - ``seqcount_ww_mutex_t`` + +The plain seqcount read and write APIs branch out to the specific +seqcount_LOCKTYPE_t implementation at compile-time. This avoids kernel +API explosion per each new seqcount LOCKTYPE.
+ +Initialization (replace "LOCKTYPE" with one of the supported locks):: + + /* dynamic */ + seqcount_LOCKTYPE_t foo_seqcount; + seqcount_LOCKTYPE_init(&foo_seqcount, &lock); + + /* static */ + static seqcount_LOCKTYPE_t foo_seqcount = + SEQCNT_LOCKTYPE_ZERO(foo_seqcount, &lock); + + /* C99 struct init */ + struct { + .seq = SEQCNT_LOCKTYPE_ZERO(foo.seq, &lock), + } foo; + +Write path: same as in :ref:`seqcount_t`, while running from a context +with the associated LOCKTYPE lock acquired. + +Read path: same as in :ref:`seqcount_t`. + .. _seqlock_t: Sequential locks (``seqlock_t``) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 54bc20496392..8c16a494c968 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -10,13 +10,17 @@ * * Copyrights: * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli + * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH */ -#include -#include -#include #include #include +#include +#include +#include +#include +#include + #include /* @@ -48,6 +52,10 @@ * This mechanism can't be used if the protected data contains pointers, * as the writer can invalidate a pointer that a reader is following. * + * If the write serialization mechanism is one of the common kernel + * locking primitives, use a sequence counter with associated lock + * (seqcount_LOCKTYPE_t) instead. + * * If it's desired to automatically handle the sequence counter writer * serialization and non-preemptibility requirements, use a sequential * lock (seqlock_t) instead. @@ -108,9 +116,267 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) */ #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) } +/* + * Sequence counters with associated locks (seqcount_LOCKTYPE_t) + * + * A sequence counter which associates the lock used for writer + * serialization at initialization time. This enables lockdep to validate + * that the write side critical section is properly serialized. + * + * For associated locks which do not implicitly disable preemption, + * preemption protection is enforced in the write side function. + * + * Lockdep is never used in any for the raw write variants. + * + * See Documentation/locking/seqlock.rst + */ + +#ifdef CONFIG_LOCKDEP +#define __SEQ_LOCKDEP(expr) expr +#else +#define __SEQ_LOCKDEP(expr) +#endif + +#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ + .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ + __SEQ_LOCKDEP(.lock = (assoc_lock)) \ +} + +#define seqcount_locktype_init(s, assoc_lock) \ +do { \ + seqcount_init(&(s)->seqcount); \ + __SEQ_LOCKDEP((s)->lock = (assoc_lock)); \ +} while (0) + +/** + * typedef seqcount_spinlock_t - sequence counter with spinlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated spinlock + * + * A plain sequence counter with external writer synchronization by a + * spinlock. The spinlock is associated to the sequence count in the + * static initializer or init function. This enables lockdep to validate + * that the write side critical section is properly serialized. 
+ */ +typedef struct seqcount_spinlock { + seqcount_t seqcount; + __SEQ_LOCKDEP(spinlock_t *lock); +} seqcount_spinlock_t; + +/** + * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t + * @name: Name of the seqcount_spinlock_t instance + * @lock: Pointer to the associated spinlock + */ +#define SEQCNT_SPINLOCK_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_spinlock_init - runtime initializer for seqcount_spinlock_t + * @s: Pointer to the seqcount_spinlock_t instance + * @lock: Pointer to the associated spinlock + */ +#define seqcount_spinlock_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_raw_spinlock_t - sequence count with raw spinlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated raw spinlock + * + * A plain sequence counter with external writer synchronization by a + * raw spinlock. The raw spinlock is associated to the sequence count in + * the static initializer or init function. This enables lockdep to + * validate that the write side critical section is properly serialized. + */ +typedef struct seqcount_raw_spinlock { + seqcount_t seqcount; + __SEQ_LOCKDEP(raw_spinlock_t *lock); +} seqcount_raw_spinlock_t; + +/** + * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t + * @name: Name of the seqcount_raw_spinlock_t instance + * @lock: Pointer to the associated raw_spinlock + */ +#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_raw_spinlock_init - runtime initializer for seqcount_raw_spinlock_t + * @s: Pointer to the seqcount_raw_spinlock_t instance + * @lock: Pointer to the associated raw_spinlock + */ +#define seqcount_raw_spinlock_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_rwlock_t - sequence count with rwlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated rwlock + * + * A plain sequence counter with external writer synchronization by a + * rwlock. The rwlock is associated to the sequence count in the static + * initializer or init function. This enables lockdep to validate that + * the write side critical section is properly serialized. + */ +typedef struct seqcount_rwlock { + seqcount_t seqcount; + __SEQ_LOCKDEP(rwlock_t *lock); +} seqcount_rwlock_t; + +/** + * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t + * @name: Name of the seqcount_rwlock_t instance + * @lock: Pointer to the associated rwlock + */ +#define SEQCNT_RWLOCK_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_rwlock_init - runtime initializer for seqcount_rwlock_t + * @s: Pointer to the seqcount_rwlock_t instance + * @lock: Pointer to the associated rwlock + */ +#define seqcount_rwlock_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_mutex_t - sequence count with mutex associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated mutex + * + * A plain sequence counter with external writer synchronization by a + * mutex. The mutex is associated to the sequence counter in the static + * initializer or init function. This enables lockdep to validate that + * the write side critical section is properly serialized. + * + * The write side API functions write_seqcount_begin()/end() automatically + * disable and enable preemption when used with seqcount_mutex_t. 
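+ *
+ * An initialization and write side sketch, with illustrative foo_*
+ * names::
+ *
+ *	static DEFINE_MUTEX(foo_lock);
+ *	static seqcount_mutex_t foo_seq =
+ *		SEQCNT_MUTEX_ZERO(foo_seq, &foo_lock);
+ *
+ *	// with foo_lock held; preemption is handled automatically
+ *	write_seqcount_begin(&foo_seq);
+ *	// ... update the data protected by foo_seq ...
+ *	write_seqcount_end(&foo_seq);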
+ */ +typedef struct seqcount_mutex { + seqcount_t seqcount; + __SEQ_LOCKDEP(struct mutex *lock); +} seqcount_mutex_t; + +/** + * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t + * @name: Name of the seqcount_mutex_t instance + * @lock: Pointer to the associated mutex + */ +#define SEQCNT_MUTEX_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_mutex_init - runtime initializer for seqcount_mutex_t + * @s: Pointer to the seqcount_mutex_t instance + * @lock: Pointer to the associated mutex + */ +#define seqcount_mutex_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/** + * typedef seqcount_ww_mutex_t - sequence count with ww_mutex associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated ww_mutex + * + * A plain sequence counter with external writer synchronization by a + * ww_mutex. The ww_mutex is associated to the sequence counter in the static + * initializer or init function. This enables lockdep to validate that + * the write side critical section is properly serialized. + * + * The write side API functions write_seqcount_begin()/end() automatically + * disable and enable preemption when used with seqcount_ww_mutex_t. + */ +typedef struct seqcount_ww_mutex { + seqcount_t seqcount; + __SEQ_LOCKDEP(struct ww_mutex *lock); +} seqcount_ww_mutex_t; + +/** + * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t + * @name: Name of the seqcount_ww_mutex_t instance + * @lock: Pointer to the associated ww_mutex + */ +#define SEQCNT_WW_MUTEX_ZERO(name, lock) \ + SEQCOUNT_LOCKTYPE_ZERO(name, lock) + +/** + * seqcount_ww_mutex_init - runtime initializer for seqcount_ww_mutex_t + * @s: Pointer to the seqcount_ww_mutex_t instance + * @lock: Pointer to the associated ww_mutex + */ +#define seqcount_ww_mutex_init(s, lock) \ + seqcount_locktype_init(s, lock) + +/* + * @preempt: Is the associated write serialization lock preemtpible? + */ +#define SEQCOUNT_LOCKTYPE(locktype, preempt, lockmember) \ +static inline seqcount_t * \ +__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s) \ +{ \ + return &s->seqcount; \ +} \ + \ +static inline bool \ +__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s) \ +{ \ + return preempt; \ +} \ + \ +static inline void \ +__seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \ +{ \ + __SEQ_LOCKDEP(lockdep_assert_held(lockmember)); \ +} + +/* + * Similar hooks, but for plain seqcount_t + */ + +static inline seqcount_t *__seqcount_ptr(seqcount_t *s) +{ + return s; +} + +static inline bool __seqcount_preemptible(seqcount_t *s) +{ + return false; +} + +static inline void __seqcount_assert(seqcount_t *s) +{ + lockdep_assert_preemption_disabled(); +} + +/* + * @s: Pointer to seqcount_locktype_t, generated hooks first parameter. 
+ */ +SEQCOUNT_LOCKTYPE(raw_spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(rwlock, false, s->lock) +SEQCOUNT_LOCKTYPE(mutex, true, s->lock) +SEQCOUNT_LOCKTYPE(ww_mutex, true, &s->lock->base) + +#define __seqprop_case(s, locktype, prop) \ + seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s)) + +#define __seqprop(s, prop) _Generic(*(s), \ + seqcount_t: __seqcount_##prop((void *)(s)), \ + __seqprop_case((s), raw_spinlock, prop), \ + __seqprop_case((s), spinlock, prop), \ + __seqprop_case((s), rwlock, prop), \ + __seqprop_case((s), mutex, prop), \ + __seqprop_case((s), ww_mutex, prop)) + +#define __to_seqcount_t(s) __seqprop(s, ptr) +#define __associated_lock_exists_and_is_preemptible(s) __seqprop(s, preemptible) +#define __assert_write_section_is_protected(s) __seqprop(s, assert) + /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() * barrier. Callers should ensure that smp_rmb() or equivalent ordering is @@ -122,7 +388,10 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned __read_seqcount_begin(const seqcount_t *s) +#define __read_seqcount_begin(s) \ + __read_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned __read_seqcount_t_begin(const seqcount_t *s) { unsigned ret; @@ -138,32 +407,38 @@ repeat: /** * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) +#define raw_read_seqcount_begin(s) \ + raw_read_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) { - unsigned ret = __read_seqcount_begin(s); + unsigned ret = __read_seqcount_t_begin(s); smp_rmb(); return ret; } /** * read_seqcount_begin() - begin a seqcount_t read critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned read_seqcount_begin(const seqcount_t *s) +#define read_seqcount_begin(s) \ + read_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned read_seqcount_t_begin(const seqcount_t *s) { seqcount_lockdep_reader_access(s); - return raw_read_seqcount_begin(s); + return raw_read_seqcount_t_begin(s); } /** * raw_read_seqcount() - read the raw seqcount_t counter value - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * raw_read_seqcount opens a read critical section of the given * seqcount_t, without any lockdep checking, and without checking or @@ -172,7 +447,10 @@ static inline unsigned read_seqcount_begin(const seqcount_t *s) * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_read_seqcount(const seqcount_t *s) +#define raw_read_seqcount(s) \ + raw_read_seqcount_t(__to_seqcount_t(s)) + +static inline unsigned raw_read_seqcount_t(const seqcount_t *s) { unsigned ret = READ_ONCE(s->sequence); smp_rmb(); @@ -183,7 +461,7 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) /** * raw_seqcount_begin() - 
begin a seqcount_t read critical section w/o * lockdep and w/o counter stabilization - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * raw_seqcount_begin opens a read critical section of the given * seqcount_t. Unlike read_seqcount_begin(), this function will not wait @@ -197,18 +475,21 @@ static inline unsigned raw_read_seqcount(const seqcount_t *s) * * Return: count to be passed to read_seqcount_retry() */ -static inline unsigned raw_seqcount_begin(const seqcount_t *s) +#define raw_seqcount_begin(s) \ + raw_seqcount_t_begin(__to_seqcount_t(s)) + +static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) { /* * If the counter is odd, let read_seqcount_retry() fail * by decrementing the counter. */ - return raw_read_seqcount(s) & ~1; + return raw_read_seqcount_t(s) & ~1; } /** * __read_seqcount_retry() - end a seqcount_t read section w/o barrier - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * @start: count, from read_seqcount_begin() * * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() @@ -221,7 +502,10 @@ static inline unsigned raw_seqcount_begin(const seqcount_t *s) * * Return: true if a read section retry is required, else false */ -static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) +#define __read_seqcount_retry(s, start) \ + __read_seqcount_t_retry(__to_seqcount_t(s), start) + +static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) { kcsan_atomic_next(0); return unlikely(READ_ONCE(s->sequence) != start); @@ -229,7 +513,7 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) /** * read_seqcount_retry() - end a seqcount_t read critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * @start: count, from read_seqcount_begin() * * read_seqcount_retry closes the read critical section of given @@ -238,17 +522,28 @@ static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) * * Return: true if a read section retry is required, else false */ -static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) +#define read_seqcount_retry(s, start) \ + read_seqcount_t_retry(__to_seqcount_t(s), start) + +static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) { smp_rmb(); - return __read_seqcount_retry(s, start); + return __read_seqcount_t_retry(s, start); } /** * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants */ -static inline void raw_write_seqcount_begin(seqcount_t *s) +#define raw_write_seqcount_begin(s) \ +do { \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_disable(); \ + \ + raw_write_seqcount_t_begin(__to_seqcount_t(s)); \ +} while (0) + +static inline void raw_write_seqcount_t_begin(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -257,49 +552,50 @@ static inline void raw_write_seqcount_begin(seqcount_t *s) /** * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants */ -static inline void raw_write_seqcount_end(seqcount_t *s) +#define raw_write_seqcount_end(s) \ +do { \ + raw_write_seqcount_t_end(__to_seqcount_t(s)); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + 
preempt_enable(); \ +} while (0) + +static inline void raw_write_seqcount_t_end(seqcount_t *s) { smp_wmb(); s->sequence++; kcsan_nestable_atomic_end(); } -static inline void __write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - raw_write_seqcount_begin(s); - seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); -} - /** * write_seqcount_begin_nested() - start a seqcount_t write section with * custom lockdep nesting level - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * @subclass: lockdep nesting level * * See Documentation/locking/lockdep-design.rst */ -static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) -{ - lockdep_assert_preemption_disabled(); - __write_seqcount_begin_nested(s, subclass); -} - -/* - * A write_seqcount_begin() variant w/o lockdep non-preemptibility checks. - * - * Use for internal seqlock.h code where it's known that preemption is - * already disabled. For example, seqlock_t write side functions. - */ -static inline void __write_seqcount_begin(seqcount_t *s) +#define write_seqcount_begin_nested(s, subclass) \ +do { \ + __assert_write_section_is_protected(s); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_disable(); \ + \ + write_seqcount_t_begin_nested(__to_seqcount_t(s), subclass); \ +} while (0) + +static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) { - __write_seqcount_begin_nested(s, 0); + raw_write_seqcount_t_begin(s); + seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_); } /** * write_seqcount_begin() - start a seqcount_t write side critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * write_seqcount_begin opens a write side critical section of the given * seqcount_t. @@ -308,26 +604,44 @@ static inline void __write_seqcount_begin(seqcount_t *s) * non-preemptible. If readers can be invoked from hardirq or softirq * context, interrupts or bottom halves must be respectively disabled. */ -static inline void write_seqcount_begin(seqcount_t *s) +#define write_seqcount_begin(s) \ +do { \ + __assert_write_section_is_protected(s); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_disable(); \ + \ + write_seqcount_t_begin(__to_seqcount_t(s)); \ +} while (0) + +static inline void write_seqcount_t_begin(seqcount_t *s) { - write_seqcount_begin_nested(s, 0); + write_seqcount_t_begin_nested(s, 0); } /** * write_seqcount_end() - end a seqcount_t write side critical section - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * The write section must've been opened with write_seqcount_begin(). */ -static inline void write_seqcount_end(seqcount_t *s) +#define write_seqcount_end(s) \ +do { \ + write_seqcount_t_end(__to_seqcount_t(s)); \ + \ + if (__associated_lock_exists_and_is_preemptible(s)) \ + preempt_enable(); \ +} while (0) + +static inline void write_seqcount_t_end(seqcount_t *s) { seqcount_release(&s->dep_map, _RET_IP_); - raw_write_seqcount_end(s); + raw_write_seqcount_t_end(s); } /** * raw_write_seqcount_barrier() - do a seqcount_t write barrier - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * This can be used to provide an ordering guarantee instead of the usual * consistency guarantee. 
It is one wmb cheaper, because it can collapse @@ -366,7 +680,10 @@ static inline void write_seqcount_end(seqcount_t *s) * WRITE_ONCE(X, false); * } */ -static inline void raw_write_seqcount_barrier(seqcount_t *s) +#define raw_write_seqcount_barrier(s) \ + raw_write_seqcount_t_barrier(__to_seqcount_t(s)) + +static inline void raw_write_seqcount_t_barrier(seqcount_t *s) { kcsan_nestable_atomic_begin(); s->sequence++; @@ -378,12 +695,15 @@ static inline void raw_write_seqcount_barrier(seqcount_t *s) /** * write_seqcount_invalidate() - invalidate in-progress seqcount_t read * side operations - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * After write_seqcount_invalidate, no seqcount_t read side operations * will complete successfully and see data older than this. */ -static inline void write_seqcount_invalidate(seqcount_t *s) +#define write_seqcount_invalidate(s) \ + write_seqcount_t_invalidate(__to_seqcount_t(s)) + +static inline void write_seqcount_t_invalidate(seqcount_t *s) { smp_wmb(); kcsan_nestable_atomic_begin(); @@ -393,7 +713,7 @@ static inline void write_seqcount_invalidate(seqcount_t *s) /** * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * Use seqcount_t latching to switch between two storage places protected * by a sequence counter. Doing so allows having interruptible, preemptible, @@ -406,7 +726,10 @@ static inline void write_seqcount_invalidate(seqcount_t *s) * picking which data copy to read. The full counter value must then be * checked with read_seqcount_retry(). */ -static inline int raw_read_seqcount_latch(seqcount_t *s) +#define raw_read_seqcount_latch(s) \ + raw_read_seqcount_t_latch(__to_seqcount_t(s)) + +static inline int raw_read_seqcount_t_latch(seqcount_t *s) { /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ int seq = READ_ONCE(s->sequence); /* ^^^ */ @@ -415,7 +738,7 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) /** * raw_write_seqcount_latch() - redirect readers to even/odd copy - * @s: Pointer to seqcount_t + * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants * * The latch technique is a multiversion concurrency control method that allows * queries during non-atomic modifications. If you can guarantee queries never @@ -494,7 +817,10 @@ static inline int raw_read_seqcount_latch(seqcount_t *s) * When data is a dynamic data structure; one should use regular RCU * patterns to manage the lifetimes of the objects within. 
*/ -static inline void raw_write_seqcount_latch(seqcount_t *s) +#define raw_write_seqcount_latch(s) \ + raw_write_seqcount_t_latch(__to_seqcount_t(s)) + +static inline void raw_write_seqcount_t_latch(seqcount_t *s) { smp_wmb(); /* prior stores before incrementing "sequence" */ s->sequence++; @@ -592,7 +918,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) static inline void write_seqlock(seqlock_t *sl) { spin_lock(&sl->lock); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); } /** @@ -604,7 +930,7 @@ static inline void write_seqlock(seqlock_t *sl) */ static inline void write_sequnlock(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock(&sl->lock); } @@ -618,7 +944,7 @@ static inline void write_sequnlock(seqlock_t *sl) static inline void write_seqlock_bh(seqlock_t *sl) { spin_lock_bh(&sl->lock); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); } /** @@ -631,7 +957,7 @@ static inline void write_seqlock_bh(seqlock_t *sl) */ static inline void write_sequnlock_bh(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock_bh(&sl->lock); } @@ -645,7 +971,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl) static inline void write_seqlock_irq(seqlock_t *sl) { spin_lock_irq(&sl->lock); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); } /** @@ -657,7 +983,7 @@ static inline void write_seqlock_irq(seqlock_t *sl) */ static inline void write_sequnlock_irq(seqlock_t *sl) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock_irq(&sl->lock); } @@ -666,7 +992,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) unsigned long flags; spin_lock_irqsave(&sl->lock, flags); - __write_seqcount_begin(&sl->seqcount); + write_seqcount_t_begin(&sl->seqcount); return flags; } @@ -695,13 +1021,13 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) static inline void write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) { - write_seqcount_end(&sl->seqcount); + write_seqcount_t_end(&sl->seqcount); spin_unlock_irqrestore(&sl->lock, flags); } /** * read_seqlock_excl() - begin a seqlock_t locking reader section - * @sl: Pointer to seqlock_t + * @sl: Pointer to seqlock_t * * read_seqlock_excl opens a seqlock_t locking reader critical section. A * locking reader exclusively locks out *both* other writers *and* other -- cgit v1.2.3 From ec8702da570ebb59f38471007bf71359c51b027b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 20 Jul 2020 17:55:16 +0200 Subject: seqlock: Align multi-line macros newline escapes at 72 columns Parent commit, "seqlock: Extend seqcount API with associated locks", introduced a big number of multi-line macros that are newline-escaped at 72 columns. For overall cohesion, align the earlier-existing macros similarly. Signed-off-by: Ahmed S. 
Darwish Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200720155530.1173732-11-a.darwish@linutronix.de --- include/linux/seqlock.h | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 8c16a494c968..b48729988325 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -80,17 +80,18 @@ static inline void __seqcount_init(seqcount_t *s, const char *name, } #ifdef CONFIG_DEBUG_LOCK_ALLOC -# define SEQCOUNT_DEP_MAP_INIT(lockname) \ - .dep_map = { .name = #lockname } \ + +# define SEQCOUNT_DEP_MAP_INIT(lockname) \ + .dep_map = { .name = #lockname } /** * seqcount_init() - runtime initializer for seqcount_t * @s: Pointer to the seqcount_t instance */ -# define seqcount_init(s) \ - do { \ - static struct lock_class_key __key; \ - __seqcount_init((s), #s, &__key); \ +# define seqcount_init(s) \ + do { \ + static struct lock_class_key __key; \ + __seqcount_init((s), #s, &__key); \ } while (0) static inline void seqcount_lockdep_reader_access(const seqcount_t *s) @@ -842,20 +843,20 @@ typedef struct { spinlock_t lock; } seqlock_t; -#define __SEQLOCK_UNLOCKED(lockname) \ - { \ - .seqcount = SEQCNT_ZERO(lockname), \ - .lock = __SPIN_LOCK_UNLOCKED(lockname) \ +#define __SEQLOCK_UNLOCKED(lockname) \ + { \ + .seqcount = SEQCNT_ZERO(lockname), \ + .lock = __SPIN_LOCK_UNLOCKED(lockname) \ } /** * seqlock_init() - dynamic initializer for seqlock_t * @sl: Pointer to the seqlock_t instance */ -#define seqlock_init(sl) \ - do { \ - seqcount_init(&(sl)->seqcount); \ - spin_lock_init(&(sl)->lock); \ +#define seqlock_init(sl) \ + do { \ + seqcount_init(&(sl)->seqcount); \ + spin_lock_init(&(sl)->lock); \ } while (0) /** -- cgit v1.2.3 From e55687fe5c1e4849e5559a0a49199c9ca3fff36e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 11:56:22 +0200 Subject: seqlock: s/__SEQ_LOCKDEP/__SEQ_LOCK/g __SEQ_LOCKDEP() is an expression gate for the seqcount_LOCKNAME_t::lock member. Rename it to be about the member, not the gate condition. Later (PREEMPT_RT) patches will make the member available for !LOCKDEP configs. 
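[ Illustration, not part of this patch: a minimal standalone sketch of how such an expression gate works. It conditionally emits both the struct member and any statement that touches it; the names follow the patch, and the bare CONFIG_LOCKDEP switch stands in for the real Kconfig plumbing.

	#ifdef CONFIG_LOCKDEP
	#define __SEQ_LOCK(expr)	expr
	#else
	#define __SEQ_LOCK(expr)
	#endif

	typedef struct seqcount_spinlock {
		seqcount_t seqcount;
		__SEQ_LOCK(spinlock_t *lock);	/* member gated out if !LOCKDEP */
	} seqcount_spinlock_t;

	static inline void seqcount_spinlock_init(seqcount_spinlock_t *s,
						  spinlock_t *lock)
	{
		seqcount_init(&s->seqcount);
		__SEQ_LOCK(s->lock = lock);	/* whole statement gated out */
	}
]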
Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index b48729988325..c689abab06c8 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -133,20 +133,20 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) */ #ifdef CONFIG_LOCKDEP -#define __SEQ_LOCKDEP(expr) expr +#define __SEQ_LOCK(expr) expr #else -#define __SEQ_LOCKDEP(expr) +#define __SEQ_LOCK(expr) #endif #define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ - __SEQ_LOCKDEP(.lock = (assoc_lock)) \ + __SEQ_LOCK(.lock = (assoc_lock)) \ } #define seqcount_locktype_init(s, assoc_lock) \ do { \ seqcount_init(&(s)->seqcount); \ - __SEQ_LOCKDEP((s)->lock = (assoc_lock)); \ + __SEQ_LOCK((s)->lock = (assoc_lock)); \ } while (0) /** @@ -161,7 +161,7 @@ do { \ */ typedef struct seqcount_spinlock { seqcount_t seqcount; - __SEQ_LOCKDEP(spinlock_t *lock); + __SEQ_LOCK(spinlock_t *lock); } seqcount_spinlock_t; /** @@ -192,7 +192,7 @@ typedef struct seqcount_spinlock { */ typedef struct seqcount_raw_spinlock { seqcount_t seqcount; - __SEQ_LOCKDEP(raw_spinlock_t *lock); + __SEQ_LOCK(raw_spinlock_t *lock); } seqcount_raw_spinlock_t; /** @@ -223,7 +223,7 @@ typedef struct seqcount_raw_spinlock { */ typedef struct seqcount_rwlock { seqcount_t seqcount; - __SEQ_LOCKDEP(rwlock_t *lock); + __SEQ_LOCK(rwlock_t *lock); } seqcount_rwlock_t; /** @@ -257,7 +257,7 @@ typedef struct seqcount_rwlock { */ typedef struct seqcount_mutex { seqcount_t seqcount; - __SEQ_LOCKDEP(struct mutex *lock); + __SEQ_LOCK(struct mutex *lock); } seqcount_mutex_t; /** @@ -291,7 +291,7 @@ typedef struct seqcount_mutex { */ typedef struct seqcount_ww_mutex { seqcount_t seqcount; - __SEQ_LOCKDEP(struct ww_mutex *lock); + __SEQ_LOCK(struct ww_mutex *lock); } seqcount_ww_mutex_t; /** @@ -329,7 +329,7 @@ __seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s) \ static inline void \ __seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \ { \ - __SEQ_LOCKDEP(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(lockmember)); \ } /* -- cgit v1.2.3 From a8772dccb2ec7b139db1b3ba782ecb12ed92d7c3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 11:56:49 +0200 Subject: seqlock: Fold seqcount_LOCKNAME_t definition Manual repetition is boring and error prone. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 142 +++++++++++++----------------------------------- 1 file changed, 39 insertions(+), 103 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index c689abab06c8..4b259bb4d4b9 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -149,21 +149,6 @@ do { \ __SEQ_LOCK((s)->lock = (assoc_lock)); \ } while (0) -/** - * typedef seqcount_spinlock_t - sequence counter with spinlock associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated spinlock - * - * A plain sequence counter with external writer synchronization by a - * spinlock. The spinlock is associated to the sequence count in the - * static initializer or init function. This enables lockdep to validate - * that the write side critical section is properly serialized. 
- */ -typedef struct seqcount_spinlock { - seqcount_t seqcount; - __SEQ_LOCK(spinlock_t *lock); -} seqcount_spinlock_t; - /** * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t * @name: Name of the seqcount_spinlock_t instance @@ -180,21 +165,6 @@ typedef struct seqcount_spinlock { #define seqcount_spinlock_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_raw_spinlock_t - sequence count with raw spinlock associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated raw spinlock - * - * A plain sequence counter with external writer synchronization by a - * raw spinlock. The raw spinlock is associated to the sequence count in - * the static initializer or init function. This enables lockdep to - * validate that the write side critical section is properly serialized. - */ -typedef struct seqcount_raw_spinlock { - seqcount_t seqcount; - __SEQ_LOCK(raw_spinlock_t *lock); -} seqcount_raw_spinlock_t; - /** * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t * @name: Name of the seqcount_raw_spinlock_t instance @@ -211,21 +181,6 @@ typedef struct seqcount_raw_spinlock { #define seqcount_raw_spinlock_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_rwlock_t - sequence count with rwlock associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated rwlock - * - * A plain sequence counter with external writer synchronization by a - * rwlock. The rwlock is associated to the sequence count in the static - * initializer or init function. This enables lockdep to validate that - * the write side critical section is properly serialized. - */ -typedef struct seqcount_rwlock { - seqcount_t seqcount; - __SEQ_LOCK(rwlock_t *lock); -} seqcount_rwlock_t; - /** * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t * @name: Name of the seqcount_rwlock_t instance @@ -242,24 +197,6 @@ typedef struct seqcount_rwlock { #define seqcount_rwlock_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_mutex_t - sequence count with mutex associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated mutex - * - * A plain sequence counter with external writer synchronization by a - * mutex. The mutex is associated to the sequence counter in the static - * initializer or init function. This enables lockdep to validate that - * the write side critical section is properly serialized. - * - * The write side API functions write_seqcount_begin()/end() automatically - * disable and enable preemption when used with seqcount_mutex_t. - */ -typedef struct seqcount_mutex { - seqcount_t seqcount; - __SEQ_LOCK(struct mutex *lock); -} seqcount_mutex_t; - /** * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t * @name: Name of the seqcount_mutex_t instance @@ -276,24 +213,6 @@ typedef struct seqcount_mutex { #define seqcount_mutex_init(s, lock) \ seqcount_locktype_init(s, lock) -/** - * typedef seqcount_ww_mutex_t - sequence count with ww_mutex associated - * @seqcount: The real sequence counter - * @lock: Pointer to the associated ww_mutex - * - * A plain sequence counter with external writer synchronization by a - * ww_mutex. The ww_mutex is associated to the sequence counter in the static - * initializer or init function. This enables lockdep to validate that - * the write side critical section is properly serialized. 
- * - * The write side API functions write_seqcount_begin()/end() automatically - * disable and enable preemption when used with seqcount_ww_mutex_t. - */ -typedef struct seqcount_ww_mutex { - seqcount_t seqcount; - __SEQ_LOCK(struct ww_mutex *lock); -} seqcount_ww_mutex_t; - /** * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t * @name: Name of the seqcount_ww_mutex_t instance @@ -310,30 +229,50 @@ typedef struct seqcount_ww_mutex { #define seqcount_ww_mutex_init(s, lock) \ seqcount_locktype_init(s, lock) -/* - * @preempt: Is the associated write serialization lock preemtpible? +/** + * typedef seqcount_LOCKNAME_t - sequence counter with spinlock associated + * @seqcount: The real sequence counter + * @lock: Pointer to the associated spinlock + * + * A plain sequence counter with external writer synchronization by a + * spinlock. The spinlock is associated to the sequence count in the + * static initializer or init function. This enables lockdep to validate + * that the write side critical section is properly serialized. */ -#define SEQCOUNT_LOCKTYPE(locktype, preempt, lockmember) \ -static inline seqcount_t * \ -__seqcount_##locktype##_ptr(seqcount_##locktype##_t *s) \ + +/* + * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers + * @locktype: actual typename + * @lockname: name + * @preemptible: preemptibility of above locktype + * @lockmember: argument for lockdep_assert_held() + */ +#define SEQCOUNT_LOCKTYPE(locktype, lockname, preemptible, lockmember) \ +typedef struct seqcount_##lockname { \ + seqcount_t seqcount; \ + __SEQ_LOCK(locktype *lock); \ +} seqcount_##lockname##_t; \ + \ +static __always_inline seqcount_t * \ +__seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ return &s->seqcount; \ } \ \ -static inline bool \ -__seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s) \ +static __always_inline bool \ +__seqcount_##lockname##_preemptible(seqcount_##lockname##_t *s) \ { \ - return preempt; \ + return preemptible; \ } \ \ -static inline void \ -__seqcount_##locktype##_assert(seqcount_##locktype##_t *s) \ +static __always_inline void \ +__seqcount_##lockname##_assert(seqcount_##lockname##_t *s) \ { \ __SEQ_LOCK(lockdep_assert_held(lockmember)); \ } /* - * Similar hooks, but for plain seqcount_t + * __seqprop() for seqcount_t */ static inline seqcount_t *__seqcount_ptr(seqcount_t *s) @@ -351,17 +290,14 @@ static inline void __seqcount_assert(seqcount_t *s) lockdep_assert_preemption_disabled(); } -/* - * @s: Pointer to seqcount_locktype_t, generated hooks first parameter. 
- */ -SEQCOUNT_LOCKTYPE(raw_spinlock, false, s->lock) -SEQCOUNT_LOCKTYPE(spinlock, false, s->lock) -SEQCOUNT_LOCKTYPE(rwlock, false, s->lock) -SEQCOUNT_LOCKTYPE(mutex, true, s->lock) -SEQCOUNT_LOCKTYPE(ww_mutex, true, &s->lock->base) - -#define __seqprop_case(s, locktype, prop) \ - seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s)) +SEQCOUNT_LOCKTYPE(raw_spinlock_t, raw_spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(spinlock_t, spinlock, false, s->lock) +SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) +SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) +SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) + +#define __seqprop_case(s, lockname, prop) \ + seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) #define __seqprop(s, prop) _Generic(*(s), \ seqcount_t: __seqcount_##prop((void *)(s)), \ -- cgit v1.2.3 From e4e9ab3f9f91ad3b88d12363f890e8ad9b59b645 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 12:00:53 +0200 Subject: seqlock: Fold seqcount_LOCKNAME_init() definition Manual repetition is boring and error prone. Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 61 ++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 47 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 4b259bb4d4b9..501ff47d1e8e 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -143,12 +143,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) __SEQ_LOCK(.lock = (assoc_lock)) \ } -#define seqcount_locktype_init(s, assoc_lock) \ -do { \ - seqcount_init(&(s)->seqcount); \ - __SEQ_LOCK((s)->lock = (assoc_lock)); \ -} while (0) - /** * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t * @name: Name of the seqcount_spinlock_t instance @@ -157,14 +151,6 @@ do { \ #define SEQCNT_SPINLOCK_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_spinlock_init - runtime initializer for seqcount_spinlock_t - * @s: Pointer to the seqcount_spinlock_t instance - * @lock: Pointer to the associated spinlock - */ -#define seqcount_spinlock_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t * @name: Name of the seqcount_raw_spinlock_t instance @@ -173,14 +159,6 @@ do { \ #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_raw_spinlock_init - runtime initializer for seqcount_raw_spinlock_t - * @s: Pointer to the seqcount_raw_spinlock_t instance - * @lock: Pointer to the associated raw_spinlock - */ -#define seqcount_raw_spinlock_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t * @name: Name of the seqcount_rwlock_t instance @@ -189,14 +167,6 @@ do { \ #define SEQCNT_RWLOCK_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_rwlock_init - runtime initializer for seqcount_rwlock_t - * @s: Pointer to the seqcount_rwlock_t instance - * @lock: Pointer to the associated rwlock - */ -#define seqcount_rwlock_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t * @name: Name of the seqcount_mutex_t instance @@ -205,14 +175,6 @@ do { \ #define SEQCNT_MUTEX_ZERO(name, lock) \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) -/** - * seqcount_mutex_init - runtime initializer for seqcount_mutex_t - * @s: Pointer to the 
seqcount_mutex_t instance - * @lock: Pointer to the associated mutex - */ -#define seqcount_mutex_init(s, lock) \ - seqcount_locktype_init(s, lock) - /** * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t * @name: Name of the seqcount_ww_mutex_t instance @@ -222,15 +184,7 @@ do { \ SEQCOUNT_LOCKTYPE_ZERO(name, lock) /** - * seqcount_ww_mutex_init - runtime initializer for seqcount_ww_mutex_t - * @s: Pointer to the seqcount_ww_mutex_t instance - * @lock: Pointer to the associated ww_mutex - */ -#define seqcount_ww_mutex_init(s, lock) \ - seqcount_locktype_init(s, lock) - -/** - * typedef seqcount_LOCKNAME_t - sequence counter with spinlock associated + * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated * @seqcount: The real sequence counter * @lock: Pointer to the associated spinlock * @@ -240,6 +194,12 @@ do { \ * that the write side critical section is properly serialized. */ +/** + * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t + * @s: Pointer to the seqcount_LOCKNAME_t instance + * @lock: Pointer to the associated LOCKTYPE + */ + /* * SEQCOUNT_LOCKTYPE() - Instantiate seqcount_LOCKNAME_t and helpers * @locktype: actual typename @@ -253,6 +213,13 @@ typedef struct seqcount_##lockname { \ __SEQ_LOCK(locktype *lock); \ } seqcount_##lockname##_t; \ \ +static __always_inline void \ +seqcount_##lockname##_init(seqcount_##lockname##_t *s, locktype *lock) \ +{ \ + seqcount_init(&s->seqcount); \ + __SEQ_LOCK(s->lock = lock); \ +} \ + \ static __always_inline seqcount_t * \ __seqcount_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ -- cgit v1.2.3 From 0efc94c5d15c3da0a69543d86ad2180f39256ed6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 12:03:13 +0200 Subject: seqcount: Compress SEQCNT_LOCKNAME_ZERO() Less is more. 
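[ Usage sketch, not taken from this series: how the surviving SEQCNT_LOCKNAME_ZERO() initializers are typically spelled at a call site, with the lockdep-validated write path shown for context. The obj_lock/obj_seq/obj_update names are hypothetical.

	static DEFINE_SPINLOCK(obj_lock);
	static seqcount_spinlock_t obj_seq =
		SEQCNT_SPINLOCK_ZERO(obj_seq, &obj_lock);

	static void obj_update(void)
	{
		spin_lock(&obj_lock);
		write_seqcount_begin(&obj_seq);	/* lockdep asserts obj_lock is held */
		/* ... publish the updated data ... */
		write_seqcount_end(&obj_seq);
		spin_unlock(&obj_lock);
	}
]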
Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 63 ++++++++++++++----------------------------------- 1 file changed, 18 insertions(+), 45 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 501ff47d1e8e..251dcd6f5cd8 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -138,51 +138,6 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) #define __SEQ_LOCK(expr) #endif -#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ - .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ - __SEQ_LOCK(.lock = (assoc_lock)) \ -} - -/** - * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t - * @name: Name of the seqcount_spinlock_t instance - * @lock: Pointer to the associated spinlock - */ -#define SEQCNT_SPINLOCK_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t - * @name: Name of the seqcount_raw_spinlock_t instance - * @lock: Pointer to the associated raw_spinlock - */ -#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t - * @name: Name of the seqcount_rwlock_t instance - * @lock: Pointer to the associated rwlock - */ -#define SEQCNT_RWLOCK_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t - * @name: Name of the seqcount_mutex_t instance - * @lock: Pointer to the associated mutex - */ -#define SEQCNT_MUTEX_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - -/** - * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t - * @name: Name of the seqcount_ww_mutex_t instance - * @lock: Pointer to the associated ww_mutex - */ -#define SEQCNT_WW_MUTEX_ZERO(name, lock) \ - SEQCOUNT_LOCKTYPE_ZERO(name, lock) - /** * typedef seqcount_LOCKNAME_t - sequence counter with LOCKTYPR associated * @seqcount: The real sequence counter @@ -263,6 +218,24 @@ SEQCOUNT_LOCKTYPE(rwlock_t, rwlock, false, s->lock) SEQCOUNT_LOCKTYPE(struct mutex, mutex, true, s->lock) SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) +/** + * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t + * @name: Name of the seqcount_LOCKNAME_t instance + * @lock: Pointer to the associated LOCKTYPE + */ + +#define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) { \ + .seqcount = SEQCNT_ZERO(seq_name.seqcount), \ + __SEQ_LOCK(.lock = (assoc_lock)) \ +} + +#define SEQCNT_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_RWLOCK_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) +#define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKTYPE_ZERO(name, lock) + + #define __seqprop_case(s, lockname, prop) \ seqcount_##lockname##_t: __seqcount_##lockname##_##prop((void *)(s)) -- cgit v1.2.3 From b5e6a027bd327daa679ca55182a920659e2cbb90 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Jul 2020 12:11:49 +0200 Subject: seqcount: More consistent seqprop names Attempt uniformity and brevity. 
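[ Illustration, not part of this patch: the __seqprop() machinery these names feed is plain C11 _Generic() dispatch. A self-contained userspace sketch of the pattern, with simplified stand-in types; the seqprop_ptr name and the two-type _Generic list are reductions of the real header.

	#include <stdio.h>

	typedef struct { unsigned sequence; } seqcount_t;
	typedef struct { seqcount_t seqcount; } seqcount_spinlock_t;

	static seqcount_t *seqprop_plain_ptr(seqcount_t *s)
	{
		return s;
	}

	static seqcount_t *seqprop_spinlock_ptr(seqcount_spinlock_t *s)
	{
		return &s->seqcount;
	}

	/*
	 * The (void *) casts keep every _Generic() arm well-typed no
	 * matter which branch is selected, as in the real header.
	 */
	#define seqprop_ptr(s)						\
		_Generic(*(s),						\
			seqcount_t: seqprop_plain_ptr((void *)(s)),	\
			seqcount_spinlock_t: seqprop_spinlock_ptr((void *)(s)))

	int main(void)
	{
		seqcount_t plain = { 0 };
		seqcount_spinlock_t assoc = { { 0 } };

		/* One spelling, two types: each call selects its accessor. */
		printf("%u %u\n", seqprop_ptr(&plain)->sequence,
		       seqprop_ptr(&assoc)->sequence);
		return 0;
	}
]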
Signed-off-by: Peter Zijlstra (Intel) --- include/linux/seqlock.h | 52 ++++++++++++++++++++++++------------------------- 1 file changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 251dcd6f5cd8..a076f783aa36 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -247,9 +247,9 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) __seqprop_case((s), mutex, prop), \ __seqprop_case((s), ww_mutex, prop)) -#define __to_seqcount_t(s) __seqprop(s, ptr) -#define __associated_lock_exists_and_is_preemptible(s) __seqprop(s, preemptible) -#define __assert_write_section_is_protected(s) __seqprop(s, assert) +#define __seqcount_ptr(s) __seqprop(s, ptr) +#define __seqcount_lock_preemptible(s) __seqprop(s, preemptible) +#define __seqcount_assert_lock_held(s) __seqprop(s, assert) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier @@ -266,7 +266,7 @@ SEQCOUNT_LOCKTYPE(struct ww_mutex, ww_mutex, true, &s->lock->base) * Return: count to be passed to read_seqcount_retry() */ #define __read_seqcount_begin(s) \ - __read_seqcount_t_begin(__to_seqcount_t(s)) + __read_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned __read_seqcount_t_begin(const seqcount_t *s) { @@ -289,7 +289,7 @@ repeat: * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount_begin(s) \ - raw_read_seqcount_t_begin(__to_seqcount_t(s)) + raw_read_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) { @@ -305,7 +305,7 @@ static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define read_seqcount_begin(s) \ - read_seqcount_t_begin(__to_seqcount_t(s)) + read_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned read_seqcount_t_begin(const seqcount_t *s) { @@ -325,7 +325,7 @@ static inline unsigned read_seqcount_t_begin(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define raw_read_seqcount(s) \ - raw_read_seqcount_t(__to_seqcount_t(s)) + raw_read_seqcount_t(__seqcount_ptr(s)) static inline unsigned raw_read_seqcount_t(const seqcount_t *s) { @@ -353,7 +353,7 @@ static inline unsigned raw_read_seqcount_t(const seqcount_t *s) * Return: count to be passed to read_seqcount_retry() */ #define raw_seqcount_begin(s) \ - raw_seqcount_t_begin(__to_seqcount_t(s)) + raw_seqcount_t_begin(__seqcount_ptr(s)) static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) { @@ -380,7 +380,7 @@ static inline unsigned raw_seqcount_t_begin(const seqcount_t *s) * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - __read_seqcount_t_retry(__to_seqcount_t(s), start) + __read_seqcount_t_retry(__seqcount_ptr(s), start) static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -400,7 +400,7 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - read_seqcount_t_retry(__to_seqcount_t(s), start) + read_seqcount_t_retry(__seqcount_ptr(s), start) static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) { @@ -414,10 +414,10 @@ static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start) */ #define raw_write_seqcount_begin(s) \ do { \ - if (__associated_lock_exists_and_is_preemptible(s)) 
\ + if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ - raw_write_seqcount_t_begin(__to_seqcount_t(s)); \ + raw_write_seqcount_t_begin(__seqcount_ptr(s)); \ } while (0) static inline void raw_write_seqcount_t_begin(seqcount_t *s) @@ -433,9 +433,9 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s) */ #define raw_write_seqcount_end(s) \ do { \ - raw_write_seqcount_t_end(__to_seqcount_t(s)); \ + raw_write_seqcount_t_end(__seqcount_ptr(s)); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -456,12 +456,12 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s) */ #define write_seqcount_begin_nested(s, subclass) \ do { \ - __assert_write_section_is_protected(s); \ + __seqcount_assert_lock_held(s); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin_nested(__to_seqcount_t(s), subclass); \ + write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \ } while (0) static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) @@ -483,12 +483,12 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass) */ #define write_seqcount_begin(s) \ do { \ - __assert_write_section_is_protected(s); \ + __seqcount_assert_lock_held(s); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_disable(); \ \ - write_seqcount_t_begin(__to_seqcount_t(s)); \ + write_seqcount_t_begin(__seqcount_ptr(s)); \ } while (0) static inline void write_seqcount_t_begin(seqcount_t *s) @@ -504,9 +504,9 @@ static inline void write_seqcount_t_begin(seqcount_t *s) */ #define write_seqcount_end(s) \ do { \ - write_seqcount_t_end(__to_seqcount_t(s)); \ + write_seqcount_t_end(__seqcount_ptr(s)); \ \ - if (__associated_lock_exists_and_is_preemptible(s)) \ + if (__seqcount_lock_preemptible(s)) \ preempt_enable(); \ } while (0) @@ -558,7 +558,7 @@ static inline void write_seqcount_t_end(seqcount_t *s) * } */ #define raw_write_seqcount_barrier(s) \ - raw_write_seqcount_t_barrier(__to_seqcount_t(s)) + raw_write_seqcount_t_barrier(__seqcount_ptr(s)) static inline void raw_write_seqcount_t_barrier(seqcount_t *s) { @@ -578,7 +578,7 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s) * will complete successfully and see data older than this. */ #define write_seqcount_invalidate(s) \ - write_seqcount_t_invalidate(__to_seqcount_t(s)) + write_seqcount_t_invalidate(__seqcount_ptr(s)) static inline void write_seqcount_t_invalidate(seqcount_t *s) { @@ -604,7 +604,7 @@ static inline void write_seqcount_t_invalidate(seqcount_t *s) * checked with read_seqcount_retry(). */ #define raw_read_seqcount_latch(s) \ - raw_read_seqcount_t_latch(__to_seqcount_t(s)) + raw_read_seqcount_t_latch(__seqcount_ptr(s)) static inline int raw_read_seqcount_t_latch(seqcount_t *s) { @@ -695,7 +695,7 @@ static inline int raw_read_seqcount_t_latch(seqcount_t *s) * patterns to manage the lifetimes of the objects within. 
*/ #define raw_write_seqcount_latch(s) \ - raw_write_seqcount_t_latch(__to_seqcount_t(s)) + raw_write_seqcount_t_latch(__seqcount_ptr(s)) static inline void raw_write_seqcount_t_latch(seqcount_t *s) { -- cgit v1.2.3 From 0cd39f4600ed4de859383018eb10f0f724900e1b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 6 Aug 2020 14:35:11 +0200 Subject: locking/seqlock, headers: Untangle the spaghetti monster By using lockdep_assert_*() from seqlock.h, the spaghetti monster attacked. Attack back by reducing seqlock.h dependencies from two key high level headers:

 - <linux/seqlock.h>:               -Remove <linux/ww_mutex.h>
 - <linux/time.h>:                  -Remove <linux/seqlock.h>
 - <linux/sched.h>:                 +Add    <linux/seqlock.h>

The price was to add it to sched.h ... Core header fallout, we add direct header dependencies instead of gaining them parasitically from higher level headers:

 - <linux/dynamic_queue_limits.h>:  +Add <asm/bug.h>
 - <linux/hrtimer.h>:               +Add <linux/seqlock.h>
 - <linux/ktime.h>:                 +Add <asm/bug.h>
 - <linux/lockdep.h>:               +Add <linux/smp.h>
 - <linux/sched.h>:                 +Add <linux/seqlock.h>
 - <linux/videodev2.h>:             +Add <linux/kernel.h>

Arch headers fallout:

 - PARISC: <asm/timex.h>:           +Add <asm/special_insns.h>
 - SH: <asm/io.h>:                  +Add <asm/page.h>
 - SPARC: <asm/timer_64.h>:         +Add <uapi/asm/asi.h>
 - SPARC: <asm/vvar.h>:             +Add <asm/processor.h>, <asm/barrier.h>, -Remove <linux/seqlock.h>
 - X86: <asm/fixmap.h>:             +Add <asm/pgtable_types.h>, -Remove <asm/acpi.h>

There's also a bunch of parasitic header dependency fallout in .c files, not listed separately. [ mingo: Extended the changelog, split up & fixed the original patch. ] Co-developed-by: Ingo Molnar Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20200804133438.GK2674@hirez.programming.kicks-ass.net --- arch/sh/include/asm/io.h | 1 + arch/sh/kernel/machvec.c | 1 + arch/sparc/include/asm/timer_64.h | 1 + arch/sparc/include/asm/vvar.h | 3 ++- arch/sparc/kernel/vdso.c | 1 - arch/x86/include/asm/fixmap.h | 2 +- arch/x86/kernel/apic/apic_noop.c | 1 + arch/x86/kernel/apic/hw_nmi.c | 1 + arch/x86/kernel/apic/probe_64.c | 1 + arch/x86/kernel/cpu/amd.c | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/hygon.c | 1 + arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/jailhouse.c | 1 + arch/x86/kernel/tsc_msr.c | 1 + arch/x86/mm/init_32.c | 1 + arch/x86/xen/apic.c | 1 + arch/x86/xen/smp_hvm.c | 1 + arch/x86/xen/suspend_pv.c | 4 ++-- include/linux/dynamic_queue_limits.h | 2 ++ include/linux/hrtimer.h | 1 + include/linux/ktime.h | 1 + include/linux/lockdep.h | 1 + include/linux/mutex.h | 11 +++++++++++ include/linux/sched.h | 1 + include/linux/seqlock.h | 1 - include/linux/time.h | 1 - include/linux/videodev2.h | 1 + include/linux/ww_mutex.h | 8 -------- 29 files changed, 38 insertions(+), 15 deletions(-) (limited to 'include/linux/seqlock.h') diff --git a/arch/sh/include/asm/io.h b/arch/sh/include/asm/io.h index 26f0f9b4658b..ec587b583822 100644 --- a/arch/sh/include/asm/io.h +++ b/arch/sh/include/asm/io.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/kernel/machvec.c b/arch/sh/kernel/machvec.c index beadbbdb4486..76bd8955d4fe 100644 --- a/arch/sh/kernel/machvec.c +++ b/arch/sh/kernel/machvec.c @@ -15,6 +15,7 @@ #include #include #include +#include #define MV_NAME_SIZE 32 diff --git a/arch/sparc/include/asm/timer_64.h b/arch/sparc/include/asm/timer_64.h index c7e4fb601a57..dcfad4613e18 100644 --- a/arch/sparc/include/asm/timer_64.h +++ b/arch/sparc/include/asm/timer_64.h @@ -7,6 +7,7 @@ #ifndef _SPARC64_TIMER_H #define _SPARC64_TIMER_H +#include #include #include diff --git a/arch/sparc/include/asm/vvar.h b/arch/sparc/include/asm/vvar.h index 0289503d1cb0..6eaf5cfcaae1 100644 --- a/arch/sparc/include/asm/vvar.h +++ b/arch/sparc/include/asm/vvar.h @@ -6,7 +6,8 @@ #define _ASM_SPARC_VVAR_DATA_H #include -#include +#include +#include #include #include diff --git a/arch/sparc/kernel/vdso.c b/arch/sparc/kernel/vdso.c index 58880662b271..0e27437eb97b 100644 --- a/arch/sparc/kernel/vdso.c +++
b/arch/sparc/kernel/vdso.c @@ -7,7 +7,6 @@ * a different vsyscall implementation for Linux/IA32 and for the name. */ -#include #include #include diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h index b9527a54db99..0f0dd645b594 100644 --- a/arch/x86/include/asm/fixmap.h +++ b/arch/x86/include/asm/fixmap.h @@ -26,9 +26,9 @@ #ifndef __ASSEMBLY__ #include -#include #include #include +#include #ifdef CONFIG_X86_32 #include #include diff --git a/arch/x86/kernel/apic/apic_noop.c b/arch/x86/kernel/apic/apic_noop.c index 98c9bb75d185..780c702969b7 100644 --- a/arch/x86/kernel/apic/apic_noop.c +++ b/arch/x86/kernel/apic/apic_noop.c @@ -10,6 +10,7 @@ * like self-ipi, etc... */ #include +#include #include diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index d1fc62a67320..34a992e275ef 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -9,6 +9,7 @@ * Bits copied from original nmi.c file * */ +#include #include #include diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 29f0e0984557..bd3835d6b535 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -8,6 +8,7 @@ * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. */ +#include #include #include "local.h" diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index d4806eac9325..dcc3d943c68f 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 95c090a45b4b..52b565016eb1 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/hygon.c b/arch/x86/kernel/cpu/hygon.c index 4e28c1fc8749..ac6c30e5801d 100644 --- a/arch/x86/kernel/cpu/hygon.c +++ b/arch/x86/kernel/cpu/hygon.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0ab48f1cdf84..6eb42d7a3dfd 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index 2caf5b990bf6..4eb8f2d19a87 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/tsc_msr.c b/arch/x86/kernel/tsc_msr.c index 4fec6f3a1858..46c72f2ec32f 100644 --- a/arch/x86/kernel/tsc_msr.c +++ b/arch/x86/kernel/tsc_msr.c @@ -7,6 +7,7 @@ */ #include +#include #include #include diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 8b4afad84f4a..d46a5cf6ccb0 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -52,6 +52,7 @@ #include #include #include +#include #include "mm_internal.h" diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 2df7d089ad54..1aff4ae65655 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include diff --git a/arch/x86/xen/smp_hvm.c b/arch/x86/xen/smp_hvm.c index f8d39440b292..f5e7db4f82ab 100644 --- a/arch/x86/xen/smp_hvm.c +++ b/arch/x86/xen/smp_hvm.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include 
#include #include diff --git a/arch/x86/xen/suspend_pv.c b/arch/x86/xen/suspend_pv.c index 8303b58c79a9..cae9660f4c67 100644 --- a/arch/x86/xen/suspend_pv.c +++ b/arch/x86/xen/suspend_pv.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include - #include #include +#include + #include "xen-ops.h" void xen_pv_pre_suspend(void) diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index 99fc06f0afc1..407c2f281b64 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -38,6 +38,8 @@ #ifdef __KERNEL__ +#include + struct dql { /* Fields accessed in enqueue path (dql_queued) */ unsigned int num_queued; /* Total ever queued */ diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 25993b86ac5c..107cedd7019a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 42d2e6ac35f2..a12b5523cc18 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -23,6 +23,7 @@ #include #include +#include /* Nanosecond scalar representation for kernel time values */ typedef s64 ktime_t; diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 39a35699d0d6..62a382d1845b 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -11,6 +11,7 @@ #define __LINUX_LOCKDEP_H #include +#include #include struct task_struct; diff --git a/include/linux/mutex.h b/include/linux/mutex.h index ae197cc00cc8..dcd185cbfe79 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -65,6 +65,17 @@ struct mutex { #endif }; +struct ww_class; +struct ww_acquire_ctx; + +struct ww_mutex { + struct mutex base; + struct ww_acquire_ctx *ctx; +#ifdef CONFIG_DEBUG_MUTEXES + struct ww_class *ww_class; +#endif +}; + /* * This is the control structure for tasks blocked on mutex, * which resides on the blocked task's kernel stack: diff --git a/include/linux/sched.h b/include/linux/sched.h index 9a9d8263962d..7c7a9499d7bc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -31,6 +31,7 @@ #include #include #include +#include #include /* task_struct member predeclarations (sorted alphabetically): */ diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index a076f783aa36..962d9768945f 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -19,7 +19,6 @@ #include #include #include -#include #include diff --git a/include/linux/time.h b/include/linux/time.h index 4c325bf44ce0..b142cb5f5a53 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -3,7 +3,6 @@ #define _LINUX_TIME_H # include -# include # include # include diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index 16c0ed6c50a7..219037f4c08d 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -57,6 +57,7 @@ #define __LINUX_VIDEODEV2_H #include /* need struct timeval */ +#include #include #endif /* __LINUX_VIDEODEV2_H */ diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index d7554252404c..850424e5d030 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -48,14 +48,6 @@ struct ww_acquire_ctx { #endif }; -struct ww_mutex { - struct mutex base; - struct ww_acquire_ctx *ctx; -#ifdef CONFIG_DEBUG_MUTEXES - struct ww_class *ww_class; -#endif -}; - #ifdef CONFIG_DEBUG_LOCK_ALLOC # define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) \ , .ww_class = class -- cgit v1.2.3
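[ Net effect of the series above, as a hedged usage sketch with hypothetical obj_seq/obj_data names: the classic seqcount read loop is written identically whether the counter is a plain seqcount_t or one of the seqcount_LOCKNAME_t variants, since every reader-side macro funnels through __seqcount_ptr().

	static seqcount_spinlock_t obj_seq;	/* initialized elsewhere */
	static u64 obj_data;

	static u64 obj_read(void)
	{
		unsigned int seq;
		u64 val;

		do {
			seq = read_seqcount_begin(&obj_seq);
			val = obj_data;	/* plain loads inside the read section */
		} while (read_seqcount_retry(&obj_seq, seq));

		return val;
	}
]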