From 0e862838f290147ea9c16db852d8d494b552d38d Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:07 +0200 Subject: bitops: unify non-atomic bitops prototypes across architectures Currently, there is a mess with the prototypes of the non-atomic bitops across the different architectures: ret bool, int, unsigned long nr int, long, unsigned int, unsigned long addr volatile unsigned long *, volatile void * Thankfully, it doesn't provoke any bugs, but can sometimes make the compiler angry when it's not handy at all. Adjust all the prototypes to the following standard: ret bool retval can be only 0 or 1 nr unsigned long native; signed makes no sense addr volatile unsigned long * bitmaps are arrays of ulongs Next, some architectures don't define 'arch_' versions as they don't support instrumentation, others do. To make sure there is always the same set of callables present and to ease any potential future changes, make them all follow the rule: * architecture-specific files define only 'arch_' versions; * non-prefixed versions can be defined only in asm-generic files; and place the non-prefixed definitions into a new file in asm-generic to be included by non-instrumented architectures. Finally, add some static assertions in order to prevent people from making a mess in this room again. I also used the %__always_inline attribute consistently, so that they always get resolved to the actual operations. Suggested-by: Andy Shevchenko Signed-off-by: Alexander Lobakin Acked-by: Mark Rutland Reviewed-by: Yury Norov Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/linux/bitops.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux/bitops.h') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 7aaed501f768..87087454a288 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -26,12 +26,29 @@ extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); +#include + /* * Include this here because some architectures need generic_ffs/fls in * scope */ #include +/* Check that the bitops prototypes are sane */ +#define __check_bitop_pr(name) \ + static_assert(__same_type(arch_##name, generic_##name) && \ + __same_type(name, generic_##name)) + +__check_bitop_pr(__set_bit); +__check_bitop_pr(__clear_bit); +__check_bitop_pr(__change_bit); +__check_bitop_pr(__test_and_set_bit); +__check_bitop_pr(__test_and_clear_bit); +__check_bitop_pr(__test_and_change_bit); +__check_bitop_pr(test_bit); + +#undef __check_bitop_pr + static inline int get_bitmask_order(unsigned int count) { int order; -- cgit v1.2.3 From bb7379bfa680bd48b468e856475778db2ad866c1 Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:08 +0200 Subject: bitops: define const_*() versions of the non-atomics Define const_*() variants of the non-atomic bitops to be used when the input arguments are compile-time constants, so that the compiler will be always able to resolve those to compile-time constants as well. Those are mostly direct aliases for generic_*() with one exception for const_test_bit(): the original one is declared atomic-safe and thus doesn't discard the `volatile` qualifier, so in order to let optimize code, define it separately disregarding the qualifier. Add them to the compile-time type checks as well just in case. Suggested-by: Marco Elver Signed-off-by: Alexander Lobakin Reviewed-by: Marco Elver Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- include/asm-generic/bitops/generic-non-atomic.h | 31 +++++++++++++++++++++++++ include/linux/bitops.h | 1 + 2 files changed, 32 insertions(+) (limited to 'include/linux/bitops.h') diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h index b85b8a2ac239..3d5ebd24652b 100644 --- a/include/asm-generic/bitops/generic-non-atomic.h +++ b/include/asm-generic/bitops/generic-non-atomic.h @@ -127,4 +127,35 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr) return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } +/* + * const_*() definitions provide good compile-time optimizations when + * the passed arguments can be resolved at compile time. + */ +#define const___set_bit generic___set_bit +#define const___clear_bit generic___clear_bit +#define const___change_bit generic___change_bit +#define const___test_and_set_bit generic___test_and_set_bit +#define const___test_and_clear_bit generic___test_and_clear_bit +#define const___test_and_change_bit generic___test_and_change_bit + +/** + * const_test_bit - Determine whether a bit is set + * @nr: bit number to test + * @addr: Address to start counting from + * + * A version of generic_test_bit() which discards the `volatile` qualifier to + * allow a compiler to optimize code harder. Non-atomic and to be called only + * for testing compile-time constants, e.g. by the corresponding macros, not + * directly from "regular" code. + */ +static __always_inline bool +const_test_bit(unsigned long nr, const volatile unsigned long *addr) +{ + const unsigned long *p = (const unsigned long *)addr + BIT_WORD(nr); + unsigned long mask = BIT_MASK(nr); + unsigned long val = *p; + + return !!(val & mask); +} + #endif /* __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 87087454a288..d393297287d5 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -37,6 +37,7 @@ extern unsigned long __sw_hweight64(__u64 w); /* Check that the bitops prototypes are sane */ #define __check_bitop_pr(name) \ static_assert(__same_type(arch_##name, generic_##name) && \ + __same_type(const_##name, generic_##name) && \ __same_type(name, generic_##name)) __check_bitop_pr(__set_bit); -- cgit v1.2.3 From e69eb9c460f128b71c6b995d75a05244e4b6cc3e Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:09 +0200 Subject: bitops: wrap non-atomic bitops with a transparent macro In preparation for altering the non-atomic bitops with a macro, wrap them in a transparent definition. This requires prepending one more '_' to their names in order to be able to do that seamlessly. It is a simple change, given that all the non-prefixed definitions are now in asm-generic. sparc32 already has several triple-underscored functions, so I had to rename them ('___' -> 'sp32_'). Signed-off-by: Alexander Lobakin Reviewed-by: Marco Elver Reviewed-by: Andy Shevchenko Signed-off-by: Yury Norov --- arch/sparc/include/asm/bitops_32.h | 18 +++++++------- arch/sparc/lib/atomic32.c | 12 +++++----- .../asm-generic/bitops/instrumented-non-atomic.h | 28 +++++++++++----------- .../bitops/non-instrumented-non-atomic.h | 14 +++++------ include/linux/bitops.h | 18 +++++++++++++- tools/include/asm-generic/bitops/non-atomic.h | 24 +++++++++---------- tools/include/linux/bitops.h | 16 +++++++++++++ 7 files changed, 81 insertions(+), 49 deletions(-) (limited to 'include/linux/bitops.h') diff --git a/arch/sparc/include/asm/bitops_32.h b/arch/sparc/include/asm/bitops_32.h index 889afa9f990f..3448c191b484 100644 --- a/arch/sparc/include/asm/bitops_32.h +++ b/arch/sparc/include/asm/bitops_32.h @@ -19,9 +19,9 @@ #error only can be included directly #endif -unsigned long ___set_bit(unsigned long *addr, unsigned long mask); -unsigned long ___clear_bit(unsigned long *addr, unsigned long mask); -unsigned long ___change_bit(unsigned long *addr, unsigned long mask); +unsigned long sp32___set_bit(unsigned long *addr, unsigned long mask); +unsigned long sp32___clear_bit(unsigned long *addr, unsigned long mask); +unsigned long sp32___change_bit(unsigned long *addr, unsigned long mask); /* * Set bit 'nr' in 32-bit quantity at address 'addr' where bit '0' @@ -36,7 +36,7 @@ static inline int test_and_set_bit(unsigned long nr, volatile unsigned long *add ADDR = ((unsigned long *) addr) + (nr >> 5); mask = 1 << (nr & 31); - return ___set_bit(ADDR, mask) != 0; + return sp32___set_bit(ADDR, mask) != 0; } static inline void set_bit(unsigned long nr, volatile unsigned long *addr) @@ -46,7 +46,7 @@ static inline void set_bit(unsigned long nr, volatile unsigned long *addr) ADDR = ((unsigned long *) addr) + (nr >> 5); mask = 1 << (nr & 31); - (void) ___set_bit(ADDR, mask); + (void) sp32___set_bit(ADDR, mask); } static inline int test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) @@ -56,7 +56,7 @@ static inline int test_and_clear_bit(unsigned long nr, volatile unsigned long *a ADDR = ((unsigned long *) addr) + (nr >> 5); mask = 1 << (nr & 31); - return ___clear_bit(ADDR, mask) != 0; + return sp32___clear_bit(ADDR, mask) != 0; } static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) @@ -66,7 +66,7 @@ static inline void clear_bit(unsigned long nr, volatile unsigned long *addr) ADDR = ((unsigned long *) addr) + (nr >> 5); mask = 1 << (nr & 31); - (void) ___clear_bit(ADDR, mask); + (void) sp32___clear_bit(ADDR, mask); } static inline int test_and_change_bit(unsigned long nr, volatile unsigned long *addr) @@ -76,7 +76,7 @@ static inline int test_and_change_bit(unsigned long nr, volatile unsigned long * ADDR = ((unsigned long *) addr) + (nr >> 5); mask = 1 << (nr & 31); - return ___change_bit(ADDR, mask) != 0; + return sp32___change_bit(ADDR, mask) != 0; } static inline void change_bit(unsigned long nr, volatile unsigned long *addr) @@ -86,7 +86,7 @@ static inline void change_bit(unsigned long nr, volatile unsigned long *addr) ADDR = ((unsigned long *) addr) + (nr >> 5); mask = 1 << (nr & 31); - (void) ___change_bit(ADDR, mask); + (void) sp32___change_bit(ADDR, mask); } #include diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 8b81d0f00c97..cf80d1ae352b 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -120,7 +120,7 @@ void arch_atomic_set(atomic_t *v, int i) } EXPORT_SYMBOL(arch_atomic_set); -unsigned long ___set_bit(unsigned long *addr, unsigned long mask) +unsigned long sp32___set_bit(unsigned long *addr, unsigned long mask) { unsigned long old, flags; @@ -131,9 +131,9 @@ unsigned long ___set_bit(unsigned long *addr, unsigned long mask) return old & mask; } -EXPORT_SYMBOL(___set_bit); +EXPORT_SYMBOL(sp32___set_bit); -unsigned long ___clear_bit(unsigned long *addr, unsigned long mask) +unsigned long sp32___clear_bit(unsigned long *addr, unsigned long mask) { unsigned long old, flags; @@ -144,9 +144,9 @@ unsigned long ___clear_bit(unsigned long *addr, unsigned long mask) return old & mask; } -EXPORT_SYMBOL(___clear_bit); +EXPORT_SYMBOL(sp32___clear_bit); -unsigned long ___change_bit(unsigned long *addr, unsigned long mask) +unsigned long sp32___change_bit(unsigned long *addr, unsigned long mask) { unsigned long old, flags; @@ -157,7 +157,7 @@ unsigned long ___change_bit(unsigned long *addr, unsigned long mask) return old & mask; } -EXPORT_SYMBOL(___change_bit); +EXPORT_SYMBOL(sp32___change_bit); unsigned long __cmpxchg_u32(volatile u32 *ptr, u32 old, u32 new) { diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h index b019f77ef21c..988a3bbfba34 100644 --- a/include/asm-generic/bitops/instrumented-non-atomic.h +++ b/include/asm-generic/bitops/instrumented-non-atomic.h @@ -14,7 +14,7 @@ #include /** - * __set_bit - Set a bit in memory + * ___set_bit - Set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * @@ -23,14 +23,14 @@ * succeeds. */ static __always_inline void -__set_bit(unsigned long nr, volatile unsigned long *addr) +___set_bit(unsigned long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___set_bit(nr, addr); } /** - * __clear_bit - Clears a bit in memory + * ___clear_bit - Clears a bit in memory * @nr: the bit to clear * @addr: the address to start counting from * @@ -39,14 +39,14 @@ __set_bit(unsigned long nr, volatile unsigned long *addr) * succeeds. */ static __always_inline void -__clear_bit(unsigned long nr, volatile unsigned long *addr) +___clear_bit(unsigned long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___clear_bit(nr, addr); } /** - * __change_bit - Toggle a bit in memory + * ___change_bit - Toggle a bit in memory * @nr: the bit to change * @addr: the address to start counting from * @@ -55,7 +55,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *addr) * succeeds. */ static __always_inline void -__change_bit(unsigned long nr, volatile unsigned long *addr) +___change_bit(unsigned long nr, volatile unsigned long *addr) { instrument_write(addr + BIT_WORD(nr), sizeof(long)); arch___change_bit(nr, addr); @@ -86,7 +86,7 @@ static __always_inline void __instrument_read_write_bitop(long nr, volatile unsi } /** - * __test_and_set_bit - Set a bit and return its old value + * ___test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * @@ -94,14 +94,14 @@ static __always_inline void __instrument_read_write_bitop(long nr, volatile unsi * can appear to succeed but actually fail. */ static __always_inline bool -__test_and_set_bit(unsigned long nr, volatile unsigned long *addr) +___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_set_bit(nr, addr); } /** - * __test_and_clear_bit - Clear a bit and return its old value + * ___test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * @@ -109,14 +109,14 @@ __test_and_set_bit(unsigned long nr, volatile unsigned long *addr) * can appear to succeed but actually fail. */ static __always_inline bool -__test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) +___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_clear_bit(nr, addr); } /** - * __test_and_change_bit - Change a bit and return its old value + * ___test_and_change_bit - Change a bit and return its old value * @nr: Bit to change * @addr: Address to count from * @@ -124,19 +124,19 @@ __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) * can appear to succeed but actually fail. */ static __always_inline bool -__test_and_change_bit(unsigned long nr, volatile unsigned long *addr) +___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { __instrument_read_write_bitop(nr, addr); return arch___test_and_change_bit(nr, addr); } /** - * test_bit - Determine whether a bit is set + * _test_bit - Determine whether a bit is set * @nr: bit number to test * @addr: Address to start counting from */ static __always_inline bool -test_bit(unsigned long nr, const volatile unsigned long *addr) +_test_bit(unsigned long nr, const volatile unsigned long *addr) { instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long)); return arch_test_bit(nr, addr); diff --git a/include/asm-generic/bitops/non-instrumented-non-atomic.h b/include/asm-generic/bitops/non-instrumented-non-atomic.h index e0fd7bf72a56..bdb9b1ffaee9 100644 --- a/include/asm-generic/bitops/non-instrumented-non-atomic.h +++ b/include/asm-generic/bitops/non-instrumented-non-atomic.h @@ -3,14 +3,14 @@ #ifndef __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H #define __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H -#define __set_bit arch___set_bit -#define __clear_bit arch___clear_bit -#define __change_bit arch___change_bit +#define ___set_bit arch___set_bit +#define ___clear_bit arch___clear_bit +#define ___change_bit arch___change_bit -#define __test_and_set_bit arch___test_and_set_bit -#define __test_and_clear_bit arch___test_and_clear_bit -#define __test_and_change_bit arch___test_and_change_bit +#define ___test_and_set_bit arch___test_and_set_bit +#define ___test_and_clear_bit arch___test_and_clear_bit +#define ___test_and_change_bit arch___test_and_change_bit -#define test_bit arch_test_bit +#define _test_bit arch_test_bit #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h index d393297287d5..3c3afbae1533 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -26,8 +26,24 @@ extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); +/* + * Defined here because those may be needed by architecture-specific static + * inlines. + */ + #include +#define bitop(op, nr, addr) \ + op(nr, addr) + +#define __set_bit(nr, addr) bitop(___set_bit, nr, addr) +#define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) +#define __change_bit(nr, addr) bitop(___change_bit, nr, addr) +#define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) +#define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) +#define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) +#define test_bit(nr, addr) bitop(_test_bit, nr, addr) + /* * Include this here because some architectures need generic_ffs/fls in * scope @@ -38,7 +54,7 @@ extern unsigned long __sw_hweight64(__u64 w); #define __check_bitop_pr(name) \ static_assert(__same_type(arch_##name, generic_##name) && \ __same_type(const_##name, generic_##name) && \ - __same_type(name, generic_##name)) + __same_type(_##name, generic_##name)) __check_bitop_pr(__set_bit); __check_bitop_pr(__clear_bit); diff --git a/tools/include/asm-generic/bitops/non-atomic.h b/tools/include/asm-generic/bitops/non-atomic.h index e5e78e42e57b..0c472a833408 100644 --- a/tools/include/asm-generic/bitops/non-atomic.h +++ b/tools/include/asm-generic/bitops/non-atomic.h @@ -5,7 +5,7 @@ #include /** - * __set_bit - Set a bit in memory + * ___set_bit - Set a bit in memory * @nr: the bit to set * @addr: the address to start counting from * @@ -14,7 +14,7 @@ * may be that only one operation succeeds. */ static __always_inline void -__set_bit(unsigned long nr, volatile unsigned long *addr) +___set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -23,7 +23,7 @@ __set_bit(unsigned long nr, volatile unsigned long *addr) } static __always_inline void -__clear_bit(unsigned long nr, volatile unsigned long *addr) +___clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -32,7 +32,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *addr) } /** - * __change_bit - Toggle a bit in memory + * ___change_bit - Toggle a bit in memory * @nr: the bit to change * @addr: the address to start counting from * @@ -41,7 +41,7 @@ __clear_bit(unsigned long nr, volatile unsigned long *addr) * may be that only one operation succeeds. */ static __always_inline void -__change_bit(unsigned long nr, volatile unsigned long *addr) +___change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -50,7 +50,7 @@ __change_bit(unsigned long nr, volatile unsigned long *addr) } /** - * __test_and_set_bit - Set a bit and return its old value + * ___test_and_set_bit - Set a bit and return its old value * @nr: Bit to set * @addr: Address to count from * @@ -59,7 +59,7 @@ __change_bit(unsigned long nr, volatile unsigned long *addr) * but actually fail. You must protect multiple accesses with a lock. */ static __always_inline bool -__test_and_set_bit(unsigned long nr, volatile unsigned long *addr) +___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -70,7 +70,7 @@ __test_and_set_bit(unsigned long nr, volatile unsigned long *addr) } /** - * __test_and_clear_bit - Clear a bit and return its old value + * ___test_and_clear_bit - Clear a bit and return its old value * @nr: Bit to clear * @addr: Address to count from * @@ -79,7 +79,7 @@ __test_and_set_bit(unsigned long nr, volatile unsigned long *addr) * but actually fail. You must protect multiple accesses with a lock. */ static __always_inline bool -__test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) +___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -91,7 +91,7 @@ __test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) /* WARNING: non atomic and it can be reordered! */ static __always_inline bool -__test_and_change_bit(unsigned long nr, volatile unsigned long *addr) +___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) { unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); @@ -102,12 +102,12 @@ __test_and_change_bit(unsigned long nr, volatile unsigned long *addr) } /** - * test_bit - Determine whether a bit is set + * _test_bit - Determine whether a bit is set * @nr: bit number to test * @addr: Address to start counting from */ static __always_inline bool -test_bit(unsigned long nr, const volatile unsigned long *addr) +_test_bit(unsigned long nr, const volatile unsigned long *addr) { return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1))); } diff --git a/tools/include/linux/bitops.h b/tools/include/linux/bitops.h index 5fca38fe1ba8..f18683b95ea6 100644 --- a/tools/include/linux/bitops.h +++ b/tools/include/linux/bitops.h @@ -25,6 +25,22 @@ extern unsigned int __sw_hweight16(unsigned int w); extern unsigned int __sw_hweight32(unsigned int w); extern unsigned long __sw_hweight64(__u64 w); +/* + * Defined here because those may be needed by architecture-specific static + * inlines. + */ + +#define bitop(op, nr, addr) \ + op(nr, addr) + +#define __set_bit(nr, addr) bitop(___set_bit, nr, addr) +#define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) +#define __change_bit(nr, addr) bitop(___change_bit, nr, addr) +#define __test_and_set_bit(nr, addr) bitop(___test_and_set_bit, nr, addr) +#define __test_and_clear_bit(nr, addr) bitop(___test_and_clear_bit, nr, addr) +#define __test_and_change_bit(nr, addr) bitop(___test_and_change_bit, nr, addr) +#define test_bit(nr, addr) bitop(_test_bit, nr, addr) + /* * Include this here because some architectures need generic_ffs/fls in * scope -- cgit v1.2.3 From b03fc1173c0c2bb8fad61902a862985cecdc4b1b Mon Sep 17 00:00:00 2001 From: Alexander Lobakin Date: Fri, 24 Jun 2022 14:13:10 +0200 Subject: bitops: let optimize out non-atomic bitops on compile-time constants Currently, many architecture-specific non-atomic bitop implementations use inline asm or other hacks which are faster or more robust when working with "real" variables (i.e. fields from the structures etc.), but the compilers have no clue how to optimize them out when called on compile-time constants. That said, the following code: DECLARE_BITMAP(foo, BITS_PER_LONG) = { }; // -> unsigned long foo[1]; unsigned long bar = BIT(BAR_BIT); unsigned long baz = 0; __set_bit(FOO_BIT, foo); baz |= BIT(BAZ_BIT); BUILD_BUG_ON(!__builtin_constant_p(test_bit(FOO_BIT, foo)); BUILD_BUG_ON(!__builtin_constant_p(bar & BAR_BIT)); BUILD_BUG_ON(!__builtin_constant_p(baz & BAZ_BIT)); triggers the first assertion on x86_64, which means that the compiler is unable to evaluate it to a compile-time initializer when the architecture-specific bitop is used even if it's obvious. In order to let the compiler optimize out such cases, expand the bitop() macro to use the "constant" C non-atomic bitop implementations when all of the arguments passed are compile-time constants, which means that the result will be a compile-time constant as well, so that it produces more efficient and simple code in 100% cases, comparing to the architecture-specific counterparts. The savings are architecture, compiler and compiler flags dependent, for example, on x86_64 -O2: GCC 12: add/remove: 78/29 grow/shrink: 332/525 up/down: 31325/-61560 (-30235) LLVM 13: add/remove: 79/76 grow/shrink: 184/537 up/down: 55076/-141892 (-86816) LLVM 14: add/remove: 10/3 grow/shrink: 93/138 up/down: 3705/-6992 (-3287) and ARM64 (courtesy of Mark): GCC 11: add/remove: 92/29 grow/shrink: 933/2766 up/down: 39340/-82580 (-43240) LLVM 14: add/remove: 21/11 grow/shrink: 620/651 up/down: 12060/-15824 (-3764) Cc: Mark Rutland Signed-off-by: Alexander Lobakin Reviewed-by: Marco Elver Signed-off-by: Yury Norov --- include/linux/bitops.h | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'include/linux/bitops.h') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 3c3afbae1533..cf9bf65039f2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -33,8 +33,24 @@ extern unsigned long __sw_hweight64(__u64 w); #include +/* + * Many architecture-specific non-atomic bitops contain inline asm code and due + * to that the compiler can't optimize them to compile-time expressions or + * constants. In contrary, generic_*() helpers are defined in pure C and + * compilers optimize them just well. + * Therefore, to make `unsigned long foo = 0; __set_bit(BAR, &foo)` effectively + * equal to `unsigned long foo = BIT(BAR)`, pick the generic C alternative when + * the arguments can be resolved at compile time. That expression itself is a + * constant and doesn't bring any functional changes to the rest of cases. + * The casts to `uintptr_t` are needed to mitigate `-Waddress` warnings when + * passing a bitmap from .bss or .data (-> `!!addr` is always true). + */ #define bitop(op, nr, addr) \ - op(nr, addr) + ((__builtin_constant_p(nr) && \ + __builtin_constant_p((uintptr_t)(addr) != (uintptr_t)NULL) && \ + (uintptr_t)(addr) != (uintptr_t)NULL && \ + __builtin_constant_p(*(const unsigned long *)(addr))) ? \ + const##op(nr, addr) : op(nr, addr)) #define __set_bit(nr, addr) bitop(___set_bit, nr, addr) #define __clear_bit(nr, addr) bitop(___clear_bit, nr, addr) -- cgit v1.2.3