diff options
Diffstat (limited to 'arch/x86/include/asm/div64.h')
| -rw-r--r-- | arch/x86/include/asm/div64.h | 39 |
1 files changed, 31 insertions, 8 deletions
diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index 9931e4c7d73f..30fd06ede751 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -60,6 +60,12 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +/* + * gcc tends to zero extend 32bit values and do full 64bit maths. + * Define asm functions that avoid this. + * (clang generates better code for the C versions.) + */ +#ifndef __clang__ static inline u64 mul_u32_u32(u32 a, u32 b) { u32 high, low; @@ -71,6 +77,19 @@ static inline u64 mul_u32_u32(u32 a, u32 b) } #define mul_u32_u32 mul_u32_u32 +static inline u64 add_u64_u32(u64 a, u32 b) +{ + u32 high = a >> 32, low = a; + + asm ("addl %[b], %[low]; adcl $0, %[high]" + : [low] "+r" (low), [high] "+r" (high) + : [b] "rm" (b) ); + + return low | (u64)high << 32; +} +#define add_u64_u32 add_u64_u32 +#endif + /* * __div64_32() is never called on x86, so prevent the * generic definition from getting built. @@ -84,21 +103,25 @@ static inline u64 mul_u32_u32(u32 a, u32 b) * Will generate an #DE when the result doesn't fit u64, could fix with an * __ex_table[] entry when it becomes an issue. */ -static inline u64 mul_u64_u64_div_u64(u64 a, u64 mul, u64 div) +static inline u64 mul_u64_add_u64_div_u64(u64 rax, u64 mul, u64 add, u64 div) { - u64 q; + u64 rdx; + + asm ("mulq %[mul]" : "+a" (rax), "=d" (rdx) : [mul] "rm" (mul)); + + if (!statically_true(!add)) + asm ("addq %[add], %[lo]; adcq $0, %[hi]" : + [lo] "+r" (rax), [hi] "+r" (rdx) : [add] "irm" (add)); - asm ("mulq %2; divq %3" : "=a" (q) - : "a" (a), "rm" (mul), "rm" (div) - : "rdx"); + asm ("divq %[div]" : "+a" (rax), "+d" (rdx) : [div] "rm" (div)); - return q; + return rax; } -#define mul_u64_u64_div_u64 mul_u64_u64_div_u64 +#define mul_u64_add_u64_div_u64 mul_u64_add_u64_div_u64 static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 div) { - return mul_u64_u64_div_u64(a, mul, div); + return mul_u64_add_u64_div_u64(a, mul, 0, div); } #define mul_u64_u32_div mul_u64_u32_div |
