diff options
| author | Jeff Epler <jepler@gmail.com> | 2025-07-23 16:14:22 -0500 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2025-10-01 09:23:05 +1000 |
| commit | a80913292153a14424b29bdb9ca8847e8d35cf73 (patch) | |
| tree | 9d073ab5c9c1773d30daa89d3062c3ee1bbed380 | |
| parent | 3dd8073c290c077f17ffdee17a019763ad82604d (diff) | |
py: Add MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC.
Most MCUs apart from Cortex-M0 with Thumb 1 have an instruction
for computing the "high part" of a multiplication (e.g., the upper
32 bits of a 32x32 multiply).
When they do, gcc uses this to implement a small and fast
overflow check using the __builtin_mul_overflow intrinsic, which
is preferable to the guard division method previously used in smallint.c.
However, in contrast to the previous mp_small_int_mul_overflow
routine, which checks that the result fits not only within mp_int_t
but is SMALL_INT_FITS(), __builtin_mul_overflow only checks for
overflow of the C type. As a result, a slight change in the code
flow is needed for MP_BINARY_OP_MULTIPLY.
Other sites using mp_small_int_mul_overflow already had the
result value flow through to a SMALL_INT_FITS check so they didn't
need any additional changes.
Do similarly for the _ll and _ull multiply overflows checks.
Signed-off-by: Jeff Epler <jepler@gmail.com>
| -rw-r--r-- | py/misc.h | 49 | ||||
| -rw-r--r-- | py/mpconfig.h | 19 | ||||
| -rw-r--r-- | py/parsenum.c | 3 | ||||
| -rw-r--r-- | py/runtime.c | 27 | ||||
| -rw-r--r-- | py/runtime_utils.c | 60 | ||||
| -rw-r--r-- | py/smallint.c | 29 | ||||
| -rw-r--r-- | py/smallint.h | 4 |
7 files changed, 105 insertions, 86 deletions
@@ -35,7 +35,11 @@ #include <stdbool.h> #include <stdint.h> #include <stddef.h> +#if __cplusplus // Required on at least one compiler to get ULLONG_MAX +#include <climits> +#else #include <limits.h> +#endif typedef unsigned char byte; typedef unsigned int uint; @@ -454,7 +458,7 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) { #endif } -// Overflow-checked operations for long long +// Overflow-checked operations // Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10 // @@ -462,45 +466,28 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) { // functions below don't update the result if an overflow would occur (to avoid UB). #define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5) -#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW +#if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC + #define mp_mul_ull_overflow __builtin_umulll_overflow +#define mp_mul_ll_overflow __builtin_smulll_overflow +static inline bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { + // __builtin_mul_overflow is a type-generic function, this inline ensures the argument + // types are checked to match mp_int_t. + return __builtin_mul_overflow(x, y, res); +} + #else -inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) { + +bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res); +bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res); +static inline bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) { if (y > 0 && x > (ULLONG_MAX / y)) { return true; // overflow } *res = x * y; return false; } -#endif - -#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW -#define mp_mul_ll_overflow __builtin_smulll_overflow -#else -inline static bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) { - bool overflow; - // Check for multiply overflow; see CERT INT32-C - if (x > 0) { // x is positive - if (y > 0) { // x and y are positive - overflow = (x > (LLONG_MAX / y)); - } else { // x positive, y nonpositive - overflow = (y < (LLONG_MIN / x)); - } // x positive, y nonpositive - } else { // x is nonpositive - if (y > 0) { // x is nonpositive, y is positive - overflow = (x < (LLONG_MIN / y)); - } else { // x and y are nonpositive - overflow = (x != 0 && y < (LLONG_MAX / x)); - } // End if x and y are nonpositive - } // End if x is nonpositive - - if (!overflow) { - *res = x * y; - } - - return overflow; -} #endif #if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW diff --git a/py/mpconfig.h b/py/mpconfig.h index f4b7c1058..45178034a 100644 --- a/py/mpconfig.h +++ b/py/mpconfig.h @@ -2336,4 +2336,23 @@ typedef time_t mp_timestamp_t; #define MP_WARN_CAT(x) (NULL) #endif +// If true, use __builtin_mul_overflow (a gcc intrinsic supported by clang) for +// overflow checking when multiplying two small ints. Otherwise, use a portable +// algorithm. +// +// Most MCUs have a 32x32->64 bit multiply instruction, in which case the +// intrinsic is likely to be faster and generate smaller code. The main exception is +// cortex-m0 with __ARM_ARCH_ISA_THUMB == 1. +// +// The intrinsic is in GCC starting with version 5. +#ifndef MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC +#if defined(__ARM_ARCH_ISA_THUMB) && (__GNUC__ >= 5) +#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (__ARM_ARCH_ISA_THUMB >= 2) +#elif (__GNUC__ >= 5) +#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (1) +#else +#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (0) +#endif +#endif + #endif // MICROPY_INCLUDED_PY_MPCONFIG_H diff --git a/py/parsenum.c b/py/parsenum.c index 4239f2dcd..05bcc0f0a 100644 --- a/py/parsenum.c +++ b/py/parsenum.c @@ -28,6 +28,7 @@ #include <stdlib.h> #include "py/runtime.h" +#include "py/misc.h" #include "py/parsenumbase.h" #include "py/parsenum.h" #include "py/smallint.h" @@ -52,7 +53,7 @@ static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) { // to bigint parsing if supported) typedef mp_int_t parsed_int_t; -#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow +#define PARSED_INT_MUL_OVERFLOW mp_mul_mp_int_t_overflow #define PARSED_INT_FITS MP_SMALL_INT_FITS #else // In the special case where bigint support is long long, we save code size by diff --git a/py/runtime.c b/py/runtime.c index 61aeb83f8..58d5732be 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -430,7 +430,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs // Operations that can overflow: // + result always fits in mp_int_t, then handled by SMALL_INT check // - result always fits in mp_int_t, then handled by SMALL_INT check - // * checked explicitly + // * checked explicitly for fit in mp_int_t, then handled by SMALL_INT check // / if lhs=MIN and rhs=-1; result always fits in mp_int_t, then handled by SMALL_INT check // % if lhs=MIN and rhs=-1; result always fits in mp_int_t, then handled by SMALL_INT check // << checked explicitly @@ -489,31 +489,16 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs break; case MP_BINARY_OP_MULTIPLY: case MP_BINARY_OP_INPLACE_MULTIPLY: { - - // If long long type exists and is larger than mp_int_t, then - // we can use the following code to perform overflow-checked multiplication. - // Otherwise (eg in x64 case) we must use mp_small_int_mul_overflow. - #if 0 - // compute result using long long precision - long long res = (long long)lhs_val * (long long)rhs_val; - if (res > MP_SMALL_INT_MAX || res < MP_SMALL_INT_MIN) { - // result overflowed SMALL_INT, so return higher precision integer - return mp_obj_new_int_from_ll(res); - } else { - // use standard precision - lhs_val = (mp_int_t)res; - } - #endif - mp_int_t int_res; - if (mp_small_int_mul_overflow(lhs_val, rhs_val, &int_res)) { + if (mp_mul_mp_int_t_overflow(lhs_val, rhs_val, &int_res)) { // use higher precision lhs = mp_obj_new_int_from_ll(lhs_val); goto generic_binary_op; } else { // use standard precision - return MP_OBJ_NEW_SMALL_INT(int_res); + lhs_val = int_res; } + break; } case MP_BINARY_OP_FLOOR_DIVIDE: case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE: @@ -553,7 +538,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs mp_int_t ans = 1; while (rhs_val > 0) { if (rhs_val & 1) { - if (mp_small_int_mul_overflow(ans, lhs_val, &ans)) { + if (mp_mul_mp_int_t_overflow(ans, lhs_val, &ans)) { goto power_overflow; } } @@ -562,7 +547,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs } rhs_val /= 2; mp_int_t int_res; - if (mp_small_int_mul_overflow(lhs_val, lhs_val, &int_res)) { + if (mp_mul_mp_int_t_overflow(lhs_val, lhs_val, &int_res)) { goto power_overflow; } lhs_val = int_res; diff --git a/py/runtime_utils.c b/py/runtime_utils.c index b92c6bd76..50526b210 100644 --- a/py/runtime_utils.c +++ b/py/runtime_utils.c @@ -50,3 +50,63 @@ mp_obj_t mp_call_function_2_protected(mp_obj_t fun, mp_obj_t arg1, mp_obj_t arg2 return MP_OBJ_NULL; } } + +#if !MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC +bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) { + bool overflow; + + // Check for multiply overflow; see CERT INT32-C + if (x > 0) { // x is positive + if (y > 0) { // x and y are positive + overflow = (x > (LLONG_MAX / y)); + } else { // x positive, y nonpositive + overflow = (y < (LLONG_MIN / x)); + } // x positive, y nonpositive + } else { // x is nonpositive + if (y > 0) { // x is nonpositive, y is positive + overflow = (x < (LLONG_MIN / y)); + } else { // x and y are nonpositive + overflow = (x != 0 && y < (LLONG_MAX / x)); + } // End if x and y are nonpositive + } // End if x is nonpositive + + if (!overflow) { + *res = x * y; + } + + return overflow; +} + +#define MP_UINT_MAX (~(mp_uint_t)0) +#define MP_INT_MAX ((mp_int_t)(MP_UINT_MAX >> 1)) +#define MP_INT_MIN (-MP_INT_MAX - 1) + +bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { + // Check for multiply overflow; see CERT INT32-C + if (x > 0) { // x is positive + if (y > 0) { // x and y are positive + if (x > (MP_INT_MAX / y)) { + return true; + } + } else { // x positive, y nonpositive + if (y < (MP_INT_MIN / x)) { + return true; + } + } // x positive, y nonpositive + } else { // x is nonpositive + if (y > 0) { // x is nonpositive, y is positive + if (x < (MP_INT_MIN / y)) { + return true; + } + } else { // x and y are nonpositive + if (x != 0 && y < (MP_INT_MAX / x)) { + return true; + } + } // End if x and y are nonpositive + } // End if x is nonpositive + + // Result doesn't overflow + *res = x * y; + return false; +} +#endif diff --git a/py/smallint.c b/py/smallint.c index a494093d6..eb99b5866 100644 --- a/py/smallint.c +++ b/py/smallint.c @@ -26,35 +26,6 @@ #include "py/smallint.h" -bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) { - // Check for multiply overflow; see CERT INT32-C - if (x > 0) { // x is positive - if (y > 0) { // x and y are positive - if (x > (MP_SMALL_INT_MAX / y)) { - return true; - } - } else { // x positive, y nonpositive - if (y < (MP_SMALL_INT_MIN / x)) { - return true; - } - } // x positive, y nonpositive - } else { // x is nonpositive - if (y > 0) { // x is nonpositive, y is positive - if (x < (MP_SMALL_INT_MIN / y)) { - return true; - } - } else { // x and y are nonpositive - if (x != 0 && y < (MP_SMALL_INT_MAX / x)) { - return true; - } - } // End if x and y are nonpositive - } // End if x is nonpositive - - // Result doesn't overflow - *res = x * y; - return false; -} - mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor) { // Python specs require that mod has same sign as second operand dividend %= divisor; diff --git a/py/smallint.h b/py/smallint.h index e50f98651..ec5b0af3b 100644 --- a/py/smallint.h +++ b/py/smallint.h @@ -68,10 +68,6 @@ // The number of bits in a MP_SMALL_INT including the sign bit. #define MP_SMALL_INT_BITS (MP_IMAX_BITS(MP_SMALL_INT_MAX) + 1) -// Multiply two small ints. -// If returns false, the correct result is stored in 'res' -// If returns true, the multiplication would have overflowed. 'res' is unchanged. -bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res); mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor); mp_int_t mp_small_int_floor_divide(mp_int_t num, mp_int_t denom); |
