summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJeff Epler <jepler@gmail.com>2025-07-23 16:14:22 -0500
committerDamien George <damien@micropython.org>2025-10-01 09:23:05 +1000
commita80913292153a14424b29bdb9ca8847e8d35cf73 (patch)
tree9d073ab5c9c1773d30daa89d3062c3ee1bbed380
parent3dd8073c290c077f17ffdee17a019763ad82604d (diff)
py: Add MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC.
Most MCUs apart from Cortex-M0 with Thumb 1 have an instruction for computing the "high part" of a multiplication (e.g., the upper 32 bits of a 32x32 multiply). When they do, gcc uses this to implement a small and fast overflow check using the __builtin_mul_overflow intrinsic, which is preferable to the guard division method previously used in smallint.c. However, in contrast to the previous mp_small_int_mul_overflow routine, which checks that the result fits not only within mp_int_t but is SMALL_INT_FITS(), __builtin_mul_overflow only checks for overflow of the C type. As a result, a slight change in the code flow is needed for MP_BINARY_OP_MULTIPLY. Other sites using mp_small_int_mul_overflow already had the result value flow through to a SMALL_INT_FITS check so they didn't need any additional changes. Do similarly for the _ll and _ull multiply overflows checks. Signed-off-by: Jeff Epler <jepler@gmail.com>
-rw-r--r--py/misc.h49
-rw-r--r--py/mpconfig.h19
-rw-r--r--py/parsenum.c3
-rw-r--r--py/runtime.c27
-rw-r--r--py/runtime_utils.c60
-rw-r--r--py/smallint.c29
-rw-r--r--py/smallint.h4
7 files changed, 105 insertions, 86 deletions
diff --git a/py/misc.h b/py/misc.h
index 081163cad..ac5e8fb0e 100644
--- a/py/misc.h
+++ b/py/misc.h
@@ -35,7 +35,11 @@
#include <stdbool.h>
#include <stdint.h>
#include <stddef.h>
+#if __cplusplus // Required on at least one compiler to get ULLONG_MAX
+#include <climits>
+#else
#include <limits.h>
+#endif
typedef unsigned char byte;
typedef unsigned int uint;
@@ -454,7 +458,7 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) {
#endif
}
-// Overflow-checked operations for long long
+// Overflow-checked operations
// Integer overflow builtins were added to GCC 5, but __has_builtin only in GCC 10
//
@@ -462,45 +466,28 @@ static inline uint32_t mp_clz_mpi(mp_int_t x) {
// functions below don't update the result if an overflow would occur (to avoid UB).
#define MP_GCC_HAS_BUILTIN_OVERFLOW (__GNUC__ >= 5)
-#if __has_builtin(__builtin_umulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
+#if MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
+
#define mp_mul_ull_overflow __builtin_umulll_overflow
+#define mp_mul_ll_overflow __builtin_smulll_overflow
+static inline bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
+ // __builtin_mul_overflow is a type-generic function, this inline ensures the argument
+ // types are checked to match mp_int_t.
+ return __builtin_mul_overflow(x, y, res);
+}
+
#else
-inline static bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) {
+
+bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res);
+bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res);
+static inline bool mp_mul_ull_overflow(unsigned long long int x, unsigned long long int y, unsigned long long int *res) {
if (y > 0 && x > (ULLONG_MAX / y)) {
return true; // overflow
}
*res = x * y;
return false;
}
-#endif
-
-#if __has_builtin(__builtin_smulll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
-#define mp_mul_ll_overflow __builtin_smulll_overflow
-#else
-inline static bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) {
- bool overflow;
- // Check for multiply overflow; see CERT INT32-C
- if (x > 0) { // x is positive
- if (y > 0) { // x and y are positive
- overflow = (x > (LLONG_MAX / y));
- } else { // x positive, y nonpositive
- overflow = (y < (LLONG_MIN / x));
- } // x positive, y nonpositive
- } else { // x is nonpositive
- if (y > 0) { // x is nonpositive, y is positive
- overflow = (x < (LLONG_MIN / y));
- } else { // x and y are nonpositive
- overflow = (x != 0 && y < (LLONG_MAX / x));
- } // End if x and y are nonpositive
- } // End if x is nonpositive
-
- if (!overflow) {
- *res = x * y;
- }
-
- return overflow;
-}
#endif
#if __has_builtin(__builtin_saddll_overflow) || MP_GCC_HAS_BUILTIN_OVERFLOW
diff --git a/py/mpconfig.h b/py/mpconfig.h
index f4b7c1058..45178034a 100644
--- a/py/mpconfig.h
+++ b/py/mpconfig.h
@@ -2336,4 +2336,23 @@ typedef time_t mp_timestamp_t;
#define MP_WARN_CAT(x) (NULL)
#endif
+// If true, use __builtin_mul_overflow (a gcc intrinsic supported by clang) for
+// overflow checking when multiplying two small ints. Otherwise, use a portable
+// algorithm.
+//
+// Most MCUs have a 32x32->64 bit multiply instruction, in which case the
+// intrinsic is likely to be faster and generate smaller code. The main exception is
+// cortex-m0 with __ARM_ARCH_ISA_THUMB == 1.
+//
+// The intrinsic is in GCC starting with version 5.
+#ifndef MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
+#if defined(__ARM_ARCH_ISA_THUMB) && (__GNUC__ >= 5)
+#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (__ARM_ARCH_ISA_THUMB >= 2)
+#elif (__GNUC__ >= 5)
+#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (1)
+#else
+#define MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC (0)
+#endif
+#endif
+
#endif // MICROPY_INCLUDED_PY_MPCONFIG_H
diff --git a/py/parsenum.c b/py/parsenum.c
index 4239f2dcd..05bcc0f0a 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -28,6 +28,7 @@
#include <stdlib.h>
#include "py/runtime.h"
+#include "py/misc.h"
#include "py/parsenumbase.h"
#include "py/parsenum.h"
#include "py/smallint.h"
@@ -52,7 +53,7 @@ static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) {
// to bigint parsing if supported)
typedef mp_int_t parsed_int_t;
-#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow
+#define PARSED_INT_MUL_OVERFLOW mp_mul_mp_int_t_overflow
#define PARSED_INT_FITS MP_SMALL_INT_FITS
#else
// In the special case where bigint support is long long, we save code size by
diff --git a/py/runtime.c b/py/runtime.c
index 61aeb83f8..58d5732be 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -430,7 +430,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
// Operations that can overflow:
// + result always fits in mp_int_t, then handled by SMALL_INT check
// - result always fits in mp_int_t, then handled by SMALL_INT check
- // * checked explicitly
+ // * checked explicitly for fit in mp_int_t, then handled by SMALL_INT check
// / if lhs=MIN and rhs=-1; result always fits in mp_int_t, then handled by SMALL_INT check
// % if lhs=MIN and rhs=-1; result always fits in mp_int_t, then handled by SMALL_INT check
// << checked explicitly
@@ -489,31 +489,16 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
break;
case MP_BINARY_OP_MULTIPLY:
case MP_BINARY_OP_INPLACE_MULTIPLY: {
-
- // If long long type exists and is larger than mp_int_t, then
- // we can use the following code to perform overflow-checked multiplication.
- // Otherwise (eg in x64 case) we must use mp_small_int_mul_overflow.
- #if 0
- // compute result using long long precision
- long long res = (long long)lhs_val * (long long)rhs_val;
- if (res > MP_SMALL_INT_MAX || res < MP_SMALL_INT_MIN) {
- // result overflowed SMALL_INT, so return higher precision integer
- return mp_obj_new_int_from_ll(res);
- } else {
- // use standard precision
- lhs_val = (mp_int_t)res;
- }
- #endif
-
mp_int_t int_res;
- if (mp_small_int_mul_overflow(lhs_val, rhs_val, &int_res)) {
+ if (mp_mul_mp_int_t_overflow(lhs_val, rhs_val, &int_res)) {
// use higher precision
lhs = mp_obj_new_int_from_ll(lhs_val);
goto generic_binary_op;
} else {
// use standard precision
- return MP_OBJ_NEW_SMALL_INT(int_res);
+ lhs_val = int_res;
}
+ break;
}
case MP_BINARY_OP_FLOOR_DIVIDE:
case MP_BINARY_OP_INPLACE_FLOOR_DIVIDE:
@@ -553,7 +538,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
mp_int_t ans = 1;
while (rhs_val > 0) {
if (rhs_val & 1) {
- if (mp_small_int_mul_overflow(ans, lhs_val, &ans)) {
+ if (mp_mul_mp_int_t_overflow(ans, lhs_val, &ans)) {
goto power_overflow;
}
}
@@ -562,7 +547,7 @@ mp_obj_t MICROPY_WRAP_MP_BINARY_OP(mp_binary_op)(mp_binary_op_t op, mp_obj_t lhs
}
rhs_val /= 2;
mp_int_t int_res;
- if (mp_small_int_mul_overflow(lhs_val, lhs_val, &int_res)) {
+ if (mp_mul_mp_int_t_overflow(lhs_val, lhs_val, &int_res)) {
goto power_overflow;
}
lhs_val = int_res;
diff --git a/py/runtime_utils.c b/py/runtime_utils.c
index b92c6bd76..50526b210 100644
--- a/py/runtime_utils.c
+++ b/py/runtime_utils.c
@@ -50,3 +50,63 @@ mp_obj_t mp_call_function_2_protected(mp_obj_t fun, mp_obj_t arg1, mp_obj_t arg2
return MP_OBJ_NULL;
}
}
+
+#if !MICROPY_USE_GCC_MUL_OVERFLOW_INTRINSIC
+bool mp_mul_ll_overflow(long long int x, long long int y, long long int *res) {
+ bool overflow;
+
+ // Check for multiply overflow; see CERT INT32-C
+ if (x > 0) { // x is positive
+ if (y > 0) { // x and y are positive
+ overflow = (x > (LLONG_MAX / y));
+ } else { // x positive, y nonpositive
+ overflow = (y < (LLONG_MIN / x));
+ } // x positive, y nonpositive
+ } else { // x is nonpositive
+ if (y > 0) { // x is nonpositive, y is positive
+ overflow = (x < (LLONG_MIN / y));
+ } else { // x and y are nonpositive
+ overflow = (x != 0 && y < (LLONG_MAX / x));
+ } // End if x and y are nonpositive
+ } // End if x is nonpositive
+
+ if (!overflow) {
+ *res = x * y;
+ }
+
+ return overflow;
+}
+
+#define MP_UINT_MAX (~(mp_uint_t)0)
+#define MP_INT_MAX ((mp_int_t)(MP_UINT_MAX >> 1))
+#define MP_INT_MIN (-MP_INT_MAX - 1)
+
+bool mp_mul_mp_int_t_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
+ // Check for multiply overflow; see CERT INT32-C
+ if (x > 0) { // x is positive
+ if (y > 0) { // x and y are positive
+ if (x > (MP_INT_MAX / y)) {
+ return true;
+ }
+ } else { // x positive, y nonpositive
+ if (y < (MP_INT_MIN / x)) {
+ return true;
+ }
+ } // x positive, y nonpositive
+ } else { // x is nonpositive
+ if (y > 0) { // x is nonpositive, y is positive
+ if (x < (MP_INT_MIN / y)) {
+ return true;
+ }
+ } else { // x and y are nonpositive
+ if (x != 0 && y < (MP_INT_MAX / x)) {
+ return true;
+ }
+ } // End if x and y are nonpositive
+ } // End if x is nonpositive
+
+ // Result doesn't overflow
+ *res = x * y;
+ return false;
+}
+#endif
diff --git a/py/smallint.c b/py/smallint.c
index a494093d6..eb99b5866 100644
--- a/py/smallint.c
+++ b/py/smallint.c
@@ -26,35 +26,6 @@
#include "py/smallint.h"
-bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res) {
- // Check for multiply overflow; see CERT INT32-C
- if (x > 0) { // x is positive
- if (y > 0) { // x and y are positive
- if (x > (MP_SMALL_INT_MAX / y)) {
- return true;
- }
- } else { // x positive, y nonpositive
- if (y < (MP_SMALL_INT_MIN / x)) {
- return true;
- }
- } // x positive, y nonpositive
- } else { // x is nonpositive
- if (y > 0) { // x is nonpositive, y is positive
- if (x < (MP_SMALL_INT_MIN / y)) {
- return true;
- }
- } else { // x and y are nonpositive
- if (x != 0 && y < (MP_SMALL_INT_MAX / x)) {
- return true;
- }
- } // End if x and y are nonpositive
- } // End if x is nonpositive
-
- // Result doesn't overflow
- *res = x * y;
- return false;
-}
-
mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor) {
// Python specs require that mod has same sign as second operand
dividend %= divisor;
diff --git a/py/smallint.h b/py/smallint.h
index e50f98651..ec5b0af3b 100644
--- a/py/smallint.h
+++ b/py/smallint.h
@@ -68,10 +68,6 @@
// The number of bits in a MP_SMALL_INT including the sign bit.
#define MP_SMALL_INT_BITS (MP_IMAX_BITS(MP_SMALL_INT_MAX) + 1)
-// Multiply two small ints.
-// If returns false, the correct result is stored in 'res'
-// If returns true, the multiplication would have overflowed. 'res' is unchanged.
-bool mp_small_int_mul_overflow(mp_int_t x, mp_int_t y, mp_int_t *res);
mp_int_t mp_small_int_modulo(mp_int_t dividend, mp_int_t divisor);
mp_int_t mp_small_int_floor_divide(mp_int_t num, mp_int_t denom);