summaryrefslogtreecommitdiff
path: root/py/parsenum.c
diff options
context:
space:
mode:
authorAngus Gratton <angus@redyak.com.au>2025-07-15 11:23:28 +1000
committerDamien George <damien@micropython.org>2025-07-18 00:12:16 +1000
commit17fbc5abdc7e139a922f6a11619deb7cb031e0cb (patch)
treea280d07a0c2e781e9605a863c33a723d941b08b9 /py/parsenum.c
parente9845ab20ec798c1d5bf00bd3b64ff5d96d94500 (diff)
py/parsenum: Extend mp_parse_num_integer() to parse long long.
If big integer support is 'long long' then mp_parse_num_integer() can parse to it directly instead of failing over from small int. This means strtoll() is no longer pulled in, and fixes some bugs parsing long long integers (i.e. can now parse negative values correctly, can now parse values which aren't NULL terminated). The (default) smallint parsing compiled code should stay the same here, macros and a typedef are used to abstract some parts of it out. When bigint is long long we parse to 'unsigned long long' first (to avoid the code size hit of pulling in signed 64-bit math routines) and then convert to signed at the end. One tricky case this routine correctly overflows on is int("9223372036854775808") which is one more than LLONG_MAX in decimal. No unit test case added for this as it's too hard to detect 64-bit long integer mode. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton <angus@redyak.com.au>
Diffstat (limited to 'py/parsenum.c')
-rw-r--r--py/parsenum.c51
1 files changed, 40 insertions, 11 deletions
diff --git a/py/parsenum.c b/py/parsenum.c
index 31b332c18..fcc690917 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -46,6 +46,27 @@ static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) {
nlr_raise(exc);
}
+#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
+// For the common small integer parsing case, we parse directly to mp_int_t and
+// check that the value doesn't overflow a smallint (in which case we fail over
+// to bigint parsing if supported)
+typedef mp_int_t parsed_int_t;
+
+#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow
+#define PARSED_INT_FITS MP_SMALL_INT_FITS
+#else
+// In the special case where bigint support is long long, we save code size by
+// parsing directly to long long and then return either a bigint or smallint
+// from the same result.
+//
+// To avoid pulling in (slow) signed 64-bit math routines we do the initial
+// parsing to an unsigned long long and only convert to signed at the end.
+typedef unsigned long long parsed_int_t;
+
+#define PARSED_INT_MUL_OVERFLOW mp_mul_ull_overflow
+#define PARSED_INT_FITS(I) ((I) <= (unsigned long long)LLONG_MAX)
+#endif
+
mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, mp_lexer_t *lex) {
const byte *restrict str = (const byte *)str_;
const byte *restrict top = str + len;
@@ -76,7 +97,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
str += mp_parse_num_base((const char *)str, top - str, &base);
// string should be an integer number
- mp_int_t int_val = 0;
+ parsed_int_t parsed_val = 0;
const byte *restrict str_val_start = str;
for (; str < top; str++) {
// get next digit as a value
@@ -98,25 +119,29 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
break;
}
- // add next digi and check for overflow
- if (mp_small_int_mul_overflow(int_val, base, &int_val)) {
+ // add next digit and check for overflow
+ if (PARSED_INT_MUL_OVERFLOW(parsed_val, base, &parsed_val)) {
goto overflow;
}
- int_val += dig;
- if (!MP_SMALL_INT_FITS(int_val)) {
+ parsed_val += dig;
+ if (!PARSED_INT_FITS(parsed_val)) {
goto overflow;
}
}
- // negate value if needed
+ #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
+ // The PARSED_INT_FITS check above ensures parsed_val fits in small int representation
+ ret_val = MP_OBJ_NEW_SMALL_INT(neg ? (-parsed_val) : parsed_val);
+have_ret_val:
+ #else
+ // The PARSED_INT_FITS check above ensures parsed_val won't overflow signed long long
+ long long signed_val = parsed_val;
if (neg) {
- int_val = -int_val;
+ signed_val = -signed_val;
}
+ ret_val = mp_obj_new_int_from_ll(signed_val); // Could be large or small int
+ #endif
- // create the small int
- ret_val = MP_OBJ_NEW_SMALL_INT(int_val);
-
-have_ret_val:
// check we parsed something
if (str == str_val_start) {
goto value_error;
@@ -135,6 +160,7 @@ have_ret_val:
return ret_val;
overflow:
+ #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
// reparse using long int
{
const char *s2 = (const char *)str_val_start;
@@ -142,6 +168,9 @@ overflow:
str = (const byte *)s2;
goto have_ret_val;
}
+ #else
+ mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage"));
+ #endif
value_error:
{