py/parsenum: Extend mp_parse_num_integer() to parse long long.

If big integer support is 'long long' then mp_parse_num_integer() can parse to it directly instead of failing over from small int. This means strtoll() is no longer pulled in, and fixes some bugs parsing long long integers (i.e. can now parse negative values correctly, can now parse values which aren't NULL terminated). The (default) smallint parsing compiled code should stay the same here, macros and a typedef are used to abstract some parts of it out. When bigint is long long we parse to 'unsigned long long' first (to avoid the code size hit of pulling in signed 64-bit math routines) and then convert to signed at the end. One tricky case this routine correctly overflows on is int("9223372036854775808") which is one more than LLONG_MAX in decimal. No unit test case added for this as it's too hard to detect 64-bit long integer mode. This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton <angus@redyak.com.au>
author: Angus Gratton <angus@redyak.com.au> 2025-07-15 11:23:28 +1000
committer: Damien George <damien@micropython.org> 2025-07-18 00:12:16 +1000
commit: 17fbc5abdc7e139a922f6a11619deb7cb031e0cb (patch)
tree: a280d07a0c2e781e9605a863c33a723d941b08b9 /py/parsenum.c
parent: e9845ab20ec798c1d5bf00bd3b64ff5d96d94500 (diff)
1 files changed, 40 insertions, 11 deletions
diff --git a/py/parsenum.c b/py/parsenum.c
index 31b332c18..fcc690917 100644
--- a/py/parsenum.c
+++ b/py/parsenum.c
@@ -46,6 +46,27 @@ static MP_NORETURN void raise_exc(mp_obj_t exc, mp_lexer_t *lex) {
     nlr_raise(exc);
 }
 
+#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
+// For the common small integer parsing case, we parse directly to mp_int_t and
+// check that the value doesn't overflow a smallint (in which case we fail over
+// to bigint parsing if supported)
+typedef mp_int_t parsed_int_t;
+
+#define PARSED_INT_MUL_OVERFLOW mp_small_int_mul_overflow
+#define PARSED_INT_FITS MP_SMALL_INT_FITS
+#else
+// In the special case where bigint support is long long, we save code size by
+// parsing directly to long long and then return either a bigint or smallint
+// from the same result.
+//
+// To avoid pulling in (slow) signed 64-bit math routines we do the initial
+// parsing to an unsigned long long and only convert to signed at the end.
+typedef unsigned long long parsed_int_t;
+
+#define PARSED_INT_MUL_OVERFLOW mp_mul_ull_overflow
+#define PARSED_INT_FITS(I) ((I) <= (unsigned long long)LLONG_MAX)
+#endif
+
 mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, mp_lexer_t *lex) {
     const byte *restrict str = (const byte *)str_;
     const byte *restrict top = str + len;
@@ -76,7 +97,7 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
     str += mp_parse_num_base((const char *)str, top - str, &base);
 
     // string should be an integer number
-    mp_int_t int_val = 0;
+    parsed_int_t parsed_val = 0;
     const byte *restrict str_val_start = str;
     for (; str < top; str++) {
         // get next digit as a value
@@ -98,25 +119,29 @@ mp_obj_t mp_parse_num_integer(const char *restrict str_, size_t len, int base, m
             break;
         }
 
-        // add next digi and check for overflow
-        if (mp_small_int_mul_overflow(int_val, base, &int_val)) {
+        // add next digit and check for overflow
+        if (PARSED_INT_MUL_OVERFLOW(parsed_val, base, &parsed_val)) {
             goto overflow;
         }
-        int_val += dig;
-        if (!MP_SMALL_INT_FITS(int_val)) {
+        parsed_val += dig;
+        if (!PARSED_INT_FITS(parsed_val)) {
             goto overflow;
         }
     }
 
-    // negate value if needed
+    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
+    // The PARSED_INT_FITS check above ensures parsed_val fits in small int representation
+    ret_val = MP_OBJ_NEW_SMALL_INT(neg ? (-parsed_val) : parsed_val);
+have_ret_val:
+    #else
+    // The PARSED_INT_FITS check above ensures parsed_val won't overflow signed long long
+    long long signed_val = parsed_val;
     if (neg) {
-        int_val = -int_val;
+        signed_val = -signed_val;
     }
+    ret_val = mp_obj_new_int_from_ll(signed_val); // Could be large or small int
+    #endif
 
-    // create the small int
-    ret_val = MP_OBJ_NEW_SMALL_INT(int_val);
-
-have_ret_val:
     // check we parsed something
     if (str == str_val_start) {
         goto value_error;
@@ -135,6 +160,7 @@ have_ret_val:
     return ret_val;
 
 overflow:
+    #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_LONGLONG
     // reparse using long int
     {
         const char *s2 = (const char *)str_val_start;
@@ -142,6 +168,9 @@ overflow:
         str = (const byte *)s2;
         goto have_ret_val;
     }
+    #else
+    mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("result overflows long long storage"));
+    #endif
 
 value_error:
     {
author	Angus Gratton <angus@redyak.com.au>	2025-07-15 11:23:28 +1000
committer	Damien George <damien@micropython.org>	2025-07-18 00:12:16 +1000
commit	17fbc5abdc7e139a922f6a11619deb7cb031e0cb (patch)
tree	a280d07a0c2e781e9605a863c33a723d941b08b9 /py/parsenum.c
parent	e9845ab20ec798c1d5bf00bd3b64ff5d96d94500 (diff)