py/objint: Fix int.to_bytes() buffer size checks.

Fixes and improvements to `int.to_bytes()` are: - No longer overflows if byte size is 0 (closes #13041). - Raises OverflowError in any case where number won't fit into byte length (now matches CPython, previously MicroPython would return a truncated bytes object). - Document that `micropython int.to_bytes()` doesn't implement the optional signed kwarg, but will behave as if `signed=True` when the integer is negative (this is the current behaviour). Add tests for this also. Requires changes for small ints, MPZ large ints, and "long long" large ints. Adds a new set of unit tests for ints between 32 and 64 bits to increase coverage of "long long" large ints, which are otherwise untested. Tested on unix port (64 bit small ints, MPZ long ints) and Zephyr STM32WB board (32 bit small ints, long long large ints). This work was funded through GitHub Sponsors. Signed-off-by: Angus Gratton <angus@redyak.com.au>
author: Angus Gratton <angus@redyak.com.au> 2023-11-29 11:23:16 +1100
committer: Damien George <damien@micropython.org> 2024-06-24 14:07:00 +1000
commit: 908ab1ceca15ee6fd0ef82ca4cba770a3ec41894 (patch)
tree: 94d7e1d9e191d277f9500a310d8247ca528cfc37 /py/objint.c
parent: d933210d960a6f9337e85753e9568619bbfd54ec (diff)
1 files changed, 29 insertions, 8 deletions
diff --git a/py/objint.c b/py/objint.c
index 6caa608f3..467a4714e 100644
--- a/py/objint.c
+++ b/py/objint.c
@@ -421,29 +421,50 @@ static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 3, 4, int_fro
 static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));
 
 static mp_obj_t int_to_bytes(size_t n_args, const mp_obj_t *args) {
-    // TODO: Support signed param (assumes signed=False)
+    // TODO: Support signed (currently behaves as if signed=(val < 0))
     (void)n_args;
+    bool overflow;
 
-    mp_int_t len = mp_obj_get_int(args[1]);
-    if (len < 0) {
+    mp_int_t dlen = mp_obj_get_int(args[1]);
+    if (dlen < 0) {
         mp_raise_ValueError(NULL);
     }
     bool big_endian = args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
 
     vstr_t vstr;
-    vstr_init_len(&vstr, len);
+    vstr_init_len(&vstr, dlen);
     byte *data = (byte *)vstr.buf;
-    memset(data, 0, len);
 
     #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
     if (!mp_obj_is_small_int(args[0])) {
-        mp_obj_int_to_bytes_impl(args[0], big_endian, len, data);
+        overflow = !mp_obj_int_to_bytes_impl(args[0], big_endian, dlen, data);
     } else
     #endif
     {
         mp_int_t val = MP_OBJ_SMALL_INT_VALUE(args[0]);
-        size_t l = MIN((size_t)len, sizeof(val));
-        mp_binary_set_int(l, big_endian, data + (big_endian ? (len - l) : 0), val);
+        int slen = 0;  // Number of bytes to represent val
+
+        // This logic has a twin in objint_longlong.c
+        if (val > 0) {
+            slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(val) + 7) / 8;
+        } else if (val < -1) {
+            slen = (sizeof(mp_int_t) * 8 - mp_clz_mpi(~val) + 8) / 8;
+        } else {
+            // clz of 0 is defined, so 0 and -1 map to 0 and 1
+            slen = -val;
+        }
+
+        if (slen <= dlen) {
+            memset(data, val < 0 ? 0xFF : 0x00, dlen);
+            mp_binary_set_int(slen, big_endian, data + (big_endian ? (dlen - slen) : 0), val);
+            overflow = false;
+        } else {
+            overflow = true;
+        }
+    }
+
+    if (overflow) {
+        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("buffer too small"));
     }
 
     return mp_obj_new_bytes_from_vstr(&vstr);
author	Angus Gratton <angus@redyak.com.au>	2023-11-29 11:23:16 +1100
committer	Damien George <damien@micropython.org>	2024-06-24 14:07:00 +1000
commit	908ab1ceca15ee6fd0ef82ca4cba770a3ec41894 (patch)
tree	94d7e1d9e191d277f9500a310d8247ca528cfc37 /py/objint.c
parent	d933210d960a6f9337e85753e9568619bbfd54ec (diff)