py/modstruct: Check and prevent buffer-read overflow in struct unpacking

Prior to this patch, the size of the buffer given to unpack/unpack_from was checked for being too small by using the count of the arguments, not their actual size. For example, a format spec of '4I' would only check that there was 4 bytes available, not 16; and 'I' would check for 1 byte, not 4. This bug is fixed in this patch by calculating the total size of the format spec at the start of the unpacking function. This function anyway needs to calculate the number of items at the start, so calculating the total size can be done at the same time.
author: Damien George <damien.p.george@gmail.com> 2017-09-01 10:53:29 +1000
committer: Damien George <damien.p.george@gmail.com> 2017-09-01 10:53:29 +1000
commit: 79d5acbd01fed21e1c8e46b06690072375de0bfe (patch)
tree: a785985cda844d79f02cdc1007fc0d6e9070d4ef
parent: 793d826d9df67c3f544505beda63ca36d8dfa1c4 (diff)
3 files changed, 38 insertions, 29 deletions
diff --git a/py/modstruct.c b/py/modstruct.c
index 0d4a45f6b..61dd0f81b 100644
--- a/py/modstruct.c
+++ b/py/modstruct.c
@@ -82,26 +82,10 @@ STATIC mp_uint_t get_fmt_num(const char **p) {
     return val;
 }
 
-STATIC uint calcsize_items(const char *fmt) {
-    uint cnt = 0;
-    while (*fmt) {
-        int num = 1;
-        if (unichar_isdigit(*fmt)) {
-            num = get_fmt_num(&fmt);
-            if (*fmt == 's') {
-                num = 1;
-            }
-        }
-        cnt += num;
-        fmt++;
-    }
-    return cnt;
-}
-
-STATIC mp_obj_t struct_calcsize(mp_obj_t fmt_in) {
-    const char *fmt = mp_obj_str_get_str(fmt_in);
+STATIC size_t calc_size_items(const char *fmt, size_t *total_sz) {
     char fmt_type = get_fmt_type(&fmt);
-    mp_uint_t size;
+    size_t total_cnt = 0;
+    size_t size;
     for (size = 0; *fmt; fmt++) {
         mp_uint_t cnt = 1;
         if (unichar_isdigit(*fmt)) {
@@ -109,8 +93,10 @@ STATIC mp_obj_t struct_calcsize(mp_obj_t fmt_in) {
         }
 
         if (*fmt == 's') {
+            total_cnt += 1;
             size += cnt;
         } else {
+            total_cnt += cnt;
             mp_uint_t align;
             size_t sz = mp_binary_get_size(fmt_type, *fmt, &align);
             while (cnt--) {
@@ -120,6 +106,14 @@ STATIC mp_obj_t struct_calcsize(mp_obj_t fmt_in) {
             }
         }
     }
+    *total_sz = size;
+    return total_cnt;
+}
+
+STATIC mp_obj_t struct_calcsize(mp_obj_t fmt_in) {
+    const char *fmt = mp_obj_str_get_str(fmt_in);
+    size_t size;
+    calc_size_items(fmt, &size);
     return MP_OBJ_NEW_SMALL_INT(size);
 }
 MP_DEFINE_CONST_FUN_OBJ_1(struct_calcsize_obj, struct_calcsize);
@@ -130,8 +124,9 @@ STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) {
     // Since we implement unpack and unpack_from using the same function
     // we relax the "exact" requirement, and only implement "big enough".
     const char *fmt = mp_obj_str_get_str(args[0]);
+    size_t total_sz;
+    size_t num_items = calc_size_items(fmt, &total_sz);
     char fmt_type = get_fmt_type(&fmt);
-    uint num_items = calcsize_items(fmt);
     mp_obj_tuple_t *res = MP_OBJ_TO_PTR(mp_obj_new_tuple(num_items, NULL));
     mp_buffer_info_t bufinfo;
     mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
@@ -152,21 +147,23 @@ STATIC mp_obj_t struct_unpack_from(size_t n_args, const mp_obj_t *args) {
         p += offset;
     }
 
-    for (uint i = 0; i < num_items;) {
-        mp_uint_t sz = 1;
+    // Check that the input buffer is big enough to unpack all the values
+    if (p + total_sz > end_p) {
+        mp_raise_ValueError("buffer too small");
+    }
+
+    for (size_t i = 0; i < num_items;) {
+        mp_uint_t cnt = 1;
         if (unichar_isdigit(*fmt)) {
-            sz = get_fmt_num(&fmt);
-        }
-        if (p + sz > end_p) {
-            mp_raise_ValueError("buffer too small");
+            cnt = get_fmt_num(&fmt);
         }
         mp_obj_t item;
         if (*fmt == 's') {
-            item = mp_obj_new_bytes(p, sz);
-            p += sz;
+            item = mp_obj_new_bytes(p, cnt);
+            p += cnt;
             res->items[i++] = item;
         } else {
-            while (sz--) {
+            while (cnt--) {
                 item = mp_binary_get_val(fmt_type, *fmt, &p);
                 res->items[i++] = item;
             }
diff --git a/tests/basics/struct1.py b/tests/basics/struct1.py
index a442beb1e..2cf75137b 100644
--- a/tests/basics/struct1.py
+++ b/tests/basics/struct1.py
@@ -39,6 +39,12 @@ print(v == (10, 100, 200, 300))
 # network byte order
 print(struct.pack('!i', 123))
 
+# check that we get an error if the buffer is too small
+try:
+    struct.unpack('I', b'\x00\x00\x00')
+except:
+    print('struct.error')
+
 # first arg must be a string
 try:
     struct.pack(1, 2)
diff --git a/tests/basics/struct2.py b/tests/basics/struct2.py
index 3b9dd5c1f..e3336c0c7 100644
--- a/tests/basics/struct2.py
+++ b/tests/basics/struct2.py
@@ -25,6 +25,12 @@ print(struct.calcsize('0s1s0H2H'))
 print(struct.unpack('<0s1s0H2H', b'01234'))
 print(struct.pack('<0s1s0H2H', b'abc', b'abc', 258, 515))
 
+# check that we get an error if the buffer is too small
+try:
+    struct.unpack('2H', b'\x00\x00')
+except:
+    print('Exception')
+
 # check that unknown types raise an exception
 try:
     struct.unpack('z', b'1')
author	Damien George <damien.p.george@gmail.com>	2017-09-01 10:53:29 +1000
committer	Damien George <damien.p.george@gmail.com>	2017-09-01 10:53:29 +1000
commit	79d5acbd01fed21e1c8e46b06690072375de0bfe (patch)
tree	a785985cda844d79f02cdc1007fc0d6e9070d4ef
parent	793d826d9df67c3f544505beda63ca36d8dfa1c4 (diff)