From 613a8e3edf078c284bd981426cc5a256eabb2323 Mon Sep 17 00:00:00 2001
From: xbe <xbe@machine>
Date: Tue, 18 Mar 2014 00:06:29 -0700
Subject: Implement str.partition and add tests for it.

---
 py/objstr.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

(limited to 'py/objstr.c')

diff --git a/py/objstr.c b/py/objstr.c
index d660bf952..03711debb 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -520,6 +520,31 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
     return MP_OBJ_NEW_SMALL_INT(num_occurrences);
 }
 
+STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
+    assert(MP_OBJ_IS_STR(self_in));
+    if (!MP_OBJ_IS_STR(arg)) {
+        nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
+                                               "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg)));
+    }
+
+    GET_STR_DATA_LEN(self_in, str, str_len);
+    GET_STR_DATA_LEN(arg, sep, sep_len);
+
+    if (sep_len == 0) {
+        nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
+    }
+
+    for (machine_uint_t str_index = 0; str_index + sep_len <= str_len; str_index++) {
+        if (memcmp(&str[str_index], sep, sep_len) == 0) {
+            mp_obj_t items[] = {mp_obj_new_str(str, str_index, false), arg,
+                                mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false)};
+            return mp_obj_new_tuple(3, items);
+        }
+    }
+    mp_obj_t items[] = {mp_obj_new_str(str, str_len, false), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
+    return mp_obj_new_tuple(3, items);
+}
+
 STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) {
     if (flags == BUFFER_READ) {
         GET_STR_DATA_LEN(self_in, str_data, str_len);
@@ -542,6 +567,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
+STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
 
 STATIC const mp_method_t str_type_methods[] = {
     { "find", &str_find_obj },
@@ -552,6 +578,7 @@ STATIC const mp_method_t str_type_methods[] = {
     { "format", &str_format_obj },
     { "replace", &str_replace_obj },
     { "count", &str_count_obj },
+    { "partition", &str_partition_obj },
     { NULL, NULL }, // end-of-list sentinel
 };
 
-- 
cgit v1.2.3


From 4504ea8007bbc97aef51ced20a9ff3f460cd7caf Mon Sep 17 00:00:00 2001
From: xbe <xbe@machine>
Date: Wed, 19 Mar 2014 00:46:14 -0700
Subject: Implement str.rpartition and add tests for it.

---
 py/objstr.c                       | 36 ++++++++++++++++++++++++++++++++++++
 tests/basics/string_rpartition.py | 29 +++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)
 create mode 100644 tests/basics/string_rpartition.py

(limited to 'py/objstr.c')

diff --git a/py/objstr.c b/py/objstr.c
index 03711debb..c71993578 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -545,6 +545,40 @@ STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
     return mp_obj_new_tuple(3, items);
 }
 
+STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
+    assert(MP_OBJ_IS_STR(self_in));
+    if (!MP_OBJ_IS_STR(arg)) {
+        nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
+                                               "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg)));
+    }
+
+    GET_STR_DATA_LEN(self_in, str, str_len);
+    GET_STR_DATA_LEN(arg, sep, sep_len);
+
+    if (sep_len == 0) {
+        nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
+    }
+
+    if (sep_len > str_len) {
+        goto not_found;
+    }
+
+    for (machine_uint_t str_index = str_len; ; str_index--) {
+        if (memcmp(&str[str_index - sep_len], sep, sep_len) == 0) {
+            mp_obj_t items[] = {mp_obj_new_str(str, str_index - sep_len, false), arg,
+                                mp_obj_new_str(str + str_index, str_len - str_index, false)};
+            return mp_obj_new_tuple(3, items);
+        }
+        if (str_index - sep_len == 0) {
+            break;
+        }
+    }
+
+not_found: ;
+    mp_obj_t items[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), mp_obj_new_str(str, str_len, false)};
+    return mp_obj_new_tuple(3, items);
+}
+
 STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) {
     if (flags == BUFFER_READ) {
         GET_STR_DATA_LEN(self_in, str_data, str_len);
@@ -568,6 +602,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace);
 STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count);
 STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition);
+STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition);
 
 STATIC const mp_method_t str_type_methods[] = {
     { "find", &str_find_obj },
@@ -579,6 +614,7 @@ STATIC const mp_method_t str_type_methods[] = {
     { "replace", &str_replace_obj },
     { "count", &str_count_obj },
     { "partition", &str_partition_obj },
+    { "rpartition", &str_rpartition_obj },
     { NULL, NULL }, // end-of-list sentinel
 };
 
diff --git a/tests/basics/string_rpartition.py b/tests/basics/string_rpartition.py
new file mode 100644
index 000000000..656121c94
--- /dev/null
+++ b/tests/basics/string_rpartition.py
@@ -0,0 +1,29 @@
+print("asdf".rpartition('g'))
+print("asdf".rpartition('a'))
+print("asdf".rpartition('s'))
+print("asdf".rpartition('f'))
+print("asdf".rpartition('d'))
+print("asdf".rpartition('asd'))
+print("asdf".rpartition('sdf'))
+print("asdf".rpartition('as'))
+print("asdf".rpartition('df'))
+print("asdf".rpartition('asdf'))
+print("asdf".rpartition('asdfa'))
+print("asdf".rpartition('fasdf'))
+print("asdf".rpartition('fasdfa'))
+print("abba".rpartition('a'))
+print("abba".rpartition('b'))
+
+try:
+    print("asdf".rpartition(1))
+except TypeError:
+    print("Raised TypeError")
+else:
+    print("Did not raise TypeError")
+
+try:
+    print("asdf".rpartition(''))
+except ValueError:
+    print("Raised ValueError")
+else:
+    print("Did not raise ValueError")
-- 
cgit v1.2.3


From 0a6894c24b0d760755253c10a59824c68a40701e Mon Sep 17 00:00:00 2001
From: xbe <xbe@machine>
Date: Fri, 21 Mar 2014 01:12:26 -0700
Subject: str.(r)partition: factor out duplicate code.

Switch str.rpartition to search from left to right. Factor the
duplicate code into one helper function.
---
 py/objstr.c | 57 +++++++++++++++++++--------------------------------------
 1 file changed, 19 insertions(+), 38 deletions(-)

(limited to 'py/objstr.c')

diff --git a/py/objstr.c b/py/objstr.c
index c71993578..c2b3f8d4c 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -520,63 +520,44 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
     return MP_OBJ_NEW_SMALL_INT(num_occurrences);
 }
 
-STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
+STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, bool rpartition) {
     assert(MP_OBJ_IS_STR(self_in));
     if (!MP_OBJ_IS_STR(arg)) {
         nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                                                "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg)));
     }
-
     GET_STR_DATA_LEN(self_in, str, str_len);
     GET_STR_DATA_LEN(arg, sep, sep_len);
+    mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
 
     if (sep_len == 0) {
         nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
     }
+    if (rpartition) {
+        result[2] = mp_obj_new_str(str, str_len, false);
+    } else {
+        result[0] = mp_obj_new_str(str, str_len, false);
+    }
 
     for (machine_uint_t str_index = 0; str_index + sep_len <= str_len; str_index++) {
         if (memcmp(&str[str_index], sep, sep_len) == 0) {
-            mp_obj_t items[] = {mp_obj_new_str(str, str_index, false), arg,
-                                mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false)};
-            return mp_obj_new_tuple(3, items);
+            result[0] = mp_obj_new_str(str, str_index, false);
+            result[1] = arg;
+            result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false);
+            if (!rpartition) {
+                break;
+            }
         }
     }
-    mp_obj_t items[] = {mp_obj_new_str(str, str_len, false), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
-    return mp_obj_new_tuple(3, items);
+    return mp_obj_new_tuple(3, result);
 }
 
-STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
-    assert(MP_OBJ_IS_STR(self_in));
-    if (!MP_OBJ_IS_STR(arg)) {
-        nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
-                                               "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg)));
-    }
-
-    GET_STR_DATA_LEN(self_in, str, str_len);
-    GET_STR_DATA_LEN(arg, sep, sep_len);
-
-    if (sep_len == 0) {
-        nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
-    }
-
-    if (sep_len > str_len) {
-        goto not_found;
-    }
-
-    for (machine_uint_t str_index = str_len; ; str_index--) {
-        if (memcmp(&str[str_index - sep_len], sep, sep_len) == 0) {
-            mp_obj_t items[] = {mp_obj_new_str(str, str_index - sep_len, false), arg,
-                                mp_obj_new_str(str + str_index, str_len - str_index, false)};
-            return mp_obj_new_tuple(3, items);
-        }
-        if (str_index - sep_len == 0) {
-            break;
-        }
-    }
+STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg, bool partition) {
+    return str_partitioner(self_in, arg, false);
+}
 
-not_found: ;
-    mp_obj_t items[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), mp_obj_new_str(str, str_len, false)};
-    return mp_obj_new_tuple(3, items);
+STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg, bool partition) {
+    return str_partitioner(self_in, arg, true);
 }
 
 STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) {
-- 
cgit v1.2.3


From b035db355a995222588635d937585a7f5ab7dc93 Mon Sep 17 00:00:00 2001
From: Damien George <damien.p.george@gmail.com>
Date: Fri, 21 Mar 2014 20:39:40 +0000
Subject: py: Make str.[r]partition more efficient.

---
 py/objstr.c | 46 +++++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 15 deletions(-)

(limited to 'py/objstr.c')

diff --git a/py/objstr.c b/py/objstr.c
index c2b3f8d4c..77cefa82b 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -520,44 +520,60 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) {
     return MP_OBJ_NEW_SMALL_INT(num_occurrences);
 }
 
-STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, bool rpartition) {
+STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t direction) {
     assert(MP_OBJ_IS_STR(self_in));
     if (!MP_OBJ_IS_STR(arg)) {
         nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError,
                                                "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg)));
     }
+
     GET_STR_DATA_LEN(self_in, str, str_len);
     GET_STR_DATA_LEN(arg, sep, sep_len);
-    mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
 
     if (sep_len == 0) {
         nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator"));
     }
-    if (rpartition) {
-        result[2] = mp_obj_new_str(str, str_len, false);
+
+    mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)};
+
+    if (direction > 0) {
+        result[0] = self_in;
     } else {
-        result[0] = mp_obj_new_str(str, str_len, false);
+        result[2] = self_in;
     }
 
-    for (machine_uint_t str_index = 0; str_index + sep_len <= str_len; str_index++) {
-        if (memcmp(&str[str_index], sep, sep_len) == 0) {
-            result[0] = mp_obj_new_str(str, str_index, false);
-            result[1] = arg;
-            result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false);
-            if (!rpartition) {
+    if (str_len >= sep_len) {
+        machine_uint_t str_index, str_index_end;
+        if (direction > 0) {
+            str_index = 0;
+            str_index_end = str_len - sep_len;
+        } else {
+            str_index = str_len - sep_len;
+            str_index_end = 0;
+        }
+        for (;;) {
+            if (memcmp(&str[str_index], sep, sep_len) == 0) {
+                result[0] = mp_obj_new_str(str, str_index, false);
+                result[1] = arg;
+                result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false);
                 break;
             }
+            if (str_index == str_index_end) {
+                break;
+            }
+            str_index += direction;
         }
     }
+
     return mp_obj_new_tuple(3, result);
 }
 
-STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg, bool partition) {
-    return str_partitioner(self_in, arg, false);
+STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) {
+    return str_partitioner(self_in, arg, 1);
 }
 
-STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg, bool partition) {
-    return str_partitioner(self_in, arg, true);
+STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) {
+    return str_partitioner(self_in, arg, -1);
 }
 
 STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) {
-- 
cgit v1.2.3


From 5972b4c05ffe6973820d24161f604ae8db0d299b Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky <pfalcon@users.sourceforge.net>
Date: Thu, 20 Mar 2014 16:47:44 +0200
Subject: objstr: Switch from in-object string data to ptr to separate memory
 area.

This is a prerequisite for an efficient implementation of str<->bytes
conversion, and making that conversion efficient is required given the
unfortunate str vs bytes dichotomy in Python 3.
---
 py/objstr.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'py/objstr.c')

diff --git a/py/objstr.c b/py/objstr.c
index 77cefa82b..3c5cabe05 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -14,7 +14,7 @@ typedef struct _mp_obj_str_t {
     mp_obj_base_t base;
     machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c)
     machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte
-    byte data[];
+    const byte *data;
 } mp_obj_str_t;
 
 // use this macro to extract the string hash
@@ -636,10 +636,12 @@ const mp_obj_type_t bytes_type = {
 };
 
 mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
-    mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
+    mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
     o->base.type = type;
     o->len = len;
-    *data = o->data;
+    byte *p = m_new(byte, len + 1);
+    o->data = p;
+    *data = p;
     return o;
 }
 
@@ -647,17 +649,22 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
     assert(MP_OBJ_IS_STR(o_in));
     mp_obj_str_t *o = o_in;
     o->hash = qstr_compute_hash(o->data, o->len);
-    o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+    byte *p = (byte*)o->data;
+    p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
     return o;
 }
 
 STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) {
-    mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1);
+    mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
     o->base.type = type;
-    o->hash = qstr_compute_hash(data, len);
     o->len = len;
-    memcpy(o->data, data, len * sizeof(byte));
-    o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+    if (data) {
+        o->hash = qstr_compute_hash(data, len);
+        byte *p = m_new(byte, len + 1);
+        o->data = p;
+        memcpy(p, data, len * sizeof(byte));
+        p[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings
+    }
     return o;
 }
 
-- 
cgit v1.2.3


From be020c27a870feff9773c348fa04be8c54873f70 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky <pfalcon@users.sourceforge.net>
Date: Fri, 21 Mar 2014 11:39:01 +0200
Subject: py: Make 'str' be a proper type, support standard constructor args.

---
 py/builtin.c | 10 ----------
 py/objstr.c  | 36 ++++++++++++++++++++++++++++++++++++
 py/runtime.c |  2 +-
 3 files changed, 37 insertions(+), 11 deletions(-)

(limited to 'py/objstr.c')

diff --git a/py/builtin.c b/py/builtin.c
index 2e0627fa5..11b86111e 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -375,16 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k
 
 MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
 
-STATIC mp_obj_t mp_builtin_str(mp_obj_t o_in) {
-    vstr_t *vstr = vstr_new();
-    mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR);
-    mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
-    vstr_free(vstr);
-    return s;
-}
-
-MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str);
-
 // TODO: This should be type, this is just quick CPython compat hack
 STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) {
     if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) {
diff --git a/py/objstr.c b/py/objstr.c
index 3c5cabe05..44e84d709 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -28,6 +28,7 @@ typedef struct _mp_obj_str_t {
 
 STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str);
 STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str);
+STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len);
 
 /******************************************************************************/
 /* str                                                                        */
@@ -78,6 +79,40 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env,
     }
 }
 
+STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    switch (n_args) {
+        case 0:
+            return MP_OBJ_NEW_QSTR(MP_QSTR_);
+
+        case 1:
+        {
+            vstr_t *vstr = vstr_new();
+            mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR);
+            mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false);
+            vstr_free(vstr);
+            return s;
+        }
+
+        case 2:
+        case 3:
+        {
+            // TODO: validate 2nd/3rd args
+            if (!MP_OBJ_IS_TYPE(args[0], &bytes_type)) {
+                nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected"));
+            }
+            GET_STR_DATA_LEN(args[0], str_data, str_len);
+            GET_STR_HASH(args[0], str_hash);
+            mp_obj_str_t *o = str_new(&str_type, NULL, str_len);
+            o->data = str_data;
+            o->hash = str_hash;
+            return o;
+        }
+
+        default:
+            nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments"));
+    }
+}
+
 // like strstr but with specified length and allows \0 bytes
 // TODO replace with something more efficient/standard
 STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
@@ -619,6 +654,7 @@ const mp_obj_type_t str_type = {
     { &mp_type_type },
     .name = MP_QSTR_str,
     .print = str_print,
+    .make_new = str_make_new,
     .binary_op = str_binary_op,
     .getiter = mp_obj_new_str_iterator,
     .methods = str_type_methods,
diff --git a/py/runtime.c b/py/runtime.c
index c268fd546..2ab97ed18 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -102,6 +102,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
     { MP_QSTR_list, (mp_obj_t)&list_type },
     { MP_QSTR_map, (mp_obj_t)&map_type },
     { MP_QSTR_set, (mp_obj_t)&set_type },
+    { MP_QSTR_str, (mp_obj_t)&str_type },
     { MP_QSTR_super, (mp_obj_t)&super_type },
     { MP_QSTR_tuple, (mp_obj_t)&tuple_type },
     { MP_QSTR_type, (mp_obj_t)&mp_type_type },
@@ -137,7 +138,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
     { MP_QSTR_repr, (mp_obj_t)&mp_builtin_repr_obj },
     { MP_QSTR_sorted, (mp_obj_t)&mp_builtin_sorted_obj },
     { MP_QSTR_sum, (mp_obj_t)&mp_builtin_sum_obj },
-    { MP_QSTR_str, (mp_obj_t)&mp_builtin_str_obj },
     { MP_QSTR_bytearray, (mp_obj_t)&mp_builtin_bytearray_obj },
 
     // built-in exceptions
-- 
cgit v1.2.3


From 1ecea7c7539e73f105fef25da8a3bde7783da755 Mon Sep 17 00:00:00 2001
From: Paul Sokolovsky <pfalcon@users.sourceforge.net>
Date: Fri, 21 Mar 2014 23:46:59 +0200
Subject: py: Make 'bytes' be a proper type, support standard constructor args.

---
 py/builtin.c          | 12 --------
 py/objstr.c           | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 py/runtime.c          |  2 +-
 tests/basics/bytes.py | 28 +++++++++++++++++++
 4 files changed, 105 insertions(+), 14 deletions(-)

(limited to 'py/objstr.c')

diff --git a/py/builtin.c b/py/builtin.c
index 11b86111e..93e91072c 100644
--- a/py/builtin.c
+++ b/py/builtin.c
@@ -375,18 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k
 
 MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted);
 
-// TODO: This should be type, this is just quick CPython compat hack
-STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) {
-    if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) {
-        assert(0);
-    }
-    // Currently, MicroPython strings are mix between CPython byte and unicode
-    // strings. So, conversion is null so far.
-    return args[0];
-}
-
-MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_bytes_obj, 1, 3, mp_builtin_bytes);
-
 STATIC mp_obj_t mp_builtin_id(mp_obj_t o_in) {
     return mp_obj_new_int((machine_int_t)o_in);
 }
diff --git a/py/objstr.c b/py/objstr.c
index 44e84d709..35a948700 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -17,6 +17,8 @@ typedef struct _mp_obj_str_t {
     const byte *data;
 } mp_obj_str_t;
 
+const mp_obj_t mp_const_empty_bytes;
+
 // use this macro to extract the string hash
 #define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; }
 
@@ -113,6 +115,75 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_
     }
 }
 
+STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) {
+    if (n_args == 0) {
+        return mp_const_empty_bytes;
+    }
+
+    if (MP_OBJ_IS_STR(args[0])) {
+        if (n_args < 2 || n_args > 3) {
+            goto wrong_args;
+        }
+        GET_STR_DATA_LEN(args[0], str_data, str_len);
+        GET_STR_HASH(args[0], str_hash);
+        mp_obj_str_t *o = str_new(&bytes_type, NULL, str_len);
+        o->data = str_data;
+        o->hash = str_hash;
+        return o;
+    }
+
+    if (n_args > 1) {
+        goto wrong_args;
+    }
+
+    if (MP_OBJ_IS_SMALL_INT(args[0])) {
+        uint len = MP_OBJ_SMALL_INT_VALUE(args[0]);
+        byte *data;
+
+        mp_obj_t o = mp_obj_str_builder_start(&bytes_type, len, &data);
+        memset(data, 0, len);
+        return mp_obj_str_builder_end(o);
+    }
+
+    int len;
+    byte *data;
+    vstr_t *vstr = NULL;
+    mp_obj_t o = NULL;
+    // Try to create array of exact len if initializer len is known
+    mp_obj_t len_in = mp_obj_len_maybe(args[0]);
+    if (len_in == MP_OBJ_NULL) {
+        len = -1;
+        vstr = vstr_new();
+    } else {
+        len = MP_OBJ_SMALL_INT_VALUE(len_in);
+        o = mp_obj_str_builder_start(&bytes_type, len, &data);
+    }
+
+    mp_obj_t iterable = rt_getiter(args[0]);
+    mp_obj_t item;
+    while ((item = rt_iternext(iterable)) != mp_const_stop_iteration) {
+        if (len == -1) {
+            vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item));
+        } else {
+            *data++ = MP_OBJ_SMALL_INT_VALUE(item);
+        }
+    }
+
+    if (len == -1) {
+        vstr_shrink(vstr);
+        // TODO: Optimize, borrow buffer from vstr
+        len = vstr_len(vstr);
+        o = mp_obj_str_builder_start(&bytes_type, len, &data);
+        memcpy(data, vstr_str(vstr), len);
+        vstr_free(vstr);
+    }
+
+    return mp_obj_str_builder_end(o);
+
+wrong_args:
+        nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments"));
+}
+
 // like strstr but with specified length and allows \0 bytes
 // TODO replace with something more efficient/standard
 STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) {
@@ -666,11 +737,16 @@ const mp_obj_type_t bytes_type = {
     { &mp_type_type },
     .name = MP_QSTR_bytes,
     .print = str_print,
+    .make_new = bytes_make_new,
     .binary_op = str_binary_op,
     .getiter = mp_obj_new_bytes_iterator,
     .methods = str_type_methods,
 };
 
+// the zero-length bytes
+STATIC const mp_obj_str_t empty_bytes_obj = {{&bytes_type}, 0, 0, NULL};
+const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj;
+
 mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) {
     mp_obj_str_t *o = m_new_obj(mp_obj_str_t);
     o->base.type = type;
@@ -682,7 +758,6 @@ mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **da
 }
 
 mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) {
-    assert(MP_OBJ_IS_STR(o_in));
     mp_obj_str_t *o = o_in;
     o->hash = qstr_compute_hash(o->data, o->len);
     byte *p = (byte*)o->data;
diff --git a/py/runtime.c b/py/runtime.c
index 2ab97ed18..4bcb91c54 100644
--- a/py/runtime.c
+++ b/py/runtime.c
@@ -89,6 +89,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
 
     // built-in types
     { MP_QSTR_bool, (mp_obj_t)&bool_type },
+    { MP_QSTR_bytes, (mp_obj_t)&bytes_type },
 #if MICROPY_ENABLE_FLOAT
     { MP_QSTR_complex, (mp_obj_t)&mp_type_complex },
 #endif
@@ -115,7 +116,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = {
     { MP_QSTR_abs, (mp_obj_t)&mp_builtin_abs_obj },
     { MP_QSTR_all, (mp_obj_t)&mp_builtin_all_obj },
     { MP_QSTR_any, (mp_obj_t)&mp_builtin_any_obj },
-    { MP_QSTR_bytes, (mp_obj_t)&mp_builtin_bytes_obj },
     { MP_QSTR_callable, (mp_obj_t)&mp_builtin_callable_obj },
     { MP_QSTR_chr, (mp_obj_t)&mp_builtin_chr_obj },
     { MP_QSTR_dir, (mp_obj_t)&mp_builtin_dir_obj },
diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py
index 7d0cf22d4..a084bc399 100644
--- a/tests/basics/bytes.py
+++ b/tests/basics/bytes.py
@@ -4,8 +4,36 @@ print(str(a))
 print(repr(a))
 print(a[0], a[2])
 print(a[-1])
+print(str(a, "utf-8"))
+print(str(a, "utf-8", "ignore"))
+try:
+    str(a, "utf-8", "ignore", "toomuch")
+except TypeError:
+    print("TypeError")
 
 s = 0
 for i in a:
     s += i
 print(s)
+
+
+print(bytes("abc", "utf-8"))
+print(bytes("abc", "utf-8", "replace"))
+try:
+    bytes("abc")
+except TypeError:
+    print("TypeError")
+try:
+    bytes("abc", "utf-8", "replace", "toomuch")
+except TypeError:
+    print("TypeError")
+
+print(bytes(3))
+
+print(bytes([3, 2, 1]))
+print(bytes(range(5)))
+
+def gen():
+    for i in range(4):
+        yield i
+print(bytes(gen()))
-- 
cgit v1.2.3