From 613a8e3edf078c284bd981426cc5a256eabb2323 Mon Sep 17 00:00:00 2001 From: xbe Date: Tue, 18 Mar 2014 00:06:29 -0700 Subject: Implement str.partition and add tests for it. --- py/objstr.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index d660bf952..03711debb 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -520,6 +520,31 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) { return MP_OBJ_NEW_SMALL_INT(num_occurrences); } +STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) { + assert(MP_OBJ_IS_STR(self_in)); + if (!MP_OBJ_IS_STR(arg)) { + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, + "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg))); + } + + GET_STR_DATA_LEN(self_in, str, str_len); + GET_STR_DATA_LEN(arg, sep, sep_len); + + if (sep_len == 0) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator")); + } + + for (machine_uint_t str_index = 0; str_index + sep_len <= str_len; str_index++) { + if (memcmp(&str[str_index], sep, sep_len) == 0) { + mp_obj_t items[] = {mp_obj_new_str(str, str_index, false), arg, + mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false)}; + return mp_obj_new_tuple(3, items); + } + } + mp_obj_t items[] = {mp_obj_new_str(str, str_len, false), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)}; + return mp_obj_new_tuple(3, items); +} + STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) { if (flags == BUFFER_READ) { GET_STR_DATA_LEN(self_in, str_data, str_len); @@ -542,6 +567,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_strip_obj, 1, 2, str_strip); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count); +STATIC 
MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition); STATIC const mp_method_t str_type_methods[] = { { "find", &str_find_obj }, @@ -552,6 +578,7 @@ STATIC const mp_method_t str_type_methods[] = { { "format", &str_format_obj }, { "replace", &str_replace_obj }, { "count", &str_count_obj }, + { "partition", &str_partition_obj }, { NULL, NULL }, // end-of-list sentinel }; -- cgit v1.2.3 From 4504ea8007bbc97aef51ced20a9ff3f460cd7caf Mon Sep 17 00:00:00 2001 From: xbe Date: Wed, 19 Mar 2014 00:46:14 -0700 Subject: Implement str.rpartition and add tests for it. --- py/objstr.c | 36 ++++++++++++++++++++++++++++++++++++ tests/basics/string_rpartition.py | 29 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+) create mode 100644 tests/basics/string_rpartition.py (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 03711debb..c71993578 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -545,6 +545,40 @@ STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) { return mp_obj_new_tuple(3, items); } +STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) { + assert(MP_OBJ_IS_STR(self_in)); + if (!MP_OBJ_IS_STR(arg)) { + nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, + "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg))); + } + + GET_STR_DATA_LEN(self_in, str, str_len); + GET_STR_DATA_LEN(arg, sep, sep_len); + + if (sep_len == 0) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator")); + } + + if (sep_len > str_len) { + goto not_found; + } + + for (machine_uint_t str_index = str_len; ; str_index--) { + if (memcmp(&str[str_index - sep_len], sep, sep_len) == 0) { + mp_obj_t items[] = {mp_obj_new_str(str, str_index - sep_len, false), arg, + mp_obj_new_str(str + str_index, str_len - str_index, false)}; + return mp_obj_new_tuple(3, items); + } + if (str_index - sep_len == 0) { + break; + } + } + +not_found: ; + mp_obj_t items[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), 
MP_OBJ_NEW_QSTR(MP_QSTR_), mp_obj_new_str(str, str_len, false)}; + return mp_obj_new_tuple(3, items); +} + STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) { if (flags == BUFFER_READ) { GET_STR_DATA_LEN(self_in, str_data, str_len); @@ -568,6 +602,7 @@ STATIC MP_DEFINE_CONST_FUN_OBJ_VAR(str_format_obj, 1, str_format); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_replace_obj, 3, 4, str_replace); STATIC MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_count_obj, 2, 4, str_count); STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_partition_obj, str_partition); +STATIC MP_DEFINE_CONST_FUN_OBJ_2(str_rpartition_obj, str_rpartition); STATIC const mp_method_t str_type_methods[] = { { "find", &str_find_obj }, @@ -579,6 +614,7 @@ STATIC const mp_method_t str_type_methods[] = { { "replace", &str_replace_obj }, { "count", &str_count_obj }, { "partition", &str_partition_obj }, + { "rpartition", &str_rpartition_obj }, { NULL, NULL }, // end-of-list sentinel }; diff --git a/tests/basics/string_rpartition.py b/tests/basics/string_rpartition.py new file mode 100644 index 000000000..656121c94 --- /dev/null +++ b/tests/basics/string_rpartition.py @@ -0,0 +1,29 @@ +print("asdf".rpartition('g')) +print("asdf".rpartition('a')) +print("asdf".rpartition('s')) +print("asdf".rpartition('f')) +print("asdf".rpartition('d')) +print("asdf".rpartition('asd')) +print("asdf".rpartition('sdf')) +print("asdf".rpartition('as')) +print("asdf".rpartition('df')) +print("asdf".rpartition('asdf')) +print("asdf".rpartition('asdfa')) +print("asdf".rpartition('fasdf')) +print("asdf".rpartition('fasdfa')) +print("abba".rpartition('a')) +print("abba".rpartition('b')) + +try: + print("asdf".rpartition(1)) +except TypeError: + print("Raised TypeError") +else: + print("Did not raise TypeError") + +try: + print("asdf".rpartition('')) +except ValueError: + print("Raised ValueError") +else: + print("Did not raise ValueError") -- cgit v1.2.3 From 0a6894c24b0d760755253c10a59824c68a40701e Mon Sep 17 
00:00:00 2001 From: xbe Date: Fri, 21 Mar 2014 01:12:26 -0700 Subject: str.(r)partition: factor out duplicate code. Switch str.rpartition to search from left to right. Factor the duplicate code into one helper function. --- py/objstr.c | 57 +++++++++++++++++++-------------------------------------- 1 file changed, 19 insertions(+), 38 deletions(-) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index c71993578..c2b3f8d4c 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -520,63 +520,44 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) { return MP_OBJ_NEW_SMALL_INT(num_occurrences); } -STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) { +STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, bool rpartition) { assert(MP_OBJ_IS_STR(self_in)); if (!MP_OBJ_IS_STR(arg)) { nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg))); } - GET_STR_DATA_LEN(self_in, str, str_len); GET_STR_DATA_LEN(arg, sep, sep_len); + mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)}; if (sep_len == 0) { nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator")); } + if (rpartition) { + result[2] = mp_obj_new_str(str, str_len, false); + } else { + result[0] = mp_obj_new_str(str, str_len, false); + } for (machine_uint_t str_index = 0; str_index + sep_len <= str_len; str_index++) { if (memcmp(&str[str_index], sep, sep_len) == 0) { - mp_obj_t items[] = {mp_obj_new_str(str, str_index, false), arg, - mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false)}; - return mp_obj_new_tuple(3, items); + result[0] = mp_obj_new_str(str, str_index, false); + result[1] = arg; + result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false); + if (!rpartition) { + break; + } } } - mp_obj_t items[] = {mp_obj_new_str(str, str_len, false), MP_OBJ_NEW_QSTR(MP_QSTR_), 
MP_OBJ_NEW_QSTR(MP_QSTR_)}; - return mp_obj_new_tuple(3, items); + return mp_obj_new_tuple(3, result); } -STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) { - assert(MP_OBJ_IS_STR(self_in)); - if (!MP_OBJ_IS_STR(arg)) { - nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, - "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg))); - } - - GET_STR_DATA_LEN(self_in, str, str_len); - GET_STR_DATA_LEN(arg, sep, sep_len); - - if (sep_len == 0) { - nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator")); - } - - if (sep_len > str_len) { - goto not_found; - } - - for (machine_uint_t str_index = str_len; ; str_index--) { - if (memcmp(&str[str_index - sep_len], sep, sep_len) == 0) { - mp_obj_t items[] = {mp_obj_new_str(str, str_index - sep_len, false), arg, - mp_obj_new_str(str + str_index, str_len - str_index, false)}; - return mp_obj_new_tuple(3, items); - } - if (str_index - sep_len == 0) { - break; - } - } +STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg, bool partition) { + return str_partitioner(self_in, arg, false); +} -not_found: ; - mp_obj_t items[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), mp_obj_new_str(str, str_len, false)}; - return mp_obj_new_tuple(3, items); +STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg, bool partition) { + return str_partitioner(self_in, arg, true); } STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) { -- cgit v1.2.3 From b035db355a995222588635d937585a7f5ab7dc93 Mon Sep 17 00:00:00 2001 From: Damien George Date: Fri, 21 Mar 2014 20:39:40 +0000 Subject: py: Make str.[r]partition more efficient. 
--- py/objstr.c | 46 +++++++++++++++++++++++++++++++--------------- 1 file changed, 31 insertions(+), 15 deletions(-) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index c2b3f8d4c..77cefa82b 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -520,44 +520,60 @@ STATIC mp_obj_t str_count(uint n_args, const mp_obj_t *args) { return MP_OBJ_NEW_SMALL_INT(num_occurrences); } -STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, bool rpartition) { +STATIC mp_obj_t str_partitioner(mp_obj_t self_in, mp_obj_t arg, machine_int_t direction) { assert(MP_OBJ_IS_STR(self_in)); if (!MP_OBJ_IS_STR(arg)) { nlr_jump(mp_obj_new_exception_msg_varg(&mp_type_TypeError, "Can't convert '%s' object to str implicitly", mp_obj_get_type_str(arg))); } + GET_STR_DATA_LEN(self_in, str, str_len); GET_STR_DATA_LEN(arg, sep, sep_len); - mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)}; if (sep_len == 0) { nlr_jump(mp_obj_new_exception_msg(&mp_type_ValueError, "empty separator")); } - if (rpartition) { - result[2] = mp_obj_new_str(str, str_len, false); + + mp_obj_t result[] = {MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_), MP_OBJ_NEW_QSTR(MP_QSTR_)}; + + if (direction > 0) { + result[0] = self_in; } else { - result[0] = mp_obj_new_str(str, str_len, false); + result[2] = self_in; } - for (machine_uint_t str_index = 0; str_index + sep_len <= str_len; str_index++) { - if (memcmp(&str[str_index], sep, sep_len) == 0) { - result[0] = mp_obj_new_str(str, str_index, false); - result[1] = arg; - result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false); - if (!rpartition) { + if (str_len >= sep_len) { + machine_uint_t str_index, str_index_end; + if (direction > 0) { + str_index = 0; + str_index_end = str_len - sep_len; + } else { + str_index = str_len - sep_len; + str_index_end = 0; + } + for (;;) { + if (memcmp(&str[str_index], sep, sep_len) == 0) { + result[0] = mp_obj_new_str(str, 
str_index, false); + result[1] = arg; + result[2] = mp_obj_new_str(str + str_index + sep_len, str_len - str_index - sep_len, false); break; } + if (str_index == str_index_end) { + break; + } + str_index += direction; } } + return mp_obj_new_tuple(3, result); } -STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg, bool partition) { - return str_partitioner(self_in, arg, false); +STATIC mp_obj_t str_partition(mp_obj_t self_in, mp_obj_t arg) { + return str_partitioner(self_in, arg, 1); } -STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg, bool partition) { - return str_partitioner(self_in, arg, true); +STATIC mp_obj_t str_rpartition(mp_obj_t self_in, mp_obj_t arg) { + return str_partitioner(self_in, arg, -1); } STATIC machine_int_t str_get_buffer(mp_obj_t self_in, buffer_info_t *bufinfo, int flags) { -- cgit v1.2.3 From 5972b4c05ffe6973820d24161f604ae8db0d299b Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Thu, 20 Mar 2014 16:47:44 +0200 Subject: objstr: Switch from in-object string data to ptr to separate memory area. This is pre-requisite for having efficient implementation of str<->bytes conversion, and having that efficient is required with unfortunate str vs bytes dichotomy in Python3. 
--- py/objstr.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'py/objstr.c') diff --git a/py/objstr.c b/py/objstr.c index 77cefa82b..3c5cabe05 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -14,7 +14,7 @@ typedef struct _mp_obj_str_t { mp_obj_base_t base; machine_uint_t hash : 16; // XXX here we assume the hash size is 16 bits (it is at the moment; see qstr.c) machine_uint_t len : 16; // len == number of bytes used in data, alloc = len + 1 because (at the moment) we also append a null byte - byte data[]; + const byte *data; } mp_obj_str_t; // use this macro to extract the string hash @@ -636,10 +636,12 @@ const mp_obj_type_t bytes_type = { }; mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) { - mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); + mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; o->len = len; - *data = o->data; + byte *p = m_new(byte, len + 1); + o->data = p; + *data = p; return o; } @@ -647,17 +649,22 @@ mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { assert(MP_OBJ_IS_STR(o_in)); mp_obj_str_t *o = o_in; o->hash = qstr_compute_hash(o->data, o->len); - o->data[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + byte *p = (byte*)o->data; + p[o->len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings return o; } STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len) { - mp_obj_str_t *o = m_new_obj_var(mp_obj_str_t, byte, len + 1); + mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; - o->hash = qstr_compute_hash(data, len); o->len = len; - memcpy(o->data, data, len * sizeof(byte)); - o->data[len] = '\0'; // for now we add null for compatibility with C ASCIIZ strings + if (data) { + o->hash = qstr_compute_hash(data, len); + byte *p = m_new(byte, len + 1); + o->data = p; + memcpy(p, data, len * sizeof(byte)); + p[len] = '\0'; // for now we add null for compatibility 
with C ASCIIZ strings + } return o; } -- cgit v1.2.3 From be020c27a870feff9773c348fa04be8c54873f70 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 21 Mar 2014 11:39:01 +0200 Subject: py: Make 'str' be a proper type, support standard constructor args. --- py/builtin.c | 10 ---------- py/objstr.c | 36 ++++++++++++++++++++++++++++++++++++ py/runtime.c | 2 +- 3 files changed, 37 insertions(+), 11 deletions(-) (limited to 'py/objstr.c') diff --git a/py/builtin.c b/py/builtin.c index 2e0627fa5..11b86111e 100644 --- a/py/builtin.c +++ b/py/builtin.c @@ -375,16 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted); -STATIC mp_obj_t mp_builtin_str(mp_obj_t o_in) { - vstr_t *vstr = vstr_new(); - mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, o_in, PRINT_STR); - mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); - vstr_free(vstr); - return s; -} - -MP_DEFINE_CONST_FUN_OBJ_1(mp_builtin_str_obj, mp_builtin_str); - // TODO: This should be type, this is just quick CPython compat hack STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) { if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) { diff --git a/py/objstr.c b/py/objstr.c index 3c5cabe05..44e84d709 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -28,6 +28,7 @@ typedef struct _mp_obj_str_t { STATIC mp_obj_t mp_obj_new_str_iterator(mp_obj_t str); STATIC mp_obj_t mp_obj_new_bytes_iterator(mp_obj_t str); +STATIC mp_obj_t str_new(const mp_obj_type_t *type, const byte* data, uint len); /******************************************************************************/ /* str */ @@ -78,6 +79,40 @@ STATIC void str_print(void (*print)(void *env, const char *fmt, ...), void *env, } } +STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { + switch (n_args) { + case 0: + return MP_OBJ_NEW_QSTR(MP_QSTR_); + + 
case 1: + { + vstr_t *vstr = vstr_new(); + mp_obj_print_helper((void (*)(void*, const char*, ...))vstr_printf, vstr, args[0], PRINT_STR); + mp_obj_t s = mp_obj_new_str((byte*)vstr->buf, vstr->len, false); + vstr_free(vstr); + return s; + } + + case 2: + case 3: + { + // TODO: validate 2nd/3rd args + if (!MP_OBJ_IS_TYPE(args[0], &bytes_type)) { + nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "bytes expected")); + } + GET_STR_DATA_LEN(args[0], str_data, str_len); + GET_STR_HASH(args[0], str_hash); + mp_obj_str_t *o = str_new(&str_type, NULL, str_len); + o->data = str_data; + o->hash = str_hash; + return o; + } + + default: + nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "str takes at most 3 arguments")); + } +} + // like strstr but with specified length and allows \0 bytes // TODO replace with something more efficient/standard STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) { @@ -619,6 +654,7 @@ const mp_obj_type_t str_type = { { &mp_type_type }, .name = MP_QSTR_str, .print = str_print, + .make_new = str_make_new, .binary_op = str_binary_op, .getiter = mp_obj_new_str_iterator, .methods = str_type_methods, diff --git a/py/runtime.c b/py/runtime.c index c268fd546..2ab97ed18 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -102,6 +102,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = { { MP_QSTR_list, (mp_obj_t)&list_type }, { MP_QSTR_map, (mp_obj_t)&map_type }, { MP_QSTR_set, (mp_obj_t)&set_type }, + { MP_QSTR_str, (mp_obj_t)&str_type }, { MP_QSTR_super, (mp_obj_t)&super_type }, { MP_QSTR_tuple, (mp_obj_t)&tuple_type }, { MP_QSTR_type, (mp_obj_t)&mp_type_type }, @@ -137,7 +138,6 @@ STATIC const mp_builtin_elem_t builtin_table[] = { { MP_QSTR_repr, (mp_obj_t)&mp_builtin_repr_obj }, { MP_QSTR_sorted, (mp_obj_t)&mp_builtin_sorted_obj }, { MP_QSTR_sum, (mp_obj_t)&mp_builtin_sum_obj }, - { MP_QSTR_str, (mp_obj_t)&mp_builtin_str_obj }, { MP_QSTR_bytearray, (mp_obj_t)&mp_builtin_bytearray_obj }, // built-in 
exceptions -- cgit v1.2.3 From 1ecea7c7539e73f105fef25da8a3bde7783da755 Mon Sep 17 00:00:00 2001 From: Paul Sokolovsky Date: Fri, 21 Mar 2014 23:46:59 +0200 Subject: py: Make 'bytes' be a proper type, support standard constructor args. --- py/builtin.c | 12 -------- py/objstr.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++- py/runtime.c | 2 +- tests/basics/bytes.py | 28 +++++++++++++++++++ 4 files changed, 105 insertions(+), 14 deletions(-) (limited to 'py/objstr.c') diff --git a/py/builtin.c b/py/builtin.c index 11b86111e..93e91072c 100644 --- a/py/builtin.c +++ b/py/builtin.c @@ -375,18 +375,6 @@ STATIC mp_obj_t mp_builtin_sorted(uint n_args, const mp_obj_t *args, mp_map_t *k MP_DEFINE_CONST_FUN_OBJ_KW(mp_builtin_sorted_obj, 1, mp_builtin_sorted); -// TODO: This should be type, this is just quick CPython compat hack -STATIC mp_obj_t mp_builtin_bytes(uint n_args, const mp_obj_t *args) { - if (!MP_OBJ_IS_QSTR(args[0]) && !MP_OBJ_IS_TYPE(args[0], &str_type)) { - assert(0); - } - // Currently, MicroPython strings are mix between CPython byte and unicode - // strings. So, conversion is null so far. 
- return args[0]; -} - -MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mp_builtin_bytes_obj, 1, 3, mp_builtin_bytes); - STATIC mp_obj_t mp_builtin_id(mp_obj_t o_in) { return mp_obj_new_int((machine_int_t)o_in); } diff --git a/py/objstr.c b/py/objstr.c index 44e84d709..35a948700 100644 --- a/py/objstr.c +++ b/py/objstr.c @@ -17,6 +17,8 @@ typedef struct _mp_obj_str_t { const byte *data; } mp_obj_str_t; +const mp_obj_t mp_const_empty_bytes; + // use this macro to extract the string hash #define GET_STR_HASH(str_obj_in, str_hash) uint str_hash; if (MP_OBJ_IS_QSTR(str_obj_in)) { str_hash = qstr_hash(MP_OBJ_QSTR_VALUE(str_obj_in)); } else { str_hash = ((mp_obj_str_t*)str_obj_in)->hash; } @@ -113,6 +115,75 @@ STATIC mp_obj_t str_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_ } } +STATIC mp_obj_t bytes_make_new(mp_obj_t type_in, uint n_args, uint n_kw, const mp_obj_t *args) { + if (n_args == 0) { + return mp_const_empty_bytes; + } + + if (MP_OBJ_IS_STR(args[0])) { + if (n_args < 2 || n_args > 3) { + goto wrong_args; + } + GET_STR_DATA_LEN(args[0], str_data, str_len); + GET_STR_HASH(args[0], str_hash); + mp_obj_str_t *o = str_new(&bytes_type, NULL, str_len); + o->data = str_data; + o->hash = str_hash; + return o; + } + + if (n_args > 1) { + goto wrong_args; + } + + if (MP_OBJ_IS_SMALL_INT(args[0])) { + uint len = MP_OBJ_SMALL_INT_VALUE(args[0]); + byte *data; + + mp_obj_t o = mp_obj_str_builder_start(&bytes_type, len, &data); + memset(data, 0, len); + return mp_obj_str_builder_end(o); + } + + int len; + byte *data; + vstr_t *vstr = NULL; + mp_obj_t o = NULL; + // Try to create array of exact len if initializer len is known + mp_obj_t len_in = mp_obj_len_maybe(args[0]); + if (len_in == MP_OBJ_NULL) { + len = -1; + vstr = vstr_new(); + } else { + len = MP_OBJ_SMALL_INT_VALUE(len_in); + o = mp_obj_str_builder_start(&bytes_type, len, &data); + } + + mp_obj_t iterable = rt_getiter(args[0]); + mp_obj_t item; + while ((item = rt_iternext(iterable)) != mp_const_stop_iteration) 
{ + if (len == -1) { + vstr_add_char(vstr, MP_OBJ_SMALL_INT_VALUE(item)); + } else { + *data++ = MP_OBJ_SMALL_INT_VALUE(item); + } + } + + if (len == -1) { + vstr_shrink(vstr); + // TODO: Optimize, borrow buffer from vstr + len = vstr_len(vstr); + o = mp_obj_str_builder_start(&bytes_type, len, &data); + memcpy(data, vstr_str(vstr), len); + vstr_free(vstr); + } + + return mp_obj_str_builder_end(o); + +wrong_args: + nlr_jump(mp_obj_new_exception_msg(&mp_type_TypeError, "wrong number of arguments")); +} + // like strstr but with specified length and allows \0 bytes // TODO replace with something more efficient/standard STATIC const byte *find_subbytes(const byte *haystack, uint hlen, const byte *needle, uint nlen) { @@ -666,11 +737,16 @@ const mp_obj_type_t bytes_type = { { &mp_type_type }, .name = MP_QSTR_bytes, .print = str_print, + .make_new = bytes_make_new, .binary_op = str_binary_op, .getiter = mp_obj_new_bytes_iterator, .methods = str_type_methods, }; +// the zero-length bytes +STATIC const mp_obj_str_t empty_bytes_obj = {{&bytes_type}, 0, 0, NULL}; +const mp_obj_t mp_const_empty_bytes = (mp_obj_t)&empty_bytes_obj; + mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **data) { mp_obj_str_t *o = m_new_obj(mp_obj_str_t); o->base.type = type; @@ -682,7 +758,6 @@ mp_obj_t mp_obj_str_builder_start(const mp_obj_type_t *type, uint len, byte **da } mp_obj_t mp_obj_str_builder_end(mp_obj_t o_in) { - assert(MP_OBJ_IS_STR(o_in)); mp_obj_str_t *o = o_in; o->hash = qstr_compute_hash(o->data, o->len); byte *p = (byte*)o->data; diff --git a/py/runtime.c b/py/runtime.c index 2ab97ed18..4bcb91c54 100644 --- a/py/runtime.c +++ b/py/runtime.c @@ -89,6 +89,7 @@ STATIC const mp_builtin_elem_t builtin_table[] = { // built-in types { MP_QSTR_bool, (mp_obj_t)&bool_type }, + { MP_QSTR_bytes, (mp_obj_t)&bytes_type }, #if MICROPY_ENABLE_FLOAT { MP_QSTR_complex, (mp_obj_t)&mp_type_complex }, #endif @@ -115,7 +116,6 @@ STATIC const mp_builtin_elem_t builtin_table[] 
= { { MP_QSTR_abs, (mp_obj_t)&mp_builtin_abs_obj }, { MP_QSTR_all, (mp_obj_t)&mp_builtin_all_obj }, { MP_QSTR_any, (mp_obj_t)&mp_builtin_any_obj }, - { MP_QSTR_bytes, (mp_obj_t)&mp_builtin_bytes_obj }, { MP_QSTR_callable, (mp_obj_t)&mp_builtin_callable_obj }, { MP_QSTR_chr, (mp_obj_t)&mp_builtin_chr_obj }, { MP_QSTR_dir, (mp_obj_t)&mp_builtin_dir_obj }, diff --git a/tests/basics/bytes.py b/tests/basics/bytes.py index 7d0cf22d4..a084bc399 100644 --- a/tests/basics/bytes.py +++ b/tests/basics/bytes.py @@ -4,8 +4,36 @@ print(str(a)) print(repr(a)) print(a[0], a[2]) print(a[-1]) +print(str(a, "utf-8")) +print(str(a, "utf-8", "ignore")) +try: + str(a, "utf-8", "ignore", "toomuch") +except TypeError: + print("TypeError") s = 0 for i in a: s += i print(s) + + +print(bytes("abc", "utf-8")) +print(bytes("abc", "utf-8", "replace")) +try: + bytes("abc") +except TypeError: + print("TypeError") +try: + bytes("abc", "utf-8", "replace", "toomuch") +except TypeError: + print("TypeError") + +print(bytes(3)) + +print(bytes([3, 2, 1])) +print(bytes(range(5))) + +def gen(): + for i in range(4): + yield i +print(bytes(gen())) -- cgit v1.2.3