diff options
author | Glenn Moloney <glenn.moloney@gmail.com> | 2025-02-25 14:36:29 +1100 |
---|---|---|
committer | Damien George <damien@micropython.org> | 2025-03-02 22:15:31 +1100 |
commit | eb45d97898abd9aae93d0c953634cabb5ea327e3 (patch) | |
tree | 17bc283a5f16724d6e4a3adf54eb7c0cb3b5b3cf /py/objstr.c | |
parent | 69ffd2aaf09e35b68ea045872a63af6403fc1c8f (diff) |
py/objstr: Support tuples and start/end args in startswith and endswith.
This change allows tuples to be passed as the prefix/suffix argument to the
`str.startswith()` and `str.endswith()` methods. The methods will return
`True` if the string starts/ends with any of the prefixes/suffixes in the
tuple.
Also adds full support for the `start` and `end` arguments to both methods
for compatibility with CPython.
Tests have been updated for the new behaviour.
Signed-off-by: Glenn Moloney <glenn.moloney@gmail.com>
Diffstat (limited to 'py/objstr.c')
-rw-r--r-- | py/objstr.c | 67 |
1 files changed, 42 insertions, 25 deletions
```diff
diff --git a/py/objstr.c b/py/objstr.c
index fc0623eb7..307e956a1 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -67,6 +67,26 @@ static void check_is_str_or_bytes(mp_obj_t self_in) {
     mp_check_self(mp_obj_is_str_or_bytes(self_in));
 }
 
+static const byte *get_substring_data(const mp_obj_t obj, size_t n_args, const mp_obj_t *args, size_t *len) {
+    // Get substring data from obj, using args[0,1] to specify start and end indices.
+    GET_STR_DATA_LEN(obj, str, str_len);
+    if (n_args > 0) {
+        const mp_obj_type_t *self_type = mp_obj_get_type(obj);
+        const byte *end = str + str_len;
+        if (n_args > 1 && args[1] != mp_const_none) {
+            end = str_index_to_ptr(self_type, str, str_len, args[1], true);
+        }
+        if (args[0] != mp_const_none) {
+            str = str_index_to_ptr(self_type, str, str_len, args[0], true);
+        }
+        str_len = MAX(end - str, 0);
+    }
+    if (len) {
+        *len = str_len;
+    }
+    return str;
+}
+
 /******************************************************************************/
 /* str */
 
@@ -802,37 +822,34 @@ static mp_obj_t str_rindex(size_t n_args, const mp_obj_t *args) {
 }
 MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
 
-// TODO: (Much) more variety in args
-static mp_obj_t str_startswith(size_t n_args, const mp_obj_t *args) {
-    const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
-    GET_STR_DATA_LEN(args[0], str, str_len);
-    size_t prefix_len;
-    const char *prefix = mp_obj_str_get_data(args[1], &prefix_len);
-    const byte *start = str;
-    if (n_args > 2) {
-        start = str_index_to_ptr(self_type, str, str_len, args[2], true);
+static mp_obj_t str_startendswith(size_t n_args, const mp_obj_t *args, bool ends_with) {
+    size_t str_len;
+    const byte *str = get_substring_data(args[0], n_args - 2, args + 2, &str_len);
+    mp_obj_t *prefixes = (mp_obj_t *)&args[1];
+    size_t n_prefixes = 1;
+    if (mp_obj_is_type(args[1], &mp_type_tuple)) {
+        mp_obj_tuple_get(args[1], &n_prefixes, &prefixes);
     }
-    if (prefix_len + (start - str) > str_len) {
-        return mp_const_false;
+    size_t prefix_len;
+    for (size_t i = 0; i < n_prefixes; i++) {
+        const char *prefix = mp_obj_str_get_data(prefixes[i], &prefix_len);
+        const byte *s = str + (ends_with ? str_len - prefix_len : 0);
+        if (prefix_len <= str_len && memcmp(s, prefix, prefix_len) == 0) {
+            return mp_const_true;
+        }
     }
-    return mp_obj_new_bool(memcmp(start, prefix, prefix_len) == 0);
+    return mp_const_false;
 }
-MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
 
-static mp_obj_t str_endswith(size_t n_args, const mp_obj_t *args) {
-    GET_STR_DATA_LEN(args[0], str, str_len);
-    size_t suffix_len;
-    const char *suffix = mp_obj_str_get_data(args[1], &suffix_len);
-    if (n_args > 2) {
-        mp_raise_NotImplementedError(MP_ERROR_TEXT("start/end indices"));
-    }
+static mp_obj_t str_startswith(size_t n_args, const mp_obj_t *args) {
+    return str_startendswith(n_args, args, false);
+}
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 4, str_startswith);
 
-    if (suffix_len > str_len) {
-        return mp_const_false;
-    }
-    return mp_obj_new_bool(memcmp(str + (str_len - suffix_len), suffix, suffix_len) == 0);
+static mp_obj_t str_endswith(size_t n_args, const mp_obj_t *args) {
+    return str_startendswith(n_args, args, true);
 }
-MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 4, str_endswith);
 
 enum { LSTRIP, RSTRIP, STRIP };
 
```