summary | refs | log | tree | commit | diff
path: root/py/objstr.c
diff options
context:
space:
mode:
author: Glenn Moloney <glenn.moloney@gmail.com> 2025-02-25 14:36:29 +1100
committer: Damien George <damien@micropython.org> 2025-03-02 22:15:31 +1100
commit: eb45d97898abd9aae93d0c953634cabb5ea327e3 (patch)
tree: 17bc283a5f16724d6e4a3adf54eb7c0cb3b5b3cf /py/objstr.c
parent: 69ffd2aaf09e35b68ea045872a63af6403fc1c8f (diff)
py/objstr: Support tuples and start/end args in startswith and endswith.
This change allows tuples to be passed as the prefix/suffix argument to the `str.startswith()` and `str.endswith()` methods. The methods will return `True` if the string starts/ends with any of the prefixes/suffixes in the tuple. Also adds full support for the `start` and `end` arguments to both methods for compatibility with CPython. Tests have been updated for the new behaviour. Signed-off-by: Glenn Moloney <glenn.moloney@gmail.com>
Diffstat (limited to 'py/objstr.c')
-rw-r--r-- py/objstr.c 67
1 files changed, 42 insertions, 25 deletions
diff --git a/py/objstr.c b/py/objstr.c
index fc0623eb7..307e956a1 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -67,6 +67,26 @@ static void check_is_str_or_bytes(mp_obj_t self_in) {
mp_check_self(mp_obj_is_str_or_bytes(self_in));
}
+static const byte *get_substring_data(const mp_obj_t obj, size_t n_args, const mp_obj_t *args, size_t *len) {
+ // Return a pointer to the data of obj, optionally narrowed to a sub-range: args[0] is the start index and args[1] the end index (each may be absent or mp_const_none, meaning "not given"). The resulting length is stored in *len when len is non-NULL.
+ GET_STR_DATA_LEN(obj, str, str_len);
+ if (n_args > 0) { // at least a start index argument was passed
+ const mp_obj_type_t *self_type = mp_obj_get_type(obj);
+ const byte *end = str + str_len; // default end: one past the last byte of the data
+ if (n_args > 1 && args[1] != mp_const_none) {
+ end = str_index_to_ptr(self_type, str, str_len, args[1], true); // resolve end first, while str still points at the start of the full data
+ }
+ if (args[0] != mp_const_none) {
+ str = str_index_to_ptr(self_type, str, str_len, args[0], true); // str now points at the start of the substring
+ }
+ str_len = MAX(end - str, 0); // clamp to 0 when the start pointer lies beyond end
+ }
+ if (len) { // len may be NULL when the caller does not need the length
+ *len = str_len;
+ }
+ return str;
+}
+
/******************************************************************************/
/* str */
@@ -802,37 +822,34 @@ static mp_obj_t str_rindex(size_t n_args, const mp_obj_t *args) {
}
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_rindex_obj, 2, 4, str_rindex);
-// TODO: (Much) more variety in args
-static mp_obj_t str_startswith(size_t n_args, const mp_obj_t *args) {
- const mp_obj_type_t *self_type = mp_obj_get_type(args[0]);
- GET_STR_DATA_LEN(args[0], str, str_len);
- size_t prefix_len;
- const char *prefix = mp_obj_str_get_data(args[1], &prefix_len);
- const byte *start = str;
- if (n_args > 2) {
- start = str_index_to_ptr(self_type, str, str_len, args[2], true);
+static mp_obj_t str_startendswith(size_t n_args, const mp_obj_t *args, bool ends_with) { // shared body of startswith (ends_with == false) and endswith (ends_with == true); args[0] is self, args[1] the prefix/suffix or a tuple of them
+ size_t str_len;
+ const byte *str = get_substring_data(args[0], n_args - 2, args + 2, &str_len); // args[2..] are the optional start/end indices
+ mp_obj_t *prefixes = (mp_obj_t *)&args[1]; // default: treat args[1] itself as a one-element candidate list
+ size_t n_prefixes = 1;
+ if (mp_obj_is_type(args[1], &mp_type_tuple)) {
+ mp_obj_tuple_get(args[1], &n_prefixes, &prefixes); // a tuple supplies several candidates
 }
- if (prefix_len + (start - str) > str_len) {
- return mp_const_false;
+ size_t prefix_len;
+ for (size_t i = 0; i < n_prefixes; i++) {
+ const char *prefix = mp_obj_str_get_data(prefixes[i], &prefix_len);
+ const byte *s = str + (ends_with ? str_len - prefix_len : 0); // NOTE(review): when prefix_len > str_len this size_t subtraction wraps, so s is only meaningful once the length check below has passed
+ if (prefix_len <= str_len && memcmp(s, prefix, prefix_len) == 0) { // the length check short-circuits, so memcmp never reads through an out-of-range s
+ return mp_const_true; // first matching candidate wins
+ }
 }
- return mp_obj_new_bool(memcmp(start, prefix, prefix_len) == 0);
+ return mp_const_false;
 }
-MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 3, str_startswith);
-static mp_obj_t str_endswith(size_t n_args, const mp_obj_t *args) {
- GET_STR_DATA_LEN(args[0], str, str_len);
- size_t suffix_len;
- const char *suffix = mp_obj_str_get_data(args[1], &suffix_len);
- if (n_args > 2) {
- mp_raise_NotImplementedError(MP_ERROR_TEXT("start/end indices"));
- }
+static mp_obj_t str_startswith(size_t n_args, const mp_obj_t *args) {
+ return str_startendswith(n_args, args, false); // ends_with == false: compare at the beginning of the (sub)string
+}
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_startswith_obj, 2, 4, str_startswith); // presumably 2..4 positional args (self, prefix, start, end) - confirm against the macro definition
- if (suffix_len > str_len) {
- return mp_const_false;
- }
- return mp_obj_new_bool(memcmp(str + (str_len - suffix_len), suffix, suffix_len) == 0);
+static mp_obj_t str_endswith(size_t n_args, const mp_obj_t *args) {
+ return str_startendswith(n_args, args, true); // ends_with == true: compare at the end of the (sub)string
 }
-MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 3, str_endswith);
+MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(str_endswith_obj, 2, 4, str_endswith); // presumably 2..4 positional args (self, suffix, start, end) - confirm against the macro definition
enum { LSTRIP, RSTRIP, STRIP };