summary | refs | log | tree | commit | diff
path: root/py/objstr.c
diff options
context:
space:
mode:
author: Jim Mussared <jim.mussared@gmail.com> 2022-09-25 22:15:45 +1000
committer: Damien George <damien@micropython.org> 2022-09-26 00:54:18 +1000
commit: 9d6f474ea49fd89b7a1a90b830e6014ef70a89b7 (patch)
tree: fb192b82f10101554a30b67af098bf3a87d32eb7 /py/objstr.c
parent: dd9dcb594c577cb818c336db59a884fd329c3840 (diff)
py/objstr: Don't treat bytes as unicode in str.count.
`b'\xaa \xaa'.count(b'\xaa')` now (correctly) returns 2 instead of 1.

Fixes issue #9404.

This work was funded through GitHub Sponsors.

Signed-off-by: Jim Mussared <jim.mussared@gmail.com>
Diffstat (limited to 'py/objstr.c')
-rw-r--r-- py/objstr.c 4
1 file changed, 3 insertions, 1 deletion
diff --git a/py/objstr.c b/py/objstr.c
index 62d7bfb4c..55e737fff 100644
--- a/py/objstr.c
+++ b/py/objstr.c
@@ -1768,6 +1768,8 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
return MP_OBJ_NEW_SMALL_INT(utf8_charlen(start, end - start) + 1);
}
+ bool is_str = self_type == &mp_type_str;
+
// count the occurrences
mp_int_t num_occurrences = 0;
for (const byte *haystack_ptr = start; haystack_ptr + needle_len <= end;) {
@@ -1775,7 +1777,7 @@ STATIC mp_obj_t str_count(size_t n_args, const mp_obj_t *args) {
num_occurrences++;
haystack_ptr += needle_len;
} else {
- haystack_ptr = utf8_next_char(haystack_ptr);
+ haystack_ptr = is_str ? utf8_next_char(haystack_ptr) : haystack_ptr + 1;
}
}