diff options
| author | Jeff Epler <jepler@gmail.com> | 2022-09-05 07:58:04 -0500 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2022-09-06 17:08:18 +1000 |
| commit | e90b85cc98a24003f2d673bab2c255ab3dce66e7 (patch) | |
| tree | 9403eb1246766f0d081bb8a78a2515bc1f6f567a /extmod | |
| parent | 719dbbf5639cdeff99bf629c45d66b18007e9958 (diff) | |
extmod/modure: Convert byte offsets to unicode indices when necessary.
And add a test.
Fixes issue #9202.
Signed-off-by: Jeff Epler <jepler@gmail.com>
Diffstat (limited to 'extmod')
| -rw-r--r-- | extmod/modure.c | 16 |
1 file changed, 16 insertions, 0 deletions
```diff
diff --git a/extmod/modure.c b/extmod/modure.c
index 799fef13b..a674d6649 100644
--- a/extmod/modure.c
+++ b/extmod/modure.c
@@ -33,6 +33,10 @@
 #include "py/objstr.h"
 #include "py/stackctrl.h"
 
+#if MICROPY_PY_BUILTINS_STR_UNICODE
+#include "py/unicode.h"
+#endif
+
 #if MICROPY_PY_URE
 
 #define re1_5_stack_chk() MP_STACK_CHECK()
@@ -121,6 +125,18 @@ STATIC void match_span_helper(size_t n_args, const mp_obj_t *args, mp_obj_t span
         e = self->caps[no * 2 + 1] - begin;
     }
 
+    #if MICROPY_PY_BUILTINS_STR_UNICODE
+    if (mp_obj_get_type(self->str) == &mp_type_str) {
+        const byte *begin = (const byte *)mp_obj_str_get_str(self->str);
+        if (s != -1) {
+            s = utf8_ptr_to_index(begin, begin + s);
+        }
+        if (e != -1) {
+            e = utf8_ptr_to_index(begin, begin + e);
+        }
+    }
+    #endif
+
     span[0] = mp_obj_new_int(s);
     span[1] = mp_obj_new_int(e);
 }
```
