summary | refs | log | tree | commit | diff
path: root/py/unicode.c
diff options
context:
space:
mode:
Diffstat (limited to 'py/unicode.c')
-rw-r--r-- py/unicode.c 29
1 files changed, 11 insertions, 18 deletions
diff --git a/py/unicode.c b/py/unicode.c
index 140b7ba71..935dc9012 100644
--- a/py/unicode.c
+++ b/py/unicode.c
@@ -67,9 +67,9 @@ STATIC const uint8_t attr[] = {
AT_LO, AT_LO, AT_LO, AT_PR, AT_PR, AT_PR, AT_PR, 0
};
-// TODO: Rename to str_get_char
-unichar utf8_get_char(const byte *s) {
#if MICROPY_PY_BUILTINS_STR_UNICODE
+
+unichar utf8_get_char(const byte *s) {
unichar ord = *s++;
if (!UTF8_IS_NONASCII(ord)) return ord;
ord &= 0x7F;
@@ -80,22 +80,14 @@ unichar utf8_get_char(const byte *s) {
ord = (ord << 6) | (*s++ & 0x3F);
}
return ord;
-#else
- return *s;
-#endif
}
-// TODO: Rename to str_next_char
const byte *utf8_next_char(const byte *s) {
-#if MICROPY_PY_BUILTINS_STR_UNICODE
++s;
while (UTF8_IS_CONT(*s)) {
++s;
}
return s;
-#else
- return s + 1;
-#endif
}
mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) {
@@ -109,21 +101,18 @@ mp_uint_t utf8_ptr_to_index(const byte *s, const byte *ptr) {
return i;
}
-// TODO: Rename to str_charlen
-mp_uint_t unichar_charlen(const char *str, mp_uint_t len) {
-#if MICROPY_PY_BUILTINS_STR_UNICODE
- mp_uint_t charlen = 0;
- for (const char *top = str + len; str < top; ++str) {
+size_t utf8_charlen(const byte *str, size_t len) {
+ size_t charlen = 0;
+ for (const byte *top = str + len; str < top; ++str) {
if (!UTF8_IS_CONT(*str)) {
++charlen;
}
}
return charlen;
-#else
- return len;
-#endif
}
+#endif
+
// Be aware: These unichar_is* functions are actually ASCII-only!
bool unichar_isspace(unichar c) {
return c < 128 && (attr[c] & FL_SPACE) != 0;
@@ -183,6 +172,8 @@ mp_uint_t unichar_xdigit_value(unichar c) {
return n;
}
+#if MICROPY_PY_BUILTINS_STR_UNICODE
+
bool utf8_check(const byte *p, size_t len) {
uint8_t need = 0;
const byte *end = p + len;
@@ -210,3 +201,5 @@ bool utf8_check(const byte *p, size_t len) {
}
return need == 0; // no pending fragments allowed
}
+
+#endif