summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Damien George <damien.p.george@gmail.com> 2018-11-26 16:13:08 +1100
committer: Damien George <damien.p.george@gmail.com> 2018-11-26 16:13:08 +1100
commit: 7c85c7c210e3ad417f59038de95b71618783d76c (patch)
tree: 133d8e4c446a3f854213d547cfaad38af0eed36d
parent: d63ef86c6e83205f18938bfa8e538e35eda5fd52 (diff)
py/unicode: Fix check for valid utf8 being stricter about contn chars.
-rw-r--r-- py/unicode.c 2
-rw-r--r-- tests/unicode/unicode.py 4
2 files changed, 5 insertions, 1 deletions
diff --git a/py/unicode.c b/py/unicode.c
index 935dc9012..d69b6f56f 100644
--- a/py/unicode.c
+++ b/py/unicode.c
@@ -180,7 +180,7 @@ bool utf8_check(const byte *p, size_t len) {
for (; p < end; p++) {
byte c = *p;
if (need) {
- if (c >= 0x80) {
+ if (UTF8_IS_CONT(c)) {
need--;
} else {
// mismatch
diff --git a/tests/unicode/unicode.py b/tests/unicode/unicode.py
index 3a35ce894..b3d4b09ee 100644
--- a/tests/unicode/unicode.py
+++ b/tests/unicode/unicode.py
@@ -47,3 +47,7 @@ try:
str(bytearray(b'ab\xc0a'), 'utf8')
except UnicodeError:
print('UnicodeError')
+try:
+ str(b'\xf0\xe0\xed\xe8', 'utf8')
+except UnicodeError:
+ print('UnicodeError')