diff options
| author | Damien George <damien@micropython.org> | 2022-05-16 19:20:52 +1000 |
|---|---|---|
| committer | Damien George <damien@micropython.org> | 2022-05-17 12:51:54 +1000 |
| commit | c49d5207e9437755be364639632be31c001955a8 (patch) | |
| tree | 3eb8ea6a6af8231c252b5e50173a5d34e5db2390 /py/lexer.c | |
| parent | b295b6f1f3dd529eea564b9e08b75c7ab4f48ce4 (diff) | |
py/persistentcode: Remove unicode feature flag from .mpy file.
Prior to this commit, even with unicode disabled .py and .mpy files could
contain unicode characters, eg by entering them directly in a string as
utf-8 encoded.
The only thing the compiler disallowed (with unicode disabled) was using
\uxxxx and \Uxxxxxxxx notation to specify a character within a string with
value >= 0x100; that would give a SyntaxError.
With this change mpy-cross will now accept \u and \U notation to insert a
character with value >= 0x100 into a string (because the -mno-unicode
option is now gone, there's no way to forbid this). The runtime will
happily work with strings with such characters, just like it already works
with strings with characters that were utf-8 encoded directly.
This change simplifies things because there are no longer any feature
flags in .mpy files, and any bytecode .mpy will now run on any target.
Signed-off-by: Damien George <damien@micropython.org>
Diffstat (limited to 'py/lexer.c')
| -rw-r--r-- | py/lexer.c | 36 |
1 files changed, 17 insertions, 19 deletions
diff --git a/py/lexer.c b/py/lexer.c index ac406bd46..39e9662f6 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -473,25 +473,23 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring) } } if (c != MP_LEXER_EOF) { - if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) { - if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) { - vstr_add_char(&lex->vstr, c); - } else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) { - vstr_add_byte(&lex->vstr, c); - } else { - // unicode character out of range - // this raises a generic SyntaxError; could provide more info - lex->tok_kind = MP_TOKEN_INVALID; - } - } else { - // without unicode everything is just added as an 8-bit byte - if (c < 0x100) { - vstr_add_byte(&lex->vstr, c); - } else { - // 8-bit character out of range - // this raises a generic SyntaxError; could provide more info - lex->tok_kind = MP_TOKEN_INVALID; - } + #if MICROPY_PY_BUILTINS_STR_UNICODE + if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) { + // Valid unicode character in a str object. + vstr_add_char(&lex->vstr, c); + } else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) { + // Valid byte in a bytes object. + vstr_add_byte(&lex->vstr, c); + } + #else + if (c < 0x100) { + // Without unicode everything is just added as an 8-bit byte. + vstr_add_byte(&lex->vstr, c); + } + #endif + else { + // Character out of range; this raises a generic SyntaxError. + lex->tok_kind = MP_TOKEN_INVALID; } } } else { |
