py/persistentcode: Remove unicode feature flag from .mpy file.

Prior to this commit, even with unicode disabled, .py and .mpy files could contain unicode characters, e.g. by entering them directly in a string as utf-8 encoded bytes. The only thing the compiler disallowed (with unicode disabled) was using \uxxxx and \Uxxxxxxxx notation to specify a character with value >= 0x100 within a string; that would give a SyntaxError. With this change, mpy-cross will now accept \u and \U notation to insert a character with value >= 0x100 into a string (because the -mno-unicode option is now gone, there's no way to forbid this). The runtime will happily work with strings containing such characters, just as it already works with strings whose characters were utf-8 encoded directly. This change simplifies things because there are no longer any feature flags in .mpy files, and any bytecode .mpy will now run on any target. Signed-off-by: Damien George <damien@micropython.org>
author: Damien George <damien@micropython.org> 2022-05-16 19:20:52 +1000
committer: Damien George <damien@micropython.org> 2022-05-17 12:51:54 +1000
commit: c49d5207e9437755be364639632be31c001955a8 (patch)
tree: 3eb8ea6a6af8231c252b5e50173a5d34e5db2390 /py/lexer.c
parent: b295b6f1f3dd529eea564b9e08b75c7ab4f48ce4 (diff)
1 files changed, 17 insertions, 19 deletions
diff --git a/py/lexer.c b/py/lexer.c
index ac406bd46..39e9662f6 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -473,25 +473,23 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
                     }
                 }
                 if (c != MP_LEXER_EOF) {
-                    if (MICROPY_PY_BUILTINS_STR_UNICODE_DYNAMIC) {
-                        if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
-                            vstr_add_char(&lex->vstr, c);
-                        } else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
-                            vstr_add_byte(&lex->vstr, c);
-                        } else {
-                            // unicode character out of range
-                            // this raises a generic SyntaxError; could provide more info
-                            lex->tok_kind = MP_TOKEN_INVALID;
-                        }
-                    } else {
-                        // without unicode everything is just added as an 8-bit byte
-                        if (c < 0x100) {
-                            vstr_add_byte(&lex->vstr, c);
-                        } else {
-                            // 8-bit character out of range
-                            // this raises a generic SyntaxError; could provide more info
-                            lex->tok_kind = MP_TOKEN_INVALID;
-                        }
+                    #if MICROPY_PY_BUILTINS_STR_UNICODE
+                    if (c < 0x110000 && lex->tok_kind == MP_TOKEN_STRING) {
+                        // Valid unicode character in a str object.
+                        vstr_add_char(&lex->vstr, c);
+                    } else if (c < 0x100 && lex->tok_kind == MP_TOKEN_BYTES) {
+                        // Valid byte in a bytes object.
+                        vstr_add_byte(&lex->vstr, c);
+                    }
+                    #else
+                    if (c < 0x100) {
+                        // Without unicode everything is just added as an 8-bit byte.
+                        vstr_add_byte(&lex->vstr, c);
+                    }
+                    #endif
+                    else {
+                        // Character out of range; this raises a generic SyntaxError.
+                        lex->tok_kind = MP_TOKEN_INVALID;
                     }
                 }
             } else {
author	Damien George <damien@micropython.org>	2022-05-16 19:20:52 +1000
committer	Damien George <damien@micropython.org>	2022-05-17 12:51:54 +1000
commit	c49d5207e9437755be364639632be31c001955a8 (patch)
tree	3eb8ea6a6af8231c252b5e50173a5d34e5db2390 /py/lexer.c
parent	b295b6f1f3dd529eea564b9e08b75c7ab4f48ce4 (diff)