diff options
author | Damien George <damien.p.george@gmail.com> | 2015-02-08 01:57:40 +0000 |
---|---|---|
committer | Damien George <damien.p.george@gmail.com> | 2015-02-08 01:57:40 +0000 |
commit | 7d414a1b52d193bab2c94cf56932e1eba23ba542 (patch) | |
tree | 69f6840e4f825ffc1047fe7cb0f52eba27b20d86 /py/lexer.c | |
parent | 5f97aaeca4dc607a2d32e758c3ef6131ffb168a6 (diff) |
py: Parse big-int/float/imag constants directly in parser.
Prior to this patch, a big-int, float or imag constant was interned
(made into a qstr) and then parsed at runtime to create an object each
time it was needed. This wastes RAM and is inefficient. Now,
these constants are parsed straight away in the parser and turned into
objects. This allows constants with large numbers of digits (and so
addresses issue #1103) and takes us a step closer to #722.
Diffstat (limited to 'py/lexer.c')
-rw-r--r-- | py/lexer.c | 22 |
1 files changed, 19 insertions, 3 deletions
diff --git a/py/lexer.c b/py/lexer.c index e3d52e714..e77851020 100644 --- a/py/lexer.c +++ b/py/lexer.c @@ -104,6 +104,10 @@ STATIC bool is_following_digit(mp_lexer_t *lex) { return unichar_isdigit(lex->chr1); } +STATIC bool is_following_letter(mp_lexer_t *lex) { + return unichar_isalpha(lex->chr1); +} + STATIC bool is_following_odigit(mp_lexer_t *lex) { return lex->chr1 >= '0' && lex->chr1 <= '7'; } @@ -540,7 +544,15 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) { } } else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) { - lex->tok_kind = MP_TOKEN_NUMBER; + bool forced_integer = false; + if (is_char(lex, '.')) { + lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG; + } else { + lex->tok_kind = MP_TOKEN_INTEGER; + if (is_char(lex, '0') && is_following_letter(lex)) { + forced_integer = true; + } + } // get first char vstr_add_char(&lex->vstr, CUR_CHAR(lex)); @@ -548,14 +560,18 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) { // get tail chars while (!is_end(lex)) { - if (is_char_or(lex, 'e', 'E')) { + if (!forced_integer && is_char_or(lex, 'e', 'E')) { + lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG; vstr_add_char(&lex->vstr, 'e'); next_char(lex); if (is_char(lex, '+') || is_char(lex, '-')) { vstr_add_char(&lex->vstr, CUR_CHAR(lex)); next_char(lex); } - } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) { + } else if (is_letter(lex) || is_digit(lex) || is_char(lex, '.')) { + if (is_char_or3(lex, '.', 'j', 'J')) { + lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG; + } vstr_add_char(&lex->vstr, CUR_CHAR(lex)); next_char(lex); } else { |