summaryrefslogtreecommitdiff
path: root/py/lexer.c
diff options
context:
space:
mode:
author Damien George <damien.p.george@gmail.com> 2015-02-08 01:57:40 +0000
committer Damien George <damien.p.george@gmail.com> 2015-02-08 01:57:40 +0000
commit 7d414a1b52d193bab2c94cf56932e1eba23ba542 (patch)
tree 69f6840e4f825ffc1047fe7cb0f52eba27b20d86 /py/lexer.c
parent 5f97aaeca4dc607a2d32e758c3ef6131ffb168a6 (diff)
py: Parse big-int/float/imag constants directly in parser.
Prior to this patch, a big-int, float or imag constant was interned (made into a qstr) and then parsed at runtime to create an object each time it was needed. This wasted RAM and was inefficient. Now these constants are parsed immediately in the parser and turned into objects. This allows constants with a large number of digits (addressing issue #1103) and takes us a step closer to #722.
Diffstat (limited to 'py/lexer.c')
-rw-r--r-- py/lexer.c 22
1 files changed, 19 insertions, 3 deletions
diff --git a/py/lexer.c b/py/lexer.c
index e3d52e714..e77851020 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -104,6 +104,10 @@ STATIC bool is_following_digit(mp_lexer_t *lex) {
return unichar_isdigit(lex->chr1);
}
+STATIC bool is_following_letter(mp_lexer_t *lex) {
+ return unichar_isalpha(lex->chr1);
+}
+
STATIC bool is_following_odigit(mp_lexer_t *lex) {
return lex->chr1 >= '0' && lex->chr1 <= '7';
}
@@ -540,7 +544,15 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
}
} else if (is_digit(lex) || (is_char(lex, '.') && is_following_digit(lex))) {
- lex->tok_kind = MP_TOKEN_NUMBER;
+ bool forced_integer = false;
+ if (is_char(lex, '.')) {
+ lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG;
+ } else {
+ lex->tok_kind = MP_TOKEN_INTEGER;
+ if (is_char(lex, '0') && is_following_letter(lex)) {
+ forced_integer = true;
+ }
+ }
// get first char
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
@@ -548,14 +560,18 @@ STATIC void mp_lexer_next_token_into(mp_lexer_t *lex, bool first_token) {
// get tail chars
while (!is_end(lex)) {
- if (is_char_or(lex, 'e', 'E')) {
+ if (!forced_integer && is_char_or(lex, 'e', 'E')) {
+ lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG;
vstr_add_char(&lex->vstr, 'e');
next_char(lex);
if (is_char(lex, '+') || is_char(lex, '-')) {
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
next_char(lex);
}
- } else if (is_letter(lex) || is_digit(lex) || is_char_or(lex, '_', '.')) {
+ } else if (is_letter(lex) || is_digit(lex) || is_char(lex, '.')) {
+ if (is_char_or3(lex, '.', 'j', 'J')) {
+ lex->tok_kind = MP_TOKEN_FLOAT_OR_IMAG;
+ }
vstr_add_char(&lex->vstr, CUR_CHAR(lex));
next_char(lex);
} else {