summaryrefslogtreecommitdiff
path: root/py
diff options
context:
space:
mode:
Diffstat (limited to 'py')
-rw-r--r--py/lexer.c31
1 files changed, 20 insertions, 11 deletions
diff --git a/py/lexer.c b/py/lexer.c
index 9587f6b16..5e911a1a2 100644
--- a/py/lexer.c
+++ b/py/lexer.c
@@ -527,14 +527,14 @@ STATIC void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
vstr_cut_tail_bytes(&lex->vstr, n_closing);
}
+// This function returns whether it has crossed a newline or not.
+// It therefore always return true if stop_at_newline is true
STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
- bool had_physical_newline = false;
while (!is_end(lex)) {
if (is_physical_newline(lex)) {
if (stop_at_newline && lex->nested_bracket_level == 0) {
- break;
+ return true;
}
- had_physical_newline = true;
next_char(lex);
} else if (is_whitespace(lex)) {
next_char(lex);
@@ -543,16 +543,16 @@ STATIC bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
while (!is_end(lex) && !is_physical_newline(lex)) {
next_char(lex);
}
- // had_physical_newline will be set on next loop
+ // will return true on next loop
} else if (is_char_and(lex, '\\', '\n')) {
- // line-continuation, so don't set had_physical_newline
+ // line-continuation, so don't return true
next_char(lex);
next_char(lex);
} else {
break;
}
}
- return had_physical_newline;
+ return false;
}
void mp_lexer_to_next(mp_lexer_t *lex) {
@@ -577,7 +577,10 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
vstr_reset(&lex->vstr);
// skip white space and comments
- bool had_physical_newline = skip_whitespace(lex, false);
+ // set the newline tokens at the line and column of the preceding line:
+ // only advance on the pointer until a new line is crossed, save the
+ // line and column, and then readvance it
+ bool had_physical_newline = skip_whitespace(lex, true);
// set token source information
lex->tok_line = lex->line;
@@ -591,7 +594,12 @@ void mp_lexer_to_next(mp_lexer_t *lex) {
lex->tok_kind = MP_TOKEN_INDENT;
lex->emit_dent -= 1;
- } else if (had_physical_newline && lex->nested_bracket_level == 0) {
+ } else if (had_physical_newline) {
+ // The cursor is at the end of the previous line, pointing to a
+ // physical newline. Skip any remaining whitespace, comments, and
+ // newlines.
+ skip_whitespace(lex, false);
+
lex->tok_kind = MP_TOKEN_NEWLINE;
size_t num_spaces = lex->column - 1;
@@ -862,9 +870,10 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
// preload first token
mp_lexer_to_next(lex);
- // Check that the first token is in the first column. If it's not then we
- // convert the token kind to INDENT so that the parser gives a syntax error.
- if (lex->tok_column != 1) {
+ // Check that the first token is in the first column unless it is a
+ // newline. Otherwise we convert the token kind to INDENT so that
+ // the parser gives a syntax error.
+ if (lex->tok_column != 1 && lex->tok_kind != MP_TOKEN_NEWLINE) {
lex->tok_kind = MP_TOKEN_INDENT;
}