Skip to content

Commit 25835c5

Browse files
authored
bpo-45738: Fix computation of error location for invalid continuation
characters in the parser (GH-29550)
1 parent f8da00e commit 25835c5

File tree

4 files changed

+14
-12
lines changed

4 files changed

+14
-12
lines changed

Lib/test/test_syntax.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1505,7 +1505,13 @@ def func2():
15051505
def test_invalid_line_continuation_error_position(self):
15061506
self._check_error(r"a = 3 \ 4",
15071507
"unexpected character after line continuation character",
1508-
lineno=1, offset=9)
1508+
lineno=1, offset=8)
1509+
self._check_error('1,\\#\n2',
1510+
"unexpected character after line continuation character",
1511+
lineno=1, offset=4)
1512+
self._check_error('\nfgdfgf\n1,\\#\n2\n',
1513+
"unexpected character after line continuation character",
1514+
lineno=3, offset=4)
15091515

15101516
def test_invalid_line_continuation_left_recursive(self):
15111517
# Check bpo-42218: SyntaxErrors following left-recursive rules
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
Fix computation of error location for invalid continuation characters in the
2+
parser. Patch by Pablo Galindo.

Parser/pegen.c

Lines changed: 5 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -351,22 +351,17 @@ tokenizer_error(Parser *p)
351351
msg = "too many levels of indentation";
352352
break;
353353
case E_LINECONT: {
354-
char* loc = strrchr(p->tok->buf, '\n');
355-
const char* last_char = p->tok->cur - 1;
356-
if (loc != NULL && loc != last_char) {
357-
col_offset = p->tok->cur - loc - 1;
358-
p->tok->buf = loc;
359-
} else {
360-
col_offset = last_char - p->tok->buf - 1;
361-
}
354+
col_offset = p->tok->cur - p->tok->buf - 1;
362355
msg = "unexpected character after line continuation character";
363356
break;
364357
}
365358
default:
366359
msg = "unknown parsing error";
367360
}
368361

369-
RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno, col_offset, p->tok->lineno, -1, msg);
362+
RAISE_ERROR_KNOWN_LOCATION(p, errtype, p->tok->lineno,
363+
col_offset >= 0 ? col_offset : 0,
364+
p->tok->lineno, -1, msg);
370365
return -1;
371366
}
372367

@@ -497,7 +492,7 @@ _PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
497492
does not physically exist */
498493
assert(p->tok->fp == NULL || p->tok->fp == stdin || p->tok->done == E_EOF || !uses_utf8_codec);
499494

500-
if (p->tok->lineno <= lineno) {
495+
if (p->tok->lineno <= lineno && p->tok->inp > p->tok->buf) {
501496
Py_ssize_t size = p->tok->inp - p->tok->buf;
502497
error_line = PyUnicode_DecodeUTF8(p->tok->buf, size, "replace");
503498
}

Parser/tokenizer.c

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1970,7 +1970,6 @@ tok_get(struct tok_state *tok, const char **p_start, const char **p_end)
19701970
c = tok_nextc(tok);
19711971
if (c != '\n') {
19721972
tok->done = E_LINECONT;
1973-
tok->cur = tok->inp;
19741973
return ERRORTOKEN;
19751974
}
19761975
c = tok_nextc(tok);

0 commit comments

Comments
 (0)