Skip to content

Commit 02cdfc9

Browse files
bpo-42218: Correctly handle errors in left-recursive rules (GH-23065)
Left-recursive rules need to check for errors explicitly, since even if the rule returns NULL, the parsing might continue and lead to long-distance failures.

Co-authored-by: Pablo Galindo <[email protected]>
1 parent d21cb2d commit 02cdfc9

File tree

4 files changed

+32
-0
lines changed

4 files changed

+32
-0
lines changed

Lib/test/test_syntax.py

+8
Original file line numberDiff line numberDiff line change
@@ -972,6 +972,14 @@ def func2():
972972
"""
973973
self._check_error(code, "invalid syntax")
974974

975+
def test_invalid_line_continuation_left_recursive(self):
976+
# Check bpo-42218: SyntaxErrors following left-recursive rules
977+
# (t_primary_raw in this case) need to be tested explicitly
978+
self._check_error("A.\u018a\\ ",
979+
"unexpected character after line continuation character")
980+
self._check_error("A.\u03bc\\\n",
981+
"unexpected EOF while parsing")
982+
975983
def test_main():
976984
support.run_unittest(SyntaxTestCase)
977985
from test import test_syntax
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
Fixed a bug in the PEG parser that was causing crashes in debug mode. Now errors are checked
2+
in left-recursive rules to avoid cases where such errors do not get handled in time and appear
3+
as long-distance crashes in other places.

Parser/parser.c

+18
Original file line numberDiff line numberDiff line change
@@ -3461,6 +3461,8 @@ dotted_name_rule(Parser *p)
34613461
}
34623462
p->mark = _mark;
34633463
void *_raw = dotted_name_raw(p);
3464+
if (p->error_indicator)
3465+
return NULL;
34643466
if (_raw == NULL || p->mark <= _resmark)
34653467
break;
34663468
_resmark = p->mark;
@@ -9045,6 +9047,8 @@ bitwise_or_rule(Parser *p)
90459047
}
90469048
p->mark = _mark;
90479049
void *_raw = bitwise_or_raw(p);
9050+
if (p->error_indicator)
9051+
return NULL;
90489052
if (_raw == NULL || p->mark <= _resmark)
90499053
break;
90509054
_resmark = p->mark;
@@ -9159,6 +9163,8 @@ bitwise_xor_rule(Parser *p)
91599163
}
91609164
p->mark = _mark;
91619165
void *_raw = bitwise_xor_raw(p);
9166+
if (p->error_indicator)
9167+
return NULL;
91629168
if (_raw == NULL || p->mark <= _resmark)
91639169
break;
91649170
_resmark = p->mark;
@@ -9273,6 +9279,8 @@ bitwise_and_rule(Parser *p)
92739279
}
92749280
p->mark = _mark;
92759281
void *_raw = bitwise_and_raw(p);
9282+
if (p->error_indicator)
9283+
return NULL;
92769284
if (_raw == NULL || p->mark <= _resmark)
92779285
break;
92789286
_resmark = p->mark;
@@ -9387,6 +9395,8 @@ shift_expr_rule(Parser *p)
93879395
}
93889396
p->mark = _mark;
93899397
void *_raw = shift_expr_raw(p);
9398+
if (p->error_indicator)
9399+
return NULL;
93909400
if (_raw == NULL || p->mark <= _resmark)
93919401
break;
93929402
_resmark = p->mark;
@@ -9540,6 +9550,8 @@ sum_rule(Parser *p)
95409550
}
95419551
p->mark = _mark;
95429552
void *_raw = sum_raw(p);
9553+
if (p->error_indicator)
9554+
return NULL;
95439555
if (_raw == NULL || p->mark <= _resmark)
95449556
break;
95459557
_resmark = p->mark;
@@ -9699,6 +9711,8 @@ term_rule(Parser *p)
96999711
}
97009712
p->mark = _mark;
97019713
void *_raw = term_raw(p);
9714+
if (p->error_indicator)
9715+
return NULL;
97029716
if (_raw == NULL || p->mark <= _resmark)
97039717
break;
97049718
_resmark = p->mark;
@@ -10303,6 +10317,8 @@ primary_rule(Parser *p)
1030310317
}
1030410318
p->mark = _mark;
1030510319
void *_raw = primary_raw(p);
10320+
if (p->error_indicator)
10321+
return NULL;
1030610322
if (_raw == NULL || p->mark <= _resmark)
1030710323
break;
1030810324
_resmark = p->mark;
@@ -13943,6 +13959,8 @@ t_primary_rule(Parser *p)
1394313959
}
1394413960
p->mark = _mark;
1394513961
void *_raw = t_primary_raw(p);
13962+
if (p->error_indicator)
13963+
return NULL;
1394613964
if (_raw == NULL || p->mark <= _resmark)
1394713965
break;
1394813966
_resmark = p->mark;

Tools/peg_generator/pegen/c_generator.py

+3
Original file line numberDiff line numberDiff line change
@@ -502,6 +502,9 @@ def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
502502
)
503503
self.print("p->mark = _mark;")
504504
self.print(f"void *_raw = {node.name}_raw(p);")
505+
self.print("if (p->error_indicator)")
506+
with self.indent():
507+
self.print("return NULL;")
505508
self.print("if (_raw == NULL || p->mark <= _resmark)")
506509
with self.indent():
507510
self.print("break;")

0 commit comments

Comments
 (0)