Skip to content

Commit 30e35a3

Browse files
committed
Auto merge of #384 - adamcrume:master, r=BurntSushi
Move has_visited check to the top of the loop in backtrack::Bounded::step …pressions. With certain repeated empty expressions similar to (x*)*?, the backtracker can go into an infinite loop. This change adds the Progress instruction, which requires the engine to make progress to continue matching a repeated subexpression. Fixes #375. Note that this was inspired by https://swtch.com/~rsc/regexp/regexp2.html#real (mentioned in HACKING.md), which mentions that a progress instruction can be used to prevent backtracking loops.
2 parents 2f18730 + 6356d1a commit 30e35a3

File tree

2 files changed

+17
-3
lines changed

2 files changed

+17
-3
lines changed

src/backtrack.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,9 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
216216
// from the stack. Namely, if we're pushing a job only to run it
217217
// next, avoid the push and just mutate `ip` (and possibly `at`)
218218
// in place.
219+
if self.has_visited(ip, at) {
220+
return false;
221+
}
219222
match self.prog[ip] {
220223
Match(slot) => {
221224
if slot < self.matches.len() {
@@ -275,9 +278,6 @@ impl<'a, 'm, 'r, 's, I: Input> Bounded<'a, 'm, 'r, 's, I> {
275278
return false;
276279
}
277280
}
278-
if self.has_visited(ip, at) {
279-
return false;
280-
}
281281
}
282282
}
283283

tests/crazy.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,20 @@ mat!(negclass_space_comma, r"[^,\s]", ", a", Some((2, 3)));
4747
mat!(negclass_comma_space, r"[^\s,]", " ,a", Some((2, 3)));
4848
mat!(negclass_ascii, r"[^[:alpha:]Z]", "A1", Some((1, 2)));
4949

50+
// Test that repeated empty expressions don't loop forever.
51+
mat!(lazy_many_many, r"((?:.*)*?)=", "a=b", Some((0, 2)));
52+
mat!(lazy_many_optional, r"((?:.?)*?)=", "a=b", Some((0, 2)));
53+
mat!(lazy_one_many_many, r"((?:.*)+?)=", "a=b", Some((0, 2)));
54+
mat!(lazy_one_many_optional, r"((?:.?)+?)=", "a=b", Some((0, 2)));
55+
mat!(lazy_range_min_many, r"((?:.*){1,}?)=", "a=b", Some((0, 2)));
56+
mat!(lazy_range_many, r"((?:.*){1,2}?)=", "a=b", Some((0, 2)));
57+
mat!(greedy_many_many, r"((?:.*)*)=", "a=b", Some((0, 2)));
58+
mat!(greedy_many_optional, r"((?:.?)*)=", "a=b", Some((0, 2)));
59+
mat!(greedy_one_many_many, r"((?:.*)+)=", "a=b", Some((0, 2)));
60+
mat!(greedy_one_many_optional, r"((?:.?)+)=", "a=b", Some((0, 2)));
61+
mat!(greedy_range_min_many, r"((?:.*){1,})=", "a=b", Some((0, 2)));
62+
mat!(greedy_range_many, r"((?:.*){1,2})=", "a=b", Some((0, 2)));
63+
5064
// Test that the DFA can handle pathological cases.
5165
// (This should result in the DFA's cache being flushed too frequently, which
5266
// should cause it to quit and fall back to the NFA algorithm.)

0 commit comments

Comments
 (0)