Skip to content

Commit b36c23e

Browse files
authored
Merge pull request #3500 from yoff/UnmatchableDollar
Python: Regexp: Handle repetitions {n} (with no ,)
2 parents 0b36cd4 + f6c59ab commit b36c23e

File tree

9 files changed

+30
-15
lines changed

9 files changed

+30
-15
lines changed

python/ql/src/semmle/python/regex.qll

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -497,8 +497,12 @@ abstract class RegexString extends Expr {
497497
this.getChar(endin) = "}" and
498498
end > start and
499499
exists(string multiples | multiples = this.getText().substring(start + 1, endin) |
500+
multiples.regexpMatch("0+") and maybe_empty = true
501+
or
500502
multiples.regexpMatch("0*,[0-9]*") and maybe_empty = true
501503
or
504+
multiples.regexpMatch("0*[1-9][0-9]*") and maybe_empty = false
505+
or
502506
multiples.regexpMatch("0*[1-9][0-9]*,[0-9]*") and maybe_empty = false
503507
) and
504508
not exists(int mid |
@@ -643,9 +647,13 @@ abstract class RegexString extends Expr {
643647
start = 0 and end = this.getText().length()
644648
or
645649
exists(int y | this.lastPart(start, y) |
646-
this.emptyMatchAtEndGroup(end, y) or
647-
this.qualifiedItem(end, y, true) or
650+
this.emptyMatchAtEndGroup(end, y)
651+
or
652+
this.qualifiedItem(end, y, true)
653+
or
648654
this.specialCharacter(end, y, "$")
655+
or
656+
y = end + 2 and this.escapingChar(end) and this.getChar(end + 1) = "Z"
649657
)
650658
or
651659
exists(int x |

python/ql/test/library-tests/regex/Characters.expected

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,6 @@
110110
| ax{3,} | 5 | 6 |
111111
| ax{3} | 0 | 1 |
112112
| ax{3} | 1 | 2 |
113-
| ax{3} | 2 | 3 |
114113
| ax{3} | 3 | 4 |
115114
| ax{3} | 4 | 5 |
116115
| ax{,3} | 0 | 1 |

python/ql/test/library-tests/regex/FirstLast.expected

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@
8484
| ax{3,} | last | 1 | 6 |
8585
| ax{3,} | last | 5 | 6 |
8686
| ax{3} | first | 0 | 1 |
87+
| ax{3} | last | 1 | 2 |
88+
| ax{3} | last | 1 | 5 |
8789
| ax{3} | last | 4 | 5 |
8890
| ax{,3} | first | 0 | 1 |
8991
| ax{,3} | last | 0 | 1 |

python/ql/test/library-tests/regex/Qualified.expected

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,4 +11,5 @@
1111
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
1212
| ax{01,3} | 1 | 8 | false |
1313
| ax{3,} | 1 | 6 | false |
14+
| ax{3} | 1 | 5 | false |
1415
| ax{,3} | 1 | 6 | true |

python/ql/test/library-tests/regex/Regex.expected

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -207,9 +207,9 @@
207207
| ax{3,} | sequence | 0 | 6 |
208208
| ax{3} | char | 0 | 1 |
209209
| ax{3} | char | 1 | 2 |
210-
| ax{3} | char | 2 | 3 |
211210
| ax{3} | char | 3 | 4 |
212211
| ax{3} | char | 4 | 5 |
212+
| ax{3} | qualified | 1 | 5 |
213213
| ax{3} | sequence | 0 | 5 |
214214
| ax{,3} | char | 0 | 1 |
215215
| ax{,3} | char | 1 | 2 |
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
| test.py:41:12:41:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. |
2-
| test.py:42:12:42:19 | Str | This regular expression includes duplicate character '0' in a set of characters. |
3-
| test.py:43:12:43:21 | Str | This regular expression includes duplicate character '-' in a set of characters. |
1+
| test.py:46:12:46:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. |
2+
| test.py:47:12:47:19 | Str | This regular expression includes duplicate character '0' in a set of characters. |
3+
| test.py:48:12:48:21 | Str | This regular expression includes duplicate character '-' in a set of characters. |
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
| test.py:4:12:4:19 | Str | This regular expression includes an unmatchable caret at offset 1. |
22
| test.py:5:12:5:23 | Str | This regular expression includes an unmatchable caret at offset 5. |
33
| test.py:6:12:6:21 | Str | This regular expression includes an unmatchable caret at offset 2. |
4-
| test.py:74:12:74:27 | Str | This regular expression includes an unmatchable caret at offset 8. |
4+
| test.py:79:12:79:27 | Str | This regular expression includes an unmatchable caret at offset 8. |
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
| test.py:29:12:29:19 | Str | This regular expression includes an unmatchable dollar at offset 3. |
22
| test.py:30:12:30:23 | Str | This regular expression includes an unmatchable dollar at offset 3. |
33
| test.py:31:12:31:20 | Str | This regular expression includes an unmatchable dollar at offset 2. |
4-
| test.py:75:12:75:26 | Str | This regular expression includes an unmatchable dollar at offset 3. |
4+
| test.py:80:12:80:26 | Str | This regular expression includes an unmatchable dollar at offset 3. |

python/ql/test/query-tests/Expressions/Regex/test.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,17 @@
3030
re.compile(b"abc$ (?s)")
3131
re.compile(b"\[$] ")
3232

33-
#Likely false positives for unmatchable dollar
34-
re.compile(b"[$] ")
35-
re.compile(b"\$ ")
36-
re.compile(b"abc$(?m)")
37-
re.compile(b"abc$()")
38-
33+
#Not unmatchable dollar
34+
re.match(b"[$] ", b"$ ")
35+
re.match(b"\$ ", b"$ ")
36+
re.match(b"abc$(?m)", b"abc")
37+
re.match(b"abc$()", b"abc")
38+
re.match(b"((a$)|b)*", b"bba")
39+
re.match(b"((a$)|b){4}", b"bbba") # Inspired by FP report here: https://github.com/github/codeql/issues/2403
40+
re.match(b"((a$).*)", b"a")
41+
re.match("(\Aab$|\Aba$)$\Z", "ab")
42+
re.match(b"((a$\Z)|b){4}", b"bbba")
43+
re.match(b"(a){00}b", b"b")
3944

4045
#Duplicate character in set
4146
re.compile(b"[AA]")

0 commit comments

Comments
 (0)