Skip to content

Python: Regexp: Handle repetitions {n} (with no ,) #3500

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 25, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions python/ql/src/semmle/python/regex.qll
Original file line number Diff line number Diff line change
Expand Up @@ -497,8 +497,12 @@ abstract class RegexString extends Expr {
this.getChar(endin) = "}" and
end > start and
exists(string multiples | multiples = this.getText().substring(start + 1, endin) |
multiples.regexpMatch("0+") and maybe_empty = true
or
multiples.regexpMatch("0*,[0-9]*") and maybe_empty = true
or
multiples.regexpMatch("0*[1-9][0-9]*") and maybe_empty = false
or
multiples.regexpMatch("0*[1-9][0-9]*,[0-9]*") and maybe_empty = false
) and
not exists(int mid |
Expand Down Expand Up @@ -643,9 +647,13 @@ abstract class RegexString extends Expr {
start = 0 and end = this.getText().length()
or
exists(int y | this.lastPart(start, y) |
this.emptyMatchAtEndGroup(end, y) or
this.qualifiedItem(end, y, true) or
this.emptyMatchAtEndGroup(end, y)
or
this.qualifiedItem(end, y, true)
or
this.specialCharacter(end, y, "$")
or
y = end + 2 and this.escapingChar(end) and this.getChar(end + 1) = "Z"
)
or
exists(int x |
Expand Down
1 change: 0 additions & 1 deletion python/ql/test/library-tests/regex/Characters.expected
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@
| ax{3,} | 5 | 6 |
| ax{3} | 0 | 1 |
| ax{3} | 1 | 2 |
| ax{3} | 2 | 3 |
| ax{3} | 3 | 4 |
| ax{3} | 4 | 5 |
| ax{,3} | 0 | 1 |
Expand Down
2 changes: 2 additions & 0 deletions python/ql/test/library-tests/regex/FirstLast.expected
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@
| ax{3,} | last | 1 | 6 |
| ax{3,} | last | 5 | 6 |
| ax{3} | first | 0 | 1 |
| ax{3} | last | 1 | 2 |
| ax{3} | last | 1 | 5 |
| ax{3} | last | 4 | 5 |
| ax{,3} | first | 0 | 1 |
| ax{,3} | last | 0 | 1 |
Expand Down
1 change: 1 addition & 0 deletions python/ql/test/library-tests/regex/Qualified.expected
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,5 @@
| ^[A-Z_]+$(?<!not-this) | 1 | 8 | false |
| ax{01,3} | 1 | 8 | false |
| ax{3,} | 1 | 6 | false |
| ax{3} | 1 | 5 | false |
| ax{,3} | 1 | 6 | true |
2 changes: 1 addition & 1 deletion python/ql/test/library-tests/regex/Regex.expected
Original file line number Diff line number Diff line change
Expand Up @@ -207,9 +207,9 @@
| ax{3,} | sequence | 0 | 6 |
| ax{3} | char | 0 | 1 |
| ax{3} | char | 1 | 2 |
| ax{3} | char | 2 | 3 |
| ax{3} | char | 3 | 4 |
| ax{3} | char | 4 | 5 |
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am not sure why 4-5 is in there, but both that and the removal of 2-3 are consistent with how {n,m} is handled.

| ax{3} | qualified | 1 | 5 |
| ax{3} | sequence | 0 | 5 |
| ax{,3} | char | 0 | 1 |
| ax{,3} | char | 1 | 2 |
Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
| test.py:41:12:41:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. |
| test.py:42:12:42:19 | Str | This regular expression includes duplicate character '0' in a set of characters. |
| test.py:43:12:43:21 | Str | This regular expression includes duplicate character '-' in a set of characters. |
| test.py:46:12:46:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. |
| test.py:47:12:47:19 | Str | This regular expression includes duplicate character '0' in a set of characters. |
| test.py:48:12:48:21 | Str | This regular expression includes duplicate character '-' in a set of characters. |
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
| test.py:4:12:4:19 | Str | This regular expression includes an unmatchable caret at offset 1. |
| test.py:5:12:5:23 | Str | This regular expression includes an unmatchable caret at offset 5. |
| test.py:6:12:6:21 | Str | This regular expression includes an unmatchable caret at offset 2. |
| test.py:74:12:74:27 | Str | This regular expression includes an unmatchable caret at offset 8. |
| test.py:79:12:79:27 | Str | This regular expression includes an unmatchable caret at offset 8. |
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
| test.py:29:12:29:19 | Str | This regular expression includes an unmatchable dollar at offset 3. |
| test.py:30:12:30:23 | Str | This regular expression includes an unmatchable dollar at offset 3. |
| test.py:31:12:31:20 | Str | This regular expression includes an unmatchable dollar at offset 2. |
| test.py:75:12:75:26 | Str | This regular expression includes an unmatchable dollar at offset 3. |
| test.py:80:12:80:26 | Str | This regular expression includes an unmatchable dollar at offset 3. |
17 changes: 11 additions & 6 deletions python/ql/test/query-tests/Expressions/Regex/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,17 @@
re.compile(b"abc$ (?s)")
re.compile(b"\[$] ")

#Likely false positives for unmatchable dollar
re.compile(b"[$] ")
re.compile(b"\$ ")
re.compile(b"abc$(?m)")
re.compile(b"abc$()")

#Not unmatchable dollar
re.match(b"[$] ", b"$ ")
re.match(b"\$ ", b"$ ")
re.match(b"abc$(?m)", b"abc")
re.match(b"abc$()", b"abc")
re.match(b"((a$)|b)*", b"bba")
re.match(b"((a$)|b){4}", b"bbba") # Inspired by FP report here: https://github.com/github/codeql/issues/2403
re.match(b"((a$).*)", b"a")
re.match("(\Aab$|\Aba$)$\Z", "ab")
re.match(b"((a$\Z)|b){4}", b"bbba")
re.match(b"(a){00}b", b"b")

#Duplicate character in set
re.compile(b"[AA]")
Expand Down