From b56545b236f5a65e76dd379648e8f4804eccda74 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Mon, 18 May 2020 14:44:11 +0200 Subject: [PATCH 1/7] Python: Regexp: Handle repetions {n} (with no ,) --- python/ql/src/semmle/python/regex.qll | 4 ++++ .../Expressions/Regex/DuplicateCharacterInSet.expected | 6 +++--- .../query-tests/Expressions/Regex/UnmatchableCaret.expected | 2 +- .../Expressions/Regex/UnmatchableDollar.expected | 2 +- python/ql/test/query-tests/Expressions/Regex/test.py | 3 +++ 5 files changed, 12 insertions(+), 5 deletions(-) diff --git a/python/ql/src/semmle/python/regex.qll b/python/ql/src/semmle/python/regex.qll index bd7469ea8306..2e41009b522c 100644 --- a/python/ql/src/semmle/python/regex.qll +++ b/python/ql/src/semmle/python/regex.qll @@ -472,8 +472,12 @@ abstract class RegexString extends Expr { this.getChar(endin) = "}" and end > start and exists(string multiples | multiples = this.getText().substring(start + 1, endin) | + multiples.regexpMatch("0+") and maybe_empty = true + or multiples.regexpMatch("0*,[0-9]*") and maybe_empty = true or + multiples.regexpMatch("0*[1-9][0-9]*") and maybe_empty = false + or multiples.regexpMatch("0*[1-9][0-9]*,[0-9]*") and maybe_empty = false ) and not exists(int mid | diff --git a/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected b/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected index c79c219256c2..f0890736dec2 100644 --- a/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected +++ b/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected @@ -1,3 +1,3 @@ -| test.py:41:12:41:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. | -| test.py:42:12:42:19 | Str | This regular expression includes duplicate character '0' in a set of characters. | -| test.py:43:12:43:21 | Str | This regular expression includes duplicate character '-' in a set of characters. | +| test.py:44:12:44:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. | +| test.py:45:12:45:19 | Str | This regular expression includes duplicate character '0' in a set of characters. | +| test.py:46:12:46:21 | Str | This regular expression includes duplicate character '-' in a set of characters. | diff --git a/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected b/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected index 1d6cabfafdcd..3f145c6f3d43 100644 --- a/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected +++ b/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected @@ -1,4 +1,4 @@ | test.py:4:12:4:19 | Str | This regular expression includes an unmatchable caret at offset 1. | | test.py:5:12:5:23 | Str | This regular expression includes an unmatchable caret at offset 5. | | test.py:6:12:6:21 | Str | This regular expression includes an unmatchable caret at offset 2. | -| test.py:74:12:74:27 | Str | This regular expression includes an unmatchable caret at offset 8. | +| test.py:77:12:77:27 | Str | This regular expression includes an unmatchable caret at offset 8. | diff --git a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected index 7771654c79bf..ef967982b8c5 100644 --- a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected +++ b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected @@ -1,4 +1,4 @@ | test.py:29:12:29:19 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:30:12:30:23 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:31:12:31:20 | Str | This regular expression includes an unmatchable dollar at offset 2. | -| test.py:75:12:75:26 | Str | This regular expression includes an unmatchable dollar at offset 3. | +| test.py:78:12:78:26 | Str | This regular expression includes an unmatchable dollar at offset 3. | diff --git a/python/ql/test/query-tests/Expressions/Regex/test.py b/python/ql/test/query-tests/Expressions/Regex/test.py index f0967dc6eefe..fefe42a1c870 100644 --- a/python/ql/test/query-tests/Expressions/Regex/test.py +++ b/python/ql/test/query-tests/Expressions/Regex/test.py @@ -35,6 +35,9 @@ re.compile(b"\$ ") re.compile(b"abc$(?m)") re.compile(b"abc$()") +re.compile(b"((a$)|b)*") +re.compile(b"((a$)|b){4}") +re.compile(b"((a$).*)") #Duplicate character in set From 4d6ad32f04cfc568f918916006ddb8cfe57d1c23 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 20 May 2020 08:11:03 +0200 Subject: [PATCH 2/7] Python: Update test expectations. As ar as I can tell, all these are improvements --- python/ql/test/library-tests/regex/Characters.expected | 1 - python/ql/test/library-tests/regex/FirstLast.expected | 2 ++ python/ql/test/library-tests/regex/Qualified.expected | 1 + python/ql/test/library-tests/regex/Regex.expected | 2 +- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/python/ql/test/library-tests/regex/Characters.expected b/python/ql/test/library-tests/regex/Characters.expected index 61d13b7bf59b..373a6bf7a662 100644 --- a/python/ql/test/library-tests/regex/Characters.expected +++ b/python/ql/test/library-tests/regex/Characters.expected @@ -110,7 +110,6 @@ | ax{3,} | 5 | 6 | | ax{3} | 0 | 1 | | ax{3} | 1 | 2 | -| ax{3} | 2 | 3 | | ax{3} | 3 | 4 | | ax{3} | 4 | 5 | | ax{,3} | 0 | 1 | diff --git a/python/ql/test/library-tests/regex/FirstLast.expected b/python/ql/test/library-tests/regex/FirstLast.expected index cdea10fac051..974504bfc4a2 100644 --- a/python/ql/test/library-tests/regex/FirstLast.expected +++ b/python/ql/test/library-tests/regex/FirstLast.expected @@ -84,6 +84,8 @@ | ax{3,} | last | 1 | 6 | | ax{3,} | last | 5 | 6 | | ax{3} | first | 0 | 1 | +| ax{3} | last | 1 | 2 | +| ax{3} | last | 1 | 5 | | ax{3} | last | 4 | 5 | | ax{,3} | first | 0 | 1 | | ax{,3} | last | 0 | 1 | diff --git a/python/ql/test/library-tests/regex/Qualified.expected b/python/ql/test/library-tests/regex/Qualified.expected index d3cf69b986f3..30019f943eb9 100644 --- a/python/ql/test/library-tests/regex/Qualified.expected +++ b/python/ql/test/library-tests/regex/Qualified.expected @@ -11,4 +11,5 @@ | ^[A-Z_]+$(? Date: Tue, 26 May 2020 08:07:13 +0200 Subject: [PATCH 3/7] Python: re test with \Z --- .../Regex/UnmatchableDollar.expected | 6 +++++- .../query-tests/Expressions/Regex/test.py | 19 ++++++++++--------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected index ef967982b8c5..e9354b476971 100644 --- a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected +++ b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected @@ -1,4 +1,8 @@ | test.py:29:12:29:19 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:30:12:30:23 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:31:12:31:20 | Str | This regular expression includes an unmatchable dollar at offset 2. | -| test.py:78:12:78:26 | Str | This regular expression includes an unmatchable dollar at offset 3. | +| test.py:41:10:41:27 | Str | This regular expression includes an unmatchable dollar at offset 5. | +| test.py:41:10:41:27 | Str | This regular expression includes an unmatchable dollar at offset 11. | +| test.py:41:10:41:27 | Str | This regular expression includes an unmatchable dollar at offset 13. | +| test.py:42:10:42:25 | Str | This regular expression includes an unmatchable dollar at offset 3. | +| test.py:79:12:79:26 | Str | This regular expression includes an unmatchable dollar at offset 3. | diff --git a/python/ql/test/query-tests/Expressions/Regex/test.py b/python/ql/test/query-tests/Expressions/Regex/test.py index fefe42a1c870..f421572f9846 100644 --- a/python/ql/test/query-tests/Expressions/Regex/test.py +++ b/python/ql/test/query-tests/Expressions/Regex/test.py @@ -30,15 +30,16 @@ re.compile(b"abc$ (?s)") re.compile(b"\[$] ") -#Likely false positives for unmatchable dollar -re.compile(b"[$] ") -re.compile(b"\$ ") -re.compile(b"abc$(?m)") -re.compile(b"abc$()") -re.compile(b"((a$)|b)*") -re.compile(b"((a$)|b){4}") -re.compile(b"((a$).*)") - +#Not unmatchable dollar +re.match(b"[$] ", b"$ ") +re.match(b"\$ ", b"$ ") +re.match(b"abc$(?m)", b"abc") +re.match(b"abc$()", b"abc") +re.match(b"((a$)|b)*", b"bba") +re.match(b"((a$)|b){4}", b"bbba") +re.match(b"((a$).*)", b"a") +re.match("(\Aab$|\Aba$)$\Z", "ab") +re.match(b"((a$\Z)|b){4}", b"bbba") #Duplicate character in set re.compile(b"[AA]") From 6b168de7fc83696b9ea40917fe5b7b6a0206c5db Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Tue, 26 May 2020 11:42:21 +0200 Subject: [PATCH 4/7] Python: re, handle \Z --- python/ql/src/semmle/python/regex.qll | 3 ++- .../Expressions/Regex/DuplicateCharacterInSet.expected | 6 +++--- .../query-tests/Expressions/Regex/UnmatchableCaret.expected | 2 +- .../Expressions/Regex/UnmatchableDollar.expected | 4 ---- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/python/ql/src/semmle/python/regex.qll b/python/ql/src/semmle/python/regex.qll index 2e41009b522c..986a89ccd1cd 100644 --- a/python/ql/src/semmle/python/regex.qll +++ b/python/ql/src/semmle/python/regex.qll @@ -624,7 +624,8 @@ abstract class RegexString extends Expr { exists(int y | this.lastPart(start, y) | this.emptyMatchAtEndGroup(end, y) or this.qualifiedItem(end, y, true) or - this.specialCharacter(end, y, "$") + this.specialCharacter(end, y, "$") or + y = end+2 and this.escapingChar(end) and this.getChar(end+1) = "Z" ) or exists(int x | diff --git a/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected b/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected index f0890736dec2..727afa89507f 100644 --- a/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected +++ b/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected @@ -1,3 +1,3 @@ -| test.py:44:12:44:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. | -| test.py:45:12:45:19 | Str | This regular expression includes duplicate character '0' in a set of characters. | -| test.py:46:12:46:21 | Str | This regular expression includes duplicate character '-' in a set of characters. | +| test.py:45:12:45:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. | +| test.py:46:12:46:19 | Str | This regular expression includes duplicate character '0' in a set of characters. | +| test.py:47:12:47:21 | Str | This regular expression includes duplicate character '-' in a set of characters. | diff --git a/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected b/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected index 3f145c6f3d43..cc4e57b5b7f6 100644 --- a/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected +++ b/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected @@ -1,4 +1,4 @@ | test.py:4:12:4:19 | Str | This regular expression includes an unmatchable caret at offset 1. | | test.py:5:12:5:23 | Str | This regular expression includes an unmatchable caret at offset 5. | | test.py:6:12:6:21 | Str | This regular expression includes an unmatchable caret at offset 2. | -| test.py:77:12:77:27 | Str | This regular expression includes an unmatchable caret at offset 8. | +| test.py:78:12:78:27 | Str | This regular expression includes an unmatchable caret at offset 8. | diff --git a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected index e9354b476971..ad6980801133 100644 --- a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected +++ b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected @@ -1,8 +1,4 @@ | test.py:29:12:29:19 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:30:12:30:23 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:31:12:31:20 | Str | This regular expression includes an unmatchable dollar at offset 2. | -| test.py:41:10:41:27 | Str | This regular expression includes an unmatchable dollar at offset 5. | -| test.py:41:10:41:27 | Str | This regular expression includes an unmatchable dollar at offset 11. | -| test.py:41:10:41:27 | Str | This regular expression includes an unmatchable dollar at offset 13. | -| test.py:42:10:42:25 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:79:12:79:26 | Str | This regular expression includes an unmatchable dollar at offset 3. | From b5703cd3f6a1e44e8a9a4d2bbc8f32f98ac1049e Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Thu, 11 Jun 2020 07:14:48 +0200 Subject: [PATCH 5/7] Python: link to FP report in test file --- python/ql/test/query-tests/Expressions/Regex/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/ql/test/query-tests/Expressions/Regex/test.py b/python/ql/test/query-tests/Expressions/Regex/test.py index f421572f9846..f43c2d25cd07 100644 --- a/python/ql/test/query-tests/Expressions/Regex/test.py +++ b/python/ql/test/query-tests/Expressions/Regex/test.py @@ -36,7 +36,7 @@ re.match(b"abc$(?m)", b"abc") re.match(b"abc$()", b"abc") re.match(b"((a$)|b)*", b"bba") -re.match(b"((a$)|b){4}", b"bbba") +re.match(b"((a$)|b){4}", b"bbba") # Inspired by FP report here: https://github.com/github/codeql/issues/2403 re.match(b"((a$).*)", b"a") re.match("(\Aab$|\Aba$)$\Z", "ab") re.match(b"((a$\Z)|b){4}", b"bbba") From 226c295b4c83ab9828bdc21c252c5b4103641160 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 24 Jun 2020 10:48:51 +0200 Subject: [PATCH 6/7] Python: format --- python/ql/src/semmle/python/regex.qll | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/python/ql/src/semmle/python/regex.qll b/python/ql/src/semmle/python/regex.qll index 986a89ccd1cd..aa17b4164afe 100644 --- a/python/ql/src/semmle/python/regex.qll +++ b/python/ql/src/semmle/python/regex.qll @@ -622,10 +622,13 @@ abstract class RegexString extends Expr { start = 0 and end = this.getText().length() or exists(int y | this.lastPart(start, y) | - this.emptyMatchAtEndGroup(end, y) or - this.qualifiedItem(end, y, true) or - this.specialCharacter(end, y, "$") or - y = end+2 and this.escapingChar(end) and this.getChar(end+1) = "Z" + this.emptyMatchAtEndGroup(end, y) + or + this.qualifiedItem(end, y, true) + or + this.specialCharacter(end, y, "$") + or + y = end + 2 and this.escapingChar(end) and this.getChar(end + 1) = "Z" ) or exists(int x | From 6e9c48bba788dbb27399ae4fa4bf3c93382c3d71 Mon Sep 17 00:00:00 2001 From: Rasmus Lerchedahl Petersen Date: Wed, 24 Jun 2020 11:01:27 +0200 Subject: [PATCH 7/7] Python: test zero iterations --- .../Expressions/Regex/DuplicateCharacterInSet.expected | 6 +++--- .../query-tests/Expressions/Regex/UnmatchableCaret.expected | 2 +- .../Expressions/Regex/UnmatchableDollar.expected | 2 +- python/ql/test/query-tests/Expressions/Regex/test.py | 1 + 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected b/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected index 727afa89507f..b00619280698 100644 --- a/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected +++ b/python/ql/test/query-tests/Expressions/Regex/DuplicateCharacterInSet.expected @@ -1,3 +1,3 @@ -| test.py:45:12:45:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. | -| test.py:46:12:46:19 | Str | This regular expression includes duplicate character '0' in a set of characters. | -| test.py:47:12:47:21 | Str | This regular expression includes duplicate character '-' in a set of characters. | +| test.py:46:12:46:18 | Str | This regular expression includes duplicate character 'A' in a set of characters. | +| test.py:47:12:47:19 | Str | This regular expression includes duplicate character '0' in a set of characters. | +| test.py:48:12:48:21 | Str | This regular expression includes duplicate character '-' in a set of characters. | diff --git a/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected b/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected index cc4e57b5b7f6..8b9f409ad848 100644 --- a/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected +++ b/python/ql/test/query-tests/Expressions/Regex/UnmatchableCaret.expected @@ -1,4 +1,4 @@ | test.py:4:12:4:19 | Str | This regular expression includes an unmatchable caret at offset 1. | | test.py:5:12:5:23 | Str | This regular expression includes an unmatchable caret at offset 5. | | test.py:6:12:6:21 | Str | This regular expression includes an unmatchable caret at offset 2. | -| test.py:78:12:78:27 | Str | This regular expression includes an unmatchable caret at offset 8. | +| test.py:79:12:79:27 | Str | This regular expression includes an unmatchable caret at offset 8. | diff --git a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected index ad6980801133..f0f93436ce97 100644 --- a/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected +++ b/python/ql/test/query-tests/Expressions/Regex/UnmatchableDollar.expected @@ -1,4 +1,4 @@ | test.py:29:12:29:19 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:30:12:30:23 | Str | This regular expression includes an unmatchable dollar at offset 3. | | test.py:31:12:31:20 | Str | This regular expression includes an unmatchable dollar at offset 2. | -| test.py:79:12:79:26 | Str | This regular expression includes an unmatchable dollar at offset 3. | +| test.py:80:12:80:26 | Str | This regular expression includes an unmatchable dollar at offset 3. | diff --git a/python/ql/test/query-tests/Expressions/Regex/test.py b/python/ql/test/query-tests/Expressions/Regex/test.py index f43c2d25cd07..38715e11c8cc 100644 --- a/python/ql/test/query-tests/Expressions/Regex/test.py +++ b/python/ql/test/query-tests/Expressions/Regex/test.py @@ -40,6 +40,7 @@ re.match(b"((a$).*)", b"a") re.match("(\Aab$|\Aba$)$\Z", "ab") re.match(b"((a$\Z)|b){4}", b"bbba") +re.match(b"(a){00}b", b"b") #Duplicate character in set re.compile(b"[AA]")