From 602c4fef5bf96e9a03eae2bc1fb557f37a73844e Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Tue, 23 Aug 2022 16:34:07 -0700 Subject: [PATCH 1/6] Test for exposing timeout check bug --- .../FunctionalTests/Regex.Match.Tests.cs | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 9f73f9f1c30e88..8197bba71ec452 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1278,6 +1278,47 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options) Assert.InRange(sw.Elapsed.TotalSeconds, 0, 10); // arbitrary upper bound that should be well above what's needed with a 1ms timeout } + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))] + public void Nonbacktracking_TimeoutCheckNotEOF() + { + // First figure out how many characters the innermost matching loop of NonBacktracking looks at between + // timeout checks. This makes the test more robust, as the value isn't exposed and we'd have to set it as + // a constant here. + Regex timeoutPattern = new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1)); + // Function for comparing the left hand side to the internal chars matched per timeout check in NonBacktracking + int IsCharsPerTimeoutCheck(int candidate, int dummy) + { + try + { + timeoutPattern.Match(new string('a', candidate)); + try + { + timeoutPattern.Match(new string('a', candidate + 1)); + } + catch (RegexMatchTimeoutException) + { + // Only the higher check timed out, number of chars per timeout check found + return 0; + } + // Neither check timed out, chars per timeout check is higher + return -1; + } + catch (RegexMatchTimeoutException) + { + // Candidate times out, chars per timeout check is lower + return 1; + } + } + int charsPerTimeoutCheck = Array.BinarySearch(Enumerable.Range(0, 2048).ToArray(), -1, Comparer.Create(IsCharsPerTimeoutCheck)); + Assert.True(charsPerTimeoutCheck >= 0); + + // Now that the limit is known, do the actual test + Regex testPattern = new Regex("^a*b$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1)); + string input = string.Concat(new string('a', charsPerTimeoutCheck - 1), "bc"); + // Checking for timeout just before the 'c' shouldn't allow the $ anchor to match + Assert.False(testPattern.IsMatch(input)); + } + public static IEnumerable Match_Advanced_TestData() { foreach (RegexEngine engine in RegexHelpers.AvailableEngines) From 3b0e82d0010662b01e8719c5b6c3b5bbaaa68496 Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Wed, 24 Aug 2022 13:43:43 -0700 Subject: [PATCH 2/6] Fix timeout check bug --- .../Symbolic/SymbolicRegexMatcher.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs index 70390343c3405c..89650f3e27a69a 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs @@ -448,13 +448,13 @@ private int FindEndPosition inputForInnerLoop = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ? - input.Slice(0, pos + CharsPerTimeoutCheck) : - input; + int innerLoopLength = _checkTimeout && input.Length - pos > CharsPerTimeoutCheck ? + pos + CharsPerTimeoutCheck : + input.Length; bool done = currentState.NfaState is not null ? - FindEndPositionDeltas(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : - FindEndPositionDeltas(inputForInnerLoop, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); + FindEndPositionDeltas(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate) : + FindEndPositionDeltas(input, innerLoopLength, mode, ref pos, ref currentState, ref endPos, ref endStateId, ref initialStatePos, ref initialStatePosCandidate); // If the inner loop indicates that the search finished (for example due to reaching a deadend state) or // there is no more input available, then the whole search is done. @@ -466,7 +466,7 @@ private int FindEndPosition - private bool FindEndPositionDeltas(ReadOnlySpan input, RegexRunnerMode mode, + private bool FindEndPositionDeltas(ReadOnlySpan input, int length, RegexRunnerMode mode, ref int posRef, ref CurrentState state, ref int endPosRef, ref int endStateIdRef, ref int initialStatePosRef, ref int initialStatePosCandidateRef) where TStateHandler : struct, IStateHandler where TInputReader : struct, IInputReader @@ -561,7 +561,7 @@ private bool FindEndPositionDeltas= length || !TStateHandler.TryTakeTransition(this, ref state, positionId)) { return false; } From aac55f51e0153544aa53b67b4c0e50ec533dd5e8 Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Wed, 24 Aug 2022 13:47:16 -0700 Subject: [PATCH 3/6] Improve naming --- .../tests/FunctionalTests/Regex.Match.Tests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 8197bba71ec452..e2c6df5245fe8e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1279,7 +1279,7 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options) } [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))] - public void Nonbacktracking_TimeoutCheckNotEOF() + public void NonBacktracking_NoEndAnchorMatchAtTimeoutCheck() { // First figure out how many characters the innermost matching loop of NonBacktracking looks at between // timeout checks. This makes the test more robust, as the value isn't exposed and we'd have to set it as From 01943c1d83b1cad626a6a2d0b6a580f9bb65b444 Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Wed, 24 Aug 2022 13:52:45 -0700 Subject: [PATCH 4/6] Simplify test --- .../tests/FunctionalTests/Regex.Match.Tests.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index e2c6df5245fe8e..72dd36366e6e69 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1313,9 +1313,9 @@ int IsCharsPerTimeoutCheck(int candidate, int dummy) Assert.True(charsPerTimeoutCheck >= 0); // Now that the limit is known, do the actual test - Regex testPattern = new Regex("^a*b$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1)); - string input = string.Concat(new string('a', charsPerTimeoutCheck - 1), "bc"); - // Checking for timeout just before the 'c' shouldn't allow the $ anchor to match + Regex testPattern = new Regex("^a*$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1)); + string input = string.Concat(new string('a', charsPerTimeoutCheck), 'b'); + // Checking for timeout just before the 'b' shouldn't allow the $ anchor to match Assert.False(testPattern.IsMatch(input)); } From 7c6f17081951a2713ac5b4c5ece776d7a9a9b9d0 Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Wed, 24 Aug 2022 14:04:03 -0700 Subject: [PATCH 5/6] Make test search range higher --- .../tests/FunctionalTests/Regex.Match.Tests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index 72dd36366e6e69..a9da24b8a8f782 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1309,7 +1309,7 @@ int IsCharsPerTimeoutCheck(int candidate, int dummy) return 1; } } - int charsPerTimeoutCheck = Array.BinarySearch(Enumerable.Range(0, 2048).ToArray(), -1, Comparer.Create(IsCharsPerTimeoutCheck)); + int charsPerTimeoutCheck = Array.BinarySearch(Enumerable.Range(0, 16_000).ToArray(), -1, Comparer.Create(IsCharsPerTimeoutCheck)); Assert.True(charsPerTimeoutCheck >= 0); // Now that the limit is known, do the actual test From 3a9125cd1a8d7edf917fd5a69f021a53c2da0f43 Mon Sep 17 00:00:00 2001 From: Olli Saarikivi Date: Thu, 8 Sep 2022 09:30:32 -0700 Subject: [PATCH 6/6] Simplify test for timeout check bug --- .../FunctionalTests/Regex.Match.Tests.cs | 43 ++++--------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs index a9da24b8a8f782..b5b27236a56a7d 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs @@ -1281,41 +1281,16 @@ public void Match_CachedPattern_NewTimeoutApplies(RegexOptions options) [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))] public void NonBacktracking_NoEndAnchorMatchAtTimeoutCheck() { - // First figure out how many characters the innermost matching loop of NonBacktracking looks at between - // timeout checks. This makes the test more robust, as the value isn't exposed and we'd have to set it as - // a constant here. - Regex timeoutPattern = new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1)); - // Function for comparing the left hand side to the internal chars matched per timeout check in NonBacktracking - int IsCharsPerTimeoutCheck(int candidate, int dummy) - { - try - { - timeoutPattern.Match(new string('a', candidate)); - try - { - timeoutPattern.Match(new string('a', candidate + 1)); - } - catch (RegexMatchTimeoutException) - { - // Only the higher check timed out, number of chars per timeout check found - return 0; - } - // Neither check timed out, chars per timeout check is higher - return -1; - } - catch (RegexMatchTimeoutException) - { - // Candidate times out, chars per timeout check is lower - return 1; - } - } - int charsPerTimeoutCheck = Array.BinarySearch(Enumerable.Range(0, 16_000).ToArray(), -1, Comparer.Create(IsCharsPerTimeoutCheck)); - Assert.True(charsPerTimeoutCheck >= 0); - - // Now that the limit is known, do the actual test + // This constant must be at least as large as the one in the implementation that sets the maximum number + // of innermost loop iterations between timeout checks. + const int CharsToTriggerTimeoutCheck = 10000; + // Check that it is indeed large enough to trigger timeouts. If this fails the constant above needs to be larger. + Assert.Throws(() => new Regex("a*", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromTicks(1)) + .Match(new string('a', CharsToTriggerTimeoutCheck))); + + // The actual test: ^a*$ shouldn't match in a string ending in 'b' Regex testPattern = new Regex("^a*$", RegexHelpers.RegexOptionNonBacktracking, TimeSpan.FromHours(1)); - string input = string.Concat(new string('a', charsPerTimeoutCheck), 'b'); - // Checking for timeout just before the 'b' shouldn't allow the $ anchor to match + string input = string.Concat(new string('a', CharsToTriggerTimeoutCheck), 'b'); Assert.False(testPattern.IsMatch(input)); }