Skip to content

Commit 55c713b

Browse files
committed
Added new regex option RegexOptions.AnyNewLine
1 parent 0bdab8a commit 55c713b

File tree

13 files changed

+324
-21
lines changed

13 files changed

+324
-21
lines changed

src/libraries/System.Text.RegularExpressions/ref/System.Text.RegularExpressions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ public enum RegexOptions
243243
RightToLeft = 64,
244244
ECMAScript = 256,
245245
CultureInvariant = 512,
246+
AnyNewLine = 1024,
246247
}
247248
public abstract partial class RegexRunner
248249
{

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ namespace System.Text.RegularExpressions
2121
/// </summary>
2222
public partial class Regex : ISerializable
2323
{
24-
private const int MaxOptionShift = 10;
24+
private const int MaxOptionShift = 11;
2525

2626
protected internal string? pattern; // The string pattern provided
2727
protected internal RegexOptions roptions; // the top-level options from the options string
@@ -137,7 +137,8 @@ internal static void ValidateOptions(RegexOptions options)
137137
#if DEBUG
138138
RegexOptions.Debug |
139139
#endif
140-
RegexOptions.CultureInvariant)) != 0)
140+
RegexOptions.CultureInvariant |
141+
RegexOptions.AnyNewLine)) != 0)
141142
{
142143
throw new ArgumentOutOfRangeException(nameof(options));
143144
}

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ internal sealed class RegexCode
5353
public const int Beginning = 18; // \A
5454
public const int Start = 19; // \G
5555
public const int EndZ = 20; // \Z
56-
public const int End = 21; // \Z
56+
public const int End = 21; // \z
5757

5858
public const int Nothing = 22; // Reject!
5959

@@ -88,6 +88,9 @@ internal sealed class RegexCode
8888
public const int Notoneloopatomic = 44; // lef,back set,min,max (?> . {,n} )
8989
public const int Setloopatomic = 45; // lef,back set,min,max (?> [\d]{,n} )
9090

91+
public const int AnyEndZ = 46; // \Z (succeeds at end of text or before a final "\r", "\n" or "\r\n")
92+
public const int AnyEol = 47; // $ (succeeds at end of text or before '\n' or '\r')
93+
9194
// Modifiers for alternate modes
9295
public const int Mask = 63; // Mask to get unmodified ordinary operator
9396
public const int Rtl = 64; // bit to indicate that we're reverse scanning.
@@ -168,13 +171,15 @@ public static int OpcodeSize(int opcode)
168171
case Nothing:
169172
case Bol:
170173
case Eol:
174+
case AnyEol:
171175
case Boundary:
172176
case Nonboundary:
173177
case ECMABoundary:
174178
case NonECMABoundary:
175179
case Beginning:
176180
case Start:
177181
case EndZ:
182+
case AnyEndZ:
178183
case End:
179184
case Nullmark:
180185
case Setmark:

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs

Lines changed: 119 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -876,7 +876,7 @@ protected void GenerateFindFirstChar()
876876
}
877877
}
878878

879-
if ((_anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End)) != 0)
879+
if ((_anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.AnyEndZ | RegexFCD.End)) != 0)
880880
{
881881
if (!_code!.RightToLeft)
882882
{
@@ -908,7 +908,7 @@ protected void GenerateFindFirstChar()
908908
MarkLabel(l1);
909909
}
910910

911-
if ((_anchors & RegexFCD.EndZ) != 0)
911+
if ((_anchors & (RegexFCD.EndZ | RegexFCD.AnyEndZ)) != 0)
912912
{
913913
Label l1 = DefineLabel();
914914
Ldthisfld(s_runtextposField);
@@ -978,6 +978,60 @@ protected void GenerateFindFirstChar()
978978
MarkLabel(l2);
979979
}
980980

981+
if ((_anchors & RegexFCD.AnyEndZ) != 0)
982+
{
983+
LocalBuilder diff = _temp1Local;
984+
Label l1 = DefineLabel();
985+
Label l2 = DefineLabel();
986+
Label l3 = DefineLabel();
987+
Ldthisfld(s_runtextendField);
988+
Ldthisfld(s_runtextposField);
989+
Sub();
990+
Stloc(diff);
991+
Ldloc(diff);
992+
Ldc(2);
993+
Bgt(l1);
994+
Ldloc(diff);
995+
Ldc(2);
996+
Blt(l2);
997+
Ldthisfld(s_runtextField);
998+
Ldthisfld(s_runtextposField);
999+
Callvirt(s_stringGetCharsMethod);
1000+
Ldc('\r');
1001+
Bne(l1);
1002+
Ldthisfld(s_runtextField);
1003+
Ldthisfld(s_runtextposField);
1004+
Ldc(1);
1005+
Add();
1006+
Callvirt(s_stringGetCharsMethod);
1007+
Ldc('\n');
1008+
Bne(l1);
1009+
Br(l3);
1010+
1011+
MarkLabel(l2);
1012+
Ldloc(diff);
1013+
Ldc(1);
1014+
Blt(l3);
1015+
Ldthisfld(s_runtextField);
1016+
Ldthisfld(s_runtextposField);
1017+
Callvirt(s_stringGetCharsMethod);
1018+
Ldc('\n');
1019+
Beq(l3);
1020+
Ldthisfld(s_runtextField);
1021+
Ldthisfld(s_runtextposField);
1022+
Callvirt(s_stringGetCharsMethod);
1023+
Ldc('\r');
1024+
Beq(l3);
1025+
1026+
MarkLabel(l1);
1027+
Ldthis();
1028+
Ldthisfld(s_runtextbegField);
1029+
Stfld(s_runtextposField);
1030+
Ldc(0);
1031+
Ret();
1032+
MarkLabel(l3);
1033+
}
1034+
9811035
if ((_anchors & RegexFCD.Start) != 0)
9821036
{
9831037
Label l1 = DefineLabel();
@@ -3361,6 +3415,23 @@ private void GenerateOneCode()
33613415
break;
33623416
}
33633417

3418+
case RegexCode.AnyEol:
3419+
//: if (Rightchars() > 0 && CharAt(Textpos()) != '\n' && CharAt(Textpos()) != '\r')
3420+
//: break Backward;
3421+
{
3422+
Label l1 = _labels![NextCodepos()];
3423+
Ldloc(_runtextposLocal!);
3424+
Ldloc(_runtextendLocal!);
3425+
Bge(l1);
3426+
Rightchar();
3427+
Ldc('\n');
3428+
Beq(l1);
3429+
Rightchar();
3430+
Ldc('\r');
3431+
BneFar(_backtrack);
3432+
break;
3433+
}
3434+
33643435
case RegexCode.Boundary:
33653436
case RegexCode.Nonboundary:
33663437
//: if (!IsBoundary(Textpos(), _textbeg, _textend))
@@ -3431,6 +3502,52 @@ private void GenerateOneCode()
34313502
BneFar(_backtrack);
34323503
break;
34333504

3505+
case RegexCode.AnyEndZ:
3506+
//: if (rightChars > 2)
3507+
//: break Backward;
3508+
//: if (rightChars == 1 && CharAt(Textpos()) != '\r' && CharAt(Textpos()) != '\n')
3509+
//: break Backward;
3510+
//: if (rightChars == 2 && (CharAt(Textpos()) != '\r' || CharAt(Textpos()+1) != '\n'))
3511+
//: break Backward;
3512+
{
3513+
LocalBuilder diff = _temp1Local!;
3514+
Label l1 = DefineLabel();
3515+
3516+
Ldloc(_runtextendLocal!);
3517+
Ldloc(_runtextposLocal!);
3518+
Sub();
3519+
Stloc(diff);
3520+
Ldloc(diff);
3521+
Ldc(2);
3522+
BgtFar(_backtrack);
3523+
Ldloc(diff);
3524+
Ldc(2);
3525+
Blt(l1);
3526+
Rightchar();
3527+
Ldc('\r');
3528+
BneFar(_backtrack);
3529+
Ldloc(_runtextLocal!);
3530+
Ldloc(_runtextposLocal!);
3531+
Ldc(1);
3532+
Add();
3533+
Callvirt(s_stringGetCharsMethod);
3534+
Ldc('\n');
3535+
BneFar(_backtrack);
3536+
Br(_labels![NextCodepos()]);
3537+
3538+
MarkLabel(l1);
3539+
Ldloc(diff);
3540+
Ldc(1);
3541+
Blt(_labels![NextCodepos()]);
3542+
Rightchar();
3543+
Ldc('\n');
3544+
Beq(_labels![NextCodepos()]);
3545+
Rightchar();
3546+
Ldc('\r');
3547+
BneFar(_backtrack);
3548+
break;
3549+
}
3550+
34343551
case RegexCode.End:
34353552
//: if (Rightchars() > 0)
34363553
//: break Backward;

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFCD.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,9 @@ internal ref struct RegexFCD
3535
public const int Boundary = 0x0040;
3636
public const int ECMABoundary = 0x0080;
3737

38+
public const int AnyEndZ = 0x0100;
39+
public const int AnyEol = 0x0200;
40+
3841
private readonly List<RegexFC> _fcStack;
3942
private ValueListBuilder<int> _intStack; // must not be readonly
4043
private bool _skipAllChildren; // don't process any more children at the current level
@@ -130,11 +133,13 @@ public static RegexPrefix Prefix(RegexTree tree)
130133

131134
case RegexNode.Bol:
132135
case RegexNode.Eol:
136+
case RegexNode.AnyEol:
133137
case RegexNode.Boundary:
134138
case RegexNode.ECMABoundary:
135139
case RegexNode.Beginning:
136140
case RegexNode.Start:
137141
case RegexNode.EndZ:
142+
case RegexNode.AnyEndZ:
138143
case RegexNode.End:
139144
case RegexNode.Empty:
140145
case RegexNode.Require:
@@ -185,11 +190,13 @@ public static int Anchors(RegexTree tree)
185190

186191
case RegexNode.Bol:
187192
case RegexNode.Eol:
193+
case RegexNode.AnyEol:
188194
case RegexNode.Boundary:
189195
case RegexNode.ECMABoundary:
190196
case RegexNode.Beginning:
191197
case RegexNode.Start:
192198
case RegexNode.EndZ:
199+
case RegexNode.AnyEndZ:
193200
case RegexNode.End:
194201
return result | AnchorFromType(curNode.Type);
195202

@@ -217,11 +224,13 @@ private static int AnchorFromType(int type) =>
217224
{
218225
RegexNode.Bol => Bol,
219226
RegexNode.Eol => Eol,
227+
RegexNode.AnyEol => AnyEol,
220228
RegexNode.Boundary => Boundary,
221229
RegexNode.ECMABoundary => ECMABoundary,
222230
RegexNode.Beginning => Beginning,
223231
RegexNode.Start => Start,
224232
RegexNode.EndZ => EndZ,
233+
RegexNode.AnyEndZ => AnyEndZ,
225234
RegexNode.End => End,
226235
_ => 0,
227236
};
@@ -244,10 +253,14 @@ public static string AnchorDescription(int anchors)
244253
sb.Append(", ECMABoundary");
245254
if (0 != (anchors & Eol))
246255
sb.Append(", Eol");
256+
if (0 != (anchors & AnyEol))
257+
sb.Append(", AnyEol");
247258
if (0 != (anchors & End))
248259
sb.Append(", End");
249260
if (0 != (anchors & EndZ))
250261
sb.Append(", EndZ");
262+
if (0 != (anchors & AnyEndZ))
263+
sb.Append(", AnyEndZ");
251264

252265
if (sb.Length >= 2)
253266
return (sb.ToString(2, sb.Length - 2));
@@ -479,13 +492,15 @@ private void CalculateFC(int NodeType, RegexNode node, int CurIndex)
479492
case RegexNode.Nothing:
480493
case RegexNode.Bol:
481494
case RegexNode.Eol:
495+
case RegexNode.AnyEol:
482496
case RegexNode.Boundary:
483497
case RegexNode.Nonboundary:
484498
case RegexNode.ECMABoundary:
485499
case RegexNode.NonECMABoundary:
486500
case RegexNode.Beginning:
487501
case RegexNode.Start:
488502
case RegexNode.EndZ:
503+
case RegexNode.AnyEndZ:
489504
case RegexNode.End:
490505
PushFC(new RegexFC(true));
491506
break;

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,8 @@ private char CharAt(int j)
387387

388388
protected override bool FindFirstChar()
389389
{
390-
if (0 != (_code.Anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ | RegexFCD.End)))
390+
if (0 != (_code.Anchors & (RegexFCD.Beginning | RegexFCD.Start | RegexFCD.EndZ |
391+
RegexFCD.AnyEndZ | RegexFCD.End)))
391392
{
392393
if (!_code.RightToLeft)
393394
{
@@ -397,7 +398,7 @@ protected override bool FindFirstChar()
397398
runtextpos = runtextend;
398399
return false;
399400
}
400-
if (0 != (_code.Anchors & RegexFCD.EndZ) && runtextpos < runtextend - 1)
401+
if (0 != (_code.Anchors & (RegexFCD.EndZ | RegexFCD.AnyEndZ)) && runtextpos < runtextend - 1)
401402
{
402403
runtextpos = runtextend - 1;
403404
}
@@ -411,6 +412,11 @@ protected override bool FindFirstChar()
411412
if ((0 != (_code.Anchors & RegexFCD.End) && runtextpos < runtextend) ||
412413
(0 != (_code.Anchors & RegexFCD.EndZ) && (runtextpos < runtextend - 1 ||
413414
(runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'))) ||
415+
(0 != (_code.Anchors & RegexFCD.AnyEndZ) && (runtextpos < runtextend - 2 ||
416+
(runtextpos == runtextend - 2 && (CharAt(runtextpos) != '\r'
417+
|| CharAt(runtextpos+1) != '\n')) ||
418+
(runtextpos == runtextend - 1 && CharAt(runtextpos) != '\n'
419+
&& CharAt(runtextpos) != '\r'))) ||
414420
(0 != (_code.Anchors & RegexFCD.Start) && runtextpos < runtextstart))
415421
{
416422
runtextpos = runtextbeg;
@@ -967,6 +973,12 @@ protected override void Go()
967973
advance = 0;
968974
continue;
969975

976+
case RegexCode.AnyEol:
977+
if (Rightchars() > 0 && CharAt(Textpos()) != '\n' && CharAt(Textpos()) != '\r')
978+
break;
979+
advance = 0;
980+
continue;
981+
970982
case RegexCode.Boundary:
971983
if (!IsBoundary(Textpos(), runtextbeg, runtextend))
972984
break;
@@ -1009,6 +1021,17 @@ protected override void Go()
10091021
advance = 0;
10101022
continue;
10111023

1024+
case RegexCode.AnyEndZ:
1025+
int rightChars = Rightchars();
1026+
if (rightChars > 2)
1027+
break;
1028+
if (rightChars == 1 && CharAt(Textpos()) != '\r' && CharAt(Textpos()) != '\n')
1029+
break;
1030+
if (rightChars == 2 && (CharAt(Textpos()) != '\r' || CharAt(Textpos() + 1) != '\n'))
1031+
break;
1032+
advance = 0;
1033+
continue;
1034+
10121035
case RegexCode.End:
10131036
if (Rightchars() > 0)
10141037
break;

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,13 +69,15 @@ internal sealed class RegexNode
6969

7070
public const int Bol = RegexCode.Bol; // ^
7171
public const int Eol = RegexCode.Eol; // $
72+
public const int AnyEol = RegexCode.AnyEol; // $ (succeeds at end of text or before '\n' or '\r')
7273
public const int Boundary = RegexCode.Boundary; // \b
7374
public const int Nonboundary = RegexCode.Nonboundary; // \B
7475
public const int ECMABoundary = RegexCode.ECMABoundary; // \b
7576
public const int NonECMABoundary = RegexCode.NonECMABoundary; // \B
7677
public const int Beginning = RegexCode.Beginning; // \A
7778
public const int Start = RegexCode.Start; // \G
7879
public const int EndZ = RegexCode.EndZ; // \Z
80+
public const int AnyEndZ = RegexCode.AnyEndZ; // \Z (succeeds at end of text or before a final "\r", "\n" or "\r\n")
7981
public const int End = RegexCode.End; // \z
8082

8183
public const int Oneloopatomic = RegexCode.Oneloopatomic; // c,n (?> a*)
@@ -978,6 +980,7 @@ public string Description()
978980
if ((Options & RegexOptions.Singleline) != 0) argSb.Append("-S");
979981
if ((Options & RegexOptions.IgnorePatternWhitespace) != 0) argSb.Append("-X");
980982
if ((Options & RegexOptions.ECMAScript) != 0) argSb.Append("-E");
983+
if ((Options & RegexOptions.AnyNewLine) != 0) argSb.Append("-A");
981984

982985
switch (Type)
983986
{

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexOptions.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,5 +22,6 @@ public enum RegexOptions
2222

2323
ECMAScript = 0x0100, // "e"
2424
CultureInvariant = 0x0200,
25+
AnyNewLine = 0x0400, // "a", Treat "$" as (?=\r\z|\n\z|\r\n\z|\z)
2526
}
2627
}

0 commit comments

Comments
 (0)