@@ -2426,6 +2426,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2426
2426
// Quickly get to the end of regex such that we know the flags
2427
2427
let p = tokenStart + 1 ;
2428
2428
let inEscape = false ;
2429
+ let namedCaptureGroups = false ;
2429
2430
// Although nested character classes are allowed in Unicode Sets mode,
2430
2431
// an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
2431
2432
// Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
@@ -2469,6 +2470,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2469
2470
else if ( ch === CharacterCodes . closeBracket ) {
2470
2471
inCharacterClass = false ;
2471
2472
}
2473
+ else if (
+ // Only an unescaped `(?<name` outside a character class starts a named capturing group;
+ // inside `[...]` the same characters are literal class members and must not set the flag.
+ // `(?<=` and `(?<!` are lookbehind assertions, not groups, so they are excluded below.
+ ! inCharacterClass
2474
+ && ch === CharacterCodes . openParen
2475
+ && charCodeUnchecked ( p + 1 ) === CharacterCodes . question
2476
+ && charCodeUnchecked ( p + 2 ) === CharacterCodes . lessThan
2477
+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . equals
2478
+ && charCodeUnchecked ( p + 3 ) !== CharacterCodes . exclamation
2479
+ ) {
2480
+ namedCaptureGroups = true ;
2481
+ }
2472
2482
p ++ ;
2473
2483
}
2474
2484
const isUnterminated = ! ! ( tokenFlags & TokenFlags . Unterminated ) ;
@@ -2505,7 +2515,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2505
2515
const saveEnd = end ;
2506
2516
pos = tokenStart + 1 ;
2507
2517
end = endOfBody ;
2508
- scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true ) ;
2518
+ scanRegularExpressionWorker ( regExpFlags , isUnterminated , /*annexB*/ true , namedCaptureGroups ) ;
2509
2519
tokenStart = saveTokenStart ;
2510
2520
tokenFlags = saveTokenFlags ;
2511
2521
pos = savePos ;
@@ -2517,7 +2527,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2517
2527
return token ;
2518
2528
}
2519
2529
2520
- function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean ) {
2530
+ function scanRegularExpressionWorker ( regExpFlags : RegularExpressionFlags , isUnterminated : boolean , annexB : boolean , namedCaptureGroups : boolean ) {
2521
2531
// Why var? It avoids TDZ checks in the runtime which can be costly.
2522
2532
// See: https://github.com/microsoft/TypeScript/issues/52924
2523
2533
/* eslint-disable no-var */
@@ -2527,10 +2537,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2527
2537
/** Grammar parameter */
2528
2538
var unicodeMode = ! ! ( regExpFlags & RegularExpressionFlags . UnicodeMode ) ;
2529
2539
2530
- if ( unicodeMode ) {
2531
- // Annex B treats any unicode mode as the strict syntax.
2532
- annexB = false ;
2533
- }
2540
+ // Annex B treats any unicode mode as the strict syntax.
2541
+ var anyUnicodeModeOrNonAnnexB = unicodeMode || ! annexB ;
2534
2542
2535
2543
/** @see {scanClassSetExpression} */
2536
2544
var mayContainStrings = false ;
@@ -2626,7 +2634,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2626
2634
case CharacterCodes . exclamation :
2627
2635
pos ++ ;
2628
2636
// In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2629
- isPreviousTermQuantifiable = annexB ;
2637
+ isPreviousTermQuantifiable = ! anyUnicodeModeOrNonAnnexB ;
2630
2638
break ;
2631
2639
case CharacterCodes . lessThan :
2632
2640
const groupNameStart = pos ;
@@ -2675,7 +2683,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2675
2683
const digitsStart = pos ;
2676
2684
scanDigits ( ) ;
2677
2685
const min = tokenValue ;
2678
- if ( annexB && ! min ) {
2686
+ if ( ! anyUnicodeModeOrNonAnnexB && ! min ) {
2679
2687
isPreviousTermQuantifiable = true ;
2680
2688
break ;
2681
2689
}
@@ -2693,26 +2701,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2693
2701
break ;
2694
2702
}
2695
2703
}
2696
- else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( ! annexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
2704
+ else if ( max && Number . parseInt ( min ) > Number . parseInt ( max ) && ( anyUnicodeModeOrNonAnnexB || text . charCodeAt ( pos ) === CharacterCodes . closeBrace ) ) {
2697
2705
error ( Diagnostics . Numbers_out_of_order_in_quantifier , digitsStart , pos - digitsStart ) ;
2698
2706
}
2699
2707
}
2700
2708
else if ( ! min ) {
2701
- if ( ! annexB ) {
2709
+ if ( anyUnicodeModeOrNonAnnexB ) {
2702
2710
error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , start , 1 , String . fromCharCode ( ch ) ) ;
2703
2711
}
2704
2712
isPreviousTermQuantifiable = true ;
2705
2713
break ;
2706
2714
}
2707
2715
if ( charCodeChecked ( pos ) !== CharacterCodes . closeBrace ) {
2708
- if ( annexB ) {
2709
- isPreviousTermQuantifiable = true ;
2710
- break ;
2711
- }
2712
- else {
2716
+ if ( anyUnicodeModeOrNonAnnexB ) {
2713
2717
error ( Diagnostics . _0_expected , pos , 0 , String . fromCharCode ( CharacterCodes . closeBrace ) ) ;
2714
2718
pos -- ;
2715
2719
}
2720
+ else {
2721
+ isPreviousTermQuantifiable = true ;
2722
+ break ;
2723
+ }
2716
2724
}
2717
2725
// falls through
2718
2726
case CharacterCodes . asterisk :
@@ -2754,7 +2762,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2754
2762
// Assume that everything starting from this character is outside of the regex
2755
2763
return ;
2756
2764
}
2757
- if ( ! annexB || ch === CharacterCodes . closeParen ) {
2765
+ if ( anyUnicodeModeOrNonAnnexB || ch === CharacterCodes . closeParen ) {
2758
2766
error ( Diagnostics . Unexpected_0_Did_you_mean_to_escape_it_with_backslash , pos , 1 , String . fromCharCode ( ch ) ) ;
2759
2767
}
2760
2768
pos ++ ;
@@ -2811,10 +2819,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2811
2819
scanGroupName ( /*isReference*/ true ) ;
2812
2820
scanExpectedChar ( CharacterCodes . greaterThan ) ;
2813
2821
}
2814
- else {
2815
- // This is actually allowed in Annex B if there are no named capturing groups in the regex,
2816
- // but if we were going to suppress these errors, we would have to record the positions of all '\k's
2817
- // and defer the errors until after the scanning to know if the regex has any named capturing groups.
2822
+ // A bare `\k` is only tolerated as an identity escape in Annex B non-Unicode mode, and only
+ // when the pattern has no named capturing groups. In any Unicode mode, or when Annex B
+ // leniency is off, it must always be followed by `<GroupName>`, so the error is reported.
+ else if ( anyUnicodeModeOrNonAnnexB || namedCaptureGroups ) {
2818
2823
error ( Diagnostics . k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets , pos - 2 , 2 ) ;
2819
2824
}
2820
2825
break ;
@@ -2864,7 +2869,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2864
2869
pos ++ ;
2865
2870
return String . fromCharCode ( ch & 0x1f ) ;
2866
2871
}
2867
- if ( ! annexB ) {
2872
+ if ( anyUnicodeModeOrNonAnnexB ) {
2868
2873
error ( Diagnostics . c_must_be_followed_by_an_ASCII_letter , pos - 2 , 2 ) ;
2869
2874
}
2870
2875
else if ( atomEscape ) {
@@ -2900,7 +2905,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2900
2905
return "\\" ;
2901
2906
}
2902
2907
pos -- ;
2903
- return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ annexB ? "annex-b" : true ) ;
2908
+ // Pass `true` (strict regex escapes) in any Unicode mode or when Annex B is off, and "annex-b"
+ // (lenient legacy escapes) otherwise; this matches the previous `annexB ? "annex-b" : true`.
+ return scanEscapeSequence ( /*shouldEmitInvalidEscapeError*/ unicodeMode , /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b" ) ;
2904
2909
}
2905
2910
}
2906
2911
@@ -2949,12 +2954,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
2949
2954
if ( isClassContentExit ( ch ) ) {
2950
2955
return ;
2951
2956
}
2952
- if ( ! minCharacter && ! annexB ) {
2957
+ if ( ! minCharacter && anyUnicodeModeOrNonAnnexB ) {
2953
2958
error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , minStart , pos - 1 - minStart ) ;
2954
2959
}
2955
2960
const maxStart = pos ;
2956
2961
const maxCharacter = scanClassAtom ( ) ;
2957
- if ( ! maxCharacter && ! annexB ) {
2962
+ if ( ! maxCharacter && anyUnicodeModeOrNonAnnexB ) {
2958
2963
error ( Diagnostics . A_character_class_range_must_not_be_bounded_by_another_character_class , maxStart , pos - maxStart ) ;
2959
2964
continue ;
2960
2965
}
@@ -3450,12 +3455,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3450
3455
error ( Diagnostics . Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set , start , pos - start ) ;
3451
3456
}
3452
3457
}
3453
- else if ( annexB ) {
3454
- pos -- ;
3455
- return false ;
3458
+ else if ( anyUnicodeModeOrNonAnnexB ) {
3459
+ error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
3456
3460
}
3457
3461
else {
3458
- error ( Diagnostics . _0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces , pos - 2 , 2 , String . fromCharCode ( ch ) ) ;
3462
+ pos -- ;
3463
+ return false ;
3459
3464
}
3460
3465
return true ;
3461
3466
}
@@ -3500,7 +3505,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
3500
3505
forEach ( decimalEscapes , escape => {
3501
3506
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
3502
3507
// either a LegacyOctalEscapeSequence or IdentityEscape
3503
- if ( ! annexB && escape . value > numberOfCapturingGroups ) {
3508
+ if ( anyUnicodeModeOrNonAnnexB && escape . value > numberOfCapturingGroups ) {
3504
3509
if ( numberOfCapturingGroups ) {
3505
3510
error ( Diagnostics . This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression , escape . pos , escape . end - escape . pos , numberOfCapturingGroups ) ;
3506
3511
}
0 commit comments