Skip to content

Commit 2868bd6

Browse files
Apply Suggested Changes
1 parent 603c3cf commit 2868bd6

File tree

1 file changed

+34
-29
lines changed

1 file changed

+34
-29
lines changed

src/compiler/scanner.ts

+34-29
Original file line numberDiff line numberDiff line change
@@ -2426,6 +2426,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24262426
// Quickly get to the end of regex such that we know the flags
24272427
let p = tokenStart + 1;
24282428
let inEscape = false;
2429+
let namedCaptureGroups = false;
24292430
// Although nested character classes are allowed in Unicode Sets mode,
24302431
// an unescaped slash is nevertheless invalid even in a character class in Unicode mode.
24312432
// Additionally, parsing nested character classes will misinterpret regexes like `/[[]/`
@@ -2469,6 +2470,15 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
24692470
else if (ch === CharacterCodes.closeBracket) {
24702471
inCharacterClass = false;
24712472
}
2473+
else if (
2474+
// A `(` inside a character class is a literal character, so it can never open a named capturing group.
!inCharacterClass && ch === CharacterCodes.openParen
2475+
&& charCodeUnchecked(p + 1) === CharacterCodes.question
2476+
&& charCodeUnchecked(p + 2) === CharacterCodes.lessThan
2477+
&& charCodeUnchecked(p + 3) !== CharacterCodes.equals
2478+
&& charCodeUnchecked(p + 3) !== CharacterCodes.exclamation
2479+
) {
2480+
namedCaptureGroups = true;
2481+
}
24722482
p++;
24732483
}
24742484
const isUnterminated = !!(tokenFlags & TokenFlags.Unterminated);
@@ -2505,7 +2515,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25052515
const saveEnd = end;
25062516
pos = tokenStart + 1;
25072517
end = endOfBody;
2508-
scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true);
2518+
scanRegularExpressionWorker(regExpFlags, isUnterminated, /*annexB*/ true, namedCaptureGroups);
25092519
tokenStart = saveTokenStart;
25102520
tokenFlags = saveTokenFlags;
25112521
pos = savePos;
@@ -2517,7 +2527,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25172527
return token;
25182528
}
25192529

2520-
function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean) {
2530+
function scanRegularExpressionWorker(regExpFlags: RegularExpressionFlags, isUnterminated: boolean, annexB: boolean, namedCaptureGroups: boolean) {
25212531
// Why var? It avoids TDZ checks in the runtime which can be costly.
25222532
// See: https://github.com/microsoft/TypeScript/issues/52924
25232533
/* eslint-disable no-var */
@@ -2527,10 +2537,8 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
25272537
/** Grammar parameter */
25282538
var unicodeMode = !!(regExpFlags & RegularExpressionFlags.UnicodeMode);
25292539

2530-
if (unicodeMode) {
2531-
// Annex B treats any unicode mode as the strict syntax.
2532-
annexB = false;
2533-
}
2540+
// Annex B treats any unicode mode as the strict syntax.
2541+
var anyUnicodeModeOrNonAnnexB = unicodeMode || !annexB;
25342542

25352543
/** @see {scanClassSetExpression} */
25362544
var mayContainStrings = false;
@@ -2626,7 +2634,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26262634
case CharacterCodes.exclamation:
26272635
pos++;
26282636
// In Annex B, `(?=Disjunction)` and `(?!Disjunction)` are quantifiable
2629-
isPreviousTermQuantifiable = annexB;
2637+
isPreviousTermQuantifiable = !anyUnicodeModeOrNonAnnexB;
26302638
break;
26312639
case CharacterCodes.lessThan:
26322640
const groupNameStart = pos;
@@ -2675,7 +2683,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26752683
const digitsStart = pos;
26762684
scanDigits();
26772685
const min = tokenValue;
2678-
if (annexB && !min) {
2686+
if (!anyUnicodeModeOrNonAnnexB && !min) {
26792687
isPreviousTermQuantifiable = true;
26802688
break;
26812689
}
@@ -2693,26 +2701,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
26932701
break;
26942702
}
26952703
}
2696-
else if (max && Number.parseInt(min) > Number.parseInt(max) && (!annexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
2704+
else if (max && Number.parseInt(min) > Number.parseInt(max) && (anyUnicodeModeOrNonAnnexB || text.charCodeAt(pos) === CharacterCodes.closeBrace)) {
26972705
error(Diagnostics.Numbers_out_of_order_in_quantifier, digitsStart, pos - digitsStart);
26982706
}
26992707
}
27002708
else if (!min) {
2701-
if (!annexB) {
2709+
if (anyUnicodeModeOrNonAnnexB) {
27022710
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, start, 1, String.fromCharCode(ch));
27032711
}
27042712
isPreviousTermQuantifiable = true;
27052713
break;
27062714
}
27072715
if (charCodeChecked(pos) !== CharacterCodes.closeBrace) {
2708-
if (annexB) {
2709-
isPreviousTermQuantifiable = true;
2710-
break;
2711-
}
2712-
else {
2716+
if (anyUnicodeModeOrNonAnnexB) {
27132717
error(Diagnostics._0_expected, pos, 0, String.fromCharCode(CharacterCodes.closeBrace));
27142718
pos--;
27152719
}
2720+
else {
2721+
isPreviousTermQuantifiable = true;
2722+
break;
2723+
}
27162724
}
27172725
// falls through
27182726
case CharacterCodes.asterisk:
@@ -2754,7 +2762,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
27542762
// Assume everything from this character onward is outside of the regex
27552763
return;
27562764
}
2757-
if (!annexB || ch === CharacterCodes.closeParen) {
2765+
if (anyUnicodeModeOrNonAnnexB || ch === CharacterCodes.closeParen) {
27582766
error(Diagnostics.Unexpected_0_Did_you_mean_to_escape_it_with_backslash, pos, 1, String.fromCharCode(ch));
27592767
}
27602768
pos++;
@@ -2811,10 +2819,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28112819
scanGroupName(/*isReference*/ true);
28122820
scanExpectedChar(CharacterCodes.greaterThan);
28132821
}
2814-
else {
2815-
// This is actually allowed in Annex B if there are no named capturing groups in the regex,
2816-
// but if we were going to suppress these errors, we would have to record the positions of all '\k's
2817-
// and defer the errors until after the scanning to know if the regex has any named capturing groups.
2822+
// A bare `\k` is tolerated only under Annex B when the regex has no named capturing groups;
// in any unicode mode or non-Annex-B syntax it is always an error.
else if (anyUnicodeModeOrNonAnnexB || namedCaptureGroups) {
28182823
error(Diagnostics.k_must_be_followed_by_a_capturing_group_name_enclosed_in_angle_brackets, pos - 2, 2);
28192824
}
28202825
break;
@@ -2864,7 +2869,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
28642869
pos++;
28652870
return String.fromCharCode(ch & 0x1f);
28662871
}
2867-
if (!annexB) {
2872+
if (anyUnicodeModeOrNonAnnexB) {
28682873
error(Diagnostics.c_must_be_followed_by_an_ASCII_letter, pos - 2, 2);
28692874
}
28702875
else if (atomEscape) {
@@ -2900,7 +2905,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29002905
return "\\";
29012906
}
29022907
pos--;
2903-
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ annexB ? "annex-b" : true);
2908+
// Pass "annex-b" only when Annex B syntax is in effect (neither unicode mode nor strict syntax); otherwise pass `true`.
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ unicodeMode, /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b");
29042909
}
29052910
}
29062911

@@ -2949,12 +2954,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29492954
if (isClassContentExit(ch)) {
29502955
return;
29512956
}
2952-
if (!minCharacter && !annexB) {
2957+
if (!minCharacter && anyUnicodeModeOrNonAnnexB) {
29532958
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, minStart, pos - 1 - minStart);
29542959
}
29552960
const maxStart = pos;
29562961
const maxCharacter = scanClassAtom();
2957-
if (!maxCharacter && !annexB) {
2962+
if (!maxCharacter && anyUnicodeModeOrNonAnnexB) {
29582963
error(Diagnostics.A_character_class_range_must_not_be_bounded_by_another_character_class, maxStart, pos - maxStart);
29592964
continue;
29602965
}
@@ -3450,12 +3455,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
34503455
error(Diagnostics.Unicode_property_value_expressions_are_only_available_when_the_Unicode_u_flag_or_the_Unicode_Sets_v_flag_is_set, start, pos - start);
34513456
}
34523457
}
3453-
else if (annexB) {
3454-
pos--;
3455-
return false;
3458+
else if (anyUnicodeModeOrNonAnnexB) {
3459+
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
34563460
}
34573461
else {
3458-
error(Diagnostics._0_must_be_followed_by_a_Unicode_property_value_expression_enclosed_in_braces, pos - 2, 2, String.fromCharCode(ch));
3462+
pos--;
3463+
return false;
34593464
}
34603465
return true;
34613466
}
@@ -3500,7 +3505,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
35003505
forEach(decimalEscapes, escape => {
35013506
// in AnnexB, if a DecimalEscape is greater than the number of capturing groups then it is treated as
35023507
// either a LegacyOctalEscapeSequence or IdentityEscape
3503-
if (!annexB && escape.value > numberOfCapturingGroups) {
3508+
if (anyUnicodeModeOrNonAnnexB && escape.value > numberOfCapturingGroups) {
35043509
if (numberOfCapturingGroups) {
35053510
error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
35063511
}

0 commit comments

Comments
 (0)