Skip to content

Commit b48f0d0

Browse files
Provide User-Friendly Errors for Decimal Escapes Inside Character Classes
1 parent 8b67d77 commit b48f0d0

15 files changed

+256
-143
lines changed

src/compiler/diagnosticMessages.json

+9-1
Original file line numberDiff line numberDiff line change
@@ -1789,14 +1789,22 @@
17891789
"category": "Error",
17901790
"code": 1533
17911791
},
1792-
"This backreference is invalid because the containing regular expression contains no capturing groups.": {
1792+
"This backreference refers to a group that does not exist. There are no capturing groups in this regular expression.": {
17931793
"category": "Error",
17941794
"code": 1534
17951795
},
17961796
"This character cannot be escaped in a regular expression.": {
17971797
"category": "Error",
17981798
"code": 1535
17991799
},
1800+
"Octal escape sequences and backreferences are not allowed in a character class. If this was intended as an escape sequence, use the syntax '{0}' instead.": {
1801+
"category": "Error",
1802+
"code": 1536
1803+
},
1804+
"Decimal escape sequences and backreferences are not allowed in a character class.": {
1805+
"category": "Error",
1806+
"code": 1537
1807+
},
18001808

18011809
"The types of '{0}' are incompatible between these types.": {
18021810
"category": "Error",

src/compiler/scanner.ts

+48-15
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,19 @@ export function isIdentifierText(name: string, languageVersion: ScriptTarget | u
997997
return true;
998998
}
999999

1000+
/**
 * Bit flags passed to `scanEscapeSequence` in place of the former
 * `shouldEmitInvalidEscapeError` / `isRegularExpression` parameter pair.
 * Callers OR together the primitive flags; the scanner tests the composite
 * masks at the bottom of the enum.
 */
const enum EscapeSequenceScanningFlags {
1001+
// Passed by the callers that previously passed `isRegularExpression = false`.
String = 1 << 0,
1002+
// Caller wants invalid escapes reported as errors (formerly `shouldEmitInvalidEscapeError`
// at the non-regex call sites).
ReportErrors = 1 << 1,
1003+
1004+
// Passed by the regular-expression escape scanner; always set at that call site.
RegularExpression = 1 << 2,
1005+
// Set when the regex caller's `annexB` is truthy; when set (and AnyUnicodeMode is not),
// the "character cannot be escaped" error is not reported for identifier-part characters.
AnnexB = 1 << 3,
1006+
// Set when the regex caller's `anyUnicodeMode` is truthy; enables `\u` surrogate-pair
// merging and makes any otherwise-unrecognized escaped character an error.
AnyUnicodeMode = 1 << 4,
1007+
// Set when the regex caller's `atomEscape` is truthy. With RegularExpression set and this
// flag clear, octal escapes (not starting with 0) and `\8`/`\9` get the
// "not allowed in a character class" diagnostics instead of the generic ones.
AtomEscape = 1 << 5,
1008+
1009+
// Mask: invalid-escape errors are reported if either bit is set, i.e. always for
// regular expressions, and for other callers only when they ask for ReportErrors.
ReportInvalidEscapeErrors = RegularExpression | ReportErrors,
1010+
// Mask: `\u{...}` is scanned as an extended Unicode escape if either bit is set.
ScanExtendedUnicodeEscape = String | AnyUnicodeMode,
1011+
}
1012+
10001013
const enum ClassSetExpressionType {
10011014
Unknown,
10021015
ClassUnion,
@@ -1416,7 +1429,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
14161429
}
14171430
if (ch === CharacterCodes.backslash && !jsxAttributeString) {
14181431
result += text.substring(start, pos);
1419-
result += scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ true, /*isRegularExpression*/ false);
1432+
result += scanEscapeSequence(EscapeSequenceScanningFlags.String | EscapeSequenceScanningFlags.ReportErrors);
14201433
start = pos;
14211434
continue;
14221435
}
@@ -1474,7 +1487,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
14741487
// Escape character
14751488
if (currChar === CharacterCodes.backslash) {
14761489
contents += text.substring(start, pos);
1477-
contents += scanEscapeSequence(shouldEmitInvalidEscapeError, /*isRegularExpression*/ false);
1490+
contents += scanEscapeSequence(EscapeSequenceScanningFlags.String | (shouldEmitInvalidEscapeError ? EscapeSequenceScanningFlags.ReportErrors : 0));
14781491
start = pos;
14791492
continue;
14801493
}
@@ -1517,7 +1530,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15171530
// | [0-3] [0-7] [0-7]?
15181531
// | [4-7] [0-7]
15191532
// NonOctalDecimalEscapeSequence ::= [89]
1520-
function scanEscapeSequence(shouldEmitInvalidEscapeError: boolean, isRegularExpression: boolean | "annex-b"): string {
1533+
function scanEscapeSequence(flags: EscapeSequenceScanningFlags): string {
15211534
const start = pos;
15221535
pos++;
15231536
if (pos >= end) {
@@ -1554,18 +1567,28 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15541567
}
15551568
// '\47'
15561569
tokenFlags |= TokenFlags.ContainsInvalidEscape;
1557-
if (isRegularExpression || shouldEmitInvalidEscapeError) {
1570+
if (flags & EscapeSequenceScanningFlags.ReportInvalidEscapeErrors) {
15581571
const code = parseInt(text.substring(start + 1, pos), 8);
1559-
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
1572+
if (flags & EscapeSequenceScanningFlags.RegularExpression && !(flags & EscapeSequenceScanningFlags.AtomEscape) && ch !== CharacterCodes._0) {
1573+
error(Diagnostics.Octal_escape_sequences_and_backreferences_are_not_allowed_in_a_character_class_If_this_was_intended_as_an_escape_sequence_use_the_syntax_0_instead, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
1574+
}
1575+
else {
1576+
error(Diagnostics.Octal_escape_sequences_are_not_allowed_Use_the_syntax_0, start, pos - start, "\\x" + code.toString(16).padStart(2, "0"));
1577+
}
15601578
return String.fromCharCode(code);
15611579
}
15621580
return text.substring(start, pos);
15631581
case CharacterCodes._8:
15641582
case CharacterCodes._9:
15651583
// the invalid '\8' and '\9'
15661584
tokenFlags |= TokenFlags.ContainsInvalidEscape;
1567-
if (isRegularExpression || shouldEmitInvalidEscapeError) {
1568-
error(Diagnostics.Escape_sequence_0_is_not_allowed, start, pos - start, text.substring(start, pos));
1585+
if (flags & EscapeSequenceScanningFlags.ReportInvalidEscapeErrors) {
1586+
if (flags & EscapeSequenceScanningFlags.RegularExpression && !(flags & EscapeSequenceScanningFlags.AtomEscape)) {
1587+
error(Diagnostics.Decimal_escape_sequences_and_backreferences_are_not_allowed_in_a_character_class, start, pos - start);
1588+
}
1589+
else {
1590+
error(Diagnostics.Escape_sequence_0_is_not_allowed, start, pos - start, text.substring(start, pos));
1591+
}
15691592
return String.fromCharCode(ch);
15701593
}
15711594
return text.substring(start, pos);
@@ -1587,18 +1610,18 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
15871610
return '"';
15881611
case CharacterCodes.u:
15891612
if (
1590-
(!isRegularExpression || shouldEmitInvalidEscapeError) &&
1613+
flags & EscapeSequenceScanningFlags.ScanExtendedUnicodeEscape &&
15911614
pos < end && charCodeUnchecked(pos) === CharacterCodes.openBrace
15921615
) {
15931616
// '\u{DDDDDD}'
15941617
pos -= 2;
1595-
return scanExtendedUnicodeEscape(!!isRegularExpression || shouldEmitInvalidEscapeError);
1618+
return scanExtendedUnicodeEscape(!!(flags & EscapeSequenceScanningFlags.ReportInvalidEscapeErrors));
15961619
}
15971620
// '\uDDDD'
15981621
for (; pos < start + 6; pos++) {
15991622
if (!(pos < end && isHexDigit(charCodeUnchecked(pos)))) {
16001623
tokenFlags |= TokenFlags.ContainsInvalidEscape;
1601-
if (isRegularExpression || shouldEmitInvalidEscapeError) {
1624+
if (flags & EscapeSequenceScanningFlags.ReportInvalidEscapeErrors) {
16021625
error(Diagnostics.Hexadecimal_digit_expected);
16031626
}
16041627
return text.substring(start, pos);
@@ -1608,7 +1631,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
16081631
const escapedValue = parseInt(text.substring(start + 2, pos), 16);
16091632
const escapedValueString = String.fromCharCode(escapedValue);
16101633
if (
1611-
isRegularExpression && shouldEmitInvalidEscapeError && escapedValue >= 0xD800 && escapedValue <= 0xDBFF &&
1634+
flags & EscapeSequenceScanningFlags.AnyUnicodeMode && escapedValue >= 0xD800 && escapedValue <= 0xDBFF &&
16121635
pos + 6 < end && text.substring(pos, pos + 2) === "\\u" && charCodeUnchecked(pos + 2) !== CharacterCodes.openBrace
16131636
) {
16141637
// For regular expressions in any Unicode mode, \u HexLeadSurrogate \u HexTrailSurrogate is treated as a single character
@@ -1635,7 +1658,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
16351658
for (; pos < start + 4; pos++) {
16361659
if (!(pos < end && isHexDigit(charCodeUnchecked(pos)))) {
16371660
tokenFlags |= TokenFlags.ContainsInvalidEscape;
1638-
if (isRegularExpression || shouldEmitInvalidEscapeError) {
1661+
if (flags & EscapeSequenceScanningFlags.ReportInvalidEscapeErrors) {
16391662
error(Diagnostics.Hexadecimal_digit_expected);
16401663
}
16411664
return text.substring(start, pos);
@@ -1656,7 +1679,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
16561679
case CharacterCodes.paragraphSeparator:
16571680
return "";
16581681
default:
1659-
if (isRegularExpression === true && (shouldEmitInvalidEscapeError || isIdentifierPart(ch, languageVersion))) {
1682+
if (
1683+
flags & EscapeSequenceScanningFlags.AnyUnicodeMode
1684+
|| flags & EscapeSequenceScanningFlags.RegularExpression
1685+
&& !(flags & EscapeSequenceScanningFlags.AnnexB)
1686+
&& isIdentifierPart(ch, languageVersion)
1687+
) {
16601688
error(Diagnostics.This_character_cannot_be_escaped_in_a_regular_expression, pos - 2, 2);
16611689
}
16621690
return String.fromCharCode(ch);
@@ -2934,7 +2962,12 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
29342962
return String.fromCharCode(ch);
29352963
default:
29362964
pos--;
2937-
return scanEscapeSequence(/*shouldEmitInvalidEscapeError*/ anyUnicodeMode, /*isRegularExpression*/ anyUnicodeModeOrNonAnnexB || "annex-b");
2965+
return scanEscapeSequence(
2966+
EscapeSequenceScanningFlags.RegularExpression
2967+
| (annexB ? EscapeSequenceScanningFlags.AnnexB : 0)
2968+
| (anyUnicodeMode ? EscapeSequenceScanningFlags.AnyUnicodeMode : 0)
2969+
| (atomEscape ? EscapeSequenceScanningFlags.AtomEscape : 0),
2970+
);
29382971
}
29392972
}
29402973

@@ -3540,7 +3573,7 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
35403573
error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_only_0_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos, numberOfCapturingGroups);
35413574
}
35423575
else {
3543-
error(Diagnostics.This_backreference_is_invalid_because_the_containing_regular_expression_contains_no_capturing_groups, escape.pos, escape.end - escape.pos);
3576+
error(Diagnostics.This_backreference_refers_to_a_group_that_does_not_exist_There_are_no_capturing_groups_in_this_regular_expression, escape.pos, escape.end - escape.pos);
35443577
}
35453578
}
35463579
});

0 commit comments

Comments
 (0)