diff --git a/src/compiler/scanner.ts b/src/compiler/scanner.ts index e93cc82b0a5db..f30287cf0b0a4 100644 --- a/src/compiler/scanner.ts +++ b/src/compiler/scanner.ts @@ -282,16 +282,16 @@ const textToToken = new Map(Object.entries({ "`": SyntaxKind.BacktickToken, })); -const charToRegExpFlag = new Map(Object.entries({ - d: RegularExpressionFlags.HasIndices, - g: RegularExpressionFlags.Global, - i: RegularExpressionFlags.IgnoreCase, - m: RegularExpressionFlags.Multiline, - s: RegularExpressionFlags.DotAll, - u: RegularExpressionFlags.Unicode, - v: RegularExpressionFlags.UnicodeSets, - y: RegularExpressionFlags.Sticky, -})); +const charCodeToRegExpFlag = new Map([ + [CharacterCodes.d, RegularExpressionFlags.HasIndices], + [CharacterCodes.g, RegularExpressionFlags.Global], + [CharacterCodes.i, RegularExpressionFlags.IgnoreCase], + [CharacterCodes.m, RegularExpressionFlags.Multiline], + [CharacterCodes.s, RegularExpressionFlags.DotAll], + [CharacterCodes.u, RegularExpressionFlags.Unicode], + [CharacterCodes.v, RegularExpressionFlags.UnicodeSets], + [CharacterCodes.y, RegularExpressionFlags.Sticky], +]); const regExpFlagToFirstAvailableLanguageVersion = new Map([ [RegularExpressionFlags.HasIndices, LanguageFeatureMinimumTarget.RegularExpressionFlagsHasIndices], @@ -394,8 +394,8 @@ function isUnicodeIdentifierPart(code: number, languageVersion: ScriptTarget | u lookupInUnicodeMap(code, unicodeES5IdentifierPart); } -function makeReverseMap(source: Map): string[] { - const result: string[] = []; +function makeReverseMap(source: Map): T[] { + const result: T[] = []; source.forEach((value, name) => { result[value] = name; }); @@ -416,16 +416,16 @@ export function stringToToken(s: string): SyntaxKind | undefined { return textToToken.get(s); } -const regExpFlagChars = makeReverseMap(charToRegExpFlag); +const regExpFlagCharCodes = makeReverseMap(charCodeToRegExpFlag); /** @internal */ -export function regularExpressionFlagToCharacter(f: RegularExpressionFlags): string | undefined { - return regExpFlagChars[f]; +export function regularExpressionFlagToCharacterCode(f: RegularExpressionFlags): CharacterCodes | undefined { + return regExpFlagCharCodes[f]; } /** @internal */ -export function characterToRegularExpressionFlag(c: string): RegularExpressionFlags | undefined { - return charToRegExpFlag.get(c); +export function characterCodeToRegularExpressionFlag(ch: CharacterCodes): RegularExpressionFlags | undefined { + return charCodeToRegExpFlag.get(ch); } /** @internal */ @@ -2558,27 +2558,28 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean pos++; let regExpFlags = RegularExpressionFlags.None; while (true) { - const ch = charCodeChecked(pos); + const ch = codePointChecked(pos); if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { break; } + const size = charSize(ch); if (reportErrors) { - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const flag = characterCodeToRegularExpressionFlag(ch); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + error(Diagnostics.Unknown_regular_expression_flag, pos, size); } else if (regExpFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + error(Diagnostics.Duplicate_regular_expression_flag, pos, size); } else if (((regExpFlags | flag) & RegularExpressionFlags.AnyUnicodeMode) === RegularExpressionFlags.AnyUnicodeMode) { - error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, 1); + error(Diagnostics.The_Unicode_u_flag_and_the_Unicode_Sets_v_flag_cannot_be_set_simultaneously, pos, size); } else { regExpFlags |= flag; - checkRegularExpressionFlagAvailable(flag, pos); + checkRegularExpressionFlagAvailability(flag, size); } } - pos++; + pos += size; } if (reportErrors) { scanRange(startOfRegExpBody, endOfRegExpBody - startOfRegExpBody, () => { @@ -2843,25 +2844,26 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean function scanPatternModifiers(currFlags: RegularExpressionFlags): RegularExpressionFlags { while (true) { - const ch = charCodeChecked(pos); + const ch = codePointChecked(pos); if (ch === CharacterCodes.EOF || !isIdentifierPart(ch, languageVersion)) { break; } - const flag = characterToRegularExpressionFlag(String.fromCharCode(ch)); + const size = charSize(ch); + const flag = characterCodeToRegularExpressionFlag(ch); if (flag === undefined) { - error(Diagnostics.Unknown_regular_expression_flag, pos, 1); + error(Diagnostics.Unknown_regular_expression_flag, pos, size); } else if (currFlags & flag) { - error(Diagnostics.Duplicate_regular_expression_flag, pos, 1); + error(Diagnostics.Duplicate_regular_expression_flag, pos, size); } else if (!(flag & RegularExpressionFlags.Modifiers)) { - error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, 1); + error(Diagnostics.This_regular_expression_flag_cannot_be_toggled_within_a_subpattern, pos, size); } else { currFlags |= flag; - checkRegularExpressionFlagAvailable(flag, pos); + checkRegularExpressionFlagAvailability(flag, size); } - pos++; + pos += size; } return currFlags; } @@ -3583,10 +3585,10 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean }); } - function checkRegularExpressionFlagAvailable(flag: RegularExpressionFlags, pos: number) { + function checkRegularExpressionFlagAvailability(flag: RegularExpressionFlags, size: number) { const availableFrom = regExpFlagToFirstAvailableLanguageVersion.get(flag) as ScriptTarget | undefined; if (availableFrom && languageVersion < availableFrom) { - error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, 1, getNameOfScriptTarget(availableFrom)); + error(Diagnostics.This_regular_expression_flag_is_only_available_when_targeting_0_or_later, pos, size, getNameOfScriptTarget(availableFrom)); } } diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt new file mode 100644 index 0000000000000..9ad9c43caa0b7 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.errors.txt @@ -0,0 +1,29 @@ +regularExpressionWithNonBMPFlags.ts(7,23): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(7,25): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(7,28): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(7,41): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(7,43): error TS1499: Unknown regular expression flag. +regularExpressionWithNonBMPFlags.ts(7,45): error TS1499: Unknown regular expression flag. + + +==== regularExpressionWithNonBMPFlags.ts (6 errors) ==== + // The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including: + // - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S) + // - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I) + // - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M) + // + // See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols + const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + ~~ +!!! error TS1499: Unknown regular expression flag. + \ No newline at end of file diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.js b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js new file mode 100644 index 0000000000000..a6581efa2827e --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.js @@ -0,0 +1,20 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +//// [regularExpressionWithNonBMPFlags.ts] +// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including: +// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S) +// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I) +// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M) +// +// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; + + +//// [regularExpressionWithNonBMPFlags.js] +// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including: +// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S) +// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I) +// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M) +// +// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols new file mode 100644 index 0000000000000..af3ec681f3252 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.symbols @@ -0,0 +1,12 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including: +// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S) +// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I) +// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M) +// +// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : Symbol(𝘳𝘦𝘨𝘦𝘹, Decl(regularExpressionWithNonBMPFlags.ts, 6, 5)) + diff --git a/tests/baselines/reference/regularExpressionWithNonBMPFlags.types b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types new file mode 100644 index 0000000000000..cffacb8589261 --- /dev/null +++ b/tests/baselines/reference/regularExpressionWithNonBMPFlags.types @@ -0,0 +1,15 @@ +//// [tests/cases/compiler/regularExpressionWithNonBMPFlags.ts] //// + +=== regularExpressionWithNonBMPFlags.ts === +// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including: +// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S) +// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I) +// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M) +// +// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶; +>𝘳𝘦𝘨𝘦𝘹 : RegExp +> : ^^^^^^ +>/(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶 : RegExp +> : ^^^^^^ + diff --git a/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts new file mode 100644 index 0000000000000..85ffde8d8fcb5 --- /dev/null +++ b/tests/cases/compiler/regularExpressionWithNonBMPFlags.ts @@ -0,0 +1,9 @@ +// @target: esnext + +// The characters in the following regular expression are ASCII-lookalike characters found in Unicode, including: +// - 𝘴 (U+1D634 Mathematical Sans-Serif Italic Small S) +// - 𝘪 (U+1D62A Mathematical Sans-Serif Italic Small I) +// - 𝘮 (U+1D62E Mathematical Sans-Serif Italic Small M) +// +// See https://en.wikipedia.org/wiki/Mathematical_Alphanumeric_Symbols +const 𝘳𝘦𝘨𝘦𝘹 = /(?𝘴𝘪-𝘮:^𝘧𝘰𝘰.)/𝘨𝘮𝘶;