Skip to content

Commit a01ac5f

Browse files
committed
Just bail on NUL or replacementCharacter
1 parent 4e80546 commit a01ac5f

File tree

1 file changed

+14
-22
lines changed

1 file changed

+14
-22
lines changed

src/compiler/scanner.ts

+14-22
Original file line numberDiff line numberDiff line change
@@ -1795,29 +1795,21 @@ export function createScanner(languageVersion: ScriptTarget, skipTrivia: boolean
17951795

17961796
const ch = codePointAt(text, pos);
17971797
if (pos === 0) {
1798-
// If a file wasn't valid text at all, it will usually be apparent at
1799-
// position 0 because UTF-8 decode will fail and produce U+FFFD.
1798+
// If a file isn't valid text at all, it will usually be apparent
1799+
// in the first few characters because we'll either see a NUL or UTF-8 decoding will have failed and produced U+FFFD.
18001800
// If that happens, just issue one error and refuse to try to scan further;
1801-
// this is likely a binary file that cannot be parsed
1802-
let isBinary = ch === CharacterCodes.replacementCharacter;
1803-
// See if this is an MPEG Transport Stream, where every 188th byte is "G" and the rest is garbage.
1804-
if (ch === CharacterCodes.G) {
1805-
const end = Math.min(text.length, pos + 188);
1806-
let i = pos + charSize(ch);
1807-
while (i < end) {
1808-
const ch = codePointAt(text, i);
1809-
if (codePointAt(text, i) === CharacterCodes.replacementCharacter) {
1810-
isBinary = true;
1811-
break;
1812-
}
1813-
i += charSize(ch);
1814-
}
1815-
}
1816-
if (isBinary) {
1817-
// Jump to the end of the file and fail.
1818-
error(Diagnostics.File_appears_to_be_binary);
1819-
pos = end;
1820-
return token = SyntaxKind.NonTextFileMarkerTrivia;
1801+
// this is likely a binary file that cannot be parsed.
1802+
let i = 0;
1803+
const stop = Math.min(text.length, 256);
1804+
while (i < stop) {
1805+
const ch = codePointAt(text, i);
1806+
if (!ch || ch === CharacterCodes.replacementCharacter) {
1807+
// Jump to the end of the file and fail.
1808+
error(Diagnostics.File_appears_to_be_binary);
1809+
pos = end;
1810+
return token = SyntaxKind.NonTextFileMarkerTrivia;
1811+
}
1812+
i += charSize(ch);
18211813
}
18221814
// Special handling for shebang
18231815
if (ch === CharacterCodes.hash && isShebangTrivia(text, pos)) {

0 commit comments

Comments
 (0)