diff --git a/src/language/__tests__/lexer.js b/src/language/__tests__/lexer.js index 03ef3dbaa6..30d247a1aa 100644 --- a/src/language/__tests__/lexer.js +++ b/src/language/__tests__/lexer.js @@ -22,6 +22,25 @@ function lexErr(str) { describe('Lexer', () => { + it('disallows uncommon control characters', () => { + + expect(lexErr('\u0007') + ).to.throw( + 'Syntax Error GraphQL (1:1) Invalid character "\\u0007"' + ); + + }); + + it('accepts BOM header', () => { + expect(lexOne('\uFEFF foo') + ).to.deep.equal({ + kind: TokenKind.NAME, + start: 2, + end: 5, + value: 'foo' + }); + }); + it('skips whitespace', () => { expect(lexOne(` @@ -136,53 +155,75 @@ describe('Lexer', () => { it('lex reports useful string errors', () => { + expect( + lexErr('"') + ).to.throw('Syntax Error GraphQL (1:2) Unterminated string'); + expect( lexErr('"no end quote') ).to.throw('Syntax Error GraphQL (1:14) Unterminated string'); expect( - lexErr('"multi\nline"') - ).to.throw('Syntax Error GraphQL (1:7) Unterminated string'); + lexErr('"contains unescaped \u0007 control char"') + ).to.throw( + 'Syntax Error GraphQL (1:21) Invalid character within String: "\\u0007".' + ); expect( - lexErr('"multi\rline"') - ).to.throw('Syntax Error GraphQL (1:7) Unterminated string'); + lexErr('"null-byte is not \u0000 end of file"') + ).to.throw( + 'Syntax Error GraphQL (1:19) Invalid character within String: "\\u0000".' + ); expect( - lexErr('"multi\u2028line"') + lexErr('"multi\nline"') ).to.throw('Syntax Error GraphQL (1:7) Unterminated string'); expect( - lexErr('"multi\u2029line"') + lexErr('"multi\rline"') ).to.throw('Syntax Error GraphQL (1:7) Unterminated string'); expect( lexErr('"bad \\z esc"') - ).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence'); + ).to.throw( + 'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\z.' + ); expect( lexErr('"bad \\x esc"') - ).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence'); + ).to.throw( + 'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\x.' + ); expect( lexErr('"bad \\u1 esc"') - ).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence'); + ).to.throw( + 'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\u1 es.' + ); expect( lexErr('"bad \\u0XX1 esc"') - ).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence'); + ).to.throw( + 'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\u0XX1.' + ); expect( lexErr('"bad \\uXXXX esc"') - ).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence'); + ).to.throw( + 'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uXXXX.' + ); expect( lexErr('"bad \\uFXXX esc"') - ).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence'); + ).to.throw( + 'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uFXXX.' + ); expect( lexErr('"bad \\uXXXF esc"') - ).to.throw('Syntax Error GraphQL (1:7) Bad character escape sequence'); + ).to.throw( + 'Syntax Error GraphQL (1:7) Invalid character escape sequence: \\uXXXF.' + ); }); it('lexes numbers', () => { diff --git a/src/language/lexer.js b/src/language/lexer.js index 5fcf265941..0f6024831b 100644 --- a/src/language/lexer.js +++ b/src/language/lexer.js @@ -110,7 +110,6 @@ tokenDescription[TokenKind.FLOAT] = 'Float'; tokenDescription[TokenKind.STRING] = 'String'; var charCodeAt = String.prototype.charCodeAt; -var fromCharCode = String.fromCharCode; var slice = String.prototype.slice; /** @@ -125,6 +124,10 @@ function makeToken( return { kind, start, end, value }; } +function printCharCode(code) { + return isNaN(code) ? 'EOF' : JSON.stringify(String.fromCharCode(code)); +} + /** * Gets the next token from the source starting at the given position. * @@ -137,12 +140,22 @@ function readToken(source: Source, fromPosition: number): Token { var bodyLength = body.length; var position = positionAfterWhitespace(body, fromPosition); - var code = charCodeAt.call(body, position); if (position >= bodyLength) { return makeToken(TokenKind.EOF, position, position); } + var code = charCodeAt.call(body, position); + + // SourceCharacter + if (code < 0x0020 && code !== 0x0009 && code !== 0x000A && code !== 0x000D) { + throw syntaxError( + source, + position, + `Invalid character ${printCharCode(code)}.` + ); + } + switch (code) { // ! case 33: return makeToken(TokenKind.BANG, position, position + 1); @@ -201,7 +214,7 @@ function readToken(source: Source, fromPosition: number): Token { throw syntaxError( source, position, - `Unexpected character "${fromCharCode(code)}".` + `Unexpected character ${printCharCode(code)}.` ); } @@ -215,14 +228,18 @@ function positionAfterWhitespace(body: string, startPosition: number): number { var position = startPosition; while (position < bodyLength) { var code = charCodeAt.call(body, position); - // Skip whitespace + // Skip Ignored if ( - code === 32 || // space - code === 44 || // comma - code === 160 || // '\xa0' - code === 0x2028 || // line separator - code === 0x2029 || // paragraph separator - code > 8 && code < 14 // whitespace + // BOM + code === 0xFEFF || + // White Space + code === 0x0009 || // tab + code === 0x0020 || // space + // Line Terminator + code === 0x000A || // new line + code === 0x000D || // carriage return + // Comma + code === 0x002C ) { ++position; // Skip comments @@ -230,8 +247,9 @@ function positionAfterWhitespace(body: string, startPosition: number): number { ++position; while ( position < bodyLength && - (code = charCodeAt.call(body, position)) && - code !== 10 && code !== 13 && code !== 0x2028 && code !== 0x2029 + (code = charCodeAt.call(body, position)) !== null && + // SourceCharacter but not LineTerminator + (code > 0x001F || code === 0x0009) && code !== 0x000A && code !== 0x000D ) { ++position; } @@ -265,7 +283,7 @@ function readNumber(source, start, firstCode) { throw syntaxError( source, position, - `Invalid number, unexpected digit after 0: "${fromCharCode(code)}".` + `Invalid number, unexpected digit after 0: ${printCharCode(code)}.` ); } } else { @@ -315,8 +333,7 @@ function readDigits(source, start, firstCode) { throw syntaxError( source, position, - 'Invalid number, expected digit but got: ' + - (code ? `"${fromCharCode(code)}"` : 'EOF') + '.' + `Invalid number, expected digit but got: ${printCharCode(code)}.` ); } @@ -329,15 +346,26 @@ function readString(source, start) { var body = source.body; var position = start + 1; var chunkStart = position; - var code; + var code = 0; var value = ''; while ( position < body.length && - (code = charCodeAt.call(body, position)) && - code !== 34 && - code !== 10 && code !== 13 && code !== 0x2028 && code !== 0x2029 + (code = charCodeAt.call(body, position)) !== null && + // not LineTerminator + code !== 0x000A && code !== 0x000D && + // not Quote (") + code !== 34 ) { + // SourceCharacter + if (code < 0x0020 && code !== 0x0009) { + throw syntaxError( + source, + position, + `Invalid character within String: ${printCharCode(code)}.` + ); + } + ++position; if (code === 92) { // \ value += slice.call(body, chunkStart, position - 1); @@ -351,7 +379,7 @@ function readString(source, start) { case 110: value += '\n'; break; case 114: value += '\r'; break; case 116: value += '\t'; break; - case 117: + case 117: // u var charCode = uniCharCode( charCodeAt.call(body, position + 1), charCodeAt.call(body, position + 2), @@ -362,17 +390,18 @@ function readString(source, start) { throw syntaxError( source, position, - 'Bad character escape sequence.' + `Invalid character escape sequence: ` + + `\\u${body.slice(position + 1, position + 5)}.` ); } - value += fromCharCode(charCode); + value += String.fromCharCode(charCode); position += 4; break; default: throw syntaxError( source, position, - 'Bad character escape sequence.' + `Invalid character escape sequence: \\${String.fromCharCode(code)}.` ); } ++position; @@ -380,7 +409,7 @@ function readString(source, start) { } } - if (code !== 34) { + if (code !== 34) { // quote (") throw syntaxError(source, position, 'Unterminated string.'); } @@ -428,10 +457,10 @@ function readName(source, position) { var body = source.body; var bodyLength = body.length; var end = position + 1; - var code; + var code = 0; while ( end !== bodyLength && - (code = charCodeAt.call(body, end)) && + (code = charCodeAt.call(body, end)) !== null && ( code === 95 || // _ code >= 48 && code <= 57 || // 0-9