diff --git a/src/index.ts b/src/index.ts index ddc799e2ba..1f80cf51f3 100644 --- a/src/index.ts +++ b/src/index.ts @@ -230,7 +230,6 @@ export { printSourceLocation, // Lex Lexer, - SchemaCoordinateLexer, TokenKind, // Parse parse, @@ -262,7 +261,6 @@ export { export type { ParseOptions, - ParseSchemaCoordinateOptions, SourceLocation, // Visitor utilities ASTVisitor, diff --git a/src/language/__tests__/lexer-test.ts b/src/language/__tests__/lexer-test.ts index 433d3c4181..85603dfaaa 100644 --- a/src/language/__tests__/lexer-test.ts +++ b/src/language/__tests__/lexer-test.ts @@ -9,11 +9,7 @@ import { inspect } from '../../jsutils/inspect.js'; import { GraphQLError } from '../../error/GraphQLError.js'; import type { Token } from '../ast.js'; -import { - isPunctuatorTokenKind, - Lexer, - SchemaCoordinateLexer, -} from '../lexer.js'; +import { isPunctuatorTokenKind, Lexer } from '../lexer.js'; import { Source } from '../source.js'; import { TokenKind } from '../tokenKind.js'; @@ -170,8 +166,8 @@ describe('Lexer', () => { }); it('reports unexpected characters', () => { - expectSyntaxError('^').to.deep.equal({ - message: 'Syntax Error: Unexpected character: "^".', + expectSyntaxError('.').to.deep.equal({ + message: 'Syntax Error: Unexpected character: ".".', locations: [{ line: 1, column: 1 }], }); }); @@ -969,13 +965,6 @@ describe('Lexer', () => { value: undefined, }); - expect(lexOne('.')).to.contain({ - kind: TokenKind.DOT, - start: 0, - end: 1, - value: undefined, - }); - expect(lexOne('...')).to.contain({ kind: TokenKind.SPREAD, start: 0, @@ -1193,33 +1182,6 @@ describe('Lexer', () => { }); }); -describe('SchemaCoordinateLexer', () => { - it('can be stringified', () => { - const lexer = new SchemaCoordinateLexer(new Source('Name.field')); - expect(Object.prototype.toString.call(lexer)).to.equal( - '[object SchemaCoordinateLexer]', - ); - }); - - it('tracks a schema coordinate', () => { - const lexer = new SchemaCoordinateLexer(new Source('Name.field')); - expect(lexer.advance()).to.contain({ - kind: TokenKind.NAME, - start: 0, - end: 4, - value: 'Name', - }); - }); - - it('forbids ignored tokens', () => { - const lexer = new SchemaCoordinateLexer(new Source('\nName.field')); - expectToThrowJSON(() => lexer.advance()).to.deep.equal({ - message: 'Syntax Error: Invalid character: U+000A.', - locations: [{ line: 1, column: 1 }], - }); - }); -}); - describe('isPunctuatorTokenKind', () => { function isPunctuatorToken(text: string) { return isPunctuatorTokenKind(lexOne(text).kind); diff --git a/src/language/__tests__/parser-test.ts b/src/language/__tests__/parser-test.ts index e8dd914f71..2ca4c86216 100644 --- a/src/language/__tests__/parser-test.ts +++ b/src/language/__tests__/parser-test.ts @@ -722,7 +722,7 @@ describe('Parser', () => { expect(() => parseSchemaCoordinate('MyType.field.deep')) .to.throw() .to.deep.include({ - message: 'Syntax Error: Expected , found ".".', + message: 'Syntax Error: Expected , found ..', locations: [{ line: 1, column: 13 }], }); }); @@ -751,10 +751,10 @@ describe('Parser', () => { }); it('rejects Name . Name ( Name : Name )', () => { - expect(() => parseSchemaCoordinate('MyType.field(arg:value)')) + expect(() => parseSchemaCoordinate('MyType.field(arg: value)')) .to.throw() .to.deep.include({ - message: 'Syntax Error: Expected ")", found Name "value".', + message: 'Syntax Error: Invalid character: " ".', locations: [{ line: 1, column: 18 }], }); }); @@ -794,9 +794,15 @@ describe('Parser', () => { expect(() => parseSchemaCoordinate('@myDirective.field')) .to.throw() .to.deep.include({ - message: 'Syntax Error: Expected , found ".".', + message: 'Syntax Error: Expected , found ..', locations: [{ line: 1, column: 13 }], }); }); + + it('accepts a Source object', () => { + expect(parseSchemaCoordinate('MyType')).to.deep.equal( + parseSchemaCoordinate(new Source('MyType')), + ); + }); }); }); diff --git a/src/language/__tests__/schemaCoordinateLexer-test.ts b/src/language/__tests__/schemaCoordinateLexer-test.ts new file mode 100644 index 0000000000..1851e227f1 --- /dev/null +++ b/src/language/__tests__/schemaCoordinateLexer-test.ts @@ -0,0 +1,52 @@ +import { expect } from 'chai'; +import { describe, it } from 'mocha'; + +import { expectToThrowJSON } from '../../__testUtils__/expectJSON.js'; + +import { SchemaCoordinateLexer } from '../schemaCoordinateLexer.js'; +import { Source } from '../source.js'; +import { TokenKind } from '../tokenKind.js'; + +function lexSecond(str: string) { + const lexer = new SchemaCoordinateLexer(new Source(str)); + lexer.advance(); + return lexer.advance(); +} + +function expectSyntaxError(text: string) { + return expectToThrowJSON(() => lexSecond(text)); +} + +describe('SchemaCoordinateLexer', () => { + it('can be stringified', () => { + const lexer = new SchemaCoordinateLexer(new Source('Name.field')); + expect(Object.prototype.toString.call(lexer)).to.equal( + '[object SchemaCoordinateLexer]', + ); + }); + + it('tracks a schema coordinate', () => { + const lexer = new SchemaCoordinateLexer(new Source('Name.field')); + expect(lexer.advance()).to.contain({ + kind: TokenKind.NAME, + start: 0, + end: 4, + value: 'Name', + }); + }); + + it('forbids ignored tokens', () => { + const lexer = new SchemaCoordinateLexer(new Source('\nName.field')); + expectToThrowJSON(() => lexer.advance()).to.deep.equal({ + message: 'Syntax Error: Invalid character: U+000A.', + locations: [{ line: 1, column: 1 }], + }); + }); + + it('lex reports a useful syntax errors', () => { + expectSyntaxError('Foo .bar').to.deep.equal({ + message: 'Syntax Error: Invalid character: " ".', + locations: [{ line: 1, column: 4 }], + }); + }); +}); diff --git a/src/language/index.ts b/src/language/index.ts index 1f2eff6bb7..c5620b4948 100644 --- a/src/language/index.ts +++ b/src/language/index.ts @@ -11,7 +11,7 @@ export { Kind } from './kinds.js'; export { TokenKind } from './tokenKind.js'; -export { Lexer, SchemaCoordinateLexer } from './lexer.js'; +export { Lexer } from './lexer.js'; export { parse, @@ -20,7 +20,7 @@ export { parseType, parseSchemaCoordinate, } from './parser.js'; -export type { ParseOptions, ParseSchemaCoordinateOptions } from './parser.js'; +export type { ParseOptions } from './parser.js'; export { print } from './printer.js'; diff --git a/src/language/lexer.ts b/src/language/lexer.ts index 4a2228e285..3709636e58 100644 --- a/src/language/lexer.ts +++ b/src/language/lexer.ts @@ -6,6 +6,21 @@ import { isDigit, isNameContinue, isNameStart } from './characterClasses.js'; import type { Source } from './source.js'; import { TokenKind } from './tokenKind.js'; +/** + * Parser supports parsing multiple Source types, which may have differing + * Lexer classes. This is used for schema coordinates which has its own distinct + * SchemaCoordinateLexer class. + */ +export interface LexerInterface { + source: Source; + lastToken: Token; + token: Token; + line: number; + lineStart: number; + advance: () => Token; + lookahead: () => Token; +} + /** * Given a Source object, creates a Lexer for that source. * A Lexer is a stateful stream generator in that every time @@ -14,7 +29,7 @@ import { TokenKind } from './tokenKind.js'; * EOF, after which the lexer will repeatedly return the same EOF token * whenever called. */ -export class Lexer { +export class Lexer implements LexerInterface { source: Source; /** @@ -83,27 +98,6 @@ export class Lexer { } return token; } - - validateIgnoredToken(_position: number): void { - /* noop - ignored tokens are ignored */ - } -} - -/** - * As `Lexer`, but forbids ignored tokens as required of schema coordinates. - */ -export class SchemaCoordinateLexer extends Lexer { - override get [Symbol.toStringTag]() { - return 'SchemaCoordinateLexer'; - } - - override validateIgnoredToken(position: number): void { - throw syntaxError( - this.source, - position, - `Invalid character: ${printCodePointAt(this, position)}.`, - ); - } } /** @@ -116,7 +110,6 @@ export function isPunctuatorTokenKind(kind: TokenKind): boolean { kind === TokenKind.AMP || kind === TokenKind.PAREN_L || kind === TokenKind.PAREN_R || - kind === TokenKind.DOT || kind === TokenKind.SPREAD || kind === TokenKind.COLON || kind === TokenKind.EQUALS || @@ -172,8 +165,13 @@ function isTrailingSurrogate(code: number): boolean { * * Printable ASCII is printed quoted, while other points are printed in Unicode * code point form (ie. U+1234). + * + * @internal */ -function printCodePointAt(lexer: Lexer, location: number): string { +export function printCodePointAt( + lexer: LexerInterface, + location: number, +): string { const code = lexer.source.body.codePointAt(location); if (code === undefined) { @@ -190,9 +188,11 @@ function printCodePointAt(lexer: Lexer, location: number): string { /** * Create a token with line and column location information. + * + * @internal */ -function createToken( - lexer: Lexer, +export function createToken( + lexer: LexerInterface, kind: TokenKind, start: number, end: number, @@ -238,7 +238,6 @@ function readNextToken(lexer: Lexer, start: number): Token { case 0x0009: // \t case 0x0020: // case 0x002c: // , - lexer.validateIgnoredToken(position); ++position; continue; // LineTerminator :: @@ -246,13 +245,11 @@ function readNextToken(lexer: Lexer, start: number): Token { // - "Carriage Return (U+000D)" [lookahead != "New Line (U+000A)"] // - "Carriage Return (U+000D)" "New Line (U+000A)" case 0x000a: // \n - lexer.validateIgnoredToken(position); ++position; ++lexer.line; lexer.lineStart = position; continue; case 0x000d: // \r - lexer.validateIgnoredToken(position); if (body.charCodeAt(position + 1) === 0x000a) { position += 2; } else { @@ -263,7 +260,6 @@ function readNextToken(lexer: Lexer, start: number): Token { continue; // Comment case 0x0023: // # - lexer.validateIgnoredToken(position); return readComment(lexer, position); // Token :: // - Punctuator @@ -272,11 +268,7 @@ function readNextToken(lexer: Lexer, start: number): Token { // - FloatValue // - StringValue // - // Punctuator :: - // - DotPunctuator - // - OtherPunctuator - // - // OtherPunctuator :: one of ! $ & ( ) ... : = @ [ ] { | } + // Punctuator :: one of ! $ & ( ) ... : = @ [ ] { | } case 0x0021: // ! return createToken(lexer, TokenKind.BANG, position, position + 1); case 0x0024: // $ @@ -293,7 +285,24 @@ function readNextToken(lexer: Lexer, start: number): Token { if (nextCode === 0x002e && body.charCodeAt(position + 2) === 0x002e) { return createToken(lexer, TokenKind.SPREAD, position, position + 3); } - return readDot(lexer, position); + if (nextCode === 0x002e) { + throw syntaxError( + lexer.source, + position, + 'Unexpected "..", did you mean "..."?', + ); + } else if (isDigit(nextCode)) { + const digits = lexer.source.body.slice( + position + 1, + readDigits(lexer, position + 1, nextCode), + ); + throw syntaxError( + lexer.source, + position, + `Invalid number, expected digit before ".", did you mean "0.${digits}"?`, + ); + } + break; } case 0x003a: // : return createToken(lexer, TokenKind.COLON, position, position + 1); @@ -346,35 +355,6 @@ function readNextToken(lexer: Lexer, start: number): Token { return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength); } -/** - * Reads a dot token with helpful messages for negative lookahead. - * - * DotPunctuator :: `.` [lookahead != {`.`, Digit}] - */ -function readDot(lexer: Lexer, start: number): Token { - const nextCode = lexer.source.body.charCodeAt(start + 1); - // Full Stop (.) - if (nextCode === 0x002e) { - throw syntaxError( - lexer.source, - start, - 'Unexpected "..", did you mean "..."?', - ); - } - if (isDigit(nextCode)) { - const digits = lexer.source.body.slice( - start + 1, - readDigits(lexer, start + 1, nextCode), - ); - throw syntaxError( - lexer.source, - start, - `Invalid number, expected digit before ".", did you mean "0.${digits}"?`, - ); - } - return createToken(lexer, TokenKind.DOT, start, start + 1); -} - /** * Reads a comment token from the source file. * @@ -888,8 +868,10 @@ function readBlockString(lexer: Lexer, start: number): Token { * Name :: * - NameStart NameContinue* [lookahead != NameContinue] * ``` + * + * @internal */ -function readName(lexer: Lexer, start: number): Token { +export function readName(lexer: LexerInterface, start: number): Token { const body = lexer.source.body; const bodyLength = body.length; let position = start + 1; diff --git a/src/language/parser.ts b/src/language/parser.ts index 5cf3e14d21..369ec2bb02 100644 --- a/src/language/parser.ts +++ b/src/language/parser.ts @@ -70,11 +70,9 @@ import type { import { Location, OperationTypeNode } from './ast.js'; import { DirectiveLocation } from './directiveLocation.js'; import { Kind } from './kinds.js'; -import { - isPunctuatorTokenKind, - Lexer, - SchemaCoordinateLexer, -} from './lexer.js'; +import type { LexerInterface } from './lexer.js'; +import { isPunctuatorTokenKind, Lexer } from './lexer.js'; +import { SchemaCoordinateLexer } from './schemaCoordinateLexer.js'; import { isSource, Source } from './source.js'; import { TokenKind } from './tokenKind.js'; @@ -121,21 +119,9 @@ export interface ParseOptions { /** * You may override the Lexer class used to lex the source; this is used by - * schema coordinates to introduce a lexer that forbids ignored tokens. + * schema coordinates to introduce a lexer with a restricted syntax. */ - Lexer?: typeof Lexer | undefined; -} - -/** - * Configuration options to control schema coordinate parser behavior - */ -export interface ParseSchemaCoordinateOptions { - /** - * By default, the parser creates AST nodes that know the location - * in the source that they correspond to. This configuration flag - * disables that behavior for performance or testing. - */ - noLocation?: boolean | undefined; + lexer?: LexerInterface | undefined; } /** @@ -221,13 +207,10 @@ export function parseType( */ export function parseSchemaCoordinate( source: string | Source, - options?: ParseSchemaCoordinateOptions, ): SchemaCoordinateNode { - // Ignored tokens are excluded syntax for a Schema Coordinate. - const parser = new Parser(source, { - ...options, - Lexer: SchemaCoordinateLexer, - }); + const sourceObj = isSource(source) ? source : new Source(source); + const lexer = new SchemaCoordinateLexer(sourceObj); + const parser = new Parser(source, { lexer }); parser.expectToken(TokenKind.SOF); const coordinate = parser.parseSchemaCoordinate(); parser.expectToken(TokenKind.EOF); @@ -246,16 +229,21 @@ export function parseSchemaCoordinate( * @internal */ export class Parser { - protected _options: ParseOptions; - protected _lexer: Lexer; + protected _options: Omit; + protected _lexer: LexerInterface; protected _tokenCounter: number; constructor(source: string | Source, options: ParseOptions = {}) { - const sourceObj = isSource(source) ? source : new Source(source); + const { lexer, ..._options } = options; + + if (lexer) { + this._lexer = lexer; + } else { + const sourceObj = isSource(source) ? source : new Source(source); + this._lexer = new Lexer(sourceObj); + } - const LexerClass = options.Lexer ?? Lexer; - this._lexer = new LexerClass(sourceObj); - this._options = options; + this._options = _options; this._tokenCounter = 0; } diff --git a/src/language/schemaCoordinateLexer.ts b/src/language/schemaCoordinateLexer.ts new file mode 100644 index 0000000000..06be90ec0f --- /dev/null +++ b/src/language/schemaCoordinateLexer.ts @@ -0,0 +1,124 @@ +import { syntaxError } from '../error/syntaxError.js'; + +import { Token } from './ast.js'; +import { isNameStart } from './characterClasses.js'; +import type { LexerInterface } from './lexer.js'; +import { createToken, printCodePointAt, readName } from './lexer.js'; +import type { Source } from './source.js'; +import { TokenKind } from './tokenKind.js'; + +/** + * Given a Source schema coordinate, creates a Lexer for that source. + * A SchemaCoordinateLexer is a stateful stream generator in that every time + * it is advanced, it returns the next token in the Source. Assuming the + * source lexes, the final Token emitted by the lexer will be of kind + * EOF, after which the lexer will repeatedly return the same EOF token + * whenever called. + */ +export class SchemaCoordinateLexer implements LexerInterface { + source: Source; + + /** + * The previously focused non-ignored token. + */ + lastToken: Token; + + /** + * The currently focused non-ignored token. + */ + token: Token; + + /** + * The (1-indexed) line containing the current token. + * Since a schema coordinate may not contain newline, this value is always 1. + */ + line: 1 = 1 as const; + + /** + * The character offset at which the current line begins. + * Since a schema coordinate may not contain newline, this value is always 0. + */ + lineStart: 0 = 0 as const; + + constructor(source: Source) { + const startOfFileToken = new Token(TokenKind.SOF, 0, 0, 0, 0); + + this.source = source; + this.lastToken = startOfFileToken; + this.token = startOfFileToken; + } + + get [Symbol.toStringTag]() { + return 'SchemaCoordinateLexer'; + } + + /** + * Advances the token stream to the next non-ignored token. + */ + advance(): Token { + this.lastToken = this.token; + const token = (this.token = this.lookahead()); + return token; + } + + /** + * Looks ahead and returns the next non-ignored token, but does not change + * the current Lexer token. + */ + lookahead(): Token { + let token = this.token; + if (token.kind !== TokenKind.EOF) { + // Read the next token and form a link in the token linked-list. + const nextToken = readNextToken(this, token.end); + // @ts-expect-error next is only mutable during parsing. + token.next = nextToken; + // @ts-expect-error prev is only mutable during parsing. + nextToken.prev = token; + token = nextToken; + } + return token; + } +} + +/** + * Gets the next token from the source starting at the given position. + * + * This skips over whitespace until it finds the next lexable token, then lexes + * punctuators immediately or calls the appropriate helper function for more + * complicated tokens. + */ +function readNextToken(lexer: SchemaCoordinateLexer, start: number): Token { + const body = lexer.source.body; + const bodyLength = body.length; + const position = start; + + if (position < bodyLength) { + const code = body.charCodeAt(position); + + switch (code) { + case 0x002e: // . + return createToken(lexer, TokenKind.DOT, position, position + 1); + case 0x0028: // ( + return createToken(lexer, TokenKind.PAREN_L, position, position + 1); + case 0x0029: // ) + return createToken(lexer, TokenKind.PAREN_R, position, position + 1); + case 0x003a: // : + return createToken(lexer, TokenKind.COLON, position, position + 1); + case 0x0040: // @ + return createToken(lexer, TokenKind.AT, position, position + 1); + } + + // Name + if (isNameStart(code)) { + return readName(lexer, position); + } + + throw syntaxError( + lexer.source, + position, + `Invalid character: ${printCodePointAt(lexer, position)}.`, + ); + } + + return createToken(lexer, TokenKind.EOF, bodyLength, bodyLength); +}