Skip to content

Commit 9df9079

Browse files
parser: limit maximum number of tokens (#3684)
* parser: limit maximum number of tokens Motivation: Parser CPU and memory usage is linear in the number of tokens in a document; however, in extreme cases it becomes quadratic due to memory exhaustion. On my machine it happens on queries with 2k tokens. For example: ``` { a a <repeat 2k times> a } ``` It takes 741ms on my machine. But if we create a document of the same size but with a smaller number of tokens it would be a lot faster. Example: ``` { a(arg: "a <repeat 2k times> a" } ``` Now it takes only 17ms to process, which is 43 times faster. That means if we limit document size we should make this limit small, since it takes only two bytes to create a token, e.g. ` a`. But that would hurt legit documents that have long tokens in them (comments, descriptions, strings, long names, etc.). That's why this PR adds a mechanism to limit the number of tokens in a parsed document. Also, the exact same mechanism is implemented in graphql-java, see: graphql-java/graphql-java#2549 I also tried an alternative approach of counting nodes, and it gives a slightly better approximation of how many resources would be consumed. However, compared to tokens, AST nodes are an implementation detail of graphql-js, so it's impossible to replicate in other implementations (e.g. to count this number on a client). * Apply suggestions from code review Co-authored-by: Yaacov Rydzinski <[email protected]> Co-authored-by: Yaacov Rydzinski <[email protected]>
1 parent 67aefd9 commit 9df9079

File tree

2 files changed

+48
-8
lines changed

2 files changed

+48
-8
lines changed

src/language/__tests__/parser-test.ts

+13
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,19 @@ describe('Parser', () => {
8888
`);
8989
});
9090

91+
it('limit maximum number of tokens', () => {
92+
expect(() => parse('{ foo }', { maxTokens: 3 })).to.not.throw();
93+
expect(() => parse('{ foo }', { maxTokens: 2 })).to.throw(
94+
'Syntax Error: Document contains more that 2 tokens. Parsing aborted.',
95+
);
96+
97+
expect(() => parse('{ foo(bar: "baz") }', { maxTokens: 8 })).to.not.throw();
98+
99+
expect(() => parse('{ foo(bar: "baz") }', { maxTokens: 7 })).to.throw(
100+
'Syntax Error: Document contains more that 7 tokens. Parsing aborted.',
101+
);
102+
});
103+
91104
it('parses variable inline values', () => {
92105
expect(() =>
93106
parse('{ field(complex: { a: { b: [ $var ] } }) }'),

src/language/parser.ts

+35-8
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,15 @@ export interface ParseOptions {
8282
*/
8383
noLocation?: boolean | undefined;
8484

85+
/**
86+
* Parser CPU and memory usage is linear to the number of tokens in a document
87+
* however in extreme cases it becomes quadratic due to memory exhaustion.
88+
* Parsing happens before validation so even invalid queries can burn lots of
89+
* CPU time and memory.
90+
* To prevent this you can set a maximum number of tokens allowed within a document.
91+
*/
92+
maxTokens?: number | undefined;
93+
8594
/**
8695
* @deprecated will be removed in the v17.0.0
8796
*
@@ -206,12 +215,14 @@ export function parseType(
206215
export class Parser {
207216
protected _options: ParseOptions;
208217
protected _lexer: Lexer;
218+
protected _tokenCounter: number;
209219

210220
constructor(source: string | Source, options: ParseOptions = {}) {
211221
const sourceObj = isSource(source) ? source : new Source(source);
212222

213223
this._lexer = new Lexer(sourceObj);
214224
this._options = options;
225+
this._tokenCounter = 0;
215226
}
216227

217228
/**
@@ -634,13 +645,13 @@ export class Parser {
634645
case TokenKind.BRACE_L:
635646
return this.parseObject(isConst);
636647
case TokenKind.INT:
637-
this._lexer.advance();
648+
this.advanceLexer();
638649
return this.node<IntValueNode>(token, {
639650
kind: Kind.INT,
640651
value: token.value,
641652
});
642653
case TokenKind.FLOAT:
643-
this._lexer.advance();
654+
this.advanceLexer();
644655
return this.node<FloatValueNode>(token, {
645656
kind: Kind.FLOAT,
646657
value: token.value,
@@ -649,7 +660,7 @@ export class Parser {
649660
case TokenKind.BLOCK_STRING:
650661
return this.parseStringLiteral();
651662
case TokenKind.NAME:
652-
this._lexer.advance();
663+
this.advanceLexer();
653664
switch (token.value) {
654665
case 'true':
655666
return this.node<BooleanValueNode>(token, {
@@ -695,7 +706,7 @@ export class Parser {
695706

696707
parseStringLiteral(): StringValueNode {
697708
const token = this._lexer.token;
698-
this._lexer.advance();
709+
this.advanceLexer();
699710
return this.node<StringValueNode>(token, {
700711
kind: Kind.STRING,
701712
value: token.value,
@@ -1479,7 +1490,7 @@ export class Parser {
14791490
expectToken(kind: TokenKind): Token {
14801491
const token = this._lexer.token;
14811492
if (token.kind === kind) {
1482-
this._lexer.advance();
1493+
this.advanceLexer();
14831494
return token;
14841495
}
14851496

@@ -1497,7 +1508,7 @@ export class Parser {
14971508
expectOptionalToken(kind: TokenKind): boolean {
14981509
const token = this._lexer.token;
14991510
if (token.kind === kind) {
1500-
this._lexer.advance();
1511+
this.advanceLexer();
15011512
return true;
15021513
}
15031514
return false;
@@ -1510,7 +1521,7 @@ export class Parser {
15101521
expectKeyword(value: string): void {
15111522
const token = this._lexer.token;
15121523
if (token.kind === TokenKind.NAME && token.value === value) {
1513-
this._lexer.advance();
1524+
this.advanceLexer();
15141525
} else {
15151526
throw syntaxError(
15161527
this._lexer.source,
@@ -1527,7 +1538,7 @@ export class Parser {
15271538
expectOptionalKeyword(value: string): boolean {
15281539
const token = this._lexer.token;
15291540
if (token.kind === TokenKind.NAME && token.value === value) {
1530-
this._lexer.advance();
1541+
this.advanceLexer();
15311542
return true;
15321543
}
15331544
return false;
@@ -1616,6 +1627,22 @@ export class Parser {
16161627
} while (this.expectOptionalToken(delimiterKind));
16171628
return nodes;
16181629
}
1630+
1631+
advanceLexer(): void {
1632+
const { maxTokens } = this._options;
1633+
const token = this._lexer.advance();
1634+
1635+
if (maxTokens !== undefined && token.kind !== TokenKind.EOF) {
1636+
++this._tokenCounter;
1637+
if (this._tokenCounter > maxTokens) {
1638+
throw syntaxError(
1639+
this._lexer.source,
1640+
token.start,
1641+
`Document contains more that ${maxTokens} tokens. Parsing aborted.`,
1642+
);
1643+
}
1644+
}
1645+
}
16191646
}
16201647

16211648
/**

0 commit comments

Comments
 (0)