@@ -83,10 +83,6 @@ type Token struct {
83
83
Value string
84
84
}
85
85
86
- func (t * Token ) String () string {
87
- return fmt .Sprintf ("%s" , tokenDescription [t .Kind ])
88
- }
89
-
90
86
type Lexer func (resetPosition int ) (Token , error )
91
87
92
88
func Lex (s * source.Source ) Lexer {
@@ -106,24 +102,28 @@ func Lex(s *source.Source) Lexer {
106
102
107
103
// Reads an alphanumeric + underscore name from the source, starting at a first character the caller has already matched.
108
104
// [_A-Za-z][_0-9A-Za-z]*
109
- func readName (source * source.Source , position int ) Token {
105
+ // position: byte offset in source.Body of the name's first character; used to slice out the token value.
106
+ // runePosition: rune index of that same character; makeToken receives the rune-based start and end.
107
+ func readName (source * source.Source , position , runePosition int ) Token {
110
108
body := source .Body
111
109
bodyLength := len (body )
112
- end := position + 1
110
+ endByte := position + 1
111
+ endRune := runePosition + 1
113
112
for {
114
- code , n := runeAt (body , end )
115
- if (end != bodyLength ) &&
113
+ code , _ := runeAt (body , endByte )
114
+ if (endByte != bodyLength ) &&
116
115
(code == '_' || // _
117
116
code >= '0' && code <= '9' || // 0-9
118
117
code >= 'A' && code <= 'Z' || // A-Z
119
118
code >= 'a' && code <= 'z' ) { // a-z
120
- end += n
119
+ endByte ++
120
+ endRune ++
121
121
continue
122
122
} else {
123
123
break
124
124
}
125
125
}
126
- return makeToken (TokenKind [NAME ], position , end , string (body [position :end ]))
126
+ return makeToken (TokenKind [NAME ], runePosition , endRune , string (body [position :endByte ]))
127
127
}
128
128
129
129
// Reads a number token from the source file, either a float
@@ -212,6 +212,7 @@ func readDigits(s *source.Source, start int, firstCode rune, codeLength int) (in
212
212
func readString (s * source.Source , start int ) (Token , error ) {
213
213
body := s .Body
214
214
position := start + 1
215
+ runePosition := start + 1
215
216
chunkStart := position
216
217
var code rune
217
218
var n int
@@ -226,9 +227,10 @@ func readString(s *source.Source, start int) (Token, error) {
226
227
227
228
// SourceCharacter
228
229
if code < 0x0020 && code != 0x0009 {
229
- return Token {}, gqlerrors .NewSyntaxError (s , position , fmt .Sprintf (`Invalid character within String: %v.` , printCharCode (code )))
230
+ return Token {}, gqlerrors .NewSyntaxError (s , runePosition , fmt .Sprintf (`Invalid character within String: %v.` , printCharCode (code )))
230
231
}
231
232
position += n
233
+ runePosition ++
232
234
if code == '\\' { // \
233
235
valueBuffer .Write (body [chunkStart : position - 1 ])
234
236
code , n = runeAt (body , position )
@@ -260,9 +262,9 @@ func readString(s *source.Source, start int) (Token, error) {
260
262
case 'u' :
261
263
// Check if there are at least 4 bytes available
262
264
if len (body ) <= position + 4 {
263
- return Token {}, gqlerrors .NewSyntaxError (s , position ,
265
+ return Token {}, gqlerrors .NewSyntaxError (s , runePosition ,
264
266
fmt .Sprintf ("Invalid character escape sequence: " +
265
- "\\ u%v" , body [position + 1 :]))
267
+ "\\ u%v" , string ( body [position + 1 :]) ))
266
268
}
267
269
charCode := uniCharCode (
268
270
rune (body [position + 1 ]),
@@ -271,18 +273,20 @@ func readString(s *source.Source, start int) (Token, error) {
271
273
rune (body [position + 4 ]),
272
274
)
273
275
if charCode < 0 {
274
- return Token {}, gqlerrors .NewSyntaxError (s , position ,
276
+ return Token {}, gqlerrors .NewSyntaxError (s , runePosition ,
275
277
fmt .Sprintf ("Invalid character escape sequence: " +
276
- "\\ u%v" , body [position + 1 :position + 5 ]))
278
+ "\\ u%v" , string ( body [position + 1 :position + 5 ]) ))
277
279
}
278
280
valueBuffer .WriteRune (charCode )
279
281
position += 4
282
+ runePosition += 4
280
283
break
281
284
default :
282
- return Token {}, gqlerrors .NewSyntaxError (s , position ,
285
+ return Token {}, gqlerrors .NewSyntaxError (s , runePosition ,
283
286
fmt .Sprintf (`Invalid character escape sequence: \\%c.` , code ))
284
287
}
285
288
position += n
289
+ runePosition ++
286
290
chunkStart = position
287
291
}
288
292
continue
@@ -291,7 +295,7 @@ func readString(s *source.Source, start int) (Token, error) {
291
295
}
292
296
}
293
297
if code != '"' { // quote (")
294
- return Token {}, gqlerrors .NewSyntaxError (s , position , "Unterminated string." )
298
+ return Token {}, gqlerrors .NewSyntaxError (s , runePosition , "Unterminated string." )
295
299
}
296
300
stringContent := body [chunkStart :position ]
297
301
valueBuffer .Write (stringContent )
@@ -346,15 +350,15 @@ func printCharCode(code rune) string {
346
350
func readToken (s * source.Source , fromPosition int ) (Token , error ) {
347
351
body := s .Body
348
352
bodyLength := len (body )
349
- position := positionAfterWhitespace (body , fromPosition )
353
+ position , runePosition := positionAfterWhitespace (body , fromPosition )
350
354
if position >= bodyLength {
351
355
return makeToken (TokenKind [EOF ], position , position , "" ), nil
352
356
}
353
357
code , codeLength := runeAt (body , position )
354
358
355
359
// SourceCharacter
356
360
if code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D {
357
- return Token {}, gqlerrors .NewSyntaxError (s , position , fmt .Sprintf (`Invalid character %v` , printCharCode (code )))
361
+ return Token {}, gqlerrors .NewSyntaxError (s , runePosition , fmt .Sprintf (`Invalid character %v` , printCharCode (code )))
358
362
}
359
363
360
364
switch code {
@@ -405,12 +409,12 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
405
409
// A-Z
406
410
case 'A' , 'B' , 'C' , 'D' , 'E' , 'F' , 'G' , 'H' , 'I' , 'J' , 'K' , 'L' , 'M' , 'N' ,
407
411
'O' , 'P' , 'Q' , 'R' , 'S' , 'T' , 'U' , 'V' , 'W' , 'X' , 'Y' , 'Z' :
408
- return readName (s , position ), nil
412
+ return readName (s , position , runePosition ), nil
409
413
// _
410
414
// a-z
411
415
case '_' , 'a' , 'b' , 'c' , 'd' , 'e' , 'f' , 'g' , 'h' , 'i' , 'j' , 'k' , 'l' , 'm' , 'n' ,
412
416
'o' , 'p' , 'q' , 'r' , 's' , 't' , 'u' , 'v' , 'w' , 'x' , 'y' , 'z' :
413
- return readName (s , position ), nil
417
+ return readName (s , position , runePosition ), nil
414
418
// -
415
419
// 0-9
416
420
case '-' , '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' :
@@ -428,12 +432,14 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
428
432
return token , nil
429
433
}
430
434
description := fmt .Sprintf ("Unexpected character %v." , printCharCode (code ))
431
- return Token {}, gqlerrors .NewSyntaxError (s , position , description )
435
+ return Token {}, gqlerrors .NewSyntaxError (s , runePosition , description )
432
436
}
433
437
438
+ // Gets the rune in the byte array at the given byte position, and its width in bytes.
434
439
func runeAt (body []byte , position int ) (code rune , charWidth int ) {
435
440
if len (body ) <= position {
436
- return 0 , utf8 .RuneError
441
+ // <EOF>
442
+ return - 1 , utf8 .RuneError
437
443
}
438
444
439
445
c := body [position ]
@@ -448,9 +454,11 @@ func runeAt(body []byte, position int) (code rune, charWidth int) {
448
454
// Skips whitespace, commas and #-comments in body, starting at startPosition,
449
455
// and stops at the first character that is none of those, returning the
450
456
// position of that character for lexing.
451
- func positionAfterWhitespace (body []byte , startPosition int ) int {
457
+ // Returns both the byte position and the rune position of that character.
458
+ func positionAfterWhitespace (body []byte , startPosition int ) (position int , runePosition int ) {
452
459
bodyLength := len (body )
453
- position := startPosition
460
+ position = startPosition
461
+ runePosition = startPosition
454
462
for {
455
463
if position < bodyLength {
456
464
code , n := runeAt (body , position )
@@ -466,15 +474,18 @@ func positionAfterWhitespace(body []byte, startPosition int) int {
466
474
// Comma
467
475
code == 0x002C {
468
476
position += n
477
+ runePosition ++
469
478
} else if code == 35 { // #
470
479
position += n
480
+ runePosition ++
471
481
for {
472
482
code , n := runeAt (body , position )
473
483
if position < bodyLength &&
474
484
code != 0 &&
475
485
// SourceCharacter but not LineTerminator
476
486
(code > 0x001F || code == 0x0009 ) && code != 0x000A && code != 0x000D {
477
487
position += n
488
+ runePosition ++
478
489
continue
479
490
} else {
480
491
break
@@ -488,7 +499,7 @@ func positionAfterWhitespace(body []byte, startPosition int) int {
488
499
break
489
500
}
490
501
}
491
- return position
502
+ return position , runePosition
492
503
}
493
504
494
505
func GetTokenDesc (token Token ) string {
0 commit comments