Skip to content

Commit 4a8d417

Browse files
authored
Merge pull request #138 from sogko/sogko/lexperf-followup
Fix `lexer` tests (follow up to #137)
2 parents 065ab6b + db630ca commit 4a8d417

File tree

5 files changed

+312
-79
lines changed

5 files changed

+312
-79
lines changed

gqlerrors/syntax.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ package gqlerrors
33
import (
44
"fmt"
55
"regexp"
6-
76
"strings"
87

98
"github.com/graphql-go/graphql/language/ast"

language/lexer/lexer.go

Lines changed: 37 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,6 @@ type Token struct {
8383
Value string
8484
}
8585

86-
func (t *Token) String() string {
87-
return fmt.Sprintf("%s", tokenDescription[t.Kind])
88-
}
89-
9086
type Lexer func(resetPosition int) (Token, error)
9187

9288
func Lex(s *source.Source) Lexer {
@@ -106,24 +102,28 @@ func Lex(s *source.Source) Lexer {
106102

107103
// Reads an alphanumeric + underscore name from the source.
108104
// [_A-Za-z][_0-9A-Za-z]*
109-
func readName(source *source.Source, position int) Token {
105+
// position: Points to the byte position in the byte array
106+
// runePosition: Points to the rune position in the byte array
107+
func readName(source *source.Source, position, runePosition int) Token {
110108
body := source.Body
111109
bodyLength := len(body)
112-
end := position + 1
110+
endByte := position + 1
111+
endRune := runePosition + 1
113112
for {
114-
code, n := runeAt(body, end)
115-
if (end != bodyLength) &&
113+
code, _ := runeAt(body, endByte)
114+
if (endByte != bodyLength) &&
116115
(code == '_' || // _
117116
code >= '0' && code <= '9' || // 0-9
118117
code >= 'A' && code <= 'Z' || // A-Z
119118
code >= 'a' && code <= 'z') { // a-z
120-
end += n
119+
endByte++
120+
endRune++
121121
continue
122122
} else {
123123
break
124124
}
125125
}
126-
return makeToken(TokenKind[NAME], position, end, string(body[position:end]))
126+
return makeToken(TokenKind[NAME], runePosition, endRune, string(body[position:endByte]))
127127
}
128128

129129
// Reads a number token from the source file, either a float
@@ -212,6 +212,7 @@ func readDigits(s *source.Source, start int, firstCode rune, codeLength int) (in
212212
func readString(s *source.Source, start int) (Token, error) {
213213
body := s.Body
214214
position := start + 1
215+
runePosition := start + 1
215216
chunkStart := position
216217
var code rune
217218
var n int
@@ -226,9 +227,10 @@ func readString(s *source.Source, start int) (Token, error) {
226227

227228
// SourceCharacter
228229
if code < 0x0020 && code != 0x0009 {
229-
return Token{}, gqlerrors.NewSyntaxError(s, position, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
230+
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character within String: %v.`, printCharCode(code)))
230231
}
231232
position += n
233+
runePosition++
232234
if code == '\\' { // \
233235
valueBuffer.Write(body[chunkStart : position-1])
234236
code, n = runeAt(body, position)
@@ -260,9 +262,9 @@ func readString(s *source.Source, start int) (Token, error) {
260262
case 'u':
261263
// Check if there are at least 4 bytes available
262264
if len(body) <= position+4 {
263-
return Token{}, gqlerrors.NewSyntaxError(s, position,
265+
return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
264266
fmt.Sprintf("Invalid character escape sequence: "+
265-
"\\u%v", body[position+1:]))
267+
"\\u%v", string(body[position+1:])))
266268
}
267269
charCode := uniCharCode(
268270
rune(body[position+1]),
@@ -271,18 +273,20 @@ func readString(s *source.Source, start int) (Token, error) {
271273
rune(body[position+4]),
272274
)
273275
if charCode < 0 {
274-
return Token{}, gqlerrors.NewSyntaxError(s, position,
276+
return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
275277
fmt.Sprintf("Invalid character escape sequence: "+
276-
"\\u%v", body[position+1:position+5]))
278+
"\\u%v", string(body[position+1:position+5])))
277279
}
278280
valueBuffer.WriteRune(charCode)
279281
position += 4
282+
runePosition += 4
280283
break
281284
default:
282-
return Token{}, gqlerrors.NewSyntaxError(s, position,
285+
return Token{}, gqlerrors.NewSyntaxError(s, runePosition,
283286
fmt.Sprintf(`Invalid character escape sequence: \\%c.`, code))
284287
}
285288
position += n
289+
runePosition++
286290
chunkStart = position
287291
}
288292
continue
@@ -291,7 +295,7 @@ func readString(s *source.Source, start int) (Token, error) {
291295
}
292296
}
293297
if code != '"' { // quote (")
294-
return Token{}, gqlerrors.NewSyntaxError(s, position, "Unterminated string.")
298+
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, "Unterminated string.")
295299
}
296300
stringContent := body[chunkStart:position]
297301
valueBuffer.Write(stringContent)
@@ -346,15 +350,15 @@ func printCharCode(code rune) string {
346350
func readToken(s *source.Source, fromPosition int) (Token, error) {
347351
body := s.Body
348352
bodyLength := len(body)
349-
position := positionAfterWhitespace(body, fromPosition)
353+
position, runePosition := positionAfterWhitespace(body, fromPosition)
350354
if position >= bodyLength {
351355
return makeToken(TokenKind[EOF], position, position, ""), nil
352356
}
353357
code, codeLength := runeAt(body, position)
354358

355359
// SourceCharacter
356360
if code < 0x0020 && code != 0x0009 && code != 0x000A && code != 0x000D {
357-
return Token{}, gqlerrors.NewSyntaxError(s, position, fmt.Sprintf(`Invalid character %v`, printCharCode(code)))
361+
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, fmt.Sprintf(`Invalid character %v`, printCharCode(code)))
358362
}
359363

360364
switch code {
@@ -405,12 +409,12 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
405409
// A-Z
406410
case 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N',
407411
'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
408-
return readName(s, position), nil
412+
return readName(s, position, runePosition), nil
409413
// _
410414
// a-z
411415
case '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
412416
'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z':
413-
return readName(s, position), nil
417+
return readName(s, position, runePosition), nil
414418
// -
415419
// 0-9
416420
case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
@@ -428,12 +432,14 @@ func readToken(s *source.Source, fromPosition int) (Token, error) {
428432
return token, nil
429433
}
430434
description := fmt.Sprintf("Unexpected character %v.", printCharCode(code))
431-
return Token{}, gqlerrors.NewSyntaxError(s, position, description)
435+
return Token{}, gqlerrors.NewSyntaxError(s, runePosition, description)
432436
}
433437

438+
// Gets the rune from the byte array at the given byte position and its width in bytes
434439
func runeAt(body []byte, position int) (code rune, charWidth int) {
435440
if len(body) <= position {
436-
return 0, utf8.RuneError
441+
// <EOF>
442+
return -1, utf8.RuneError
437443
}
438444

439445
c := body[position]
@@ -448,9 +454,11 @@ func runeAt(body []byte, position int) (code rune, charWidth int) {
448454
// Reads from body starting at startPosition until it finds a non-whitespace
449455
// or commented character, then returns the position of that character for
450456
// lexing.
451-
func positionAfterWhitespace(body []byte, startPosition int) int {
457+
// Returns both the byte position and the rune position
458+
func positionAfterWhitespace(body []byte, startPosition int) (position int, runePosition int) {
452459
bodyLength := len(body)
453-
position := startPosition
460+
position = startPosition
461+
runePosition = startPosition
454462
for {
455463
if position < bodyLength {
456464
code, n := runeAt(body, position)
@@ -466,15 +474,18 @@ func positionAfterWhitespace(body []byte, startPosition int) int {
466474
// Comma
467475
code == 0x002C {
468476
position += n
477+
runePosition++
469478
} else if code == 35 { // #
470479
position += n
480+
runePosition++
471481
for {
472482
code, n := runeAt(body, position)
473483
if position < bodyLength &&
474484
code != 0 &&
475485
// SourceCharacter but not LineTerminator
476486
(code > 0x001F || code == 0x0009) && code != 0x000A && code != 0x000D {
477487
position += n
488+
runePosition++
478489
continue
479490
} else {
480491
break
@@ -488,7 +499,7 @@ func positionAfterWhitespace(body []byte, startPosition int) int {
488499
break
489500
}
490501
}
491-
return position
502+
return position, runePosition
492503
}
493504

494505
func GetTokenDesc(token Token) string {

0 commit comments

Comments
 (0)