Commit 04c51f7

Merge pull request google#5 from sparkprime/lexer_changes
Port lexer changes from google/jsonnet 0c96da7 to 27ddf2c. Fix google#1
2 parents: 2282fdf + c3f136d

File tree: 3 files changed (+48, -31 lines)

lexer.go

Lines changed: 41 additions & 29 deletions
@@ -55,7 +55,6 @@ const (
 	tokenBraceR
 	tokenBracketL
 	tokenBracketR
-	tokenColon
 	tokenComma
 	tokenDollar
 	tokenDot
@@ -101,7 +100,6 @@ var tokenKindStrings = []string{
 	tokenBraceR: "\"}\"",
 	tokenBracketL: "\"[\"",
 	tokenBracketR: "\"]\"",
-	tokenColon: "\":\"",
 	tokenComma: "\",\"",
 	tokenDollar: "\"$\"",
 	tokenDot: "\".\"",
@@ -197,7 +195,7 @@ func isIdentifier(r rune) bool {
 
 func isSymbol(r rune) bool {
 	switch r {
-	case '&', '|', '^', '=', '<', '>', '*', '/', '%', '#':
+	case '!', '$', ':', '~', '+', '-', '&', '|', '^', '=', '<', '>', '*', '/', '%':
 		return true
 	}
 	return false
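
For reference, the widened symbol set can be summarized in a standalone predicate (isOperatorRune is a made-up name, not code from this repository): '#' no longer counts as a symbol, while '!', '$', ':', '~', '+' and '-' now do, so those runes flow through the generic operator path in lexSymbol instead of being special-cased elsewhere.

package main

import (
	"fmt"
	"strings"
)

// isOperatorRune mirrors the new isSymbol set shown above.
func isOperatorRune(r rune) bool {
	return strings.ContainsRune("!$:~+-&|^=<>*/%", r)
}

func main() {
	for _, r := range "#!$:" {
		fmt.Printf("%q -> %v\n", r, isOperatorRune(r)) // '#' is false; the others are true
	}
}
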
@@ -533,7 +531,7 @@ func (l *lexer) lexIdentifier() {
 }
 
 // lexSymbol will lex a token that starts with a symbol. This could be a
-// comment, block quote or an operator. This function assumes that the next
+// C or C++ comment, block quote or an operator. This function assumes that the next
 // rune to be served by the lexer will be the first rune of the new token.
 func (l *lexer) lexSymbol() error {
 	r := l.next()
@@ -550,16 +548,6 @@ func (l *lexer) lexSymbol() error {
 		return nil
 	}
 
-	if r == '#' {
-		l.resetTokenStart() // Throw out the leading #
-		for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
-		}
-		// Leave the '\n' in the lexer to be fodder for the next round
-		l.backup()
-		l.addCommentFodder(fodderCommentHash)
-		return nil
-	}
-
 	if r == '/' && l.peek() == '*' {
 		commentStartLoc := l.tokenStartLoc
 		l.next() // consume the '*'
@@ -640,10 +628,39 @@ func (l *lexer) lexSymbol() error {
 
 	// Assume any string of symbols is a single operator.
 	for r = l.next(); isSymbol(r); r = l.next() {
-
+		// Not allowed // in operators
+		if r == '/' && strings.HasPrefix(l.input[l.pos:], "/") {
+			break
+		}
+		// Not allowed /* in operators
+		if r == '/' && strings.HasPrefix(l.input[l.pos:], "*") {
+			break
+		}
+		// Not allowed ||| in operators
+		if r == '|' && strings.HasPrefix(l.input[l.pos:], "||") {
+			break
+		}
 	}
+
 	l.backup()
-	l.emitToken(tokenOperator)
+
+	// Operators are not allowed to end with + - ~ ! unless they are one rune long.
+	// So, wind it back if we need to, but stop at the first rune.
+	// This relies on the hack that all operator symbols are ASCII and thus there is
+	// no need to treat this substring as general UTF-8.
+	for r = rune(l.input[l.pos - 1]); l.pos > l.tokenStart + 1; l.pos-- {
+		switch r {
+		case '+', '-', '~', '!':
+			continue
+		}
+		break
+	}
+
+	if l.input[l.tokenStart:l.pos] == "$" {
+		l.emitToken(tokenDollar)
+	} else {
+		l.emitToken(tokenOperator)
+	}
 	return nil
 }
 
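The two loops added above implement a longest-match rule with a wind-back step: symbol runes are consumed greedily, but an operator may not end in '+', '-', '~' or '!' unless it is a single rune, so such trailing runes are handed back to the lexer. A minimal standalone sketch of that rule (splitOperator is a made-up name, not code from this repository):

package main

import (
	"fmt"
	"strings"
)

// splitOperator trims trailing '+', '-', '~' and '!' runes from a greedily
// matched operator, stopping once only one rune is left. The trimmed suffix
// is what the real lexer would hand back to be lexed as further tokens.
func splitOperator(s string) (op, rest string) {
	op = s
	for len(op) > 1 && strings.ContainsRune("+-~!", rune(op[len(op)-1])) {
		rest = op[len(op)-1:] + rest
		op = op[:len(op)-1]
	}
	return op, rest
}

func main() {
	fmt.Println(splitOperator("->"))  // "->"   : '>' may end an operator
	fmt.Println(splitOperator("<-"))  // "<" "-": '-' is wound back and lexed on its own
	fmt.Println(splitOperator(":::")) // ":::"  : stays a single operator
}

This is why the tests below expect "->" to stay one operator while "<-" lexes as "<" followed by "-".
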
@@ -665,12 +682,8 @@ func lex(fn string, input string) (tokens, error) {
 			l.emitToken(tokenBracketL)
 		case ']':
 			l.emitToken(tokenBracketR)
-		case ':':
-			l.emitToken(tokenColon)
 		case ',':
 			l.emitToken(tokenComma)
-		case '$':
-			l.emitToken(tokenDollar)
 		case '.':
 			l.emitToken(tokenDot)
 		case '(':
@@ -680,15 +693,6 @@ func lex(fn string, input string) (tokens, error) {
 		case ';':
 			l.emitToken(tokenSemicolon)
 
-		// Operators
-		case '!':
-			if l.peek() == '=' {
-				_ = l.next()
-			}
-			l.emitToken(tokenOperator)
-		case '~', '+', '-':
-			l.emitToken(tokenOperator)
-
 		case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
 			l.backup()
 			err = l.lexNumber()
@@ -733,6 +737,14 @@ func lex(fn string, input string) (tokens, error) {
 					r = l.next()
 				}
 			}
+		case '#':
+			l.resetTokenStart() // Throw out the leading #
+			for r = l.next(); r != lexEOF && r != '\n'; r = l.next() {
+			}
+			// Leave the '\n' in the lexer to be fodder for the next round
+			l.backup()
+			l.addCommentFodder(fodderCommentHash)
+
 		default:
 			if isIdentifierFirst(r) {
 				l.backup()

lexer_test.go

Lines changed: 6 additions & 1 deletion
@@ -38,7 +38,12 @@ var lexTests = []lexTest{
 	{"brace R", "}", tokens{{kind: tokenBraceR, data: "}"}}, ""},
 	{"bracket L", "[", tokens{{kind: tokenBracketL, data: "["}}, ""},
 	{"bracket R", "]", tokens{{kind: tokenBracketR, data: "]"}}, ""},
-	{"colon", ":", tokens{{kind: tokenColon, data: ":"}}, ""},
+	{"colon", ":", tokens{{kind: tokenOperator, data: ":"}}, ""},
+	{"colon2", "::", tokens{{kind: tokenOperator, data: "::"}}, ""},
+	{"colon3", ":::", tokens{{kind: tokenOperator, data: ":::"}}, ""},
+	{"arrow right", "->", tokens{{kind: tokenOperator, data: "->"}}, ""},
+	{"less than minus", "<-", tokens{{kind: tokenOperator, data: "<"},
+		{kind: tokenOperator, data: "-"}}, ""},
 	{"comma", ",", tokens{{kind: tokenComma, data: ","}}, ""},
 	{"dollar", "$", tokens{{kind: tokenDollar, data: "$"}}, ""},
 	{"dot", ".", tokens{{kind: tokenDot, data: "."}}, ""},

parser.go

Lines changed: 1 addition & 1 deletion
@@ -144,7 +144,7 @@ func (p *parser) parse(prec precedence) (astNode, error) {
 			return nil, err
 		}
 		var msg astNode
-		if p.peek().kind == tokenColon {
+		if p.peek().kind == tokenOperator && p.peek().data == ":" {
 			p.pop()
 			msg, err = p.parse(maxPrecedence)
 			if err != nil {
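
With the dedicated colon token kind gone, the parser now matches the generic operator token by its data. If that check recurs, it could be factored into a small helper along these lines (purely illustrative, not part of this commit; the token type and field names are taken from the test file above):

// isColonToken reports whether t is the ":" operator token.
// Hypothetical helper, not in the repository.
func isColonToken(t token) bool {
	return t.kind == tokenOperator && t.data == ":"
}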
