@@ -55,7 +55,6 @@ const (
55
55
tokenBraceR
56
56
tokenBracketL
57
57
tokenBracketR
58
- tokenColon
59
58
tokenComma
60
59
tokenDollar
61
60
tokenDot
@@ -101,7 +100,6 @@ var tokenKindStrings = []string{
101
100
tokenBraceR : "\" }\" " ,
102
101
tokenBracketL : "\" [\" " ,
103
102
tokenBracketR : "\" ]\" " ,
104
- tokenColon : "\" :\" " ,
105
103
tokenComma : "\" ,\" " ,
106
104
tokenDollar : "\" $\" " ,
107
105
tokenDot : "\" .\" " ,
@@ -197,7 +195,7 @@ func isIdentifier(r rune) bool {
197
195
198
196
func isSymbol (r rune ) bool {
199
197
switch r {
200
- case '&' , '|' , '^' , '=' , '<' , '>' , '*' , '/' , '%' , '# ' :
198
+ case '!' , '$' , ':' , '~' , '+' , '-' , ' &' , '|' , '^' , '=' , '<' , '>' , '*' , '/' , '%' :
201
199
return true
202
200
}
203
201
return false
@@ -533,7 +531,7 @@ func (l *lexer) lexIdentifier() {
533
531
}
534
532
535
533
// lexSymbol will lex a token that starts with a symbol. This could be a
536
- // comment, block quote or an operator. This function assumes that the next
534
+ // C or C++ comment, block quote or an operator. This function assumes that the next
537
535
// rune to be served by the lexer will be the first rune of the new token.
538
536
func (l * lexer ) lexSymbol () error {
539
537
r := l .next ()
@@ -550,16 +548,6 @@ func (l *lexer) lexSymbol() error {
550
548
return nil
551
549
}
552
550
553
- if r == '#' {
554
- l .resetTokenStart () // Throw out the leading #
555
- for r = l .next (); r != lexEOF && r != '\n' ; r = l .next () {
556
- }
557
- // Leave the '\n' in the lexer to be fodder for the next round
558
- l .backup ()
559
- l .addCommentFodder (fodderCommentHash )
560
- return nil
561
- }
562
-
563
551
if r == '/' && l .peek () == '*' {
564
552
commentStartLoc := l .tokenStartLoc
565
553
l .next () // consume the '*'
@@ -640,10 +628,39 @@ func (l *lexer) lexSymbol() error {
640
628
641
629
// Assume any string of symbols is a single operator.
642
630
for r = l .next (); isSymbol (r ); r = l .next () {
643
-
631
+ // The sequence "//" is not allowed inside an operator.
632
+ if r == '/' && strings .HasPrefix (l .input [l .pos :], "/" ) {
633
+ break
634
+ }
635
+ // The sequence "/*" is not allowed inside an operator.
636
+ if r == '/' && strings .HasPrefix (l .input [l .pos :], "*" ) {
637
+ break
638
+ }
639
+ // The sequence "|||" is not allowed inside an operator.
640
+ if r == '|' && strings .HasPrefix (l .input [l .pos :], "||" ) {
641
+ break
642
+ }
644
643
}
644
+
645
645
l .backup ()
646
- l .emitToken (tokenOperator )
646
+
647
+ // Operators are not allowed to end with + - ~ ! unless they are one rune long.
648
+ // So, wind it back if we need to, but stop at the first rune.
649
+ // This relies on the hack that all operator symbols are ASCII and thus there is
650
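+ // no need to treat this substring as general UTF-8.
+ // NOTE(review): r is read only once, in the loop's init clause below, so the switch tests the same rune on every iteration — confirm whether it should be re-read from l.input[l.pos-1] on each pass.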
651
+ for r = rune (l .input [l .pos - 1 ]); l .pos > l .tokenStart + 1 ; l .pos -- {
652
+ switch r {
653
+ case '+' , '-' , '~' , '!' :
654
+ continue
655
+ }
656
+ break
657
+ }
658
+
659
+ if l .input [l .tokenStart :l .pos ] == "$" {
660
+ l .emitToken (tokenDollar )
661
+ } else {
662
+ l .emitToken (tokenOperator )
663
+ }
647
664
return nil
648
665
}
649
666
@@ -665,12 +682,8 @@ func lex(fn string, input string) (tokens, error) {
665
682
l .emitToken (tokenBracketL )
666
683
case ']' :
667
684
l .emitToken (tokenBracketR )
668
- case ':' :
669
- l .emitToken (tokenColon )
670
685
case ',' :
671
686
l .emitToken (tokenComma )
672
- case '$' :
673
- l .emitToken (tokenDollar )
674
687
case '.' :
675
688
l .emitToken (tokenDot )
676
689
case '(' :
@@ -680,15 +693,6 @@ func lex(fn string, input string) (tokens, error) {
680
693
case ';' :
681
694
l .emitToken (tokenSemicolon )
682
695
683
- // Operators
684
- case '!' :
685
- if l .peek () == '=' {
686
- _ = l .next ()
687
- }
688
- l .emitToken (tokenOperator )
689
- case '~' , '+' , '-' :
690
- l .emitToken (tokenOperator )
691
-
692
696
case '0' , '1' , '2' , '3' , '4' , '5' , '6' , '7' , '8' , '9' :
693
697
l .backup ()
694
698
err = l .lexNumber ()
@@ -733,6 +737,14 @@ func lex(fn string, input string) (tokens, error) {
733
737
r = l .next ()
734
738
}
735
739
}
740
+ case '#' :
741
+ l .resetTokenStart () // Throw out the leading #
742
+ for r = l .next (); r != lexEOF && r != '\n' ; r = l .next () {
743
+ }
744
+ // Leave the '\n' in the lexer to be fodder for the next round
745
+ l .backup ()
746
+ l .addCommentFodder (fodderCommentHash )
747
+
736
748
default :
737
749
if isIdentifierFirst (r ) {
738
750
l .backup ()
0 commit comments