1
+ /*
2
+ [The "BSD licence"]
3
+ Copyright (c) 2013 Terence Parr, Sam Harwell
4
+ Copyright (c) 2017 Ivan Kochurkin (upgrade to Java 8)
5
+ Copyright (c) 2021 Michał Lorek (upgrade to Java 11)
6
+ Copyright (c) 2022 Michał Lorek (upgrade to Java 17)
7
+ All rights reserved.
8
+
9
+ Redistribution and use in source and binary forms, with or without
10
+ modification, are permitted provided that the following conditions
11
+ are met:
12
+ 1. Redistributions of source code must retain the above copyright
13
+ notice, this list of conditions and the following disclaimer.
14
+ 2. Redistributions in binary form must reproduce the above copyright
15
+ notice, this list of conditions and the following disclaimer in the
16
+ documentation and/or other materials provided with the distribution.
17
+ 3. The name of the author may not be used to endorse or promote products
18
+ derived from this software without specific prior written permission.
19
+
20
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
21
+ IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
22
+ OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
23
+ IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
24
+ INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
25
+ NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29
+ THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
+ */
31
+
32
+ // $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
33
+ // $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
34
+ // $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
35
+
36
+ lexer grammar JavaLexer;
37
+
38
// Keywords
//
// Each rule matches the exact keyword text with no surrounding whitespace.
// These rules are declared before IDENTIFIER, so when a keyword and IDENTIFIER
// match the same input length the keyword token is the one produced.

ABSTRACT     : 'abstract';
ASSERT       : 'assert';
BOOLEAN      : 'boolean';
BREAK        : 'break';
BYTE         : 'byte';
CASE         : 'case';
CATCH        : 'catch';
CHAR         : 'char';
CLASS        : 'class';
CONST        : 'const';
CONTINUE     : 'continue';
DEFAULT      : 'default';
DO           : 'do';
DOUBLE       : 'double';
ELSE         : 'else';
ENUM         : 'enum';
EXTENDS      : 'extends';
FINAL        : 'final';
FINALLY      : 'finally';
FLOAT        : 'float';
FOR          : 'for';
IF           : 'if';
GOTO         : 'goto';
IMPLEMENTS   : 'implements';
IMPORT       : 'import';
INSTANCEOF   : 'instanceof';
INT          : 'int';
INTERFACE    : 'interface';
LONG         : 'long';
NATIVE       : 'native';
NEW          : 'new';
PACKAGE      : 'package';
PRIVATE      : 'private';
PROTECTED    : 'protected';
PUBLIC       : 'public';
RETURN       : 'return';
SHORT        : 'short';
STATIC       : 'static';
STRICTFP     : 'strictfp';
SUPER        : 'super';
SWITCH       : 'switch';
SYNCHRONIZED : 'synchronized';
THIS         : 'this';
THROW        : 'throw';
THROWS       : 'throws';
TRANSIENT    : 'transient';
TRY          : 'try';
VOID         : 'void';
VOLATILE     : 'volatile';
WHILE        : 'while';
90
+
91
// Module related keywords
//
// Words used in module declarations. Each rule matches the bare word only;
// they precede IDENTIFIER in the file and therefore win equal-length matches.
MODULE     : 'module';
OPEN       : 'open';
REQUIRES   : 'requires';
EXPORTS    : 'exports';
OPENS      : 'opens';
TO         : 'to';
USES       : 'uses';
PROVIDES   : 'provides';
WITH       : 'with';
TRANSITIVE : 'transitive';
102
+
103
// Local Variable Type Inference
VAR: 'var'; // reserved type name

// Switch Expressions
YIELD: 'yield'; // reserved type name from Java 14

// Records
RECORD: 'record';

// Sealed Classes
SEALED     : 'sealed';
PERMITS    : 'permits';
NON_SEALED : 'non-sealed'; // single token; the hyphen is part of the literal
116
+
117
// Literals

// Integer literals. In every form an underscore may separate digits but a
// literal never ends with one; an optional 'l'/'L' suffix is accepted.
//   DECIMAL_LITERAL : "0", or a non-zero digit followed by more digits
//                     (optionally introduced by one or more underscores).
//   HEX_LITERAL     : "0x"/"0X" followed by hex digits.
//   OCT_LITERAL     : "0", optional underscores, then octal digits.
//   BINARY_LITERAL  : "0b"/"0B" followed by binary digits.
DECIMAL_LITERAL : ('0' | [1-9] (Digits? | '_'+ Digits)) [lL]?;
HEX_LITERAL     : '0' [xX] [0-9a-fA-F] ([0-9a-fA-F_]* [0-9a-fA-F])? [lL]?;
OCT_LITERAL     : '0' '_'* [0-7] ([0-7_]* [0-7])? [lL]?;
BINARY_LITERAL  : '0' [bB] [01] ([01_]* [01])? [lL]?;

// Decimal floating-point literal. Either it contains a '.', with digits on at
// least one side and an optional exponent and type suffix, or it is a plain
// digit sequence that must carry an exponent and/or an f/F/d/D suffix.
FLOAT_LITERAL:
    (Digits '.' Digits? | '.' Digits) ExponentPart? [fFdD]?
    | Digits (ExponentPart [fFdD]? | [fFdD])
;

// Hexadecimal floating-point literal: "0x"/"0X", a hex significand with an
// optional '.', a mandatory binary exponent introduced by p/P, and an
// optional f/F/d/D suffix.
HEX_FLOAT_LITERAL: '0' [xX] (HexDigits '.'? | HexDigits? '.' HexDigits) [pP] [+-]? Digits [fFdD]?;
130
+
131
BOOL_LITERAL: 'true' | 'false';

// A single character or escape sequence between single quotes. A raw quote,
// backslash, CR or LF is not allowed as the character.
CHAR_LITERAL: '\'' (~['\\\r\n] | EscapeSequence) '\'';

// Zero or more characters or escape sequences between double quotes. A raw
// double quote, backslash, CR or LF is not allowed inside.
STRING_LITERAL: '"' (~["\\\r\n] | EscapeSequence)* '"';

// Triple-quoted string: anything except a raw backslash, or an escape
// sequence, matched non-greedily up to the next triple quote.
// NOTE(review): this rule is declared before TEXT_BLOCK and accepts the same
// opening delimiter, so for input both rules match with equal length this rule
// is the one that produces the token - confirm that is intended.
MULTI_STRING_LIT: '"""' (~[\\] | EscapeSequence)*? '"""';

// Text block: the opening triple quote may be followed only by spaces/tabs and
// then a line terminator; content is matched non-greedily up to the next
// triple quote.
TEXT_BLOCK: '"""' [ \t]* [\r\n] (. | EscapeSequence)*? '"""';

NULL_LITERAL: 'null';
142
+
143
// Separators
//
// Single-character punctuation tokens; each literal is exactly one character.

LPAREN : '(';
RPAREN : ')';
LBRACE : '{';
RBRACE : '}';
LBRACK : '[';
RBRACK : ']';
SEMI   : ';';
COMMA  : ',';
DOT    : '.';
154
+
155
// Operators
//
// Operator tokens. Multi-character operators are separate tokens from their
// single-character prefixes; the lexer prefers the longest match, so "<=" is
// produced as LE rather than LT followed by ASSIGN.

ASSIGN   : '=';
GT       : '>';
LT       : '<';
BANG     : '!';
TILDE    : '~';
QUESTION : '?';
COLON    : ':';
EQUAL    : '==';
LE       : '<=';
GE       : '>=';
NOTEQUAL : '!=';
AND      : '&&';
OR       : '||';
INC      : '++';
DEC      : '--';
ADD      : '+';
SUB      : '-';
MUL      : '*';
DIV      : '/';
BITAND   : '&';
BITOR    : '|';
CARET    : '^';
MOD      : '%';

// Compound assignment operators.
ADD_ASSIGN     : '+=';
SUB_ASSIGN     : '-=';
MUL_ASSIGN     : '*=';
DIV_ASSIGN     : '/=';
AND_ASSIGN     : '&=';
OR_ASSIGN      : '|=';
XOR_ASSIGN     : '^=';
MOD_ASSIGN     : '%=';
LSHIFT_ASSIGN  : '<<=';
RSHIFT_ASSIGN  : '>>=';
URSHIFT_ASSIGN : '>>>=';
192
+
193
// Java 8 tokens

ARROW      : '->';
COLONCOLON : '::';

// Additional symbols not defined in the lexical specification

AT       : '@';
ELLIPSIS : '...';
202
+
203
// Whitespace and comments
//
// All three rules send their tokens to the HIDDEN channel instead of
// discarding them.
//   WS           : one or more of space, tab, CR, LF or form feed (U+000C).
//   COMMENT      : block comment, matched non-greedily up to the first
//                  closing delimiter.
//   LINE_COMMENT : two slashes and everything up to, but not including, the
//                  end of the line.

WS           : [ \t\r\n\u000C]+ -> channel(HIDDEN);
COMMENT      : '/*' .*? '*/'    -> channel(HIDDEN);
LINE_COMMENT : '//' ~[\r\n]*    -> channel(HIDDEN);
208
+
209
// Identifiers
//
// A Letter followed by any number of Letter-or-digit characters. Declared
// after the keyword and literal rules, so those rules take precedence when
// they match the same input length.

IDENTIFIER: Letter LetterOrDigit*;
212
+
213
// Fragment rules

// Exponent of a decimal floating-point literal: e/E, optional sign, digits.
fragment ExponentPart: [eE] [+-]? Digits;

// Escape sequences accepted inside character, string and text-block literals.
// In the first two alternatives the leading backslash may be followed by the
// text "u005c" before the escape character(s).
//   1. Backslash followed by one of: b t n f r " ' \
//   2. Backslash followed by an octal escape of one to three digits; a
//      three-digit escape must start with 0-3.
//   3. Backslash, one or more 'u', then exactly four hex digits.
fragment EscapeSequence:
    '\\' 'u005c'? [btnfr"'\\]
    | '\\' 'u005c'? ([0-3]? [0-7])? [0-7]
    | '\\' 'u'+ HexDigit HexDigit HexDigit HexDigit
;
222
+
223
// One or more hex digits; underscores may appear between digits but not at
// either end.
fragment HexDigits: HexDigit ((HexDigit | '_')* HexDigit)?;

// A single hexadecimal digit, either case.
fragment HexDigit: [0-9a-fA-F];

// One or more decimal digits; underscores may appear between digits but not
// at either end.
fragment Digits: [0-9] ([0-9_]* [0-9])?;

// Any character allowed after the first character of an identifier.
fragment LetterOrDigit: Letter | [0-9];
230
+
231
// A character that may start an identifier. The ranges below must contain no
// whitespace: inside a character set a space is a literal member and would
// change which characters are accepted.
fragment Letter:
    [a-zA-Z$_]                        // these are the "java letters" below 0x7F
    | ~[\u0000-\u007F\uD800-\uDBFF]   // covers all characters above 0x7F which are not a surrogate
    | [\uD800-\uDBFF] [\uDC00-\uDFFF] // covers UTF-16 surrogate pairs encodings for U+10000 to U+10FFFF
;
0 commit comments