Skip to content

Commit 0a1c051

Browse files
committed
Merge pull request #133 from davidzchen/lexer-test
Add unit tests for lexer
2 parents 26ea096 + d0f5afa commit 0a1c051

File tree

4 files changed

+332
-11
lines changed

4 files changed

+332
-11
lines changed

core/BUILD

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,39 @@
11
package(default_visibility = ["//visibility:public"])
22

3+
# Header-only helpers (unicode handling and the StaticError type) shared by
# the lexer and the main jsonnet library.
cc_library(
4+
name = "common",
5+
hdrs = [
6+
"unicode.h",
7+
"static_error.h",
8+
],
9+
includes = ["."],
10+
)
11+
12+
# The jsonnet lexer, split out of jsonnet-common so it can be unit-tested
# in isolation (see :lexer_test).
cc_library(
13+
name = "lexer",
14+
srcs = ["lexer.cpp"],
15+
hdrs = ["lexer.h"],
16+
deps = [
17+
":common",
18+
],
19+
includes = ["."],
20+
)
21+
22+
# Unit tests for :lexer, driven by googletest (gtest_main supplies main()).
cc_test(
23+
name = "lexer_test",
24+
srcs = ["lexer_test.cc"],
25+
deps = [
26+
":lexer",
27+
"//external:gtest_main",
28+
],
29+
)
30+
331
cc_library(
432
name = "jsonnet-common",
533
srcs = [
634
"desugarer.cpp",
735
"formatter.cpp",
836
"libjsonnet.cpp",
9-
"lexer.cpp",
1037
"parser.cpp",
1138
"static_analysis.cpp",
1239
"string_utils.cpp",
@@ -17,16 +44,15 @@ cc_library(
1744
"ast.h",
1845
"desugarer.h",
1946
"formatter.h",
20-
"lexer.h",
2147
"parser.h",
2248
"state.h",
2349
"static_analysis.h",
24-
"static_error.h",
2550
"string_utils.h",
26-
"unicode.h",
2751
"vm.h",
2852
],
2953
deps = [
54+
":common",
55+
":lexer",
3056
"//include:libjsonnet",
3157
],
3258
linkopts = ["-lm"],

core/lexer.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ struct Token {
136136
std::string stringBlockIndent;
137137

138138
/** If kind == STRING_BLOCK then stores the sequence of whitespace that indented the end of
139-
* the block.
139+
* the block.
140140
*
141141
* This is always fewer whitespace characters than in stringBlockIndent.
142142
*/
@@ -153,7 +153,8 @@ struct Token {
153153
stringBlockTermIndent(string_block_term_indent), location(location)
154154
{ }
155155

156-
//Token(Kind kind, const std::string &data="") : kind(kind), data(data) { }
156+
Token(Kind kind, const std::string &data="")
157+
: kind(kind), data(data) { }
157158

158159
static const char *toString(Kind v)
159160
{
@@ -204,7 +205,7 @@ struct Token {
204205
};
205206

206207
/** The result of lexing.
207-
*
208+
*
208209
* Because of the EOF token, this will always contain at least one token. So element 0 can be used
209210
* to get the filename.
210211
*/

core/lexer_test.cc

Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
// Copyright 2016 Google Inc. All rights reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "lexer.h"
16+
17+
#include <list>
18+
#include "gtest/gtest.h"
19+
20+
namespace {
21+
22+
void TestLex(const char* name,
23+
const char* input,
24+
const std::list<Token>& tokens,
25+
const std::string& error) {
26+
std::list<Token> test_tokens(tokens);
27+
test_tokens.push_back(Token(Token::Kind::END_OF_FILE, ""));
28+
29+
try {
30+
std::list<Token> lexed_tokens = jsonnet_lex(name, input);
31+
ASSERT_EQ(test_tokens, lexed_tokens)
32+
<< "Test failed: " << name << std::endl;
33+
} catch (StaticError& e) {
34+
ASSERT_EQ(error, e.toString());
35+
}
36+
}
37+
38+
// Empty input and pure whitespace lex to nothing but the implicit EOF token.
TEST(Lexer, TestWhitespace) {
    TestLex("whitespace", " \t\n\r\r\n", {}, "");
    TestLex("empty", "", {}, "");
}
42+
43+
// Punctuation and operator tokens.  Each case lexes (with an empty error)
// to exactly the listed token stream.
TEST(Lexer, TestOperators) {
    struct Case {
        const char* name;
        const char* input;
        std::list<Token> expected;
    };
    const Case cases[] = {
        {"brace L", "{", {Token(Token::Kind::BRACE_L, "")}},
        {"brace R", "}", {Token(Token::Kind::BRACE_R, "")}},
        {"bracket L", "[", {Token(Token::Kind::BRACKET_L, "")}},
        {"bracket R", "]", {Token(Token::Kind::BRACKET_R, "")}},
        {"colon ", ":", {Token(Token::Kind::OPERATOR, ":")}},
        {"colon 2", "::", {Token(Token::Kind::OPERATOR, "::")}},
        // NOTE(review): the original reuses the name "colon 2" for ":::".
        {"colon 2", ":::", {Token(Token::Kind::OPERATOR, ":::")}},
        {"arrow right", "->", {Token(Token::Kind::OPERATOR, "->")}},
        // "<-" is not a single operator: it lexes as "<" followed by "-".
        {"less than minus", "<-",
         {Token(Token::Kind::OPERATOR, "<"), Token(Token::Kind::OPERATOR, "-")}},
        {"comma", ",", {Token(Token::Kind::COMMA, "")}},
        {"dollar", "$", {Token(Token::Kind::DOLLAR, "")}},
        {"dot", ".", {Token(Token::Kind::DOT, "")}},
        {"paren L", "(", {Token(Token::Kind::PAREN_L, "")}},
        {"paren R", ")", {Token(Token::Kind::PAREN_R, "")}},
        {"semicolon", ";", {Token(Token::Kind::SEMICOLON, "")}},
        {"not 1", "!", {Token(Token::Kind::OPERATOR, "!")}},
        {"not 2", "! ", {Token(Token::Kind::OPERATOR, "!")}},
        {"not equal", "!=", {Token(Token::Kind::OPERATOR, "!=")}},
        {"tilde", "~", {Token(Token::Kind::OPERATOR, "~")}},
        {"plus", "+", {Token(Token::Kind::OPERATOR, "+")}},
        {"minus", "-", {Token(Token::Kind::OPERATOR, "-")}},
    };
    for (const auto& c : cases) {
        TestLex(c.name, c.input, c.expected, "");
    }
}
69+
70+
// Each of these inputs lexes to a single OPERATOR token spelled exactly
// like the input (including the arbitrary-looking ">==|").
TEST(Lexer, TestMiscOperators) {
    for (const char* op : {"*", "/", "%", "&", "|", "^", "=", "<", ">", ">==|"}) {
        const std::string name = std::string("op ") + op;
        TestLex(name.c_str(), op, {Token(Token::Kind::OPERATOR, op)}, "");
    }
}
82+
83+
// Number lexing: valid literals, token-splitting cases, and malformed
// numbers that must raise lex errors.
TEST(Lexer, TestNumbers) {
    // Inputs that lex to exactly one NUMBER token spelled like the input.
    for (const char* num : {"0", "1", "1.0", "0.10", "0e100", "1e100",
                            "1.1e100", "1.1e-100", "1.1e+100"}) {
        const std::string name = std::string("number ") + num;
        TestLex(name.c_str(), num, {Token(Token::Kind::NUMBER, num)}, "");
    }

    // A leading zero ends the first number, so "0100" is two NUMBER tokens.
    TestLex("number 0100", "0100",
            {Token(Token::Kind::NUMBER, "0"),
             Token(Token::Kind::NUMBER, "100")},
            "");
    TestLex("number 10+10", "10+10",
            {Token(Token::Kind::NUMBER, "10"),
             Token(Token::Kind::OPERATOR, "+"),
             Token(Token::Kind::NUMBER, "10")}, "");

    // Malformed numbers: junk after the decimal point / exponent.
    TestLex("number 1.+3", "1.+3", {},
            "number 1.+3:1:1: Couldn't lex number, junk after decimal point: +");
    TestLex("number 1e!", "1e!", {},
            "number 1e!:1:1: Couldn't lex number, junk after 'E': !");
    TestLex("number 1e+!", "1e+!", {},
            "number 1e+!:1:1: Couldn't lex number, junk after exponent sign: !");
}
110+
111+
// Double-quoted strings, including embedded newlines, escaped quotes,
// and the unterminated-string error.
TEST(Lexer, TestDoubleStrings) {
    auto ok = [](const char* name, const char* input, const char* contents) {
        TestLex(name, input, {Token(Token::Kind::STRING_DOUBLE, contents)}, "");
    };
    ok("double string \"hi\"", "\"hi\"", "hi");
    ok("double string \"hi nl\"", "\"hi\n\"", "hi\n");
    ok("double string \"hi\\\"\"", "\"hi\\\"\"", "hi\\\"");
    ok("double string \"hi\\nl\"", "\"hi\\\n\"", "hi\\\n");
    TestLex("double string \"hi", "\"hi", {},
            "double string \"hi:1:1: Unterminated string");
}
123+
124+
// Single-quoted strings, mirroring the double-quoted cases above.
TEST(Lexer, TestSingleStrings) {
    auto ok = [](const char* name, const char* input, const char* contents) {
        TestLex(name, input, {Token(Token::Kind::STRING_SINGLE, contents)}, "");
    };
    ok("single string 'hi'", "'hi'", "hi");
    ok("single string 'hi nl'", "'hi\n'", "hi\n");
    ok("single string 'hi\\''", "'hi\\''", "hi\\'");
    ok("single string 'hi\\nl'", "'hi\\\n'", "hi\\\n");
    TestLex("single string 'hi", "'hi", {},
            "single string 'hi:1:1: Unterminated string");
}
136+
137+
// Block (|||) string indented with spaces: the common leading indent is
// stripped from the content and recorded as stringBlockIndent.
// NOTE(review): this page scrape may have collapsed runs of spaces in these
// literals — confirm the exact indentation against the original file.
TEST(Lexer, TestBlockStringSpaces) {
138+
const char str[] =
139+
"|||\n"
140+
" test\n"
141+
" more\n"
142+
" |||\n"
143+
" foo\n"
144+
"|||";
145+
const Token token = Token(
146+
Token::Kind::STRING_BLOCK,
147+
{},  // presumably fodder — confirm against Token's constructor in lexer.h
148+
"test\n more\n|||\n foo\n",  // data: content with the indent stripped
149+
" ",  // stringBlockIndent
150+
"",  // stringBlockTermIndent
151+
{});  // location
152+
TestLex("block string spaces", str, {token}, "");
153+
}
154+
155+
// Block string indented with a tab: the tab is the recorded block indent,
// and deeper-indented lines keep their extra leading whitespace.
TEST(Lexer, TestBlockStringTabs) {
156+
const char str[] =
157+
"|||\n"
158+
"\ttest\n"
159+
"\t more\n"
160+
"\t|||\n"
161+
"\t foo\n"
162+
"|||";
163+
const Token token = Token(
164+
Token::Kind::STRING_BLOCK,
165+
{},  // presumably fodder — confirm against Token's constructor in lexer.h
166+
"test\n more\n|||\n foo\n",  // data: content with the tab indent stripped
167+
"\t",  // stringBlockIndent
168+
"",  // stringBlockTermIndent
169+
{});  // location
170+
TestLex("block string tabs", str, {token}, "");
171+
}
172+
173+
// Block string with a mixed tab/space/tab indent: the whole mixed prefix is
// treated as the block indent and stripped verbatim.
TEST(Lexer, TestBlockStringsMixed) {
174+
const char str[] =
175+
"|||\n"
176+
"\t \ttest\n"
177+
"\t \t more\n"
178+
"\t \t|||\n"
179+
"\t \t foo\n"
180+
"|||";
181+
const Token token = Token(
182+
Token::Kind::STRING_BLOCK,
183+
{},  // presumably fodder — confirm against Token's constructor in lexer.h
184+
"test\n more\n|||\n foo\n",  // data: content with the mixed indent stripped
185+
"\t \t",  // stringBlockIndent
186+
"",  // stringBlockTermIndent
187+
{});  // location
188+
TestLex("block string mixed", str, {token}, "");
189+
}
190+
191+
// Block string containing blank lines: blank lines survive in the content
// and do not affect the computed indent.
// NOTE(review): space runs may have been collapsed by the page scrape —
// confirm the exact indentation against the original file.
TEST(Lexer, TestBlockStringBlanks) {
192+
const char str[] =
193+
"|||\n\n"
194+
" test\n\n\n"
195+
" more\n"
196+
" |||\n"
197+
" foo\n"
198+
"|||";
199+
const Token token = Token(
200+
Token::Kind::STRING_BLOCK,
201+
{},  // presumably fodder — confirm against Token's constructor in lexer.h
202+
"\ntest\n\n\n more\n|||\n foo\n",  // data: blank lines preserved
203+
" ",  // stringBlockIndent
204+
"",  // stringBlockTermIndent
205+
{});  // location
206+
TestLex("block string blanks", str, {token}, "");
207+
}
208+
209+
// A block whose indentation never allows the closing ||| to be recognized.
// NOTE(review): space runs may have been collapsed by the page scrape —
// confirm the exact indentation against the original file.
TEST(Lexer, TestBlockStringBadIndent) {
    TestLex("block string bad indent",
            "|||\n"
            " test\n"
            " foo\n"
            "|||",
            {},
            "block string bad indent:1:1: Text block not terminated with |||");
}
218+
219+
// Input ends mid-block: the lexer must report an unexpected EOF.
TEST(Lexer, TestBlockStringEof) {
    TestLex("block string eof",
            "|||\n"
            " test",
            {},
            "block string eof:1:1: Unexpected EOF");
}
225+
226+
// A block that ends after a content line but without a closing |||.
TEST(Lexer, TestBlockStringNotTerm) {
    TestLex("block string not term",
            "|||\n"
            " test\n",
            {},
            "block string not term:1:1: Text block not terminated with |||");
}
233+
234+
// The first content line of a block string must start with whitespace.
TEST(Lexer, TestBlockStringNoWs) {
    TestLex("block string no ws",
            "|||\n"
            "test\n"
            "|||",
            {},
            "block string no ws:1:1: Text block's first line must start with"
            " whitespace.");
}
243+
244+
// Every reserved word lexes to its dedicated token kind, with the keyword
// text itself as the token data.
TEST(Lexer, TestKeywords) {
    struct Keyword {
        const char* text;
        Token::Kind kind;
    };
    const Keyword keywords[] = {
        {"assert", Token::Kind::ASSERT},
        {"else", Token::Kind::ELSE},
        {"error", Token::Kind::ERROR},
        {"false", Token::Kind::FALSE},
        {"for", Token::Kind::FOR},
        {"function", Token::Kind::FUNCTION},
        {"if", Token::Kind::IF},
        {"import", Token::Kind::IMPORT},
        {"importstr", Token::Kind::IMPORTSTR},
        {"in", Token::Kind::IN},
        {"local", Token::Kind::LOCAL},
        {"null", Token::Kind::NULL_LIT},
        {"self", Token::Kind::SELF},
        {"super", Token::Kind::SUPER},
        {"tailstrict", Token::Kind::TAILSTRICT},
        {"then", Token::Kind::THEN},
        {"true", Token::Kind::TRUE},
    };
    for (const auto& kw : keywords) {
        TestLex(kw.text, kw.text, {Token(kw.kind, kw.text)}, "");
    }
}
266+
267+
// Identifiers may contain digits; whitespace splits them into separate tokens.
TEST(Lexer, TestIdentifier) {
    const std::list<Token> one = {Token(Token::Kind::IDENTIFIER, "foobar123")};
    TestLex("identifier", "foobar123", one, "");

    const std::list<Token> two = {
        Token(Token::Kind::IDENTIFIER, "foo"),
        Token(Token::Kind::IDENTIFIER, "bar123"),
    };
    TestLex("identifier", "foo bar123", two, "");
}
274+
275+
// Comments produce no tokens; an unterminated C comment is a lex error.
// TODO(dzc): Test does not look at fodder yet.
TEST(Lexer, TestComments) {
    struct Case {
        const char* name;
        const char* input;
    };
    const Case ok_cases[] = {
        {"c++ comment", "// hi"},
        {"hash comment", "# hi"},
        {"c comment", "/* hi */"},
    };
    for (const auto& c : ok_cases) {
        TestLex(c.name, c.input, {}, "");
    }
    TestLex("c comment no term", "/* hi", {},
            "c comment no term:1:1: Multi-line comment has no terminating */.");
}
283+
284+
} // namespace

0 commit comments

Comments
 (0)