|
| 1 | +// Copyright 2016 Google Inc. All rights reserved. |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +#include "lexer.h" |
| 16 | + |
| 17 | +#include <list> |
| 18 | +#include "gtest/gtest.h" |
| 19 | + |
| 20 | +namespace { |
| 21 | + |
| 22 | +void TestLex(const char* name, |
| 23 | + const char* input, |
| 24 | + const std::list<Token>& tokens, |
| 25 | + const std::string& error) { |
| 26 | + std::list<Token> test_tokens(tokens); |
| 27 | + test_tokens.push_back(Token(Token::Kind::END_OF_FILE, "")); |
| 28 | + |
| 29 | + try { |
| 30 | + std::list<Token> lexed_tokens = jsonnet_lex(name, input); |
| 31 | + ASSERT_EQ(test_tokens, lexed_tokens) |
| 32 | + << "Test failed: " << name << std::endl; |
| 33 | + } catch (StaticError& e) { |
| 34 | + ASSERT_EQ(error, e.toString()); |
| 35 | + } |
| 36 | +} |
| 37 | + |
| 38 | +TEST(Lexer, TestWhitespace) { |
| 39 | + TestLex("empty", "", {}, ""); |
| 40 | + TestLex("whitespace", " \t\n\r\r\n", {}, ""); |
| 41 | +} |
| 42 | + |
| 43 | +TEST(Lexer, TestOperators) { |
| 44 | + TestLex("brace L", "{", {Token(Token::Kind::BRACE_L, "")}, ""); |
| 45 | + TestLex("brace R", "}", {Token(Token::Kind::BRACE_R, "")}, ""); |
| 46 | + TestLex("bracket L", "[", {Token(Token::Kind::BRACKET_L, "")}, ""); |
| 47 | + TestLex("bracket R", "]", {Token(Token::Kind::BRACKET_R, "")}, ""); |
| 48 | + TestLex("colon ", ":", {Token(Token::Kind::OPERATOR, ":")}, ""); |
| 49 | + TestLex("colon 2", "::", {Token(Token::Kind::OPERATOR, "::")}, ""); |
| 50 | + TestLex("colon 2", ":::", {Token(Token::Kind::OPERATOR, ":::")}, ""); |
| 51 | + TestLex("arrow right", "->", {Token(Token::Kind::OPERATOR, "->")}, ""); |
| 52 | + TestLex("less than minus", "<-", |
| 53 | + {Token(Token::Kind::OPERATOR, "<"), |
| 54 | + Token(Token::Kind::OPERATOR, "-")}, ""); |
| 55 | + TestLex("comma", ",", {Token(Token::Kind::COMMA, "")}, ""); |
| 56 | + TestLex("dollar", "$", {Token(Token::Kind::DOLLAR, "")}, ""); |
| 57 | + TestLex("dot", ".", {Token(Token::Kind::DOT, "")}, ""); |
| 58 | + TestLex("paren L", "(", {Token(Token::Kind::PAREN_L, "")}, ""); |
| 59 | + TestLex("paren R", ")", {Token(Token::Kind::PAREN_R, "")}, ""); |
| 60 | + TestLex("semicolon", ";", {Token(Token::Kind::SEMICOLON, "")}, ""); |
| 61 | + |
| 62 | + TestLex("not 1", "!", {Token(Token::Kind::OPERATOR, "!")}, ""); |
| 63 | + TestLex("not 2", "! ", {Token(Token::Kind::OPERATOR, "!")}, ""); |
| 64 | + TestLex("not equal", "!=", {Token(Token::Kind::OPERATOR, "!=")}, ""); |
| 65 | + TestLex("tilde", "~", {Token(Token::Kind::OPERATOR, "~")}, ""); |
| 66 | + TestLex("plus", "+", {Token(Token::Kind::OPERATOR, "+")}, ""); |
| 67 | + TestLex("minus", "-", {Token(Token::Kind::OPERATOR, "-")}, ""); |
| 68 | +} |
| 69 | + |
| 70 | +TEST(Lexer, TestMiscOperators) { |
| 71 | + TestLex("op *", "*", {Token(Token::Kind::OPERATOR, "*")}, ""); |
| 72 | + TestLex("op /", "/", {Token(Token::Kind::OPERATOR, "/")}, ""); |
| 73 | + TestLex("op %", "%", {Token(Token::Kind::OPERATOR, "%")}, ""); |
| 74 | + TestLex("op &", "&", {Token(Token::Kind::OPERATOR, "&")}, ""); |
| 75 | + TestLex("op |", "|", {Token(Token::Kind::OPERATOR, "|")}, ""); |
| 76 | + TestLex("op ^", "^", {Token(Token::Kind::OPERATOR, "^")}, ""); |
| 77 | + TestLex("op =", "=", {Token(Token::Kind::OPERATOR, "=")}, ""); |
| 78 | + TestLex("op <", "<", {Token(Token::Kind::OPERATOR, "<")}, ""); |
| 79 | + TestLex("op >", ">", {Token(Token::Kind::OPERATOR, ">")}, ""); |
| 80 | + TestLex("op >==|", ">==|", {Token(Token::Kind::OPERATOR, ">==|")}, ""); |
| 81 | +} |
| 82 | + |
| 83 | +TEST(Lexer, TestNumbers) { |
| 84 | + TestLex("number 0", "0", {Token(Token::Kind::NUMBER, "0")}, ""); |
| 85 | + TestLex("number 1", "1", {Token(Token::Kind::NUMBER, "1")}, ""); |
| 86 | + TestLex("number 1.0", "1.0", {Token(Token::Kind::NUMBER, "1.0")}, ""); |
| 87 | + TestLex("number 0.10", "0.10", {Token(Token::Kind::NUMBER, "0.10")}, ""); |
| 88 | + TestLex("number 0e100", "0e100", {Token(Token::Kind::NUMBER, "0e100")}, ""); |
| 89 | + TestLex("number 1e100", "1e100", {Token(Token::Kind::NUMBER, "1e100")}, ""); |
| 90 | + TestLex("number 1.1e100", "1.1e100", |
| 91 | + {Token(Token::Kind::NUMBER, "1.1e100")}, ""); |
| 92 | + TestLex("number 1.1e-100", "1.1e-100", |
| 93 | + {Token(Token::Kind::NUMBER, "1.1e-100")}, ""); |
| 94 | + TestLex("number 1.1e+100", "1.1e+100", |
| 95 | + {Token(Token::Kind::NUMBER, "1.1e+100")}, ""); |
| 96 | + TestLex("number 0100", "0100", |
| 97 | + {Token(Token::Kind::NUMBER, "0"), Token(Token::Kind::NUMBER, "100")}, |
| 98 | + ""); |
| 99 | + TestLex("number 10+10", "10+10", |
| 100 | + {Token(Token::Kind::NUMBER, "10"), |
| 101 | + Token(Token::Kind::OPERATOR, "+"), |
| 102 | + Token(Token::Kind::NUMBER, "10")}, ""); |
| 103 | + TestLex("number 1.+3", "1.+3", {}, |
| 104 | + "number 1.+3:1:1: Couldn't lex number, junk after decimal point: +"); |
| 105 | + TestLex("number 1e!", "1e!", {}, |
| 106 | + "number 1e!:1:1: Couldn't lex number, junk after 'E': !"); |
| 107 | + TestLex("number 1e+!", "1e+!", {}, |
| 108 | + "number 1e+!:1:1: Couldn't lex number, junk after exponent sign: !"); |
| 109 | +} |
| 110 | + |
| 111 | +TEST(Lexer, TestDoubleStrings) { |
| 112 | + TestLex("double string \"hi\"", |
| 113 | + "\"hi\"", {Token(Token::Kind::STRING_DOUBLE, "hi")}, ""); |
| 114 | + TestLex("double string \"hi nl\"", |
| 115 | + "\"hi\n\"", {Token(Token::Kind::STRING_DOUBLE, "hi\n")}, ""); |
| 116 | + TestLex("double string \"hi\\\"\"", |
| 117 | + "\"hi\\\"\"", {Token(Token::Kind::STRING_DOUBLE, "hi\\\"")}, ""); |
| 118 | + TestLex("double string \"hi\\nl\"", |
| 119 | + "\"hi\\\n\"", {Token(Token::Kind::STRING_DOUBLE, "hi\\\n")}, ""); |
| 120 | + TestLex("double string \"hi", |
| 121 | + "\"hi", {}, "double string \"hi:1:1: Unterminated string"); |
| 122 | +} |
| 123 | + |
| 124 | +TEST(Lexer, TestSingleStrings) { |
| 125 | + TestLex("single string 'hi'", |
| 126 | + "'hi'", {Token(Token::Kind::STRING_SINGLE, "hi")}, ""); |
| 127 | + TestLex("single string 'hi nl'", |
| 128 | + "'hi\n'", {Token(Token::Kind::STRING_SINGLE, "hi\n")}, ""); |
| 129 | + TestLex("single string 'hi\\''", |
| 130 | + "'hi\\''", {Token(Token::Kind::STRING_SINGLE, "hi\\'")}, ""); |
| 131 | + TestLex("single string 'hi\\nl'", |
| 132 | + "'hi\\\n'", {Token(Token::Kind::STRING_SINGLE, "hi\\\n")}, ""); |
| 133 | + TestLex("single string 'hi", |
| 134 | + "'hi", {}, "single string 'hi:1:1: Unterminated string"); |
| 135 | +} |
| 136 | + |
TEST(Lexer, TestBlockStringSpaces) {
  // A |||-delimited text block indented with spaces: the common leading
  // indent is stripped from the content and recorded on the token.
  // NOTE(review): runs of spaces in these literals look collapsed in this
  // copy of the file (the stripped content keeps a leading space on "more"
  // that the input line does not show) — verify the exact whitespace
  // against the canonical source.
  const char str[] =
      "|||\n"
      " test\n"
      " more\n"
      " |||\n"
      " foo\n"
      "|||";
  const Token token = Token(
      Token::Kind::STRING_BLOCK,
      {},
      "test\n more\n|||\n foo\n",  // content after stripping the indent
      " ",                         // presumably the stripped block indent
      "",
      {});
  TestLex("block string spaces", str, {token}, "");
}

TEST(Lexer, TestBlockStringTabs) {
  // Same as TestBlockStringSpaces but the block indent is a tab character.
  // NOTE(review): the space runs inside these literals may have been
  // collapsed in this copy — verify against the canonical source.
  const char str[] =
      "|||\n"
      "\ttest\n"
      "\t more\n"
      "\t|||\n"
      "\t foo\n"
      "|||";
  const Token token = Token(
      Token::Kind::STRING_BLOCK,
      {},
      "test\n more\n|||\n foo\n",  // content after stripping the tab indent
      "\t",                        // presumably the stripped block indent
      "",
      {});
  TestLex("block string tabs", str, {token}, "");
}

TEST(Lexer, TestBlockStringsMixed) {
  // The block indent may mix tabs and spaces; the exact byte sequence is
  // stripped and recorded on the token.
  // NOTE(review): the space runs inside these literals may have been
  // collapsed in this copy — verify against the canonical source.
  const char str[] =
      "|||\n"
      "\t \ttest\n"
      "\t \t more\n"
      "\t \t|||\n"
      "\t \t foo\n"
      "|||";
  const Token token = Token(
      Token::Kind::STRING_BLOCK,
      {},
      "test\n more\n|||\n foo\n",  // content after stripping the indent
      "\t \t",                     // presumably the stripped block indent
      "",
      {});
  TestLex("block string mixed", str, {token}, "");
}

TEST(Lexer, TestBlockStringBlanks) {
  // Blank lines inside a text block are preserved in the content (including
  // one immediately after the opening |||) and do not affect the indent.
  // NOTE(review): the space runs inside these literals may have been
  // collapsed in this copy — verify against the canonical source.
  const char str[] =
      "|||\n\n"
      " test\n\n\n"
      " more\n"
      " |||\n"
      " foo\n"
      "|||";
  const Token token = Token(
      Token::Kind::STRING_BLOCK,
      {},
      "\ntest\n\n\n more\n|||\n foo\n",  // blank lines kept verbatim
      " ",                               // presumably the stripped indent
      "",
      {});
  TestLex("block string blanks", str, {token}, "");
}

TEST(Lexer, TestBlockStringBadIndent) {
  // A content line indented less than the block's established indent (and
  // not a terminating |||) means the block never terminates properly.
  // NOTE(review): this test only makes sense if "test" is indented more
  // deeply than "foo", but both literals show a single space in this copy —
  // the space runs were almost certainly collapsed in transit; restore them
  // from the canonical source.
  const char str[] =
      "|||\n"
      " test\n"
      " foo\n"
      "|||";
  TestLex("block string bad indent", str, {},
          "block string bad indent:1:1: Text block not terminated with |||");
}

TEST(Lexer, TestBlockStringEof) {
  // Input that ends inside a text block (no newline, no closing |||) is an
  // unexpected-EOF error.
  // NOTE(review): leading whitespace in this literal may have been
  // collapsed in this copy — verify against the canonical source.
  const char str[] =
      "|||\n"
      " test";
  TestLex("block string eof", str, {}, "block string eof:1:1: Unexpected EOF");
}

TEST(Lexer, TestBlockStringNotTerm) {
  // A text block whose input ends after a complete content line but with no
  // closing ||| is reported as unterminated.
  // NOTE(review): leading whitespace in this literal may have been
  // collapsed in this copy — verify against the canonical source.
  const char str[] =
      "|||\n"
      " test\n";
  TestLex("block string not term", str, {},
          "block string not term:1:1: Text block not terminated with |||");
}

| 234 | +TEST(Lexer, TestBlockStringNoWs) { |
| 235 | + const char str[] = |
| 236 | + "|||\n" |
| 237 | + "test\n" |
| 238 | + "|||"; |
| 239 | + TestLex("block string no ws", str, {}, |
| 240 | + "block string no ws:1:1: Text block's first line must start with" |
| 241 | + " whitespace."); |
| 242 | +} |
| 243 | + |
| 244 | +TEST(Lexer, TestKeywords) { |
| 245 | + TestLex("assert", "assert", {Token(Token::Kind::ASSERT, "assert")}, ""); |
| 246 | + TestLex("else", "else", {Token(Token::Kind::ELSE, "else")}, ""); |
| 247 | + TestLex("error", "error", {Token(Token::Kind::ERROR, "error")}, ""); |
| 248 | + TestLex("false", "false", {Token(Token::Kind::FALSE, "false")}, ""); |
| 249 | + TestLex("for", "for", {Token(Token::Kind::FOR, "for")}, ""); |
| 250 | + TestLex("function", "function", |
| 251 | + {Token(Token::Kind::FUNCTION, "function")}, ""); |
| 252 | + TestLex("if", "if", {Token(Token::Kind::IF, "if")}, ""); |
| 253 | + TestLex("import", "import", {Token(Token::Kind::IMPORT, "import")}, ""); |
| 254 | + TestLex("importstr", "importstr", |
| 255 | + {Token(Token::Kind::IMPORTSTR, "importstr")}, ""); |
| 256 | + TestLex("in", "in", {Token(Token::Kind::IN, "in")}, ""); |
| 257 | + TestLex("local", "local", {Token(Token::Kind::LOCAL, "local")}, ""); |
| 258 | + TestLex("null", "null", {Token(Token::Kind::NULL_LIT, "null")}, ""); |
| 259 | + TestLex("self", "self", {Token(Token::Kind::SELF, "self")}, ""); |
| 260 | + TestLex("super", "super", {Token(Token::Kind::SUPER, "super")}, ""); |
| 261 | + TestLex("tailstrict", "tailstrict", |
| 262 | + {Token(Token::Kind::TAILSTRICT, "tailstrict")}, ""); |
| 263 | + TestLex("then", "then", {Token(Token::Kind::THEN, "then")}, ""); |
| 264 | + TestLex("true", "true", {Token(Token::Kind::TRUE, "true")}, ""); |
| 265 | +} |
| 266 | + |
| 267 | +TEST(Lexer, TestIdentifier) { |
| 268 | + TestLex("identifier", "foobar123", |
| 269 | + {Token(Token::Kind::IDENTIFIER, "foobar123")}, ""); |
| 270 | + TestLex("identifier", "foo bar123", |
| 271 | + {Token(Token::Kind::IDENTIFIER, "foo"), |
| 272 | + Token(Token::Kind::IDENTIFIER, "bar123")}, ""); |
| 273 | +} |
| 274 | + |
| 275 | +TEST(Lexer, TestComments) { |
| 276 | + // TODO(dzc): Test does not look at fodder yet. |
| 277 | + TestLex("c++ comment", "// hi", {}, ""); |
| 278 | + TestLex("hash comment", "# hi", {}, ""); |
| 279 | + TestLex("c comment", "/* hi */", {}, ""); |
| 280 | + TestLex("c comment no term", "/* hi", {}, |
| 281 | + "c comment no term:1:1: Multi-line comment has no terminating */."); |
| 282 | +} |
| 283 | + |
} // namespace