Skip to content

Commit fef77da

Browse files
committed
Add Unicode support, fix #1
1 parent 78c8771 commit fef77da

11 files changed

+382
-213
lines changed

core/ast.h

Lines changed: 12 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ limitations under the License.
2525
#include <vector>
2626

2727
#include "core/lexer.h"
28+
#include "core/string.h"
2829

2930
enum ASTType {
3031
AST_APPLY,
@@ -51,18 +52,17 @@ enum ASTType {
5152
AST_VAR
5253
};
5354

54-
5555
/** Represents a variable / parameter / field name. */
5656
struct Identifier {
57-
std::string name;
58-
Identifier(const std::string &name)
57+
String name;
58+
Identifier(const String &name)
5959
: name(name)
6060
{ }
6161
};
6262

6363
static inline std::ostream &operator<<(std::ostream &o, const Identifier *id)
6464
{
65-
o << id->name;
65+
o << encode_utf8(id->name);
6666
return o;
6767
}
6868

@@ -213,16 +213,16 @@ struct Function : public AST {
213213

214214
/** Represents import "file". */
215215
struct Import : public AST {
216-
std::string file;
217-
Import(const LocationRange &lr, const std::string &file)
216+
String file;
217+
Import(const LocationRange &lr, const String &file)
218218
: AST(lr, AST_IMPORT), file(file)
219219
{ }
220220
};
221221

222222
/** Represents importstr "file". */
223223
struct Importstr : public AST {
224-
std::string file;
225-
Importstr(const LocationRange &lr, const std::string &file)
224+
String file;
225+
Importstr(const LocationRange &lr, const String &file)
226226
: AST(lr, AST_IMPORTSTR), file(file)
227227
{ }
228228
};
@@ -271,8 +271,8 @@ struct LiteralNumber : public AST {
271271

272272
/** Represents JSON strings. */
273273
struct LiteralString : public AST {
274-
std::string value;
275-
LiteralString(const LocationRange &lr, const std::string &value)
274+
String value;
275+
LiteralString(const LocationRange &lr, const String &value)
276276
: AST(lr, AST_LITERAL_STRING), value(value)
277277
{ }
278278
};
@@ -373,7 +373,7 @@ struct Var : public AST {
373373
/** Allocates ASTs on demand, frees them in its destructor.
374374
*/
375375
class Allocator {
376-
std::map<std::string, const Identifier*> internedIdentifiers;
376+
std::map<String, const Identifier*> internedIdentifiers;
377377
std::vector<AST*> allocated;
378378
public:
379379
template <class T, class... Args> T* make(Args&&... args)
@@ -386,7 +386,7 @@ class Allocator {
386386
*
387387
* The location used in the Identifier AST is that of the first one parsed.
388388
*/
389-
const Identifier *makeIdentifier(const std::string &name)
389+
const Identifier *makeIdentifier(const String &name)
390390
{
391391
auto it = internedIdentifiers.find(name);
392392
if (it != internedIdentifiers.end()) {

core/lexer.cpp

Lines changed: 4 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -19,8 +19,9 @@ limitations under the License.
1919
#include <string>
2020
#include <sstream>
2121

22-
#include "core/static_error.h"
2322
#include "core/lexer.h"
23+
#include "core/static_error.h"
24+
#include "core/string.h"
2425

2526
static bool is_upper(char c)
2627
{
@@ -411,23 +412,8 @@ std::list<Token> jsonnet_lex(const std::string &filename, const char *input)
411412
codepoint += digit;
412413
}
413414

414-
// Encode in UTF-8.
415-
if (codepoint < 0x0080) {
416-
data += codepoint;
417-
} else {
418-
auto msg = "Codepoint out of ascii range.";
419-
throw StaticError(filename, begin, msg);
420-
}
421-
/*
422-
} else if (codepoint < 0x0800) {
423-
data += 0xC0 | (codepoint >> 6);
424-
data += 0x80 | (codepoint & 0x3F);
425-
} else {
426-
data += 0xE0 | (codepoint >> 12);
427-
data += 0x80 | ((codepoint >> 6) & 0x3F);
428-
data += 0x80 | (codepoint & 0x3F);
429-
}
430-
*/
415+
encode_utf8(codepoint, data);
416+
431417
// Leave us on the last char, ready for the ++c at
432418
// the outer for loop.
433419
c += 3;

core/lexer.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@ limitations under the License.
2323
#include <list>
2424
#include <sstream>
2525

26+
#include "core/string.h"
2627
#include "core/static_error.h"
2728

2829
struct Token {
@@ -71,6 +72,8 @@ struct Token {
7172

7273
std::string data;
7374

75+
String data32(void) { return decode_utf8(data); }
76+
7477
LocationRange location;
7578

7679
Token(Kind kind, const std::string &data, const LocationRange &location)

0 commit comments

Comments
 (0)