Skip to content

Commit a36fe10

Browse files
committed
Implement verbatim string literals
Fixes google#246
1 parent 8db69ed commit a36fe10

File tree

11 files changed

+120
-2
lines changed

11 files changed

+120
-2
lines changed

core/ast.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -473,7 +473,7 @@ struct LiteralNumber : public AST {
473473
/** Represents JSON strings. */
474474
struct LiteralString : public AST {
475475
String value;
476-
enum TokenKind { SINGLE, DOUBLE, BLOCK };
476+
enum TokenKind { SINGLE, DOUBLE, BLOCK, VERBATIM_SINGLE, VERBATIM_DOUBLE };
477477
TokenKind tokenKind;
478478
std::string blockIndent; // Only contains ' ' and '\t'.
479479
std::string blockTermIndent; // Only contains ' ' and '\t'.

core/desugarer.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -658,7 +658,9 @@ class Desugarer {
658658
// Nothing to do.
659659

660660
} else if (auto *ast = dynamic_cast<LiteralString*>(ast_)) {
661-
if (ast->tokenKind != LiteralString::BLOCK) {
661+
if ((ast->tokenKind != LiteralString::BLOCK) &&
662+
(ast->tokenKind != LiteralString::VERBATIM_DOUBLE) &&
663+
(ast->tokenKind != LiteralString::VERBATIM_SINGLE)) {
662664
ast->value = jsonnet_string_unescape(ast->location, ast->value);
663665
}
664666
ast->tokenKind = LiteralString::DOUBLE;

core/formatter.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -464,6 +464,30 @@ class Unparser {
464464
}
465465
}
466466
o << ast->blockTermIndent << "|||";
467+
} else if (ast->tokenKind == LiteralString::VERBATIM_DOUBLE) {
468+
o << "@\"";
469+
for (const char32_t *cp = ast->value.c_str() ; *cp != U'\0' ; ++cp) {
470+
if (*cp == U'"') {
471+
o << "\"\"";
472+
} else {
473+
std::string utf8;
474+
encode_utf8(*cp, utf8);
475+
o << utf8;
476+
}
477+
}
478+
o << "\"";
479+
} else if (ast->tokenKind == LiteralString::VERBATIM_SINGLE) {
480+
o << "@'";
481+
for (const char32_t *cp = ast->value.c_str() ; *cp != U'\0' ; ++cp) {
482+
if (*cp == U'\'') {
483+
o << "''";
484+
} else {
485+
std::string utf8;
486+
encode_utf8(*cp, utf8);
487+
o << utf8;
488+
}
489+
}
490+
o << "'";
467491
}
468492

469493
} else if (dynamic_cast<const LiteralNull*>(ast_)) {
@@ -613,6 +637,8 @@ class EnforceStringStyle : public FmtPass {
613637
void visit(LiteralString *lit)
614638
{
615639
if (lit->tokenKind == LiteralString::BLOCK) return;
640+
if (lit->tokenKind == LiteralString::VERBATIM_DOUBLE) return;
641+
if (lit->tokenKind == LiteralString::VERBATIM_SINGLE) return;
616642
String canonical = jsonnet_string_unescape(lit->location, lit->value);
617643
unsigned num_single = 0, num_double = 0;
618644
for (char32_t c : canonical) {

core/lexer.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -507,6 +507,38 @@ Tokens jsonnet_lex(const std::string &filename, const char *input)
507507
}
508508
break;
509509

510+
// Verbatim string literals.
511+
case '@': {
512+
c++;
513+
if (*c != '"' && *c != '\'') {
514+
std::stringstream ss;
515+
ss << "Couldn't lex verbatim string, junk after '@': " << *c;
516+
throw StaticError(filename, begin, ss.str());
517+
}
518+
const char quot = *c;
519+
c++; // Advance beyond the opening quote.
520+
for (; ; ++c) {
521+
if (*c == '\0') {
522+
throw StaticError(filename, begin, "Unterminated verbatim string");
523+
}
524+
if (*c == quot) {
525+
if (*(c+1) == quot) {
526+
c++;
527+
} else {
528+
break;
529+
}
530+
}
531+
data += *c;
532+
}
533+
c++; // Advance beyond the closing quote.
534+
if (quot == '"') {
535+
kind = Token::VERBATIM_STRING_DOUBLE;
536+
} else {
537+
kind = Token::VERBATIM_STRING_SINGLE;
538+
}
539+
}
540+
break;
541+
510542
// Keywords
511543
default:
512544
if (is_identifier_first(*c)) {

core/lexer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,8 @@ struct Token {
102102
STRING_DOUBLE,
103103
STRING_SINGLE,
104104
STRING_BLOCK,
105+
VERBATIM_STRING_SINGLE,
106+
VERBATIM_STRING_DOUBLE,
105107

106108
// Keywords
107109
ASSERT,
@@ -176,6 +178,8 @@ struct Token {
176178
case OPERATOR: return "OPERATOR";
177179
case STRING_SINGLE: return "STRING_SINGLE";
178180
case STRING_DOUBLE: return "STRING_DOUBLE";
181+
case VERBATIM_STRING_SINGLE: return "VERBATIM_STRING_SINGLE";
182+
case VERBATIM_STRING_DOUBLE: return "VERBATIM_STRING_DOUBLE";
179183
case STRING_BLOCK: return "STRING_BLOCK";
180184

181185
case ASSERT: return "assert";

core/lexer_test.cpp

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,38 @@ TEST(Lexer, TestSingleStrings)
140140
"'hi", {}, "single string 'hi:1:1: Unterminated string");
141141
}
142142

143+
TEST(Lexer, TestVerbatimDoubleStrings)
144+
{
145+
testLex("verbatim double string @\"hi\"",
146+
"@\"hi\"", {Token(Token::Kind::VERBATIM_STRING_DOUBLE, "hi")}, "");
147+
testLex("verbatim double string @\"hi nl\"",
148+
"@\"hi\n\"", {Token(Token::Kind::VERBATIM_STRING_DOUBLE, "hi\n")}, "");
149+
testLex("verbatim double string @\"hi\\\"",
150+
"@\"hi\\\"", {Token(Token::Kind::VERBATIM_STRING_DOUBLE, "hi\\")}, "");
151+
testLex("verbatim double string @\"hi\\\\\"",
152+
"@\"hi\\\\\"", {Token(Token::Kind::VERBATIM_STRING_DOUBLE, "hi\\\\")}, "");
153+
testLex("verbatim double string @\"hi\"\"\"",
154+
"@\"hi\"\"\"", {Token(Token::Kind::VERBATIM_STRING_DOUBLE, "hi\"")}, "");
155+
testLex("verbatim double string @\"\"\"hi\"",
156+
"@\"\"\"hi\"", {Token(Token::Kind::VERBATIM_STRING_DOUBLE, "\"hi")}, "");
157+
}
158+
159+
TEST(Lexer, TestVerbatimSingleStrings)
160+
{
161+
testLex("verbatim single string @'hi'",
162+
"@'hi'", {Token(Token::Kind::VERBATIM_STRING_SINGLE, "hi")}, "");
163+
testLex("verbatim single string @'hi nl'",
164+
"@'hi\n'", {Token(Token::Kind::VERBATIM_STRING_SINGLE, "hi\n")}, "");
165+
testLex("verbatim single string @'hi\\'",
166+
"@'hi\\'", {Token(Token::Kind::VERBATIM_STRING_SINGLE, "hi\\")}, "");
167+
testLex("verbatim single string @'hi\\\\'",
168+
"@'hi\\\\'", {Token(Token::Kind::VERBATIM_STRING_SINGLE, "hi\\\\")}, "");
169+
testLex("verbatim single string @'hi'''",
170+
"@'hi'''", {Token(Token::Kind::VERBATIM_STRING_SINGLE, "hi'")}, "");
171+
testLex("verbatim single string @'''hi'",
172+
"@'''hi'", {Token(Token::Kind::VERBATIM_STRING_SINGLE, "'hi")}, "");
173+
}
174+
143175
TEST(Lexer, TestBlockStringSpaces)
144176
{
145177
const char str[] =

core/parser.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -634,6 +634,13 @@ class Parser {
634634
return alloc->make<LiteralString>(
635635
span(tok), tok.fodder, tok.data32(), LiteralString::BLOCK,
636636
tok.stringBlockIndent, tok.stringBlockTermIndent);
637+
case Token::VERBATIM_STRING_SINGLE:
638+
return alloc->make<LiteralString>(
639+
span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_SINGLE, "", "");
640+
case Token::VERBATIM_STRING_DOUBLE:
641+
return alloc->make<LiteralString>(
642+
span(tok), tok.fodder, tok.data32(), LiteralString::VERBATIM_DOUBLE, "", "");
643+
637644

638645
case Token::FALSE:
639646
return alloc->make<LiteralBoolean>(span(tok), tok.fodder, false);

doc/language/spec.html

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ <h2 id="lexing">Lexing</h2>
130130
<code>"</code> </li>
131131
<li>Single-quoted, beginning with <code>'</code> and ending with the first subsequent non-quoted
132132
<code>'</code> </li>
133+
<li>Double-quoted verbatim, beginning with <code>@"</code> and ending with the first subsequent
134+
non-doubled <code>"</code> </li>
135+
<li>Single-quoted verbatim, beginning with <code>@'</code> and ending with the first subsequent
136+
non-doubled <code>'</code> </li>
133137
<li>Text block, beginning with <code>|||</code>, followed by optional whitespace and a new-line.
134138
The next line must be prefixed with some non-zero length whitespace <i>W</i>. The block ends at the
135139
first subsequent line that does not begin with <i>W</i>, and it is an error if this line does not
@@ -143,6 +147,11 @@ <h2 id="lexing">Lexing</h2>
143147
characters: <code>"'\bfnrt0</code> which have their standard meanings, as well as
144148
<code>\uXXXX</code> for hexadecimal unicode escapes.</p>
145149

150+
<p>Verbatim strings eschew all of the normal string escaping, including hexadecimal unicode escapes.
151+
Every character in a verbatim string is processed literally, with the exception of doubled
152+
end-quotes. Within a verbatim single-quoted string, <code>''</code> is processed as <code>'</code>,
153+
and within a verbatim double-quoted string, <code>""</code> is processed as <code>"</code>.</p>
154+
146155
<p>In the rest of this specification, the string is assumed to be canonicalized into a sequence of
147156
unicode codepoints with no record of the original quoting form, and with any escape characters
148157
removed. </p>

test_suite/unparse.jsonnet

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ limitations under the License.
2222
zero: 0,
2323
string: "'foo\n bar\n\n\"bar\u0005\"\'\t \u0050\b\f\r\\",
2424
string2: '"foo\n bar\n\n\'bar\u0005\"\'\t \u0050\b\f\r\\',
25+
string3: @'"foo\n bar\n\n''bar\u0005\"''\t \u0050\b\f\r\\',
26+
string4: @"'foo\n bar\n\n'bar\u0005""'\t \u0050\b\f\r\\",
2527
"lit_field1": 1,
2628
'lit_field2': 1,
2729
"false": false,

test_suite/unparse.jsonnet.fmt.golden

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ limitations under the License.
2222
zero: 0,
2323
string: "'foo\n bar\n\n\"bar\u0005\"\'\t \u0050\b\f\r\\",
2424
string2: '"foo\n bar\n\n\'bar\u0005\"\'\t \u0050\b\f\r\\',
25+
string3: @'"foo\n bar\n\n''bar\u0005\"''\t \u0050\b\f\r\\',
26+
string4: @"'foo\n bar\n\n'bar\u0005""'\t \u0050\b\f\r\\",
2527
lit_field1: 1,
2628
lit_field2: 1,
2729
"false": false,

test_suite/unparse.jsonnet.golden

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
"small_number": 1e-14,
1010
"string": "'foo\n bar\n\n\"bar\u0005\"'\t P\b\f\r\\",
1111
"string2": "\"foo\n bar\n\n'bar\u0005\"'\t P\b\f\r\\",
12+
"string3": "\"foo\\n bar\\n\\n'bar\\u0005\\\"'\\t \\u0050\\b\\f\\r\\\\",
13+
"string4": "'foo\\n bar\\n\\n'bar\\u0005\"'\\t \\u0050\\b\\f\\r\\\\",
1214
"true": true,
1315
"with\"quote": "\"",
1416
"zero": 0

0 commit comments

Comments
 (0)