Skip to content

Commit 13d9f67

Browse files
committed
SI-12290: support JDK15 text blocks in Java parser
JDK15 introduced text blocks (JEP 378) for writing multiline strings. This adds support for parsing these strings in the Java parser. The logic for interpreting the literals is a little complicated, but it follows section 3.10.6, "Text Blocks", of the Java Language Specification. The test cases include examples from there and from the JEP. Fixes scala/bug#12290
1 parent 8a2cf63 commit 13d9f67

File tree

4 files changed

+288
-8
lines changed

4 files changed

+288
-8
lines changed

src/compiler/scala/tools/nsc/javac/JavaScanners.scala

Lines changed: 138 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@ trait JavaScanners extends ast.parser.ScannersCommon {
239239
*/
240240
protected def putChar(c: Char): Unit = {
  // Append to the literal buffer; the builder returned by `append` is not needed.
  cbuf.append(c)
  ()
}
241241

242+
/** Remove the last N characters from the buffer */
243+
private def popNChars(n: Int): Unit = {
  // A non-positive count leaves the buffer untouched.
  if (n >= 1) {
    val keptLength = cbuf.length - n
    cbuf.setLength(keptLength)
  }
}
244+
242245
/** Clear buffer and set name */
243246
private def setName(): Unit = {
244247
name = newTermName(cbuf.toString())
@@ -322,15 +325,26 @@ trait JavaScanners extends ast.parser.ScannersCommon {
322325

323326
case '\"' =>
324327
in.next()
325-
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
326-
getlitch()
327-
}
328-
if (in.ch == '\"') {
329-
token = STRINGLIT
330-
setName()
331-
in.next()
328+
if (in.ch != '\"') { // "..." non-empty string literal
329+
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
330+
getlitch()
331+
}
332+
if (in.ch == '\"') {
333+
token = STRINGLIT
334+
setName()
335+
in.next()
336+
} else {
337+
syntaxError("unclosed string literal")
338+
}
332339
} else {
333-
syntaxError("unclosed string literal")
340+
in.next()
341+
if (in.ch != '\"') { // "" empty string literal
342+
token = STRINGLIT
343+
setName()
344+
} else {
345+
in.next()
346+
getTextBlock()
347+
}
334348
}
335349
return
336350

@@ -691,6 +705,8 @@ trait JavaScanners extends ast.parser.ScannersCommon {
691705
case '\"' => putChar('\"')
692706
case '\'' => putChar('\'')
693707
case '\\' => putChar('\\')
708+
case 's' => putChar(' ') // specific to text blocks
709+
case CR | LF => // specific to text blocks
694710
case _ =>
695711
syntaxError(in.cpos - 1, "invalid escape character")
696712
putChar(in.ch)
@@ -702,6 +718,120 @@ trait JavaScanners extends ast.parser.ScannersCommon {
702718
in.next()
703719
}
704720

721+
/** Read a triple-quote delimited text block, starting after the first three
 * double quotes.
 *
 * On success `token` is set to `STRINGLIT`, `name` receives the interpreted
 * string value and the reader is left just past the closing delimiter. If the
 * opening delimiter is not followed by a line terminator, or the block is
 * never closed, a syntax error is reported and the method returns early.
 *
 * The block is scanned twice from the same starting position:
 *  1. a lookahead pass finds where the block ends and computes the common
 *     leading white space shared by its significant lines;
 *  2. a second pass builds the value, dropping that common prefix and each
 *     line's trailing white space, and interpreting escapes via `getlitch`.
 */
private def getTextBlock(): Unit = {
  // Open delimiter is followed by optional space, then a newline
  while (in.ch == ' ' || in.ch == '\t' || in.ch == FF) {
    in.next()
  }
  if (in.ch != LF && in.ch != CR) { // CR-LF is already normalized into LF by `JavaCharArrayReader`
    syntaxError("illegal text block open delimiter sequence, missing line terminator")
    return
  }
  in.next()

  /* Do a lookahead scan over the full text block to:
   *   - compute common white space prefix
   *   - find the offset where the text block ends
   */
  var commonWhiteSpacePrefix = Int.MaxValue
  var blockEndOffset = 0
  val backtrackTo = in.copy
  var blockClosed = false
  var lineWhiteSpacePrefix = 0
  var lineIsOnlyWhitespace = true
  while (!blockClosed && (in.isUnicode || in.ch != SU)) {
    if (in.ch == '\"') { // Potential end of the block
      in.next()
      if (in.ch == '\"') {
        in.next()
        if (in.ch == '\"') {
          blockClosed = true
          // The line holding the closing delimiter always takes part in the minimum
          commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
          blockEndOffset = in.cpos - 2
        }
      }

      // Not the end of the block - just a single or double " character
      if (!blockClosed) {
        lineIsOnlyWhitespace = false
      }
    } else if (in.ch == CR || in.ch == LF) { // new line in the block
      in.next()
      // Lines made only of white space do not constrain the common prefix
      if (!lineIsOnlyWhitespace) {
        commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
      }
      lineWhiteSpacePrefix = 0
      lineIsOnlyWhitespace = true
    } else if (lineIsOnlyWhitespace &&
               (in.ch == ' ' || in.ch == '\t' || in.ch == FF)) { // extend white space prefix
      in.next()
      lineWhiteSpacePrefix += 1
    } else {
      lineIsOnlyWhitespace = false
      if (in.ch == '\\') {
        /* Skip over the escape by hand instead of calling `getlitch()`:
         *   - an escaped line terminator must still be seen by the new-line
         *     branch above, so that the continuation line's indentation is
         *     counted in the common prefix (the second pass strips the
         *     prefix from that line too);
         *   - invalid escapes are diagnosed by the second pass only, rather
         *     than once per pass;
         *   - nothing is appended to the literal buffer during lookahead.
         */
        in.next()
        if (in.ch != CR && in.ch != LF && (in.isUnicode || in.ch != SU)) {
          in.next() // the escaped character itself (e.g. the `"` of `\"`)
        }
      } else {
        in.next()
      }
    }
  }
  setName() // make sure the literal buffer is empty before it is filled below

  // Bail out if the block never did have an end
  if (!blockClosed) {
    syntaxError("unclosed text block")
    return
  }

  // Second pass: construct the literal string value this time
  in = backtrackTo
  while (in.cpos < blockEndOffset) {
    // Drop the line's leading whitespace
    var remainingPrefix = commonWhiteSpacePrefix
    while (remainingPrefix > 0 && in.ch != CR && in.ch != LF && in.cpos < blockEndOffset) {
      in.next()
      remainingPrefix -= 1
    }

    var trailingWhitespaceLength = 0
    var escapedNewline = false         // Does the line end with `\`?
    while (in.ch != CR && in.ch != LF && in.cpos < blockEndOffset && !escapedNewline) {
      // Count the run of literal white space seen so far at the end of the line;
      // an escape starts with `\`, which resets the run, so `\s` and `\t` survive
      if (isWhitespace(in.ch)) {
        trailingWhitespaceLength += 1
      } else {
        trailingWhitespaceLength = 0
      }

      // Detect if the line is about to end with `\`
      if (in.ch == '\\' && {
        val lookahead = in.copy
        lookahead.next()
        lookahead.ch == CR || lookahead.ch == LF
      }) {
        escapedNewline = true
      }

      getlitch()
    }

    // Drop the line's trailing whitespace
    popNChars(trailingWhitespaceLength)

    // Normalize line terminators
    if ((in.ch == CR || in.ch == LF) && !escapedNewline) {
      in.next()
      putChar('\n')
    }
  }

  token = STRINGLIT
  setName()

  // Trailing """
  in.next()
  in.next()
  in.next()
}
834+
705835
/** read fractional part and exponent of floating point number
706836
* if one is present.
707837
*/

test/files/run/t12290.check

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
====
2+
A text
3+
4+
====
5+
<html>
6+
<body>
7+
<p>Hello, world</p>
8+
</body>
9+
</html>
10+
11+
====
12+
SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB"
13+
WHERE "CITY" = 'INDIANAPOLIS'
14+
ORDER BY "EMP_ID", "LAST_NAME";
15+
16+
====
17+
<html>
18+
<body>
19+
<p>Hello, world</p>
20+
</body>
21+
</html>
22+
23+
====
24+
<html>
25+
<body>
26+
<p>Hello, world</p>
27+
</body>
28+
</html>
29+
30+
====
31+
<html>
32+
<body>
33+
<p>Hello, world</p>
34+
</body>
35+
36+
</html>
37+
38+
====
39+
<html>
40+
41+
<body> <p>Hello , world</p>
42+
</body>
43+
</html>
44+
45+
====
46+
String text = """
47+
A text block inside a text block
48+
""";
49+
50+
====
51+
foo bar
52+
baz
53+
====
54+
55+
====

test/files/run/t12290/Test.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* Using `valueOf` is a way to check that the Java string literals were properly
2+
* parsed, since the parsed value is what the Scala compiler will use when
3+
* resolving the singleton types
4+
*/
5+
object Test extends App {
  // Constants declared in TextBlocks.java, in the order they are printed.
  // `valueOf` on the singleton type yields the value the compiler recorded for
  // each Java literal.
  val parsedLiterals: Seq[String] = Seq(
    valueOf[TextBlocks.aText.type],
    valueOf[TextBlocks.html1.type],
    valueOf[TextBlocks.query.type],
    valueOf[TextBlocks.html2.type],
    valueOf[TextBlocks.html3.type],
    valueOf[TextBlocks.html4.type],
    valueOf[TextBlocks.html5.type],
    valueOf[TextBlocks.code.type],
    valueOf[TextBlocks.simpleString.type],
    valueOf[TextBlocks.emptyString.type]
  )

  // Every value is preceded by a separator line, and one more closes the output.
  for (literal <- parsedLiterals) {
    println("====")
    println(literal)
  }
  println("====")
}

test/files/run/t12290/TextBlocks.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
class TextBlocks {
2+
3+
final static String aText = """
4+
A text
5+
""";
6+
7+
final static String html1 = """
8+
<html>
9+
<body>
10+
<p>Hello, world</p>
11+
</body>
12+
</html>
13+
""";
14+
15+
// quote characters are unescaped
16+
final static String query = """
17+
SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB"
18+
WHERE "CITY" = 'INDIANAPOLIS'
19+
ORDER BY "EMP_ID", "LAST_NAME";
20+
""";
21+
22+
// incidental trailing spaces
23+
final static String html2 = """
24+
<html>
25+
<body>
26+
<p>Hello, world</p>
27+
</body>
28+
</html>
29+
""";
30+
31+
// trailing delimiter influences
32+
final static String html3 = """
33+
<html>
34+
<body>
35+
<p>Hello, world</p>
36+
</body>
37+
</html>
38+
""";
39+
40+
// blank line does not affect
41+
final static String html4 = """
42+
<html>
43+
<body>
44+
<p>Hello, world</p>
45+
</body>
46+
47+
</html>
48+
""";
49+
50+
// escape sequences
51+
final static String html5 = """
52+
<html>\n
53+
<body>\
54+
<p>Hello\s,\tworld</p>
55+
</body>
56+
</html>
57+
""";
58+
final static String code =
59+
"""
60+
String text = \"""
61+
A text block inside a text block
62+
\""";
63+
""";
64+
65+
final static String simpleString = "foo\tbar\nbaz";
66+
67+
final static String emptyString = "";
68+
}

0 commit comments

Comments
 (0)