Skip to content

Commit 8cdbbe4

Browse files
committed
SI-12290: support JDK15 text blocks in Java parser
JDK15 introduced text blocks (JEP 378) for writing multiline strings. This adds support for parsing these strings in the Java parser. The logic for interpreting the literals is a little complicated, but follows from section "3.10.6. Text Blocks" of the Java language specification. The test cases include examples from there and from the JEP. Fixes scala/bug#12290
1 parent 8a2cf63 commit 8cdbbe4

File tree

4 files changed

+275
-8
lines changed

4 files changed

+275
-8
lines changed

src/compiler/scala/tools/nsc/javac/JavaScanners.scala

Lines changed: 124 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,9 @@ trait JavaScanners extends ast.parser.ScannersCommon {
239239
*/
240240
protected def putChar(c: Char): Unit = { cbuf.append(c) }
241241

242+
/** Truncate the literal buffer by `n` characters; nothing happens unless `n` is positive. */
private def popNChars(n: Int): Unit = {
  if (n > 0) {
    val keptLength = cbuf.length - n
    cbuf.setLength(keptLength)
  }
}
244+
242245
/** Clear buffer and set name */
243246
private def setName(): Unit = {
244247
name = newTermName(cbuf.toString())
@@ -322,15 +325,26 @@ trait JavaScanners extends ast.parser.ScannersCommon {
322325

323326
case '\"' =>
324327
in.next()
325-
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
326-
getlitch()
327-
}
328-
if (in.ch == '\"') {
329-
token = STRINGLIT
330-
setName()
331-
in.next()
328+
if (in.ch != '\"') { // "..." non-empty string literal
329+
while (in.ch != '\"' && (in.isUnicode || in.ch != CR && in.ch != LF && in.ch != SU)) {
330+
getlitch()
331+
}
332+
if (in.ch == '\"') {
333+
token = STRINGLIT
334+
setName()
335+
in.next()
336+
} else {
337+
syntaxError("unclosed string literal")
338+
}
332339
} else {
333-
syntaxError("unclosed string literal")
340+
in.next()
341+
if (in.ch != '\"') { // "" empty string literal
342+
token = STRINGLIT
343+
setName()
344+
} else {
345+
in.next()
346+
getTextBlock()
347+
}
334348
}
335349
return
336350

@@ -702,6 +716,108 @@ trait JavaScanners extends ast.parser.ScannersCommon {
702716
in.next()
703717
}
704718

719+
/** Read a triple-quote delimited text block, starting after the first three
 *  double quotes.
 *
 *  The block is scanned twice. A lookahead pass locates the closing triple
 *  quote and computes the smallest leading-whitespace count over the lines
 *  that contain something other than whitespace, plus the line holding the
 *  closing delimiter. The reader is then rewound and a second pass builds the
 *  literal: up to that many leading characters are dropped from each line,
 *  trailing whitespace is removed, and each CR or LF is emitted as `\n`.
 *
 *  On success `token` is set to STRINGLIT, the literal is stored through
 *  `setName()`, and the closing delimiter is consumed. If the opening
 *  delimiter is not followed by a line terminator, or the block is never
 *  closed, a syntax error is reported and the method returns early.
 */
private def getTextBlock(): Unit = {
  // Open delimiter is followed by optional space, then a newline.
  // (These must be comparisons: `in.ch = ' '` would be an assignment.)
  while (in.ch == ' ' || in.ch == '\t' || in.ch == FF) {
    in.next()
  }
  if (in.ch != LF && in.ch != CR) {
    syntaxError("illegal text block open delimiter sequence, missing line terminator")
    return
  }
  in.next()

  /* Do a lookahead scan over the full text block to:
   *   - compute common white space prefix
   *   - find the offset where the text block ends
   */
  var commonWhiteSpacePrefix = Int.MaxValue
  var blockEndOffset = 0
  val backtrackTo = in.copy // snapshot of the reader, restored for the second pass
  var blockClosed = false
  var lineWhiteSpacePrefix = 0
  var lineIsOnlyWhitespace = true
  while (!blockClosed && (in.isUnicode || in.ch != SU)) {
    if (in.ch == '\"') { // Potential end of the block
      in.next()
      if (in.ch == '\"') {
        in.next()
        if (in.ch == '\"') {
          blockClosed = true
          // The closing delimiter's line always takes part in the minimum
          commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
          // NOTE(review): relies on `cpos` semantics of the reader; the second
          // pass stops here and then skips exactly three characters - confirm
          blockEndOffset = in.cpos - 2
        }
      }

      // Not the end of the block - just a single or double " character
      if (!blockClosed) {
        lineIsOnlyWhitespace = false
      }
    } else if (in.ch == CR || in.ch == LF) { // new line in the block
      in.next()
      // Whitespace-only lines do not constrain the common prefix
      if (!lineIsOnlyWhitespace) {
        commonWhiteSpacePrefix = commonWhiteSpacePrefix min lineWhiteSpacePrefix
      }
      lineWhiteSpacePrefix = 0
      lineIsOnlyWhitespace = true
    } else if (lineIsOnlyWhitespace && isWhitespace(in.ch)) { // extend white space prefix
      in.next()
      lineWhiteSpacePrefix += 1
    } else {
      lineIsOnlyWhitespace = false
      // Presumably consumes one (possibly escaped) literal character, so an
      // escaped quote is not mistaken for a delimiter - verify against getlitch
      getlitch()
    }
  }
  setName() // clear the literal buffer

  // Bail out if the block never did have an end
  if (!blockClosed) {
    syntaxError("unclosed text block")
    return
  }

  // Second pass: construct the literal string value this time
  in = backtrackTo
  while (in.cpos < blockEndOffset) {
    // Drop the line's leading whitespace
    var remainingPrefix = commonWhiteSpacePrefix
    while (remainingPrefix > 0 && in.ch != CR && in.ch != LF && in.cpos < blockEndOffset) {
      in.next()
      remainingPrefix -= 1
    }

    // Copy the rest of the line, counting the run of whitespace at its end
    var trailingWhitespaceLength = 0
    while (in.ch != CR && in.ch != LF && in.cpos < blockEndOffset) {
      if (isWhitespace(in.ch)) {
        trailingWhitespaceLength += 1
      } else {
        trailingWhitespaceLength = 0
      }
      getlitch()
    }

    // Drop the line's trailing whitespace
    popNChars(trailingWhitespaceLength)

    // Normalize line terminators
    if (in.ch == CR || in.ch == LF) {
      in.next()
      putChar('\n')
    }
  }

  token = STRINGLIT
  setName()

  // Trailing """
  in.next()
  in.next()
  in.next()
}
820+
705821
/** read fractional part and exponent of floating point number
706822
* if one is present.
707823
*/

test/files/run/t12290.check

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
====
2+
A text
3+
4+
====
5+
<html>
6+
<body>
7+
<p>Hello, world</p>
8+
</body>
9+
</html>
10+
11+
====
12+
SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB"
13+
WHERE "CITY" = 'INDIANAPOLIS'
14+
ORDER BY "EMP_ID", "LAST_NAME";
15+
16+
====
17+
<html>
18+
<body>
19+
<p>Hello, world</p>
20+
</body>
21+
</html>
22+
23+
====
24+
<html>
25+
<body>
26+
<p>Hello, world</p>
27+
</body>
28+
</html>
29+
30+
====
31+
<html>
32+
<body>
33+
<p>Hello, world</p>
34+
</body>
35+
36+
</html>
37+
38+
====
39+
<html>
40+
41+
<body>
42+
<p>Hello, world</p>
43+
</body>
44+
</html>
45+
46+
====
47+
String text = """
48+
A text block inside a text block
49+
""";
50+
51+
====
52+
foo bar
53+
baz
54+
====
55+
56+
====

test/files/run/t12290/Test.scala

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/* The Scala compiler resolves each singleton type below from the value it
 * parsed out of the Java source, so printing `valueOf` of those types shows
 * whether the Java string literals were parsed properly.
 */
object Test extends App {
  private val separator = "===="

  // Parsed constant values, in the order they are expected in the check file
  private val parsedLiterals: Seq[String] = Seq(
    valueOf[TextBlocks.aText.type],
    valueOf[TextBlocks.html1.type],
    valueOf[TextBlocks.query.type],
    valueOf[TextBlocks.html2.type],
    valueOf[TextBlocks.html3.type],
    valueOf[TextBlocks.html4.type],
    valueOf[TextBlocks.html5.type],
    valueOf[TextBlocks.code.type],
    valueOf[TextBlocks.simpleString.type],
    valueOf[TextBlocks.emptyString.type]
  )

  // Every value is preceded by a separator line; one more closes the output
  parsedLiterals.foreach { literal =>
    println(separator)
    println(literal)
  }
  println(separator)
}

test/files/run/t12290/TextBlocks.java

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
class TextBlocks {
2+
3+
final static String aText = """
4+
A text
5+
""";
6+
7+
final static String html1 = """
8+
<html>
9+
<body>
10+
<p>Hello, world</p>
11+
</body>
12+
</html>
13+
""";
14+
15+
// quote characters do not need to be escaped
16+
final static String query = """
17+
SELECT "EMP_ID", "LAST_NAME" FROM "EMPLOYEE_TB"
18+
WHERE "CITY" = 'INDIANAPOLIS'
19+
ORDER BY "EMP_ID", "LAST_NAME";
20+
""";
21+
22+
// incidental trailing spaces
23+
final static String html2 = """
24+
<html>
25+
<body>
26+
<p>Hello, world</p>
27+
</body>
28+
</html>
29+
""";
30+
31+
// position of the closing delimiter influences the stripped indentation
32+
final static String html3 = """
33+
<html>
34+
<body>
35+
<p>Hello, world</p>
36+
</body>
37+
</html>
38+
""";
39+
40+
// a blank line does not affect the stripped indentation
41+
final static String html4 = """
42+
<html>
43+
<body>
44+
<p>Hello, world</p>
45+
</body>
46+
47+
</html>
48+
""";
49+
50+
// escape sequences
51+
final static String html5 = """
52+
<html>\n
53+
<body>
54+
<p>Hello,\tworld</p>
55+
</body>
56+
</html>
57+
""";
58+
final static String code =
59+
"""
60+
String text = \"""
61+
A text block inside a text block
62+
\""";
63+
""";
64+
65+
final static String simpleString = "foo\tbar\nbaz";
66+
67+
final static String emptyString = "";
68+
}

0 commit comments

Comments
 (0)