diff --git a/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala b/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala index 47b2d3a69a10..334c47f3511d 100644 --- a/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala +++ b/compiler/src/dotty/tools/backend/jvm/BCodeBodyBuilder.scala @@ -1063,30 +1063,109 @@ trait BCodeBodyBuilder extends BCodeSkelBuilder { } } + /* Generate string concatenation + * + * On JDK 8: create and append using `StringBuilder` + * On JDK 9+: use `invokedynamic` with `StringConcatFactory` + */ def genStringConcat(tree: Tree): BType = { lineNumber(tree) liftStringConcat(tree) match { - // Optimization for expressions of the form "" + x. We can avoid the StringBuilder. + // Optimization for expressions of the form "" + x case List(Literal(Constant("")), arg) => genLoad(arg, ObjectReference) genCallMethod(defn.String_valueOf_Object, InvokeStyle.Static) case concatenations => - bc.genStartConcat - for (elem <- concatenations) { - val loadedElem = elem match { + val concatArguments = concatenations.view + .filter { + case Literal(Constant("")) => false // empty strings are no-ops in concatenation + case _ => true + } + .map { case Apply(boxOp, value :: Nil) if Erasure.Boxing.isBox(boxOp.symbol) && boxOp.symbol.denot.owner != defn.UnitModuleClass => // Eliminate boxing of primitive values. Boxing is introduced by erasure because // there's only a single synthetic `+` method "added" to the string class. value + case other => other + } + .toList + + // `StringConcatFactory` only got added in JDK 9, so use `StringBuilder` for lower + if (classfileVersion < asm.Opcodes.V9) { + + // Estimate capacity needed for the string builder + val approxBuilderSize = concatArguments.view.map { + case Literal(Constant(s: String)) => s.length + case Literal(c @ Constant(_)) if c.isNonUnitAnyVal => String.valueOf(c).length + case _ => 0 + }.sum + bc.genNewStringBuilder(approxBuilderSize) + + for (elem <- concatArguments) { + val elemType = tpeTK(elem) + genLoad(elem, elemType) + bc.genStringBuilderAppend(elemType) + } + bc.genStringBuilderEnd + } else { + + /* `StringConcatFactory#makeConcatWithConstants` accepts max 200 argument slots. If + * the string concatenation is longer (unlikely), we spill into multiple calls + */ + val MaxIndySlots = 200 + val TagArg = '\u0001' // indicates a hole (in the recipe string) for an argument + val TagConst = '\u0002' // indicates a hole (in the recipe string) for a constant + + val recipe = new StringBuilder() + val argTypes = Seq.newBuilder[asm.Type] + val constVals = Seq.newBuilder[String] + var totalArgSlots = 0 + var countConcats = 1 // ie. 1 + how many times we spilled + + for (elem <- concatArguments) { + val tpe = tpeTK(elem) + val elemSlots = tpe.size + + // Unlikely spill case + if (totalArgSlots + elemSlots >= MaxIndySlots) { + bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result()) + countConcats += 1 + totalArgSlots = 0 + recipe.setLength(0) + argTypes.clear() + constVals.clear() + } - case _ => elem + elem match { + case Literal(Constant(s: String)) => + if (s.contains(TagArg) || s.contains(TagConst)) { + totalArgSlots += elemSlots + recipe.append(TagConst) + constVals += s + } else { + recipe.append(s) + } + + case other => + totalArgSlots += elemSlots + recipe.append(TagArg) + val tpe = tpeTK(elem) + argTypes += tpe.toASMType + genLoad(elem, tpe) + } + } + bc.genIndyStringConcat(recipe.toString, argTypes.result(), constVals.result()) + + // If we spilled, generate one final concat + if (countConcats > 1) { + bc.genIndyStringConcat( + TagArg.toString * countConcats, + Seq.fill(countConcats)(StringRef.toASMType), + Seq.empty + ) } - val elemType = tpeTK(loadedElem) - genLoad(loadedElem, elemType) - bc.genConcat(elemType) } - bc.genEndConcat } StringRef } diff --git a/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala b/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala index 63dc4f430abc..ea257ee959ea 100644 --- a/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala +++ b/compiler/src/dotty/tools/backend/jvm/BCodeIdiomatic.scala @@ -224,24 +224,27 @@ trait BCodeIdiomatic { } // end of method genPrimitiveShift() - /* + /* Creates a new `StringBuilder` instance with the requested capacity + * * can-multi-thread */ - final def genStartConcat: Unit = { + final def genNewStringBuilder(size: Int): Unit = { jmethod.visitTypeInsn(Opcodes.NEW, JavaStringBuilderClassName) jmethod.visitInsn(Opcodes.DUP) + jmethod.visitLdcInsn(Integer.valueOf(size)) invokespecial( JavaStringBuilderClassName, INSTANCE_CONSTRUCTOR_NAME, - "()V", + "(I)V", itf = false ) } - /* + /* Issue a call to `StringBuilder#append` for the right element type + * * can-multi-thread */ - def genConcat(elemType: BType): Unit = { + final def genStringBuilderAppend(elemType: BType): Unit = { val paramType = elemType match { case ct: ClassBType if ct.isSubtypeOf(StringRef) => StringRef case ct: ClassBType if ct.isSubtypeOf(jlStringBufferRef) => jlStringBufferRef @@ -257,13 +260,38 @@ trait BCodeIdiomatic { invokevirtual(JavaStringBuilderClassName, "append", bt.descriptor) } - /* + /* Extract the built `String` from the `StringBuilder` + * * can-multi-thread */ - final def genEndConcat: Unit = { + final def genStringBuilderEnd: Unit = { invokevirtual(JavaStringBuilderClassName, "toString", "()Ljava/lang/String;") } + /* Concatenate top N arguments on the stack with `StringConcatFactory#makeConcatWithConstants` + * (only works for JDK 9+) + * + * can-multi-thread + */ + final def genIndyStringConcat( + recipe: String, + argTypes: Seq[asm.Type], + constants: Seq[String] + ): Unit = { + jmethod.visitInvokeDynamicInsn( + "makeConcatWithConstants", + asm.Type.getMethodDescriptor(StringRef.toASMType, argTypes:_*), + new asm.Handle( + asm.Opcodes.H_INVOKESTATIC, + "java/lang/invoke/StringConcatFactory", + "makeConcatWithConstants", + "(Ljava/lang/invoke/MethodHandles$Lookup;Ljava/lang/String;Ljava/lang/invoke/MethodType;Ljava/lang/String;[Ljava/lang/Object;)Ljava/lang/invoke/CallSite;", + false + ), + (recipe +: constants):_* + ) + } + /* * Emits one or more conversion instructions based on the types given as arguments. * diff --git a/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala b/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala index f288a8d6ff33..613e72b32e52 100644 --- a/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala +++ b/compiler/test/dotty/tools/backend/jvm/StringConcatTest.scala @@ -61,7 +61,7 @@ class StringConcatTest extends DottyBytecodeTest { } assertEquals(List( - "()V", + "(I)V", "toString()Ljava/lang/String;", "append(Ljava/lang/String;)Ljava/lang/StringBuilder;", "append(Ljava/lang/Object;)Ljava/lang/StringBuilder;", @@ -82,7 +82,7 @@ class StringConcatTest extends DottyBytecodeTest { ) assertEquals(List( - "()V", + "(I)V", "toString()Ljava/lang/String;", "append(Ljava/lang/String;)Ljava/lang/StringBuilder;", "append(Ljava/lang/String;)Ljava/lang/StringBuilder;", diff --git a/tests/run/StringConcat.check b/tests/run/StringConcat.check new file mode 100644 index 000000000000..f7c52a0ffece Binary files /dev/null and b/tests/run/StringConcat.check differ diff --git a/tests/run/StringConcat.scala b/tests/run/StringConcat.scala new file mode 100644 index 000000000000..774147ba1303 --- /dev/null +++ b/tests/run/StringConcat.scala @@ -0,0 +1,79 @@ +@main def Test() = { + + // This should generally obey 15.18.1. of the JLS (String Concatenation Operator +) + def concatenatingVariousTypes(): String = { + val str: String = "some string" + val sb: StringBuffer = new StringBuffer("some stringbuffer") + val cs: CharSequence = java.nio.CharBuffer.allocate(50).append("charsequence") + val i: Int = 123456789 + val s: Short = 345 + val b: Byte = 12 + val z: Boolean = true + val f: Float = 3.14f + val j: Long = 98762147483647L + val d: Double = 3.1415d + + "String " + str + "\n" + + "StringBuffer " + sb + "\n" + + "CharSequence " + cs + "\n" + + "Int " + i + "\n" + + "Short " + s + "\n" + + "Byte " + b + "\n" + + "Boolean " + z + "\n" + + "Float " + f + "\n" + + "Long " + j + "\n" + + "Double " + d + "\n" + } + // The characters `\u0001` and `\u0002` play a special role in `StringConcatFactory` + def concatenationInvolvingSpecialCharacters(): String = { + val s1 = "Qux" + val s2 = "Quux" + + s"Foo \u0001 $s1 Bar \u0002 $s2 Baz" + } + // Concatenation involving more than 200 elements + def largeConcatenation(): String = { + val s00 = "s00" + val s01 = "s01" + val s02 = "s02" + val s03 = "s03" + val s04 = "s04" + val s05 = "s05" + val s06 = "s06" + val s07 = "s07" + val s08 = "s08" + + // 24 rows follow + ((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") + + (s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n")) + + ((s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n") + + (s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n" + + s00 + "," + s01 + "," + s02 + "," + s03 + "," + s04 + "," + s05 + "," + s06 + "," + s07 + "," + s08 + "\n")) + } + println("----------") + println(concatenatingVariousTypes()) + println("----------") + println(concatenationInvolvingSpecialCharacters()) + println("----------") + println(largeConcatenation()) + println("----------") +}