From 049987627007baffe603aa957607488daa090593 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Thu, 26 Mar 2020 15:26:46 +0100 Subject: [PATCH 1/2] compiler,runtime: use LLVM intrinsics for memcpy/memmove This replaces the custom runtime.memcpy and runtime.memmove functions with calls to LLVM builtins that should hopefully allow LLVM to better optimize such calls. They will be lowered to regular libc memcpy/memmove when they can't be optimized away. When testing this change with some smoke tests, I found that many smoke tests resulted in slightly larger binary sizes with this commit applied. I looked into it and it appears that machine.sendUSBPacket was not inlined before while it is with this commit applied. Additionally, when I compared all driver smoke tests with -opt=1 I saw that many were reduced slightly in binary size and none increased in size. --- compiler/compiler.go | 13 ++----------- compiler/intrinsics.go | 30 ++++++++++++++++++++++++++++++ src/reflect/value.go | 3 ++- src/runtime/runtime.go | 23 ++++++----------------- 4 files changed, 40 insertions(+), 29 deletions(-) create mode 100644 compiler/intrinsics.go diff --git a/compiler/compiler.go b/compiler/compiler.go index 966b0f62c8..0767340ac6 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -330,7 +330,6 @@ func Compile(pkgName string, machine llvm.TargetMachine, config *compileopts.Con return c.ctx.CreateEnumAttribute(attrKind, 0) } nocapture := getAttr("nocapture") - writeonly := getAttr("writeonly") readonly := getAttr("readonly") // Tell the optimizer that runtime.alloc is an allocator, meaning that it @@ -357,16 +356,6 @@ func Compile(pkgName string, machine llvm.TargetMachine, config *compileopts.Con trackPointer.AddAttributeAtIndex(1, readonly) } - // Memory copy operations do not capture pointers, even though some weird - // pointer arithmetic is happening in the Go implementation. - for _, fnName := range []string{"runtime.memcpy", "runtime.memmove"} { - fn := c.mod.NamedFunction(fnName) - fn.AddAttributeAtIndex(1, nocapture) - fn.AddAttributeAtIndex(1, writeonly) - fn.AddAttributeAtIndex(2, nocapture) - fn.AddAttributeAtIndex(2, readonly) - } - // see: https://reviews.llvm.org/D18355 if c.Debug() { c.mod.AddNamedMetadataOperand("llvm.module.flags", @@ -1334,6 +1323,8 @@ func (b *builder) createFunctionCall(instr *ssa.CallCommon) (llvm.Value, error) // applied) function call. If it is anonymous, it may be a closure. name := fn.RelString(nil) switch { + case name == "runtime.memcpy" || name == "runtime.memmove" || name == "reflect.memcpy": + return b.createMemoryCopyCall(fn, instr.Args) case name == "device/arm.ReadRegister" || name == "device/riscv.ReadRegister": return b.createReadRegister(name, instr.Args) case name == "device/arm.Asm" || name == "device/avr.Asm" || name == "device/riscv.Asm": diff --git a/compiler/intrinsics.go b/compiler/intrinsics.go new file mode 100644 index 0000000000..4790b70d89 --- /dev/null +++ b/compiler/intrinsics.go @@ -0,0 +1,30 @@ +package compiler + +// This file contains helper functions to create calls to LLVM intrinsics. + +import ( + "strconv" + + "golang.org/x/tools/go/ssa" + "tinygo.org/x/go-llvm" +) + +// createMemoryCopyCall creates a call to a builtin LLVM memcpy or memmove +// function, declaring this function if needed. These calls are treated +// specially by optimization passes possibly resulting in better generated code, +// and will otherwise be lowered to regular libc memcpy/memmove calls. +func (b *builder) createMemoryCopyCall(fn *ssa.Function, args []ssa.Value) (llvm.Value, error) { + fnName := "llvm." + fn.Name() + ".p0i8.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth()) + llvmFn := b.mod.NamedFunction(fnName) + if llvmFn.IsNil() { + fnType := llvm.FunctionType(b.ctx.VoidType(), []llvm.Type{b.i8ptrType, b.i8ptrType, b.uintptrType, b.ctx.Int1Type()}, false) + llvmFn = llvm.AddFunction(b.mod, fnName, fnType) + } + var params []llvm.Value + for _, param := range args { + params = append(params, b.getValue(param)) + } + params = append(params, llvm.ConstInt(b.ctx.Int1Type(), 0, false)) + b.CreateCall(llvmFn, params, "") + return llvm.Value{}, nil +} diff --git a/src/reflect/value.go b/src/reflect/value.go index ad3b1fcbf6..a1c69370e0 100644 --- a/src/reflect/value.go +++ b/src/reflect/value.go @@ -678,5 +678,6 @@ func (e *ValueError) Error() string { return "reflect: call of reflect.Value." + e.Method + " on invalid type" } -//go:linkname memcpy runtime.memcpy +// Calls to this function are converted to LLVM intrinsic calls such as +// llvm.memcpy.p0i8.p0i8.i32(). func memcpy(dst, src unsafe.Pointer, size uintptr) diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go index ae9ab665f5..d545e1513a 100644 --- a/src/runtime/runtime.go +++ b/src/runtime/runtime.go @@ -30,26 +30,15 @@ func os_runtime_args() []string { } // Copy size bytes from src to dst. The memory areas must not overlap. -func memcpy(dst, src unsafe.Pointer, size uintptr) { - for i := uintptr(0); i < size; i++ { - *(*uint8)(unsafe.Pointer(uintptr(dst) + i)) = *(*uint8)(unsafe.Pointer(uintptr(src) + i)) - } -} +// Calls to this function are converted to LLVM intrinsic calls such as +// llvm.memcpy.p0i8.p0i8.i32(dst, src, size, false). +func memcpy(dst, src unsafe.Pointer, size uintptr) // Copy size bytes from src to dst. The memory areas may overlap and will do the // correct thing. -func memmove(dst, src unsafe.Pointer, size uintptr) { - if uintptr(dst) < uintptr(src) { - // Copy forwards. - memcpy(dst, src, size) - return - } - // Copy backwards. - for i := size; i != 0; { - i-- - *(*uint8)(unsafe.Pointer(uintptr(dst) + i)) = *(*uint8)(unsafe.Pointer(uintptr(src) + i)) - } -} +// Calls to this function are converted to LLVM intrinsic calls such as +// llvm.memmove.p0i8.p0i8.i32(dst, src, size, false). +func memmove(dst, src unsafe.Pointer, size uintptr) // Set the given number of bytes to zero. func memzero(ptr unsafe.Pointer, size uintptr) { From db008c7b9bf6036e8dc3c767d241143782b9de46 Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Thu, 26 Mar 2020 16:09:48 +0100 Subject: [PATCH 2/2] compiler,runtime: translate memzero calls to LLVM memset intrinsics This gives the optimizer a bit more information about what the calls do. This should result in slightly better generated code. Code size sometimes goes up and sometimes goes down. I blame the code size going up on the inliner which inlines more functions, because compiling the smoke tests in the drivers repository with -opt=1 results in a slight code size reduction in all cases. --- compiler/compiler.go | 2 ++ compiler/intrinsics.go | 20 ++++++++++++++++++++ src/runtime/runtime.go | 8 +++----- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/compiler/compiler.go b/compiler/compiler.go index 0767340ac6..a93e5ce7a5 100644 --- a/compiler/compiler.go +++ b/compiler/compiler.go @@ -1325,6 +1325,8 @@ func (b *builder) createFunctionCall(instr *ssa.CallCommon) (llvm.Value, error) switch { case name == "runtime.memcpy" || name == "runtime.memmove" || name == "reflect.memcpy": return b.createMemoryCopyCall(fn, instr.Args) + case name == "runtime.memzero": + return b.createMemoryZeroCall(instr.Args) case name == "device/arm.ReadRegister" || name == "device/riscv.ReadRegister": return b.createReadRegister(name, instr.Args) case name == "device/arm.Asm" || name == "device/avr.Asm" || name == "device/riscv.Asm": diff --git a/compiler/intrinsics.go b/compiler/intrinsics.go index 4790b70d89..634e8362ea 100644 --- a/compiler/intrinsics.go +++ b/compiler/intrinsics.go @@ -28,3 +28,23 @@ func (b *builder) createMemoryCopyCall(fn *ssa.Function, args []ssa.Value) (llvm b.CreateCall(llvmFn, params, "") return llvm.Value{}, nil } + +// createMemoryZeroCall creates calls to llvm.memset.* to zero a block of +// memory, declaring the function if needed. These calls will be lowered to +// regular libc memset calls if they aren't optimized out in a different way. +func (b *builder) createMemoryZeroCall(args []ssa.Value) (llvm.Value, error) { + fnName := "llvm.memset.p0i8.i" + strconv.Itoa(b.uintptrType.IntTypeWidth()) + llvmFn := b.mod.NamedFunction(fnName) + if llvmFn.IsNil() { + fnType := llvm.FunctionType(b.ctx.VoidType(), []llvm.Type{b.i8ptrType, b.ctx.Int8Type(), b.uintptrType, b.ctx.Int1Type()}, false) + llvmFn = llvm.AddFunction(b.mod, fnName, fnType) + } + params := []llvm.Value{ + b.getValue(args[0]), + llvm.ConstInt(b.ctx.Int8Type(), 0, false), + b.getValue(args[1]), + llvm.ConstInt(b.ctx.Int1Type(), 0, false), + } + b.CreateCall(llvmFn, params, "") + return llvm.Value{}, nil +} diff --git a/src/runtime/runtime.go b/src/runtime/runtime.go index d545e1513a..61d21029e8 100644 --- a/src/runtime/runtime.go +++ b/src/runtime/runtime.go @@ -41,11 +41,9 @@ func memcpy(dst, src unsafe.Pointer, size uintptr) func memmove(dst, src unsafe.Pointer, size uintptr) // Set the given number of bytes to zero. -func memzero(ptr unsafe.Pointer, size uintptr) { - for i := uintptr(0); i < size; i++ { - *(*byte)(unsafe.Pointer(uintptr(ptr) + i)) = 0 - } -} +// Calls to this function are converted to LLVM intrinsic calls such as +// llvm.memset.p0i8.i32(ptr, 0, size, false). +func memzero(ptr unsafe.Pointer, size uintptr) // Compare two same-size buffers for equality. func memequal(x, y unsafe.Pointer, n uintptr) bool {