Skip to content

Commit 9eb53ab

Browse files
bmkesslerrandall77
authored andcommitted
cmd/compile: intrinsify math/bits.Mul
Add SSA rules to intrinsify Mul/Mul64 (AMD64 and ARM64). SSA rules for other functions and architectures are left as a future optimization. Benchmark results on AMD64/ARM64 before and after SSA implementation are below. amd64 name old time/op new time/op delta Add-4 1.78ns ± 0% 1.85ns ±12% ~ (p=0.397 n=4+5) Add32-4 1.71ns ± 1% 1.70ns ± 0% ~ (p=0.683 n=5+5) Add64-4 1.80ns ± 2% 1.77ns ± 0% -1.22% (p=0.048 n=5+5) Sub-4 1.78ns ± 0% 1.78ns ± 0% ~ (all equal) Sub32-4 1.78ns ± 1% 1.78ns ± 0% ~ (p=1.000 n=5+5) Sub64-4 1.78ns ± 1% 1.78ns ± 0% ~ (p=0.968 n=5+4) Mul-4 11.5ns ± 1% 1.8ns ± 2% -84.39% (p=0.008 n=5+5) Mul32-4 1.39ns ± 0% 1.38ns ± 3% ~ (p=0.175 n=5+5) Mul64-4 6.85ns ± 1% 1.78ns ± 1% -73.97% (p=0.008 n=5+5) Div-4 57.1ns ± 1% 56.7ns ± 0% ~ (p=0.087 n=5+5) Div32-4 18.0ns ± 0% 18.0ns ± 0% ~ (all equal) Div64-4 56.4ns ±10% 53.6ns ± 1% ~ (p=0.071 n=5+5) arm64 name old time/op new time/op delta Add-96 5.51ns ± 0% 5.51ns ± 0% ~ (all equal) Add32-96 5.51ns ± 0% 5.51ns ± 0% ~ (all equal) Add64-96 5.52ns ± 0% 5.51ns ± 0% ~ (p=0.444 n=5+5) Sub-96 5.51ns ± 0% 5.51ns ± 0% ~ (all equal) Sub32-96 5.51ns ± 0% 5.51ns ± 0% ~ (all equal) Sub64-96 5.51ns ± 0% 5.51ns ± 0% ~ (all equal) Mul-96 34.6ns ± 0% 5.0ns ± 0% -85.52% (p=0.008 n=5+5) Mul32-96 4.51ns ± 0% 4.51ns ± 0% ~ (all equal) Mul64-96 21.1ns ± 0% 5.0ns ± 0% -76.26% (p=0.008 n=5+5) Div-96 64.7ns ± 0% 64.7ns ± 0% ~ (all equal) Div32-96 17.0ns ± 0% 17.0ns ± 0% ~ (all equal) Div64-96 53.1ns ± 0% 53.1ns ± 0% ~ (all equal) Updates #24813 Change-Id: I9bda6d2102f65cae3d436a2087b47ed8bafeb068 Reviewed-on: https://go-review.googlesource.com/129415 Run-TryBot: Keith Randall <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Keith Randall <[email protected]>
1 parent b50210f commit 9eb53ab

File tree

2 files changed

+22
-0
lines changed

2 files changed

+22
-0
lines changed

src/cmd/compile/internal/gc/ssa.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3435,6 +3435,12 @@ func init() {
34353435
addF("math/bits", "OnesCount",
34363436
makeOnesCountAMD64(ssa.OpPopCount64, ssa.OpPopCount32),
34373437
sys.AMD64)
3438+
alias("math/bits", "Mul", "math/bits", "Mul64", sys.ArchAMD64, sys.ArchARM64)
3439+
addF("math/bits", "Mul64",
3440+
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
3441+
return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[TUINT64], types.Types[TUINT64]), args[0], args[1])
3442+
},
3443+
sys.AMD64, sys.ARM64)
34383444

34393445
/******** sync/atomic ********/
34403446

test/codegen/mathbits.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,3 +302,19 @@ func IterateBits8(n uint8) int {
302302
}
303303
return i
304304
}
305+
306+
// --------------- //
307+
// bits.Mul* //
308+
// --------------- //
309+
310+
func Mul(x, y uint) (hi, lo uint) {
311+
// amd64:"MULQ"
312+
// arm64:"UMULH","MUL"
313+
return bits.Mul(x, y)
314+
}
315+
316+
func Mul64(x, y uint64) (hi, lo uint64) {
317+
// amd64:"MULQ"
318+
// arm64:"UMULH","MUL"
319+
return bits.Mul64(x, y)
320+
}

0 commit comments

Comments
 (0)