Skip to content

Commit b182ba7

Browse files
committed
cmd/compile: optimize codes with arm64 REV16 instruction
Optimize some patterns into the rev16/rev16w instructions.

Pattern 1:
    (c & 0xff00ff00)>>8 | (c & 0x00ff00ff)<<8
To:
    rev16w c

Pattern 2:
    (c & 0xff00ff00ff00ff00)>>8 | (c & 0x00ff00ff00ff00ff)<<8
To:
    rev16 c

This patch is a copy of CL 239637, contributed by Alice Xu ([email protected]).

Change-Id: I96936c1db87618bc1903c04221c7e9b2779455b3
Reviewed-on: https://go-review.googlesource.com/c/go/+/268377
Trust: fannie zhang <[email protected]>
Run-TryBot: fannie zhang <[email protected]>
TryBot-Result: Go Bot <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
1 parent d25476e commit b182ba7

File tree

7 files changed

+320
-0
lines changed

7 files changed

+320
-0
lines changed

src/cmd/compile/internal/arm64/ssa.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -915,6 +915,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
915915
ssa.OpARM64FCVTDS,
916916
ssa.OpARM64REV,
917917
ssa.OpARM64REVW,
918+
ssa.OpARM64REV16,
918919
ssa.OpARM64REV16W,
919920
ssa.OpARM64RBIT,
920921
ssa.OpARM64RBITW,

src/cmd/compile/internal/ssa/gen/ARM64.rules

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1762,9 +1762,25 @@
17621762
(CMPconst [64] (SUB <t> (MOVDconst [32]) (ANDconst <t> [31] y))))) && cc == OpARM64LessThanU
17631763
=> (RORW x y)
17641764

1765+
// rev16w | rev16
17651766
// ((x>>8) | (x<<8)) => (REV16W x), the type of x is uint16, "|" can also be "^" or "+".
17661767
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x) => (REV16W x)
17671768

1769+
// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8), "|" can also be "^" or "+".
1770+
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (UBFX [armBFAuxInt(8, 24)] (ANDconst [c1] x)) (ANDconst [c2] x))
1771+
&& uint32(c1) == 0xff00ff00 && uint32(c2) == 0x00ff00ff
1772+
=> (REV16W x)
1773+
1774+
// ((x & 0xff00ff00ff00ff00)>>8) | ((x & 0x00ff00ff00ff00ff)<<8), "|" can also be "^" or "+".
1775+
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
1776+
&& (uint64(c1) == 0xff00ff00ff00ff00 && uint64(c2) == 0x00ff00ff00ff00ff)
1777+
=> (REV16 x)
1778+
1779+
// ((x & 0xff00ff00)>>8) | ((x & 0x00ff00ff)<<8) on a 64-bit x: only the low 32 bits contribute, so x is masked to 32 bits before REV16. "|" can also be "^" or "+".
1780+
((ADDshiftLL|ORshiftLL|XORshiftLL) [8] (SRLconst [8] (ANDconst [c1] x)) (ANDconst [c2] x))
1781+
&& (uint64(c1) == 0xff00ff00 && uint64(c2) == 0x00ff00ff)
1782+
=> (REV16 (ANDconst <x.Type> [0xffffffff] x))
1783+
17681784
// Extract from reg pair
17691785
(ADDshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)
17701786
( ORshiftLL [c] (SRLconst x [64-c]) x2) => (EXTRconst [64-c] x2 x)

src/cmd/compile/internal/ssa/gen/ARM64Ops.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,7 @@ func init() {
239239
{name: "FSQRTS", argLength: 1, reg: fp11, asm: "FSQRTS"}, // sqrt(arg0), float32
240240
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // byte reverse, 64-bit
241241
{name: "REVW", argLength: 1, reg: gp11, asm: "REVW"}, // byte reverse, 32-bit
242+
{name: "REV16", argLength: 1, reg: gp11, asm: "REV16"}, // byte reverse in each 16-bit halfword, 64-bit
242243
{name: "REV16W", argLength: 1, reg: gp11, asm: "REV16W"}, // byte reverse in each 16-bit halfword, 32-bit
243244
{name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // bit reverse, 64-bit
244245
{name: "RBITW", argLength: 1, reg: gp11, asm: "RBITW"}, // bit reverse, 32-bit

src/cmd/compile/internal/ssa/opGen.go

Lines changed: 14 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteARM64.go

Lines changed: 225 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/test/testdata/arith_test.go

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1452,3 +1452,46 @@ func testDivisibility(t *testing.T) {
14521452
}
14531453
}
14541454
}
1455+
1456+
//go:noinline
1457+
func genREV16_1(c uint64) uint64 {
1458+
b := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8)
1459+
return b
1460+
}
1461+
1462+
//go:noinline
1463+
func genREV16_2(c uint64) uint64 {
1464+
b := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
1465+
return b
1466+
}
1467+
1468+
//go:noinline
1469+
func genREV16W(c uint32) uint32 {
1470+
b := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
1471+
return b
1472+
}
1473+
1474+
func TestREV16(t *testing.T) {
1475+
x := uint64(0x8f7f6f5f4f3f2f1f)
1476+
want1 := uint64(0x7f8f5f6f3f4f1f2f)
1477+
want2 := uint64(0x3f4f1f2f)
1478+
1479+
got1 := genREV16_1(x)
1480+
if got1 != want1 {
1481+
t.Errorf("genREV16_1(%#x) = %#x want %#x", x, got1, want1)
1482+
}
1483+
got2 := genREV16_2(x)
1484+
if got2 != want2 {
1485+
t.Errorf("genREV16_2(%#x) = %#x want %#x", x, got2, want2)
1486+
}
1487+
}
1488+
1489+
func TestREV16W(t *testing.T) {
1490+
x := uint32(0x4f3f2f1f)
1491+
want := uint32(0x3f4f1f2f)
1492+
1493+
got := genREV16W(x)
1494+
if got != want {
1495+
t.Errorf("genREV16W(%#x) = %#x want %#x", x, got, want)
1496+
}
1497+
}

test/codegen/bitfield.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,3 +244,23 @@ func shift_no_cmp(x int) int {
244244
// mips64:`SLLV\t[$]17`,-`SGT`
245245
return x << 17
246246
}
247+
248+
func rev16(c uint64) (uint64, uint64, uint64) {
249+
// arm64:`REV16`,-`AND`,-`LSR`,-`AND`,-`ORR\tR[0-9]+<<8`
250+
b1 := ((c & 0xff00ff00ff00ff00) >> 8) | ((c & 0x00ff00ff00ff00ff) << 8)
251+
// arm64:-`ADD\tR[0-9]+<<8`
252+
b2 := ((c & 0xff00ff00ff00ff00) >> 8) + ((c & 0x00ff00ff00ff00ff) << 8)
253+
// arm64:-`EOR\tR[0-9]+<<8`
254+
b3 := ((c & 0xff00ff00ff00ff00) >> 8) ^ ((c & 0x00ff00ff00ff00ff) << 8)
255+
return b1, b2, b3
256+
}
257+
258+
func rev16w(c uint32) (uint32, uint32, uint32) {
259+
// arm64:`REV16W`,-`AND`,-`UBFX`,-`AND`,-`ORR\tR[0-9]+<<8`
260+
b1 := ((c & 0xff00ff00) >> 8) | ((c & 0x00ff00ff) << 8)
261+
// arm64:-`ADD\tR[0-9]+<<8`
262+
b2 := ((c & 0xff00ff00) >> 8) + ((c & 0x00ff00ff) << 8)
263+
// arm64:-`EOR\tR[0-9]+<<8`
264+
b3 := ((c & 0xff00ff00) >> 8) ^ ((c & 0x00ff00ff) << 8)
265+
return b1, b2, b3
266+
}

0 commit comments

Comments
 (0)