Commit 208fc13

cmd/internal/obj/arm64: avoid unnecessary literal pool usage for moves

In a number of load and store cases, the use of the literal pool can be
entirely avoided by simply adding or subtracting the offset from the
register. This uses the same number of instructions, while avoiding a
load from memory, along with the need for the value to be in the
literal pool. Overall this reduces the size of binaries slightly and
should have lower overhead.

Updates #59615

Change-Id: I9cb6a403dc71e34a46af913f5db87dbf52f8688c
Reviewed-on: https://go-review.googlesource.com/c/go/+/512539
Reviewed-by: David Chase <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Run-TryBot: Joel Sing <[email protected]>

1 parent 3313b39 commit 208fc13
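
The change affects loads and stores whose offset does not fit a single instruction. Previously, offsets in this range were materialised through the literal pool (`mov $L, Rtmp` loaded from the pool, then a register-plus-register load or store, as the old comments in asm7.go describe); now any offset whose absolute value is at most 4095 is folded into REGTMP with a single ADD or SUB immediate. The sketch below is a rough, illustrative decision function for a doubleword move, not the assembler's actual control flow (which is driven by operand classes such as C_UOREG4K); it only mirrors the ranges that appear in the diff:

```go
package main

import "fmt"

// offsetStrategy is a hypothetical classification of how an arm64 load/store
// offset v is encoded after this change, for a move whose log2 size is s
// (3 for MOVD). The ranges mirror those used in the diff below.
func offsetStrategy(v int64, s uint) string {
	scale := int64(1) << s
	switch {
	case v >= 0 && v <= 4095*scale && v%scale == 0:
		// Scaled 12 bit unsigned immediate form (ldr/str): one instruction.
		return "single ldr/str"
	case v >= -256 && v <= 255:
		// 9 bit signed unscaled form (ldur/stur): one instruction.
		return "single ldur/stur"
	case v >= -4095 && v <= 4095:
		// New: add/sub the offset into REGTMP, then a zero-offset load/store.
		// Two instructions, no literal pool entry.
		return "add/sub + ldr/str"
	default:
		// Larger offsets still use the hi+lo split or the literal pool.
		return "hi+lo split or literal pool"
	}
}

func main() {
	for _, v := range []int64{-4095, -256, 255, 256, 257, 4095, 4096, 1 << 24} {
		fmt.Printf("%8d: %s\n", v, offsetStrategy(v, 3))
	}
}
```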

File tree: 2 files changed, +74 -5 lines changed

src/cmd/asm/internal/asm/testdata/arm64.s (+32 -1)

@@ -557,7 +557,38 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
 	FMOVQ	65520(R10), F10                 // 4afdff3d
 	FMOVQ	64(RSP), F11                    // eb13c03d
 
-	// large aligned offset, use two instructions(add+ldr/store).
+	// medium offsets that either fit a single instruction or can use add+ldr/str
+	MOVD	-4095(R17), R3                  // 3bfe3fd1630340f9
+	MOVD	-391(R17), R3                   // 3b1e06d1630340f9
+	MOVD	-257(R17), R3                   // 3b0604d1630340f9
+	MOVD	-256(R17), R3                   // 230250f8
+	MOVD	255(R17), R3                    // 23f24ff8
+	MOVD	256(R17), R3                    // 238240f9
+	MOVD	257(R17), R3                    // 3b060491630340f9
+	MOVD	391(R17), R3                    // 3b1e0691630340f9
+	MOVD	4095(R17), R3                   // 3bfe3f91630340f9
+
+	MOVD	R0, -4095(R17)                  // 3bfe3fd1600300f9
+	MOVD	R0, -391(R17)                   // 3b1e06d1600300f9
+	MOVD	R0, -257(R17)                   // 3b0604d1600300f9
+	MOVD	R0, -256(R17)                   // 200210f8
+	MOVD	R0, 255(R17)                    // 20f20ff8
+	MOVD	R0, 256(R17)                    // 208200f9
+	MOVD	R0, 257(R17)                    // 3b060491600300f9
+	MOVD	R0, 391(R17)                    // 3b1e0691600300f9
+	MOVD	R0, 4095(R17)                   // 3bfe3f91600300f9
+	MOVD	R0, 4096(R17)                   // 200208f9
+	MOVD	R3, -4095(R17)                  // 3bfe3fd1630300f9
+	MOVD	R3, -391(R17)                   // 3b1e06d1630300f9
+	MOVD	R3, -257(R17)                   // 3b0604d1630300f9
+	MOVD	R3, -256(R17)                   // 230210f8
+	MOVD	R3, 255(R17)                    // 23f20ff8
+	MOVD	R3, 256(R17)                    // 238200f9
+	MOVD	R3, 257(R17)                    // 3b060491630300f9
+	MOVD	R3, 391(R17)                    // 3b1e0691630300f9
+	MOVD	R3, 4095(R17)                   // 3bfe3f91630300f9
+
+	// large aligned offset, use two instructions(add+ldr/str).
 	MOVB	R1, 0x1001(R2)                  // MOVB R1, 4097(R2)     // 5b04409161070039
 	MOVB	R1, 0xffffff(R2)                // MOVB R1, 16777215(R2) // 5bfc7f9161ff3f39
 	MOVH	R1, 0x2002(R2)                  // MOVH R1, 8194(R2)     // 5b08409161070079
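
The expected encodings in the comments above are the little-endian byte dumps of each 4-byte instruction, concatenated. For example, `MOVD -4095(R17), R3 // 3bfe3fd1630340f9` is two words: 0xd13ffe3b (`SUB $4095, R17, R27`, R27 being REGTMP) followed by 0xf9400363 (`MOVD (R27), R3`), while `MOVD 256(R17), R3 // 238240f9` is a single scaled-immediate load. A small, hypothetical helper (not part of the test suite) for splitting such a string into instruction words:

```go
package main

import (
	"encoding/binary"
	"encoding/hex"
	"fmt"
)

// words splits an expected-encoding string from arm64.s into 32-bit
// instruction words. Each instruction is dumped as 4 little-endian bytes.
func words(enc string) ([]uint32, error) {
	b, err := hex.DecodeString(enc)
	if err != nil || len(b)%4 != 0 {
		return nil, fmt.Errorf("not a sequence of 4-byte instructions: %q", enc)
	}
	out := make([]uint32, 0, len(b)/4)
	for i := 0; i < len(b); i += 4 {
		out = append(out, binary.LittleEndian.Uint32(b[i:i+4]))
	}
	return out, nil
}

func main() {
	w, _ := words("3bfe3fd1630340f9")
	fmt.Printf("%#x\n", w) // [0xd13ffe3b 0xf9400363]: SUB then LDR, no literal pool
	w, _ = words("238240f9")
	fmt.Printf("%#x\n", w) // [0xf9408223]: a single LDR with scaled immediate 256/8
}
```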

src/cmd/internal/obj/arm64/asm7.go (+42 -4)

@@ -1959,6 +1959,10 @@ func (c *ctxt7) loadStoreClass(p *obj.Prog, lsc int, v int64) int {
 	}
 
 	needsPool := true
+	if v >= -4095 && v <= 4095 {
+		needsPool = false
+	}
+
 	switch p.As {
 	case AMOVB, AMOVBU:
 		if cmp(C_UAUTO4K, lsc) || cmp(C_UOREG4K, lsc) {
@@ -4015,10 +4019,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		o1 |= uint32(p.From.Reg&31)<<5 | uint32(p.To.Reg&31)
 
 	case 30: /* movT R,L(R) -> strT */
-		// if offset L can be split into hi+lo, and both fit into instructions, do
+		// If offset L fits in a 12 bit unsigned immediate:
+		//	add $L, R, Rtmp or sub $L, R, Rtmp
+		//	str R, (Rtmp)
+		// Otherwise, if offset L can be split into hi+lo, and both fit into instructions:
 		//	add $hi, R, Rtmp
 		//	str R, lo(Rtmp)
-		// otherwise, use constant pool
+		// Otherwise, use constant pool:
 		//	mov $L, Rtmp (from constant pool)
 		//	str R, (R+Rtmp)
 		s := movesize(o.as)
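
The hi+lo fallback described in the comment above predates this commit and still handles larger aligned offsets; see the `MOVB R1, 0x1001(R2)` case in the test data, which becomes `ADD $0x1000, R2, R27` followed by a store at offset 1. Below is a simplified sketch of that splitting idea, assuming the low scaled-immediate bits go into lo and the remainder must fit a shifted 12 bit ADD immediate; the assembler's splitImm24uScaled is more careful about range limits and edge cases:

```go
package main

import (
	"errors"
	"fmt"
)

// splitHiLo is a simplified, illustrative split of a large non-negative
// offset v (for a move of size 1<<s bytes) into hi+lo such that
// "add $hi, R, Rtmp; str R, lo(Rtmp)" can encode the access.
func splitHiLo(v int64, s uint) (hi, lo int64, err error) {
	if v < 0 || v&((1<<s)-1) != 0 {
		return 0, 0, errors.New("offset must be non-negative and aligned to the move size")
	}
	lo = v & (0xfff << s) // low part: fits the scaled 12 bit ldr/str immediate
	hi = v - lo           // high part: must fit a shifted 12 bit ADD immediate
	if hi&^0xfff000 != 0 {
		return 0, 0, errors.New("high part not encodable by a single ADD")
	}
	return hi, lo, nil
}

func main() {
	fmt.Println(splitHiLo(0x1001, 0))   // 4096 1 <nil>
	fmt.Println(splitHiLo(0xffffff, 0)) // 16773120 4095 <nil>
}
```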
@@ -4032,6 +4039,20 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		}
 
 		v := c.regoff(&p.To)
+		if v >= -256 && v <= 255 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 9 bit signed immediate store)", p, v)
+		}
+		if v >= 0 && v <= 4095 && v&((1<<int32(s))-1) == 0 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 12 bit unsigned immediate store)", p, v)
+		}
+
+		// Handle smaller unaligned and negative offsets via addition or subtraction.
+		if v >= -4095 && v <= 4095 {
+			o1 = c.oaddi12(p, v, REGTMP, int16(r))
+			o2 = c.olsr12u(p, c.opstr(p, p.As), 0, REGTMP, p.From.Reg)
+			break
+		}
+
 		hi, lo, err := splitImm24uScaled(v, s)
 		if err != nil {
 			goto storeusepool
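
The new early-out above implements the first strategy from the comment: for offsets in [-4095, 4095] that did not already fit a single instruction, oaddi12 emits one ADD (or SUB, for a negative offset) that folds the offset into REGTMP, and olsr12u emits the store with a zero offset. A hedged sketch of the resulting instruction pair (the helper and its signature are illustrative, not the assembler's API):

```go
package main

import "fmt"

// storeSeq shows the two-instruction sequence used for "MOVD Rsrc, off(Rbase)"
// when off is unaligned or negative but within +/-4095. R27 is REGTMP, the
// assembler-reserved temporary on arm64.
func storeSeq(off int64, base, src string) [2]string {
	if off < -4095 || off > 4095 {
		panic("offset does not fit a single 12 bit ADD/SUB immediate")
	}
	op := "ADD"
	if off < 0 {
		op, off = "SUB", -off
	}
	return [2]string{
		fmt.Sprintf("%s\t$%d, %s, R27", op, off, base),
		fmt.Sprintf("MOVD\t%s, (R27)", src),
	}
}

func main() {
	fmt.Println(storeSeq(-4095, "R17", "R0")) // matches test encoding 3bfe3fd1600300f9
	fmt.Println(storeSeq(257, "R17", "R3"))   // matches test encoding 3b060491630300f9
}
```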
@@ -4054,10 +4075,13 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		o2 = c.olsxrr(p, int32(c.opstrr(p, p.As, false)), int(p.From.Reg), int(r), REGTMP)
 
 	case 31: /* movT L(R), R -> ldrT */
-		// if offset L can be split into hi+lo, and both fit into instructions, do
+		// If offset L fits in a 12 bit unsigned immediate:
+		//	add $L, R, Rtmp or sub $L, R, Rtmp
+		//	ldr (Rtmp), R
+		// Otherwise, if offset L can be split into hi+lo, and both fit into instructions:
 		//	add $hi, R, Rtmp
 		//	ldr lo(Rtmp), R
-		// otherwise, use constant pool
+		// Otherwise, use constant pool:
 		//	mov $L, Rtmp (from constant pool)
 		//	ldr (R+Rtmp), R
 		s := movesize(o.as)
@@ -4071,6 +4095,20 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
 		}
 
 		v := c.regoff(&p.From)
+		if v >= -256 && v <= 255 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 9 bit signed immediate load)", p, v)
+		}
+		if v >= 0 && v <= 4095 && v&((1<<int32(s))-1) == 0 {
+			c.ctxt.Diag("%v: bad type for offset %d (should be 12 bit unsigned immediate load)", p, v)
+		}
+
+		// Handle smaller unaligned and negative offsets via addition or subtraction.
+		if v >= -4095 && v <= 4095 {
+			o1 = c.oaddi12(p, v, REGTMP, int16(r))
+			o2 = c.olsr12u(p, c.opldr(p, p.As), 0, REGTMP, p.To.Reg)
+			break
+		}
+
 		hi, lo, err := splitImm24uScaled(v, s)
 		if err != nil {
 			goto loadusepool
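
The two Diag calls added in both the load and store cases appear to be internal-consistency checks: an offset that fits the 9 bit signed unscaled form (LDUR/STUR) or the aligned 12 bit unsigned scaled form should have been matched by an earlier operand class and never reach cases 30/31. A minimal sketch of the two predicates, assuming s is the log2 move size returned by movesize:

```go
package main

import "fmt"

// fitsSigned9 reports whether v fits the 9 bit signed immediate of the
// unscaled load/store forms (ldur/stur).
func fitsSigned9(v int64) bool {
	return v >= -256 && v <= 255
}

// fitsUnsigned12Scaled reports whether v is non-negative, at most 4095 and
// aligned to the move size, mirroring the second guard in the new code.
func fitsUnsigned12Scaled(v, s int64) bool {
	return v >= 0 && v <= 4095 && v&((1<<s)-1) == 0
}

func main() {
	fmt.Println(fitsSigned9(-256), fitsSigned9(255), fitsSigned9(256))      // true true false
	fmt.Println(fitsUnsigned12Scaled(256, 3), fitsUnsigned12Scaled(257, 3)) // true false
}
```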
