Skip to content

Commit 3313b39

Browse files
committed
cmd/internal/obj/arm64: improve classification of loads and stores
Currently, pool literals are added even when they are not needed, namely when the offset is a 24-bit unsigned scaled immediate. By improving the classification of loads and stores, we can avoid generating unused pool literals. More importantly, this provides a basis for further improvement of the load and store code generation. Updates #59615 Change-Id: Ia3bad1709314565a05894a76c434cca2fa4533c4 Reviewed-on: https://go-review.googlesource.com/c/go/+/512538 Reviewed-by: Cherry Mui <[email protected]> Reviewed-by: David Chase <[email protected]> Run-TryBot: Joel Sing <[email protected]> TryBot-Result: Gopher Robot <[email protected]>
1 parent a37da52 commit 3313b39

File tree

3 files changed

+152
-31
lines changed

3 files changed

+152
-31
lines changed

src/cmd/internal/obj/arm64/a.out.go

+3-1
Original file line numberDiff line numberDiff line change
@@ -414,7 +414,8 @@ const (
414414
C_UAUTO32K_16 // 0 to 32760, 0 mod 16 + C_PSAUTO
415415
C_UAUTO32K // 0 to 32760, 0 mod 8 + C_PSAUTO
416416
C_UAUTO64K // 0 to 65520, 0 mod 16 + C_PSAUTO
417-
C_LAUTO // any other 32-bit constant
417+
C_LAUTOPOOL // any other constant up to 64 bits (needs pool literal)
418+
C_LAUTO // any other constant up to 64 bits
418419

419420
C_SEXT1 // 0 to 4095, direct
420421
C_SEXT2 // 0 to 8190
@@ -454,6 +455,7 @@ const (
454455
C_UOREG32K_16
455456
C_UOREG32K
456457
C_UOREG64K
458+
C_LOREGPOOL
457459
C_LOREG
458460

459461
C_ADDR // TODO(aram): explain difference from C_VCONADDR

src/cmd/internal/obj/arm64/anames7.go

+2
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ var cnames7 = []string{
7575
"UAUTO32K_8",
7676
"UAUTO32K",
7777
"UAUTO64K",
78+
"LAUTOPOOL",
7879
"LAUTO",
7980
"SEXT1",
8081
"SEXT2",
@@ -113,6 +114,7 @@ var cnames7 = []string{
113114
"UOREG32K_16",
114115
"UOREG32K",
115116
"UOREG64K",
117+
"LOREGPOOL",
116118
"LOREG",
117119
"ADDR",
118120
"GOTADDR",

src/cmd/internal/obj/arm64/asm7.go

+147-30
Original file line numberDiff line numberDiff line change
@@ -591,38 +591,66 @@ var optab = []Optab{
591591
{AFMOVQ, C_NSOREG, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0, 0},
592592

593593
/* long displacement store */
594-
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
595-
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
596-
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
597-
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
598-
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
599-
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
600-
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
601-
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
602-
603-
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
604-
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
605-
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
606-
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
607-
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, LTO, 0},
608-
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, LTO, 0},
594+
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
595+
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
596+
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
597+
{AMOVB, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
598+
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
599+
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
600+
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
601+
{AMOVH, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
602+
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
603+
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
604+
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
605+
{AMOVW, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
606+
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
607+
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
608+
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
609+
{AMOVD, C_ZREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
610+
611+
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
612+
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
613+
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
614+
{AFMOVS, C_FREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
615+
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
616+
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
617+
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
618+
{AFMOVD, C_FREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
619+
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 30, 8, REGSP, 0, 0},
620+
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LAUTOPOOL, C_NONE, 30, 8, REGSP, LTO, 0},
621+
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 30, 8, 0, 0, 0},
622+
{AFMOVQ, C_FREG, C_NONE, C_NONE, C_LOREGPOOL, C_NONE, 30, 8, 0, LTO, 0},
609623

610624
/* long displacement load */
611-
{AMOVB, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
612-
{AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
613-
{AMOVH, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
614-
{AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
615-
{AMOVW, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
616-
{AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
617-
{AMOVD, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
618-
{AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
619-
620-
{AFMOVS, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
621-
{AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
622-
{AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
623-
{AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
624-
{AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
625-
{AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
625+
{AMOVB, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
626+
{AMOVB, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
627+
{AMOVB, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
628+
{AMOVB, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
629+
{AMOVH, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
630+
{AMOVH, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
631+
{AMOVH, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
632+
{AMOVH, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
633+
{AMOVW, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
634+
{AMOVW, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
635+
{AMOVW, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
636+
{AMOVW, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
637+
{AMOVD, C_LAUTO, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, 0, 0},
638+
{AMOVD, C_LAUTOPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, REGSP, LFROM, 0},
639+
{AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, 0, 0},
640+
{AMOVD, C_LOREGPOOL, C_NONE, C_NONE, C_ZREG, C_NONE, 31, 8, 0, LFROM, 0},
641+
642+
{AFMOVS, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, 0, 0},
643+
{AFMOVS, C_LAUTOPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
644+
{AFMOVS, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, 0, 0},
645+
{AFMOVS, C_LOREGPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
646+
{AFMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, 0, 0},
647+
{AFMOVD, C_LAUTOPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
648+
{AFMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, 0, 0},
649+
{AFMOVD, C_LOREGPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
650+
{AFMOVQ, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, 0, 0},
651+
{AFMOVQ, C_LAUTOPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, REGSP, LFROM, 0},
652+
{AFMOVQ, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, 0, 0},
653+
{AFMOVQ, C_LOREGPOOL, C_NONE, C_NONE, C_FREG, C_NONE, 31, 8, 0, LFROM, 0},
626654

627655
/* pre/post-indexed load (unscaled, signed 9-bit offset) */
628656
{AMOVD, C_LOREG, C_NONE, C_NONE, C_ZREG, C_NONE, 22, 4, 0, 0, C_XPOST},
@@ -1476,6 +1504,14 @@ func isNEGop(op obj.As) bool {
14761504
return false
14771505
}
14781506

1507+
func isMOVop(op obj.As) bool {
1508+
switch op {
1509+
case AMOVB, AMOVBU, AMOVH, AMOVHU, AMOVW, AMOVWU, AMOVD, AFMOVS, AFMOVD, AFMOVQ:
1510+
return true
1511+
}
1512+
return false
1513+
}
1514+
14791515
// isRegShiftOrExt reports whether a.Index, taken relative to
// obj.RBaseARM64, shares any set bit with REG_EXT or with REG_LSL.
func isRegShiftOrExt(a *obj.Addr) bool {
	idx := a.Index - obj.RBaseARM64
	if idx&REG_EXT != 0 {
		return true
	}
	return idx&REG_LSL != 0
}
@@ -1912,6 +1948,63 @@ func (c *ctxt7) con64class(a *obj.Addr) int {
19121948
}
19131949
}
19141950

1951+
// loadStoreClass reclassifies a load or store operation based on its offset.
1952+
func (c *ctxt7) loadStoreClass(p *obj.Prog, lsc int, v int64) int {
1953+
// Avoid reclassification of pre/post-indexed loads and stores.
1954+
if p.Scond == C_XPRE || p.Scond == C_XPOST {
1955+
return lsc
1956+
}
1957+
if cmp(C_NSAUTO, lsc) || cmp(C_NSOREG, lsc) {
1958+
return lsc
1959+
}
1960+
1961+
needsPool := true
1962+
switch p.As {
1963+
case AMOVB, AMOVBU:
1964+
if cmp(C_UAUTO4K, lsc) || cmp(C_UOREG4K, lsc) {
1965+
return lsc
1966+
}
1967+
if v >= 0 && v <= 0xffffff {
1968+
needsPool = false
1969+
}
1970+
case AMOVH, AMOVHU:
1971+
if cmp(C_UAUTO8K, lsc) || cmp(C_UOREG8K, lsc) {
1972+
return lsc
1973+
}
1974+
if v >= 0 && v <= 0xfffffe && v&1 == 0 {
1975+
needsPool = false
1976+
}
1977+
case AMOVW, AMOVWU, AFMOVS:
1978+
if cmp(C_UAUTO16K, lsc) || cmp(C_UOREG16K, lsc) {
1979+
return lsc
1980+
}
1981+
if v >= 0 && v <= 0xfffffc && v&3 == 0 {
1982+
needsPool = false
1983+
}
1984+
case AMOVD, AFMOVD:
1985+
if cmp(C_UAUTO32K, lsc) || cmp(C_UOREG32K, lsc) {
1986+
return lsc
1987+
}
1988+
if v >= 0 && v <= 0xfffff8 && v&7 == 0 {
1989+
needsPool = false
1990+
}
1991+
case AFMOVQ:
1992+
if cmp(C_UAUTO64K, lsc) || cmp(C_UOREG64K, lsc) {
1993+
return lsc
1994+
}
1995+
if v >= 0 && v <= 0xfffff0 && v&15 == 0 {
1996+
needsPool = false
1997+
}
1998+
}
1999+
if needsPool && cmp(C_LAUTO, lsc) {
2000+
return C_LAUTOPOOL
2001+
}
2002+
if needsPool && cmp(C_LOREG, lsc) {
2003+
return C_LOREGPOOL
2004+
}
2005+
return lsc
2006+
}
2007+
19152008
func (c *ctxt7) aclass(a *obj.Addr) int {
19162009
switch a.Type {
19172010
case obj.TYPE_NONE:
@@ -2135,6 +2228,12 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
21352228
a1 = c.con64class(&p.From)
21362229
}
21372230
}
2231+
if p.From.Type == obj.TYPE_MEM {
2232+
if isMOVop(p.As) && (cmp(C_LAUTO, a1) || cmp(C_LOREG, a1)) {
2233+
// More specific classification of large offset loads and stores.
2234+
a1 = c.loadStoreClass(p, a1, c.instoffset)
2235+
}
2236+
}
21382237
p.From.Class = int8(a1)
21392238
}
21402239

@@ -2155,6 +2254,12 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
21552254
a4 := int(p.To.Class)
21562255
if a4 == 0 {
21572256
a4 = c.aclass(&p.To)
2257+
if p.To.Type == obj.TYPE_MEM {
2258+
if isMOVop(p.As) && (cmp(C_LAUTO, a4) || cmp(C_LOREG, a4)) {
2259+
// More specific classification of large offset loads and stores.
2260+
a4 = c.loadStoreClass(p, a4, c.instoffset)
2261+
}
2262+
}
21582263
p.To.Class = int8(a4)
21592264
}
21602265

@@ -3931,6 +4036,9 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
39314036
if err != nil {
39324037
goto storeusepool
39334038
}
4039+
if p.Pool != nil {
4040+
c.ctxt.Diag("%v: unused constant in pool (%v)\n", p, v)
4041+
}
39344042
o1 = c.oaddi(p, AADD, hi, REGTMP, r)
39354043
o2 = c.olsr12u(p, c.opstr(p, p.As), lo, REGTMP, p.From.Reg)
39364044
break
@@ -3939,6 +4047,9 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
39394047
if r == REGTMP || p.From.Reg == REGTMP {
39404048
c.ctxt.Diag("REGTMP used in large offset store: %v", p)
39414049
}
4050+
if p.Pool == nil {
4051+
c.ctxt.Diag("%v: constant is not in pool", p)
4052+
}
39424053
o1 = c.omovlit(AMOVD, p, &p.To, REGTMP)
39434054
o2 = c.olsxrr(p, int32(c.opstrr(p, p.As, false)), int(p.From.Reg), int(r), REGTMP)
39444055

@@ -3964,11 +4075,17 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
39644075
if err != nil {
39654076
goto loadusepool
39664077
}
4078+
if p.Pool != nil {
4079+
c.ctxt.Diag("%v: unused constant in pool (%v)\n", p, v)
4080+
}
39674081
o1 = c.oaddi(p, AADD, hi, REGTMP, r)
39684082
o2 = c.olsr12u(p, c.opldr(p, p.As), lo, REGTMP, p.To.Reg)
39694083
break
39704084

39714085
loadusepool:
4086+
if p.Pool == nil {
4087+
c.ctxt.Diag("%v: constant is not in pool", p)
4088+
}
39724089
if r == REGTMP || p.From.Reg == REGTMP {
39734090
c.ctxt.Diag("REGTMP used in large offset load: %v", p)
39744091
}

0 commit comments

Comments
 (0)