Skip to content

Commit 98cb767

Browse files
committed
cmd/compile: insert complicated x86 addressing modes as a separate pass
Use a separate compiler pass to introduce complicated x86 addressing modes. Loads in the normal architecture rules (for x86 and all other platforms) can have constant offsets (AuxInt values) and symbols (Aux values), but no more. The complex addressing modes (x+y, x+2*y, etc.) are introduced in a separate pass that combines loads with LEAQx ops. Organizing rewrites this way simplifies the number of rewrites required, as there are lots of different rule orderings that have to be specified to ensure these complex addressing modes are always found if they are possible. Update #36468 Change-Id: I5b4bf7b03a1e731d6dfeb9ef19b376175f3b4b44 Reviewed-on: https://go-review.googlesource.com/c/go/+/217097 Run-TryBot: Keith Randall <[email protected]> TryBot-Result: Gobot Gobot <[email protected]> Reviewed-by: Josh Bleecher Snyder <[email protected]>
1 parent d49fecc commit 98cb767

File tree

6 files changed

+3233
-8176
lines changed

6 files changed

+3233
-8176
lines changed
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
// Copyright 2020 The Go Authors. All rights reserved.
2+
// Use of this source code is governed by a BSD-style
3+
// license that can be found in the LICENSE file.
4+
5+
package ssa
6+
7+
// addressingModes combines address calculations into memory operations
8+
// that can perform complicated addressing modes.
9+
func addressingModes(f *Func) {
10+
switch f.Config.arch {
11+
default:
12+
// Most architectures can't do this.
13+
return
14+
case "amd64":
15+
// TODO: 386, s390x?
16+
}
17+
18+
var tmp []*Value
19+
for _, b := range f.Blocks {
20+
for _, v := range b.Values {
21+
if !combineFirst[v.Op] {
22+
continue
23+
}
24+
p := v.Args[0]
25+
c, ok := combine[[2]Op{v.Op, p.Op}]
26+
if !ok {
27+
continue
28+
}
29+
// See if we can combine the Aux/AuxInt values.
30+
switch [2]auxType{opcodeTable[v.Op].auxType, opcodeTable[p.Op].auxType} {
31+
case [2]auxType{auxSymOff, auxInt32}:
32+
// TODO: introduce auxSymOff32
33+
if !is32Bit(v.AuxInt + p.AuxInt) {
34+
continue
35+
}
36+
v.AuxInt += p.AuxInt
37+
case [2]auxType{auxSymOff, auxSymOff}:
38+
if v.Aux != nil && p.Aux != nil {
39+
continue
40+
}
41+
if !is32Bit(v.AuxInt + p.AuxInt) {
42+
continue
43+
}
44+
if p.Aux != nil {
45+
v.Aux = p.Aux
46+
}
47+
v.AuxInt += p.AuxInt
48+
case [2]auxType{auxSymValAndOff, auxInt32}:
49+
vo := ValAndOff(v.AuxInt)
50+
if !vo.canAdd(p.AuxInt) {
51+
continue
52+
}
53+
v.AuxInt = vo.add(p.AuxInt)
54+
case [2]auxType{auxSymValAndOff, auxSymOff}:
55+
vo := ValAndOff(v.AuxInt)
56+
if v.Aux != nil && p.Aux != nil {
57+
continue
58+
}
59+
if !vo.canAdd(p.AuxInt) {
60+
continue
61+
}
62+
if p.Aux != nil {
63+
v.Aux = p.Aux
64+
}
65+
v.AuxInt = vo.add(p.AuxInt)
66+
case [2]auxType{auxSymOff, auxNone}:
67+
// nothing to do
68+
case [2]auxType{auxSymValAndOff, auxNone}:
69+
// nothing to do
70+
default:
71+
f.Fatalf("unknown aux combining for %s and %s\n", v.Op, p.Op)
72+
}
73+
// Combine the operations.
74+
tmp = append(tmp[:0], v.Args[1:]...)
75+
v.resetArgs()
76+
v.Op = c
77+
v.AddArgs(p.Args...)
78+
v.AddArgs(tmp...)
79+
}
80+
}
81+
}
82+
83+
// combineFirst contains ops which appear in combine as the
84+
// first part of the key.
85+
var combineFirst = map[Op]bool{}
86+
87+
func init() {
88+
for k := range combine {
89+
combineFirst[k[0]] = true
90+
}
91+
}
92+
93+
// For each entry k, v in this map, if we have a value x with:
94+
// x.Op == k[0]
95+
// x.Args[0].Op == k[1]
96+
// then we can set x.Op to v and set x.Args like this:
97+
// x.Args[0].Args + x.Args[1:]
98+
// Additionally, the Aux/AuxInt from x.Args[0] is merged into x.
99+
var combine = map[[2]Op]Op{
100+
[2]Op{OpAMD64MOVBload, OpAMD64ADDQ}: OpAMD64MOVBloadidx1,
101+
[2]Op{OpAMD64MOVWload, OpAMD64ADDQ}: OpAMD64MOVWloadidx1,
102+
[2]Op{OpAMD64MOVLload, OpAMD64ADDQ}: OpAMD64MOVLloadidx1,
103+
[2]Op{OpAMD64MOVQload, OpAMD64ADDQ}: OpAMD64MOVQloadidx1,
104+
[2]Op{OpAMD64MOVSSload, OpAMD64ADDQ}: OpAMD64MOVSSloadidx1,
105+
[2]Op{OpAMD64MOVSDload, OpAMD64ADDQ}: OpAMD64MOVSDloadidx1,
106+
107+
[2]Op{OpAMD64MOVBstore, OpAMD64ADDQ}: OpAMD64MOVBstoreidx1,
108+
[2]Op{OpAMD64MOVWstore, OpAMD64ADDQ}: OpAMD64MOVWstoreidx1,
109+
[2]Op{OpAMD64MOVLstore, OpAMD64ADDQ}: OpAMD64MOVLstoreidx1,
110+
[2]Op{OpAMD64MOVQstore, OpAMD64ADDQ}: OpAMD64MOVQstoreidx1,
111+
[2]Op{OpAMD64MOVSSstore, OpAMD64ADDQ}: OpAMD64MOVSSstoreidx1,
112+
[2]Op{OpAMD64MOVSDstore, OpAMD64ADDQ}: OpAMD64MOVSDstoreidx1,
113+
114+
[2]Op{OpAMD64MOVBstoreconst, OpAMD64ADDQ}: OpAMD64MOVBstoreconstidx1,
115+
[2]Op{OpAMD64MOVWstoreconst, OpAMD64ADDQ}: OpAMD64MOVWstoreconstidx1,
116+
[2]Op{OpAMD64MOVLstoreconst, OpAMD64ADDQ}: OpAMD64MOVLstoreconstidx1,
117+
[2]Op{OpAMD64MOVQstoreconst, OpAMD64ADDQ}: OpAMD64MOVQstoreconstidx1,
118+
119+
[2]Op{OpAMD64MOVBload, OpAMD64LEAQ1}: OpAMD64MOVBloadidx1,
120+
[2]Op{OpAMD64MOVWload, OpAMD64LEAQ1}: OpAMD64MOVWloadidx1,
121+
[2]Op{OpAMD64MOVWload, OpAMD64LEAQ2}: OpAMD64MOVWloadidx2,
122+
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ1}: OpAMD64MOVLloadidx1,
123+
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ4}: OpAMD64MOVLloadidx4,
124+
[2]Op{OpAMD64MOVLload, OpAMD64LEAQ8}: OpAMD64MOVLloadidx8,
125+
[2]Op{OpAMD64MOVQload, OpAMD64LEAQ1}: OpAMD64MOVQloadidx1,
126+
[2]Op{OpAMD64MOVQload, OpAMD64LEAQ8}: OpAMD64MOVQloadidx8,
127+
[2]Op{OpAMD64MOVSSload, OpAMD64LEAQ1}: OpAMD64MOVSSloadidx1,
128+
[2]Op{OpAMD64MOVSSload, OpAMD64LEAQ4}: OpAMD64MOVSSloadidx4,
129+
[2]Op{OpAMD64MOVSDload, OpAMD64LEAQ1}: OpAMD64MOVSDloadidx1,
130+
[2]Op{OpAMD64MOVSDload, OpAMD64LEAQ8}: OpAMD64MOVSDloadidx8,
131+
132+
[2]Op{OpAMD64MOVBstore, OpAMD64LEAQ1}: OpAMD64MOVBstoreidx1,
133+
[2]Op{OpAMD64MOVWstore, OpAMD64LEAQ1}: OpAMD64MOVWstoreidx1,
134+
[2]Op{OpAMD64MOVWstore, OpAMD64LEAQ2}: OpAMD64MOVWstoreidx2,
135+
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ1}: OpAMD64MOVLstoreidx1,
136+
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ4}: OpAMD64MOVLstoreidx4,
137+
[2]Op{OpAMD64MOVLstore, OpAMD64LEAQ8}: OpAMD64MOVLstoreidx8,
138+
[2]Op{OpAMD64MOVQstore, OpAMD64LEAQ1}: OpAMD64MOVQstoreidx1,
139+
[2]Op{OpAMD64MOVQstore, OpAMD64LEAQ8}: OpAMD64MOVQstoreidx8,
140+
[2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ1}: OpAMD64MOVSSstoreidx1,
141+
[2]Op{OpAMD64MOVSSstore, OpAMD64LEAQ4}: OpAMD64MOVSSstoreidx4,
142+
[2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ1}: OpAMD64MOVSDstoreidx1,
143+
[2]Op{OpAMD64MOVSDstore, OpAMD64LEAQ8}: OpAMD64MOVSDstoreidx8,
144+
145+
[2]Op{OpAMD64MOVBstoreconst, OpAMD64LEAQ1}: OpAMD64MOVBstoreconstidx1,
146+
[2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ1}: OpAMD64MOVWstoreconstidx1,
147+
[2]Op{OpAMD64MOVWstoreconst, OpAMD64LEAQ2}: OpAMD64MOVWstoreconstidx2,
148+
[2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ1}: OpAMD64MOVLstoreconstidx1,
149+
[2]Op{OpAMD64MOVLstoreconst, OpAMD64LEAQ4}: OpAMD64MOVLstoreconstidx4,
150+
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ1}: OpAMD64MOVQstoreconstidx1,
151+
[2]Op{OpAMD64MOVQstoreconst, OpAMD64LEAQ8}: OpAMD64MOVQstoreconstidx8,
152+
153+
// TODO: 386
154+
}

src/cmd/compile/internal/ssa/compile.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -442,6 +442,7 @@ var passes = [...]pass{
442442
{name: "insert resched checks", fn: insertLoopReschedChecks,
443443
disabled: objabi.Preemptibleloops_enabled == 0}, // insert resched checks in loops.
444444
{name: "lower", fn: lower, required: true},
445+
{name: "addressing modes", fn: addressingModes, required: false},
445446
{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
446447
{name: "lowered cse", fn: cse},
447448
{name: "elim unread autos", fn: elimUnreadAutos},

0 commit comments

Comments (0)