Skip to content

Commit cf92e38

Browse files
committed
[dev.ssa] cmd/compile: use 2-result divide op
We now allow Values to have 2 outputs. Use that ability for amd64.
This allows x,y := a/b,a%b to use just a single divide instruction.

Update #6815

Change-Id: Id70bcd20188a2dd8445e631a11d11f60991921e4
Reviewed-on: https://go-review.googlesource.com/25004
Reviewed-by: Josh Bleecher Snyder <[email protected]>
Reviewed-by: David Chase <[email protected]>
1 parent 25e0a36 commit cf92e38

File tree

5 files changed

+218
-285
lines changed

5 files changed

+218
-285
lines changed

src/cmd/compile/internal/amd64/ssa.go

Lines changed: 70 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -209,89 +209,87 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
209209
}
210210
opregreg(v.Op.Asm(), r, gc.SSARegNum(v.Args[1]))
211211

212-
case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW,
213-
ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU,
214-
ssa.OpAMD64MODQ, ssa.OpAMD64MODL, ssa.OpAMD64MODW,
215-
ssa.OpAMD64MODQU, ssa.OpAMD64MODLU, ssa.OpAMD64MODWU:
212+
case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
213+
// Arg[0] (the dividend) is in AX.
214+
// Arg[1] (the divisor) can be in any other register.
215+
// Result[0] (the quotient) is in AX.
216+
// Result[1] (the remainder) is in DX.
217+
r := gc.SSARegNum(v.Args[1])
216218

217-
// Arg[0] is already in AX as it's the only register we allow
218-
// and AX is the only output
219-
x := gc.SSARegNum(v.Args[1])
220-
221-
// CPU faults upon signed overflow, which occurs when most
222-
// negative int is divided by -1.
223-
var j *obj.Prog
224-
if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
225-
v.Op == ssa.OpAMD64DIVW || v.Op == ssa.OpAMD64MODQ ||
226-
v.Op == ssa.OpAMD64MODL || v.Op == ssa.OpAMD64MODW {
227-
228-
var c *obj.Prog
229-
switch v.Op {
230-
case ssa.OpAMD64DIVQ, ssa.OpAMD64MODQ:
231-
c = gc.Prog(x86.ACMPQ)
232-
j = gc.Prog(x86.AJEQ)
233-
// go ahead and sign extend to save doing it later
234-
gc.Prog(x86.ACQO)
219+
// Zero extend dividend.
220+
c := gc.Prog(x86.AXORL)
221+
c.From.Type = obj.TYPE_REG
222+
c.From.Reg = x86.REG_DX
223+
c.To.Type = obj.TYPE_REG
224+
c.To.Reg = x86.REG_DX
235225

236-
case ssa.OpAMD64DIVL, ssa.OpAMD64MODL:
237-
c = gc.Prog(x86.ACMPL)
238-
j = gc.Prog(x86.AJEQ)
239-
gc.Prog(x86.ACDQ)
240-
241-
case ssa.OpAMD64DIVW, ssa.OpAMD64MODW:
242-
c = gc.Prog(x86.ACMPW)
243-
j = gc.Prog(x86.AJEQ)
244-
gc.Prog(x86.ACWD)
245-
}
246-
c.From.Type = obj.TYPE_REG
247-
c.From.Reg = x
248-
c.To.Type = obj.TYPE_CONST
249-
c.To.Offset = -1
226+
// Issue divide.
227+
p := gc.Prog(v.Op.Asm())
228+
p.From.Type = obj.TYPE_REG
229+
p.From.Reg = r
250230

251-
j.To.Type = obj.TYPE_BRANCH
231+
case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
232+
// Arg[0] (the dividend) is in AX.
233+
// Arg[1] (the divisor) can be in any other register.
234+
// Result[0] (the quotient) is in AX.
235+
// Result[1] (the remainder) is in DX.
236+
r := gc.SSARegNum(v.Args[1])
252237

238+
// CPU faults upon signed overflow, which occurs when the most
239+
// negative int is divided by -1. Handle divide by -1 as a special case.
240+
var c *obj.Prog
241+
switch v.Op {
242+
case ssa.OpAMD64DIVQ:
243+
c = gc.Prog(x86.ACMPQ)
244+
case ssa.OpAMD64DIVL:
245+
c = gc.Prog(x86.ACMPL)
246+
case ssa.OpAMD64DIVW:
247+
c = gc.Prog(x86.ACMPW)
253248
}
249+
c.From.Type = obj.TYPE_REG
250+
c.From.Reg = r
251+
c.To.Type = obj.TYPE_CONST
252+
c.To.Offset = -1
253+
j1 := gc.Prog(x86.AJEQ)
254+
j1.To.Type = obj.TYPE_BRANCH
254255

255-
// for unsigned ints, we sign extend by setting DX = 0
256-
// signed ints were sign extended above
257-
if v.Op == ssa.OpAMD64DIVQU || v.Op == ssa.OpAMD64MODQU ||
258-
v.Op == ssa.OpAMD64DIVLU || v.Op == ssa.OpAMD64MODLU ||
259-
v.Op == ssa.OpAMD64DIVWU || v.Op == ssa.OpAMD64MODWU {
260-
c := gc.Prog(x86.AXORQ)
261-
c.From.Type = obj.TYPE_REG
262-
c.From.Reg = x86.REG_DX
263-
c.To.Type = obj.TYPE_REG
264-
c.To.Reg = x86.REG_DX
256+
// Sign extend dividend.
257+
switch v.Op {
258+
case ssa.OpAMD64DIVQ:
259+
gc.Prog(x86.ACQO)
260+
case ssa.OpAMD64DIVL:
261+
gc.Prog(x86.ACDQ)
262+
case ssa.OpAMD64DIVW:
263+
gc.Prog(x86.ACWD)
265264
}
266265

266+
// Issue divide.
267267
p := gc.Prog(v.Op.Asm())
268268
p.From.Type = obj.TYPE_REG
269-
p.From.Reg = x
269+
p.From.Reg = r
270270

271-
// signed division, rest of the check for -1 case
272-
if j != nil {
273-
j2 := gc.Prog(obj.AJMP)
274-
j2.To.Type = obj.TYPE_BRANCH
271+
// Skip over -1 fixup code.
272+
j2 := gc.Prog(obj.AJMP)
273+
j2.To.Type = obj.TYPE_BRANCH
275274

276-
var n *obj.Prog
277-
if v.Op == ssa.OpAMD64DIVQ || v.Op == ssa.OpAMD64DIVL ||
278-
v.Op == ssa.OpAMD64DIVW {
279-
// n * -1 = -n
280-
n = gc.Prog(x86.ANEGQ)
281-
n.To.Type = obj.TYPE_REG
282-
n.To.Reg = x86.REG_AX
283-
} else {
284-
// n % -1 == 0
285-
n = gc.Prog(x86.AXORQ)
286-
n.From.Type = obj.TYPE_REG
287-
n.From.Reg = x86.REG_DX
288-
n.To.Type = obj.TYPE_REG
289-
n.To.Reg = x86.REG_DX
290-
}
275+
// Issue -1 fixup code.
276+
// n / -1 = -n
277+
n1 := gc.Prog(x86.ANEGQ)
278+
n1.To.Type = obj.TYPE_REG
279+
n1.To.Reg = x86.REG_AX
291280

292-
j.To.Val = n
293-
j2.To.Val = s.Pc()
294-
}
281+
// n % -1 == 0
282+
n2 := gc.Prog(x86.AXORL)
283+
n2.From.Type = obj.TYPE_REG
284+
n2.From.Reg = x86.REG_DX
285+
n2.To.Type = obj.TYPE_REG
286+
n2.To.Reg = x86.REG_DX
287+
288+
// TODO(khr): issue only the -1 fixup code we need.
289+
// For instance, if only the quotient is used, no point in zeroing the remainder.
290+
291+
j1.To.Val = n1
292+
j2.To.Val = s.Pc()
295293

296294
case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULW, ssa.OpAMD64HMULB,
297295
ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU, ssa.OpAMD64HMULWU, ssa.OpAMD64HMULBU:
@@ -818,6 +816,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
818816
p.To.Reg = gc.SSARegNum(v)
819817
case ssa.OpSP, ssa.OpSB:
820818
// nothing to do
819+
case ssa.OpSelect0, ssa.OpSelect1:
820+
// nothing to do
821821
case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
822822
ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
823823
ssa.OpAMD64SETG, ssa.OpAMD64SETGE,

src/cmd/compile/internal/ssa/gen/AMD64.rules

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,14 @@
2929
(Div32F x y) -> (DIVSS x y)
3030
(Div64F x y) -> (DIVSD x y)
3131

32-
(Div64 x y) -> (DIVQ x y)
33-
(Div64u x y) -> (DIVQU x y)
34-
(Div32 x y) -> (DIVL x y)
35-
(Div32u x y) -> (DIVLU x y)
36-
(Div16 x y) -> (DIVW x y)
37-
(Div16u x y) -> (DIVWU x y)
38-
(Div8 x y) -> (DIVW (SignExt8to16 x) (SignExt8to16 y))
39-
(Div8u x y) -> (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y))
32+
(Div64 x y) -> (Select0 (DIVQ x y <&TupleType{config.Frontend().TypeInt64(), config.Frontend().TypeInt64()}>))
33+
(Div64u x y) -> (Select0 (DIVQU x y <&TupleType{config.Frontend().TypeUInt64(), config.Frontend().TypeUInt64()}>))
34+
(Div32 x y) -> (Select0 (DIVL x y <&TupleType{config.Frontend().TypeInt32(), config.Frontend().TypeInt32()}>))
35+
(Div32u x y) -> (Select0 (DIVLU x y <&TupleType{config.Frontend().TypeUInt32(), config.Frontend().TypeUInt32()}>))
36+
(Div16 x y) -> (Select0 (DIVW x y <&TupleType{config.Frontend().TypeInt16(), config.Frontend().TypeInt16()}>))
37+
(Div16u x y) -> (Select0 (DIVWU x y <&TupleType{config.Frontend().TypeUInt16(), config.Frontend().TypeUInt16()}>))
38+
(Div8 x y) -> (Select0 (DIVW (SignExt8to16 x) (SignExt8to16 y) <&TupleType{config.Frontend().TypeInt8(), config.Frontend().TypeInt8()}>))
39+
(Div8u x y) -> (Select0 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y) <&TupleType{config.Frontend().TypeUInt8(), config.Frontend().TypeUInt8()}>))
4040

4141
(Hmul64 x y) -> (HMULQ x y)
4242
(Hmul64u x y) -> (HMULQU x y)
@@ -49,14 +49,14 @@
4949

5050
(Avg64u x y) -> (AVGQU x y)
5151

52-
(Mod64 x y) -> (MODQ x y)
53-
(Mod64u x y) -> (MODQU x y)
54-
(Mod32 x y) -> (MODL x y)
55-
(Mod32u x y) -> (MODLU x y)
56-
(Mod16 x y) -> (MODW x y)
57-
(Mod16u x y) -> (MODWU x y)
58-
(Mod8 x y) -> (MODW (SignExt8to16 x) (SignExt8to16 y))
59-
(Mod8u x y) -> (MODWU (ZeroExt8to16 x) (ZeroExt8to16 y))
52+
(Mod64 x y) -> (Select1 (DIVQ x y <&TupleType{config.Frontend().TypeInt64(), config.Frontend().TypeInt64()}>))
53+
(Mod64u x y) -> (Select1 (DIVQU x y <&TupleType{config.Frontend().TypeUInt64(), config.Frontend().TypeUInt64()}>))
54+
(Mod32 x y) -> (Select1 (DIVL x y <&TupleType{config.Frontend().TypeInt32(), config.Frontend().TypeInt32()}>))
55+
(Mod32u x y) -> (Select1 (DIVLU x y <&TupleType{config.Frontend().TypeUInt32(), config.Frontend().TypeUInt32()}>))
56+
(Mod16 x y) -> (Select1 (DIVW x y <&TupleType{config.Frontend().TypeInt16(), config.Frontend().TypeInt16()}>))
57+
(Mod16u x y) -> (Select1 (DIVWU x y <&TupleType{config.Frontend().TypeUInt16(), config.Frontend().TypeUInt16()}>))
58+
(Mod8 x y) -> (Select1 (DIVW (SignExt8to16 x) (SignExt8to16 y) <&TupleType{config.Frontend().TypeInt8(), config.Frontend().TypeInt8()}>))
59+
(Mod8u x y) -> (Select1 (DIVWU (ZeroExt8to16 x) (ZeroExt8to16 y) <&TupleType{config.Frontend().TypeUInt8(), config.Frontend().TypeUInt8()}>))
6060

6161
(And64 x y) -> (ANDQ x y)
6262
(And32 x y) -> (ANDL x y)

src/cmd/compile/internal/ssa/gen/AMD64Ops.go

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -119,12 +119,10 @@ func init() {
119119
gp21sp = regInfo{inputs: []regMask{gpsp, gp}, outputs: gponly, clobbers: flags}
120120
gp21sb = regInfo{inputs: []regMask{gpspsb, gpsp}, outputs: gponly}
121121
gp21shift = regInfo{inputs: []regMask{gp, cx}, outputs: []regMask{gp}, clobbers: flags}
122-
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax},
123-
clobbers: dx | flags}
122+
gp11div = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{ax, dx},
123+
clobbers: flags}
124124
gp11hmul = regInfo{inputs: []regMask{ax, gpsp}, outputs: []regMask{dx},
125125
clobbers: ax | flags}
126-
gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
127-
clobbers: ax | flags}
128126

129127
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
130128
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
@@ -214,19 +212,12 @@ func init() {
214212

215213
{name: "AVGQU", argLength: 2, reg: gp21, commutative: true, resultInArg0: true}, // (arg0 + arg1) / 2 as unsigned, all 64 result bits
216214

217-
{name: "DIVQ", argLength: 2, reg: gp11div, asm: "IDIVQ"}, // arg0 / arg1
218-
{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL"}, // arg0 / arg1
219-
{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW"}, // arg0 / arg1
220-
{name: "DIVQU", argLength: 2, reg: gp11div, asm: "DIVQ"}, // arg0 / arg1
221-
{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL"}, // arg0 / arg1
222-
{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW"}, // arg0 / arg1
223-
224-
{name: "MODQ", argLength: 2, reg: gp11mod, asm: "IDIVQ"}, // arg0 % arg1
225-
{name: "MODL", argLength: 2, reg: gp11mod, asm: "IDIVL"}, // arg0 % arg1
226-
{name: "MODW", argLength: 2, reg: gp11mod, asm: "IDIVW"}, // arg0 % arg1
227-
{name: "MODQU", argLength: 2, reg: gp11mod, asm: "DIVQ"}, // arg0 % arg1
228-
{name: "MODLU", argLength: 2, reg: gp11mod, asm: "DIVL"}, // arg0 % arg1
229-
{name: "MODWU", argLength: 2, reg: gp11mod, asm: "DIVW"}, // arg0 % arg1
215+
{name: "DIVQ", argLength: 2, reg: gp11div, asm: "IDIVQ"}, // [arg0 / arg1, arg0 % arg1]
216+
{name: "DIVL", argLength: 2, reg: gp11div, asm: "IDIVL"}, // [arg0 / arg1, arg0 % arg1]
217+
{name: "DIVW", argLength: 2, reg: gp11div, asm: "IDIVW"}, // [arg0 / arg1, arg0 % arg1]
218+
{name: "DIVQU", argLength: 2, reg: gp11div, asm: "DIVQ"}, // [arg0 / arg1, arg0 % arg1]
219+
{name: "DIVLU", argLength: 2, reg: gp11div, asm: "DIVL"}, // [arg0 / arg1, arg0 % arg1]
220+
{name: "DIVWU", argLength: 2, reg: gp11div, asm: "DIVW"}, // [arg0 / arg1, arg0 % arg1]
230221

231222
{name: "ANDQ", argLength: 2, reg: gp21, asm: "ANDQ", commutative: true, resultInArg0: true}, // arg0 & arg1
232223
{name: "ANDL", argLength: 2, reg: gp21, asm: "ANDL", commutative: true, resultInArg0: true}, // arg0 & arg1

0 commit comments

Comments
 (0)