Skip to content

Commit 150d244

Browse files
michalderkacz authored and 4a6f656c committed
cmd/compile,cmd/internal/obj/riscv,runtime: use Duff's devices on riscv64
Implement runtime.duffzero and runtime.duffcopy for riscv64.
Use obj.ADUFFZERO/obj.ADUFFCOPY for medium size, word aligned
zeroing/moving.

Change-Id: I42ec622055630c94cb77e286d8d33dbe7c9f846c
Reviewed-on: https://go-review.googlesource.com/c/go/+/237797
Run-TryBot: Cherry Zhang <[email protected]>
Reviewed-by: Joel Sing <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
1 parent c95bd2e commit 150d244

File tree

9 files changed

+1076
-4
lines changed

9 files changed

+1076
-4
lines changed

src/cmd/compile/internal/riscv64/ggen.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,15 @@ func zeroRange(pp *gc.Progs, p *obj.Prog, off, cnt int64, _ *uint32) *obj.Prog {
2525
return p
2626
}
2727

28-
// TODO(jsing): Add a duff zero implementation for medium sized ranges.
28+
if cnt <= int64(128*gc.Widthptr) {
29+
p = pp.Appendpp(p, riscv.AADDI, obj.TYPE_CONST, 0, off, obj.TYPE_REG, riscv.REG_A0, 0)
30+
p.Reg = riscv.REG_SP
31+
p = pp.Appendpp(p, obj.ADUFFZERO, obj.TYPE_NONE, 0, 0, obj.TYPE_MEM, 0, 0)
32+
p.To.Name = obj.NAME_EXTERN
33+
p.To.Sym = gc.Duffzero
34+
p.To.Offset = 8 * (128 - cnt/int64(gc.Widthptr))
35+
return p
36+
}
2937

3038
// Loop, zeroing pointer width bytes at a time.
3139
// ADD $(off), SP, T0

src/cmd/compile/internal/riscv64/ssa.go

+14
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,20 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
608608
p.To.Type = obj.TYPE_REG
609609
p.To.Reg = v.Reg()
610610

611+
case ssa.OpRISCV64DUFFZERO:
612+
p := s.Prog(obj.ADUFFZERO)
613+
p.To.Type = obj.TYPE_MEM
614+
p.To.Name = obj.NAME_EXTERN
615+
p.To.Sym = gc.Duffzero
616+
p.To.Offset = v.AuxInt
617+
618+
case ssa.OpRISCV64DUFFCOPY:
619+
p := s.Prog(obj.ADUFFCOPY)
620+
p.To.Type = obj.TYPE_MEM
621+
p.To.Name = obj.NAME_EXTERN
622+
p.To.Sym = gc.Duffcopy
623+
p.To.Offset = v.AuxInt
624+
611625
default:
612626
v.Fatalf("Unhandled op %v", v.Op)
613627
}

src/cmd/compile/internal/ssa/gen/RISCV64.rules

+14
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,13 @@
360360
(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
361361
(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)
362362

363+
// Medium zeroing uses a Duff's device
364+
// 8 and 128 are magic constants, see runtime/mkduff.go
365+
(Zero [s] {t} ptr mem)
366+
&& s%8 == 0 && s >= 16 && s <= 8*128
367+
&& t.Alignment()%8 == 0 && !config.noDuffDevice =>
368+
(DUFFZERO [8 * (128 - s/8)] ptr mem)
369+
363370
// Generic zeroing uses a loop
364371
(Zero [s] {t} ptr mem) =>
365372
(LoweredZero [t.Alignment()]
@@ -395,6 +402,13 @@
395402
(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
396403
(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
397404

405+
// Medium move uses a Duff's device
406+
// 16 and 128 are magic constants, see runtime/mkduff.go
407+
(Move [s] {t} dst src mem)
408+
&& s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0
409+
&& !config.noDuffDevice && logLargeCopy(v, s) =>
410+
(DUFFCOPY [16 * (128 - s/8)] dst src mem)
411+
398412
// Generic move uses a loop
399413
(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
400414
(LoweredMove [t.Alignment()]

src/cmd/compile/internal/ssa/gen/RISCV64Ops.go

+38
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,44 @@ func init() {
240240
{name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
241241
{name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
242242

243+
// duffzero
244+
// arg0 = address of memory to zero (in X10, changed as side effect)
245+
// arg1 = mem
246+
// auxint = offset into duffzero code to start executing
247+
// X1 (link register) changed because of function call
248+
// returns mem
249+
{
250+
name: "DUFFZERO",
251+
aux: "Int64",
252+
argLength: 2,
253+
reg: regInfo{
254+
inputs: []regMask{regNamed["X10"]},
255+
clobbers: regNamed["X1"] | regNamed["X10"],
256+
},
257+
typ: "Mem",
258+
faultOnNilArg0: true,
259+
},
260+
261+
// duffcopy
262+
// arg0 = address of dst memory (in X11, changed as side effect)
263+
// arg1 = address of src memory (in X10, changed as side effect)
264+
// arg2 = mem
265+
// auxint = offset into duffcopy code to start executing
266+
// X1 (link register) changed because of function call
267+
// returns mem
268+
{
269+
name: "DUFFCOPY",
270+
aux: "Int64",
271+
argLength: 3,
272+
reg: regInfo{
273+
inputs: []regMask{regNamed["X11"], regNamed["X10"]},
274+
clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"],
275+
},
276+
typ: "Mem",
277+
faultOnNilArg0: true,
278+
faultOnNilArg1: true,
279+
},
280+
243281
// Generic moves and zeros
244282

245283
// general unaligned zeroing

src/cmd/compile/internal/ssa/opGen.go

+28
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteRISCV64.go

+33
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/riscv/obj.go

+5-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ func buildop(ctxt *obj.Link) {}
3333
// lr is the link register to use for the JALR.
3434
// p must be a CALL, JMP, RET, DUFFZERO or DUFFCOPY.
3535
func jalrToSym(ctxt *obj.Link, p *obj.Prog, newprog obj.ProgAlloc, lr int16) *obj.Prog {
36-
if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET {
36+
if p.As != obj.ACALL && p.As != obj.AJMP && p.As != obj.ARET && p.As != obj.ADUFFZERO && p.As != obj.ADUFFCOPY {
3737
ctxt.Diag("unexpected Prog in jalrToSym: %v", p)
3838
return p
3939
}
@@ -417,7 +417,7 @@ func containsCall(sym *obj.LSym) bool {
417417
// CALLs are CALL, DUFFZERO, DUFFCOPY, or JAL(R) with link register LR.
418418
for p := sym.Func().Text; p != nil; p = p.Link {
419419
switch p.As {
420-
case obj.ACALL:
420+
case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
421421
return true
422422
case AJAL, AJALR:
423423
if p.From.Type == obj.TYPE_REG && p.From.Reg == REG_LR {
@@ -656,7 +656,7 @@ func preprocess(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
656656
p.From.Reg = REG_SP
657657
}
658658

659-
case obj.ACALL:
659+
case obj.ACALL, obj.ADUFFZERO, obj.ADUFFCOPY:
660660
switch p.To.Type {
661661
case obj.TYPE_MEM:
662662
jalrToSym(ctxt, p, newprog, REG_LR)
@@ -1696,6 +1696,8 @@ var encodings = [ALAST & obj.AMask]encoding{
16961696
obj.APCDATA: pseudoOpEncoding,
16971697
obj.ATEXT: pseudoOpEncoding,
16981698
obj.ANOP: pseudoOpEncoding,
1699+
obj.ADUFFZERO: pseudoOpEncoding,
1700+
obj.ADUFFCOPY: pseudoOpEncoding,
16991701
}
17001702

17011703
// encodingForAs returns the encoding for an obj.As.

0 commit comments

Comments
 (0)