Skip to content

Commit ac8dbe7

Browse files
committed
cmd/compile, runtime: make atomic loads/stores sequentially consistent on s390x
The z/Architecture does not guarantee that a load following a store
will not be reordered with that store, unless they access the same
address. Therefore if we want to ensure the sequential consistency
of atomic loads and stores we need to perform serialization
operations after atomic stores.

We do not need to serialize in the runtime when using StoreRel[ease]
and LoadAcq[uire]. The z/Architecture already provides sufficient
ordering guarantees for these operations.

name              old time/op  new time/op  delta
AtomicLoad64-16   0.51ns ± 0%  0.51ns ± 0%     ~     (all equal)
AtomicStore64-16  0.51ns ± 0%  0.60ns ± 9%  +16.47%  (p=0.000 n=17+20)
AtomicLoad-16     0.51ns ± 0%  0.51ns ± 0%     ~     (all equal)
AtomicStore-16    0.51ns ± 0%  0.60ns ± 9%  +16.50%  (p=0.000 n=18+20)

Fixes #32428.

Change-Id: I88d19a4010c46070e4fff4b41587efe4c628d4d9
Reviewed-on: https://go-review.googlesource.com/c/go/+/180439
Run-TryBot: Michael Munday <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Austin Clements <[email protected]>
1 parent 53deb81 commit ac8dbe7

File tree

8 files changed

+108
-43
lines changed

8 files changed

+108
-43
lines changed

src/cmd/compile/internal/gc/ssa.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -3093,7 +3093,7 @@ func init() {
30933093
s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
30943094
return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
30953095
},
3096-
sys.PPC64)
3096+
sys.PPC64, sys.S390X)
30973097
addF("runtime/internal/atomic", "Loadp",
30983098
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
30993099
v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
@@ -3125,7 +3125,7 @@ func init() {
31253125
s.vars[&memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
31263126
return nil
31273127
},
3128-
sys.PPC64)
3128+
sys.PPC64, sys.S390X)
31293129

31303130
addF("runtime/internal/atomic", "Xchg",
31313131
func(s *state, n *Node, args []*ssa.Value) *ssa.Value {

src/cmd/compile/internal/s390x/ssa.go

+2
Original file line numberDiff line numberDiff line change
@@ -800,6 +800,8 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
800800
bne := s.Prog(s390x.ABNE)
801801
bne.To.Type = obj.TYPE_BRANCH
802802
gc.Patch(bne, cs)
803+
case ssa.OpS390XSYNC:
804+
s.Prog(s390x.ASYNC)
803805
case ssa.OpClobber:
804806
// TODO: implement for clobberdead experiment. Nop is ok for now.
805807
default:

src/cmd/compile/internal/ssa/gen/S390X.rules

+9-10
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,15 @@
139139
(RoundToEven x) -> (FIDBR [4] x)
140140
(Round x) -> (FIDBR [1] x)
141141

142-
// Atomic loads.
143-
(AtomicLoad8 ptr mem) -> (MOVBZatomicload ptr mem)
144-
(AtomicLoad32 ptr mem) -> (MOVWZatomicload ptr mem)
145-
(AtomicLoad64 ptr mem) -> (MOVDatomicload ptr mem)
146-
(AtomicLoadPtr ptr mem) -> (MOVDatomicload ptr mem)
147-
148-
// Atomic stores.
149-
(AtomicStore32 ptr val mem) -> (MOVWatomicstore ptr val mem)
150-
(AtomicStore64 ptr val mem) -> (MOVDatomicstore ptr val mem)
151-
(AtomicStorePtrNoWB ptr val mem) -> (MOVDatomicstore ptr val mem)
142+
// Atomic loads and stores.
143+
// The SYNC instruction (fast-BCR-serialization) prevents store-load
144+
// reordering. Other sequences of memory operations (load-load,
145+
// store-store and load-store) are already guaranteed not to be reordered.
146+
(AtomicLoad(8|32|Acq32|64|Ptr) ptr mem) -> (MOV(BZ|WZ|WZ|D|D)atomicload ptr mem)
147+
(AtomicStore(32|64|PtrNoWB) ptr val mem) -> (SYNC (MOV(W|D|D)atomicstore ptr val mem))
148+
149+
// Store-release doesn't require store-load ordering.
150+
(AtomicStoreRel32 ptr val mem) -> (MOVWatomicstore ptr val mem)
152151

153152
// Atomic adds.
154153
(AtomicAdd32 ptr val mem) -> (AddTupleFirst32 val (LAA ptr val mem))

src/cmd/compile/internal/ssa/gen/S390XOps.go

+5
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,8 @@ func init() {
187187
fpstore = regInfo{inputs: []regMask{ptrspsb, fp, 0}}
188188
fpstoreidx = regInfo{inputs: []regMask{ptrsp, ptrsp, fp, 0}}
189189

190+
sync = regInfo{inputs: []regMask{0}}
191+
190192
// LoweredAtomicCas may overwrite arg1, so force it to R0 for now.
191193
cas = regInfo{inputs: []regMask{ptrsp, r0, gpsp, 0}, outputs: []regMask{gp, 0}, clobbers: r0}
192194

@@ -493,6 +495,9 @@ func init() {
493495
{name: "FlagGT"}, // CC=2 (greater than)
494496
{name: "FlagOV"}, // CC=3 (overflow)
495497

498+
// Fast-BCR-serialization to ensure store-load ordering.
499+
{name: "SYNC", argLength: 1, reg: sync, asm: "SYNC", typ: "Mem"},
500+
496501
// Atomic loads. These are just normal loads but return <value,memory> tuples
497502
// so they can be properly ordered with other loads.
498503
// load from arg0+auxint+aux. arg1=mem.

src/cmd/compile/internal/ssa/opGen.go

+7
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteS390X.go

+53-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/runtime/internal/atomic/asm_s390x.s

+24
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,30 @@
44

55
#include "textflag.h"
66

7+
// func Store(ptr *uint32, val uint32)
8+
TEXT ·Store(SB), NOSPLIT, $0
9+
MOVD ptr+0(FP), R2
10+
MOVWZ val+8(FP), R3
11+
MOVW R3, 0(R2)
12+
SYNC
13+
RET
14+
15+
// func Store64(ptr *uint64, val uint64)
16+
TEXT ·Store64(SB), NOSPLIT, $0
17+
MOVD ptr+0(FP), R2
18+
MOVD val+8(FP), R3
19+
MOVD R3, 0(R2)
20+
SYNC
21+
RET
22+
23+
// func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
24+
TEXT ·StorepNoWB(SB), NOSPLIT, $0
25+
MOVD ptr+0(FP), R2
26+
MOVD val+8(FP), R3
27+
MOVD R3, 0(R2)
28+
SYNC
29+
RET
30+
731
// func Cas(ptr *uint32, old, new uint32) bool
832
// Atomically:
933
// if *ptr == old {

src/runtime/internal/atomic/atomic_s390x.go

+6-19
Original file line numberDiff line numberDiff line change
@@ -36,30 +36,17 @@ func LoadAcq(ptr *uint32) uint32 {
3636
return *ptr
3737
}
3838

39-
//go:noinline
40-
//go:nosplit
41-
func Store(ptr *uint32, val uint32) {
42-
*ptr = val
43-
}
44-
45-
//go:noinline
46-
//go:nosplit
47-
func Store64(ptr *uint64, val uint64) {
48-
*ptr = val
49-
}
39+
//go:noescape
40+
func Store(ptr *uint32, val uint32)
5041

51-
//go:notinheap
52-
type noWB struct{}
42+
//go:noescape
43+
func Store64(ptr *uint64, val uint64)
5344

5445
// NO go:noescape annotation; see atomic_pointer.go.
55-
//go:noinline
56-
//go:nosplit
57-
func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer) {
58-
*(**noWB)(ptr) = (*noWB)(val)
59-
}
46+
func StorepNoWB(ptr unsafe.Pointer, val unsafe.Pointer)
6047

61-
//go:noinline
6248
//go:nosplit
49+
//go:noinline
6350
func StoreRel(ptr *uint32, val uint32) {
6451
*ptr = val
6552
}

0 commit comments

Comments
 (0)