Skip to content

Commit d85fd5a

Browse files
committed
[WebAssembly] Add atomic.fence instruction
Summary: This adds the `atomic.fence` instruction: https://github.com/WebAssembly/threads/blob/master/proposals/threads/Overview.md#fence-operator We now emit the new `atomic.fence` instruction for multithread fences, rather than the previous `atomic.rmw` hack. Reviewers: dschuff Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, jfb, tlively, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66794 llvm-svn: 370272
1 parent 5be949e commit d85fd5a

File tree

5 files changed

+115
-100
lines changed

5 files changed

+115
-100
lines changed

llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp

+23-75
Original file line numberDiff line numberDiff line change
@@ -88,88 +88,36 @@ void WebAssemblyDAGToDAGISel::Select(SDNode *Node) {
8888

8989
uint64_t SyncScopeID =
9090
cast<ConstantSDNode>(Node->getOperand(2).getNode())->getZExtValue();
91+
MachineSDNode *Fence = nullptr;
9192
switch (SyncScopeID) {
92-
case SyncScope::SingleThread: {
93+
case SyncScope::SingleThread:
9394
// We lower a single-thread fence to a pseudo compiler barrier instruction
9495
// preventing instruction reordering. This will not be emitted in final
9596
// binary.
96-
MachineSDNode *Fence =
97-
CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
98-
DL, // debug loc
99-
MVT::Other, // outchain type
100-
Node->getOperand(0) // inchain
101-
);
102-
ReplaceNode(Node, Fence);
103-
CurDAG->RemoveDeadNode(Node);
104-
return;
105-
}
106-
107-
case SyncScope::System: {
108-
// For non-emscripten systems, we have not decided on what we should
109-
// translate fences to yet.
110-
if (!Subtarget->getTargetTriple().isOSEmscripten())
111-
report_fatal_error(
112-
"ATOMIC_FENCE is not yet supported in non-emscripten OSes");
113-
114-
// Wasm does not have a fence instruction, but because all atomic
115-
// instructions in wasm are sequentially consistent, we translate a
116-
// fence to an idempotent atomic RMW instruction to a linear memory
117-
// address. All atomic instructions in wasm are sequentially consistent,
118-
// but this is to ensure a fence also prevents reordering of non-atomic
119-
// instructions in the VM. Even though LLVM IR's fence instruction does
120-
// not say anything about its relationship with non-atomic instructions,
121-
// we think this is more user-friendly.
122-
//
123-
// While any address can work, here we use a value stored in
124-
// __stack_pointer wasm global because there's high chance that area is
125-
// in cache.
126-
//
127-
// So the selected instructions will be in the form of:
128-
// %addr = get_global $__stack_pointer
129-
// %0 = i32.const 0
130-
// i32.atomic.rmw.or %addr, %0
131-
SDValue StackPtrSym = CurDAG->getTargetExternalSymbol(
132-
"__stack_pointer", TLI->getPointerTy(CurDAG->getDataLayout()));
133-
MachineSDNode *GetGlobal =
134-
CurDAG->getMachineNode(WebAssembly::GLOBAL_GET_I32, // opcode
135-
DL, // debug loc
136-
MVT::i32, // result type
137-
StackPtrSym // __stack_pointer symbol
138-
);
139-
140-
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
141-
auto *MMO = MF.getMachineMemOperand(
142-
MachinePointerInfo::getUnknownStack(MF),
143-
// FIXME Volatile isn't really correct, but currently all LLVM
144-
// atomic instructions are treated as volatiles in the backend, so
145-
// we should be consistent.
146-
MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
147-
MachineMemOperand::MOStore,
148-
4, 4, AAMDNodes(), nullptr, SyncScope::System,
149-
AtomicOrdering::SequentiallyConsistent);
150-
MachineSDNode *Const0 =
151-
CurDAG->getMachineNode(WebAssembly::CONST_I32, DL, MVT::i32, Zero);
152-
MachineSDNode *AtomicRMW = CurDAG->getMachineNode(
153-
WebAssembly::ATOMIC_RMW_OR_I32, // opcode
154-
DL, // debug loc
155-
MVT::i32, // result type
156-
MVT::Other, // outchain type
157-
{
158-
Zero, // alignment
159-
Zero, // offset
160-
SDValue(GetGlobal, 0), // __stack_pointer
161-
SDValue(Const0, 0), // OR with 0 to make it idempotent
162-
Node->getOperand(0) // inchain
163-
});
164-
165-
CurDAG->setNodeMemRefs(AtomicRMW, {MMO});
166-
ReplaceUses(SDValue(Node, 0), SDValue(AtomicRMW, 1));
167-
CurDAG->RemoveDeadNode(Node);
168-
return;
169-
}
97+
Fence = CurDAG->getMachineNode(WebAssembly::COMPILER_FENCE,
98+
DL, // debug loc
99+
MVT::Other, // outchain type
100+
Node->getOperand(0) // inchain
101+
);
102+
break;
103+
case SyncScope::System:
104+
// Currently wasm only supports sequentially consistent atomics, so we
105+
// always set the order to 0 (sequentially consistent).
106+
Fence = CurDAG->getMachineNode(
107+
WebAssembly::ATOMIC_FENCE,
108+
DL, // debug loc
109+
MVT::Other, // outchain type
110+
CurDAG->getTargetConstant(0, DL, MVT::i32), // order
111+
Node->getOperand(0) // inchain
112+
);
113+
break;
170114
default:
171115
llvm_unreachable("Unknown scope!");
172116
}
117+
118+
ReplaceNode(Node, Fence);
119+
CurDAG->RemoveDeadNode(Node);
120+
return;
173121
}
174122

175123
case ISD::GlobalTLSAddress: {

llvm/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td

+13-10
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,19 @@ def : WaitPatGlobalAddrOffOnly<i32, int_wasm_atomic_wait_i32, ATOMIC_WAIT_I32>;
126126
def : WaitPatGlobalAddrOffOnly<i64, int_wasm_atomic_wait_i64, ATOMIC_WAIT_I64>;
127127
} // Predicates = [HasAtomics]
128128

129+
//===----------------------------------------------------------------------===//
130+
// Atomic fences
131+
//===----------------------------------------------------------------------===//
132+
133+
// A compiler fence instruction that prevents reordering of instructions.
134+
let Defs = [ARGUMENTS] in {
135+
let isPseudo = 1, hasSideEffects = 1 in
136+
defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
137+
let hasSideEffects = 1 in
138+
defm ATOMIC_FENCE : ATOMIC_NRI<(outs), (ins i8imm:$flags), [], "atomic.fence",
139+
0x03>;
140+
} // Defs = [ARGUMENTS]
141+
129142
//===----------------------------------------------------------------------===//
130143
// Atomic loads
131144
//===----------------------------------------------------------------------===//
@@ -887,13 +900,3 @@ defm : TerRMWTruncExtPattern<
887900
ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
888901
ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
889902
ATOMIC_RMW32_U_CMPXCHG_I64>;
890-
891-
//===----------------------------------------------------------------------===//
892-
// Atomic fences
893-
//===----------------------------------------------------------------------===//
894-
895-
// A compiler fence instruction that prevents reordering of instructions.
896-
let Defs = [ARGUMENTS] in {
897-
let isPseudo = 1, hasSideEffects = 1 in
898-
defm COMPILER_FENCE : ATOMIC_NRI<(outs), (ins), [], "compiler_fence">;
899-
} // Defs = [ARGUMENTS]

llvm/test/CodeGen/WebAssembly/atomic-fence.ll

+8-15
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,12 @@
11
; RUN: llc < %s | FileCheck %s --check-prefix NOATOMIC
2-
; RUN: not llc < %s -mtriple=wasm32-unknown-unknown -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
3-
; RUN: not llc < %s -mtriple=wasm32-unknown-wasi -mattr=+atomics,+sign-ext 2>&1 | FileCheck %s --check-prefixes NOEMSCRIPTEN
4-
; RUN: llc < %s -mtriple=wasm32-unknown-emscripten -asm-verbose=false -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics,+sign-ext | FileCheck %s
2+
; RUN: llc < %s -asm-verbose=false -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+atomics | FileCheck %s
53

64
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
75
target triple = "wasm32-unknown-unknown"
86

9-
; NOEMSCRIPTEN: LLVM ERROR: ATOMIC_FENCE is not yet supported in non-emscripten OSes
10-
11-
; A multithread fence turns into 'global.get $__stack_pointer' followed by an
12-
; idempotent atomicrmw instruction.
7+
; A multithread fence is lowered to an atomic.fence instruction.
138
; CHECK-LABEL: multithread_fence:
14-
; CHECK: global.get $push[[SP:[0-9]+]]=, __stack_pointer
15-
; CHECK-NEXT: i32.const $push[[ZERO:[0-9]+]]=, 0
16-
; CHECK-NEXT: i32.atomic.rmw.or $drop=, 0($pop[[SP]]), $pop[[ZERO]]
9+
; CHECK: atomic.fence
1710
; NOATOMIC-NOT: i32.atomic.rmw.or
1811
define void @multithread_fence() {
1912
fence seq_cst
@@ -23,10 +16,9 @@ define void @multithread_fence() {
2316
; Fences with weaker memory orderings than seq_cst should be treated the same
2417
; because atomic memory access in wasm are sequentially consistent.
2518
; CHECK-LABEL: multithread_weak_fence:
26-
; CHECK: global.get $push{{.+}}=, __stack_pointer
27-
; CHECK: i32.atomic.rmw.or
28-
; CHECK: i32.atomic.rmw.or
29-
; CHECK: i32.atomic.rmw.or
19+
; CHECK: atomic.fence
20+
; CHECK-NEXT: atomic.fence
21+
; CHECK-NEXT: atomic.fence
3022
define void @multithread_weak_fence() {
3123
fence acquire
3224
fence release
@@ -37,7 +29,8 @@ define void @multithread_weak_fence() {
3729
; A singlethread fence becomes compiler_fence instruction, a pseudo instruction
3830
; that acts as a compiler barrier. The barrier should not be emitted to .s file.
3931
; CHECK-LABEL: singlethread_fence:
40-
; CHECK-NOT: compiler_fence
32+
; CHECK-NOT: compiler_fence
33+
; CHECK-NOT: atomic_fence
4134
define void @singlethread_fence() {
4235
fence syncscope("singlethread") seq_cst
4336
fence syncscope("singlethread") acquire
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# RUN: llc -mtriple=wasm32-unknown-unknown -run-pass wasm-reg-stackify -run-pass wasm-explicit-locals %s -o - | FileCheck %s
2+
3+
# In the two tests below, without compiler_fence or atomic.fence in between,
4+
# atomic.notify and i32.add will be reordered by register stackify pass to meet
5+
# 'call @foo''s requirements. But because we have fences between atomic.notify
6+
# and i32.add, they cannot be reordered, and local.set and local.get are
7+
# inserted to save and load atomic.notify's return value.
8+
9+
--- |
10+
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
11+
target triple = "wasm32-unknown-unknown"
12+
13+
declare void @foo(i32, i32)
14+
define void @compiler_fence_test(i32) {
15+
ret void
16+
}
17+
define void @atomic_fence_test(i32) {
18+
ret void
19+
}
20+
...
21+
---
22+
# CHECK-LABEL: name: compiler_fence_test
23+
name: compiler_fence_test
24+
liveins:
25+
- { reg: '$arguments' }
26+
tracksRegLiveness: true
27+
body: |
28+
bb.0:
29+
; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
30+
; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
31+
; CHECK: COMPILER_FENCE
32+
; CHECK: ADD_I32
33+
; CHECK: LOCAL_GET_I32 [[LOCAL]]
34+
; CHECK: CALL_VOID @foo
35+
36+
liveins: $arguments
37+
%0:i32 = CONST_I32 0, implicit-def $arguments
38+
%1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
39+
COMPILER_FENCE implicit-def $arguments
40+
%2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
41+
CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
42+
RETURN_VOID implicit-def $arguments
43+
...
44+
45+
---
46+
# CHECK-LABEL: name: atomic_fence_test
47+
name: atomic_fence_test
48+
liveins:
49+
- { reg: '$arguments' }
50+
tracksRegLiveness: true
51+
body: |
52+
bb.0:
53+
; CHECK: %[[REG:[0-9]+]]:i32 = ATOMIC_NOTIFY
54+
; CHECK: LOCAL_SET_I32 [[LOCAL:[0-9]+]], %[[REG]]
55+
; CHECK: ATOMIC_FENCE
56+
; CHECK: ADD_I32
57+
; CHECK: LOCAL_GET_I32 [[LOCAL]]
58+
; CHECK: CALL_VOID @foo
59+
60+
liveins: $arguments
61+
%0:i32 = CONST_I32 0, implicit-def $arguments
62+
%1:i32 = ATOMIC_NOTIFY 2, 0, %0:i32, %0:i32, implicit-def $arguments
63+
ATOMIC_FENCE 0, implicit-def $arguments
64+
%2:i32 = ADD_I32 %0:i32, %0:i32, implicit-def $arguments
65+
CALL_VOID @foo, %2:i32, %1:i32, implicit-def $arguments
66+
RETURN_VOID implicit-def $arguments
67+
...
68+

llvm/test/MC/WebAssembly/atomics-encodings.s

+3
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ main:
1010
# CHECK: i64.atomic.wait 0 # encoding: [0xfe,0x02,0x03,0x00]
1111
i64.atomic.wait 0
1212

13+
# CHECK: atomic.fence # encoding: [0xfe,0x03,0x00]
14+
atomic.fence
15+
1316
# CHECK: i32.atomic.load 0 # encoding: [0xfe,0x10,0x02,0x00]
1417
i32.atomic.load 0
1518
# CHECK: i64.atomic.load 4 # encoding: [0xfe,0x11,0x03,0x04]

0 commit comments

Comments
 (0)