Skip to content

Commit 7c6bfd9

Browse files
committed
[WebAssembly] v128.load{8,16,32,64}_lane instructions
Prototype the newly proposed load_lane instructions, as specified in WebAssembly/simd#350. Since these instructions are not available to origin trial users on Chrome stable, make them opt-in by only selecting them from intrinsics rather than normal ISel patterns. Since we only need rough prototypes to measure performance right now, this commit does not implement all the load and store patterns that would be necessary to make full use of the offset immediate. However, the full suite of offset tests is included to make it easy to track improvements in the future. Since these are the first instructions to have a memarg immediate as well as an additional immediate, the disassembler needed some additional hacks to be able to parse them correctly. Making that code more principled is left as future work. Differential Revision: https://reviews.llvm.org/D89366
1 parent 2de6937 commit 7c6bfd9

File tree

10 files changed

+1314
-8
lines changed

10 files changed

+1314
-8
lines changed

clang/include/clang/Basic/BuiltinsWebAssembly.def

+11-2
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,17 @@ TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16UcV8UsV8Us", "nc", "simd
171171
TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
172172
TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8UsV4UiV4Ui", "nc", "simd128")
173173

174-
TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4ii*", "nU", "simd128")
175-
TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLi*", "nU", "simd128")
174+
TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4ii*", "n", "simd128")
175+
TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLi*", "n", "simd128")
176+
177+
TARGET_BUILTIN(__builtin_wasm_load8_lane, "V16ScSc*V16ScIi", "n", "simd128")
178+
TARGET_BUILTIN(__builtin_wasm_load16_lane, "V8ss*V8sIi", "n", "simd128")
179+
TARGET_BUILTIN(__builtin_wasm_load32_lane, "V4ii*V4iIi", "n", "simd128")
180+
TARGET_BUILTIN(__builtin_wasm_load64_lane, "V2LLiLLi*V2LLiIi", "n", "simd128")
181+
TARGET_BUILTIN(__builtin_wasm_store8_lane, "vSc*V16ScIi", "n", "simd128")
182+
TARGET_BUILTIN(__builtin_wasm_store16_lane, "vs*V8sIi", "n", "simd128")
183+
TARGET_BUILTIN(__builtin_wasm_store32_lane, "vi*V4iIi", "n", "simd128")
184+
TARGET_BUILTIN(__builtin_wasm_store64_lane, "vLLi*V2LLiIi", "n", "simd128")
176185

177186
#undef BUILTIN
178187
#undef TARGET_BUILTIN

clang/lib/CodeGen/CGBuiltin.cpp

+46
Original file line numberDiff line numberDiff line change
@@ -16711,6 +16711,52 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
1671116711
Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero);
1671216712
return Builder.CreateCall(Callee, {Ptr});
1671316713
}
16714+
// Lane-wise SIMD load/store builtins. All eight variants take the same
// operands (pointer, vector, constant lane index) and differ only in the
// wasm intrinsic they are lowered to.
case WebAssembly::BI__builtin_wasm_load8_lane:
16715+
case WebAssembly::BI__builtin_wasm_load16_lane:
16716+
case WebAssembly::BI__builtin_wasm_load32_lane:
16717+
case WebAssembly::BI__builtin_wasm_load64_lane:
16718+
case WebAssembly::BI__builtin_wasm_store8_lane:
16719+
case WebAssembly::BI__builtin_wasm_store16_lane:
16720+
case WebAssembly::BI__builtin_wasm_store32_lane:
16721+
case WebAssembly::BI__builtin_wasm_store64_lane: {
16722+
Value *Ptr = EmitScalarExpr(E->getArg(0));
16723+
Value *Vec = EmitScalarExpr(E->getArg(1));
// The lane index (argument 2) is evaluated as an integer constant expression
// and materialized as an LLVM constant. The assert records the expectation
// that a non-constant index never reaches this point.
16724+
Optional<llvm::APSInt> LaneIdxConst =
16725+
E->getArg(2)->getIntegerConstantExpr(getContext());
16726+
assert(LaneIdxConst && "Constant arg isn't actually constant?");
16727+
Value *LaneIdx = llvm::ConstantInt::get(getLLVMContext(), *LaneIdxConst);
16728+
unsigned IntNo;
// Select the intrinsic that corresponds to the builtin being expanded.
16729+
switch (BuiltinID) {
16730+
case WebAssembly::BI__builtin_wasm_load8_lane:
16731+
IntNo = Intrinsic::wasm_load8_lane;
16732+
break;
16733+
case WebAssembly::BI__builtin_wasm_load16_lane:
16734+
IntNo = Intrinsic::wasm_load16_lane;
16735+
break;
16736+
case WebAssembly::BI__builtin_wasm_load32_lane:
16737+
IntNo = Intrinsic::wasm_load32_lane;
16738+
break;
16739+
case WebAssembly::BI__builtin_wasm_load64_lane:
16740+
IntNo = Intrinsic::wasm_load64_lane;
16741+
break;
16742+
case WebAssembly::BI__builtin_wasm_store8_lane:
16743+
IntNo = Intrinsic::wasm_store8_lane;
16744+
break;
16745+
case WebAssembly::BI__builtin_wasm_store16_lane:
16746+
IntNo = Intrinsic::wasm_store16_lane;
16747+
break;
16748+
case WebAssembly::BI__builtin_wasm_store32_lane:
16749+
IntNo = Intrinsic::wasm_store32_lane;
16750+
break;
16751+
case WebAssembly::BI__builtin_wasm_store64_lane:
16752+
IntNo = Intrinsic::wasm_store64_lane;
16753+
break;
16754+
default:
16755+
llvm_unreachable("unexpected builtin ID");
16756+
}
// Forward the three operands to the intrinsic in the same order as the
// builtin's arguments.
16757+
Function *Callee = CGM.getIntrinsic(IntNo);
16758+
return Builder.CreateCall(Callee, {Ptr, Vec, LaneIdx});
16759+
}
1671416760
case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
1671516761
Value *Ops[18];
1671616762
size_t OpIdx = 0;

clang/test/CodeGen/builtins-wasm.c

+56
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,62 @@ f64x2 replace_lane_f64x2(f64x2 v, double x) {
284284
// WEBASSEMBLY-NEXT: ret
285285
}
286286

287+
// Exercises __builtin_wasm_load8_lane with lane index 0. The check lines
// expect a tail call to @llvm.wasm.load8.lane taking the pointer, the vector
// and the constant index as (i8*, <16 x i8>, i32 0).
i8x16 load8_lane(signed char *p, i8x16 v) {
288+
return __builtin_wasm_load8_lane(p, v, 0);
289+
// WEBASSEMBLY: tail call <16 x i8> @llvm.wasm.load8.lane(
290+
// WEBASSEMBLY-SAME: i8* %p, <16 x i8> %v, i32 0)
291+
// WEBASSEMBLY-NEXT: ret
292+
}
293+
294+
// Exercises __builtin_wasm_load16_lane with lane index 0. The check lines
// expect a tail call to @llvm.wasm.load16.lane taking the pointer, the vector
// and the constant index as (i16*, <8 x i16>, i32 0).
i16x8 load16_lane(short *p, i16x8 v) {
295+
return __builtin_wasm_load16_lane(p, v, 0);
296+
// WEBASSEMBLY: tail call <8 x i16> @llvm.wasm.load16.lane(
297+
// WEBASSEMBLY-SAME: i16* %p, <8 x i16> %v, i32 0)
298+
// WEBASSEMBLY-NEXT: ret
299+
}
300+
301+
// Exercises __builtin_wasm_load32_lane with lane index 0. The check lines
// expect a tail call to @llvm.wasm.load32.lane taking the pointer, the vector
// and the constant index as (i32*, <4 x i32>, i32 0).
i32x4 load32_lane(int *p, i32x4 v) {
302+
return __builtin_wasm_load32_lane(p, v, 0);
303+
// WEBASSEMBLY: tail call <4 x i32> @llvm.wasm.load32.lane(
304+
// WEBASSEMBLY-SAME: i32* %p, <4 x i32> %v, i32 0)
305+
// WEBASSEMBLY-NEXT: ret
306+
}
307+
308+
// Exercises __builtin_wasm_load64_lane with lane index 0. The check lines
// expect a tail call to @llvm.wasm.load64.lane taking the pointer, the vector
// and the constant index as (i64*, <2 x i64>, i32 0).
i64x2 load64_lane(long long *p, i64x2 v) {
309+
return __builtin_wasm_load64_lane(p, v, 0);
310+
// WEBASSEMBLY: tail call <2 x i64> @llvm.wasm.load64.lane(
311+
// WEBASSEMBLY-SAME: i64* %p, <2 x i64> %v, i32 0)
312+
// WEBASSEMBLY-NEXT: ret
313+
}
314+
315+
// Exercises __builtin_wasm_store8_lane with lane index 0. The check lines
// expect a void call to @llvm.wasm.store8.lane taking the pointer, the vector
// and the constant index as (i8*, <16 x i8>, i32 0).
void store8_lane(signed char *p, i8x16 v) {
316+
__builtin_wasm_store8_lane(p, v, 0);
317+
// WEBASSEMBLY: call void @llvm.wasm.store8.lane(
318+
// WEBASSEMBLY-SAME: i8* %p, <16 x i8> %v, i32 0)
319+
// WEBASSEMBLY-NEXT: ret
320+
}
321+
322+
// Exercises __builtin_wasm_store16_lane with lane index 0. The check lines
// expect a void call to @llvm.wasm.store16.lane taking the pointer, the vector
// and the constant index as (i16*, <8 x i16>, i32 0).
void store16_lane(short *p, i16x8 v) {
323+
__builtin_wasm_store16_lane(p, v, 0);
324+
// WEBASSEMBLY: call void @llvm.wasm.store16.lane(
325+
// WEBASSEMBLY-SAME: i16* %p, <8 x i16> %v, i32 0)
326+
// WEBASSEMBLY-NEXT: ret
327+
}
328+
329+
// Exercises __builtin_wasm_store32_lane with lane index 0. The check lines
// expect a void call to @llvm.wasm.store32.lane taking the pointer, the vector
// and the constant index as (i32*, <4 x i32>, i32 0).
void store32_lane(int *p, i32x4 v) {
330+
__builtin_wasm_store32_lane(p, v, 0);
331+
// WEBASSEMBLY: call void @llvm.wasm.store32.lane(
332+
// WEBASSEMBLY-SAME: i32* %p, <4 x i32> %v, i32 0)
333+
// WEBASSEMBLY-NEXT: ret
334+
}
335+
336+
// Exercises __builtin_wasm_store64_lane with lane index 0. The check lines
// expect a void call to @llvm.wasm.store64.lane taking the pointer, the vector
// and the constant index as (i64*, <2 x i64>, i32 0).
void store64_lane(long long *p, i64x2 v) {
337+
__builtin_wasm_store64_lane(p, v, 0);
338+
// WEBASSEMBLY: call void @llvm.wasm.store64.lane(
339+
// WEBASSEMBLY-SAME: i64* %p, <2 x i64> %v, i32 0)
340+
// WEBASSEMBLY-NEXT: ret
341+
}
342+
287343
i8x16 add_saturate_s_i8x16(i8x16 x, i8x16 y) {
288344
return __builtin_wasm_add_saturate_s_i8x16(x, y);
289345
// WEBASSEMBLY: call <16 x i8> @llvm.sadd.sat.v16i8(

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

+46
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,52 @@ def int_wasm_load64_zero :
208208
[IntrReadMem, IntrArgMemOnly],
209209
"", [SDNPMemOperand]>;
210210

211+
// These intrinsics do not mark their lane index arguments as immediate because
212+
// that changes the corresponding SDNode from ISD::Constant to
213+
// ISD::TargetConstant, which would require extra complications in the ISel
214+
// tablegen patterns. TODO: Replace these intrinsics with normal ISel patterns
215+
// once the load_lane instructions are merged to the proposal.
216+
// Lane loads, one per lane width (8/16/32/64 bits). Operands are a pointer to
// the scalar element type, the input vector, and an i32 lane index; the result
// has the same vector type as the input vector. Each is flagged IntrReadMem
// and IntrArgMemOnly and carries the SDNPMemOperand property.
def int_wasm_load8_lane :
217+
Intrinsic<[llvm_v16i8_ty],
218+
[LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty],
219+
[IntrReadMem, IntrArgMemOnly],
220+
"", [SDNPMemOperand]>;
221+
def int_wasm_load16_lane :
222+
Intrinsic<[llvm_v8i16_ty],
223+
[LLVMPointerType<llvm_i16_ty>, llvm_v8i16_ty, llvm_i32_ty],
224+
[IntrReadMem, IntrArgMemOnly],
225+
"", [SDNPMemOperand]>;
226+
def int_wasm_load32_lane :
227+
Intrinsic<[llvm_v4i32_ty],
228+
[LLVMPointerType<llvm_i32_ty>, llvm_v4i32_ty, llvm_i32_ty],
229+
[IntrReadMem, IntrArgMemOnly],
230+
"", [SDNPMemOperand]>;
231+
def int_wasm_load64_lane :
232+
Intrinsic<[llvm_v2i64_ty],
233+
[LLVMPointerType<llvm_i64_ty>, llvm_v2i64_ty, llvm_i32_ty],
234+
[IntrReadMem, IntrArgMemOnly],
235+
"", [SDNPMemOperand]>;
236+
// Lane stores, one per lane width (8/16/32/64 bits). Operands are a pointer to
// the scalar element type, the source vector, and an i32 lane index; there is
// no result. Each is flagged IntrWriteMem and IntrArgMemOnly and carries the
// SDNPMemOperand property.
def int_wasm_store8_lane :
237+
Intrinsic<[],
238+
[LLVMPointerType<llvm_i8_ty>, llvm_v16i8_ty, llvm_i32_ty],
239+
[IntrWriteMem, IntrArgMemOnly],
240+
"", [SDNPMemOperand]>;
241+
def int_wasm_store16_lane :
242+
Intrinsic<[],
243+
[LLVMPointerType<llvm_i16_ty>, llvm_v8i16_ty, llvm_i32_ty],
244+
[IntrWriteMem, IntrArgMemOnly],
245+
"", [SDNPMemOperand]>;
246+
def int_wasm_store32_lane :
247+
Intrinsic<[],
248+
[LLVMPointerType<llvm_i32_ty>, llvm_v4i32_ty, llvm_i32_ty],
249+
[IntrWriteMem, IntrArgMemOnly],
250+
"", [SDNPMemOperand]>;
251+
def int_wasm_store64_lane :
252+
Intrinsic<[],
253+
[LLVMPointerType<llvm_i64_ty>, llvm_v2i64_ty, llvm_i32_ty],
254+
[IntrWriteMem, IntrArgMemOnly],
255+
"", [SDNPMemOperand]>;
256+
211257
//===----------------------------------------------------------------------===//
212258
// Thread-local storage intrinsics
213259
//===----------------------------------------------------------------------===//

llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp

+6
Original file line numberDiff line numberDiff line change
@@ -421,6 +421,12 @@ class WebAssemblyAsmParser final : public MCTargetAsmParser {
421421
return error("Expected integer constant");
422422
parseSingleInteger(false, Operands);
423423
} else {
424+
// v128.{load,store}{8,16,32,64}_lane has both a memarg and a lane
425+
// index. We need to avoid parsing an extra alignment operand for the
426+
// lane index.
427+
auto IsLoadStoreLane = InstName.find("_lane") != StringRef::npos;
428+
if (IsLoadStoreLane && Operands.size() == 4)
429+
return false;
424430
// Alignment not specified (or atomics, must use default alignment).
425431
// We can't just call WebAssembly::GetDefaultP2Align since we don't have
426432
// an opcode until after the assembly matcher, so set a default to fix

llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h

+12-4
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,9 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
177177
WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I32)
178178
WASM_LOAD_STORE(ATOMIC_RMW8_U_CMPXCHG_I64)
179179
WASM_LOAD_STORE(LOAD_SPLAT_v8x16)
180-
return 0;
180+
WASM_LOAD_STORE(LOAD_LANE_v16i8)
181+
WASM_LOAD_STORE(STORE_LANE_v16i8)
182+
return 0;
181183
WASM_LOAD_STORE(LOAD16_S_I32)
182184
WASM_LOAD_STORE(LOAD16_U_I32)
183185
WASM_LOAD_STORE(LOAD16_S_I64)
@@ -203,7 +205,9 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
203205
WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I32)
204206
WASM_LOAD_STORE(ATOMIC_RMW16_U_CMPXCHG_I64)
205207
WASM_LOAD_STORE(LOAD_SPLAT_v16x8)
206-
return 1;
208+
WASM_LOAD_STORE(LOAD_LANE_v8i16)
209+
WASM_LOAD_STORE(STORE_LANE_v8i16)
210+
return 1;
207211
WASM_LOAD_STORE(LOAD_I32)
208212
WASM_LOAD_STORE(LOAD_F32)
209213
WASM_LOAD_STORE(STORE_I32)
@@ -233,7 +237,9 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
233237
WASM_LOAD_STORE(ATOMIC_WAIT_I32)
234238
WASM_LOAD_STORE(LOAD_SPLAT_v32x4)
235239
WASM_LOAD_STORE(LOAD_ZERO_v4i32)
236-
return 2;
240+
WASM_LOAD_STORE(LOAD_LANE_v4i32)
241+
WASM_LOAD_STORE(STORE_LANE_v4i32)
242+
return 2;
237243
WASM_LOAD_STORE(LOAD_I64)
238244
WASM_LOAD_STORE(LOAD_F64)
239245
WASM_LOAD_STORE(STORE_I64)
@@ -256,7 +262,9 @@ inline unsigned GetDefaultP2AlignAny(unsigned Opc) {
256262
WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64)
257263
WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64)
258264
WASM_LOAD_STORE(LOAD_ZERO_v2i64)
259-
return 3;
265+
WASM_LOAD_STORE(LOAD_LANE_v2i64)
266+
WASM_LOAD_STORE(STORE_LANE_v2i64)
267+
return 3;
260268
WASM_LOAD_STORE(LOAD_V128)
261269
WASM_LOAD_STORE(STORE_V128)
262270
return 4;

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

+50
Original file line numberDiff line numberDiff line change
@@ -685,6 +685,56 @@ bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
685685
Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8);
686686
Info.flags = MachineMemOperand::MOLoad;
687687
return true;
688+
// Memory-operand description for the v128 lane load/store intrinsics. The
// accessed address is the intrinsic's first argument; the memory type is the
// scalar lane type, and the alignment is that type's size in bytes.
case Intrinsic::wasm_load8_lane:
689+
case Intrinsic::wasm_load16_lane:
690+
case Intrinsic::wasm_load32_lane:
691+
case Intrinsic::wasm_load64_lane:
692+
case Intrinsic::wasm_store8_lane:
693+
case Intrinsic::wasm_store16_lane:
694+
case Intrinsic::wasm_store32_lane:
695+
case Intrinsic::wasm_store64_lane: {
696+
MVT MemVT;
697+
Align MemAlign;
// Pick the memory type and alignment from the lane width; the load and store
// variants of a given width share the same values.
698+
switch (Intrinsic) {
699+
case Intrinsic::wasm_load8_lane:
700+
case Intrinsic::wasm_store8_lane:
701+
MemVT = MVT::i8;
702+
MemAlign = Align(1);
703+
break;
704+
case Intrinsic::wasm_load16_lane:
705+
case Intrinsic::wasm_store16_lane:
706+
MemVT = MVT::i16;
707+
MemAlign = Align(2);
708+
break;
709+
case Intrinsic::wasm_load32_lane:
710+
case Intrinsic::wasm_store32_lane:
711+
MemVT = MVT::i32;
712+
MemAlign = Align(4);
713+
break;
714+
case Intrinsic::wasm_load64_lane:
715+
case Intrinsic::wasm_store64_lane:
716+
MemVT = MVT::i64;
717+
MemAlign = Align(8);
718+
break;
719+
default:
720+
llvm_unreachable("unexpected intrinsic");
721+
}
// Loads are reported as ISD::INTRINSIC_W_CHAIN with the MOLoad flag; every
// other intrinsic in this group (the stores) is reported as
// ISD::INTRINSIC_VOID with the MOStore flag.
722+
if (Intrinsic == Intrinsic::wasm_load8_lane ||
723+
Intrinsic == Intrinsic::wasm_load16_lane ||
724+
Intrinsic == Intrinsic::wasm_load32_lane ||
725+
Intrinsic == Intrinsic::wasm_load64_lane) {
726+
Info.opc = ISD::INTRINSIC_W_CHAIN;
727+
Info.flags = MachineMemOperand::MOLoad;
728+
} else {
729+
Info.opc = ISD::INTRINSIC_VOID;
730+
Info.flags = MachineMemOperand::MOStore;
731+
}
// The pointer operand is argument 0 and no additional offset is recorded.
732+
Info.ptrVal = I.getArgOperand(0);
733+
Info.memVT = MemVT;
734+
Info.offset = 0;
735+
Info.align = MemAlign;
736+
return true;
737+
}
688738
default:
689739
return false;
690740
}

0 commit comments

Comments
 (0)