Skip to content

Commit dd33ae0

Browse files
authored
Prototype SIMD instructions implemented in LLVM (#3440)
- i64x2.eq (WebAssembly/simd#381)
- i64x2 widens (WebAssembly/simd#290)
- i64x2.bitmask (WebAssembly/simd#368)
- signselect ops (WebAssembly/simd#124)
1 parent 290147d commit dd33ae0

18 files changed

+717
-215
lines changed

scripts/gen-s-parser.py

+10
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@
332332
("i32x4.le_u", "makeBinary(s, BinaryOp::LeUVecI32x4)"),
333333
("i32x4.ge_s", "makeBinary(s, BinaryOp::GeSVecI32x4)"),
334334
("i32x4.ge_u", "makeBinary(s, BinaryOp::GeUVecI32x4)"),
335+
("i64x2.eq", "makeBinary(s, BinaryOp::EqVecI64x2)"),
335336
("f32x4.eq", "makeBinary(s, BinaryOp::EqVecF32x4)"),
336337
("f32x4.ne", "makeBinary(s, BinaryOp::NeVecF32x4)"),
337338
("f32x4.lt", "makeBinary(s, BinaryOp::LtVecF32x4)"),
@@ -350,6 +351,10 @@
350351
("v128.xor", "makeBinary(s, BinaryOp::XorVec128)"),
351352
("v128.andnot", "makeBinary(s, BinaryOp::AndNotVec128)"),
352353
("v128.bitselect", "makeSIMDTernary(s, SIMDTernaryOp::Bitselect)"),
354+
("v8x16.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec8x16)"),
355+
("v16x8.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec16x8)"),
356+
("v32x4.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec32x4)"),
357+
("v64x2.signselect", "makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec64x2)"),
353358
("v128.load8_lane", "makeSIMDLoadStoreLane(s, LoadLaneVec8x16)"),
354359
("v128.load16_lane", "makeSIMDLoadStoreLane(s, LoadLaneVec16x8)"),
355360
("v128.load32_lane", "makeSIMDLoadStoreLane(s, LoadLaneVec32x4)"),
@@ -427,6 +432,7 @@
427432
("i64x2.neg", "makeUnary(s, UnaryOp::NegVecI64x2)"),
428433
("i64x2.any_true", "makeUnary(s, UnaryOp::AnyTrueVecI64x2)"),
429434
("i64x2.all_true", "makeUnary(s, UnaryOp::AllTrueVecI64x2)"),
435+
("i64x2.bitmask", "makeUnary(s, UnaryOp::BitmaskVecI64x2)"),
430436
("i64x2.shl", "makeSIMDShift(s, SIMDShiftOp::ShlVecI64x2)"),
431437
("i64x2.shr_s", "makeSIMDShift(s, SIMDShiftOp::ShrSVecI64x2)"),
432438
("i64x2.shr_u", "makeSIMDShift(s, SIMDShiftOp::ShrUVecI64x2)"),
@@ -503,6 +509,10 @@
503509
("i32x4.widen_high_i16x8_s", "makeUnary(s, UnaryOp::WidenHighSVecI16x8ToVecI32x4)"),
504510
("i32x4.widen_low_i16x8_u", "makeUnary(s, UnaryOp::WidenLowUVecI16x8ToVecI32x4)"),
505511
("i32x4.widen_high_i16x8_u", "makeUnary(s, UnaryOp::WidenHighUVecI16x8ToVecI32x4)"),
512+
("i64x2.widen_low_i32x4_s", "makeUnary(s, UnaryOp::WidenLowSVecI32x4ToVecI64x2)"),
513+
("i64x2.widen_high_i32x4_s", "makeUnary(s, UnaryOp::WidenHighSVecI32x4ToVecI64x2)"),
514+
("i64x2.widen_low_i32x4_u", "makeUnary(s, UnaryOp::WidenLowUVecI32x4ToVecI64x2)"),
515+
("i64x2.widen_high_i32x4_u", "makeUnary(s, UnaryOp::WidenHighUVecI32x4ToVecI64x2)"),
506516
("v8x16.swizzle", "makeBinary(s, BinaryOp::SwizzleVec8x16)"),
507517
# reference types instructions
508518
# TODO Add table instructions

src/gen-s-parser.inc

+98-33
Original file line numberDiff line numberDiff line change
@@ -2312,38 +2312,49 @@ switch (op[0]) {
23122312
default: goto parse_error;
23132313
}
23142314
}
2315+
case 'b':
2316+
if (strcmp(op, "i64x2.bitmask") == 0) { return makeUnary(s, UnaryOp::BitmaskVecI64x2); }
2317+
goto parse_error;
23152318
case 'e': {
2316-
switch (op[9]) {
2317-
case 'm': {
2318-
switch (op[13]) {
2319-
case 'h': {
2320-
switch (op[24]) {
2321-
case 's':
2322-
if (strcmp(op, "i64x2.extmul_high_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulHighSVecI64x2); }
2323-
goto parse_error;
2324-
case 'u':
2325-
if (strcmp(op, "i64x2.extmul_high_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulHighUVecI64x2); }
2326-
goto parse_error;
2327-
default: goto parse_error;
2328-
}
2329-
}
2330-
case 'l': {
2331-
switch (op[23]) {
2332-
case 's':
2333-
if (strcmp(op, "i64x2.extmul_low_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulLowSVecI64x2); }
2334-
goto parse_error;
2335-
case 'u':
2336-
if (strcmp(op, "i64x2.extmul_low_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulLowUVecI64x2); }
2337-
goto parse_error;
2319+
switch (op[7]) {
2320+
case 'q':
2321+
if (strcmp(op, "i64x2.eq") == 0) { return makeBinary(s, BinaryOp::EqVecI64x2); }
2322+
goto parse_error;
2323+
case 'x': {
2324+
switch (op[9]) {
2325+
case 'm': {
2326+
switch (op[13]) {
2327+
case 'h': {
2328+
switch (op[24]) {
2329+
case 's':
2330+
if (strcmp(op, "i64x2.extmul_high_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulHighSVecI64x2); }
2331+
goto parse_error;
2332+
case 'u':
2333+
if (strcmp(op, "i64x2.extmul_high_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulHighUVecI64x2); }
2334+
goto parse_error;
2335+
default: goto parse_error;
2336+
}
2337+
}
2338+
case 'l': {
2339+
switch (op[23]) {
2340+
case 's':
2341+
if (strcmp(op, "i64x2.extmul_low_i32x4_s") == 0) { return makeBinary(s, BinaryOp::ExtMulLowSVecI64x2); }
2342+
goto parse_error;
2343+
case 'u':
2344+
if (strcmp(op, "i64x2.extmul_low_i32x4_u") == 0) { return makeBinary(s, BinaryOp::ExtMulLowUVecI64x2); }
2345+
goto parse_error;
2346+
default: goto parse_error;
2347+
}
2348+
}
23382349
default: goto parse_error;
23392350
}
23402351
}
2352+
case 'r':
2353+
if (strcmp(op, "i64x2.extract_lane") == 0) { return makeSIMDExtract(s, SIMDExtractOp::ExtractLaneVecI64x2, 2); }
2354+
goto parse_error;
23412355
default: goto parse_error;
23422356
}
23432357
}
2344-
case 'r':
2345-
if (strcmp(op, "i64x2.extract_lane") == 0) { return makeSIMDExtract(s, SIMDExtractOp::ExtractLaneVecI64x2, 2); }
2346-
goto parse_error;
23472358
default: goto parse_error;
23482359
}
23492360
}
@@ -2408,6 +2419,33 @@ switch (op[0]) {
24082419
default: goto parse_error;
24092420
}
24102421
}
2422+
case 'w': {
2423+
switch (op[12]) {
2424+
case 'h': {
2425+
switch (op[23]) {
2426+
case 's':
2427+
if (strcmp(op, "i64x2.widen_high_i32x4_s") == 0) { return makeUnary(s, UnaryOp::WidenHighSVecI32x4ToVecI64x2); }
2428+
goto parse_error;
2429+
case 'u':
2430+
if (strcmp(op, "i64x2.widen_high_i32x4_u") == 0) { return makeUnary(s, UnaryOp::WidenHighUVecI32x4ToVecI64x2); }
2431+
goto parse_error;
2432+
default: goto parse_error;
2433+
}
2434+
}
2435+
case 'l': {
2436+
switch (op[22]) {
2437+
case 's':
2438+
if (strcmp(op, "i64x2.widen_low_i32x4_s") == 0) { return makeUnary(s, UnaryOp::WidenLowSVecI32x4ToVecI64x2); }
2439+
goto parse_error;
2440+
case 'u':
2441+
if (strcmp(op, "i64x2.widen_low_i32x4_u") == 0) { return makeUnary(s, UnaryOp::WidenLowUVecI32x4ToVecI64x2); }
2442+
goto parse_error;
2443+
default: goto parse_error;
2444+
}
2445+
}
2446+
default: goto parse_error;
2447+
}
2448+
}
24112449
default: goto parse_error;
24122450
}
24132451
}
@@ -2962,18 +3000,42 @@ switch (op[0]) {
29623000
default: goto parse_error;
29633001
}
29643002
}
2965-
case '6':
2966-
if (strcmp(op, "v16x8.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec16x8); }
3003+
case '6': {
3004+
switch (op[6]) {
3005+
case 'l':
3006+
if (strcmp(op, "v16x8.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec16x8); }
3007+
goto parse_error;
3008+
case 's':
3009+
if (strcmp(op, "v16x8.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec16x8); }
3010+
goto parse_error;
3011+
default: goto parse_error;
3012+
}
3013+
}
3014+
default: goto parse_error;
3015+
}
3016+
}
3017+
case '3': {
3018+
switch (op[6]) {
3019+
case 'l':
3020+
if (strcmp(op, "v32x4.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec32x4); }
3021+
goto parse_error;
3022+
case 's':
3023+
if (strcmp(op, "v32x4.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec32x4); }
3024+
goto parse_error;
3025+
default: goto parse_error;
3026+
}
3027+
}
3028+
case '6': {
3029+
switch (op[6]) {
3030+
case 'l':
3031+
if (strcmp(op, "v64x2.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec64x2); }
3032+
goto parse_error;
3033+
case 's':
3034+
if (strcmp(op, "v64x2.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec64x2); }
29673035
goto parse_error;
29683036
default: goto parse_error;
29693037
}
29703038
}
2971-
case '3':
2972-
if (strcmp(op, "v32x4.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec32x4); }
2973-
goto parse_error;
2974-
case '6':
2975-
if (strcmp(op, "v64x2.load_splat") == 0) { return makeSIMDLoad(s, SIMDLoadOp::LoadSplatVec64x2); }
2976-
goto parse_error;
29773039
case '8': {
29783040
switch (op[6]) {
29793041
case 'l':
@@ -2984,6 +3046,9 @@ switch (op[0]) {
29843046
case 'h':
29853047
if (strcmp(op, "v8x16.shuffle") == 0) { return makeSIMDShuffle(s); }
29863048
goto parse_error;
3049+
case 'i':
3050+
if (strcmp(op, "v8x16.signselect") == 0) { return makeSIMDTernary(s, SIMDTernaryOp::SignSelectVec8x16); }
3051+
goto parse_error;
29873052
case 'w':
29883053
if (strcmp(op, "v8x16.swizzle") == 0) { return makeBinary(s, BinaryOp::SwizzleVec8x16); }
29893054
goto parse_error;

src/ir/cost.h

+10
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> {
191191
case NegVecI64x2:
192192
case AnyTrueVecI64x2:
193193
case AllTrueVecI64x2:
194+
case BitmaskVecI64x2:
194195
case AbsVecF32x4:
195196
case NegVecF32x4:
196197
case SqrtVecF32x4:
@@ -221,6 +222,10 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> {
221222
case WidenHighSVecI16x8ToVecI32x4:
222223
case WidenLowUVecI16x8ToVecI32x4:
223224
case WidenHighUVecI16x8ToVecI32x4:
225+
case WidenLowSVecI32x4ToVecI64x2:
226+
case WidenHighSVecI32x4ToVecI64x2:
227+
case WidenLowUVecI32x4ToVecI64x2:
228+
case WidenHighUVecI32x4ToVecI64x2:
224229
ret = 1;
225230
break;
226231
case InvalidUnary:
@@ -363,6 +368,7 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> {
363368
case GtUVecI32x4:
364369
case GeSVecI32x4:
365370
case GeUVecI32x4:
371+
case EqVecI64x2:
366372
case EqVecF32x4:
367373
case NeVecF32x4:
368374
case LtVecF32x4:
@@ -508,6 +514,10 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, Index> {
508514
Index ret = 0;
509515
switch (curr->op) {
510516
case Bitselect:
517+
case SignSelectVec8x16:
518+
case SignSelectVec16x8:
519+
case SignSelectVec32x4:
520+
case SignSelectVec64x2:
511521
ret = 1;
512522
break;
513523
case QFMAF32x4:

src/literal.h

+1
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ class Literal {
480480
Literal leUI32x4(const Literal& other) const;
481481
Literal geSI32x4(const Literal& other) const;
482482
Literal geUI32x4(const Literal& other) const;
483+
Literal eqI64x2(const Literal& other) const;
483484
Literal eqF32x4(const Literal& other) const;
484485
Literal neF32x4(const Literal& other) const;
485486
Literal ltF32x4(const Literal& other) const;

src/passes/Print.cpp

+30
Original file line numberDiff line numberDiff line change
@@ -533,6 +533,18 @@ struct PrintExpressionContents
533533
case QFMSF64x2:
534534
o << "f64x2.qfms";
535535
break;
536+
case SignSelectVec8x16:
537+
o << "v8x16.signselect";
538+
break;
539+
case SignSelectVec16x8:
540+
o << "v16x8.signselect";
541+
break;
542+
case SignSelectVec32x4:
543+
o << "v32x4.signselect";
544+
break;
545+
case SignSelectVec64x2:
546+
o << "v64x2.signselect";
547+
break;
536548
}
537549
}
538550
void visitSIMDShift(SIMDShift* curr) {
@@ -941,6 +953,9 @@ struct PrintExpressionContents
941953
case AllTrueVecI64x2:
942954
o << "i64x2.all_true";
943955
break;
956+
case BitmaskVecI64x2:
957+
o << "i64x2.bitmask";
958+
break;
944959
case AbsVecF32x4:
945960
o << "f32x4.abs";
946961
break;
@@ -1031,6 +1046,18 @@ struct PrintExpressionContents
10311046
case WidenHighUVecI16x8ToVecI32x4:
10321047
o << "i32x4.widen_high_i16x8_u";
10331048
break;
1049+
case WidenLowSVecI32x4ToVecI64x2:
1050+
o << "i64x2.widen_low_i32x4_s";
1051+
break;
1052+
case WidenHighSVecI32x4ToVecI64x2:
1053+
o << "i64x2.widen_high_i32x4_s";
1054+
break;
1055+
case WidenLowUVecI32x4ToVecI64x2:
1056+
o << "i64x2.widen_low_i32x4_u";
1057+
break;
1058+
case WidenHighUVecI32x4ToVecI64x2:
1059+
o << "i64x2.widen_high_i32x4_u";
1060+
break;
10341061
case InvalidUnary:
10351062
WASM_UNREACHABLE("unvalid unary operator");
10361063
}
@@ -1360,6 +1387,9 @@ struct PrintExpressionContents
13601387
case GeUVecI32x4:
13611388
o << "i32x4.ge_u";
13621389
break;
1390+
case EqVecI64x2:
1391+
o << "i64x2.eq";
1392+
break;
13631393
case EqVecF32x4:
13641394
o << "f32x4.eq";
13651395
break;

src/wasm-binary.h

+11
Original file line numberDiff line numberDiff line change
@@ -782,6 +782,7 @@ enum ASTNodes {
782782
I32x4LeU = 0x3e,
783783
I32x4GeS = 0x3f,
784784
I32x4GeU = 0x40,
785+
I64x2Eq = 0xc0,
785786
F32x4Eq = 0x41,
786787
F32x4Ne = 0x42,
787788
F32x4Lt = 0x43,
@@ -802,6 +803,11 @@ enum ASTNodes {
802803
V128Xor = 0x51,
803804
V128Bitselect = 0x52,
804805

806+
V8x16SignSelect = 0x7d,
807+
V16x8SignSelect = 0x7e,
808+
V32x4SignSelect = 0x7f,
809+
V64x2SignSelect = 0x94,
810+
805811
V128Load8Lane = 0x58,
806812
V128Load16Lane = 0x59,
807813
V128Load32Lane = 0x5a,
@@ -885,6 +891,11 @@ enum ASTNodes {
885891
I32x4MaxU = 0xb9,
886892
I32x4DotSVecI16x8 = 0xba,
887893

894+
I64x2Bitmask = 0xc4,
895+
I64x2WidenLowSI32x4 = 0xc7,
896+
I64x2WidenHighSI32x4 = 0xc8,
897+
I64x2WidenLowUI32x4 = 0xc9,
898+
I64x2WidenHighUI32x4 = 0xca,
888899
I64x2Neg = 0xc1,
889900
I64x2AnyTrue = 0xc2,
890901
I64x2AllTrue = 0xc3,

src/wasm-interpreter.h

+10-1
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,8 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
491491
return value.anyTrueI64x2();
492492
case AllTrueVecI64x2:
493493
return value.allTrueI64x2();
494+
case BitmaskVecI64x2:
495+
WASM_UNREACHABLE("unimp");
494496
case AbsVecF32x4:
495497
return value.absF32x4();
496498
case NegVecF32x4:
@@ -551,6 +553,11 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
551553
return value.widenLowUToVecI32x4();
552554
case WidenHighUVecI16x8ToVecI32x4:
553555
return value.widenHighUToVecI32x4();
556+
case WidenLowSVecI32x4ToVecI64x2:
557+
case WidenHighSVecI32x4ToVecI64x2:
558+
case WidenLowUVecI32x4ToVecI64x2:
559+
case WidenHighUVecI32x4ToVecI64x2:
560+
WASM_UNREACHABLE("unimp");
554561
case InvalidUnary:
555562
WASM_UNREACHABLE("invalid unary op");
556563
}
@@ -796,6 +803,8 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
796803
return left.geSI32x4(right);
797804
case GeUVecI32x4:
798805
return left.geUI32x4(right);
806+
case EqVecI64x2:
807+
return left.eqI64x2(right);
799808
case EqVecF32x4:
800809
return left.eqF32x4(right);
801810
case NeVecF32x4:
@@ -1067,7 +1076,7 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
10671076
case Bitselect:
10681077
return c.bitselectV128(a, b);
10691078
default:
1070-
// TODO: implement qfma/qfms
1079+
// TODO: implement qfma/qfms and signselect
10711080
WASM_UNREACHABLE("not implemented");
10721081
}
10731082
}

0 commit comments

Comments
 (0)