-
Notifications
You must be signed in to change notification settings - Fork 14.8k
Revert "[msan] Improve packed multiply-add instrumentation" #153343
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This reverts commit ba603b5.
@llvm/pr-subscribers-compiler-rt-sanitizer
Author: Thurston Dang (thurstond)
Changes: Reverts llvm/llvm-project#152941
Buildbot breakage: https://lab.llvm.org/buildbot/#/builders/66/builds/17843
Patch is 38.36 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/153343.diff
8 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4cef57d43f203..21bd4164385ab 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3641,10 +3641,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- // Get an MMX-sized (64-bit) vector type, or optionally, other sized
- // vectors.
- Type *getMMXVectorTy(unsigned EltSizeInBits,
- unsigned X86_MMXSizeInBits = 64) {
+ // Get an MMX-sized vector type.
+ Type *getMMXVectorTy(unsigned EltSizeInBits) {
+ const unsigned X86_MMXSizeInBits = 64;
assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
"Illegal MMX vector element size");
return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
@@ -3844,78 +3843,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- // Instrument multiply-add intrinsics.
- //
- // e.g., Two operands:
- // <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
- // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
- //
- // Three operands are not implemented yet:
- // <4 x i32> @llvm.x86.avx512.vpdpbusd.128
- // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
- // (the result of multiply-add'ing %a and %b is accumulated with %s)
- void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
- unsigned EltSizeInBits = 0) {
+ // Instrument multiply-add intrinsic.
+ void handleVectorPmaddIntrinsic(IntrinsicInst &I,
+ unsigned MMXEltSizeInBits = 0) {
+ Type *ResTy =
+ MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
IRBuilder<> IRB(&I);
-
- [[maybe_unused]] FixedVectorType *ReturnType =
- cast<FixedVectorType>(I.getType());
- assert(isa<FixedVectorType>(ReturnType));
-
- assert(I.arg_size() == 2);
-
- // Vectors A and B, and shadows
- Value *Va = I.getOperand(0);
- Value *Vb = I.getOperand(1);
-
- Value *Sa = getShadow(&I, 0);
- Value *Sb = getShadow(&I, 1);
-
- FixedVectorType *ParamType =
- cast<FixedVectorType>(I.getArgOperand(0)->getType());
- assert(ParamType == I.getArgOperand(1)->getType());
-
- assert(ParamType->getPrimitiveSizeInBits() ==
- ReturnType->getPrimitiveSizeInBits());
-
- // Step 1: instrument multiplication of corresponding vector elements
- if (EltSizeInBits) {
- ParamType = cast<FixedVectorType>(
- getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
-
- Va = IRB.CreateBitCast(Va, ParamType);
- Vb = IRB.CreateBitCast(Vb, ParamType);
-
- Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType));
- Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType));
- } else {
- assert(ParamType->getNumElements() ==
- ReturnType->getNumElements() * ReductionFactor);
- }
-
- Value *Sab = IRB.CreateOr(Sa, Sb);
-
- // Multiplying an uninitialized / element by zero results in an initialized
- // element.
- Value *Zero = Constant::getNullValue(Va->getType());
- Value *VaNotZero = IRB.CreateICmpNE(Va, Zero);
- Value *VbNotZero = IRB.CreateICmpNE(Vb, Zero);
- Value *VaAndVbNotZero = IRB.CreateAnd(VaNotZero, VbNotZero);
-
- // After multiplying e.g., <8 x i16> %a, <8 x i16> %b, we should have
- // <8 x i32> %ab, but we cheated and ended up with <8 x i16>.
- Sab = IRB.CreateAnd(Sab, IRB.CreateSExt(VaAndVbNotZero, Sab->getType()));
-
- // Step 2: instrument horizontal add
- // e.g., collapse <8 x i16> into <4 x i16> (reduction factor == 2)
- // <16 x i8> into <4 x i8> (reduction factor == 4)
- Value *OutShadow = horizontalReduce(I, ReductionFactor, Sab, nullptr);
-
- // Extend to <4 x i32>.
- // For MMX, cast it back to <1 x i64>.
- OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
-
- setShadow(&I, OutShadow);
+ auto *Shadow0 = getShadow(&I, 0);
+ auto *Shadow1 = getShadow(&I, 1);
+ Value *S = IRB.CreateOr(Shadow0, Shadow1);
+ S = IRB.CreateBitCast(S, ResTy);
+ S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+ ResTy);
+ S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -5450,28 +5391,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorSadIntrinsic(I);
break;
- // Multiply and Add Packed Words
- // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
- // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
-
- // Multiply and Add Packed Signed and Unsigned Bytes
- // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
- // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
case Intrinsic::x86_sse2_pmadd_wd:
case Intrinsic::x86_avx2_pmadd_wd:
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
case Intrinsic::x86_avx2_pmadd_ub_sw:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
+ handleVectorPmaddIntrinsic(I);
break;
- // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
case Intrinsic::x86_ssse3_pmadd_ub_sw:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8);
+ handleVectorPmaddIntrinsic(I, 8);
break;
- // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
case Intrinsic::x86_mmx_pmadd_wd:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
+ handleVectorPmaddIntrinsic(I, 16);
break;
case Intrinsic::x86_sse_cmp_ss:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index ab05e10ccd3c9..f916130fe53e5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -141,16 +141,10 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
-; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i16> [[TMP3]], [[TMP12]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i16> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP10]] to <8 x i32>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
;
@@ -684,16 +678,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i8> [[TMP3]], [[TMP12]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16>
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
@@ -719,16 +707,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i8> [[TMP8]], [[TMP17]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16>
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16>
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
index a0341c67b1365..ac3bb56719038 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
@@ -1687,27 +1687,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i16> [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = sext <4 x i1> [[TMP31]] to <4 x i16>
-; CHECK-NEXT: [[TMP23:%.*]] = and <4 x i16> [[TMP22]], [[TMP32]]
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i16> [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i16> [[TMP26]] to i32
-; CHECK-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64>
-; CHECK-NEXT: [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32>
-; CHECK-NEXT: [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32>
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64>
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
@@ -3326,27 +3315,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64>
; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8>
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i8>
-; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP14]], [[TMP35]]
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[TMP27:%.*]] = or <4 x i8> [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast <4 x i8> [[TMP27]] to i32
-; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP29]] to i64
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64>
-; CHECK-NEXT: [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8>
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
+; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0
; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
index fe1245553c116..8f915a59db8e5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
@@ -763,16 +763,10 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = sext <8 x i1> [[TMP11]] to <8 x i16>
-; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i16> [[TMP3]], [[TMP12]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
index bf87027b056fa..5cc56baf0e0de 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
@@ -149,17 +149,11 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoin...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms
Author: Thurston Dang (thurstond)
Changes: Reverts llvm/llvm-project#152941
Buildbot breakage: https://lab.llvm.org/buildbot/#/builders/66/builds/17843
Patch is 38.36 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/153343.diff
8 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4cef57d43f203..21bd4164385ab 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3641,10 +3641,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- // Get an MMX-sized (64-bit) vector type, or optionally, other sized
- // vectors.
- Type *getMMXVectorTy(unsigned EltSizeInBits,
- unsigned X86_MMXSizeInBits = 64) {
+ // Get an MMX-sized vector type.
+ Type *getMMXVectorTy(unsigned EltSizeInBits) {
+ const unsigned X86_MMXSizeInBits = 64;
assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
"Illegal MMX vector element size");
return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
@@ -3844,78 +3843,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- // Instrument multiply-add intrinsics.
- //
- // e.g., Two operands:
- // <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
- // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
- //
- // Three operands are not implemented yet:
- // <4 x i32> @llvm.x86.avx512.vpdpbusd.128
- // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
- // (the result of multiply-add'ing %a and %b is accumulated with %s)
- void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
- unsigned EltSizeInBits = 0) {
+ // Instrument multiply-add intrinsic.
+ void handleVectorPmaddIntrinsic(IntrinsicInst &I,
+ unsigned MMXEltSizeInBits = 0) {
+ Type *ResTy =
+ MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
IRBuilder<> IRB(&I);
-
- [[maybe_unused]] FixedVectorType *ReturnType =
- cast<FixedVectorType>(I.getType());
- assert(isa<FixedVectorType>(ReturnType));
-
- assert(I.arg_size() == 2);
-
- // Vectors A and B, and shadows
- Value *Va = I.getOperand(0);
- Value *Vb = I.getOperand(1);
-
- Value *Sa = getShadow(&I, 0);
- Value *Sb = getShadow(&I, 1);
-
- FixedVectorType *ParamType =
- cast<FixedVectorType>(I.getArgOperand(0)->getType());
- assert(ParamType == I.getArgOperand(1)->getType());
-
- assert(ParamType->getPrimitiveSizeInBits() ==
- ReturnType->getPrimitiveSizeInBits());
-
- // Step 1: instrument multiplication of corresponding vector elements
- if (EltSizeInBits) {
- ParamType = cast<FixedVectorType>(
- getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
-
- Va = IRB.CreateBitCast(Va, ParamType);
- Vb = IRB.CreateBitCast(Vb, ParamType);
-
- Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType));
- Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType));
- } else {
- assert(ParamType->getNumElements() ==
- ReturnType->getNumElements() * ReductionFactor);
- }
-
- Value *Sab = IRB.CreateOr(Sa, Sb);
-
- // Multiplying an uninitialized / element by zero results in an initialized
- // element.
- Value *Zero = Constant::getNullValue(Va->getType());
- Value *VaNotZero = IRB.CreateICmpNE(Va, Zero);
- Value *VbNotZero = IRB.CreateICmpNE(Vb, Zero);
- Value *VaAndVbNotZero = IRB.CreateAnd(VaNotZero, VbNotZero);
-
- // After multiplying e.g., <8 x i16> %a, <8 x i16> %b, we should have
- // <8 x i32> %ab, but we cheated and ended up with <8 x i16>.
- Sab = IRB.CreateAnd(Sab, IRB.CreateSExt(VaAndVbNotZero, Sab->getType()));
-
- // Step 2: instrument horizontal add
- // e.g., collapse <8 x i16> into <4 x i16> (reduction factor == 2)
- // <16 x i8> into <4 x i8> (reduction factor == 4)
- Value *OutShadow = horizontalReduce(I, ReductionFactor, Sab, nullptr);
-
- // Extend to <4 x i32>.
- // For MMX, cast it back to <1 x i64>.
- OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
-
- setShadow(&I, OutShadow);
+ auto *Shadow0 = getShadow(&I, 0);
+ auto *Shadow1 = getShadow(&I, 1);
+ Value *S = IRB.CreateOr(Shadow0, Shadow1);
+ S = IRB.CreateBitCast(S, ResTy);
+ S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+ ResTy);
+ S = IRB.CreateBitCast(S, getShadowTy(&I));
+ setShadow(&I, S);
setOriginForNaryOp(I);
}
@@ -5450,28 +5391,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorSadIntrinsic(I);
break;
- // Multiply and Add Packed Words
- // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
- // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
-
- // Multiply and Add Packed Signed and Unsigned Bytes
- // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
- // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
case Intrinsic::x86_sse2_pmadd_wd:
case Intrinsic::x86_avx2_pmadd_wd:
case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
case Intrinsic::x86_avx2_pmadd_ub_sw:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
+ handleVectorPmaddIntrinsic(I);
break;
- // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
case Intrinsic::x86_ssse3_pmadd_ub_sw:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8);
+ handleVectorPmaddIntrinsic(I, 8);
break;
- // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
case Intrinsic::x86_mmx_pmadd_wd:
- handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
+ handleVectorPmaddIntrinsic(I, 16);
break;
case Intrinsic::x86_sse_cmp_ss:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index ab05e10ccd3c9..f916130fe53e5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -141,16 +141,10 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
-; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i16> [[TMP3]], [[TMP12]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i16> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP10]] to <8 x i32>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[RES]]
;
@@ -684,16 +678,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i8>
-; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i8> [[TMP3]], [[TMP12]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16>
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
@@ -719,16 +707,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i8> [[TMP8]], [[TMP17]]
-; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16>
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16>
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]])
; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
index a0341c67b1365..ac3bb56719038 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
@@ -1687,27 +1687,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
-; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i16> [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = sext <4 x i1> [[TMP31]] to <4 x i16>
-; CHECK-NEXT: [[TMP23:%.*]] = and <4 x i16> [[TMP22]], [[TMP32]]
-; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i16> [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i16> [[TMP26]] to i32
-; CHECK-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64>
-; CHECK-NEXT: [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32>
-; CHECK-NEXT: [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32>
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64>
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
@@ -3326,27 +3315,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 {
; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64>
; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8>
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i8>
-; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP14]], [[TMP35]]
-; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[TMP27:%.*]] = or <4 x i8> [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP29:%.*]] = bitcast <4 x i8> [[TMP27]] to i32
-; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP29]] to i64
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64>
-; CHECK-NEXT: [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
-; CHECK-NEXT: [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8>
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
+; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0
; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
index fe1245553c116..8f915a59db8e5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
@@ -763,16 +763,10 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = sext <8 x i1> [[TMP11]] to <8 x i16>
-; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i16> [[TMP3]], [[TMP12]]
-; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
index bf87027b056fa..5cc56baf0e0de 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
@@ -149,17 +149,11 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoin...
[truncated]
|
…3343) This reverts commit cf00284, i.e., relands ba603b5. It was reverted because it was subtly wrong: multiplying an uninitialized zero should not result in an initialized zero. This reland fixes the issue by using instrumentation analogous to visitAnd (bitwise AND of an initialized zero and an uninitialized value results in an initialized value), and expands a test case. Original commit message: The current instrumentation has false positives: if there is a single uninitialized bit in any of the operands, the entire output is poisoned. This does not take into account that multiplying an uninitialized value with zero results in an initialized zero value. This step allows elements that are zero to clear the corresponding shadow during the multiplication step. The horizontal add step and accumulation step (if any) are modeled using bitwise OR. Future work can apply this improved handler to the AVX512 equivalent intrinsics (x86_avx512_pmaddw_d_512, x86_avx512_pmaddubs_w_512) and AVX VNNI intrinsics.
Reverts #152941
Buildbot breakage: https://lab.llvm.org/buildbot/#/builders/66/builds/17843