Revert "[msan] Improve packed multiply-add instrumentation" #153343

Merged
thurstond merged 1 commit into main from revert-152941-msan_pmadd_update on Aug 13, 2025

Conversation

thurstond
Contributor

@thurstond added the skip-precommit-approval label (PR for CI feedback, not intended for review) on Aug 13, 2025
@thurstond enabled auto-merge (squash) on August 13, 2025, 04:31
@thurstond disabled auto-merge on August 13, 2025, 04:32
@thurstond merged commit cf00284 into main on Aug 13, 2025
11 of 13 checks passed
@thurstond deleted the revert-152941-msan_pmadd_update branch on August 13, 2025, 04:32
@llvmbot
Member

llvmbot commented Aug 13, 2025

@llvm/pr-subscribers-compiler-rt-sanitizer

Author: Thurston Dang (thurstond)

Changes

Reverts llvm/llvm-project#152941

Buildbot breakage: https://lab.llvm.org/buildbot/#/builders/66/builds/17843


Patch is 38.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153343.diff

8 Files Affected:

  • (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+19-87)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll (+12-30)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll (+19-41)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll (+4-10)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll (+16-34)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll (+19-41)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll (+5-11)
  • (modified) llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll (+9-26)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 4cef57d43f203..21bd4164385ab 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3641,10 +3641,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  // Get an MMX-sized (64-bit) vector type, or optionally, other sized
-  // vectors.
-  Type *getMMXVectorTy(unsigned EltSizeInBits,
-                       unsigned X86_MMXSizeInBits = 64) {
+  // Get an MMX-sized vector type.
+  Type *getMMXVectorTy(unsigned EltSizeInBits) {
+    const unsigned X86_MMXSizeInBits = 64;
     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
            "Illegal MMX vector element size");
     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
@@ -3844,78 +3843,20 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     setOriginForNaryOp(I);
   }
 
-  // Instrument multiply-add intrinsics.
-  //
-  // e.g., Two operands:
-  //         <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b)
-  //         <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b)
-  //
-  //       Three operands are not implemented yet:
-  //         <4 x i32> @llvm.x86.avx512.vpdpbusd.128
-  //                       (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b)
-  //         (the result of multiply-add'ing %a and %b is accumulated with %s)
-  void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor,
-                                  unsigned EltSizeInBits = 0) {
+  // Instrument multiply-add intrinsic.
+  void handleVectorPmaddIntrinsic(IntrinsicInst &I,
+                                  unsigned MMXEltSizeInBits = 0) {
+    Type *ResTy =
+        MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType();
     IRBuilder<> IRB(&I);
-
-    [[maybe_unused]] FixedVectorType *ReturnType =
-        cast<FixedVectorType>(I.getType());
-    assert(isa<FixedVectorType>(ReturnType));
-
-    assert(I.arg_size() == 2);
-
-    // Vectors A and B, and shadows
-    Value *Va = I.getOperand(0);
-    Value *Vb = I.getOperand(1);
-
-    Value *Sa = getShadow(&I, 0);
-    Value *Sb = getShadow(&I, 1);
-
-    FixedVectorType *ParamType =
-        cast<FixedVectorType>(I.getArgOperand(0)->getType());
-    assert(ParamType == I.getArgOperand(1)->getType());
-
-    assert(ParamType->getPrimitiveSizeInBits() ==
-           ReturnType->getPrimitiveSizeInBits());
-
-    // Step 1: instrument multiplication of corresponding vector elements
-    if (EltSizeInBits) {
-      ParamType = cast<FixedVectorType>(
-          getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits()));
-
-      Va = IRB.CreateBitCast(Va, ParamType);
-      Vb = IRB.CreateBitCast(Vb, ParamType);
-
-      Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType));
-      Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType));
-    } else {
-      assert(ParamType->getNumElements() ==
-             ReturnType->getNumElements() * ReductionFactor);
-    }
-
-    Value *Sab = IRB.CreateOr(Sa, Sb);
-
-    // Multiplying an uninitialized element by zero results in an initialized
-    // element.
-    Value *Zero = Constant::getNullValue(Va->getType());
-    Value *VaNotZero = IRB.CreateICmpNE(Va, Zero);
-    Value *VbNotZero = IRB.CreateICmpNE(Vb, Zero);
-    Value *VaAndVbNotZero = IRB.CreateAnd(VaNotZero, VbNotZero);
-
-    // After multiplying e.g., <8 x i16> %a, <8 x i16> %b, we should have
-    // <8 x i32> %ab, but we cheated and ended up with <8 x i16>.
-    Sab = IRB.CreateAnd(Sab, IRB.CreateSExt(VaAndVbNotZero, Sab->getType()));
-
-    // Step 2: instrument horizontal add
-    // e.g., collapse <8 x i16> into <4 x i16> (reduction factor == 2)
-    //                <16 x i8> into <4 x i8>  (reduction factor == 4)
-    Value *OutShadow = horizontalReduce(I, ReductionFactor, Sab, nullptr);
-
-    // Extend to <4 x i32>.
-    // For MMX, cast it back to <1 x i64>.
-    OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I));
-
-    setShadow(&I, OutShadow);
+    auto *Shadow0 = getShadow(&I, 0);
+    auto *Shadow1 = getShadow(&I, 1);
+    Value *S = IRB.CreateOr(Shadow0, Shadow1);
+    S = IRB.CreateBitCast(S, ResTy);
+    S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
+                       ResTy);
+    S = IRB.CreateBitCast(S, getShadowTy(&I));
+    setShadow(&I, S);
     setOriginForNaryOp(I);
   }
 
@@ -5450,28 +5391,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVectorSadIntrinsic(I);
       break;
 
-    // Multiply and Add Packed Words
-    //   < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>)
-    //   < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>)
-
-    // Multiply and Add Packed Signed and Unsigned Bytes
-    //   < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>)
-    //   <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>)
     case Intrinsic::x86_sse2_pmadd_wd:
     case Intrinsic::x86_avx2_pmadd_wd:
     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
     case Intrinsic::x86_avx2_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2);
+      handleVectorPmaddIntrinsic(I);
       break;
 
-    // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>)
     case Intrinsic::x86_ssse3_pmadd_ub_sw:
-      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8);
+      handleVectorPmaddIntrinsic(I, 8);
       break;
 
-    // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>)
     case Intrinsic::x86_mmx_pmadd_wd:
-      handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16);
+      handleVectorPmaddIntrinsic(I, 16);
       break;
 
     case Intrinsic::x86_sse_cmp_ss:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
index ab05e10ccd3c9..f916130fe53e5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll
@@ -141,16 +141,10 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <16 x i16> [[TMP3]], [[TMP12]]
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
-; CHECK-NEXT:    [[TMP10:%.*]] = or <8 x i16> [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP6:%.*]] = zext <8 x i16> [[TMP10]] to <8 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32>
+; CHECK-NEXT:    [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
 ; CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i32> [[RES]]
 ;
@@ -684,16 +678,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i8>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <32 x i8> [[TMP3]], [[TMP12]]
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:    [[TMP10:%.*]] = or <16 x i8> [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP6:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16>
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16>
+; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]])
 ; CHECK-NEXT:    store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i16> [[RES]]
 ;
@@ -719,16 +707,10 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) #
 ; CHECK-NEXT:    [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
 ; CHECK-NEXT:    [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32
 ; CHECK-NEXT:    [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer
-; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]]
-; CHECK-NEXT:    [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8>
-; CHECK-NEXT:    [[TMP12:%.*]] = and <32 x i8> [[TMP8]], [[TMP17]]
-; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
-; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
-; CHECK-NEXT:    [[TMP15:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]]
-; CHECK-NEXT:    [[TMP11:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16>
-; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]])
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16>
+; CHECK-NEXT:    [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]])
 ; CHECK-NEXT:    store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i16> [[RES]]
 ;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
index a0341c67b1365..ac3bb56719038 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll
@@ -1687,27 +1687,16 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-NEXT:    [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64>
 ; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64>
 ; CHECK-NEXT:    [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64>
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16>
-; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16>
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16>
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16>
-; CHECK-NEXT:    [[TMP22:%.*]] = or <4 x i16> [[TMP10]], [[TMP11]]
-; CHECK-NEXT:    [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT:    [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer
-; CHECK-NEXT:    [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]]
-; CHECK-NEXT:    [[TMP32:%.*]] = sext <4 x i1> [[TMP31]] to <4 x i16>
-; CHECK-NEXT:    [[TMP23:%.*]] = and <4 x i16> [[TMP22]], [[TMP32]]
-; CHECK-NEXT:    [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 0, i32 2>
-; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> <i32 1, i32 3>
-; CHECK-NEXT:    [[TMP26:%.*]] = or <2 x i16> [[TMP24]], [[TMP25]]
-; CHECK-NEXT:    [[TMP27:%.*]] = bitcast <2 x i16> [[TMP26]] to i32
-; CHECK-NEXT:    [[TMP28:%.*]] = zext i32 [[TMP27]] to i64
-; CHECK-NEXT:    [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64>
-; CHECK-NEXT:    [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32>
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64>
+; CHECK-NEXT:    [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32>
 ; CHECK-NEXT:    [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32>
-; CHECK-NEXT:    [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32>
-; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
-; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64>
+; CHECK-NEXT:    [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64>
+; CHECK-NEXT:    [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64>
 ; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0
 ; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0
 ; CHECK-NEXT:    store i64 [[TMP5]], ptr @__msan_retval_tls, align 8
@@ -3326,27 +3315,16 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 {
 ; CHECK-NEXT:    [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64>
 ; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64>
 ; CHECK-NEXT:    [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64>
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8>
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8>
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8>
-; CHECK-NEXT:    [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8>
-; CHECK-NEXT:    [[TMP14:%.*]] = or <8 x i8> [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer
-; CHECK-NEXT:    [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer
-; CHECK-NEXT:    [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]]
-; CHECK-NEXT:    [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i8>
-; CHECK-NEXT:    [[TMP16:%.*]] = and <8 x i8> [[TMP14]], [[TMP35]]
-; CHECK-NEXT:    [[TMP25:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[TMP26:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[TMP27:%.*]] = or <4 x i8> [[TMP25]], [[TMP26]]
-; CHECK-NEXT:    [[TMP29:%.*]] = bitcast <4 x i8> [[TMP27]] to i32
-; CHECK-NEXT:    [[TMP24:%.*]] = zext i32 [[TMP29]] to i64
-; CHECK-NEXT:    [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64>
-; CHECK-NEXT:    [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
-; CHECK-NEXT:    [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8>
-; CHECK-NEXT:    [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8>
-; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64>
-; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64>
+; CHECK-NEXT:    [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]]
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16>
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16>
+; CHECK-NEXT:    [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64>
+; CHECK-NEXT:    [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]]
+; CHECK-NEXT:    [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8>
+; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8>
+; CHECK-NEXT:    [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64>
+; CHECK-NEXT:    [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64>
 ; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0
 ; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0
 ; CHECK-NEXT:    store i64 [[TMP7]], ptr @__msan_retval_tls, align 8
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
index fe1245553c116..8f915a59db8e5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
@@ -763,16 +763,10 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
 ; CHECK-NEXT:    [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer
-; CHECK-NEXT:    [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]]
-; CHECK-NEXT:    [[TMP12:%.*]] = sext <8 x i1> [[TMP11]] to <8 x i16>
-; CHECK-NEXT:    [[TMP7:%.*]] = and <8 x i16> [[TMP3]], [[TMP12]]
-; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
-; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-; CHECK-NEXT:    [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP6:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32>
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]])
+; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32>
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32>
+; CHECK-NEXT:    [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
 ; CHECK-NEXT:    store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <4 x i32> [[RES]]
 ;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
index bf87027b056fa..5cc56baf0e0de 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll
@@ -149,17 +149,11 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoin...
[truncated]

@llvmbot
Member

llvmbot commented Aug 13, 2025

@llvm/pr-subscribers-llvm-transforms


thurstond added a commit to thurstond/llvm-project that referenced this pull request Aug 13, 2025
…3343)

This reverts commit cf00284, i.e., relands ba603b5. The original commit
was reverted because it was subtly wrong: multiplying by an uninitialized
zero should not result in an initialized zero. This reland fixes the issue
by using instrumentation analogous to visitAnd (a bitwise AND of an
initialized zero and an uninitialized value results in an initialized
value), and it expands a test case.
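
As a sketch of that analogy (not part of this patch): MSan's visitAnd propagates shadow roughly as S = (S1 & S2) | (V1 & S2) | (S1 & V2), so a result bit stays poisoned only when no operand supplies an initialized zero for it. In LLVM IR, for %c = and i32 %a, %b, with illustrative shadow register names %sa and %sb:

  ; shadow of %c = and i32 %a, %b (sketch; %sa/%sb are assumed names for
  ; the shadows of %a and %b)
  %s1s2 = and i32 %sa, %sb   ; both operand bits poisoned
  %v1s2 = and i32 %a, %sb    ; %b poisoned where %a is 1
  %s1v2 = and i32 %sa, %b    ; %a poisoned where %b is 1
  %t    = or i32 %s1s2, %v1s2
  %sc   = or i32 %t, %s1v2   ; clean wherever some operand is an initialized 0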

Original commit message:
The current instrumentation has false positives: if there is a single uninitialized bit in any of the operands, the entire output is poisoned. This does not take into account that multiplying an uninitialized value by zero results in an initialized zero value.

This change allows elements that are zero to clear the corresponding shadow during the multiplication step. The horizontal add step and the accumulation step (if any) are modeled using bitwise OR.

Future work can apply this improved handler to the AVX512 equivalents (x86_avx512_pmaddw_d_512, x86_avx512_pmaddubs_w_512) and the AVX VNNI intrinsics.
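
For concreteness, the improved handler that this PR reverts (see the MemorySanitizer.cpp hunk and the sse2-intrinsics-x86.ll checks above) computes the shadow of <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) along these lines; register names other than %a0/%a1 are illustrative:

  %sor   = or <8 x i16> %sa0, %sa1                     ; OR of the operand shadows
  %nz0   = icmp ne <8 x i16> %a0, zeroinitializer
  %nz1   = icmp ne <8 x i16> %a1, zeroinitializer
  %nz    = and <8 x i1> %nz0, %nz1                     ; lanes where neither element is zero
  %mask  = sext <8 x i1> %nz to <8 x i16>
  %sab   = and <8 x i16> %sor, %mask                   ; a zero element clears its shadow lane
  %even  = shufflevector <8 x i16> %sab, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %odd   = shufflevector <8 x i16> %sab, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %hadd  = or <4 x i16> %even, %odd                    ; horizontal add modeled as OR
  %sres  = zext <4 x i16> %hadd to <4 x i32>           ; widen to the result shadow type

The reland then replaces the icmp-based zero test on the (possibly uninitialized) operand values with visitAnd-style masking, so that a zero element clears the shadow only when that element is itself initialized.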