diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 21bd4164385ab..4cef57d43f203 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3641,9 +3641,10 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } - // Get an MMX-sized vector type. - Type *getMMXVectorTy(unsigned EltSizeInBits) { - const unsigned X86_MMXSizeInBits = 64; + // Get an MMX-sized (64-bit) vector type, or optionally, other sized + // vectors. + Type *getMMXVectorTy(unsigned EltSizeInBits, + unsigned X86_MMXSizeInBits = 64) { assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 && "Illegal MMX vector element size"); return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits), @@ -3843,20 +3844,78 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } - // Instrument multiply-add intrinsic. - void handleVectorPmaddIntrinsic(IntrinsicInst &I, - unsigned MMXEltSizeInBits = 0) { - Type *ResTy = - MMXEltSizeInBits ? getMMXVectorTy(MMXEltSizeInBits * 2) : I.getType(); + // Instrument multiply-add intrinsics. + // + // e.g., Two operands: + // <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a, <8 x i16> %b) + // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> %a, <1 x i64> %b) + // + // Three operands are not implemented yet: + // <4 x i32> @llvm.x86.avx512.vpdpbusd.128 + // (<4 x i32> %s, <4 x i32> %a, <4 x i32> %b) + // (the result of multiply-add'ing %a and %b is accumulated with %s) + void handleVectorPmaddIntrinsic(IntrinsicInst &I, unsigned ReductionFactor, + unsigned EltSizeInBits = 0) { IRBuilder<> IRB(&I); - auto *Shadow0 = getShadow(&I, 0); - auto *Shadow1 = getShadow(&I, 1); - Value *S = IRB.CreateOr(Shadow0, Shadow1); - S = IRB.CreateBitCast(S, ResTy); - S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), - ResTy); - S = IRB.CreateBitCast(S, getShadowTy(&I)); - setShadow(&I, S); + + [[maybe_unused]] FixedVectorType *ReturnType = + cast(I.getType()); + assert(isa(ReturnType)); + + assert(I.arg_size() == 2); + + // Vectors A and B, and shadows + Value *Va = I.getOperand(0); + Value *Vb = I.getOperand(1); + + Value *Sa = getShadow(&I, 0); + Value *Sb = getShadow(&I, 1); + + FixedVectorType *ParamType = + cast(I.getArgOperand(0)->getType()); + assert(ParamType == I.getArgOperand(1)->getType()); + + assert(ParamType->getPrimitiveSizeInBits() == + ReturnType->getPrimitiveSizeInBits()); + + // Step 1: instrument multiplication of corresponding vector elements + if (EltSizeInBits) { + ParamType = cast( + getMMXVectorTy(EltSizeInBits, ParamType->getPrimitiveSizeInBits())); + + Va = IRB.CreateBitCast(Va, ParamType); + Vb = IRB.CreateBitCast(Vb, ParamType); + + Sa = IRB.CreateBitCast(Sa, getShadowTy(ParamType)); + Sb = IRB.CreateBitCast(Sb, getShadowTy(ParamType)); + } else { + assert(ParamType->getNumElements() == + ReturnType->getNumElements() * ReductionFactor); + } + + Value *Sab = IRB.CreateOr(Sa, Sb); + + // Multiplying an uninitialized / element by zero results in an initialized + // element. + Value *Zero = Constant::getNullValue(Va->getType()); + Value *VaNotZero = IRB.CreateICmpNE(Va, Zero); + Value *VbNotZero = IRB.CreateICmpNE(Vb, Zero); + Value *VaAndVbNotZero = IRB.CreateAnd(VaNotZero, VbNotZero); + + // After multiplying e.g., <8 x i16> %a, <8 x i16> %b, we should have + // <8 x i32> %ab, but we cheated and ended up with <8 x i16>. + Sab = IRB.CreateAnd(Sab, IRB.CreateSExt(VaAndVbNotZero, Sab->getType())); + + // Step 2: instrument horizontal add + // e.g., collapse <8 x i16> into <4 x i16> (reduction factor == 2) + // <16 x i8> into <4 x i8> (reduction factor == 4) + Value *OutShadow = horizontalReduce(I, ReductionFactor, Sab, nullptr); + + // Extend to <4 x i32>. + // For MMX, cast it back to <1 x i64>. + OutShadow = CreateShadowCast(IRB, OutShadow, getShadowTy(&I)); + + setShadow(&I, OutShadow); setOriginForNaryOp(I); } @@ -5391,19 +5450,28 @@ struct MemorySanitizerVisitor : public InstVisitor { handleVectorSadIntrinsic(I); break; + // Multiply and Add Packed Words + // < 4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) + // < 8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) + + // Multiply and Add Packed Signed and Unsigned Bytes + // < 8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) + // <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) case Intrinsic::x86_sse2_pmadd_wd: case Intrinsic::x86_avx2_pmadd_wd: case Intrinsic::x86_ssse3_pmadd_ub_sw_128: case Intrinsic::x86_avx2_pmadd_ub_sw: - handleVectorPmaddIntrinsic(I); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2); break; + // <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64>, <1 x i64>) case Intrinsic::x86_ssse3_pmadd_ub_sw: - handleVectorPmaddIntrinsic(I, 8); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/8); break; + // <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64>, <1 x i64>) case Intrinsic::x86_mmx_pmadd_wd: - handleVectorPmaddIntrinsic(I, 16); + handleVectorPmaddIntrinsic(I, /*ReductionFactor=*/2, /*EltSize=*/16); break; case Intrinsic::x86_sse_cmp_ss: diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll index f916130fe53e5..ab05e10ccd3c9 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx2-intrinsics-x86.ll @@ -141,10 +141,16 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <16 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = and <16 x i16> [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP7]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP10]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -678,10 +684,16 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <32 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = and <32 x i8> [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP7]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i8> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP10]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -707,10 +719,16 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 ; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <32 x i1> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP16]] to <32 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i8> [[TMP8]], [[TMP17]] +; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP12]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i8> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP15]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll index ac3bb56719038..a0341c67b1365 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/mmx-intrinsics.ll @@ -1687,16 +1687,27 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> -; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP22:%.*]] = or <4 x i16> [[TMP10]], [[TMP11]] +; CHECK-NEXT: [[TMP29:%.*]] = icmp ne <4 x i16> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = and <4 x i1> [[TMP29]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = sext <4 x i1> [[TMP31]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = and <4 x i16> [[TMP22]], [[TMP32]] +; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP23]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = or <2 x i16> [[TMP24]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = bitcast <2 x i16> [[TMP26]] to i32 +; CHECK-NEXT: [[TMP28:%.*]] = zext i32 [[TMP27]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP28]] to <1 x i64> +; CHECK-NEXT: [[TMP33:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP34:%.*]] = bitcast <1 x i64> [[TMP33]] to <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP34]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3315,16 +3326,27 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> -; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i8> [[TMP12]], [[TMP13]] +; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <8 x i8> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = and <8 x i1> [[TMP32]], [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = sext <8 x i1> [[TMP34]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP14]], [[TMP35]] +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = or <4 x i8> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP29:%.*]] = bitcast <4 x i8> [[TMP27]] to i32 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP29]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = bitcast i64 [[TMP24]] to <1 x i64> +; CHECK-NEXT: [[TMP36:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP31:%.*]] = bitcast <1 x i64> [[TMP30]] to <8 x i8> +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <1 x i64> [[TMP36]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP31]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP28]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll index 8f915a59db8e5..fe1245553c116 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll @@ -763,10 +763,16 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = and <8 x i1> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = sext <8 x i1> [[TMP11]] to <8 x i16> +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i16> [[TMP3]], [[TMP12]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP7]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll index 5cc56baf0e0de..bf87027b056fa 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx2-intrinsics-i386.ll @@ -149,11 +149,17 @@ define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i16> [[TMP3]] to <8 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <16 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = sext <16 x i1> [[TMP13]] to <16 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = and <16 x i16> [[TMP4]], [[TMP14]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <16 x i16> [[TMP8]], <16 x i16> poison, <8 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <8 x i16> [[TMP11]] to <8 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0]], <16 x i16> [[A1]]) ; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <8 x i32> [[RES]] ; @@ -714,11 +720,17 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i8> [[TMP3]] to <16 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i16> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0:%.*]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = or <32 x i8> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i8> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = sext <32 x i1> [[TMP13]] to <32 x i8> +; CHECK-NEXT: [[TMP8:%.*]] = and <32 x i8> [[TMP4]], [[TMP14]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <32 x i8> [[TMP8]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <32 x i8> [[TMP8]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i8> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <16 x i8> [[TMP11]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; @@ -734,7 +746,7 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0 -; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP12:%.*]], !prof [[PROF1]] +; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP8:%.*]], !prof [[PROF1]] ; CHECK: 4: ; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]] ; CHECK-NEXT: unreachable @@ -744,11 +756,17 @@ define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(ptr %ptr, <32 x i8> %a1) # ; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], -2147483649 ; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr ; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i8>, ptr [[TMP7]], align 32 -; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i8> [[TMP8]] to <16 x i16> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i16> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i16> -; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1:%.*]]) +; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[_MSLD]], [[TMP2]] +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i8> [[A0]], zeroinitializer +; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <32 x i8> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = and <32 x i1> [[TMP10]], [[TMP17]] +; CHECK-NEXT: [[TMP18:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = and <32 x i8> [[TMP9]], [[TMP18]] +; CHECK-NEXT: [[TMP14:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <32 x i8> [[TMP13]], <32 x i8> poison, <16 x i32> +; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i8> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP11:%.*]] = zext <16 x i8> [[TMP16]] to <16 x i16> +; CHECK-NEXT: [[RES:%.*]] = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> [[A0]], <32 x i8> [[A1]]) ; CHECK-NEXT: store <16 x i16> [[TMP11]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <16 x i16> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll index 0a3efaaea149f..e0e75a91cbd2e 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/mmx-intrinsics.ll @@ -1730,16 +1730,27 @@ define i64 @test49(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[MMX_VAR_I:%.*]] = bitcast <4 x i16> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i16> [[TMP16]] to <1 x i64> ; CHECK-NEXT: [[MMX_VAR1_I:%.*]] = bitcast <4 x i16> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = or <1 x i64> [[TMP6]], [[TMP7]] -; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[TMP8]] to <2 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <2 x i32> [[TMP9]], zeroinitializer -; CHECK-NEXT: [[TMP11:%.*]] = sext <2 x i1> [[TMP10]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i32> [[TMP11]] to <1 x i64> -; CHECK-NEXT: [[TMP14:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP12]] to <2 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = bitcast <1 x i64> [[MMX_VAR_I]] to <4 x i16> +; CHECK-NEXT: [[TMP10:%.*]] = bitcast <1 x i64> [[MMX_VAR1_I]] to <4 x i16> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP6]] to <4 x i16> +; CHECK-NEXT: [[TMP22:%.*]] = bitcast <1 x i64> [[TMP7]] to <4 x i16> +; CHECK-NEXT: [[TMP23:%.*]] = or <4 x i16> [[TMP11]], [[TMP22]] +; CHECK-NEXT: [[TMP30:%.*]] = icmp ne <4 x i16> [[TMP9]], zeroinitializer +; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <4 x i16> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP32:%.*]] = and <4 x i1> [[TMP30]], [[TMP31]] +; CHECK-NEXT: [[TMP33:%.*]] = sext <4 x i1> [[TMP32]] to <4 x i16> +; CHECK-NEXT: [[TMP24:%.*]] = and <4 x i16> [[TMP23]], [[TMP33]] +; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x i16> [[TMP24]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x i16> [[TMP24]], <4 x i16> poison, <2 x i32> +; CHECK-NEXT: [[TMP27:%.*]] = or <2 x i16> [[TMP25]], [[TMP26]] +; CHECK-NEXT: [[TMP28:%.*]] = bitcast <2 x i16> [[TMP27]] to i32 +; CHECK-NEXT: [[TMP29:%.*]] = zext i32 [[TMP28]] to i64 +; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP29]] to <1 x i64> +; CHECK-NEXT: [[TMP34:%.*]] = tail call <1 x i64> @llvm.x86.mmx.pmadd.wd(<1 x i64> [[MMX_VAR_I]], <1 x i64> [[MMX_VAR1_I]]) #[[ATTR2]] ; CHECK-NEXT: [[TMP20:%.*]] = bitcast <1 x i64> [[TMP14]] to <2 x i32> -; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP3]] to <1 x i64> -; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP35:%.*]] = bitcast <1 x i64> [[TMP34]] to <2 x i32> +; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i32> [[TMP20]] to <1 x i64> +; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i32> [[TMP35]] to <1 x i64> ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <1 x i64> [[TMP17]], i32 0 ; CHECK-NEXT: [[TMP18:%.*]] = extractelement <1 x i64> [[TMP21]], i32 0 ; CHECK-NEXT: store i64 [[TMP5]], ptr @__msan_retval_tls, align 8 @@ -3401,16 +3412,27 @@ define i64 @test7(<1 x i64> %a, <1 x i64> %b) #0 { ; CHECK-NEXT: [[TMP22:%.*]] = bitcast <8 x i8> [[TMP1]] to <1 x i64> ; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP17]] to <1 x i64> ; CHECK-NEXT: [[TMP23:%.*]] = bitcast <8 x i8> [[TMP0]] to <1 x i64> -; CHECK-NEXT: [[TMP10:%.*]] = or <1 x i64> [[TMP21]], [[TMP8]] -; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP10]] to <4 x i16> -; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <4 x i16> [[TMP11]], zeroinitializer -; CHECK-NEXT: [[TMP13:%.*]] = sext <4 x i1> [[TMP12]] to <4 x i16> -; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i16> [[TMP13]] to <1 x i64> -; CHECK-NEXT: [[TMP24:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] -; CHECK-NEXT: [[TMP16:%.*]] = bitcast <1 x i64> [[TMP14]] to <8 x i8> -; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP24]] to <8 x i8> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP16]] to <1 x i64> -; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP11:%.*]] = bitcast <1 x i64> [[TMP22]] to <8 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = bitcast <1 x i64> [[TMP23]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = bitcast <1 x i64> [[TMP21]] to <8 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = bitcast <1 x i64> [[TMP8]] to <8 x i8> +; CHECK-NEXT: [[TMP26:%.*]] = or <8 x i8> [[TMP13]], [[TMP14]] +; CHECK-NEXT: [[TMP33:%.*]] = icmp ne <8 x i8> [[TMP11]], zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = icmp ne <8 x i8> [[TMP12]], zeroinitializer +; CHECK-NEXT: [[TMP35:%.*]] = and <8 x i1> [[TMP33]], [[TMP34]] +; CHECK-NEXT: [[TMP36:%.*]] = sext <8 x i1> [[TMP35]] to <8 x i8> +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i8> [[TMP26]], [[TMP36]] +; CHECK-NEXT: [[TMP27:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <8 x i8> [[TMP16]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP29:%.*]] = or <4 x i8> [[TMP27]], [[TMP28]] +; CHECK-NEXT: [[TMP24:%.*]] = bitcast <4 x i8> [[TMP29]] to i32 +; CHECK-NEXT: [[TMP30:%.*]] = zext i32 [[TMP24]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = bitcast i64 [[TMP30]] to <1 x i64> +; CHECK-NEXT: [[TMP31:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[TMP22]], <1 x i64> [[TMP23]]) #[[ATTR5]] +; CHECK-NEXT: [[TMP25:%.*]] = bitcast <1 x i64> [[TMP32]] to <8 x i8> +; CHECK-NEXT: [[TMP37:%.*]] = bitcast <1 x i64> [[TMP31]] to <8 x i8> +; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i8> [[TMP25]] to <1 x i64> +; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i8> [[TMP37]] to <1 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <1 x i64> [[TMP6]], i32 0 ; CHECK-NEXT: [[TMP20:%.*]] = extractelement <1 x i64> [[TMP19]], i32 0 ; CHECK-NEXT: store i64 [[TMP7]], ptr @__msan_retval_tls, align 8 diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll index e771e60e2f294..5edc13b7abbc4 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll @@ -800,11 +800,17 @@ define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) #0 { ; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: [[TMP7:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[TMP3]] to <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP4]], zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i1> [[TMP5]] to <4 x i32> -; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]]) +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i16> [[TMP1]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i16> [[A0:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i16> [[A1:%.*]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = and <8 x i1> [[TMP5]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = sext <8 x i1> [[TMP13]] to <8 x i16> +; CHECK-NEXT: [[TMP8:%.*]] = and <8 x i16> [[TMP4]], [[TMP14]] +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP11:%.*]] = or <4 x i16> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i16> [[TMP11]] to <4 x i32> +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0]], <8 x i16> [[A1]]) ; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[RES]] ; diff --git a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll index d614bb85d8584..21996b13a9961 100644 --- a/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll +++ b/llvm/test/Instrumentation/MemorySanitizer/vector_arith.ll @@ -18,9 +18,15 @@ define <4 x i32> @Test_sse2_pmadd_wd(<8 x i16> %a, <8 x i16> %b) sanitize_memory ; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() ; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i16> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i16> [[A]], zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i16> [[B]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = and <8 x i1> [[TMP3]], [[TMP4]] +; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16> +; CHECK-NEXT: [[TMP6:%.*]] = and <8 x i16> [[TMP2]], [[TMP11]] +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i16> [[TMP6]], <8 x i16> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i16> [[TMP7]], [[TMP8]] +; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i16> [[TMP9]] to <4 x i32> ; CHECK-NEXT: [[C:%.*]] = tail call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A]], <8 x i16> [[B]]) #[[ATTR2:[0-9]+]] ; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <4 x i32> [[C]] @@ -39,13 +45,24 @@ define <1 x i64> @Test_ssse3_pmadd_ub_sw(<1 x i64> %a, <1 x i64> %b) sanitize_me ; CHECK-NEXT: [[TMP0:%.*]] = load <1 x i64>, ptr @__msan_param_tls, align 8 ; CHECK-NEXT: [[TMP1:%.*]] = load <1 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8 ; CHECK-NEXT: call void @llvm.donothing() -; CHECK-NEXT: [[TMP2:%.*]] = or <1 x i64> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[TMP2]] to <4 x i16> -; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i16> [[TMP3]], zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP4]] to <4 x i16> -; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[TMP5]] to <1 x i64> +; CHECK-NEXT: [[TMP2:%.*]] = bitcast <1 x i64> [[A]] to <8 x i8> +; CHECK-NEXT: [[TMP3:%.*]] = bitcast <1 x i64> [[B]] to <8 x i8> +; CHECK-NEXT: [[TMP4:%.*]] = bitcast <1 x i64> [[TMP0]] to <8 x i8> +; CHECK-NEXT: [[TMP5:%.*]] = bitcast <1 x i64> [[TMP1]] to <8 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i8> [[TMP4]], [[TMP5]] +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <8 x i8> [[TMP2]], zeroinitializer +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <8 x i8> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = and <8 x i1> [[TMP14]], [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = sext <8 x i1> [[TMP16]] to <8 x i8> +; CHECK-NEXT: [[TMP7:%.*]] = and <8 x i8> [[TMP13]], [[TMP17]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <8 x i8> [[TMP7]], <8 x i8> poison, <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i8> [[TMP8]], [[TMP9]] +; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x i8> [[TMP10]] to i32 +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +; CHECK-NEXT: [[TMP20:%.*]] = bitcast i64 [[TMP19]] to <1 x i64> ; CHECK-NEXT: [[C:%.*]] = tail call <1 x i64> @llvm.x86.ssse3.pmadd.ub.sw(<1 x i64> [[A]], <1 x i64> [[B]]) #[[ATTR2]] -; CHECK-NEXT: store <1 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8 +; CHECK-NEXT: store <1 x i64> [[TMP20]], ptr @__msan_retval_tls, align 8 ; CHECK-NEXT: ret <1 x i64> [[C]] ; entry: