-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[msan] Handle SSE2 cvt(t?)ps2dq/cvt(t?)pd2dq and cvtpd2ps using handleSSEVectorConvertIntrinsicByProp #132815
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…insicByProp This generalizes handleSSEVectorConvertIntrinsicByProp from llvm#130705 to handle SSE intrinsics that do not have a rounding mode parameter. cvtps2dq/cvtpd2dq were previously handled strictly.
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-compiler-rt-sanitizer Author: Thurston Dang (thurstond) ChangesThis generalizes handleSSEVectorConvertIntrinsicByProp from #130705 to handle SSE intrinsics that do not have a rounding mode parameter. cvtps2dq/cvtpd2dq were previously handled strictly. Full diff: https://github.com/llvm/llvm-project/pull/132815.diff 3 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index e330c7c89b0c5..f17c5e254f85f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3274,22 +3274,32 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOriginForNaryOp(I);
}
- /// Handle x86 SSE single-precision to half-precision conversion.
+ /// Handle x86 SSE vector conversion.
///
- /// e.g.,
+ /// e.g., single-precision to half-precision conversion:
/// <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
/// <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
+ ///
+ /// floating-point to integer:
+ /// <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>)
+ /// <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>)
+ ///
/// Note: if the output has more elements, they are zero-initialized (and
/// therefore the shadow will also be initialized).
///
/// This differs from handleSSEVectorConvertIntrinsic() because it
/// propagates uninitialized shadow (instead of checking the shadow).
- void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I) {
- assert(I.arg_size() == 2);
+ void handleSSEVectorConvertIntrinsicByProp(IntrinsicInst &I, bool HasRoundingMode = false) {
+ if (HasRoundingMode) {
+ assert(I.arg_size() == 2);
+ [[maybe_unused]] Value *RoundingMode = I.getArgOperand(1);
+ assert(RoundingMode->getType()->isIntegerTy());
+ } else {
+ assert(I.arg_size() == 1);
+ }
+
Value *Src = I.getArgOperand(0);
assert(Src->getType()->isVectorTy());
- [[maybe_unused]] Value *RoundingMode = I.getArgOperand(1);
- assert(RoundingMode->getType()->isIntegerTy());
// The return type might have more elements than the input.
// Temporarily shrink the return type's number of elements.
@@ -3305,7 +3315,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *S0 = getShadow(&I, 0);
/// For scalars:
- /// Since they are converting from floating-point to integer, the output is
+ /// Since they are converting from floating-point to integer (or
+ /// vice-versa), the output is
/// - fully uninitialized if *any* bit of the input is uninitialized
/// - fully ininitialized if all bits of the input are ininitialized
/// We apply the same principle on a per-field basis for vectors.
@@ -4653,6 +4664,23 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleSSEVectorConvertIntrinsic(I, 2);
break;
+ // TODO:
+ // <1 x i64> @llvm.x86.sse.cvtpd2pi(<2 x double>)
+ // <2 x double> @llvm.x86.sse.cvtpi2pd(<1 x i64>)
+ // <4 x float> @llvm.x86.sse.cvtpi2ps(<4 x float>, <1 x i64>)
+
+ case Intrinsic::x86_vcvtps2ph_128:
+ case Intrinsic::x86_vcvtps2ph_256: {
+ handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ true);
+ break;
+ }
+
+ case Intrinsic::x86_sse2_cvtps2dq:
+ case Intrinsic::x86_sse2_cvtpd2dq: {
+ handleSSEVectorConvertIntrinsicByProp(I, /*HasRoundingMode=*/ false);
+ break;
+ }
+
case Intrinsic::x86_avx512_psll_w_512:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
@@ -4998,12 +5026,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
- case Intrinsic::x86_vcvtps2ph_128:
- case Intrinsic::x86_vcvtps2ph_256: {
- handleSSEVectorConvertIntrinsicByProp(I);
- break;
- }
-
case Intrinsic::fshl:
case Intrinsic::fshr:
handleFunnelShift(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
index 9d075f7974cd9..7398b9df2a99c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/sse2-intrinsics-x86.ll
@@ -160,15 +160,11 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtpd2dq(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -181,18 +177,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
; CHECK-LABEL: @test_mm_cvtpd_epi32_zext(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = sext <2 x i1> [[TMP2]] to <2 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x i64> [[BC]]
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
+; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[BC1]]
;
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -206,9 +200,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: 3:
; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
@@ -216,18 +210,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <2 x i1> [[TMP7]] to <2 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x i32> [[TMP8]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP9]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x i64> [[BC]]
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
+; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[BC1]]
;
%a0 = load <2 x double>, ptr %p0
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
@@ -315,15 +307,10 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
; CHECK-LABEL: @test_x86_sse2_cvtps2dq(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i1> [[TMP2]] to <4 x i32>
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll
index 3e5db7822b0ef..55a52003db2e7 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/sse2-intrinsics-i386.ll
@@ -169,15 +169,11 @@ define <4 x i32> @test_x86_sse2_cvtpd2dq(<2 x double> %a0) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) ; <<4 x i32>> [#uses=1]
@@ -191,18 +187,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext(<2 x double> %a0) nounwind #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <2 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <2 x i1> [[TMP3]] to <2 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0:%.*]])
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x i64> [[BC]]
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
+; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[BC1]]
;
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
%res = shufflevector <4 x i32> %cvt, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
@@ -217,9 +211,9 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP7:%.*]], !prof [[PROF1:![0-9]+]]
; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5:[0-9]+]]
; CHECK-NEXT: unreachable
; CHECK: 4:
; CHECK-NEXT: [[A0:%.*]] = load <2 x double>, ptr [[P0:%.*]], align 16
@@ -227,18 +221,16 @@ define <2 x i64> @test_mm_cvtpd_epi32_zext_load(ptr %p0) nounwind #0 {
; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP4]], -2147483649
; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x i64> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = sext <2 x i1> [[TMP8]] to <2 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x i32> [[TMP9]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; CHECK-NEXT: [[CVT:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> [[A0]])
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[RES1:%.*]] = shufflevector <4 x i32> [[CVT]], <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[RES]] to <2 x i64>
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x i64> [[BC]]
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[RES1]] to <2 x i64>
+; CHECK-NEXT: store <2 x i64> [[BC]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x i64> [[BC1]]
;
%a0 = load <2 x double>, ptr %p0
%cvt = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0)
@@ -330,15 +322,10 @@ define <4 x i32> @test_x86_sse2_cvtps2dq(<4 x float> %a0) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR5]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i1> [[TMP3]] to <4 x i32>
; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> [[A0:%.*]])
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <4 x i32> [[TMP5]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[RES]]
;
%res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) ; <<4 x i32>> [#uses=1]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Add comments on strictly handled instructions Fix comment
cvt(t?)ps2dq/cvt(t?)pd2dq and cvtpd2ps are currently handled strictly. This patch handles them using handleSSEVectorConvertIntrinsicByProp (from #130705), generalized to handle SSE intrinsics that do not have a rounding mode parameter.